#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#if defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
#error "Cannot force the use of the short and the long ZSTD_decompressSequences variants!"
#endif
    U32 const cSize = cBlockHeader >> 3;
size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize);

size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* dctx,
                          const void* src, size_t srcSize)
{
    DEBUGLOG(5, "ZSTD_decodeLiteralsBlock");
            DEBUGLOG(5, "set_repeat flag : re-using stats from previous compressed literals block");
        {   size_t lhSize, litSize, litCSize;
            U32 singleStream=0;
            U32 const lhlCode = (istart[0] >> 2) & 3;
            U32 const lhc = MEM_readLE32(istart);
            size_t hufSuccess;
            switch(lhlCode)
            {
            case 0: case 1: default:   /* note : default is impossible, since lhlCode into [0..3] */
                /* 2 - 2 - 10 - 10 */
                singleStream = !lhlCode;
                lhSize = 3;
                litSize  = (lhc >> 4) & 0x3FF;
                litCSize = (lhc >> 14) & 0x3FF;
                break;
            case 2:
                /* 2 - 2 - 14 - 14 */
                lhSize = 4;
                litSize  = (lhc >> 4) & 0x3FFF;
                litCSize = lhc >> 18;
                break;
            case 3:
                /* 2 - 2 - 18 - 18 */
                lhSize = 5;
                litSize  = (lhc >> 4) & 0x3FFFF;
                litCSize = (lhc >> 22) + ((size_t)istart[4] << 10);
                break;
            }
            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
            RETURN_ERROR_IF(litCSize + lhSize > srcSize, corruption_detected, "");
            /* prefetch huffman table if cold */
            if (dctx->ddictIsCold && (litSize > 768 /* heuristic */)) {
                PREFETCH_AREA(dctx->HUFptr, sizeof(dctx->entropy.hufTable));
            }

            if (litEncType==set_repeat) {
                if (singleStream) {
                    hufSuccess = HUF_decompress1X_usingDTable_bmi2(
                        dctx->litBuffer, litSize, istart+lhSize, litCSize,
                        dctx->HUFptr, dctx->bmi2);
                } else {
                    hufSuccess = HUF_decompress4X_usingDTable_bmi2(
                        dctx->litBuffer, litSize, istart+lhSize, litCSize,
                        dctx->HUFptr, dctx->bmi2);
                }
            } else {
                if (singleStream) {
#if defined(HUF_FORCE_DECOMPRESS_X2)
                    hufSuccess = HUF_decompress1X_DCtx_wksp(
                        dctx->entropy.hufTable, dctx->litBuffer, litSize,
                        istart+lhSize, litCSize, dctx->workspace,
                        sizeof(dctx->workspace));
#else
                    hufSuccess = HUF_decompress1X1_DCtx_wksp_bmi2(
                        dctx->entropy.hufTable, dctx->litBuffer, litSize,
                        istart+lhSize, litCSize, dctx->workspace,
                        sizeof(dctx->workspace), dctx->bmi2);
#endif
                } else {
                    hufSuccess = HUF_decompress4X_hufOnly_wksp_bmi2(
                        dctx->entropy.hufTable, dctx->litBuffer, litSize,
                        istart+lhSize, litCSize, dctx->workspace,
                        sizeof(dctx->workspace), dctx->bmi2);
                }
            }

            RETURN_ERROR_IF(HUF_isError(hufSuccess), corruption_detected, "");

            dctx->litPtr = dctx->litBuffer;
            dctx->litSize = litSize;
            dctx->litEntropy = 1;
            if (litEncType==set_compressed) dctx->HUFptr = dctx->entropy.hufTable;
            memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
            return litCSize + lhSize;
        }
    case set_basic:
        {   size_t litSize, lhSize;
            U32 const lhlCode = ((istart[0]) >> 2) & 3;
            switch(lhlCode)
            {
            case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                lhSize = 1;
                litSize = istart[0] >> 3;
                break;
            case 1:
                lhSize = 2;
                litSize = MEM_readLE16(istart) >> 4;
                break;
            case 3:
                lhSize = 3;
                litSize = MEM_readLE24(istart) >> 4;
                break;
            }

            if (lhSize+litSize+WILDCOPY_OVERLENGTH > srcSize) {  /* risk reading beyond src buffer with wildcopy */
                RETURN_ERROR_IF(litSize+lhSize > srcSize, corruption_detected, "");
                memcpy(dctx->litBuffer, istart+lhSize, litSize);
                dctx->litPtr = dctx->litBuffer;
                dctx->litSize = litSize;
                memset(dctx->litBuffer + dctx->litSize, 0, WILDCOPY_OVERLENGTH);
                return lhSize+litSize;
            }
            /* direct reference into compressed stream */
            dctx->litPtr = istart+lhSize;
            dctx->litSize = litSize;
            return lhSize+litSize;
        }
    case set_rle:
        {   U32 const lhlCode = ((istart[0]) >> 2) & 3;
            size_t litSize, lhSize;
            switch(lhlCode)
            {
            case 0: case 2: default:   /* note : default is impossible, since lhlCode into [0..3] */
                lhSize = 1;
                litSize = istart[0] >> 3;
                break;
            case 1:
                lhSize = 2;
                litSize = MEM_readLE16(istart) >> 4;
                break;
            case 3:
                lhSize = 3;
                litSize = MEM_readLE24(istart) >> 4;
                RETURN_ERROR_IF(srcSize<4, corruption_detected, "srcSize >= MIN_CBLOCK_SIZE == 3; here we need lhSize+1 = 4");
                break;
            }
            RETURN_ERROR_IF(litSize > ZSTD_BLOCKSIZE_MAX, corruption_detected, "");
            memset(dctx->litBuffer, istart[lhSize], litSize + WILDCOPY_OVERLENGTH);
            dctx->litPtr = dctx->litBuffer;
            dctx->litSize = litSize;
            return lhSize+1;
        }
    default:
        RETURN_ERROR(corruption_detected, "impossible");
    }
}
/* Default FSE distribution tables.
 * These are pre-calculated FSE decoding tables using default distributions as defined in the specification :
 * https://github.com/facebook/zstd/blob/master/doc/zstd_compression_format.md#default-distributions
 * They were generated programmatically with the following method :
 * - start from the default distributions, present in /lib/common/zstd_internal.h
 * - generate the tables normally, using ZSTD_buildFSETable()
 * - print out the content of the tables
 * - prettify the output, paste it below, test with the fuzzer to ensure it's correct */
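/* A minimal regeneration sketch (disabled, illustration only) : it assumes
 * LL_defaultNorm, LL_base and LL_bits from /lib/common/zstd_internal.h are in
 * scope and prints rows in the same layout as the tables below;
 * ZSTD_printDefaultDTable_LL is a hypothetical helper, not part of zstd. */
#if 0
#include <stdio.h>
static void ZSTD_printDefaultDTable_LL(void)
{
    static ZSTD_seqSymbol dt[(1<<LL_DEFAULTNORMLOG)+1];
    U32 u;
    ZSTD_buildFSETable(dt, LL_defaultNorm, MaxLL, LL_base, LL_bits, LL_DEFAULTNORMLOG);
    for (u=0; u<(1u<<LL_DEFAULTNORMLOG); u++) {
        printf("    { %2u, %2u, %u, %5u },\n",
               (U32)dt[u+1].nextState, (U32)dt[u+1].nbAdditionalBits,
               (U32)dt[u+1].nbBits, (U32)dt[u+1].baseValue);
    }
}
#endif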
/* Default FSE distribution table for Literal Lengths */
static const ZSTD_seqSymbol LL_defaultDTable[(1<<LL_DEFAULTNORMLOG)+1] = {
     {  1,  1,  1, LL_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
     /* nextState, nbAddBits, nbBits, baseVal */
     {  0,  0,  4,    0},  { 16,  0,  4,    0},
     { 32,  0,  5,    1},  {  0,  0,  5,    3},
     {  0,  0,  5,    4},  {  0,  0,  5,    6},
     {  0,  0,  5,    7},  {  0,  0,  5,    9},
     {  0,  0,  5,   10},  {  0,  0,  5,   12},
     {  0,  0,  6,   14},  {  0,  1,  5,   16},
     {  0,  1,  5,   20},  {  0,  1,  5,   22},
     {  0,  2,  5,   28},  {  0,  3,  5,   32},
     {  0,  4,  5,   48},  { 32,  6,  5,   64},
     {  0,  7,  5,  128},  {  0,  8,  6,  256},
     {  0, 10,  6, 1024},  {  0, 12,  6, 4096},
     { 32,  0,  4,    0},  {  0,  0,  4,    1},
     {  0,  0,  5,    2},  { 32,  0,  5,    4},
     {  0,  0,  5,    5},  { 32,  0,  5,    7},
     {  0,  0,  5,    8},  { 32,  0,  5,   10},
     {  0,  0,  5,   11},  {  0,  0,  6,   13},
     { 32,  1,  5,   16},  {  0,  1,  5,   18},
     { 32,  1,  5,   22},  {  0,  2,  5,   24},
     { 32,  3,  5,   32},  {  0,  3,  5,   40},
     {  0,  6,  4,   64},  { 16,  6,  4,   64},
     { 32,  7,  5,  128},  {  0,  9,  6,  512},
     {  0, 11,  6, 2048},  { 48,  0,  4,    0},
     { 16,  0,  4,    1},  { 32,  0,  5,    2},
     { 32,  0,  5,    3},  { 32,  0,  5,    5},
     { 32,  0,  5,    6},  { 32,  0,  5,    8},
     { 32,  0,  5,    9},  { 32,  0,  5,   11},
     { 32,  0,  5,   12},  {  0,  0,  6,   15},
     { 32,  1,  5,   18},  { 32,  1,  5,   20},
     { 32,  2,  5,   24},  { 32,  2,  5,   28},
     { 32,  3,  5,   40},  { 32,  4,  5,   48},
     {  0, 16,  6,65536},  {  0, 15,  6,32768},
     {  0, 14,  6,16384},  {  0, 13,  6, 8192},
};   /* LL_defaultDTable */
/* Default FSE distribution table for Offset Codes */
static const ZSTD_seqSymbol OF_defaultDTable[(1<<OF_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, OF_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  5,    0},     {  0,  6,  4,   61},
    {  0,  9,  5,  509},     {  0, 15,  5,32765},
    {  0, 21,  5,2097149},   {  0,  3,  5,    5},
    {  0,  7,  4,  125},     {  0, 12,  5, 4093},
    {  0, 18,  5,262141},    {  0, 23,  5,8388605},
    {  0,  5,  5,   29},     {  0,  8,  4,  253},
    {  0, 14,  5,16381},     {  0, 20,  5,1048573},
    {  0,  2,  5,    1},     { 16,  7,  4,  125},
    {  0, 11,  5, 2045},     {  0, 17,  5,131069},
    {  0, 22,  5,4194301},   {  0,  4,  5,   13},
    { 16,  8,  4,  253},     {  0, 13,  5, 8189},
    {  0, 19,  5,524285},    {  0,  1,  5,    1},
    { 16,  6,  4,   61},     {  0, 10,  5, 1021},
    {  0, 16,  5,65533},     {  0, 28,  5,268435453},
    {  0, 27,  5,134217725}, {  0, 26,  5,67108861},
    {  0, 25,  5,33554429},  {  0, 24,  5,16777213},
};   /* OF_defaultDTable */
/* Default FSE distribution table for Match Lengths */
static const ZSTD_seqSymbol ML_defaultDTable[(1<<ML_DEFAULTNORMLOG)+1] = {
    {  1,  1,  1, ML_DEFAULTNORMLOG},  /* header : fastMode, tableLog */
    /* nextState, nbAddBits, nbBits, baseVal */
    {  0,  0,  6,    3},  {  0,  0,  4,    4},
    { 32,  0,  5,    5},  {  0,  0,  5,    6},
    {  0,  0,  5,    8},  {  0,  0,  5,    9},
    {  0,  0,  5,   11},  {  0,  0,  6,   13},
    {  0,  0,  6,   16},  {  0,  0,  6,   19},
    {  0,  0,  6,   22},  {  0,  0,  6,   25},
    {  0,  0,  6,   28},  {  0,  0,  6,   31},
    {  0,  0,  6,   34},  {  0,  1,  6,   37},
    {  0,  1,  6,   41},  {  0,  2,  6,   47},
    {  0,  3,  6,   59},  {  0,  4,  6,   83},
    {  0,  7,  6,  131},  {  0,  9,  6,  515},
    { 16,  0,  4,    4},  {  0,  0,  4,    5},
    { 32,  0,  5,    6},  {  0,  0,  5,    7},
    { 32,  0,  5,    9},  {  0,  0,  5,   10},
    {  0,  0,  6,   12},  {  0,  0,  6,   15},
    {  0,  0,  6,   18},  {  0,  0,  6,   21},
    {  0,  0,  6,   24},  {  0,  0,  6,   27},
    {  0,  0,  6,   30},  {  0,  0,  6,   33},
    {  0,  1,  6,   35},  {  0,  1,  6,   39},
    {  0,  2,  6,   43},  {  0,  3,  6,   51},
    {  0,  4,  6,   67},  {  0,  5,  6,   99},
    {  0,  8,  6,  259},  { 32,  0,  4,    4},
    { 48,  0,  4,    4},  { 16,  0,  4,    5},
    { 32,  0,  5,    7},  { 32,  0,  5,    8},
    { 32,  0,  5,   10},  { 32,  0,  5,   11},
    {  0,  0,  6,   14},  {  0,  0,  6,   17},
    {  0,  0,  6,   20},  {  0,  0,  6,   23},
    {  0,  0,  6,   26},  {  0,  0,  6,   29},
    {  0,  0,  6,   32},  {  0, 16,  6,65539},
    {  0, 15,  6,32771},  {  0, 14,  6,16387},
    {  0, 13,  6, 8195},  {  0, 12,  6, 4099},
    {  0, 11,  6, 2051},  {  0, 10,  6, 1027},
};   /* ML_defaultDTable */
static void ZSTD_buildSeqTable_rle (ZSTD_seqSymbol* dt, U32 baseValue, U32 nbAddBits)
{
    void* ptr = dt;
    ZSTD_seqSymbol_header* const DTableH = (ZSTD_seqSymbol_header*)ptr;
    ZSTD_seqSymbol* const cell = dt + 1;

    DTableH->tableLog = 0;
    DTableH->fastMode = 0;

    cell->nbBits = 0;
    cell->nextState = 0;
    assert(nbAddBits < 255);
    cell->nbAdditionalBits = (BYTE)nbAddBits;
    cell->baseValue = baseValue;
}
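/* Example (illustration only) : building a 1-cell RLE table for offset symbol `sym`,
 * assuming OF_base[] and OF_bits[] from /lib/common/zstd_internal.h :
 *     ZSTD_buildSeqTable_rle(dt, OF_base[sym], OF_bits[sym]);
 * The resulting table has tableLog==0, so FSE state updates consume 0 bits and
 * every sequence in the block decodes the same baseValue / nbAdditionalBits. */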
/* ZSTD_buildFSETable() :
 * generate FSE decoding table for one symbol (ll, ml or off)
 * cannot fail if input is valid =>
 * all inputs are presumed validated at this stage */
void
ZSTD_buildFSETable(ZSTD_seqSymbol* dt,
            const short* normalizedCounter, unsigned maxSymbolValue,
            const U32* baseValue, const U32* nbAdditionalBits,
            unsigned tableLog)
{
    ZSTD_seqSymbol* const tableDecode = dt+1;
    U16 symbolNext[MaxSeq+1];

    U32 const maxSV1 = maxSymbolValue + 1;
    U32 const tableSize = 1 << tableLog;
    U32 highThreshold = tableSize-1;

    /* Sanity Checks */
    assert(maxSymbolValue <= MaxSeq);
    assert(tableLog <= MaxFSELog);

    /* Init, lay down lowprob symbols */
    {   ZSTD_seqSymbol_header DTableH;
        DTableH.tableLog = tableLog;
        DTableH.fastMode = 1;
        {   S16 const largeLimit= (S16)(1 << (tableLog-1));
            U32 s;
            for (s=0; s<maxSV1; s++) {
                if (normalizedCounter[s]==-1) {
                    tableDecode[highThreshold--].baseValue = s;
                    symbolNext[s] = 1;
                } else {
                    if (normalizedCounter[s] >= largeLimit) DTableH.fastMode=0;
                    assert(normalizedCounter[s]>=0);
                    symbolNext[s] = (U16)normalizedCounter[s];
        }   }   }
        memcpy(dt, &DTableH, sizeof(DTableH));
    }
    /* Spread symbols */
    {   U32 const tableMask = tableSize-1;
        U32 const step = FSE_TABLESTEP(tableSize);
        U32 s, position = 0;
        for (s=0; s<maxSV1; s++) {
            int i;
            for (i=0; i<normalizedCounter[s]; i++) {
                tableDecode[position].baseValue = s;
                position = (position + step) & tableMask;
                while (position > highThreshold) position = (position + step) & tableMask;   /* lowprob area */
        }   }
        assert(position == 0);   /* position must reach all cells once, otherwise normalizedCounter is incorrect */
    }
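    /* Concrete example (illustration) : for tableLog==5, tableSize==32 and
     * step = FSE_TABLESTEP(32) = (32>>1) + (32>>3) + 3 = 23. Since gcd(23,32)==1,
     * the walk position = (position + 23) & 31 visits each of the 32 cells
     * exactly once before returning to 0, which the assert above verifies. */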
    /* Build Decoding table */
    {   U32 u;
        for (u=0; u<tableSize; u++) {
            U32 const symbol = tableDecode[u].baseValue;
            U32 const nextState = symbolNext[symbol]++;
            tableDecode[u].nbBits = (BYTE) (tableLog - BIT_highbit32(nextState) );
            tableDecode[u].nextState = (U16) ( (nextState << tableDecode[u].nbBits) - tableSize);
            assert(nbAdditionalBits[symbol] < 255);
            tableDecode[u].nbAdditionalBits = (BYTE)nbAdditionalBits[symbol];
            tableDecode[u].baseValue = baseValue[symbol];
    }   }
}
static size_t ZSTD_buildSeqTable(ZSTD_seqSymbol* DTableSpace, const ZSTD_seqSymbol** DTablePtr,
                                 symbolEncodingType_e type, unsigned max, U32 maxLog,
                                 const void* src, size_t srcSize,
                                 const U32* baseValue, const U32* nbAdditionalBits,
                                 const ZSTD_seqSymbol* defaultTable, U32 flagRepeatTable,
                                 int ddictIsCold, int nbSeq)
{
    switch(type)
    {
    case set_rle :
        RETURN_ERROR_IF(!srcSize, srcSize_wrong, "");
        RETURN_ERROR_IF((*(const BYTE*)src) > max, corruption_detected, "");
        {   U32 const symbol = *(const BYTE*)src;
            U32 const baseline = baseValue[symbol];
            U32 const nbBits = nbAdditionalBits[symbol];
            ZSTD_buildSeqTable_rle(DTableSpace, baseline, nbBits);
        }
        *DTablePtr = DTableSpace;
        return 1;
    case set_basic :
        *DTablePtr = defaultTable;
        return 0;
    case set_repeat:
        RETURN_ERROR_IF(!flagRepeatTable, corruption_detected, "");
        /* prefetch FSE table if used */
        if (ddictIsCold && (nbSeq > 24 /* heuristic */)) {
            const void* const pStart = *DTablePtr;
            size_t const pSize = sizeof(ZSTD_seqSymbol) * (SEQSYMBOL_TABLE_SIZE(maxLog));
            PREFETCH_AREA(pStart, pSize);
        }
        return 0;
    case set_compressed :
        {   unsigned tableLog;
            S16 norm[MaxSeq+1];
            size_t const headerSize = FSE_readNCount(norm, &max, &tableLog, src, srcSize);
            RETURN_ERROR_IF(FSE_isError(headerSize), corruption_detected, "");
            RETURN_ERROR_IF(tableLog > maxLog, corruption_detected, "");
            ZSTD_buildFSETable(DTableSpace, norm, max, baseValue, nbAdditionalBits, tableLog);
            *DTablePtr = DTableSpace;
            return headerSize;
        }
    default :
        assert(0);
        RETURN_ERROR(GENERIC, "impossible");
    }
}
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                             const void* src, size_t srcSize)
{
    const BYTE* const istart = (const BYTE* const)src;
    const BYTE* const iend = istart + srcSize;
    const BYTE* ip = istart;
    int nbSeq;

    /* check */
    RETURN_ERROR_IF(srcSize < MIN_SEQUENCES_SIZE, srcSize_wrong, "");

    /* SeqHead */
    nbSeq = *ip++;
    if (!nbSeq) {
        *nbSeqPtr=0;
        RETURN_ERROR_IF(srcSize != 1, srcSize_wrong, "");
        return 1;
    }
    if (nbSeq > 0x7F) {
        if (nbSeq == 0xFF) {
            RETURN_ERROR_IF(ip+2 > iend, srcSize_wrong, "");
            nbSeq = MEM_readLE16(ip) + LONGNBSEQ, ip+=2;
        } else {
            RETURN_ERROR_IF(ip >= iend, srcSize_wrong, "");
            nbSeq = ((nbSeq-0x80)<<8) + *ip++;
        }
    }
    *nbSeqPtr = nbSeq;

    /* FSE table descriptors */
    RETURN_ERROR_IF(ip+1 > iend, srcSize_wrong, "");  /* minimum possible size: 1 byte for symbol encoding types */
    {   symbolEncodingType_e const LLtype = (symbolEncodingType_e)(*ip >> 6);
        symbolEncodingType_e const OFtype = (symbolEncodingType_e)((*ip >> 4) & 3);
        symbolEncodingType_e const MLtype = (symbolEncodingType_e)((*ip >> 2) & 3);
        ip++;
        /* Build DTables */
        {   size_t const llhSize = ZSTD_buildSeqTable(dctx->entropy.LLTable, &dctx->LLTptr,
                                                      LLtype, MaxLL, LLFSELog,
                                                      ip, iend-ip,
                                                      LL_base, LL_bits,
                                                      LL_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(llhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += llhSize;
        }
        {   size_t const ofhSize = ZSTD_buildSeqTable(dctx->entropy.OFTable, &dctx->OFTptr,
                                                      OFtype, MaxOff, OffFSELog,
                                                      ip, iend-ip,
                                                      OF_base, OF_bits,
                                                      OF_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(ofhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += ofhSize;
        }
        {   size_t const mlhSize = ZSTD_buildSeqTable(dctx->entropy.MLTable, &dctx->MLTptr,
                                                      MLtype, MaxML, MLFSELog,
                                                      ip, iend-ip,
                                                      ML_base, ML_bits,
                                                      ML_defaultDTable, dctx->fseEntropy,
                                                      dctx->ddictIsCold, nbSeq);
            RETURN_ERROR_IF(ZSTD_isError(mlhSize), corruption_detected, "ZSTD_buildSeqTable failed");
            ip += mlhSize;
        }
    }

    return ip-istart;
}
typedef struct {
    size_t state;
    const ZSTD_seqSymbol* table;
} ZSTD_fseState;

typedef struct {
    BIT_DStream_t DStream;
    ZSTD_fseState stateLL;
    ZSTD_fseState stateOffb;
    ZSTD_fseState stateML;
    size_t prevOffset[ZSTD_REP_NUM];
    const BYTE* prefixStart;
    const BYTE* dictEnd;
    size_t pos;
} seqState_t;
HINT_INLINE void ZSTD_overlapCopy8(BYTE** op, BYTE const** ip, size_t offset) {
    assert(*ip <= *op);
    if (offset < 8) {
        /* close range match, overlap */
        static const U32 dec32table[] = { 0, 1, 2, 1, 4, 4, 4, 4 };   /* added */
        static const int dec64table[] = { 8, 8, 8, 7, 8, 9,10,11 };   /* subtracted */
        int const sub2 = dec64table[offset];
        (*op)[0] = (*ip)[0]; (*op)[1] = (*ip)[1]; (*op)[2] = (*ip)[2]; (*op)[3] = (*ip)[3];
        *ip += dec32table[offset];
        ZSTD_copy4(*op+4, *ip);
        *ip -= sub2;
    } else {
        ZSTD_copy8(*op, *ip);
    }
    *ip += 8;
    *op += 8;
    assert(*op - *ip >= 8);
}
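/* Worked example (illustration) : offset==2, i.e. *ip == *op - 2.
 * The four single-byte copies replicate the 2-byte pattern into op[0..3];
 * *ip then advances by dec32table[2]==2, so ZSTD_copy4() reads freshly
 * written bytes and extends the pattern into op[4..7]; finally the
 * "-= dec64table[2]" and the shared "+= 8" leave *op - *ip == 8, so later
 * iterations can use plain 8-byte copies without overlap. */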
static void ZSTD_safecopy(BYTE* op, BYTE* const oend_w, BYTE const* ip, ptrdiff_t length, ZSTD_overlap_e ovtype) {
    ptrdiff_t const diff = op - ip;
    BYTE* const oend = op + length;

    assert((ovtype == ZSTD_no_overlap && (diff <= -8 || diff >= 8 || op >= oend_w)) ||
           (ovtype == ZSTD_overlap_src_before_dst && diff >= 0));

    if (length < 8) {
        /* Handle short lengths. */
        while (op < oend) *op++ = *ip++;
        return;
    }
    if (ovtype == ZSTD_overlap_src_before_dst) {
        /* Copy 8 bytes and ensure the offset >= 8 when there can be overlap. */
        assert(length >= 8);
        ZSTD_overlapCopy8(&op, &ip, diff);
        assert(op - ip >= 8);
        assert(op <= oend);
    }

    if (oend <= oend_w) {
        /* No risk of overwrite. */
        ZSTD_wildcopy(op, ip, length, ovtype);
        return;
    }
    if (op <= oend_w) {
        /* Wildcopy until we get close to the end. */
        assert(oend > oend_w);
        ZSTD_wildcopy(op, ip, oend_w - op, ovtype);
        ip += oend_w - op;
        op = oend_w;
    }
    /* Handle the leftovers. */
    while (op < oend) *op++ = *ip++;
}
/* ZSTD_execSequenceEnd():
 * This version handles cases that are near the end of the output buffer. It requires
 * more careful checks to make sure there is no overflow. By separating out these hard
 * and unlikely cases, we can speed up the common cases.
 *
 * NOTE: This function needs to be fast for a single long sequence, but doesn't need
 * to be optimized for many small sequences, since those fall into ZSTD_execSequence().
 */
FORCE_NOINLINE
size_t ZSTD_execSequenceEnd(BYTE* op,
                            BYTE* const oend, seq_t sequence,
                            const BYTE** litPtr, const BYTE* const litLimit,
                            const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;

    /* bounds checks : careful of address space overflow in 32-bit mode */
    RETURN_ERROR_IF(sequenceLength > (size_t)(oend - op), dstSize_tooSmall, "last match must fit within dstBuffer");
    RETURN_ERROR_IF(sequence.litLength > (size_t)(litLimit - *litPtr), corruption_detected, "try to read beyond literal buffer");
    assert(op < op + sequenceLength);
    assert(oLitEnd < op + sequenceLength);

    /* copy literals */
    ZSTD_safecopy(op, oend_w, *litPtr, sequence.litLength, ZSTD_no_overlap);
    op = oLitEnd;
    *litPtr = iLitEnd;

    /* copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix */
        RETURN_ERROR_IF(sequence.offset > (size_t)(oLitEnd - virtualStart), corruption_detected, "");
        match = dictEnd - (prefixStart-match);
        if (match + sequence.matchLength <= dictEnd) {
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    ZSTD_safecopy(op, oend_w, match, sequence.matchLength, ZSTD_overlap_src_before_dst);
    return sequenceLength;
}
HINT_INLINE
size_t ZSTD_execSequence(BYTE* op,
                         BYTE* const oend, seq_t sequence,
                         const BYTE** litPtr, const BYTE* const litLimit,
                         const BYTE* const prefixStart, const BYTE* const virtualStart, const BYTE* const dictEnd)
{
    BYTE* const oLitEnd = op + sequence.litLength;
    size_t const sequenceLength = sequence.litLength + sequence.matchLength;
    BYTE* const oMatchEnd = op + sequenceLength;   /* risk : address space overflow (32-bits) */
    BYTE* const oend_w = oend - WILDCOPY_OVERLENGTH;   /* risk : address space underflow on oend=NULL */
    const BYTE* const iLitEnd = *litPtr + sequence.litLength;
    const BYTE* match = oLitEnd - sequence.offset;

    assert(op != NULL /* Precondition */);
    assert(oend_w < oend /* No underflow */);
    /* Handle edge cases in a slow path:
     *   - Read beyond end of literals
     *   - Match end is within WILDCOPY_OVERLENGTH of oend
     *   - 32-bit mode and the match length overflows
     */
    if (UNLIKELY(
            iLitEnd > litLimit ||
            oMatchEnd > oend_w ||
            (MEM_32bits() && (size_t)(oend - op) < sequenceLength + WILDCOPY_OVERLENGTH)))
        return ZSTD_execSequenceEnd(op, oend, sequence, litPtr, litLimit, prefixStart, virtualStart, dictEnd);

    /* Assumptions (everything else goes into ZSTD_execSequenceEnd()) */
    assert(op <= oLitEnd /* No overflow */);
    assert(oLitEnd < oMatchEnd /* Non-zero match & no overflow */);
    assert(oMatchEnd <= oend /* No underflow */);
    assert(iLitEnd <= litLimit /* Literal length is in bounds */);
    assert(oLitEnd <= oend_w /* Can wildcopy literals */);
    assert(oMatchEnd <= oend_w /* Can wildcopy matches */);

    /* Copy Literals:
     * Split out litLength <= 16 since it is nearly always true. +1.6% on gcc-9.
     * We likely don't need the full 32-byte wildcopy.
     */
    assert(WILDCOPY_OVERLENGTH >= 16);
    ZSTD_copy16(op, (*litPtr));
    if (UNLIKELY(sequence.litLength > 16)) {
        ZSTD_wildcopy(op+16, (*litPtr)+16, sequence.litLength-16, ZSTD_no_overlap);
    }
    op = oLitEnd;
    *litPtr = iLitEnd;   /* update for next sequence */

    /* Copy Match */
    if (sequence.offset > (size_t)(oLitEnd - prefixStart)) {
        /* offset beyond prefix -> go into extDict */
        RETURN_ERROR_IF(UNLIKELY(sequence.offset > (size_t)(oLitEnd - virtualStart)), corruption_detected, "");
        match = dictEnd + (match - prefixStart);
        if (match + sequence.matchLength <= dictEnd) {
            memmove(oLitEnd, match, sequence.matchLength);
            return sequenceLength;
        }
        /* span extDict & currentPrefixSegment */
        {   size_t const length1 = dictEnd - match;
            memmove(oLitEnd, match, length1);
            op = oLitEnd + length1;
            sequence.matchLength -= length1;
            match = prefixStart;
        }
    }
    /* Match within prefix of 1 or more bytes */
    assert(op <= oMatchEnd);
    assert(oMatchEnd <= oend_w);
    assert(match >= prefixStart);
    assert(sequence.matchLength >= 1);

    /* Nearly all offsets are >= WILDCOPY_VECLEN bytes, which means we can use wildcopy
     * without overlap checking.
     */
    if (LIKELY(sequence.offset >= WILDCOPY_VECLEN)) {
        /* We bet on a full wildcopy for matches, since we expect matches to be
         * longer than literals (in general). In silesia, ~10% of matches are longer
         * than 16 bytes.
         */
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength, ZSTD_no_overlap);
        return sequenceLength;
    }
    assert(sequence.offset < WILDCOPY_VECLEN);

    /* Copy 8 bytes and spread the offset to be >= 8. */
    ZSTD_overlapCopy8(&op, &match, sequence.offset);

    /* If the match length is > 8 bytes, then continue with the wildcopy. */
    if (sequence.matchLength > 8) {
        assert(op < oMatchEnd);
        ZSTD_wildcopy(op, match, (ptrdiff_t)sequence.matchLength-8, ZSTD_overlap_src_before_dst);
    }
    return sequenceLength;
}
static void
ZSTD_initFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, const ZSTD_seqSymbol* dt)
{
    const void* ptr = dt;
    const ZSTD_seqSymbol_header* const DTableH = (const ZSTD_seqSymbol_header*)ptr;
    DStatePtr->state = BIT_readBits(bitD, DTableH->tableLog);
    DEBUGLOG(6, "ZSTD_initFseState : val=%u using %u bits",
                (U32)DStatePtr->state, DTableH->tableLog);
    BIT_reloadDStream(bitD);
    DStatePtr->table = dt + 1;
}
FORCE_INLINE_TEMPLATE void
ZSTD_updateFseState(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD)
{
    ZSTD_seqSymbol const DInfo = DStatePtr->table[DStatePtr->state];
    U32 const nbBits = DInfo.nbBits;
    size_t const lowBits = BIT_readBits(bitD, nbBits);
    DStatePtr->state = DInfo.nextState + lowBits;
}

FORCE_INLINE_TEMPLATE void
ZSTD_updateFseStateWithDInfo(ZSTD_fseState* DStatePtr, BIT_DStream_t* bitD, ZSTD_seqSymbol const DInfo)
{
    U32 const nbBits = DInfo.nbBits;
    size_t const lowBits = BIT_readBits(bitD, nbBits);
    DStatePtr->state = DInfo.nextState + lowBits;
}
/* We need to add at most (ZSTD_WINDOWLOG_MAX_32 - 1) bits to read the maximum
 * offset bits. But we can only read at most (STREAM_ACCUMULATOR_MIN_32 - 1)
 * bits before reloading. This value is the maximum number of bits we read
 * after reloading when we are decoding long offsets.
 */
#define LONG_OFFSETS_MAX_EXTRA_BITS_32                       \
    (ZSTD_WINDOWLOG_MAX_32 > STREAM_ACCUMULATOR_MIN_32       \
        ? ZSTD_WINDOWLOG_MAX_32 - STREAM_ACCUMULATOR_MIN_32  \
        : 0)
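/* For the current constants (ZSTD_WINDOWLOG_MAX_32 == 30,
 * STREAM_ACCUMULATOR_MIN_32 == 25) this evaluates to 5, which is what
 * ZSTD_decodeSequence() static-asserts below. */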
typedef enum { ZSTD_lo_isRegularOffset, ZSTD_lo_isLongOffset=1 } ZSTD_longOffset_e;
typedef enum { ZSTD_p_noPrefetch=0, ZSTD_p_prefetch=1 } ZSTD_prefetch_e;
FORCE_INLINE_TEMPLATE seq_t
ZSTD_decodeSequence(seqState_t* seqState, const ZSTD_longOffset_e longOffsets, const ZSTD_prefetch_e prefetch)
{
    seq_t seq;
    ZSTD_seqSymbol const llDInfo = seqState->stateLL.table[seqState->stateLL.state];
    ZSTD_seqSymbol const mlDInfo = seqState->stateML.table[seqState->stateML.state];
    ZSTD_seqSymbol const ofDInfo = seqState->stateOffb.table[seqState->stateOffb.state];
    U32 const llBase = llDInfo.baseValue;
    U32 const mlBase = mlDInfo.baseValue;
    U32 const ofBase = ofDInfo.baseValue;
    BYTE const llBits = llDInfo.nbAdditionalBits;
    BYTE const mlBits = mlDInfo.nbAdditionalBits;
    BYTE const ofBits = ofDInfo.nbAdditionalBits;
    BYTE const totalBits = llBits+mlBits+ofBits;

    /* sequence */
    {   size_t offset;
        if (ofBits > 1) {
            ZSTD_STATIC_ASSERT(ZSTD_lo_isLongOffset == 1);
            ZSTD_STATIC_ASSERT(LONG_OFFSETS_MAX_EXTRA_BITS_32 == 5);
            assert(ofBits <= MaxOff);
            if (MEM_32bits() && longOffsets && (ofBits >= STREAM_ACCUMULATOR_MIN_32)) {
                U32 const extraBits = ofBits - MIN(ofBits, 32 - seqState->DStream.bitsConsumed);
                offset = ofBase + (BIT_readBitsFast(&seqState->DStream, ofBits - extraBits) << extraBits);
                BIT_reloadDStream(&seqState->DStream);
                if (extraBits) offset += BIT_readBitsFast(&seqState->DStream, extraBits);
                assert(extraBits <= LONG_OFFSETS_MAX_EXTRA_BITS_32);   /* to avoid another reload */
            } else {
                offset = ofBase + BIT_readBitsFast(&seqState->DStream, ofBits/*>0*/);   /* <= (ZSTD_WINDOWLOG_MAX-1) bits */
                if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);
            }
            seqState->prevOffset[2] = seqState->prevOffset[1];
            seqState->prevOffset[1] = seqState->prevOffset[0];
            seqState->prevOffset[0] = offset;
        } else {
            U32 const ll0 = (llBase == 0);
            if (LIKELY((ofBits == 0))) {
                /* repcode */
                if (LIKELY(!ll0))
                    offset = seqState->prevOffset[0];
                else {
                    offset = seqState->prevOffset[1];
                    seqState->prevOffset[1] = seqState->prevOffset[0];
                    seqState->prevOffset[0] = offset;
                }
            } else {
                offset = ofBase + ll0 + BIT_readBitsFast(&seqState->DStream, 1);
                {   size_t temp = (offset==3) ? seqState->prevOffset[0] - 1 : seqState->prevOffset[offset];
                    temp += !temp;   /* 0 is not valid; input is corrupted; force offset to 1 */
                    if (offset != 1) seqState->prevOffset[2] = seqState->prevOffset[1];
                    seqState->prevOffset[1] = seqState->prevOffset[0];
                    seqState->prevOffset[0] = offset = temp;
        }   }   }
        seq.offset = offset;
    }
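    /* Repcode recap (illustration) : with ofBits==0 the sequence reuses a recent
     * offset, and a literal length of 0 (ll0) shifts the repcode index by one,
     * so the second slot is used instead of the first; with ofBits==1 the decoded
     * value selects among the remaining recent offsets, where the shifted third
     * slot becomes the synthetic value prevOffset[0]-1, per the repeat-offset
     * rules of the zstd format specification. */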
    seq.matchLength = mlBase;
    if (mlBits > 0)
        seq.matchLength += BIT_readBitsFast(&seqState->DStream, mlBits/*>0*/);

    if (MEM_32bits() && (mlBits+llBits >= STREAM_ACCUMULATOR_MIN_32-LONG_OFFSETS_MAX_EXTRA_BITS_32))
        BIT_reloadDStream(&seqState->DStream);
    if (MEM_64bits() && UNLIKELY(totalBits >= STREAM_ACCUMULATOR_MIN_64-(LLFSELog+MLFSELog+OffFSELog)))
        BIT_reloadDStream(&seqState->DStream);
    /* Ensure there are enough bits to read the rest of data in 64-bit mode. */
    ZSTD_STATIC_ASSERT(16+LLFSELog+MLFSELog+OffFSELog < STREAM_ACCUMULATOR_MIN_64);

    seq.litLength = llBase;
    if (llBits > 0)
        seq.litLength += BIT_readBitsFast(&seqState->DStream, llBits/*>0*/);

    if (MEM_32bits())
        BIT_reloadDStream(&seqState->DStream);
    DEBUGLOG(6, "seq: litL=%u, matchL=%u, offset=%u",
                (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);

    if (prefetch == ZSTD_p_prefetch) {
        size_t const pos = seqState->pos + seq.litLength;
        const BYTE* const matchBase = (seq.offset > pos) ? seqState->dictEnd : seqState->prefixStart;
        seq.match = matchBase + pos - seq.offset;   /* note : this operation can overflow when seq.offset is really too large, which can only happen when input is corrupted.
                                                     * No consequence though : no memory access will occur, offset is only used for prefetching */
        seqState->pos = pos + seq.matchLength;
    }
    /* ANS state update
     * gcc-9.0.0 does 2.5% worse with ZSTD_updateFseStateWithDInfo().
     * clang-9.2.0 does 7% worse with ZSTD_updateFseState().
     * Naturally it seems like ZSTD_updateFseStateWithDInfo() should be the
     * better option, so it is the default for other compilers. But, if you
     * measure that it is worse, please put up a pull request.
     */
    {
#if defined(__GNUC__) && !defined(__clang__)
        const int kUseUpdateFseState = 1;
#else
        const int kUseUpdateFseState = 0;
#endif
        if (kUseUpdateFseState) {
            ZSTD_updateFseState(&seqState->stateLL, &seqState->DStream);    /* <=  9 bits */
            ZSTD_updateFseState(&seqState->stateML, &seqState->DStream);    /* <=  9 bits */
            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
            ZSTD_updateFseState(&seqState->stateOffb, &seqState->DStream);  /* <=  8 bits */
        } else {
            ZSTD_updateFseStateWithDInfo(&seqState->stateLL, &seqState->DStream, llDInfo);    /* <=  9 bits */
            ZSTD_updateFseStateWithDInfo(&seqState->stateML, &seqState->DStream, mlDInfo);    /* <=  9 bits */
            if (MEM_32bits()) BIT_reloadDStream(&seqState->DStream);    /* <= 18 bits */
            ZSTD_updateFseStateWithDInfo(&seqState->stateOffb, &seqState->DStream, ofDInfo);  /* <=  8 bits */
        }
    }

    return seq;
}
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
static int ZSTD_dictionaryIsActive(ZSTD_DCtx const* dctx, BYTE const* prefixStart, BYTE const* oLitEnd)
{
    size_t const windowSize = dctx->fParams.windowSize;
    /* No dictionary used. */
    if (dctx->dictContentEndForFuzzing == NULL) return 0;
    /* Dictionary is our prefix. */
    if (prefixStart == dctx->dictContentBeginForFuzzing) return 1;
    /* Dictionary is not our ext-dict. */
    if (dctx->dictEnd != dctx->dictContentEndForFuzzing) return 0;
    /* Dictionary is not within our window size. */
    if ((size_t)(oLitEnd - prefixStart) >= windowSize) return 0;
    /* Dictionary is active. */
    return 1;
}
MEM_STATIC void ZSTD_assertValidSequence(
        ZSTD_DCtx const* dctx,
        BYTE const* op, BYTE const* oend,
        seq_t const seq,
        BYTE const* prefixStart, BYTE const* virtualStart)
{
    size_t const windowSize = dctx->fParams.windowSize;
    size_t const sequenceSize = seq.litLength + seq.matchLength;
    BYTE const* const oLitEnd = op + seq.litLength;
    DEBUGLOG(6, "Checking sequence: litL=%u matchL=%u offset=%u",
            (U32)seq.litLength, (U32)seq.matchLength, (U32)seq.offset);
    assert(op <= oend);
    assert((size_t)(oend - op) >= sequenceSize);
    assert(sequenceSize <= ZSTD_BLOCKSIZE_MAX);
    if (ZSTD_dictionaryIsActive(dctx, prefixStart, oLitEnd)) {
        size_t const dictSize = (size_t)((char const*)dctx->dictContentEndForFuzzing - (char const*)dctx->dictContentBeginForFuzzing);
        /* Offset must be within the dictionary. */
        assert(seq.offset <= (size_t)(oLitEnd - virtualStart));
        assert(seq.offset <= windowSize + dictSize);
    } else {
        /* Offset must be within our window. */
        assert(seq.offset <= windowSize);
    }
}
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
FORCE_INLINE_TEMPLATE size_t
DONT_VECTORIZE
ZSTD_decompressSequences_body( ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const vBase = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    (void)frame;
    /* Regen sequences */
    if (nbSeq) {
        seqState_t seqState;
        size_t error = 0;
        dctx->fseEntropy = 1;
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        assert(dst != NULL);

        ZSTD_STATIC_ASSERT(
                BIT_DStream_unfinished < BIT_DStream_completed &&
                BIT_DStream_endOfBuffer < BIT_DStream_completed &&
                BIT_DStream_completed < BIT_DStream_overflow);
#if defined(__GNUC__) && defined(__x86_64__)
        /* Align the decompression loop to 32 + 16 bytes.
         *
         * zstd compiled with gcc-9 on an Intel i9-9900k shows 10% decompression
         * speed swings based on the alignment of the decompression loop. This
         * performance swing is caused by parts of the decompression loop falling
         * out of the DSB. The entire decompression loop should fit in the DSB,
         * when it can't we get much worse performance. You can measure if you've
         * hit the good case or the bad case with this perf command for some
         * compressed file test.zst:
         *
         *     perf stat -e cycles -e instructions -e idq.all_dsb_cycles_any_uops \
         *               -e idq.all_mite_cycles_any_uops -- ./zstd -tq test.zst
         *
         * If you see most cycles served out of the MITE you've hit the bad case.
         * If you see most cycles served out of the DSB you've hit the good case.
         * If it is pretty even then you may be in an okay case.
         *
         * I've been able to reproduce this issue on the following CPUs:
         * - Kabylake: Macbook Pro (15-inch, 2019) 2.4 GHz Intel Core i9
         *             Use Instruments->Counters to get DSB/MITE cycles.
         *             I never got performance swings, but I was able to
         *             go from the good case of mostly DSB to half of the
         *             cycles served from MITE.
         * - Coffeelake: Intel i9-9900k
         *
         * I haven't been able to reproduce the instability or DSB misses on any
         * of the following CPUs:
         * - Broadwell: Intel(R) Xeon(R) CPU E5-2680 v4 @ 2.40GHz
         *
         * If you are seeing performance instability, this script can help test.
         * It tests on 4 commits in zstd where I saw performance change.
         *
         *   https://gist.github.com/terrelln/9889fc06a423fd5ca6e99351564473f4
         */
        __asm__(".p2align 5");
        __asm__("nop");
        __asm__(".p2align 4");
#endif
        for ( ; ; ) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_noPrefetch);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequence, &litPtr, litEnd, prefixStart, vBase, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
            assert(!ZSTD_isError(oneSeqSize));
            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequence, prefixStart, vBase);
#endif
            DEBUGLOG(6, "regenerated sequence size : %u", (U32)oneSeqSize);
            BIT_reloadDStream(&(seqState.DStream));
            /* gcc and clang both don't like early returns in this loop.
             * gcc doesn't like early breaks either.
             * Instead save an error and report it at the end.
             * When there is an error, don't increment op, so we don't
             * overwrite.
             */
            if (UNLIKELY(ZSTD_isError(oneSeqSize))) error = oneSeqSize;
            else op += oneSeqSize;
            if (UNLIKELY(!--nbSeq)) break;
        }

        /* check if reached exact end */
        if (ZSTD_isError(error)) return error;
        RETURN_ERROR_IF(nbSeq, corruption_detected, "");
        RETURN_ERROR_IF(BIT_reloadDStream(&seqState.DStream) < BIT_DStream_completed, corruption_detected, "");
        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }
    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
    return op-ostart;
}

static size_t
ZSTD_decompressSequences_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset, const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
FORCE_INLINE_TEMPLATE size_t
ZSTD_decompressSequencesLong_body(
                               ZSTD_DCtx* dctx,
                               void* dst, size_t maxDstSize,
                         const void* seqStart, size_t seqSize, int nbSeq,
                         const ZSTD_longOffset_e isLongOffset,
                         const int frame)
{
    const BYTE* ip = (const BYTE*)seqStart;
    const BYTE* const iend = ip + seqSize;
    BYTE* const ostart = (BYTE* const)dst;
    BYTE* const oend = ostart + maxDstSize;
    BYTE* op = ostart;
    const BYTE* litPtr = dctx->litPtr;
    const BYTE* const litEnd = litPtr + dctx->litSize;
    const BYTE* const prefixStart = (const BYTE*) (dctx->prefixStart);
    const BYTE* const dictStart = (const BYTE*) (dctx->virtualStart);
    const BYTE* const dictEnd = (const BYTE*) (dctx->dictEnd);
    (void)frame;

    /* Regen sequences */
    if (nbSeq) {
#define STORED_SEQS 4
#define STORED_SEQS_MASK (STORED_SEQS-1)
#define ADVANCED_SEQS 4
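        /* Pipeline sketch (illustration) : sequences[] below is a 4-entry ring
         * buffer. Sequence N is decoded and its match prefetched while sequence
         * N-ADVANCED_SEQS, whose match data should by then sit in cache, is
         * executed. STORED_SEQS must remain a power of 2 for the
         * "& STORED_SEQS_MASK" indexing to work. */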
        seq_t sequences[STORED_SEQS];
        int const seqAdvance = MIN(nbSeq, ADVANCED_SEQS);
        seqState_t seqState;
        int seqNb;
        dctx->fseEntropy = 1;
        { int i; for (i=0; i<ZSTD_REP_NUM; i++) seqState.prevOffset[i] = dctx->entropy.rep[i]; }
        seqState.prefixStart = prefixStart;
        seqState.pos = (size_t)(op-prefixStart);
        seqState.dictEnd = dictEnd;
        assert(dst != NULL);
        RETURN_ERROR_IF(
            ERR_isError(BIT_initDStream(&seqState.DStream, ip, iend-ip)),
            corruption_detected, "");
        ZSTD_initFseState(&seqState.stateLL, &seqState.DStream, dctx->LLTptr);
        ZSTD_initFseState(&seqState.stateOffb, &seqState.DStream, dctx->OFTptr);
        ZSTD_initFseState(&seqState.stateML, &seqState.DStream, dctx->MLTptr);
        /* prepare in advance */
        for (seqNb=0; (BIT_reloadDStream(&seqState.DStream) <= BIT_DStream_completed) && (seqNb<seqAdvance); seqNb++) {
            sequences[seqNb] = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
            PREFETCH_L1(sequences[seqNb].match); PREFETCH_L1(sequences[seqNb].match + sequences[seqNb].matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
        }
        RETURN_ERROR_IF(seqNb<seqAdvance, corruption_detected, "");

        /* decode and decompress */
        for ( ; (BIT_reloadDStream(&(seqState.DStream)) <= BIT_DStream_completed) && (seqNb<nbSeq) ; seqNb++) {
            seq_t const sequence = ZSTD_decodeSequence(&seqState, isLongOffset, ZSTD_p_prefetch);
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
            assert(!ZSTD_isError(oneSeqSize));
            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[(seqNb-ADVANCED_SEQS) & STORED_SEQS_MASK], prefixStart, dictStart);
#endif
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            PREFETCH_L1(sequence.match); PREFETCH_L1(sequence.match + sequence.matchLength - 1); /* note : it's safe to invoke PREFETCH() on any memory address, including invalid ones */
            sequences[seqNb & STORED_SEQS_MASK] = sequence;
            op += oneSeqSize;
        }
        RETURN_ERROR_IF(seqNb<nbSeq, corruption_detected, "");
        /* finish queue */
        seqNb -= seqAdvance;
        for ( ; seqNb<nbSeq ; seqNb++) {
            size_t const oneSeqSize = ZSTD_execSequence(op, oend, sequences[seqNb&STORED_SEQS_MASK], &litPtr, litEnd, prefixStart, dictStart, dictEnd);
#if defined(FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION) && defined(FUZZING_ASSERT_VALID_SEQUENCE)
            assert(!ZSTD_isError(oneSeqSize));
            if (frame) ZSTD_assertValidSequence(dctx, op, oend, sequences[seqNb&STORED_SEQS_MASK], prefixStart, dictStart);
#endif
            if (ZSTD_isError(oneSeqSize)) return oneSeqSize;
            op += oneSeqSize;
        }

        /* save reps for next block */
        { U32 i; for (i=0; i<ZSTD_REP_NUM; i++) dctx->entropy.rep[i] = (U32)(seqState.prevOffset[i]); }
    }

    /* last literal segment */
    {   size_t const lastLLSize = litEnd - litPtr;
        RETURN_ERROR_IF(lastLLSize > (size_t)(oend-op), dstSize_tooSmall, "");
        if (op != NULL) {
            memcpy(op, litPtr, lastLLSize);
            op += lastLLSize;
        }
    }
    return op-ostart;
}
static size_t
ZSTD_decompressSequencesLong_default(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset, const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
#if DYNAMIC_BMI2

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static TARGET_ATTRIBUTE("bmi2") size_t
DONT_VECTORIZE
ZSTD_decompressSequences_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset, const int frame)
{
    return ZSTD_decompressSequences_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
static TARGET_ATTRIBUTE("bmi2") size_t
ZSTD_decompressSequencesLong_bmi2(ZSTD_DCtx* dctx,
                                 void* dst, size_t maxDstSize,
                           const void* seqStart, size_t seqSize, int nbSeq,
                           const ZSTD_longOffset_e isLongOffset, const int frame)
{
    return ZSTD_decompressSequencesLong_body(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */

#endif /* DYNAMIC_BMI2 */
typedef size_t (*ZSTD_decompressSequences_t)(
                            ZSTD_DCtx* dctx,
                            void* dst, size_t maxDstSize,
                            const void* seqStart, size_t seqSize, int nbSeq,
                            const ZSTD_longOffset_e isLongOffset,
                            const int frame);
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
static size_t
ZSTD_decompressSequences(ZSTD_DCtx* dctx, void* dst, size_t maxDstSize,
                   const void* seqStart, size_t seqSize, int nbSeq,
                   const ZSTD_longOffset_e isLongOffset, const int frame)
{
#if DYNAMIC_BMI2
    if (dctx->bmi2) {
        return ZSTD_decompressSequences_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequences_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG */
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
/* ZSTD_decompressSequencesLong() :
 * decompression function triggered when a minimum share of offsets is considered "long",
 * aka out of cache.
 * note : "long" definition seems overloaded here, sometimes meaning "wider than bitstream register", and sometimes meaning "farther than memory cache distance".
 * This function will try to mitigate main memory latency through the use of prefetching */
static size_t
ZSTD_decompressSequencesLong(ZSTD_DCtx* dctx,
                             void* dst, size_t maxDstSize,
                             const void* seqStart, size_t seqSize, int nbSeq,
                             const ZSTD_longOffset_e isLongOffset, const int frame)
{
#if DYNAMIC_BMI2
    if (dctx->bmi2) {
        return ZSTD_decompressSequencesLong_bmi2(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
    }
#endif
    return ZSTD_decompressSequencesLong_default(dctx, dst, maxDstSize, seqStart, seqSize, nbSeq, isLongOffset, frame);
}
#endif /* ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
/* ZSTD_getLongOffsetsShare() :
 * condition : offTable must be valid
 * @return : "share" of long offsets (arbitrarily defined as > (1<<23))
 *           compared to maximum possible of (1<<OffFSELog) */
static unsigned
ZSTD_getLongOffsetsShare(const ZSTD_seqSymbol* offTable)
{
    const void* ptr = offTable;
    U32 const tableLog = ((const ZSTD_seqSymbol_header*)ptr)[0].tableLog;
    const ZSTD_seqSymbol* table = offTable + 1;
    U32 const max = 1 << tableLog;
    U32 u, total = 0;

    assert(max <= (1 << OffFSELog));   /* max not too large */
    for (u=0; u<max; u++) {
        if (table[u].nbAdditionalBits > 22) total += 1;
    }

    assert(tableLog <= OffFSELog);
    total <<= (OffFSELog - tableLog);   /* scale to OffFSELog */

    return total;
}
#endif
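/* Example (illustration) : with tableLog==8 (OffFSELog, the maximum), a share of
 * 7 means 7 of the 256 cells carry offsets needing more than 22 extra bits,
 * i.e. offsets beyond 8 MB. ZSTD_decompressBlock_internal() compares this value
 * against minShare (7 on 64-bit, 20 on 32-bit, i.e. 2.73% and 7.81%). */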
size_t
ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
                      void* dst, size_t dstCapacity,
                const void* src, size_t srcSize, const int frame)
{   /* blockType == blockCompressed */
    const BYTE* ip = (const BYTE*)src;
    /* isLongOffset must be true if there are long offsets.
     * Offsets are long if they are larger than 2^STREAM_ACCUMULATOR_MIN.
     * We don't expect that to be the case in 64-bit mode.
     * In block mode, window size is not known, so we have to be conservative.
     * (note: but it could be evaluated from current-lowLimit)
     */
    ZSTD_longOffset_e const isLongOffset = (ZSTD_longOffset_e)(MEM_32bits() && (!frame || (dctx->fParams.windowSize > (1ULL << STREAM_ACCUMULATOR_MIN))));

    RETURN_ERROR_IF(srcSize >= ZSTD_BLOCKSIZE_MAX, srcSize_wrong, "");

    /* Decode literals section */
    {   size_t const litCSize = ZSTD_decodeLiteralsBlock(dctx, src, srcSize);
        if (ZSTD_isError(litCSize)) return litCSize;
        ip += litCSize;
        srcSize -= litCSize;
    }
    /* Build Decoding Tables */
    {
        /* These macros control at build-time which decompressor implementation
         * we use. If neither is defined, we do some inspection and dispatch at
         * runtime.
         */
#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        int usePrefetchDecoder = dctx->ddictIsCold;
#endif
        int nbSeq;
        size_t const seqHSize = ZSTD_decodeSeqHeaders(dctx, &nbSeq, ip, srcSize);
        if (ZSTD_isError(seqHSize)) return seqHSize;
        ip += seqHSize;
        srcSize -= seqHSize;

        RETURN_ERROR_IF(dst == NULL && nbSeq > 0, dstSize_tooSmall, "NULL not handled");

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if ( !usePrefetchDecoder
          && (!frame || (dctx->fParams.windowSize > (1<<24)))
          && (nbSeq>ADVANCED_SEQS) ) {  /* could probably use a larger nbSeq limit */
            U32 const shareLongOffsets = ZSTD_getLongOffsetsShare(dctx->OFTptr);
            U32 const minShare = MEM_64bits() ? 7 : 20;  /* heuristic values, correspond to 2.73% and 7.81% */
            usePrefetchDecoder = (shareLongOffsets >= minShare);
        }
#endif

        dctx->ddictIsCold = 0;

#if !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT) && \
    !defined(ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG)
        if (usePrefetchDecoder)
#endif
#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_SHORT
            return ZSTD_decompressSequencesLong(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif

#ifndef ZSTD_FORCE_DECOMPRESS_SEQUENCES_LONG
        /* else */
        return ZSTD_decompressSequences(dctx, dst, dstCapacity, ip, srcSize, nbSeq, isLongOffset, frame);
#endif
    }
}
void ZSTD_checkContinuity(ZSTD_DCtx* dctx, const void* dst)
{
    if (dst != dctx->previousDstEnd) {   /* not contiguous */
        dctx->dictEnd = dctx->previousDstEnd;
        dctx->virtualStart = (const char*)dst - ((const char*)(dctx->previousDstEnd) - (const char*)(dctx->prefixStart));
        dctx->prefixStart = dst;
        dctx->previousDstEnd = dst;
    }
}

size_t ZSTD_decompressBlock(ZSTD_DCtx* dctx,
                            void* dst, size_t dstCapacity,
                      const void* src, size_t srcSize)
{
    size_t dSize;
    ZSTD_checkContinuity(dctx, dst);
    dSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 0);
    dctx->previousDstEnd = (char*)dst + dSize;
    return dSize;
}
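/* Minimal usage sketch (disabled, illustration only) : decompressing a single
 * block with the static-linking-only block API from zstd.h, assuming `cSrc`
 * holds one block produced by the matching block-level compression API;
 * example_decompressOneBlock is a hypothetical helper, not part of zstd. */
#if 0
static size_t example_decompressOneBlock(const void* cSrc, size_t cSize,
                                         void* dst, size_t dstCapacity)
{
    ZSTD_DCtx* const dctx = ZSTD_createDCtx();
    size_t dSize;
    ZSTD_decompressBegin(dctx);   /* fresh history, no dictionary */
    dSize = ZSTD_decompressBlock(dctx, dst, dstCapacity, cSrc, cSize);
    ZSTD_freeDCtx(dctx);
    return dSize;   /* regenerated size, or an error code */
}
#endif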