/*
 * Copyright (c) Yann Collet, Facebook, Inc.
 * All rights reserved.
 *
 * This source code is licensed under both the BSD-style license (found in the
 * LICENSE file in the root directory of this source tree) and the GPLv2 (found
 * in the COPYING file in the root directory of this source tree).
 * You may select, at your option, one of the above-listed licenses.
 */

#ifndef ZSTD_CCOMMON_H_MODULE
#define ZSTD_CCOMMON_H_MODULE

/* This module contains definitions which must be identical
 * across compression, decompression and dictBuilder.
 * It also contains a few functions useful to at least two of them,
 * which benefit from being inlined. */

/*-*************************************
*  Dependencies
***************************************/
#include "compiler.h"
#include "cpu.h"
#include "mem.h"
#include "debug.h"                 /* assert, DEBUGLOG, RAWLOG, g_debuglevel */
#include "error_private.h"
#define ZSTD_STATIC_LINKING_ONLY
#include <linux/zstd.h>
#define FSE_STATIC_LINKING_ONLY
#include "fse.h"
#define HUF_STATIC_LINKING_ONLY
#include "huf.h"
#include <linux/xxhash.h>                /* XXH_reset, update, digest */
#define ZSTD_TRACE 0

/* ---- static assert (debug) --- */
#define ZSTD_STATIC_ASSERT(c) DEBUG_STATIC_ASSERT(c)
#define ZSTD_isError ERR_isError   /* for inlining */
#define FSE_isError  ERR_isError
#define HUF_isError  ERR_isError

/*-*************************************
*  shared macros
***************************************/
#undef MIN
#undef MAX
#define MIN(a,b) ((a)<(b) ? (a) : (b))
#define MAX(a,b) ((a)>(b) ? (a) : (b))
#define BOUNDED(min,val,max) (MAX(min,MIN(val,max)))

/*-*************************************
*  Common constants
***************************************/
#define ZSTD_OPT_NUM    (1<<12)

#define ZSTD_REP_NUM      3                 /* number of repcodes */
static UNUSED_ATTR const U32 repStartValue[ZSTD_REP_NUM] = { 1, 4, 8 };

#define KB *(1 <<10)
#define MB *(1 <<20)
#define GB *(1U<<30)
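
/* Note: these are postfix macros; the literal to their left becomes the
 * multiplicand, so `8 KB` expands to `8 *(1 <<10)` == 8192. An illustrative
 * use (a sketch, not part of the upstream header):
 *
 *     size_t const exampleChunkSize = 4 MB;    == 4 * (1 << 20) == 4194304
 */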

#define BIT7 128
#define BIT6  64
#define BIT5  32
#define BIT4  16
#define BIT1   2
#define BIT0   1

#define ZSTD_WINDOWLOG_ABSOLUTEMIN 10
static UNUSED_ATTR const size_t ZSTD_fcs_fieldSize[4] = { 0, 2, 4, 8 };
static UNUSED_ATTR const size_t ZSTD_did_fieldSize[4] = { 0, 1, 2, 4 };

#define ZSTD_FRAMEIDSIZE 4   /* magic number size */

#define ZSTD_BLOCKHEADERSIZE 3   /* C standard doesn't allow `static const` variable to be init using another `static const` variable */
static UNUSED_ATTR const size_t ZSTD_blockHeaderSize = ZSTD_BLOCKHEADERSIZE;
typedef enum { bt_raw, bt_rle, bt_compressed, bt_reserved } blockType_e;

#define ZSTD_FRAMECHECKSUMSIZE 4

#define MIN_SEQUENCES_SIZE 1 /* nbSeq==0 */
#define MIN_CBLOCK_SIZE (1 /*litCSize*/ + 1 /* RLE or RAW */ + MIN_SEQUENCES_SIZE /* nbSeq==0 */)   /* for a non-null block */
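/* Worked out (illustrative): 1 (literals section header) + 1 (one RLE/RAW
 * literal byte) + 1 (sequences header with nbSeq==0) = 3 bytes, the smallest
 * possible non-empty compressed block. */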

#define HufLog 12
typedef enum { set_basic, set_rle, set_compressed, set_repeat } symbolEncodingType_e;

#define LONGNBSEQ 0x7F00

#define MINMATCH 3

#define Litbits  8
#define MaxLit ((1<<Litbits) - 1)
#define MaxML   52
#define MaxLL   35
#define DefaultMaxOff 28
#define MaxOff  31
#define MaxSeq MAX(MaxLL, MaxML)   /* Assumption : MaxOff < MaxLL,MaxML */
#define MLFSELog    9
#define LLFSELog    9
#define OffFSELog   8
#define MaxFSELog  MAX(MAX(MLFSELog, LLFSELog), OffFSELog)

#define ZSTD_MAX_HUF_HEADER_SIZE 128 /* header + <= 127 byte tree description */
/* Each table cannot take more than #symbols * FSELog bits */
#define ZSTD_MAX_FSE_HEADERS_SIZE (((MaxML + 1) * MLFSELog + (MaxLL + 1) * LLFSELog + (MaxOff + 1) * OffFSELog + 7) / 8)
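/* Worked bound (illustrative arithmetic, not from the upstream source):
 * (53*9 + 36*9 + 32*8 + 7) / 8 = (477 + 324 + 256 + 7) / 8 = 133 bytes. */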

static UNUSED_ATTR const U8 LL_bits[MaxLL+1] = {
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     1, 1, 1, 1, 2, 2, 3, 3,
     4, 6, 7, 8, 9,10,11,12,
    13,14,15,16
};
static UNUSED_ATTR const S16 LL_defaultNorm[MaxLL+1] = {
     4, 3, 2, 2, 2, 2, 2, 2,
     2, 2, 2, 2, 2, 1, 1, 1,
     2, 2, 2, 2, 2, 2, 2, 2,
     2, 3, 2, 1, 1, 1, 1, 1,
    -1,-1,-1,-1
};
#define LL_DEFAULTNORMLOG 6  /* for static allocation */
static UNUSED_ATTR const U32 LL_defaultNormLog = LL_DEFAULTNORMLOG;

static UNUSED_ATTR const U8 ML_bits[MaxML+1] = {
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     0, 0, 0, 0, 0, 0, 0, 0,
     1, 1, 1, 1, 2, 2, 3, 3,
     4, 4, 5, 7, 8, 9,10,11,
    12,13,14,15,16
};
static UNUSED_ATTR const S16 ML_defaultNorm[MaxML+1] = {
     1, 4, 3, 2, 2, 2, 2, 2,
     2, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1,-1,-1,
    -1,-1,-1,-1,-1
};
#define ML_DEFAULTNORMLOG 6  /* for static allocation */
static UNUSED_ATTR const U32 ML_defaultNormLog = ML_DEFAULTNORMLOG;

static UNUSED_ATTR const S16 OF_defaultNorm[DefaultMaxOff+1] = {
     1, 1, 1, 1, 1, 1, 2, 2,
     2, 1, 1, 1, 1, 1, 1, 1,
     1, 1, 1, 1, 1, 1, 1, 1,
    -1,-1,-1,-1,-1
};
#define OF_DEFAULTNORMLOG 5  /* for static allocation */
static UNUSED_ATTR const U32 OF_defaultNormLog = OF_DEFAULTNORMLOG;

/*-*******************************************
*  Shared functions to include for inlining
*********************************************/
static void ZSTD_copy8(void* dst, const void* src) {
#if defined(ZSTD_ARCH_ARM_NEON)
    vst1_u8((uint8_t*)dst, vld1_u8((const uint8_t*)src));
#else
    ZSTD_memcpy(dst, src, 8);
#endif
}
#define COPY8(d,s) { ZSTD_copy8(d,s); d+=8; s+=8; }

/* Need to use memmove here since the literal buffer can now be located within
   the dst buffer. In circumstances where the op "catches up" to where the
   literal buffer is, there can be partial overlaps in this call on the final
   copy if the literal is being shifted by less than 16 bytes. */
static void ZSTD_copy16(void* dst, const void* src) {
#if defined(ZSTD_ARCH_ARM_NEON)
    vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
#elif defined(ZSTD_ARCH_X86_SSE2)
    _mm_storeu_si128((__m128i*)dst, _mm_loadu_si128((const __m128i*)src));
#elif defined(__clang__)
    ZSTD_memmove(dst, src, 16);
#else
    /* ZSTD_memmove is not inlined properly by gcc */
    BYTE copy16_buf[16];
    ZSTD_memcpy(copy16_buf, src, 16);
    ZSTD_memcpy(dst, copy16_buf, 16);
#endif
}
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }

#define WILDCOPY_OVERLENGTH 32
#define WILDCOPY_VECLEN 16

typedef enum {
    ZSTD_no_overlap,
    ZSTD_overlap_src_before_dst
    /*  ZSTD_overlap_dst_before_src, */
} ZSTD_overlap_e;

/*! ZSTD_wildcopy() :
 *  Custom version of ZSTD_memcpy(), which can over-read/write up to WILDCOPY_OVERLENGTH bytes (even if length==0).
 *  @param ovtype controls the overlap detection
 *         - ZSTD_no_overlap: The source and destination are guaranteed to be at least WILDCOPY_VECLEN bytes apart.
 *         - ZSTD_overlap_src_before_dst: The src and dst may overlap, but they MUST be at least 8 bytes apart.
 *           The src buffer must be before the dst buffer.
 */
MEM_STATIC FORCE_INLINE_ATTR
void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e const ovtype)
{
    ptrdiff_t diff = (BYTE*)dst - (const BYTE*)src;
    const BYTE* ip = (const BYTE*)src;
    BYTE* op = (BYTE*)dst;
    BYTE* const oend = op + length;

    if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
        /* Handle short offset copies. */
        do {
            COPY8(op, ip)
        } while (op < oend);
    } else {
        assert(diff >= WILDCOPY_VECLEN || diff <= -WILDCOPY_VECLEN);
        /* Separate out the first COPY16() call because the copy length is
         * almost certain to be short, so the branches have different
         * probabilities. Since it is almost certain to be short, only do
         * one COPY16() in the first call. Then, do two calls per loop since
         * at that point it is more likely to have a high trip count.
         */
#ifdef __aarch64__
        do {
            COPY16(op, ip);
        }
        while (op < oend);
#else
        ZSTD_copy16(op, ip);
        if (16 >= length) return;
        op += 16;
        ip += 16;
        do {
            COPY16(op, ip);
            COPY16(op, ip);
        }
        while (op < oend);
#endif
    }
}
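
/* A minimal usage sketch (illustrative only, not part of the upstream
 * header): a caller must reserve WILDCOPY_OVERLENGTH bytes of slack past
 * the copy destination, and fall back to an exact copy near the buffer end.
 * It assumes `op` and `ip` are at least WILDCOPY_VECLEN bytes apart; the
 * helper name and the fallback choice are assumptions for this example. */
MEM_STATIC void ZSTD_wildcopy_usageSketch(BYTE* op, BYTE* const oend,
                                          const BYTE* ip, ptrdiff_t length)
{
    if (oend - op >= length + WILDCOPY_OVERLENGTH) {
        /* Enough slack: the fast, over-writing copy is safe. */
        ZSTD_wildcopy(op, ip, length, ZSTD_no_overlap);
    } else {
        /* Tail of the buffer: copy exactly, every byte matters here. */
        ZSTD_memmove(op, ip, (size_t)length);
    }
}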

MEM_STATIC size_t ZSTD_limitCopy(void* dst, size_t dstCapacity, const void* src, size_t srcSize)
{
    size_t const length = MIN(dstCapacity, srcSize);
    if (length > 0) {
        ZSTD_memcpy(dst, src, length);
    }
    return length;
}

/* define "workspace is too large" as this number of times larger than needed */
#define ZSTD_WORKSPACETOOLARGE_FACTOR 3

/* When the workspace is too large for at least this many consecutive
 * invocations, the context's memory usage is considered wasteful,
 * because it's sized to handle a worst-case scenario which rarely happens.
 * In that case, resize it down to free some memory. */
#define ZSTD_WORKSPACETOOLARGE_MAXDURATION 128

/* Controls whether the input/output buffer is buffered or stable. */
typedef enum {
    ZSTD_bm_buffered = 0,  /* Buffer the input/output */
    ZSTD_bm_stable = 1     /* ZSTD_inBuffer/ZSTD_outBuffer is stable */
} ZSTD_bufferMode_e;

/*-*******************************************
*  Private declarations
*********************************************/
typedef struct seqDef_s {
    U32 offBase;   /* offBase == Offset + ZSTD_REP_NUM, or repcode 1,2,3 */
    U16 litLength;
    U16 mlBase;    /* mlBase == matchLength - MINMATCH */
} seqDef;

/* Controls whether seqStore has a single "long" litLength or matchLength. See seqStore_t. */
typedef enum {
    ZSTD_llt_none = 0,             /* no longLengthType */
    ZSTD_llt_literalLength = 1,    /* represents a long literal */
    ZSTD_llt_matchLength = 2       /* represents a long match */
} ZSTD_longLengthType_e;

typedef struct {
    seqDef* sequencesStart;
    seqDef* sequences;      /* ptr to end of sequences */
    BYTE* litStart;
    BYTE* lit;              /* ptr to end of literals */
    BYTE* llCode;
    BYTE* mlCode;
    BYTE* ofCode;
    size_t maxNbSeq;
    size_t maxNbLit;

    /* longLengthPos and longLengthType allow us to represent either a single litLength or matchLength
     * in the seqStore that has a value larger than U16 (if it exists). To do so, we increment
     * the existing value of the litLength or matchLength by 0x10000.
     */
    ZSTD_longLengthType_e   longLengthType;
    U32                     longLengthPos;  /* Index of the sequence to apply long length modification to */
} seqStore_t;

typedef struct {
    U32 litLength;
    U32 matchLength;
} ZSTD_sequenceLength;

/*
 * Returns the ZSTD_sequenceLength for the given sequence. It handles the decoding of long sequences
 * indicated by longLengthPos and longLengthType, and adds MINMATCH back to matchLength.
 */
MEM_STATIC ZSTD_sequenceLength ZSTD_getSequenceLength(seqStore_t const* seqStore, seqDef const* seq)
{
    ZSTD_sequenceLength seqLen;
    seqLen.litLength = seq->litLength;
    seqLen.matchLength = seq->mlBase + MINMATCH;
    if (seqStore->longLengthPos == (U32)(seq - seqStore->sequencesStart)) {
        if (seqStore->longLengthType == ZSTD_llt_literalLength) {
            seqLen.litLength += 0xFFFF;
        }
        if (seqStore->longLengthType == ZSTD_llt_matchLength) {
            seqLen.matchLength += 0xFFFF;
        }
    }
    return seqLen;
}
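
/* A minimal consumer sketch (illustrative, not part of the upstream header):
 * walks every sequence in a populated seqStore and sums the decoded lengths.
 * The helper name is an assumption made for this example. */
MEM_STATIC size_t ZSTD_seqStoreTotalLength_sketch(const seqStore_t* seqStore)
{
    size_t total = 0;
    const seqDef* seq;
    /* `sequences` points one past the last stored sequence. */
    for (seq = seqStore->sequencesStart; seq < seqStore->sequences; seq++) {
        ZSTD_sequenceLength const seqLen = ZSTD_getSequenceLength(seqStore, seq);
        total += seqLen.litLength + seqLen.matchLength;
    }
    return total;
}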

/*
 * Contains the compressed frame size and an upper-bound for the decompressed frame size.
 * Note: before using `compressedSize`, check for errors using ZSTD_isError().
 *       Similarly, before using `decompressedBound`, check for errors using:
 *          `decompressedBound != ZSTD_CONTENTSIZE_ERROR`
 */
typedef struct {
    size_t compressedSize;
    unsigned long long decompressedBound;
} ZSTD_frameSizeInfo;   /* decompress & legacy */

const seqStore_t* ZSTD_getSeqStore(const ZSTD_CCtx* ctx);   /* compress & dictBuilder */
void ZSTD_seqToCodes(const seqStore_t* seqStorePtr);   /* compress, dictBuilder, decodeCorpus (shouldn't get its definition from here) */

/* custom memory allocation functions */
void* ZSTD_customMalloc(size_t size, ZSTD_customMem customMem);
void* ZSTD_customCalloc(size_t size, ZSTD_customMem customMem);
void ZSTD_customFree(void* ptr, ZSTD_customMem customMem);

MEM_STATIC U32 ZSTD_highbit32(U32 val)   /* compress, dictBuilder, decodeCorpus */
{
    assert(val != 0);
    {
#   if (__GNUC__ >= 3)   /* GCC Intrinsic */
        return __builtin_clz (val) ^ 31;
#   else   /* Software version */
        static const U32 DeBruijnClz[32] = { 0, 9, 1, 10, 13, 21, 2, 29, 11, 14, 16, 18, 22, 25, 3, 30, 8, 12, 20, 28, 15, 17, 24, 7, 19, 27, 23, 6, 26, 5, 4, 31 };
        U32 v = val;
        v |= v >> 1;
        v |= v >> 2;
        v |= v >> 4;
        v |= v >> 8;
        v |= v >> 16;
        return DeBruijnClz[(v * 0x07C4ACDDU) >> 27];
#   endif
    }
}
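
/* Worked examples (illustrative): ZSTD_highbit32(1) == 0,
 * ZSTD_highbit32(0x1000) == 12; i.e. the zero-based index of the highest
 * set bit, equivalent to floor(log2(val)) for val != 0. */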

/*
 * Counts the number of trailing zeros of a `size_t`.
 * Most compilers should support CTZ as a builtin. A backup
 * implementation is provided if the builtin isn't supported, but
 * it may not be terribly efficient.
 */
MEM_STATIC unsigned ZSTD_countTrailingZeros(size_t val)
{
    if (MEM_64bits()) {
#       if (__GNUC__ >= 4)
            return __builtin_ctzll((U64)val);
#       else
            static const int DeBruijnBytePos[64] = {  0,  1,  2,  7,  3, 13,  8, 19,
                                                      4, 25, 14, 28,  9, 34, 20, 56,
                                                      5, 17, 26, 54, 15, 41, 29, 43,
                                                     10, 31, 38, 35, 21, 45, 49, 57,
                                                     63,  6, 12, 18, 24, 27, 33, 55,
                                                     16, 53, 40, 42, 30, 37, 44, 48,
                                                     62, 11, 23, 32, 52, 39, 36, 47,
                                                     61, 22, 51, 46, 60, 50, 59, 58 };
            return DeBruijnBytePos[((U64)((val & -(long long)val) * 0x0218A392CDABBD3FULL)) >> 58];
#       endif
    } else { /* 32 bits */
#       if (__GNUC__ >= 3)
            return __builtin_ctz((U32)val);
#       else
            static const int DeBruijnBytePos[32] = {  0,  1, 28,  2, 29, 14, 24,  3,
                                                     30, 22, 20, 15, 25, 17,  4,  8,
                                                     31, 27, 13, 23, 21, 19, 16,  7,
                                                     26, 12, 18,  6, 11,  5, 10,  9 };
            return DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >> 27];
#       endif
    }
}
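
/* Worked example (illustrative): ZSTD_countTrailingZeros(8) == 3. A common
 * zstd pattern is counting the matching leading bytes of two machine words
 * by taking CTZ of their XOR (on little-endian targets). */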

/* ZSTD_invalidateRepCodes() :
 * ensures next compression will not use repcodes from previous block.
 * Note : only works with regular variant;
 *        do not use with extDict variant ! */
void ZSTD_invalidateRepCodes(ZSTD_CCtx* cctx);   /* zstdmt, adaptive_compression (shouldn't get this definition from here) */

typedef struct {
    blockType_e blockType;
    U32 lastBlock;
    U32 origSize;
} blockProperties_t;   /* declared here for decompress and fullbench */

/*! ZSTD_getcBlockSize() :
 *  Provides the size of compressed block from block header `src` */
/* Used by: decompress, fullbench (does not get its definition from here) */
size_t ZSTD_getcBlockSize(const void* src, size_t srcSize,
                          blockProperties_t* bpPtr);

/*! ZSTD_decodeSeqHeaders() :
 *  decode sequence header from src */
/* Used by: decompress, fullbench (does not get its definition from here) */
size_t ZSTD_decodeSeqHeaders(ZSTD_DCtx* dctx, int* nbSeqPtr,
                       const void* src, size_t srcSize);

/*
 * @returns true iff the CPU supports dynamic BMI2 dispatch.
 */
MEM_STATIC int ZSTD_cpuSupportsBmi2(void)
{
    ZSTD_cpuid_t cpuid = ZSTD_cpuid();
    return ZSTD_cpuid_bmi1(cpuid) && ZSTD_cpuid_bmi2(cpuid);
}

#endif   /* ZSTD_CCOMMON_H_MODULE */