1 /*
2  * Copyright (c) Yann Collet, Facebook, Inc.
3  * All rights reserved.
4  *
5  * This source code is licensed under both the BSD-style license (found in the
6  * LICENSE file in the root directory of this source tree) and the GPLv2 (found
7  * in the COPYING file in the root directory of this source tree).
8  * You may select, at your option, one of the above-listed licenses.
9  */
10 
11 /* zstd_ddict.c :
12  * concentrates all logic that needs to know the internals of ZSTD_DDict object */
13 
14 /*-*******************************************************
15 *  Dependencies
16 *********************************************************/
17 #include "../common/zstd_deps.h"   /* ZSTD_memcpy, ZSTD_memmove, ZSTD_memset */
18 #include "../common/cpu.h"         /* bmi2 */
19 #include "../common/mem.h"         /* low level memory routines */
20 #define FSE_STATIC_LINKING_ONLY
21 #include "../common/fse.h"
22 #define HUF_STATIC_LINKING_ONLY
23 #include "../common/huf.h"
24 #include "zstd_decompress_internal.h"
25 #include "zstd_ddict.h"
26 
27 /*-*******************************************************
28 *  Types
29 *********************************************************/
/* ZSTD_DDict_s :
 * Digested dictionary used for decompression.
 * Holds the dictionary content (owned copy or external reference),
 * plus the dictID and entropy tables extracted from it at load time. */
struct ZSTD_DDict_s {
    void* dictBuffer;                 /* internally allocated copy of the dictionary; NULL when the content is only referenced */
    const void* dictContent;          /* start of the dictionary bytes : either dictBuffer, or the buffer supplied by the caller */
    size_t dictSize;                  /* size of dictContent, in bytes (forced to 0 when no dictionary is provided) */
    ZSTD_entropyDTables_t entropy;    /* entropy tables filled by ZSTD_loadDEntropy(); only meaningful when entropyPresent != 0 */
    U32 dictID;                       /* ID read from the dictionary header; 0 when loaded as raw / pure content */
    U32 entropyPresent;               /* 1 once entropy tables were successfully loaded from the dictionary, 0 otherwise */
    ZSTD_customMem cMem;              /* allocator used for dictBuffer and for the DDict itself (see ZSTD_freeDDict()) */
};  /* typedef'd to ZSTD_DDict within "zstd.h" */
39 
ZSTD_DDict_dictContent(const ZSTD_DDict * ddict)40 const void* ZSTD_DDict_dictContent(const ZSTD_DDict* ddict)
41 {
42     assert(ddict != NULL);
43     return ddict->dictContent;
44 }
45 
ZSTD_DDict_dictSize(const ZSTD_DDict * ddict)46 size_t ZSTD_DDict_dictSize(const ZSTD_DDict* ddict)
47 {
48     assert(ddict != NULL);
49     return ddict->dictSize;
50 }
51 
/*! ZSTD_copyDDictParameters() :
 *  Points `dctx` at the dictionary held by `ddict` :
 *  - dictID and dictionary boundaries are copied into the context,
 *  - when the DDict carries entropy tables, the context's table pointers
 *    reference them and the repeat offsets are copied;
 *    otherwise both entropy flags of the context are cleared.
 *  Neither pointer may be NULL (only verified by assert()). */
void ZSTD_copyDDictParameters(ZSTD_DCtx* dctx, const ZSTD_DDict* ddict)
{
    DEBUGLOG(4, "ZSTD_copyDDictParameters");
    assert(dctx != NULL);
    assert(ddict != NULL);

    /* dictionary identity and boundaries */
    {   const BYTE* const dictStart = (const BYTE*)ddict->dictContent;
        const BYTE* const dictLimit = dictStart + ddict->dictSize;
        dctx->dictID = ddict->dictID;
        dctx->prefixStart = dictStart;
        dctx->virtualStart = dictStart;
        dctx->dictEnd = dictLimit;
        dctx->previousDstEnd = dictLimit;   /* the dictionary acts as the data preceding the output */
#ifdef FUZZING_BUILD_MODE_UNSAFE_FOR_PRODUCTION
        dctx->dictContentBeginForFuzzing = dictStart;
        dctx->dictContentEndForFuzzing = dictLimit;
#endif
    }

    /* content-only dictionary : no entropy tables to share */
    if (!ddict->entropyPresent) {
        dctx->litEntropy = 0;
        dctx->fseEntropy = 0;
        return;
    }

    /* full dictionary : reference its tables (no copy) and import repeat offsets */
    dctx->litEntropy = 1;
    dctx->fseEntropy = 1;
    dctx->HUFptr = ddict->entropy.hufTable;
    dctx->LLTptr = ddict->entropy.LLTable;
    dctx->MLTptr = ddict->entropy.MLTable;
    dctx->OFTptr = ddict->entropy.OFTable;
    dctx->entropy.rep[0] = ddict->entropy.rep[0];
    dctx->entropy.rep[1] = ddict->entropy.rep[1];
    dctx->entropy.rep[2] = ddict->entropy.rep[2];
}
81 
82 static size_t
ZSTD_loadEntropy_intoDDict(ZSTD_DDict * ddict,ZSTD_dictContentType_e dictContentType)83 ZSTD_loadEntropy_intoDDict(ZSTD_DDict* ddict,
84                            ZSTD_dictContentType_e dictContentType)
85 {
86     ddict->dictID = 0;
87     ddict->entropyPresent = 0;
88     if (dictContentType == ZSTD_dct_rawContent) return 0;
89 
90     if (ddict->dictSize < 8) {
91         if (dictContentType == ZSTD_dct_fullDict)
92             return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
93         return 0;   /* pure content mode */
94     }
95     {   U32 const magic = MEM_readLE32(ddict->dictContent);
96         if (magic != ZSTD_MAGIC_DICTIONARY) {
97             if (dictContentType == ZSTD_dct_fullDict)
98                 return ERROR(dictionary_corrupted);   /* only accept specified dictionaries */
99             return 0;   /* pure content mode */
100         }
101     }
102     ddict->dictID = MEM_readLE32((const char*)ddict->dictContent + ZSTD_FRAMEIDSIZE);
103 
104     /* load entropy tables */
105     RETURN_ERROR_IF(ZSTD_isError(ZSTD_loadDEntropy(
106             &ddict->entropy, ddict->dictContent, ddict->dictSize)),
107         dictionary_corrupted, "");
108     ddict->entropyPresent = 1;
109     return 0;
110 }
111 
/*! ZSTD_initDDict_internal() :
 *  Attaches dictionary content to `ddict`, then parses it.
 *  The content is duplicated with `ddict->cMem` only when copy is requested
 *  AND a non-empty dictionary is provided; in every other case it is referenced
 *  (and a NULL `dict` forces the size to 0).
 *  `ddict->cMem` must be set before calling.
 *  @return : 0 on success, or an error code
 *            (memory_allocation, or whatever ZSTD_loadEntropy_intoDDict() reports). */
static size_t ZSTD_initDDict_internal(ZSTD_DDict* ddict,
                                      const void* dict, size_t dictSize,
                                      ZSTD_dictLoadMethod_e dictLoadMethod,
                                      ZSTD_dictContentType_e dictContentType)
{
    int const mustCopy = (dictLoadMethod != ZSTD_dlm_byRef) && (dict != NULL) && (dictSize != 0);

    if (mustCopy) {
        void* const ownedCopy = ZSTD_customMalloc(dictSize, ddict->cMem);
        /* fields are set before the NULL check, so dictBuffer is always defined on return */
        ddict->dictBuffer = ownedCopy;
        ddict->dictContent = ownedCopy;
        if (ownedCopy == NULL) return ERROR(memory_allocation);
        ZSTD_memcpy(ownedCopy, dict, dictSize);
    } else {
        ddict->dictBuffer = NULL;
        ddict->dictContent = dict;
        if (dict == NULL) dictSize = 0;
    }
    ddict->dictSize = dictSize;
    ddict->entropy.hufTable[0] = (HUF_DTable)((HufLog)*0x1000001);  /* cover both little and big endian */

    /* parse dictionary content */
    FORWARD_IF_ERROR( ZSTD_loadEntropy_intoDDict(ddict, dictContentType) , "");

    return 0;
}
136 
ZSTD_createDDict_advanced(const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType,ZSTD_customMem customMem)137 ZSTD_DDict* ZSTD_createDDict_advanced(const void* dict, size_t dictSize,
138                                       ZSTD_dictLoadMethod_e dictLoadMethod,
139                                       ZSTD_dictContentType_e dictContentType,
140                                       ZSTD_customMem customMem)
141 {
142     if ((!customMem.customAlloc) ^ (!customMem.customFree)) return NULL;
143 
144     {   ZSTD_DDict* const ddict = (ZSTD_DDict*) ZSTD_customMalloc(sizeof(ZSTD_DDict), customMem);
145         if (ddict == NULL) return NULL;
146         ddict->cMem = customMem;
147         {   size_t const initResult = ZSTD_initDDict_internal(ddict,
148                                             dict, dictSize,
149                                             dictLoadMethod, dictContentType);
150             if (ZSTD_isError(initResult)) {
151                 ZSTD_freeDDict(ddict);
152                 return NULL;
153         }   }
154         return ddict;
155     }
156 }
157 
158 /*! ZSTD_createDDict() :
159 *   Create a digested dictionary, to start decompression without startup delay.
160 *   `dict` content is copied inside DDict.
161 *   Consequently, `dict` can be released after `ZSTD_DDict` creation */
ZSTD_createDDict(const void * dict,size_t dictSize)162 ZSTD_DDict* ZSTD_createDDict(const void* dict, size_t dictSize)
163 {
164     ZSTD_customMem const allocator = { NULL, NULL, NULL };
165     return ZSTD_createDDict_advanced(dict, dictSize, ZSTD_dlm_byCopy, ZSTD_dct_auto, allocator);
166 }
167 
168 /*! ZSTD_createDDict_byReference() :
169  *  Create a digested dictionary, to start decompression without startup delay.
170  *  Dictionary content is simply referenced, it will be accessed during decompression.
171  *  Warning : dictBuffer must outlive DDict (DDict must be freed before dictBuffer) */
ZSTD_createDDict_byReference(const void * dictBuffer,size_t dictSize)172 ZSTD_DDict* ZSTD_createDDict_byReference(const void* dictBuffer, size_t dictSize)
173 {
174     ZSTD_customMem const allocator = { NULL, NULL, NULL };
175     return ZSTD_createDDict_advanced(dictBuffer, dictSize, ZSTD_dlm_byRef, ZSTD_dct_auto, allocator);
176 }
177 
ZSTD_initStaticDDict(void * sBuffer,size_t sBufferSize,const void * dict,size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod,ZSTD_dictContentType_e dictContentType)178 const ZSTD_DDict* ZSTD_initStaticDDict(
179                                 void* sBuffer, size_t sBufferSize,
180                                 const void* dict, size_t dictSize,
181                                 ZSTD_dictLoadMethod_e dictLoadMethod,
182                                 ZSTD_dictContentType_e dictContentType)
183 {
184     size_t const neededSpace = sizeof(ZSTD_DDict)
185                              + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
186     ZSTD_DDict* const ddict = (ZSTD_DDict*)sBuffer;
187     assert(sBuffer != NULL);
188     assert(dict != NULL);
189     if ((size_t)sBuffer & 7) return NULL;   /* 8-aligned */
190     if (sBufferSize < neededSpace) return NULL;
191     if (dictLoadMethod == ZSTD_dlm_byCopy) {
192         ZSTD_memcpy(ddict+1, dict, dictSize);  /* local copy */
193         dict = ddict+1;
194     }
195     if (ZSTD_isError( ZSTD_initDDict_internal(ddict,
196                                               dict, dictSize,
197                                               ZSTD_dlm_byRef, dictContentType) ))
198         return NULL;
199     return ddict;
200 }
201 
/*! ZSTD_freeDDict() :
 *  Releases the internal dictionary copy (if any) and the DDict itself,
 *  using the allocator stored in the DDict.
 *  Accepts NULL, in which case nothing happens.
 *  @return : always 0. */
size_t ZSTD_freeDDict(ZSTD_DDict* ddict)
{
    if (ddict != NULL) {
        /* allocator is copied out first, since ddict itself is released below */
        ZSTD_customMem const allocator = ddict->cMem;
        ZSTD_customFree(ddict->dictBuffer, allocator);
        ZSTD_customFree(ddict, allocator);
    }
    return 0;
}
211 
212 /*! ZSTD_estimateDDictSize() :
213  *  Estimate amount of memory that will be needed to create a dictionary for decompression.
214  *  Note : dictionary created by reference using ZSTD_dlm_byRef are smaller */
ZSTD_estimateDDictSize(size_t dictSize,ZSTD_dictLoadMethod_e dictLoadMethod)215 size_t ZSTD_estimateDDictSize(size_t dictSize, ZSTD_dictLoadMethod_e dictLoadMethod)
216 {
217     return sizeof(ZSTD_DDict) + (dictLoadMethod == ZSTD_dlm_byRef ? 0 : dictSize);
218 }
219 
ZSTD_sizeof_DDict(const ZSTD_DDict * ddict)220 size_t ZSTD_sizeof_DDict(const ZSTD_DDict* ddict)
221 {
222     if (ddict==NULL) return 0;   /* support sizeof on NULL */
223     return sizeof(*ddict) + (ddict->dictBuffer ? ddict->dictSize : 0) ;
224 }
225 
226 /*! ZSTD_getDictID_fromDDict() :
227  *  Provides the dictID of the dictionary loaded into `ddict`.
228  *  If @return == 0, the dictionary is not conformant to Zstandard specification, or empty.
229  *  Non-conformant dictionaries can still be loaded, but as content-only dictionaries. */
ZSTD_getDictID_fromDDict(const ZSTD_DDict * ddict)230 unsigned ZSTD_getDictID_fromDDict(const ZSTD_DDict* ddict)
231 {
232     if (ddict==NULL) return 0;
233     return ZSTD_getDictID_fromDict(ddict->dictContent, ddict->dictSize);
234 }
235