1 /*
2  * uzlib  -  tiny deflate/inflate library (deflate, gzip, zlib)
3  *
4  * Copyright (c) 2003 by Joergen Ibsen / Jibz
5  * All Rights Reserved
6  * http://www.ibsensoftware.com/
7  *
8  * Copyright (c) 2014-2018 by Paul Sokolovsky
9  *
10  * This software is provided 'as-is', without any express
11  * or implied warranty.  In no event will the authors be
12  * held liable for any damages arising from the use of
13  * this software.
14  *
15  * Permission is granted to anyone to use this software
16  * for any purpose, including commercial applications,
17  * and to alter it and redistribute it freely, subject to
18  * the following restrictions:
19  *
20  * 1. The origin of this software must not be
21  *    misrepresented; you must not claim that you
22  *    wrote the original software. If you use this
23  *    software in a product, an acknowledgment in
24  *    the product documentation would be appreciated
25  *    but is not required.
26  *
27  * 2. Altered source versions must be plainly marked
28  *    as such, and must not be misrepresented as
29  *    being the original software.
30  *
31  * 3. This notice may not be removed or altered from
32  *    any source distribution.
33  */
34 
35 #include <assert.h>
36 #include "tinf.h"
37 
38 #define UZLIB_DUMP_ARRAY(heading, arr, size) \
39     { \
40         printf("%s", heading); \
41         for (int i = 0; i < size; ++i) { \
42             printf(" %d", (arr)[i]); \
43         } \
44         printf("\n"); \
45     }
46 
47 uint32_t tinf_get_le_uint32(TINF_DATA *d);
48 uint32_t tinf_get_be_uint32(TINF_DATA *d);
49 
50 /* --------------------------------------------------- *
51  * -- uninitialized global data (static structures) -- *
52  * --------------------------------------------------- */
53 
54 #ifdef RUNTIME_BITS_TABLES
55 
56 /* extra bits and base tables for length codes */
57 unsigned char length_bits[30];
58 unsigned short length_base[30];
59 
60 /* extra bits and base tables for distance codes */
61 unsigned char dist_bits[30];
62 unsigned short dist_base[30];
63 
64 #else
65 
66 const unsigned char length_bits[30] = {
67    0, 0, 0, 0, 0, 0, 0, 0,
68    1, 1, 1, 1, 2, 2, 2, 2,
69    3, 3, 3, 3, 4, 4, 4, 4,
70    5, 5, 5, 5
71 };
72 const unsigned short length_base[30] = {
73    3, 4, 5, 6, 7, 8, 9, 10,
74    11, 13, 15, 17, 19, 23, 27, 31,
75    35, 43, 51, 59, 67, 83, 99, 115,
76    131, 163, 195, 227, 258
77 };
78 
79 const unsigned char dist_bits[30] = {
80    0, 0, 0, 0, 1, 1, 2, 2,
81    3, 3, 4, 4, 5, 5, 6, 6,
82    7, 7, 8, 8, 9, 9, 10, 10,
83    11, 11, 12, 12, 13, 13
84 };
85 const unsigned short dist_base[30] = {
86    1, 2, 3, 4, 5, 7, 9, 13,
87    17, 25, 33, 49, 65, 97, 129, 193,
88    257, 385, 513, 769, 1025, 1537, 2049, 3073,
89    4097, 6145, 8193, 12289, 16385, 24577
90 };
91 
92 #endif
93 
94 /* special ordering of code length codes */
95 const unsigned char clcidx[] = {
96    16, 17, 18, 0, 8, 7, 9, 6,
97    10, 5, 11, 4, 12, 3, 13, 2,
98    14, 1, 15
99 };
100 
101 /* ----------------------- *
102  * -- utility functions -- *
103  * ----------------------- */
104 
105 #ifdef RUNTIME_BITS_TABLES
106 /* build extra bits and base tables */
tinf_build_bits_base(unsigned char * bits,unsigned short * base,int delta,int first)107 static void tinf_build_bits_base(unsigned char *bits, unsigned short *base, int delta, int first)
108 {
109    int i, sum;
110 
111    /* build bits table */
112    for (i = 0; i < delta; ++i) bits[i] = 0;
113    for (i = 0; i < 30 - delta; ++i) bits[i + delta] = i / delta;
114 
115    /* build base table */
116    for (sum = first, i = 0; i < 30; ++i)
117    {
118       base[i] = sum;
119       sum += 1 << bits[i];
120    }
121 }
122 #endif
123 
124 /* build the fixed huffman trees */
tinf_build_fixed_trees(TINF_TREE * lt,TINF_TREE * dt)125 static void tinf_build_fixed_trees(TINF_TREE *lt, TINF_TREE *dt)
126 {
127    int i;
128 
129    /* build fixed length tree */
130    for (i = 0; i < 7; ++i) lt->table[i] = 0;
131 
132    lt->table[7] = 24;
133    lt->table[8] = 152;
134    lt->table[9] = 112;
135 
136    for (i = 0; i < 24; ++i) lt->trans[i] = 256 + i;
137    for (i = 0; i < 144; ++i) lt->trans[24 + i] = i;
138    for (i = 0; i < 8; ++i) lt->trans[24 + 144 + i] = 280 + i;
139    for (i = 0; i < 112; ++i) lt->trans[24 + 144 + 8 + i] = 144 + i;
140 
141    /* build fixed distance tree */
142    for (i = 0; i < 5; ++i) dt->table[i] = 0;
143 
144    dt->table[5] = 32;
145 
146    for (i = 0; i < 32; ++i) dt->trans[i] = i;
147 }
148 
149 /* given an array of code lengths, build a tree */
tinf_build_tree(TINF_TREE * t,const unsigned char * lengths,unsigned int num)150 static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned int num)
151 {
152    unsigned short offs[16];
153    unsigned int i, sum;
154 
155    /* clear code length count table */
156    for (i = 0; i < 16; ++i) t->table[i] = 0;
157 
158    /* scan symbol lengths, and sum code length counts */
159    for (i = 0; i < num; ++i) t->table[lengths[i]]++;
160 
161    #if UZLIB_CONF_DEBUG_LOG >= 2
162    UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table));
163    #endif
164 
165    /* In the lengths array, 0 means unused code. So, t->table[0] now contains
166       number of unused codes. But table's purpose is to contain # of codes of
167       particular length, and there're 0 codes of length 0. */
168    t->table[0] = 0;
169 
170    /* compute offset table for distribution sort */
171    for (sum = 0, i = 0; i < 16; ++i)
172    {
173       offs[i] = sum;
174       sum += t->table[i];
175    }
176 
177    #if UZLIB_CONF_DEBUG_LOG >= 2
178    UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs));
179    #endif
180 
181    /* create code->symbol translation table (symbols sorted by code) */
182    for (i = 0; i < num; ++i)
183    {
184       if (lengths[i]) t->trans[offs[lengths[i]]++] = i;
185    }
186 }
187 
188 /* ---------------------- *
189  * -- decode functions -- *
190  * ---------------------- */
191 
uzlib_get_byte(TINF_DATA * d)192 unsigned char uzlib_get_byte(TINF_DATA *d)
193 {
194     /* If end of source buffer is not reached, return next byte from source
195        buffer. */
196     if (d->source < d->source_limit) {
197         return *d->source++;
198     }
199 
200     /* Otherwise if there's callback and we haven't seen EOF yet, try to
201        read next byte using it. (Note: the callback can also update ->source
202        and ->source_limit). */
203     if (d->readSource && !d->eof) {
204         int val = d->readSource(d);
205         if (val >= 0) {
206             return (unsigned char)val;
207         }
208     }
209 
210     /* Otherwise, we hit EOF (either from ->readSource() or from exhaustion
211        of the buffer), and it will be "sticky", i.e. further calls to this
212        function will end up here too. */
213     d->eof = true;
214 
215     return 0;
216 }
217 
tinf_get_le_uint32(TINF_DATA * d)218 uint32_t tinf_get_le_uint32(TINF_DATA *d)
219 {
220     uint32_t val = 0;
221     int i;
222     for (i = 4; i--;) {
223         val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24;
224     }
225     return val;
226 }
227 
tinf_get_be_uint32(TINF_DATA * d)228 uint32_t tinf_get_be_uint32(TINF_DATA *d)
229 {
230     uint32_t val = 0;
231     int i;
232     for (i = 4; i--;) {
233         val = val << 8 | uzlib_get_byte(d);
234     }
235     return val;
236 }
237 
238 /* get one bit from source stream */
tinf_getbit(TINF_DATA * d)239 static int tinf_getbit(TINF_DATA *d)
240 {
241    unsigned int bit;
242 
243    /* check if tag is empty */
244    if (!d->bitcount--)
245    {
246       /* load next tag */
247       d->tag = uzlib_get_byte(d);
248       d->bitcount = 7;
249    }
250 
251    /* shift bit out of tag */
252    bit = d->tag & 0x01;
253    d->tag >>= 1;
254 
255    return bit;
256 }
257 
258 /* read a num bit value from a stream and add base */
tinf_read_bits(TINF_DATA * d,int num,int base)259 static unsigned int tinf_read_bits(TINF_DATA *d, int num, int base)
260 {
261    unsigned int val = 0;
262 
263    /* read num bits */
264    if (num)
265    {
266       unsigned int limit = 1 << (num);
267       unsigned int mask;
268 
269       for (mask = 1; mask < limit; mask *= 2)
270          if (tinf_getbit(d)) val += mask;
271    }
272 
273    return val + base;
274 }
275 
276 /* given a data stream and a tree, decode a symbol */
tinf_decode_symbol(TINF_DATA * d,TINF_TREE * t)277 static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t)
278 {
279    int sum = 0, cur = 0, len = 0;
280 
281    /* get more bits while code value is above sum */
282    do {
283 
284       cur = 2*cur + tinf_getbit(d);
285 
286       if (++len == TINF_ARRAY_SIZE(t->table)) {
287          return TINF_DATA_ERROR;
288       }
289 
290       sum += t->table[len];
291       cur -= t->table[len];
292 
293    } while (cur >= 0);
294 
295    sum += cur;
296    #if UZLIB_CONF_PARANOID_CHECKS
297    if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) {
298       return TINF_DATA_ERROR;
299    }
300    #endif
301 
302    return t->trans[sum];
303 }
304 
305 /* given a data stream, decode dynamic trees from it */
tinf_decode_trees(TINF_DATA * d,TINF_TREE * lt,TINF_TREE * dt)306 static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
307 {
308    /* code lengths for 288 literal/len symbols and 32 dist symbols */
309    unsigned char lengths[288+32];
310    unsigned int hlit, hdist, hclen, hlimit;
311    unsigned int i, num, length;
312 
313    /* get 5 bits HLIT (257-286) */
314    hlit = tinf_read_bits(d, 5, 257);
315 
316    /* get 5 bits HDIST (1-32) */
317    hdist = tinf_read_bits(d, 5, 1);
318 
319    /* get 4 bits HCLEN (4-19) */
320    hclen = tinf_read_bits(d, 4, 4);
321 
322    for (i = 0; i < 19; ++i) lengths[i] = 0;
323 
324    /* read code lengths for code length alphabet */
325    for (i = 0; i < hclen; ++i)
326    {
327       /* get 3 bits code length (0-7) */
328       unsigned int clen = tinf_read_bits(d, 3, 0);
329 
330       lengths[clcidx[i]] = clen;
331    }
332 
333    /* build code length tree, temporarily use length tree */
334    tinf_build_tree(lt, lengths, 19);
335 
336    /* decode code lengths for the dynamic trees */
337    hlimit = hlit + hdist;
338    for (num = 0; num < hlimit; )
339    {
340       int sym = tinf_decode_symbol(d, lt);
341       unsigned char fill_value = 0;
342       int lbits, lbase = 3;
343 
344       /* error decoding */
345       if (sym < 0) return sym;
346 
347       switch (sym)
348       {
349       case 16:
350          /* copy previous code length 3-6 times (read 2 bits) */
351          if (num == 0) return TINF_DATA_ERROR;
352          fill_value = lengths[num - 1];
353          lbits = 2;
354          break;
355       case 17:
356          /* repeat code length 0 for 3-10 times (read 3 bits) */
357          lbits = 3;
358          break;
359       case 18:
360          /* repeat code length 0 for 11-138 times (read 7 bits) */
361          lbits = 7;
362          lbase = 11;
363          break;
364       default:
365          /* values 0-15 represent the actual code lengths */
366          lengths[num++] = sym;
367          /* continue the for loop */
368          continue;
369       }
370 
371       /* special code length 16-18 are handled here */
372       length = tinf_read_bits(d, lbits, lbase);
373       if (num + length > hlimit) return TINF_DATA_ERROR;
374       for (; length; --length)
375       {
376          lengths[num++] = fill_value;
377       }
378    }
379 
380    #if UZLIB_CONF_DEBUG_LOG >= 2
381    printf("lit code lengths (%d):", hlit);
382    UZLIB_DUMP_ARRAY("", lengths, hlit);
383    printf("dist code lengths (%d):", hdist);
384    UZLIB_DUMP_ARRAY("", lengths + hlit, hdist);
385    #endif
386 
387    #if UZLIB_CONF_PARANOID_CHECKS
388    /* Check that there's "end of block" symbol */
389    if (lengths[256] == 0) {
390       return TINF_DATA_ERROR;
391    }
392    #endif
393 
394    /* build dynamic trees */
395    tinf_build_tree(lt, lengths, hlit);
396    tinf_build_tree(dt, lengths + hlit, hdist);
397 
398    return TINF_OK;
399 }
400 
401 /* ----------------------------- *
402  * -- block inflate functions -- *
403  * ----------------------------- */
404 
405 /* given a stream and two trees, inflate next byte of output */
tinf_inflate_block_data(TINF_DATA * d,TINF_TREE * lt,TINF_TREE * dt)406 static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
407 {
408     if (d->curlen == 0) {
409         unsigned int offs;
410         int dist;
411         int sym = tinf_decode_symbol(d, lt);
412         //printf("huff sym: %02x\n", sym);
413 
414         if (d->eof) {
415             return TINF_DATA_ERROR;
416         }
417 
418         /* literal byte */
419         if (sym < 256) {
420             TINF_PUT(d, sym);
421             return TINF_OK;
422         }
423 
424         /* end of block */
425         if (sym == 256) {
426             return TINF_DONE;
427         }
428 
429         /* substring from sliding dictionary */
430         sym -= 257;
431         if (sym >= 29) {
432             return TINF_DATA_ERROR;
433         }
434 
435         /* possibly get more bits from length code */
436         d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]);
437 
438         dist = tinf_decode_symbol(d, dt);
439         if (dist >= 30) {
440             return TINF_DATA_ERROR;
441         }
442 
443         /* possibly get more bits from distance code */
444         offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]);
445 
446         /* calculate and validate actual LZ offset to use */
447         if (d->dict_ring) {
448             if (offs > d->dict_size) {
449                 return TINF_DICT_ERROR;
450             }
451             /* Note: unlike full-dest-in-memory case below, we don't
452                try to catch offset which points to not yet filled
453                part of the dictionary here. Doing so would require
454                keeping another variable to track "filled in" size
455                of the dictionary. Appearance of such an offset cannot
456                lead to accessing memory outside of the dictionary
457                buffer, and clients which don't want to leak unrelated
458                information, should explicitly initialize dictionary
459                buffer passed to uzlib. */
460 
461             d->lzOff = d->dict_idx - offs;
462             if (d->lzOff < 0) {
463                 d->lzOff += d->dict_size;
464             }
465         } else {
466             /* catch trying to point before the start of dest buffer */
467             if (offs > (unsigned int)(d->dest - d->destStart)) {
468                 return TINF_DATA_ERROR;
469             }
470             d->lzOff = -offs;
471         }
472     }
473 
474     /* copy next byte from dict substring */
475     if (d->dict_ring) {
476         TINF_PUT(d, d->dict_ring[d->lzOff]);
477         if ((unsigned)++d->lzOff == d->dict_size) {
478             d->lzOff = 0;
479         }
480     } else {
481         d->dest[0] = d->dest[d->lzOff];
482         d->dest++;
483     }
484     d->curlen--;
485     return TINF_OK;
486 }
487 
488 /* inflate next byte from uncompressed block of data */
tinf_inflate_uncompressed_block(TINF_DATA * d)489 static int tinf_inflate_uncompressed_block(TINF_DATA *d)
490 {
491     if (d->curlen == 0) {
492         unsigned int length, invlength;
493 
494         /* get length */
495         length = uzlib_get_byte(d);
496         length += 256 * uzlib_get_byte(d);
497         /* get one's complement of length */
498         invlength = uzlib_get_byte(d);
499         invlength += 256 * uzlib_get_byte(d);
500         /* check length */
501         if (length != (~invlength & 0x0000ffff)) return TINF_DATA_ERROR;
502 
503         /* increment length to properly return TINF_DONE below, without
504            producing data at the same time */
505         d->curlen = length + 1;
506 
507         /* make sure we start next block on a byte boundary */
508         d->bitcount = 0;
509     }
510 
511     if (--d->curlen == 0) {
512         return TINF_DONE;
513     }
514 
515     unsigned char c = uzlib_get_byte(d);
516     TINF_PUT(d, c);
517     return TINF_OK;
518 }
519 
520 /* ---------------------- *
521  * -- public functions -- *
522  * ---------------------- */
523 
524 /* initialize global (static) data */
uzlib_init(void)525 void uzlib_init(void)
526 {
527 #ifdef RUNTIME_BITS_TABLES
528    /* build extra bits and base tables */
529    tinf_build_bits_base(length_bits, length_base, 4, 3);
530    tinf_build_bits_base(dist_bits, dist_base, 2, 1);
531 
532    /* fix a special case */
533    length_bits[28] = 0;
534    length_base[28] = 258;
535 #endif
536 }
537 
538 /* initialize decompression structure */
uzlib_uncompress_init(TINF_DATA * d,void * dict,unsigned int dictLen)539 void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
540 {
541    d->eof = 0;
542    d->bitcount = 0;
543    d->bfinal = 0;
544    d->btype = -1;
545    d->dict_size = dictLen;
546    d->dict_ring = dict;
547    d->dict_idx = 0;
548    d->curlen = 0;
549 }
550 
551 /* inflate next output bytes from compressed stream */
uzlib_uncompress(TINF_DATA * d)552 int uzlib_uncompress(TINF_DATA *d)
553 {
554     do {
555         int res;
556 
557         /* start a new block */
558         if (d->btype == -1) {
559 next_blk:
560             /* read final block flag */
561             d->bfinal = tinf_getbit(d);
562             /* read block type (2 bits) */
563             d->btype = tinf_read_bits(d, 2, 0);
564 
565             #if UZLIB_CONF_DEBUG_LOG >= 1
566             printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
567             #endif
568 
569             if (d->btype == 1) {
570                 /* build fixed huffman trees */
571                 tinf_build_fixed_trees(&d->ltree, &d->dtree);
572             } else if (d->btype == 2) {
573                 /* decode trees from stream */
574                 res = tinf_decode_trees(d, &d->ltree, &d->dtree);
575                 if (res != TINF_OK) {
576                     return res;
577                 }
578             }
579         }
580 
581         /* process current block */
582         switch (d->btype)
583         {
584         case 0:
585             /* decompress uncompressed block */
586             res = tinf_inflate_uncompressed_block(d);
587             break;
588         case 1:
589         case 2:
590             /* decompress block with fixed/dynamic huffman trees */
591             /* trees were decoded previously, so it's the same routine for both */
592             res = tinf_inflate_block_data(d, &d->ltree, &d->dtree);
593             break;
594         default:
595             return TINF_DATA_ERROR;
596         }
597 
598         if (res == TINF_DONE && !d->bfinal) {
599             /* the block has ended (without producing more data), but we
600                can't return without data, so start procesing next block */
601             goto next_blk;
602         }
603 
604         if (res != TINF_OK) {
605             return res;
606         }
607 
608     } while (d->dest < d->dest_limit);
609 
610     return TINF_OK;
611 }
612 
613 /* inflate next output bytes from compressed stream, updating
614    checksum, and at the end of stream, verify it */
uzlib_uncompress_chksum(TINF_DATA * d)615 int uzlib_uncompress_chksum(TINF_DATA *d)
616 {
617     int res;
618     unsigned char *data = d->dest;
619 
620     res = uzlib_uncompress(d);
621 
622     if (res < 0) return res;
623 
624     switch (d->checksum_type) {
625 
626     case TINF_CHKSUM_ADLER:
627         d->checksum = uzlib_adler32(data, d->dest - data, d->checksum);
628         break;
629 
630     case TINF_CHKSUM_CRC:
631         d->checksum = uzlib_crc32(data, d->dest - data, d->checksum);
632         break;
633     }
634 
635     if (res == TINF_DONE) {
636         unsigned int val;
637 
638         switch (d->checksum_type) {
639 
640         case TINF_CHKSUM_ADLER:
641             val = tinf_get_be_uint32(d);
642             if (d->checksum != val) {
643                 return TINF_CHKSUM_ERROR;
644             }
645             break;
646 
647         case TINF_CHKSUM_CRC:
648             val = tinf_get_le_uint32(d);
649             if (~d->checksum != val) {
650                 return TINF_CHKSUM_ERROR;
651             }
652             // Uncompressed size. TODO: Check
653             val = tinf_get_le_uint32(d);
654             break;
655         }
656     }
657 
658     return res;
659 }
660