1 /* gzread.c -- zlib functions for reading gzip files
2  * Copyright (C) 2004, 2005, 2010 Mark Adler
3  * For conditions of distribution and use, see copyright notice in zlib.h
4  */
5 
6 #include "gzguts.h"
7 
8 /* Local functions */
9 local int gz_load OF((gz_statep, unsigned char *, unsigned, unsigned *));
10 local int gz_avail OF((gz_statep));
11 local int gz_next4 OF((gz_statep, unsigned long *));
12 local int gz_head OF((gz_statep));
13 local int gz_decomp OF((gz_statep));
14 local int gz_make OF((gz_statep));
15 local int gz_skip OF((gz_statep, z_off64_t));
16 
17 /* Use read() to load a buffer -- return -1 on error, otherwise 0.  Read from
18    state->fd, and update state->eof, state->err, and state->msg as appropriate.
19    This function needs to loop on read(), since read() is not guaranteed to
20    read the number of bytes requested, depending on the type of descriptor. */
gz_load(state,buf,len,have)21 local int gz_load(state, buf, len, have)
22     gz_statep state;
23     unsigned char *buf;
24     unsigned len;
25     unsigned *have;
26 {
27     int ret;
28 
29     *have = 0;
30     do {
31         ret = read(state->fd, buf + *have, len - *have);
32         if (ret <= 0)
33             break;
34         *have += ret;
35     } while (*have < len);
36     if (ret < 0) {
37         gz_error(state, Z_ERRNO, zstrerror());
38         return -1;
39     }
40     if (ret == 0)
41         state->eof = 1;
42     return 0;
43 }
44 
45 /* Load up input buffer and set eof flag if last data loaded -- return -1 on
46    error, 0 otherwise.  Note that the eof flag is set when the end of the input
47    file is reached, even though there may be unused data in the buffer.  Once
48    that data has been used, no more attempts will be made to read the file.
49    gz_avail() assumes that strm->avail_in == 0. */
gz_avail(state)50 local int gz_avail(state)
51     gz_statep state;
52 {
53     z_streamp strm = &(state->strm);
54 
55     if (state->err != Z_OK)
56         return -1;
57     if (state->eof == 0) {
58         if (gz_load(state, state->in, state->size,
59                 (unsigned *)&(strm->avail_in)) == -1)
60             return -1;
61         strm->next_in = state->in;
62     }
63     return 0;
64 }
65 
/* Fetch the next input byte, refilling the input buffer through gz_avail()
   when it is empty.  Evaluates to -1 if the refill fails or no input is left.
   Expects variables named state and strm to be in scope at the point of use. */
#define NEXT() \
    ((strm->avail_in == 0 && gz_avail(state) == -1) ? -1 : \
     (strm->avail_in != 0 ? (strm->avail_in--, *(strm->next_in)++) : -1))
70 
71 /* Get a four-byte little-endian integer and return 0 on success and the value
72    in *ret.  Otherwise -1 is returned and *ret is not modified. */
gz_next4(state,ret)73 local int gz_next4(state, ret)
74     gz_statep state;
75     unsigned long *ret;
76 {
77     int ch;
78     unsigned long val;
79     z_streamp strm = &(state->strm);
80 
81     val = NEXT();
82     val += (unsigned)NEXT() << 8;
83     val += (unsigned long)NEXT() << 16;
84     ch = NEXT();
85     if (ch == -1)
86         return -1;
87     val += (unsigned long)ch << 24;
88     *ret = val;
89     return 0;
90 }
91 
92 /* Look for gzip header, set up for inflate or copy.  state->have must be zero.
93    If this is the first time in, allocate required memory.  state->how will be
94    left unchanged if there is no more input data available, will be set to COPY
95    if there is no gzip header and direct copying will be performed, or it will
96    be set to GZIP for decompression, and the gzip header will be skipped so
97    that the next available input data is the raw deflate stream.  If direct
98    copying, then leftover input data from the input buffer will be copied to
99    the output buffer.  In that case, all further file reads will be directly to
100    either the output buffer or a user buffer.  If decompressing, the inflate
101    state and the check value will be initialized.  gz_head() will return 0 on
102    success or -1 on failure.  Failures may include read errors or gzip header
103    errors.  */
gz_head(state)104 local int gz_head(state)
105     gz_statep state;
106 {
107     z_streamp strm = &(state->strm);
108     int flags;
109     unsigned len;
110 
111     /* allocate read buffers and inflate memory */
112     if (state->size == 0) {
113         /* allocate buffers */
114         state->in = malloc(state->want);
115         state->out = malloc(state->want << 1);
116         if (state->in == NULL || state->out == NULL) {
117             if (state->out != NULL)
118                 free(state->out);
119             if (state->in != NULL)
120                 free(state->in);
121             gz_error(state, Z_MEM_ERROR, "out of memory");
122             return -1;
123         }
124         state->size = state->want;
125 
126         /* allocate inflate memory */
127         state->strm.zalloc = Z_NULL;
128         state->strm.zfree = Z_NULL;
129         state->strm.opaque = Z_NULL;
130         state->strm.avail_in = 0;
131         state->strm.next_in = Z_NULL;
132         if (inflateInit2(&(state->strm), -15) != Z_OK) {    /* raw inflate */
133             free(state->out);
134             free(state->in);
135             state->size = 0;
136             gz_error(state, Z_MEM_ERROR, "out of memory");
137             return -1;
138         }
139     }
140 
141     /* get some data in the input buffer */
142     if (strm->avail_in == 0) {
143         if (gz_avail(state) == -1)
144             return -1;
145         if (strm->avail_in == 0)
146             return 0;
147     }
148 
149     /* look for the gzip magic header bytes 31 and 139 */
150     if (strm->next_in[0] == 31) {
151         strm->avail_in--;
152         strm->next_in++;
153         if (strm->avail_in == 0 && gz_avail(state) == -1)
154             return -1;
155         if (strm->avail_in && strm->next_in[0] == 139) {
156             /* we have a gzip header, woo hoo! */
157             strm->avail_in--;
158             strm->next_in++;
159 
160             /* skip rest of header */
161             if (NEXT() != 8) {      /* compression method */
162                 gz_error(state, Z_DATA_ERROR, "unknown compression method");
163                 return -1;
164             }
165             flags = NEXT();
166             if (flags & 0xe0) {     /* reserved flag bits */
167                 gz_error(state, Z_DATA_ERROR, "unknown header flags set");
168                 return -1;
169             }
170             NEXT();                 /* modification time */
171             NEXT();
172             NEXT();
173             NEXT();
174             NEXT();                 /* extra flags */
175             NEXT();                 /* operating system */
176             if (flags & 4) {        /* extra field */
177                 len = (unsigned)NEXT();
178                 len += (unsigned)NEXT() << 8;
179                 while (len--)
180                     if (NEXT() < 0)
181                         break;
182             }
183             if (flags & 8)          /* file name */
184                 while (NEXT() > 0)
185                     ;
186             if (flags & 16)         /* comment */
187                 while (NEXT() > 0)
188                     ;
189             if (flags & 2) {        /* header crc */
190                 NEXT();
191                 NEXT();
192             }
193             /* an unexpected end of file is not checked for here -- it will be
194                noticed on the first request for uncompressed data */
195 
196             /* set up for decompression */
197             inflateReset(strm);
198             strm->adler = crc32(0L, Z_NULL, 0);
199             state->how = GZIP;
200             state->direct = 0;
201             return 0;
202         }
203         else {
204             /* not a gzip file -- save first byte (31) and fall to raw i/o */
205             state->out[0] = 31;
206             state->have = 1;
207         }
208     }
209 
210     /* doing raw i/o, save start of raw data for seeking, copy any leftover
211        input to output -- this assumes that the output buffer is larger than
212        the input buffer, which also assures space for gzungetc() */
213     state->raw = state->pos;
214     state->next = state->out;
215     if (strm->avail_in) {
216         memcpy(state->next + state->have, strm->next_in, strm->avail_in);
217         state->have += strm->avail_in;
218         strm->avail_in = 0;
219     }
220     state->how = COPY;
221     state->direct = 1;
222     return 0;
223 }
224 
225 /* Decompress from input to the provided next_out and avail_out in the state.
226    If the end of the compressed data is reached, then verify the gzip trailer
227    check value and length (modulo 2^32).  state->have and state->next are set
228    to point to the just decompressed data, and the crc is updated.  If the
229    trailer is verified, state->how is reset to LOOK to look for the next gzip
230    stream or raw data, once state->have is depleted.  Returns 0 on success, -1
231    on failure.  Failures may include invalid compressed data or a failed gzip
232    trailer verification. */
gz_decomp(state)233 local int gz_decomp(state)
234     gz_statep state;
235 {
236     int ret;
237     unsigned had;
238     unsigned long crc, len;
239     z_streamp strm = &(state->strm);
240 
241     /* fill output buffer up to end of deflate stream */
242     had = strm->avail_out;
243     do {
244         /* get more input for inflate() */
245         if (strm->avail_in == 0 && gz_avail(state) == -1)
246             return -1;
247         if (strm->avail_in == 0) {
248             gz_error(state, Z_DATA_ERROR, "unexpected end of file");
249             return -1;
250         }
251 
252         /* decompress and handle errors */
253         ret = inflate(strm, Z_NO_FLUSH);
254         if (ret == Z_STREAM_ERROR || ret == Z_NEED_DICT) {
255             gz_error(state, Z_STREAM_ERROR,
256                       "internal error: inflate stream corrupt");
257             return -1;
258         }
259         if (ret == Z_MEM_ERROR) {
260             gz_error(state, Z_MEM_ERROR, "out of memory");
261             return -1;
262         }
263         if (ret == Z_DATA_ERROR) {              /* deflate stream invalid */
264             gz_error(state, Z_DATA_ERROR,
265                       strm->msg == NULL ? "compressed data error" : strm->msg);
266             return -1;
267         }
268     } while (strm->avail_out && ret != Z_STREAM_END);
269 
270     /* update available output and crc check value */
271     state->have = had - strm->avail_out;
272     state->next = strm->next_out - state->have;
273     strm->adler = crc32(strm->adler, state->next, state->have);
274 
275     /* check gzip trailer if at end of deflate stream */
276     if (ret == Z_STREAM_END) {
277         if (gz_next4(state, &crc) == -1 || gz_next4(state, &len) == -1) {
278             gz_error(state, Z_DATA_ERROR, "unexpected end of file");
279             return -1;
280         }
281         if (crc != strm->adler) {
282             gz_error(state, Z_DATA_ERROR, "incorrect data check");
283             return -1;
284         }
285         if (len != (strm->total_out & 0xffffffffL)) {
286             gz_error(state, Z_DATA_ERROR, "incorrect length check");
287             return -1;
288         }
289         state->how = LOOK;      /* ready for next stream, once have is 0 (leave
290                                    state->direct unchanged to remember how) */
291     }
292 
293     /* good decompression */
294     return 0;
295 }
296 
297 /* Make data and put in the output buffer.  Assumes that state->have == 0.
298    Data is either copied from the input file or decompressed from the input
299    file depending on state->how.  If state->how is LOOK, then a gzip header is
300    looked for (and skipped if found) to determine wither to copy or decompress.
301    Returns -1 on error, otherwise 0.  gz_make() will leave state->have as COPY
302    or GZIP unless the end of the input file has been reached and all data has
303    been processed.  */
gz_make(state)304 local int gz_make(state)
305     gz_statep state;
306 {
307     z_streamp strm = &(state->strm);
308 
309     if (state->how == LOOK) {           /* look for gzip header */
310         if (gz_head(state) == -1)
311             return -1;
312         if (state->have)                /* got some data from gz_head() */
313             return 0;
314     }
315     if (state->how == COPY) {           /* straight copy */
316         if (gz_load(state, state->out, state->size << 1, &(state->have)) == -1)
317             return -1;
318         state->next = state->out;
319     }
320     else if (state->how == GZIP) {      /* decompress */
321         strm->avail_out = state->size << 1;
322         strm->next_out = state->out;
323         if (gz_decomp(state) == -1)
324             return -1;
325     }
326     return 0;
327 }
328 
329 /* Skip len uncompressed bytes of output.  Return -1 on error, 0 on success. */
gz_skip(state,len)330 local int gz_skip(state, len)
331     gz_statep state;
332     z_off64_t len;
333 {
334     unsigned n;
335 
336     /* skip over len bytes or reach end-of-file, whichever comes first */
337     while (len)
338         /* skip over whatever is in output buffer */
339         if (state->have) {
340             n = GT_OFF(state->have) || (z_off64_t)state->have > len ?
341                 (unsigned)len : state->have;
342             state->have -= n;
343             state->next += n;
344             state->pos += n;
345             len -= n;
346         }
347 
348         /* output buffer empty -- return if we're at the end of the input */
349         else if (state->eof && state->strm.avail_in == 0)
350             break;
351 
352         /* need more data to skip -- load up output buffer */
353         else {
354             /* get more output, looking for header if required */
355             if (gz_make(state) == -1)
356                 return -1;
357         }
358     return 0;
359 }
360 
361 /* -- see zlib.h -- */
gzread(file,buf,len)362 int ZEXPORT gzread(file, buf, len)
363     gzFile file;
364     voidp buf;
365     unsigned len;
366 {
367     unsigned got, n;
368     gz_statep state;
369     z_streamp strm;
370 
371     /* get internal structure */
372     if (file == NULL)
373         return -1;
374     state = (gz_statep)file;
375     strm = &(state->strm);
376 
377     /* check that we're reading and that there's no error */
378     if (state->mode != GZ_READ || state->err != Z_OK)
379         return -1;
380 
381     /* since an int is returned, make sure len fits in one, otherwise return
382        with an error (this avoids the flaw in the interface) */
383     if ((int)len < 0) {
384         gz_error(state, Z_BUF_ERROR, "requested length does not fit in int");
385         return -1;
386     }
387 
388     /* if len is zero, avoid unnecessary operations */
389     if (len == 0)
390         return 0;
391 
392     /* process a skip request */
393     if (state->seek) {
394         state->seek = 0;
395         if (gz_skip(state, state->skip) == -1)
396             return -1;
397     }
398 
399     /* get len bytes to buf, or less than len if at the end */
400     got = 0;
401     do {
402         /* first just try copying data from the output buffer */
403         if (state->have) {
404             n = state->have > len ? len : state->have;
405             memcpy(buf, state->next, n);
406             state->next += n;
407             state->have -= n;
408         }
409 
410         /* output buffer empty -- return if we're at the end of the input */
411         else if (state->eof && strm->avail_in == 0)
412             break;
413 
414         /* need output data -- for small len or new stream load up our output
415            buffer */
416         else if (state->how == LOOK || len < (state->size << 1)) {
417             /* get more output, looking for header if required */
418             if (gz_make(state) == -1)
419                 return -1;
420             continue;       /* no progress yet -- go back to memcpy() above */
421             /* the copy above assures that we will leave with space in the
422                output buffer, allowing at least one gzungetc() to succeed */
423         }
424 
425         /* large len -- read directly into user buffer */
426         else if (state->how == COPY) {      /* read directly */
427             if (gz_load(state, buf, len, &n) == -1)
428                 return -1;
429         }
430 
431         /* large len -- decompress directly into user buffer */
432         else {  /* state->how == GZIP */
433             strm->avail_out = len;
434             strm->next_out = buf;
435             if (gz_decomp(state) == -1)
436                 return -1;
437             n = state->have;
438             state->have = 0;
439         }
440 
441         /* update progress */
442         len -= n;
443         buf = (char *)buf + n;
444         got += n;
445         state->pos += n;
446     } while (len);
447 
448     /* return number of bytes read into user buffer (will fit in int) */
449     return (int)got;
450 }
451 
452 /* -- see zlib.h -- */
gzgetc(file)453 int ZEXPORT gzgetc(file)
454     gzFile file;
455 {
456     int ret;
457     unsigned char buf[1];
458     gz_statep state;
459 
460     /* get internal structure */
461     if (file == NULL)
462         return -1;
463     state = (gz_statep)file;
464 
465     /* check that we're reading and that there's no error */
466     if (state->mode != GZ_READ || state->err != Z_OK)
467         return -1;
468 
469     /* try output buffer (no need to check for skip request) */
470     if (state->have) {
471         state->have--;
472         state->pos++;
473         return *(state->next)++;
474     }
475 
476     /* nothing there -- try gzread() */
477     ret = gzread(file, buf, 1);
478     return ret < 1 ? -1 : buf[0];
479 }
480 
481 /* -- see zlib.h -- */
gzungetc(c,file)482 int ZEXPORT gzungetc(c, file)
483     int c;
484     gzFile file;
485 {
486     gz_statep state;
487 
488     /* get internal structure */
489     if (file == NULL)
490         return -1;
491     state = (gz_statep)file;
492 
493     /* check that we're reading and that there's no error */
494     if (state->mode != GZ_READ || state->err != Z_OK)
495         return -1;
496 
497     /* process a skip request */
498     if (state->seek) {
499         state->seek = 0;
500         if (gz_skip(state, state->skip) == -1)
501             return -1;
502     }
503 
504     /* can't push EOF */
505     if (c < 0)
506         return -1;
507 
508     /* if output buffer empty, put byte at end (allows more pushing) */
509     if (state->have == 0) {
510         state->have = 1;
511         state->next = state->out + (state->size << 1) - 1;
512         state->next[0] = c;
513         state->pos--;
514         return c;
515     }
516 
517     /* if no room, give up (must have already done a gzungetc()) */
518     if (state->have == (state->size << 1)) {
519         gz_error(state, Z_BUF_ERROR, "out of room to push characters");
520         return -1;
521     }
522 
523     /* slide output data if needed and insert byte before existing data */
524     if (state->next == state->out) {
525         unsigned char *src = state->out + state->have;
526         unsigned char *dest = state->out + (state->size << 1);
527         while (src > state->out)
528             *--dest = *--src;
529         state->next = dest;
530     }
531     state->have++;
532     state->next--;
533     state->next[0] = c;
534     state->pos--;
535     return c;
536 }
537 
538 /* -- see zlib.h -- */
gzgets(file,buf,len)539 char * ZEXPORT gzgets(file, buf, len)
540     gzFile file;
541     char *buf;
542     int len;
543 {
544     unsigned left, n;
545     char *str;
546     unsigned char *eol;
547     gz_statep state;
548 
549     /* check parameters and get internal structure */
550     if (file == NULL || buf == NULL || len < 1)
551         return NULL;
552     state = (gz_statep)file;
553 
554     /* check that we're reading and that there's no error */
555     if (state->mode != GZ_READ || state->err != Z_OK)
556         return NULL;
557 
558     /* process a skip request */
559     if (state->seek) {
560         state->seek = 0;
561         if (gz_skip(state, state->skip) == -1)
562             return NULL;
563     }
564 
565     /* copy output bytes up to new line or len - 1, whichever comes first --
566        append a terminating zero to the string (we don't check for a zero in
567        the contents, let the user worry about that) */
568     str = buf;
569     left = (unsigned)len - 1;
570     if (left) do {
571         /* assure that something is in the output buffer */
572         if (state->have == 0) {
573             if (gz_make(state) == -1)
574                 return NULL;            /* error */
575             if (state->have == 0) {     /* end of file */
576                 if (buf == str)         /* got bupkus */
577                     return NULL;
578                 break;                  /* got something -- return it */
579             }
580         }
581 
582         /* look for end-of-line in current output buffer */
583         n = state->have > left ? left : state->have;
584         eol = memchr(state->next, '\n', n);
585         if (eol != NULL)
586             n = (unsigned)(eol - state->next) + 1;
587 
588         /* copy through end-of-line, or remainder if not found */
589         memcpy(buf, state->next, n);
590         state->have -= n;
591         state->next += n;
592         state->pos += n;
593         left -= n;
594         buf += n;
595     } while (left && eol == NULL);
596 
597     /* found end-of-line or out of space -- terminate string and return it */
598     buf[0] = 0;
599     return str;
600 }
601 
602 /* -- see zlib.h -- */
gzdirect(file)603 int ZEXPORT gzdirect(file)
604     gzFile file;
605 {
606     gz_statep state;
607 
608     /* get internal structure */
609     if (file == NULL)
610         return 0;
611     state = (gz_statep)file;
612 
613     /* check that we're reading */
614     if (state->mode != GZ_READ)
615         return 0;
616 
617     /* if the state is not known, but we can find out, then do so (this is
618        mainly for right after a gzopen() or gzdopen()) */
619     if (state->how == LOOK && state->have == 0)
620         (void)gz_head(state);
621 
622     /* return 1 if reading direct, 0 if decompressing a gzip stream */
623     return state->direct;
624 }
625 
626 /* -- see zlib.h -- */
gzclose_r(file)627 int ZEXPORT gzclose_r(file)
628     gzFile file;
629 {
630     int ret;
631     gz_statep state;
632 
633     /* get internal structure */
634     if (file == NULL)
635         return Z_STREAM_ERROR;
636     state = (gz_statep)file;
637 
638     /* check that we're reading */
639     if (state->mode != GZ_READ)
640         return Z_STREAM_ERROR;
641 
642     /* free memory and close file */
643     if (state->size) {
644         inflateEnd(&(state->strm));
645         free(state->out);
646         free(state->in);
647     }
648     gz_error(state, Z_OK, NULL);
649     free(state->path);
650     ret = close(state->fd);
651     free(state);
652     return ret ? Z_ERRNO : Z_OK;
653 }
654