1 /*
2 * lzo.c -- LZO1X Compressor from LZO
3 *
4 * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
5 *
6 * The full LZO package can be found at:
7 * http://www.oberhumer.com/opensource/lzo/
8 *
9 * Adapted for Xen (files combined and syntactic/header changes) by:
10 * Dan Magenheimer <dan.magenheimer@oracle.com>
11 *
12 */
13
14 /*
15 * lzodefs.h -- architecture, OS and compiler specific defines
16 *
17 * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
18 *
19 * The full LZO package can be found at:
20 * http://www.oberhumer.com/opensource/lzo/
21 *
22 * Changed for Linux kernel use by:
23 * Nitin Gupta <nitingupta910@gmail.com>
24 * Richard Purdie <rpurdie@openedhand.com>
25 */
26
27
28 #define COPY4(dst, src) \
29 put_unaligned(get_unaligned((const u32 *)(src)), (u32 *)(dst))
30 #if defined(__x86_64__)
31 #define COPY8(dst, src) \
32 put_unaligned(get_unaligned((const u64 *)(src)), (u64 *)(dst))
33 #else
34 #define COPY8(dst, src) \
35 COPY4(dst, src); COPY4((dst) + 4, (src) + 4)
36 #endif
37
38 #ifdef __MINIOS__
39 # include <lib.h>
40 # if __BYTE_ORDER == __LITTLE_ENDIAN
41 # undef __BIG_ENDIAN
42 # endif
43 # if __BYTE_ORDER == __BIG_ENDIAN
44 # undef __LITTLE_ENDIAN
45 # endif
46 #endif
47
48 #if defined(__BIG_ENDIAN) && defined(__LITTLE_ENDIAN)
49 #error "conflicting endian definitions"
50 #elif defined(__x86_64__)
51 #define LZO_USE_CTZ64 1
52 #define LZO_USE_CTZ32 1
53 #elif defined(__i386__) || defined(__powerpc__)
54 #define LZO_USE_CTZ32 1
55 #elif defined(__arm__) && (__LINUX_ARM_ARCH__ >= 5)
56 #define LZO_USE_CTZ32 1
57 #endif
58
59 #define M1_MAX_OFFSET 0x0400
60 #define M2_MAX_OFFSET 0x0800
61 #define M3_MAX_OFFSET 0x4000
62 #define M4_MAX_OFFSET 0xbfff
63
64 #define M1_MIN_LEN 2
65 #define M1_MAX_LEN 2
66 #define M2_MIN_LEN 3
67 #define M2_MAX_LEN 8
68 #define M3_MIN_LEN 3
69 #define M3_MAX_LEN 33
70 #define M4_MIN_LEN 3
71 #define M4_MAX_LEN 9
72
73 #define M1_MARKER 0
74 #define M2_MARKER 64
75 #define M3_MARKER 32
76 #define M4_MARKER 16
77
78 #define lzo_dict_t unsigned short
79 #define D_BITS 13
80 #define D_SIZE (1u << D_BITS)
81 #define D_MASK (D_SIZE - 1)
82 #define D_HIGH ((D_MASK >> 1) + 1)
83
84 /*
85 * LZO1X Compressor from LZO
86 *
87 * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
88 *
89 * The full LZO package can be found at:
90 * http://www.oberhumer.com/opensource/lzo/
91 *
92 * Changed for Linux kernel use by:
93 * Nitin Gupta <nitingupta910@gmail.com>
94 * Richard Purdie <rpurdie@openedhand.com>
95 */
96
97 #ifdef __XEN__
98 #include <xen/lib.h>
99 #include <asm/byteorder.h>
100 #endif
101
102 #include <xen/lzo.h>
103 #define get_unaligned(_p) (*(_p))
104 #define put_unaligned(_val,_p) (*(_p)=_val)
105 #define get_unaligned_le16(_p) (*(u16 *)(_p))
106 #define get_unaligned_le32(_p) (*(u32 *)(_p))
107
108 static noinline size_t
lzo1x_1_do_compress(const unsigned char * in,size_t in_len,unsigned char * out,size_t * out_len,size_t ti,void * wrkmem)109 lzo1x_1_do_compress(const unsigned char *in, size_t in_len,
110 unsigned char *out, size_t *out_len,
111 size_t ti, void *wrkmem)
112 {
113 const unsigned char *ip;
114 unsigned char *op;
115 const unsigned char * const in_end = in + in_len;
116 const unsigned char * const ip_end = in + in_len - 20;
117 const unsigned char *ii;
118 lzo_dict_t * const dict = (lzo_dict_t *) wrkmem;
119
120 op = out;
121 ip = in;
122 ii = ip;
123 ip += ti < 4 ? 4 - ti : 0;
124
125 for (;;) {
126 const unsigned char *m_pos;
127 size_t t, m_len, m_off;
128 u32 dv;
129 literal:
130 ip += 1 + ((ip - ii) >> 5);
131 next:
132 if (unlikely(ip >= ip_end))
133 break;
134 dv = get_unaligned_le32(ip);
135 t = ((dv * 0x1824429d) >> (32 - D_BITS)) & D_MASK;
136 m_pos = in + dict[t];
137 dict[t] = (lzo_dict_t) (ip - in);
138 if (unlikely(dv != get_unaligned_le32(m_pos)))
139 goto literal;
140
141 ii -= ti;
142 ti = 0;
143 t = ip - ii;
144 if (t != 0) {
145 if (t <= 3) {
146 op[-2] |= t;
147 COPY4(op, ii);
148 op += t;
149 } else if (t <= 16) {
150 *op++ = (t - 3);
151 COPY8(op, ii);
152 COPY8(op + 8, ii + 8);
153 op += t;
154 } else {
155 if (t <= 18) {
156 *op++ = (t - 3);
157 } else {
158 size_t tt = t - 18;
159 *op++ = 0;
160 while (unlikely(tt > 255)) {
161 tt -= 255;
162 *op++ = 0;
163 }
164 *op++ = tt;
165 }
166 do {
167 COPY8(op, ii);
168 COPY8(op + 8, ii + 8);
169 op += 16;
170 ii += 16;
171 t -= 16;
172 } while (t >= 16);
173 if (t > 0) do {
174 *op++ = *ii++;
175 } while (--t > 0);
176 }
177 }
178
179 m_len = 4;
180 {
181 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ64)
182 u64 v;
183 v = get_unaligned((const u64 *) (ip + m_len)) ^
184 get_unaligned((const u64 *) (m_pos + m_len));
185 if (unlikely(v == 0)) {
186 do {
187 m_len += 8;
188 v = get_unaligned((const u64 *) (ip + m_len)) ^
189 get_unaligned((const u64 *) (m_pos + m_len));
190 if (unlikely(ip + m_len >= ip_end))
191 goto m_len_done;
192 } while (v == 0);
193 }
194 # if defined(__LITTLE_ENDIAN)
195 m_len += (unsigned) __builtin_ctzll(v) / 8;
196 # elif defined(__BIG_ENDIAN)
197 m_len += (unsigned) __builtin_clzll(v) / 8;
198 # else
199 # error "missing endian definition"
200 # endif
201 #elif defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && defined(LZO_USE_CTZ32)
202 u32 v;
203 v = get_unaligned((const u32 *) (ip + m_len)) ^
204 get_unaligned((const u32 *) (m_pos + m_len));
205 if (unlikely(v == 0)) {
206 do {
207 m_len += 4;
208 v = get_unaligned((const u32 *) (ip + m_len)) ^
209 get_unaligned((const u32 *) (m_pos + m_len));
210 if (v != 0)
211 break;
212 m_len += 4;
213 v = get_unaligned((const u32 *) (ip + m_len)) ^
214 get_unaligned((const u32 *) (m_pos + m_len));
215 if (unlikely(ip + m_len >= ip_end))
216 goto m_len_done;
217 } while (v == 0);
218 }
219 # if defined(__LITTLE_ENDIAN)
220 m_len += (unsigned) __builtin_ctz(v) / 8;
221 # elif defined(__BIG_ENDIAN)
222 m_len += (unsigned) __builtin_clz(v) / 8;
223 # else
224 # error "missing endian definition"
225 # endif
226 #else
227 if (unlikely(ip[m_len] == m_pos[m_len])) {
228 do {
229 m_len += 1;
230 if (ip[m_len] != m_pos[m_len])
231 break;
232 m_len += 1;
233 if (ip[m_len] != m_pos[m_len])
234 break;
235 m_len += 1;
236 if (ip[m_len] != m_pos[m_len])
237 break;
238 m_len += 1;
239 if (ip[m_len] != m_pos[m_len])
240 break;
241 m_len += 1;
242 if (ip[m_len] != m_pos[m_len])
243 break;
244 m_len += 1;
245 if (ip[m_len] != m_pos[m_len])
246 break;
247 m_len += 1;
248 if (ip[m_len] != m_pos[m_len])
249 break;
250 m_len += 1;
251 if (unlikely(ip + m_len >= ip_end))
252 goto m_len_done;
253 } while (ip[m_len] == m_pos[m_len]);
254 }
255 #endif
256 }
257 m_len_done:
258
259 m_off = ip - m_pos;
260 ip += m_len;
261 ii = ip;
262 if (m_len <= M2_MAX_LEN && m_off <= M2_MAX_OFFSET) {
263 m_off -= 1;
264 *op++ = (((m_len - 1) << 5) | ((m_off & 7) << 2));
265 *op++ = (m_off >> 3);
266 } else if (m_off <= M3_MAX_OFFSET) {
267 m_off -= 1;
268 if (m_len <= M3_MAX_LEN)
269 *op++ = (M3_MARKER | (m_len - 2));
270 else {
271 m_len -= M3_MAX_LEN;
272 *op++ = M3_MARKER | 0;
273 while (unlikely(m_len > 255)) {
274 m_len -= 255;
275 *op++ = 0;
276 }
277 *op++ = (m_len);
278 }
279 *op++ = (m_off << 2);
280 *op++ = (m_off >> 6);
281 } else {
282 m_off -= 0x4000;
283 if (m_len <= M4_MAX_LEN)
284 *op++ = (M4_MARKER | ((m_off >> 11) & 8)
285 | (m_len - 2));
286 else {
287 m_len -= M4_MAX_LEN;
288 *op++ = (M4_MARKER | ((m_off >> 11) & 8));
289 while (unlikely(m_len > 255)) {
290 m_len -= 255;
291 *op++ = 0;
292 }
293 *op++ = (m_len);
294 }
295 *op++ = (m_off << 2);
296 *op++ = (m_off >> 6);
297 }
298 goto next;
299 }
300 *out_len = op - out;
301 return in_end - (ii - ti);
302 }
303
lzo1x_1_compress(const unsigned char * in,size_t in_len,unsigned char * out,size_t * out_len,void * wrkmem)304 int lzo1x_1_compress(const unsigned char *in, size_t in_len,
305 unsigned char *out, size_t *out_len,
306 void *wrkmem)
307 {
308 const unsigned char *ip = in;
309 unsigned char *op = out;
310 size_t l = in_len;
311 size_t t = 0;
312
313 while (l > 20) {
314 size_t ll = l <= (M4_MAX_OFFSET + 1) ? l : (M4_MAX_OFFSET + 1);
315 uintptr_t ll_end = (uintptr_t) ip + ll;
316 if ((ll_end + ((t + ll) >> 5)) <= ll_end)
317 break;
318 BUILD_BUG_ON(D_SIZE * sizeof(lzo_dict_t) > LZO1X_1_MEM_COMPRESS);
319 memset(wrkmem, 0, D_SIZE * sizeof(lzo_dict_t));
320 t = lzo1x_1_do_compress(ip, ll, op, out_len, t, wrkmem);
321 ip += ll;
322 op += *out_len;
323 l -= ll;
324 }
325 t += l;
326
327 if (t > 0) {
328 const unsigned char *ii = in + in_len - t;
329
330 if (op == out && t <= 238) {
331 *op++ = (17 + t);
332 } else if (t <= 3) {
333 op[-2] |= t;
334 } else if (t <= 18) {
335 *op++ = (t - 3);
336 } else {
337 size_t tt = t - 18;
338 *op++ = 0;
339 while (tt > 255) {
340 tt -= 255;
341 *op++ = 0;
342 }
343 *op++ = tt;
344 }
345 if (t >= 16) do {
346 COPY8(op, ii);
347 COPY8(op + 8, ii + 8);
348 op += 16;
349 ii += 16;
350 t -= 16;
351 } while (t >= 16);
352 if (t > 0) do {
353 *op++ = *ii++;
354 } while (--t > 0);
355 }
356
357 *op++ = M4_MARKER | 1;
358 *op++ = 0;
359 *op++ = 0;
360
361 *out_len = op - out;
362 return LZO_E_OK;
363 }
364
365 /*
366 * LZO1X Decompressor from LZO
367 *
368 * Copyright (C) 1996-2012 Markus F.X.J. Oberhumer <markus@oberhumer.com>
369 *
370 * The full LZO package can be found at:
371 * http://www.oberhumer.com/opensource/lzo/
372 *
373 * Changed for Linux kernel use by:
374 * Nitin Gupta <nitingupta910@gmail.com>
375 * Richard Purdie <rpurdie@openedhand.com>
376 */
377
378 #define HAVE_IP(x) ((size_t)(ip_end - ip) >= (size_t)(x))
379 #define HAVE_OP(x) ((size_t)(op_end - op) >= (size_t)(x))
380 #define NEED_IP(x) if (!HAVE_IP(x)) goto input_overrun
381 #define NEED_OP(x) if (!HAVE_OP(x)) goto output_overrun
382 #define TEST_LB(m_pos) if ((m_pos) < out) goto lookbehind_overrun
383
384 /* This MAX_255_COUNT is the maximum number of times we can add 255 to a base
385 * count without overflowing an integer. The multiply will overflow when
386 * multiplying 255 by more than MAXINT/255. The sum will overflow earlier
387 * depending on the base count. Since the base count is taken from a u8
388 * and a few bits, it is safe to assume that it will always be lower than
389 * or equal to 2*255, thus we can always prevent any overflow by accepting
390 * two less 255 steps. See Documentation/lzo.txt for more information.
391 */
392 #define MAX_255_COUNT ((((size_t)~0) / 255) - 2)
393
lzo1x_decompress_safe(const unsigned char * in,size_t in_len,unsigned char * out,size_t * out_len)394 int lzo1x_decompress_safe(const unsigned char *in, size_t in_len,
395 unsigned char *out, size_t *out_len)
396 {
397 unsigned char *op;
398 const unsigned char *ip;
399 size_t t, next;
400 size_t state = 0;
401 const unsigned char *m_pos;
402 const unsigned char * const ip_end = in + in_len;
403 unsigned char * const op_end = out + *out_len;
404
405 op = out;
406 ip = in;
407
408 if (unlikely(in_len < 3))
409 goto input_overrun;
410 if (*ip > 17) {
411 t = *ip++ - 17;
412 if (t < 4) {
413 next = t;
414 goto match_next;
415 }
416 goto copy_literal_run;
417 }
418
419 for (;;) {
420 t = *ip++;
421 if (t < 16) {
422 if (likely(state == 0)) {
423 if (unlikely(t == 0)) {
424 size_t offset;
425 const unsigned char *ip_last = ip;
426
427 while (unlikely(*ip == 0)) {
428 ip++;
429 NEED_IP(1);
430 }
431 offset = ip - ip_last;
432 if (unlikely(offset > MAX_255_COUNT))
433 return LZO_E_ERROR;
434
435 offset = (offset << 8) - offset;
436 t += offset + 15 + *ip++;
437 }
438 t += 3;
439 copy_literal_run:
440 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
441 if (likely(HAVE_IP(t + 15) && HAVE_OP(t + 15))) {
442 const unsigned char *ie = ip + t;
443 unsigned char *oe = op + t;
444 do {
445 COPY8(op, ip);
446 op += 8;
447 ip += 8;
448 COPY8(op, ip);
449 op += 8;
450 ip += 8;
451 } while (ip < ie);
452 ip = ie;
453 op = oe;
454 } else
455 #endif
456 {
457 NEED_OP(t);
458 NEED_IP(t + 3);
459 do {
460 *op++ = *ip++;
461 } while (--t > 0);
462 }
463 state = 4;
464 continue;
465 } else if (state != 4) {
466 next = t & 3;
467 m_pos = op - 1;
468 m_pos -= t >> 2;
469 m_pos -= *ip++ << 2;
470 TEST_LB(m_pos);
471 NEED_OP(2);
472 op[0] = m_pos[0];
473 op[1] = m_pos[1];
474 op += 2;
475 goto match_next;
476 } else {
477 next = t & 3;
478 m_pos = op - (1 + M2_MAX_OFFSET);
479 m_pos -= t >> 2;
480 m_pos -= *ip++ << 2;
481 t = 3;
482 }
483 } else if (t >= 64) {
484 next = t & 3;
485 m_pos = op - 1;
486 m_pos -= (t >> 2) & 7;
487 m_pos -= *ip++ << 3;
488 t = (t >> 5) - 1 + (3 - 1);
489 } else if (t >= 32) {
490 t = (t & 31) + (3 - 1);
491 if (unlikely(t == 2)) {
492 size_t offset;
493 const unsigned char *ip_last = ip;
494
495 while (unlikely(*ip == 0)) {
496 ip++;
497 NEED_IP(1);
498 }
499 offset = ip - ip_last;
500 if (unlikely(offset > MAX_255_COUNT))
501 return LZO_E_ERROR;
502
503 offset = (offset << 8) - offset;
504 t += offset + 31 + *ip++;
505 NEED_IP(2);
506 }
507 m_pos = op - 1;
508 next = get_unaligned_le16(ip);
509 ip += 2;
510 m_pos -= next >> 2;
511 next &= 3;
512 } else {
513 m_pos = op;
514 m_pos -= (t & 8) << 11;
515 t = (t & 7) + (3 - 1);
516 if (unlikely(t == 2)) {
517 size_t offset;
518 const unsigned char *ip_last = ip;
519
520 while (unlikely(*ip == 0)) {
521 ip++;
522 NEED_IP(1);
523 }
524 offset = ip - ip_last;
525 if (unlikely(offset > MAX_255_COUNT))
526 return LZO_E_ERROR;
527
528 offset = (offset << 8) - offset;
529 t += offset + 7 + *ip++;
530 NEED_IP(2);
531 }
532 next = get_unaligned_le16(ip);
533 ip += 2;
534 m_pos -= next >> 2;
535 next &= 3;
536 if (m_pos == op)
537 goto eof_found;
538 m_pos -= 0x4000;
539 }
540 TEST_LB(m_pos);
541 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
542 if (op - m_pos >= 8) {
543 unsigned char *oe = op + t;
544 if (likely(HAVE_OP(t + 15))) {
545 do {
546 COPY8(op, m_pos);
547 op += 8;
548 m_pos += 8;
549 COPY8(op, m_pos);
550 op += 8;
551 m_pos += 8;
552 } while (op < oe);
553 op = oe;
554 if (HAVE_IP(6)) {
555 state = next;
556 COPY4(op, ip);
557 op += next;
558 ip += next;
559 continue;
560 }
561 } else {
562 NEED_OP(t);
563 do {
564 *op++ = *m_pos++;
565 } while (op < oe);
566 }
567 } else
568 #endif
569 {
570 unsigned char *oe = op + t;
571 NEED_OP(t);
572 op[0] = m_pos[0];
573 op[1] = m_pos[1];
574 op += 2;
575 m_pos += 2;
576 do {
577 *op++ = *m_pos++;
578 } while (op < oe);
579 }
580 match_next:
581 state = next;
582 t = next;
583 #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)
584 if (likely(HAVE_IP(6) && HAVE_OP(4))) {
585 COPY4(op, ip);
586 op += t;
587 ip += t;
588 } else
589 #endif
590 {
591 NEED_IP(t + 3);
592 NEED_OP(t);
593 while (t > 0) {
594 *op++ = *ip++;
595 t--;
596 }
597 }
598 }
599
600 eof_found:
601 *out_len = op - out;
602 return (t != 3 ? LZO_E_ERROR :
603 ip == ip_end ? LZO_E_OK :
604 ip < ip_end ? LZO_E_INPUT_NOT_CONSUMED : LZO_E_INPUT_OVERRUN);
605
606 input_overrun:
607 *out_len = op - out;
608 return LZO_E_INPUT_OVERRUN;
609
610 output_overrun:
611 *out_len = op - out;
612 return LZO_E_OUTPUT_OVERRUN;
613
614 lookbehind_overrun:
615 *out_len = op - out;
616 return LZO_E_LOOKBEHIND_OVERRUN;
617 }
618