1 /*
2 * LZ4 - Fast LZ compression algorithm
3 * Header File
4 * Copyright (C) 2011-2013, Yann Collet.
5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions are
9 * met:
10 *
11 * * Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * * Redistributions in binary form must reproduce the above
14 * copyright notice, this list of conditions and the following disclaimer
15 * in the documentation and/or other materials provided with the
16 * distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 *
30 * You can contact the author at :
31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
32 * - LZ4 source repository : http://code.google.com/p/lz4/
33 */
34
35 #include <sys/zfs_context.h>
36
37 static int real_LZ4_compress(const char *source, char *dest, int isize,
38 int osize);
39 static int real_LZ4_uncompress(const char *source, char *dest, int osize);
40 static int LZ4_compressBound(int isize);
41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
42 int isize, int maxOutputSize);
43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
44 int isize, int osize);
45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
46 int isize, int osize);
47
48 /*ARGSUSED*/
49 size_t
50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
51 {
52 uint32_t bufsiz;
53 char *dest = d_start;
54
55 ASSERT(d_len >= sizeof (bufsiz));
56
57 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
58 d_len - sizeof (bufsiz));
59
60 /* Signal an error if the compression routine returned zero. */
61 if (bufsiz == 0)
62 return (s_len);
63
64 /*
65 * Encode the compresed buffer size at the start. We'll need this in
66 * decompression to counter the effects of padding which might be
67 * added to the compressed buffer and which, if unhandled, would
68 * confuse the hell out of our decompression function.
69 */
70 *(uint32_t *)dest = BE_32(bufsiz);
71
72 return (bufsiz + sizeof (bufsiz));
73 }
74
75 /*ARGSUSED*/
76 int
77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
78 {
79 const char *src = s_start;
80 uint32_t bufsiz = BE_IN32(src);
81
82 /* invalid compressed buffer size encoded at start */
83 if (bufsiz + sizeof (bufsiz) > s_len)
84 return (1);
85
86 /*
87 * Returns 0 on success (decompression function returned non-negative)
88 * and non-zero on failure (decompression function returned negative.
89 */
90 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
91 d_start, bufsiz, d_len) < 0);
92 }
93
94 /*
95 * LZ4 API Description:
96 *
97 * Simple Functions:
98 * real_LZ4_compress() :
99 * isize : is the input size. Max supported value is ~1.9GB
100 * return : the number of bytes written in buffer dest
101 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
102 * note : destination buffer must be already allocated.
103 * destination buffer must be sized to handle worst cases
104 * situations (input data not compressible) worst case size
105 * evaluation is provided by function LZ4_compressBound().
106 *
107 * real_LZ4_uncompress() :
108 * osize : is the output size, therefore the original size
109 * return : the number of bytes read in the source buffer.
110 * If the source stream is malformed, the function will stop
111 * decoding and return a negative result, indicating the byte
112 * position of the faulty instruction. This function never
113 * writes beyond dest + osize, and is therefore protected
114 * against malicious data packets.
115 * note : destination buffer must be already allocated
116 *
117 * Advanced Functions
118 *
119 * LZ4_compressBound() :
120 * Provides the maximum size that LZ4 may output in a "worst case"
121 * scenario (input data not compressible) primarily useful for memory
122 * allocation of output buffer.
123 *
124 * isize : is the input size. Max supported value is ~1.9GB
125 * return : maximum output size in a "worst case" scenario
126 * note : this function is limited by "int" range (2^31-1)
127 *
128 * LZ4_uncompress_unknownOutputSize() :
129 * isize : is the input size, therefore the compressed size
130 * maxOutputSize : is the size of the destination buffer (which must be
131 * already allocated)
132 * return : the number of bytes decoded in the destination buffer
133 * (necessarily <= maxOutputSize). If the source stream is
134 * malformed, the function will stop decoding and return a
135 * negative result, indicating the byte position of the faulty
136 * instruction. This function never writes beyond dest +
137 * maxOutputSize, and is therefore protected against malicious
138 * data packets.
139 * note : Destination buffer must be already allocated.
140 * This version is slightly slower than real_LZ4_uncompress()
141 *
142 * LZ4_compressCtx() :
143 * This function explicitly handles the CTX memory structure.
144 *
145 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
146 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
147 * isn't valid.
148 *
149 * LZ4_compress64kCtx() :
150 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
151 * isize *Must* be <64KB, otherwise the output will be corrupted.
152 *
153 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
154 * by the caller (either on the stack or using kmem_zalloc). Passing NULL
155 * isn't valid.
156 */
157
158 /*
159 * Tuning parameters
160 */
161
162 /*
163 * COMPRESSIONLEVEL: Increasing this value improves compression ratio
164 * Lowering this value reduces memory usage. Reduced memory usage
165 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
166 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
167 * (examples : 12 -> 16KB ; 17 -> 512KB)
168 */
169 #define COMPRESSIONLEVEL 12
170
171 /*
172 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
173 * algorithm skip faster data segments considered "incompressible".
174 * This may decrease compression ratio dramatically, but will be
175 * faster on incompressible data. Increasing this value will make
176 * the algorithm search more before declaring a segment "incompressible".
177 * This could improve compression a bit, but will be slower on
178 * incompressible data. The default value (6) is recommended.
179 */
180 #define NOTCOMPRESSIBLE_CONFIRMATION 6
181
182 /*
183 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
184 * performance for big endian cpu, but the resulting compressed stream
185 * will be incompatible with little-endian CPU. You can set this option
186 * to 1 in situations where data will stay within closed environment.
187 * This option is useless on Little_Endian CPU (such as x86).
188 */
189 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
190
191 /*
192 * CPU Feature Detection
193 */
194
195 /* 32 or 64 bits ? */
196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
197 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
198 defined(__LP64__) || defined(_LP64))
199 #define LZ4_ARCH64 1
200 #else
201 #define LZ4_ARCH64 0
202 #endif
203
204 /*
205 * Limits the amount of stack space that the algorithm may consume to hold
206 * the compression lookup table. The value `9' here means we'll never use
207 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
208 * If more memory is needed, it is allocated from the heap.
209 */
210 #define STACKLIMIT 9
211
212 /*
213 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
215 */
216 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
217 defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
218 defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
219 defined(__powerpc) || defined(powerpc) || \
220 ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
221 #define LZ4_BIG_ENDIAN 1
222 #else
223 /*
224 * Little Endian assumed. PDP Endian and other very rare endian format
225 * are unsupported.
226 */
227 #endif
228
229 /*
230 * Unaligned memory access is automatically enabled for "common" CPU,
231 * such as x86. For others CPU, the compiler will be more cautious, and
232 * insert extra code to ensure aligned access is respected. If you know
233 * your target CPU supports unaligned memory access, you may want to
234 * force this option manually to improve performance
235 */
236 #if defined(__ARM_FEATURE_UNALIGNED)
237 #define LZ4_FORCE_UNALIGNED_ACCESS 1
238 #endif
239
240 /* #define LZ4_FORCE_SW_BITCOUNT */
241
242 /*
243 * Compiler Options
244 */
245 #if __STDC_VERSION__ >= 199901L /* C99 */
246 /* "restrict" is a known keyword */
247 #else
248 /* Disable restrict */
249 #define restrict
250 #endif
251
252 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
253
254 #ifdef _MSC_VER
255 /* Visual Studio */
256 /* Visual is not C99, but supports some kind of inline */
257 #define inline __forceinline
258 #if LZ4_ARCH64
259 /* For Visual 2005 */
260 #pragma intrinsic(_BitScanForward64)
261 #pragma intrinsic(_BitScanReverse64)
262 #else /* !LZ4_ARCH64 */
263 /* For Visual 2005 */
264 #pragma intrinsic(_BitScanForward)
265 #pragma intrinsic(_BitScanReverse)
266 #endif /* !LZ4_ARCH64 */
267 #endif /* _MSC_VER */
268
269 #ifdef _MSC_VER
270 #define lz4_bswap16(x) _byteswap_ushort(x)
271 #else /* !_MSC_VER */
272 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
273 (((x) & 0xffu) << 8)))
274 #endif /* !_MSC_VER */
275
276 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
277 #define expect(expr, value) (__builtin_expect((expr), (value)))
278 #else
279 #define expect(expr, value) (expr)
280 #endif
281
282 #define likely(expr) expect((expr) != 0, 1)
283 #define unlikely(expr) expect((expr) != 0, 0)
284
285 /* Basic types */
286 #if defined(_MSC_VER)
287 /* Visual Studio does not support 'stdint' natively */
288 #define BYTE unsigned __int8
289 #define U16 unsigned __int16
290 #define U32 unsigned __int32
291 #define S32 __int32
292 #define U64 unsigned __int64
293 #else /* !defined(_MSC_VER) */
294 #define BYTE uint8_t
295 #define U16 uint16_t
296 #define U32 uint32_t
297 #define S32 int32_t
298 #define U64 uint64_t
299 #endif /* !defined(_MSC_VER) */
300
301 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
302 #pragma pack(1)
303 #endif
304
305 typedef struct _U16_S {
306 U16 v;
307 } U16_S;
308 typedef struct _U32_S {
309 U32 v;
310 } U32_S;
311 typedef struct _U64_S {
312 U64 v;
313 } U64_S;
314
315 #ifndef LZ4_FORCE_UNALIGNED_ACCESS
316 #pragma pack()
317 #endif
318
319 #define A64(x) (((U64_S *)(x))->v)
320 #define A32(x) (((U32_S *)(x))->v)
321 #define A16(x) (((U16_S *)(x))->v)
322
323 /*
324 * Constants
325 */
326 #define MINMATCH 4
327
328 #define HASH_LOG COMPRESSIONLEVEL
329 #define HASHTABLESIZE (1 << HASH_LOG)
330 #define HASH_MASK (HASHTABLESIZE - 1)
331
332 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
333 NOTCOMPRESSIBLE_CONFIRMATION : 2)
334
335 /*
336 * Defines if memory is allocated into the stack (local variable),
337 * or into the heap (kmem_alloc()).
338 */
339 #define HEAPMODE (HASH_LOG > STACKLIMIT)
340 #define COPYLENGTH 8
341 #define LASTLITERALS 5
342 #define MFLIMIT (COPYLENGTH + MINMATCH)
343 #define MINLENGTH (MFLIMIT + 1)
344
345 #define MAXD_LOG 16
346 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1)
347
348 #define ML_BITS 4
349 #define ML_MASK ((1U<<ML_BITS)-1)
350 #define RUN_BITS (8-ML_BITS)
351 #define RUN_MASK ((1U<<RUN_BITS)-1)
352
353
354 /*
355 * Architecture-specific macros
356 */
357 #if LZ4_ARCH64
358 #define STEPSIZE 8
359 #define UARCH U64
360 #define AARCH A64
361 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8;
362 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d)
363 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
364 #define HTYPE U32
365 #define INITBASE(base) const BYTE* const base = ip
366 #else /* !LZ4_ARCH64 */
367 #define STEPSIZE 4
368 #define UARCH U32
369 #define AARCH A32
370 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4;
371 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
372 #define LZ4_SECURECOPY LZ4_WILDCOPY
373 #define HTYPE const BYTE *
374 #define INITBASE(base) const int base = 0
375 #endif /* !LZ4_ARCH64 */
376
377 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
378 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
379 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
380 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
381 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
382 #else
383 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
384 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
385 #endif
386
387
388 /* Local structures */
389 struct refTables {
390 HTYPE hashTable[HASHTABLESIZE];
391 };
392
393
394 /* Macros */
395 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
396 HASH_LOG))
397 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
398 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
399 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
400 d = e; }
401
402
403 /* Private functions */
404 #if LZ4_ARCH64
405
/*
 * Count the number of whole bytes of `val' that are zero on the side
 * the match scanner cares about: leading (most-significant) bytes on a
 * big-endian CPU, trailing (least-significant) bytes on little-endian.
 * `val' is the XOR of two 8-byte words, so this is the length of their
 * common prefix in bytes; `val' must be non-zero.
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	/* Index of highest set bit; >> 3 converts bits to bytes. */
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* Count-leading-zeros intrinsic, converted from bits to bytes. */
	return (__builtin_clzll(val) >> 3);
#else
	/* Software fallback: binary search for the highest non-zero byte. */
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	/* Index of lowest set bit; >> 3 converts bits to bytes. */
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* Count-trailing-zeros intrinsic, converted from bits to bytes. */
	return (__builtin_ctzll(val) >> 3);
#else
	/*
	 * Software fallback: isolate the lowest set bit (val & -val) and
	 * use a de Bruijn multiply to index the byte position table.
	 */
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}
454
455 #else
456
/*
 * 32-bit variant of LZ4_NbCommonBytes(): count the zero bytes of `val'
 * on the significant side (leading bytes on big-endian, trailing bytes
 * on little-endian). `val' is the XOR of two 4-byte words and must be
 * non-zero.
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	/* Index of highest set bit; >> 3 converts bits to bytes. */
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* Count-leading-zeros intrinsic, converted from bits to bytes. */
	return (__builtin_clz(val) >> 3);
#else
	/* Software fallback: binary search for the highest non-zero byte. */
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	/* Index of lowest set bit; >> 3 converts bits to bytes. */
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	/* Count-trailing-zeros intrinsic, converted from bits to bytes. */
	return (__builtin_ctz(val) >> 3);
#else
	/*
	 * Software fallback: isolate the lowest set bit and use a de
	 * Bruijn multiply to index the byte position table.
	 */
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}
500
501 #endif
502
503 /* Public functions */
504
/*
 * Worst-case compressed size for an input of `isize' bytes:
 * incompressible data can expand by one byte per 255 input bytes,
 * plus a small fixed amount of framing overhead.
 */
static int
LZ4_compressBound(int isize)
{
	int overhead = (isize / 255) + 16;

	return (isize + overhead);
}
510
511 /* Compression functions */
512
/*
 * Core LZ4 compressor (general form). Scans `source' (isize bytes) for
 * repeated 4-byte sequences via the hash table in `ctx' and emits
 * token/literal/offset sequences into `dest'. Returns the number of
 * bytes written to `dest', or 0 if the output would exceed `osize'.
 * `ctx' must point to a caller-allocated struct refTables when
 * HEAPMODE is set (see ILLUMOS CHANGES note above).
 */
/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;		/* start of pending literals */
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match. The step grows as attempts accumulate, so
		 * incompressible regions are skipped increasingly fast.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up: extend the match backwards over equal bytes */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (16-bit little-endian back-reference) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* Compare a word at a time while room remains */
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			/* Long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position: a back-to-back match needs no token */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals (flush everything from anchor to iend) */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
697
698
699
700 /* Note : this function is valid only if isize < LZ4_64KLIMIT */
701 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
702 #define HASHLOG64K (HASH_LOG + 1)
703 #define HASH64KTABLESIZE (1U << HASHLOG64K)
704 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
705 HASHLOG64K))
706 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p))
707
/*
 * LZ4 compressor specialized for inputs smaller than 64KB. Because all
 * offsets then fit in 16 bits, the hash table stores U16 offsets from
 * `base' and the MAX_DISTANCE check can be omitted. Same contract as
 * LZ4_compressCtx(): returns bytes written to `dest', or 0 if `osize'
 * would be exceeded. isize *must* be < LZ4_64KLIMIT.
 */
/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;		/* start of pending literals */
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/*
		 * Find a match. The step grows as attempts accumulate, so
		 * incompressible regions are skipped increasingly fast.
		 */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

			/* No distance check: 16-bit offsets always fit */
		} while (A32(ref) != A32(ip));

		/* Catch up: extend the match backwards over equal bytes */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			/* Long run: RUN_MASK in token, remainder in 255s */
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset (16-bit little-endian back-reference) */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		/* Compare a word at a time while room remains */
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			/* Long match: ML_MASK in token, remainder in 255s */
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position: a back-to-back match needs no token */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals (flush everything from anchor to iend) */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}
889
890 static int
891 real_LZ4_compress(const char *source, char *dest, int isize, int osize)
892 {
893 #if HEAPMODE
894 void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
895 int result;
896
897 /*
898 * out of kernel memory, gently fall through - this will disable
899 * compression in zio_compress_data
900 */
901 if (ctx == NULL)
902 return (0);
903
904 if (isize < LZ4_64KLIMIT)
905 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
906 else
907 result = LZ4_compressCtx(ctx, source, dest, isize, osize);
908
909 kmem_free(ctx, sizeof (struct refTables));
910 return (result);
911 #else
912 if (isize < (int)LZ4_64KLIMIT)
913 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
914 return (LZ4_compressCtx(NULL, source, dest, isize, osize));
915 #endif
916 }
917
918 /* Decompression functions */
919
920 /*
921 * Note: The decoding functions real_LZ4_uncompress() and
922 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
923 * attack type. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
925 * it will never read outside of the input buffer. A corrupted input
926 * will produce an error result, a negative int, indicating the position
927 * of the error within input stream.
928 */
929
/*
 * Decode a compressed stream whose ORIGINAL (decompressed) size is
 * known to be exactly `osize' bytes. Returns the number of input bytes
 * consumed from `source', or a negative value (the negated input
 * position) on malformed input. Never writes beyond dest + osize.
 *
 * NOTE(review): not called by the lz4_decompress() wrapper above,
 * which uses LZ4_uncompress_unknownOutputSize(); apparently retained
 * for API completeness.
 */
static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	/* Correction tables for overlapping copies when offset < STEPSIZE */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength (literal length from the token's high bits) */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			/* Extended length: accumulate 255s plus remainder */
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		/* Wild copy may overrun up to cpy + STEPSIZE - 1 (checked) */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates a reference outside the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength (token's low bits, possibly extended) */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match (offset < word size): copy
			 * byte-wise for the first 4, then adjust ref via
			 * the dec tables so word copies replicate the
			 * pattern correctly.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			/* Finish carefully near the end of the buffer */
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
_output_error:
	return (int)(-(((char *)ip) - source));
}
1041
/*
 * Decode a compressed stream of `isize' bytes into `dest' when the
 * decompressed size is not known in advance; `maxOutputSize' bounds
 * the destination buffer. Returns the number of bytes written to
 * `dest', or a negative value (the negated input position) on
 * malformed input. Never reads beyond source + isize nor writes
 * beyond dest + maxOutputSize. This is the variant used by
 * lz4_decompress() above.
 */
static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	/* Correction tables for overlapping copies when offset < STEPSIZE */
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength (literal length from the token's high bits) */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			/* Extended length, bounded by the input end */
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires to consume all
				 * input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		/* Wild copy may overrun up to cpy + STEPSIZE - 1 (checked) */
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength (token's low bits, possibly extended) */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
			/*
			 * Overlapping match (offset < word size): copy
			 * byte-wise for the first 4, then adjust ref via
			 * the dec tables so word copies replicate the
			 * pattern correctly.
			 */
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			/* Finish carefully near the end of the buffer */
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
_output_error:
	return (int)(-(((char *)ip) - source));
}