/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/lz4hc.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	uint32_t bufsiz;
	char *dest = d_start;

	ASSERT(d_len >= sizeof (bufsiz));

	/*
	 * Switch between high-compression and fast compression. Decompression
	 * is performed by the same routine.
	 */
	if (n == 1)
		bufsiz = zfs_LZ4_compressHC(s_start, &dest[sizeof (bufsiz)],
		    s_len, d_len - sizeof (bufsiz));
	else
		bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)],
		    s_len, d_len - sizeof (bufsiz));

	/* Signal an error if the compression routine returned zero. */
	if (bufsiz == 0)
		return (s_len);

	/*
	 * Encode the compressed buffer size at the start. We'll need this in
	 * decompression to counter the effects of padding which might be
	 * added to the compressed buffer and which, if unhandled, would
	 * confuse the hell out of our decompression function.
	 */
	*(uint32_t *)dest = BE_32(bufsiz);

	return (bufsiz + sizeof (bufsiz));
}
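
/*
 * Illustrative usage (a minimal sketch, not part of the upstream code;
 * "src", "dst", "out" and the lengths below are hypothetical caller
 * state):
 *
 *	// Pass n == 1 to select the high-compression (lz4hc) encoder,
 *	// anything else for the fast encoder. The destination must leave
 *	// room for the 4-byte big-endian length header; a return value
 *	// equal to s_len means the data could not be compressed into the
 *	// space provided and should be stored uncompressed.
 *	c_len = lz4_compress(src, dst, s_len, d_len, 0);
 *	if (c_len == s_len)
 *		// store src uncompressed
 *
 *	// On the read path, lz4_decompress() returns 0 on success and
 *	// non-zero on a malformed or truncated buffer.
 *	if (lz4_decompress(dst, out, c_len, s_len, 0) != 0)
 *		// handle corruption
 */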

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
	const char *src = s_start;
	uint32_t bufsiz = BE_IN32(src);

	/* invalid compressed buffer size encoded at start */
	if (bufsiz + sizeof (bufsiz) > s_len)
		return (1);

	/*
	 * Returns 0 on success (decompression function returned non-negative)
	 * and non-zero on failure (decompression function returned negative).
	 */
	return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
	    d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : the number of bytes written in buffer dest
 *		 or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *	note : destination buffer must be already allocated.
 *		destination buffer must be sized to handle worst-case
 *		situations (input data not compressible); worst-case size
 *		evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *	osize  : is the output size, therefore the original size
 *	return : the number of bytes read in the source buffer.
 *		If the source stream is malformed, the function will stop
 *		decoding and return a negative result, indicating the byte
 *		position of the faulty instruction. This function never
 *		writes beyond dest + osize, and is therefore protected
 *		against malicious data packets.
 *	note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *	Provides the maximum size that LZ4 may output in a "worst case"
 *	scenario (input data not compressible); primarily useful for memory
 *	allocation of the output buffer.
 *
 *	isize  : is the input size. Max supported value is ~1.9GB
 *	return : maximum output size in a "worst case" scenario
 *	note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *	isize  : is the input size, therefore the compressed size
 *	maxOutputSize : is the size of the destination buffer (which must be
 *		already allocated)
 *	return : the number of bytes decoded in the destination buffer
 *		(necessarily <= maxOutputSize). If the source stream is
 *		malformed, the function will stop decoding and return a
 *		negative result, indicating the byte position of the faulty
 *		instruction. This function never writes beyond dest +
 *		maxOutputSize, and is therefore protected against malicious
 *		data packets.
 *	note : Destination buffer must be already allocated.
 *		This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *	This function explicitly handles the CTX memory structure.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 *
 * LZ4_compress64kCtx() :
 *	Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *	isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *	ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *	by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *	isn't valid.
 */
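
/*
 * Illustrative call sequence for the advanced functions (a minimal sketch
 * mirroring what real_LZ4_compress() does below; identifiers other than
 * the LZ4 functions themselves are hypothetical):
 *
 *	// The CTX structure must be supplied by the caller; NULL is not
 *	// accepted in the illumos port.
 *	ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
 *	if (ctx == NULL)
 *		return (0);	// no memory: store the block uncompressed
 *
 *	// The 64k variant may only be used when isize < LZ4_64KLIMIT.
 *	if (isize < LZ4_64KLIMIT)
 *		c_len = LZ4_compress64kCtx(ctx, src, dst, isize, osize);
 *	else
 *		c_len = LZ4_compressCtx(ctx, src, dst, isize, osize);
 *	kmem_free(ctx, sizeof (struct refTables));
 */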

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula: N->2^(N+2) Bytes
 * (examples: 12 -> 16KB; 17 -> 512KB)
 */
#define	COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip data segments considered "incompressible" faster.
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define	NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define	BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define	LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
#define	STACKLIMIT 12
#else
#define	LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define	STACKLIMIT 11
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
    defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
    defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
    defined(__powerpc) || defined(powerpc) || \
    ((defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define	LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define	LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#define	LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L	/* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define	restrict
#endif

#define	GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define	inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define	lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define	lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
	(((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define	expect(expr, value)	(__builtin_expect((expr), (value)))
#else
#define	expect(expr, value)	(expr)
#endif

#define	likely(expr)	expect((expr) != 0, 1)
#define	unlikely(expr)	expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE unsigned __int8
#define	U16 unsigned __int16
#define	U32 unsigned __int32
#define	S32 __int32
#define	U64 unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE uint8_t
#define	U16 uint16_t
#define	U32 uint32_t
#define	S32 int32_t
#define	U64 uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define	MINMATCH 4

#define	HASH_LOG COMPRESSIONLEVEL
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

#define	SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
	NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define	HEAPMODE (HASH_LOG > STACKLIMIT)
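
/*
 * Worked example (added for clarity, not in the upstream sources): with
 * the default COMPRESSIONLEVEL of 12, the hash table holds 2^12 entries
 * of 4 bytes each, i.e. 16KB, matching the 2^(N+2) formula above. On
 * 64-bit platforms HASH_LOG (12) is not greater than STACKLIMIT (12), so
 * the table lives on the stack; on i386 STACKLIMIT is 11, HEAPMODE is
 * non-zero, and the table is kmem_zalloc()'d instead.
 */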
#define	COPYLENGTH 8
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)

#define	MAXD_LOG 16
#define	MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define	ML_BITS 4
#define	ML_MASK ((1U<<ML_BITS)-1)
#define	RUN_BITS (8-ML_BITS)
#define	RUN_MASK ((1U<<RUN_BITS)-1)
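
/*
 * For reference (an explanatory note, not additional code): each encoded
 * sequence begins with a one-byte token. Its high RUN_BITS (4) bits hold
 * the literal run length and its low ML_BITS (4) bits hold the match
 * length minus MINMATCH. A field equal to RUN_MASK or ML_MASK (15) means
 * the length continues in the following bytes, each adding its value to
 * the total, with 255 indicating that yet another byte follows. The
 * encode and decode loops below build and parse exactly this layout.
 */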


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	UARCH U64
#define	AARCH A64
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	LZ4_SECURECOPY(s, d, e)	if (d < e) LZ4_WILDCOPY(s, d, e)
#define	HTYPE U32
#define	INITBASE(base)		const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	UARCH U32
#define	AARCH A32
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	LZ4_SECURECOPY		LZ4_WILDCOPY
#define	HTYPE const BYTE *
#define	INITBASE(base)		const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
	HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define	LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define	LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
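
/*
 * For reference (an explanatory note, not additional code):
 * LZ4_HASH_VALUE() reads the four bytes at p as a 32-bit word, multiplies
 * by the prime 2654435761 and keeps the top HASH_LOG bits (the shift is
 * MINMATCH * 8 - HASH_LOG = 32 - 12 = 20 with the defaults), producing an
 * index into the HASHTABLESIZE-entry table. LZ4_WILDCOPY() copies in
 * COPYLENGTH-byte packets and may overshoot its end pointer by up to
 * COPYLENGTH - 1 bytes, which is why the encoder reserves LASTLITERALS
 * and MFLIMIT bytes of slack near the end of the buffers.
 */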


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	int r;
	if (!(val >> 32)) {
		r = 4;
	} else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else {
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	static const int DeBruijnBytePos[64] =
	    { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
		3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
		5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
		4, 5, 7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	static const int DeBruijnBytePos[32] = {
		0, 0, 3, 0, 3, 1, 3, 0,
		3, 2, 2, 1, 3, 2, 0, 1,
		3, 3, 1, 2, 2, 2, 2, 0,
		3, 1, 2, 0, 1, 0, 1, 1
	};
	return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
	    27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
	return (isize + (isize / 255) + 16);
}
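
/*
 * Worked example (added for clarity): for a 128KB ZFS record,
 * LZ4_compressBound(131072) = 131072 + 131072 / 255 + 16 = 131602, i.e.
 * incompressible input can grow by at most 530 bytes, plus the 4-byte
 * length header that lz4_compress() prepends.
 */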

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
	HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	INITBASE(base);
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;


	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
	ip++;
	forwardH = LZ4_HASH_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if unlikely(forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    unlikely(ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while likely(ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH_VALUE(ip)];
		HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
		if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define	LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define	HASHLOG64K (HASH_LOG + 1)
#define	HASH64KTABLESIZE (1U << HASHLOG64K)
#define	LZ4_HASH64K_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH*8) - \
	HASHLOG64K))
#define	LZ4_HASH64K_VALUE(p)	LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
	struct refTables *srt = (struct refTables *)ctx;
	U16 *HashTable = (U16 *) (srt->hashTable);
#else
	U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

	const BYTE *ip = (BYTE *) source;
	const BYTE *anchor = ip;
	const BYTE *const base = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
#define	matchlimit (iend - LASTLITERALS)

	BYTE *op = (BYTE *) dest;

	int len, length;
	const int skipStrength = SKIPSTRENGTH;
	U32 forwardH;

	/* Init */
	if (isize < MINLENGTH)
		goto _last_literals;

	/* First Byte */
	ip++;
	forwardH = LZ4_HASH64K_VALUE(ip);

	/* Main Loop */
	for (;;) {
		int findMatchAttempts = (1U << skipStrength) + 3;
		const BYTE *forwardIp = ip;
		const BYTE *ref;
		BYTE *token;

		/* Find a match */
		do {
			U32 h = forwardH;
			int step = findMatchAttempts++ >> skipStrength;
			ip = forwardIp;
			forwardIp = ip + step;

			if (forwardIp > mflimit) {
				goto _last_literals;
			}

			forwardH = LZ4_HASH64K_VALUE(forwardIp);
			ref = base + HashTable[h];
			HashTable[h] = ip - base;

		} while (A32(ref) != A32(ip));

		/* Catch up */
		while ((ip > anchor) && (ref > (BYTE *) source) &&
		    (ip[-1] == ref[-1])) {
			ip--;
			ref--;
		}

		/* Encode Literal length */
		length = ip - anchor;
		token = op++;

		/* Check output limit */
		if unlikely(op + length + (2 + 1 + LASTLITERALS) +
		    (length >> 8) > oend)
			return (0);

		if (length >= (int)RUN_MASK) {
			*token = (RUN_MASK << ML_BITS);
			len = length - RUN_MASK;
			for (; len > 254; len -= 255)
				*op++ = 255;
			*op++ = (BYTE)len;
		} else
			*token = (length << ML_BITS);

		/* Copy Literals */
		LZ4_BLINDCOPY(anchor, op, length);

		_next_match:
		/* Encode Offset */
		LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

		/* Start Counting */
		ip += MINMATCH;
		ref += MINMATCH;	/* MinMatch verified */
		anchor = ip;
		while (ip < matchlimit - (STEPSIZE - 1)) {
			UARCH diff = AARCH(ref) ^ AARCH(ip);
			if (!diff) {
				ip += STEPSIZE;
				ref += STEPSIZE;
				continue;
			}
			ip += LZ4_NbCommonBytes(diff);
			goto _endCount;
		}
#if LZ4_ARCH64
		if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
			ip += 4;
			ref += 4;
		}
#endif
		if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
			ip += 2;
			ref += 2;
		}
		if ((ip < matchlimit) && (*ref == *ip))
			ip++;
		_endCount:

		/* Encode MatchLength */
		len = (ip - anchor);
		/* Check output limit */
		if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
			return (0);
		if (len >= (int)ML_MASK) {
			*token += ML_MASK;
			len -= ML_MASK;
			for (; len > 509; len -= 510) {
				*op++ = 255;
				*op++ = 255;
			}
			if (len > 254) {
				len -= 255;
				*op++ = 255;
			}
			*op++ = (BYTE)len;
		} else
			*token += len;

		/* Test end of chunk */
		if (ip > mflimit) {
			anchor = ip;
			break;
		}
		/* Fill table */
		HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

		/* Test next position */
		ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
		HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
		if (A32(ref) == A32(ip)) {
			token = op++;
			*token = 0;
			goto _next_match;
		}
		/* Prepare next loop */
		anchor = ip++;
		forwardH = LZ4_HASH64K_VALUE(ip);
	}

	_last_literals:
	/* Encode Last Literals */
	{
		int lastRun = iend - anchor;
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int)RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255)
				*op++ = 255;
			*op++ = (BYTE)lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
	void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
	int result;

	/*
	 * out of kernel memory, gently fall through - this will disable
	 * compression in zio_compress_data
	 */
	if (ctx == NULL)
		return (0);

	if (isize < LZ4_64KLIMIT)
		result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
	else
		result = LZ4_compressCtx(ctx, source, dest, isize, osize);

	kmem_free(ctx, sizeof (struct refTables));
	return (result);
#else
	if (isize < (int)LZ4_64KLIMIT)
		return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
	return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attacks. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within the input stream.
 */

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + osize;
	BYTE *cpy;

	unsigned token;

	size_t length;
	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	for (;;) {
		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			size_t len;
			for (; (len = *ip++) == 255; length += 255) {
			}
			length += len;
		}
		/* copy literals */
		cpy = op + length;
		if unlikely(cpy > oend - COPYLENGTH) {
			if (cpy != oend)
				/* Error: we must necessarily stand at EOF */
				goto _output_error;
			(void) memcpy(op, ip, length);
			ip += length;
			break;	/* EOF */
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if unlikely(ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			for (; *ip == 255; length += 255) {
				ip++;
			}
			length += *ip++;
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write beyond destination
				 * buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since last
				 * 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)ip) - source);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
	/* Local Variables */
	const BYTE *restrict ip = (const BYTE *) source;
	const BYTE *const iend = ip + isize;
	const BYTE *ref;

	BYTE *op = (BYTE *) dest;
	BYTE *const oend = op + maxOutputSize;
	BYTE *cpy;

	size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
	size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

	/* Main Loop */
	while (ip < iend) {
		unsigned token;
		size_t length;

		/* get runlength */
		token = *ip++;
		if ((length = (token >> ML_BITS)) == RUN_MASK) {
			int s = 255;
			while ((ip < iend) && (s == 255)) {
				s = *ip++;
				length += s;
			}
		}
		/* copy literals */
		cpy = op + length;
		if ((cpy > oend - COPYLENGTH) ||
		    (ip + length > iend - COPYLENGTH)) {
			if (cpy > oend)
				/* Error: writes beyond output buffer */
				goto _output_error;
			if (ip + length != iend)
				/*
				 * Error: LZ4 format requires consuming all
				 * the input at this stage
				 */
				goto _output_error;
			(void) memcpy(op, ip, length);
			op += length;
			/* Necessarily EOF, due to parsing restrictions */
			break;
		}
		LZ4_WILDCOPY(ip, op, cpy);
		ip -= (op - cpy);
		op = cpy;

		/* get offset */
		LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
		ip += 2;
		if (ref < (BYTE * const) dest)
			/*
			 * Error: offset creates reference outside of the
			 * destination buffer
			 */
			goto _output_error;

		/* get matchlength */
		if ((length = (token & ML_MASK)) == ML_MASK) {
			while (ip < iend) {
				int s = *ip++;
				length += s;
				if (s == 255)
					continue;
				break;
			}
		}
		/* copy repeated sequence */
		if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
			size_t dec64 = dec64table[op-ref];
#else
			const int dec64 = 0;
#endif
			op[0] = ref[0];
			op[1] = ref[1];
			op[2] = ref[2];
			op[3] = ref[3];
			op += 4;
			ref += 4;
			ref -= dec32table[op-ref];
			A32(op) = A32(ref);
			op += STEPSIZE - 4;
			ref -= dec64;
		} else {
			LZ4_COPYSTEP(ref, op);
		}
		cpy = op + length - (STEPSIZE - 4);
		if (cpy > oend - COPYLENGTH) {
			if (cpy > oend)
				/*
				 * Error: request to write outside of
				 * destination buffer
				 */
				goto _output_error;
			LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
			while (op < cpy)
				*op++ = *ref++;
			op = cpy;
			if (op == oend)
				/*
				 * Check EOF (should never happen, since
				 * last 5 bytes are supposed to be literals)
				 */
				goto _output_error;
			continue;
		}
		LZ4_SECURECOPY(ref, op, cpy);
		op = cpy;	/* correction */
	}

	/* end of decoding */
	return (int)(((char *)op) - dest);

	/* write overflow error detected */
	_output_error:
	return (int)(-(((char *)ip) - source));
}