/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>
#include <sys/lz4hc.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
        uint32_t bufsiz;
        char *dest = d_start;

        ASSERT(d_len >= sizeof (bufsiz));

        /*
         * Switch between high-compression and fast-compression. Decompression
         * is performed by the same routine.
         */
        if (n == 1)
                bufsiz = zfs_LZ4_compressHC(s_start, &dest[sizeof (bufsiz)],
                    s_len, d_len - sizeof (bufsiz));
        else
                bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)],
                    s_len, d_len - sizeof (bufsiz));

        /* Signal an error if the compression routine returned zero. */
        if (bufsiz == 0)
                return (s_len);

        /*
         * Encode the compressed buffer size at the start. We'll need this in
         * decompression to counter the effects of padding which might be
         * added to the compressed buffer and which, if unhandled, would
         * confuse the hell out of our decompression function.
         */
        *(uint32_t *)dest = BE_32(bufsiz);

        return (bufsiz + sizeof (bufsiz));
}

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
        const char *src = s_start;
        uint32_t bufsiz = BE_IN32(src);

        /* invalid compressed buffer size encoded at start */
        if (bufsiz + sizeof (bufsiz) > s_len)
                return (1);

        /*
         * Returns 0 on success (decompression function returned non-negative)
         * and non-zero on failure (decompression function returned negative).
         */
        return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
            d_start, bufsiz, d_len) < 0);
}
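
/*
 * Illustrative sketch (hypothetical caller; the buffer names "src", "cbuf"
 * and "dbuf" are invented for this example): the buffer produced by
 * lz4_compress() is a 4-byte big-endian payload size followed by the raw
 * LZ4 stream, so a round trip looks roughly like:
 *
 *      size_t clen = lz4_compress(src, cbuf, s_len, d_len, 0);
 *      if (clen >= s_len)
 *              ... incompressible, caller keeps the original data ...
 *      else if (lz4_decompress(cbuf, dbuf, clen, s_len, 0) != 0)
 *              ... corrupt or truncated compressed buffer ...
 */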

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *      isize  : is the input size. Max supported value is ~1.9GB
 *      return : the number of bytes written in buffer dest
 *          or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *      note : destination buffer must be already allocated.
 *          destination buffer must be sized to handle worst-case
 *          situations (input data not compressible); worst-case size
 *          evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *      osize  : is the output size, therefore the original size
 *      return : the number of bytes read in the source buffer.
 *          If the source stream is malformed, the function will stop
 *          decoding and return a negative result, indicating the byte
 *          position of the faulty instruction. This function never
 *          writes beyond dest + osize, and is therefore protected
 *          against malicious data packets.
 *      note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *      Provides the maximum size that LZ4 may output in a "worst case"
 *      scenario (input data not compressible); primarily useful for memory
 *      allocation of the output buffer.
 *
 *      isize  : is the input size. Max supported value is ~1.9GB
 *      return : maximum output size in a "worst case" scenario
 *      note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *      isize  : is the input size, therefore the compressed size
 *      maxOutputSize : is the size of the destination buffer (which must be
 *          already allocated)
 *      return : the number of bytes decoded in the destination buffer
 *          (necessarily <= maxOutputSize). If the source stream is
 *          malformed, the function will stop decoding and return a
 *          negative result, indicating the byte position of the faulty
 *          instruction. This function never writes beyond dest +
 *          maxOutputSize, and is therefore protected against malicious
 *          data packets.
 *      note : Destination buffer must be already allocated.
 *          This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *      This function explicitly handles the CTX memory structure.
 *
 *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *      isn't valid.
 *
 * LZ4_compress64kCtx() :
 *      Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *      isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *      isn't valid.
 */
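
/*
 * Illustrative sketch (hypothetical caller): per the ILLUMOS CHANGES notes
 * above, the *Ctx() entry points require the caller to supply the hash-table
 * context rather than passing NULL, e.g.:
 *
 *      struct refTables *ctx = kmem_zalloc(sizeof (struct refTables),
 *          KM_NOSLEEP);
 *      if (ctx != NULL) {
 *              int clen = LZ4_compressCtx(ctx, src, dst, isize, osize);
 *              kmem_free(ctx, sizeof (struct refTables));
 *      }
 *
 * real_LZ4_compress() below follows exactly this pattern when HEAPMODE is
 * set.
 */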

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 * Lowering this value reduces memory usage. Reduced memory usage
 * typically improves speed, due to cache effect (ex: L1 32KB for Intel,
 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 * (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 * algorithm skip faster over data segments considered "incompressible".
 * This may decrease compression ratio dramatically, but will be
 * faster on incompressible data. Increasing this value will make
 * the algorithm search more before declaring a segment "incompressible".
 * This could improve compression a bit, but will be slower on
 * incompressible data. The default value (6) is recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to
 * performance for big-endian CPUs, but the resulting compressed stream
 * will be incompatible with little-endian CPUs. You can set this option
 * to 1 in situations where data will stay within a closed environment.
 * This option is useless on little-endian CPUs (such as x86).
 */
/* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */
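
/*
 * Worked example of the memory-usage formula above: each hash-table entry is
 * 4 bytes, so COMPRESSIONLEVEL 12 needs 2^(12+2) = 16384 bytes (16KB), while
 * a level of 17 would need 2^(17+2) = 512KB, which no longer fits on a
 * kernel stack.
 */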

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define LZ4_ARCH64 1
/*
 * Illumos: On amd64 we have 20k of stack and 24k on sun4u and sun4v, so we
 * can spend 16k on the algorithm
 */
#define STACKLIMIT 12
#else
#define LZ4_ARCH64 0
/*
 * Illumos: On i386 we only have 12k of stack, so in order to maintain the
 * same COMPRESSIONLEVEL we have to use heap allocation. Performance will
 * suck, but alas, it's ZFS on 32-bit we're talking about, so...
 */
#define STACKLIMIT 11
#endif

/*
 * Little Endian or Big Endian?
 * Note: overwrite the below #define if you know your architecture endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
    defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
    defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
    defined(__powerpc) || defined(powerpc) || \
    ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious, and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/*
 * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
 * kernel
 */
#define LZ4_FORCE_SW_BITCOUNT

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L /* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
    (((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define expect(expr, value)    (expr)
#endif

#define likely(expr)    expect((expr) != 0, 1)
#define unlikely(expr)  expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define BYTE    unsigned __int8
#define U16     unsigned __int16
#define U32     unsigned __int32
#define S32     __int32
#define U64     unsigned __int64
#else /* !defined(_MSC_VER) */
#define BYTE    uint8_t
#define U16     uint16_t
#define U32     uint32_t
#define S32     int32_t
#define U64     uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
        U16 v;
} U16_S;
typedef struct _U32_S {
        U32 v;
} U32_S;
typedef struct _U64_S {
        U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
    NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines if memory is allocated into the stack (local variable),
 * or into the heap (kmem_alloc()).
 */
#define HEAPMODE (HASH_LOG > STACKLIMIT)
#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
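
/*
 * Note on the token layout: each encoded sequence starts with a one-byte
 * token whose high RUN_BITS (4) bits hold the literal-run length and whose
 * low ML_BITS (4) bits hold the match length. A nibble equal to
 * RUN_MASK/ML_MASK (15) means the length continues in subsequent bytes,
 * which is why the encoders below emit runs of 255-valued bytes for long
 * lengths.
 */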


/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d)      A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base)          const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d)      A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY          LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base)          const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
    { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
    { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
        HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
    HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
    d = e; }
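
/*
 * Note on the hash: LZ4_HASH_FUNCTION() is a multiplicative hash of the four
 * bytes at the current position; 2654435761 is the 32-bit golden-ratio
 * constant, and the right shift by (MINMATCH * 8 - HASH_LOG) keeps only the
 * top HASH_LOG bits, yielding an index in [0, HASHTABLESIZE).
 */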


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanReverse64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_clzll(val) >> 3);
#else
        int r;
        if (!(val >> 32)) {
                r = 4;
        } else {
                r = 0;
                val >>= 32;
        }
        if (!(val >> 16)) {
                r += 2;
                val >>= 8;
        } else {
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanForward64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_ctzll(val) >> 3);
#else
        static const int DeBruijnBytePos[64] =
            { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5,
                3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5,
                5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4,
                4, 5, 7, 2, 6, 5, 7, 6, 7, 7
        };
        return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
            58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanReverse(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_clz(val) >> 3);
#else
        int r;
        if (!(val >> 16)) {
                r = 2;
                val >>= 8;
        } else {
                r = 0;
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanForward(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
    !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_ctz(val) >> 3);
#else
        static const int DeBruijnBytePos[32] = {
                0, 0, 3, 0, 3, 1, 3, 0,
                3, 2, 2, 1, 3, 2, 0, 1,
                3, 3, 1, 2, 2, 2, 2, 0,
                3, 1, 2, 0, 1, 0, 1, 1
        };
        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
            27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
        return (isize + (isize / 255) + 16);
}
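
/*
 * Worked example: for a 4096-byte input, LZ4_compressBound() returns
 * 4096 + (4096 / 255) + 16 = 4096 + 16 + 16 = 4128 bytes, the most the fast
 * compressor can emit when the input is completely incompressible.
 */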

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
        struct refTables *srt = (struct refTables *)ctx;
        HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
        HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

        const BYTE *ip = (BYTE *) source;
        INITBASE(base);
        const BYTE *anchor = ip;
        const BYTE *const iend = ip + isize;
        const BYTE *const oend = (BYTE *) dest + osize;
        const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

        BYTE *op = (BYTE *) dest;

        int len, length;
        const int skipStrength = SKIPSTRENGTH;
        U32 forwardH;


        /* Init */
        if (isize < MINLENGTH)
                goto _last_literals;

        /* First Byte */
        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
        ip++;
        forwardH = LZ4_HASH_VALUE(ip);

        /* Main Loop */
        for (;;) {
                int findMatchAttempts = (1U << skipStrength) + 3;
                const BYTE *forwardIp = ip;
                const BYTE *ref;
                BYTE *token;

                /* Find a match */
                do {
                        U32 h = forwardH;
                        int step = findMatchAttempts++ >> skipStrength;
                        ip = forwardIp;
                        forwardIp = ip + step;

                        if unlikely(forwardIp > mflimit) {
                                goto _last_literals;
                        }

                        forwardH = LZ4_HASH_VALUE(forwardIp);
                        ref = base + HashTable[h];
                        HashTable[h] = ip - base;

                } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

                /* Catch up */
                while ((ip > anchor) && (ref > (BYTE *) source) &&
                    unlikely(ip[-1] == ref[-1])) {
                        ip--;
                        ref--;
                }

                /* Encode Literal length */
                length = ip - anchor;
                token = op++;

                /* Check output limit */
                if unlikely(op + length + (2 + 1 + LASTLITERALS) +
                    (length >> 8) > oend)
                        return (0);

                if (length >= (int)RUN_MASK) {
                        *token = (RUN_MASK << ML_BITS);
                        len = length - RUN_MASK;
                        for (; len > 254; len -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)len;
                } else
                        *token = (length << ML_BITS);

                /* Copy Literals */
                LZ4_BLINDCOPY(anchor, op, length);

                _next_match:
                /* Encode Offset */
                LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

                /* Start Counting */
                ip += MINMATCH;
                ref += MINMATCH;        /* MinMatch verified */
                anchor = ip;
                while likely(ip < matchlimit - (STEPSIZE - 1)) {
                        UARCH diff = AARCH(ref) ^ AARCH(ip);
                        if (!diff) {
                                ip += STEPSIZE;
                                ref += STEPSIZE;
                                continue;
                        }
                        ip += LZ4_NbCommonBytes(diff);
                        goto _endCount;
                }
#if LZ4_ARCH64
                if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
                        ip += 4;
                        ref += 4;
                }
#endif
                if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
                        ip += 2;
                        ref += 2;
                }
                if ((ip < matchlimit) && (*ref == *ip))
                        ip++;
                _endCount:

                /* Encode MatchLength */
                len = (ip - anchor);
                /* Check output limit */
                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
                        return (0);
                if (len >= (int)ML_MASK) {
                        *token += ML_MASK;
                        len -= ML_MASK;
                        for (; len > 509; len -= 510) {
                                *op++ = 255;
                                *op++ = 255;
                        }
                        if (len > 254) {
                                len -= 255;
                                *op++ = 255;
                        }
                        *op++ = (BYTE)len;
                } else
                        *token += len;

                /* Test end of chunk */
                if (ip > mflimit) {
                        anchor = ip;
                        break;
                }
                /* Fill table */
                HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

                /* Test next position */
                ref = base + HashTable[LZ4_HASH_VALUE(ip)];
                HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
                if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
                /* Prepare next loop */
                anchor = ip++;
                forwardH = LZ4_HASH_VALUE(ip);
        }

_last_literals:
        /* Encode Last Literals */
        {
                int lastRun = iend - anchor;
                if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
                    oend)
                        return (0);
                if (lastRun >= (int)RUN_MASK) {
                        *op++ = (RUN_MASK << ML_BITS);
                        lastRun -= RUN_MASK;
                        for (; lastRun > 254; lastRun -= 255) {
                                *op++ = 255;
                        }
                        *op++ = (BYTE)lastRun;
                } else
                        *op++ = (lastRun << ML_BITS);
                (void) memcpy(op, anchor, iend - anchor);
                op += iend - anchor;
        }

        /* End */
        return (int)(((char *)op) - dest);
}
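
/*
 * Note on the 64KB variant: when the whole input is smaller than
 * LZ4_64KLIMIT, every match offset fits in 16 bits, so LZ4_compress64kCtx()
 * stores U16 positions relative to the start of the buffer instead of HTYPE
 * entries. This halves the per-entry footprint (allowing twice as many
 * entries, HASHLOG64K = HASH_LOG + 1) and lets the match-finding loop drop
 * the MAX_DISTANCE check.
 */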



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
    HASHLOG64K))
#define LZ4_HASH64K_VALUE(p)    LZ4_HASH64K_FUNCTION(A32(p))

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
        struct refTables *srt = (struct refTables *)ctx;
        U16 *HashTable = (U16 *) (srt->hashTable);
#else
        U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

        const BYTE *ip = (BYTE *) source;
        const BYTE *anchor = ip;
        const BYTE *const base = ip;
        const BYTE *const iend = ip + isize;
        const BYTE *const oend = (BYTE *) dest + osize;
        const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

        BYTE *op = (BYTE *) dest;

        int len, length;
        const int skipStrength = SKIPSTRENGTH;
        U32 forwardH;

        /* Init */
        if (isize < MINLENGTH)
                goto _last_literals;

        /* First Byte */
        ip++;
        forwardH = LZ4_HASH64K_VALUE(ip);

        /* Main Loop */
        for (;;) {
                int findMatchAttempts = (1U << skipStrength) + 3;
                const BYTE *forwardIp = ip;
                const BYTE *ref;
                BYTE *token;

                /* Find a match */
                do {
                        U32 h = forwardH;
                        int step = findMatchAttempts++ >> skipStrength;
                        ip = forwardIp;
                        forwardIp = ip + step;

                        if (forwardIp > mflimit) {
                                goto _last_literals;
                        }

                        forwardH = LZ4_HASH64K_VALUE(forwardIp);
                        ref = base + HashTable[h];
                        HashTable[h] = ip - base;

                } while (A32(ref) != A32(ip));

                /* Catch up */
                while ((ip > anchor) && (ref > (BYTE *) source) &&
                    (ip[-1] == ref[-1])) {
                        ip--;
                        ref--;
                }

                /* Encode Literal length */
                length = ip - anchor;
                token = op++;

                /* Check output limit */
                if unlikely(op + length + (2 + 1 + LASTLITERALS) +
                    (length >> 8) > oend)
                        return (0);

                if (length >= (int)RUN_MASK) {
                        *token = (RUN_MASK << ML_BITS);
                        len = length - RUN_MASK;
                        for (; len > 254; len -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)len;
                } else
                        *token = (length << ML_BITS);

                /* Copy Literals */
                LZ4_BLINDCOPY(anchor, op, length);

                _next_match:
                /* Encode Offset */
                LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

                /* Start Counting */
                ip += MINMATCH;
                ref += MINMATCH;        /* MinMatch verified */
                anchor = ip;
                while (ip < matchlimit - (STEPSIZE - 1)) {
                        UARCH diff = AARCH(ref) ^ AARCH(ip);
                        if (!diff) {
                                ip += STEPSIZE;
                                ref += STEPSIZE;
                                continue;
                        }
                        ip += LZ4_NbCommonBytes(diff);
                        goto _endCount;
                }
#if LZ4_ARCH64
                if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
                        ip += 4;
                        ref += 4;
                }
#endif
                if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
                        ip += 2;
                        ref += 2;
                }
                if ((ip < matchlimit) && (*ref == *ip))
                        ip++;
                _endCount:

                /* Encode MatchLength */
                len = (ip - anchor);
                /* Check output limit */
                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
                        return (0);
                if (len >= (int)ML_MASK) {
                        *token += ML_MASK;
                        len -= ML_MASK;
                        for (; len > 509; len -= 510) {
                                *op++ = 255;
                                *op++ = 255;
                        }
                        if (len > 254) {
                                len -= 255;
                                *op++ = 255;
                        }
                        *op++ = (BYTE)len;
                } else
                        *token += len;

                /* Test end of chunk */
                if (ip > mflimit) {
                        anchor = ip;
                        break;
                }
                /* Fill table */
                HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

                /* Test next position */
                ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
                HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
                if (A32(ref) == A32(ip)) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
                /* Prepare next loop */
                anchor = ip++;
                forwardH = LZ4_HASH64K_VALUE(ip);
        }

_last_literals:
        /* Encode Last Literals */
        {
                int lastRun = iend - anchor;
                if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
                    oend)
                        return (0);
                if (lastRun >= (int)RUN_MASK) {
                        *op++ = (RUN_MASK << ML_BITS);
                        lastRun -= RUN_MASK;
                        for (; lastRun > 254; lastRun -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)lastRun;
                } else
                        *op++ = (lastRun << ML_BITS);
                (void) memcpy(op, anchor, iend - anchor);
                op += iend - anchor;
        }

        /* End */
        return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
        void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
        int result;

        /*
         * out of kernel memory, gently fall through - this will disable
         * compression in zio_compress_data
         */
        if (ctx == NULL)
                return (0);

        if (isize < LZ4_64KLIMIT)
                result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
        else
                result = LZ4_compressCtx(ctx, source, dest, isize, osize);

        kmem_free(ctx, sizeof (struct refTables));
        return (result);
#else
        if (isize < (int)LZ4_64KLIMIT)
                return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
        return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 * attack type. They will never write nor read outside of the provided
 * output buffers. LZ4_uncompress_unknownOutputSize() also ensures that
 * it will never read outside of the input buffer. A corrupted input
 * will produce an error result, a negative int, indicating the position
 * of the error within input stream.
 */
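
/*
 * Note on close matches: a match may reference bytes that were written only
 * a few positions earlier (offset smaller than STEPSIZE); an offset of 1,
 * for instance, encodes a run of one repeated byte. A plain word-sized copy
 * would read bytes that have not been produced yet, so the decoders below
 * use dec32table/dec64table to adjust the source pointer and copy such
 * close matches in smaller, overlap-safe steps.
 */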

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
        /* Local Variables */
        const BYTE *restrict ip = (const BYTE *) source;
        const BYTE *ref;

        BYTE *op = (BYTE *) dest;
        BYTE *const oend = op + osize;
        BYTE *cpy;

        unsigned token;

        size_t length;
        size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
        size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

        /* Main Loop */
        for (;;) {
                /* get runlength */
                token = *ip++;
                if ((length = (token >> ML_BITS)) == RUN_MASK) {
                        size_t len;
                        for (; (len = *ip++) == 255; length += 255) {
                        }
                        length += len;
                }
                /* copy literals */
                cpy = op + length;
                if unlikely(cpy > oend - COPYLENGTH) {
                        if (cpy != oend)
                                /* Error: we must necessarily stand at EOF */
                                goto _output_error;
                        (void) memcpy(op, ip, length);
                        ip += length;
                        break;  /* EOF */
                }
                LZ4_WILDCOPY(ip, op, cpy);
                ip -= (op - cpy);
                op = cpy;

                /* get offset */
                LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
                ip += 2;
                if unlikely(ref < (BYTE * const) dest)
                        /*
                         * Error: offset creates reference outside destination
                         * buffer
                         */
                        goto _output_error;

                /* get matchlength */
                if ((length = (token & ML_MASK)) == ML_MASK) {
                        for (; *ip == 255; length += 255) {
                                ip++;
                        }
                        length += *ip++;
                }
                /* copy repeated sequence */
                if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
                        size_t dec64 = dec64table[op-ref];
#else
                        const int dec64 = 0;
#endif
                        op[0] = ref[0];
                        op[1] = ref[1];
                        op[2] = ref[2];
                        op[3] = ref[3];
                        op += 4;
                        ref += 4;
                        ref -= dec32table[op-ref];
                        A32(op) = A32(ref);
                        op += STEPSIZE - 4;
                        ref -= dec64;
                } else {
                        LZ4_COPYSTEP(ref, op);
                }
                cpy = op + length - (STEPSIZE - 4);
                if (cpy > oend - COPYLENGTH) {
                        if (cpy > oend)
                                /*
                                 * Error: request to write beyond destination
                                 * buffer
                                 */
                                goto _output_error;
                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
                        while (op < cpy)
                                *op++ = *ref++;
                        op = cpy;
                        if (op == oend)
                                /*
                                 * Check EOF (should never happen, since last
                                 * 5 bytes are supposed to be literals)
                                 */
                                goto _output_error;
                        continue;
                }
                LZ4_SECURECOPY(ref, op, cpy);
                op = cpy;       /* correction */
        }

        /* end of decoding */
        return (int)(((char *)ip) - source);

        /* write overflow error detected */
_output_error:
        return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
        /* Local Variables */
        const BYTE *restrict ip = (const BYTE *) source;
        const BYTE *const iend = ip + isize;
        const BYTE *ref;

        BYTE *op = (BYTE *) dest;
        BYTE *const oend = op + maxOutputSize;
        BYTE *cpy;

        size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
        size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

        /* Main Loop */
        while (ip < iend) {
                unsigned token;
                size_t length;

                /* get runlength */
                token = *ip++;
                if ((length = (token >> ML_BITS)) == RUN_MASK) {
                        int s = 255;
                        while ((ip < iend) && (s == 255)) {
                                s = *ip++;
                                length += s;
                        }
                }
                /* copy literals */
                cpy = op + length;
                if ((cpy > oend - COPYLENGTH) ||
                    (ip + length > iend - COPYLENGTH)) {
                        if (cpy > oend)
                                /* Error: writes beyond output buffer */
                                goto _output_error;
                        if (ip + length != iend)
                                /*
                                 * Error: LZ4 format requires to consume all
                                 * input at this stage
                                 */
                                goto _output_error;
                        (void) memcpy(op, ip, length);
                        op += length;
                        /* Necessarily EOF, due to parsing restrictions */
                        break;
                }
                LZ4_WILDCOPY(ip, op, cpy);
                ip -= (op - cpy);
                op = cpy;

                /* get offset */
                LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
                ip += 2;
                if (ref < (BYTE * const) dest)
                        /*
                         * Error: offset creates reference outside of
                         * destination buffer
                         */
                        goto _output_error;

                /* get matchlength */
                if ((length = (token & ML_MASK)) == ML_MASK) {
                        while (ip < iend) {
                                int s = *ip++;
                                length += s;
                                if (s == 255)
                                        continue;
                                break;
                        }
                }
                /* copy repeated sequence */
                if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
                        size_t dec64 = dec64table[op-ref];
#else
                        const int dec64 = 0;
#endif
                        op[0] = ref[0];
                        op[1] = ref[1];
                        op[2] = ref[2];
                        op[3] = ref[3];
                        op += 4;
                        ref += 4;
                        ref -= dec32table[op-ref];
                        A32(op) = A32(ref);
                        op += STEPSIZE - 4;
                        ref -= dec64;
                } else {
                        LZ4_COPYSTEP(ref, op);
                }
                cpy = op + length - (STEPSIZE - 4);
                if (cpy > oend - COPYLENGTH) {
                        if (cpy > oend)
                                /*
                                 * Error: request to write outside of
                                 * destination buffer
                                 */
                                goto _output_error;
                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
                        while (op < cpy)
                                *op++ = *ref++;
                        op = cpy;
                        if (op == oend)
                                /*
                                 * Check EOF (should never happen, since
                                 * last 5 bytes are supposed to be literals)
                                 */
                                goto _output_error;
                        continue;
                }
                LZ4_SECURECOPY(ref, op, cpy);
                op = cpy;       /* correction */
        }

        /* end of decoding */
        return (int)(((char *)op) - dest);

        /* write overflow error detected */
_output_error:
        return (int)(-(((char *)ip) - source));
}