1 /* 2 * LZ4 - Fast LZ compression algorithm 3 * Header File 4 * Copyright (C) 2011-2013, Yann Collet. 5 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php) 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions are 9 * met: 10 * 11 * * Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * * Redistributions in binary form must reproduce the above 14 * copyright notice, this list of conditions and the following disclaimer 15 * in the documentation and/or other materials provided with the 16 * distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 * 30 * You can contact the author at : 31 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html 32 * - LZ4 source repository : http://code.google.com/p/lz4/ 33 */ 34 35 #include <sys/zfs_context.h> 36 37 static int real_LZ4_compress(const char *source, char *dest, int isize, 38 int osize); 39 static int real_LZ4_uncompress(const char *source, char *dest, int osize); 40 static int LZ4_compressBound(int isize); 41 static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest, 42 int isize, int maxOutputSize); 43 static int LZ4_compressCtx(void *ctx, const char *source, char *dest, 44 int isize, int osize); 45 static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest, 46 int isize, int osize); 47 48 /*ARGSUSED*/ 49 size_t 50 lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 51 { 52 uint32_t bufsiz; 53 char *dest = d_start; 54 55 ASSERT(d_len >= sizeof (bufsiz)); 56 57 bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len, 58 d_len - sizeof (bufsiz)); 59 60 /* Signal an error if the compression routine returned zero. */ 61 if (bufsiz == 0) 62 return (s_len); 63 64 /* 65 * Encode the compresed buffer size at the start. We'll need this in 66 * decompression to counter the effects of padding which might be 67 * added to the compressed buffer and which, if unhandled, would 68 * confuse the hell out of our decompression function. 69 */ 70 *(uint32_t *)dest = BE_32(bufsiz); 71 72 return (bufsiz + sizeof (bufsiz)); 73 } 74 75 /*ARGSUSED*/ 76 int 77 lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n) 78 { 79 const char *src = s_start; 80 uint32_t bufsiz = BE_IN32(src); 81 82 /* invalid compressed buffer size encoded at start */ 83 if (bufsiz + sizeof (bufsiz) > s_len) 84 return (1); 85 86 /* 87 * Returns 0 on success (decompression function returned non-negative) 88 * and non-zero on failure (decompression function returned negative. 89 */ 90 return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)], 91 d_start, bufsiz, d_len) < 0); 92 } 93 94 /* 95 * LZ4 API Description: 96 * 97 * Simple Functions: 98 * real_LZ4_compress() : 99 * isize : is the input size. Max supported value is ~1.9GB 100 * return : the number of bytes written in buffer dest 101 * or 0 if the compression fails (if LZ4_COMPRESSMIN is set). 102 * note : destination buffer must be already allocated. 103 * destination buffer must be sized to handle worst cases 104 * situations (input data not compressible) worst case size 105 * evaluation is provided by function LZ4_compressBound(). 106 * 107 * real_LZ4_uncompress() : 108 * osize : is the output size, therefore the original size 109 * return : the number of bytes read in the source buffer. 110 * If the source stream is malformed, the function will stop 111 * decoding and return a negative result, indicating the byte 112 * position of the faulty instruction. This function never 113 * writes beyond dest + osize, and is therefore protected 114 * against malicious data packets. 115 * note : destination buffer must be already allocated 116 * 117 * Advanced Functions 118 * 119 * LZ4_compressBound() : 120 * Provides the maximum size that LZ4 may output in a "worst case" 121 * scenario (input data not compressible) primarily useful for memory 122 * allocation of output buffer. 123 * 124 * isize : is the input size. Max supported value is ~1.9GB 125 * return : maximum output size in a "worst case" scenario 126 * note : this function is limited by "int" range (2^31-1) 127 * 128 * LZ4_uncompress_unknownOutputSize() : 129 * isize : is the input size, therefore the compressed size 130 * maxOutputSize : is the size of the destination buffer (which must be 131 * already allocated) 132 * return : the number of bytes decoded in the destination buffer 133 * (necessarily <= maxOutputSize). If the source stream is 134 * malformed, the function will stop decoding and return a 135 * negative result, indicating the byte position of the faulty 136 * instruction. This function never writes beyond dest + 137 * maxOutputSize, and is therefore protected against malicious 138 * data packets. 139 * note : Destination buffer must be already allocated. 140 * This version is slightly slower than real_LZ4_uncompress() 141 * 142 * LZ4_compressCtx() : 143 * This function explicitly handles the CTX memory structure. 144 * 145 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 146 * by the caller (either on the stack or using kmem_zalloc). Passing NULL 147 * isn't valid. 148 * 149 * LZ4_compress64kCtx() : 150 * Same as LZ4_compressCtx(), but specific to small inputs (<64KB). 151 * isize *Must* be <64KB, otherwise the output will be corrupted. 152 * 153 * ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated 154 * by the caller (either on the stack or using kmem_zalloc). Passing NULL 155 * isn't valid. 156 */ 157 158 /* 159 * Tuning parameters 160 */ 161 162 /* 163 * COMPRESSIONLEVEL: Increasing this value improves compression ratio 164 * Lowering this value reduces memory usage. Reduced memory usage 165 * typically improves speed, due to cache effect (ex: L1 32KB for Intel, 166 * L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes 167 * (examples : 12 -> 16KB ; 17 -> 512KB) 168 */ 169 #define COMPRESSIONLEVEL 12 170 171 /* 172 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the 173 * algorithm skip faster data segments considered "incompressible". 174 * This may decrease compression ratio dramatically, but will be 175 * faster on incompressible data. Increasing this value will make 176 * the algorithm search more before declaring a segment "incompressible". 177 * This could improve compression a bit, but will be slower on 178 * incompressible data. The default value (6) is recommended. 179 */ 180 #define NOTCOMPRESSIBLE_CONFIRMATION 6 181 182 /* 183 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a boost to 184 * performance for big endian cpu, but the resulting compressed stream 185 * will be incompatible with little-endian CPU. You can set this option 186 * to 1 in situations where data will stay within closed environment. 187 * This option is useless on Little_Endian CPU (such as x86). 188 */ 189 /* #define BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */ 190 191 /* 192 * CPU Feature Detection 193 */ 194 195 /* 32 or 64 bits ? */ 196 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \ 197 defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \ 198 defined(__LP64__) || defined(_LP64)) 199 #define LZ4_ARCH64 1 200 #else 201 #define LZ4_ARCH64 0 202 #endif 203 204 /* 205 * Limits the amount of stack space that the algorithm may consume to hold 206 * the compression lookup table. The value `9' here means we'll never use 207 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL). 208 * If more memory is needed, it is allocated from the heap. 209 */ 210 #define STACKLIMIT 9 211 212 /* 213 * Little Endian or Big Endian? 214 * Note: overwrite the below #define if you know your architecture endianess. 215 */ 216 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \ 217 defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \ 218 defined(__PPC) || defined(PPC) || defined(__powerpc__) || \ 219 defined(__powerpc) || defined(powerpc) || \ 220 ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)))) 221 #define LZ4_BIG_ENDIAN 1 222 #else 223 /* 224 * Little Endian assumed. PDP Endian and other very rare endian format 225 * are unsupported. 226 */ 227 #endif 228 229 /* 230 * Unaligned memory access is automatically enabled for "common" CPU, 231 * such as x86. For others CPU, the compiler will be more cautious, and 232 * insert extra code to ensure aligned access is respected. If you know 233 * your target CPU supports unaligned memory access, you may want to 234 * force this option manually to improve performance 235 */ 236 #if defined(__ARM_FEATURE_UNALIGNED) 237 #define LZ4_FORCE_UNALIGNED_ACCESS 1 238 #endif 239 240 /* #define LZ4_FORCE_SW_BITCOUNT */ 241 242 /* 243 * Compiler Options 244 */ 245 #if __STDC_VERSION__ >= 199901L /* C99 */ 246 /* "restrict" is a known keyword */ 247 #else 248 /* Disable restrict */ 249 #define restrict 250 #endif 251 252 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__) 253 254 #ifdef _MSC_VER 255 /* Visual Studio */ 256 /* Visual is not C99, but supports some kind of inline */ 257 #define inline __forceinline 258 #if LZ4_ARCH64 259 /* For Visual 2005 */ 260 #pragma intrinsic(_BitScanForward64) 261 #pragma intrinsic(_BitScanReverse64) 262 #else /* !LZ4_ARCH64 */ 263 /* For Visual 2005 */ 264 #pragma intrinsic(_BitScanForward) 265 #pragma intrinsic(_BitScanReverse) 266 #endif /* !LZ4_ARCH64 */ 267 #endif /* _MSC_VER */ 268 269 #ifdef _MSC_VER 270 #define lz4_bswap16(x) _byteswap_ushort(x) 271 #else /* !_MSC_VER */ 272 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \ 273 (((x) & 0xffu) << 8))) 274 #endif /* !_MSC_VER */ 275 276 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__) 277 #define expect(expr, value) (__builtin_expect((expr), (value))) 278 #else 279 #define expect(expr, value) (expr) 280 #endif 281 282 #define likely(expr) expect((expr) != 0, 1) 283 #define unlikely(expr) expect((expr) != 0, 0) 284 285 /* Basic types */ 286 #if defined(_MSC_VER) 287 /* Visual Studio does not support 'stdint' natively */ 288 #define BYTE unsigned __int8 289 #define U16 unsigned __int16 290 #define U32 unsigned __int32 291 #define S32 __int32 292 #define U64 unsigned __int64 293 #else /* !defined(_MSC_VER) */ 294 #define BYTE uint8_t 295 #define U16 uint16_t 296 #define U32 uint32_t 297 #define S32 int32_t 298 #define U64 uint64_t 299 #endif /* !defined(_MSC_VER) */ 300 301 #ifndef LZ4_FORCE_UNALIGNED_ACCESS 302 #pragma pack(1) 303 #endif 304 305 typedef struct _U16_S { 306 U16 v; 307 } U16_S; 308 typedef struct _U32_S { 309 U32 v; 310 } U32_S; 311 typedef struct _U64_S { 312 U64 v; 313 } U64_S; 314 315 #ifndef LZ4_FORCE_UNALIGNED_ACCESS 316 #pragma pack() 317 #endif 318 319 #define A64(x) (((U64_S *)(x))->v) 320 #define A32(x) (((U32_S *)(x))->v) 321 #define A16(x) (((U16_S *)(x))->v) 322 323 /* 324 * Constants 325 */ 326 #define MINMATCH 4 327 328 #define HASH_LOG COMPRESSIONLEVEL 329 #define HASHTABLESIZE (1 << HASH_LOG) 330 #define HASH_MASK (HASHTABLESIZE - 1) 331 332 #define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \ 333 NOTCOMPRESSIBLE_CONFIRMATION : 2) 334 335 /* 336 * Defines if memory is allocated into the stack (local variable), 337 * or into the heap (kmem_alloc()). 338 */ 339 #define HEAPMODE (HASH_LOG > STACKLIMIT) 340 #define COPYLENGTH 8 341 #define LASTLITERALS 5 342 #define MFLIMIT (COPYLENGTH + MINMATCH) 343 #define MINLENGTH (MFLIMIT + 1) 344 345 #define MAXD_LOG 16 346 #define MAX_DISTANCE ((1 << MAXD_LOG) - 1) 347 348 #define ML_BITS 4 349 #define ML_MASK ((1U<<ML_BITS)-1) 350 #define RUN_BITS (8-ML_BITS) 351 #define RUN_MASK ((1U<<RUN_BITS)-1) 352 353 354 /* 355 * Architecture-specific macros 356 */ 357 #if LZ4_ARCH64 358 #define STEPSIZE 8 359 #define UARCH U64 360 #define AARCH A64 361 #define LZ4_COPYSTEP(s, d) A64(d) = A64(s); d += 8; s += 8; 362 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d) 363 #define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e) 364 #define HTYPE U32 365 #define INITBASE(base) const BYTE* const base = ip 366 #else /* !LZ4_ARCH64 */ 367 #define STEPSIZE 4 368 #define UARCH U32 369 #define AARCH A32 370 #define LZ4_COPYSTEP(s, d) A32(d) = A32(s); d += 4; s += 4; 371 #define LZ4_COPYPACKET(s, d) LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d); 372 #define LZ4_SECURECOPY LZ4_WILDCOPY 373 #define HTYPE const BYTE * 374 #define INITBASE(base) const int base = 0 375 #endif /* !LZ4_ARCH64 */ 376 377 #if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE)) 378 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) \ 379 { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; } 380 #define LZ4_WRITE_LITTLEENDIAN_16(p, i) \ 381 { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; } 382 #else 383 #define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); } 384 #define LZ4_WRITE_LITTLEENDIAN_16(p, v) { A16(p) = v; p += 2; } 385 #endif 386 387 388 /* Local structures */ 389 struct refTables { 390 HTYPE hashTable[HASHTABLESIZE]; 391 }; 392 393 394 /* Macros */ 395 #define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \ 396 HASH_LOG)) 397 #define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p)) 398 #define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e); 399 #define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \ 400 d = e; } 401 402 403 /* Private functions */ 404 #if LZ4_ARCH64 405 406 static inline int 407 LZ4_NbCommonBytes(register U64 val) 408 { 409 #if defined(LZ4_BIG_ENDIAN) 410 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 411 unsigned long r = 0; 412 _BitScanReverse64(&r, val); 413 return (int)(r >> 3); 414 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \ 415 !defined(LZ4_FORCE_SW_BITCOUNT) 416 return (__builtin_clzll(val) >> 3); 417 #else 418 int r; 419 if (!(val >> 32)) { 420 r = 4; 421 } else { 422 r = 0; 423 val >>= 32; 424 } 425 if (!(val >> 16)) { 426 r += 2; 427 val >>= 8; 428 } else { 429 val >>= 24; 430 } 431 r += (!val); 432 return (r); 433 #endif 434 #else 435 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 436 unsigned long r = 0; 437 _BitScanForward64(&r, val); 438 return (int)(r >> 3); 439 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \ 440 !defined(LZ4_FORCE_SW_BITCOUNT) 441 return (__builtin_ctzll(val) >> 3); 442 #else 443 static const int DeBruijnBytePos[64] = 444 { 0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7, 0, 2, 3, 6, 1, 5, 445 3, 5, 1, 3, 4, 4, 2, 5, 6, 7, 7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 446 5, 3, 4, 5, 6, 7, 1, 2, 4, 6, 4, 447 4, 5, 7, 2, 6, 5, 7, 6, 7, 7 448 }; 449 return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >> 450 58]; 451 #endif 452 #endif 453 } 454 455 #else 456 457 static inline int 458 LZ4_NbCommonBytes(register U32 val) 459 { 460 #if defined(LZ4_BIG_ENDIAN) 461 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 462 unsigned long r = 0; 463 _BitScanReverse(&r, val); 464 return (int)(r >> 3); 465 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \ 466 !defined(LZ4_FORCE_SW_BITCOUNT) 467 return (__builtin_clz(val) >> 3); 468 #else 469 int r; 470 if (!(val >> 16)) { 471 r = 2; 472 val >>= 8; 473 } else { 474 r = 0; 475 val >>= 24; 476 } 477 r += (!val); 478 return (r); 479 #endif 480 #else 481 #if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT) 482 unsigned long r = 0; 483 _BitScanForward(&r, val); 484 return (int)(r >> 3); 485 #elif defined(__GNUC__) && (GCC_VERSION >= 304) && \ 486 !defined(LZ4_FORCE_SW_BITCOUNT) 487 return (__builtin_ctz(val) >> 3); 488 #else 489 static const int DeBruijnBytePos[32] = { 490 0, 0, 3, 0, 3, 1, 3, 0, 491 3, 2, 2, 1, 3, 2, 0, 1, 492 3, 3, 1, 2, 2, 2, 2, 0, 493 3, 1, 2, 0, 1, 0, 1, 1 494 }; 495 return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >> 496 27]; 497 #endif 498 #endif 499 } 500 501 #endif 502 503 /* Public functions */ 504 505 static int 506 LZ4_compressBound(int isize) 507 { 508 return (isize + (isize / 255) + 16); 509 } 510 511 /* Compression functions */ 512 513 /*ARGSUSED*/ 514 static int 515 LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize, 516 int osize) 517 { 518 #if HEAPMODE 519 struct refTables *srt = (struct refTables *)ctx; 520 HTYPE *HashTable = (HTYPE *) (srt->hashTable); 521 #else 522 HTYPE HashTable[HASHTABLESIZE] = { 0 }; 523 #endif 524 525 const BYTE *ip = (BYTE *) source; 526 INITBASE(base); 527 const BYTE *anchor = ip; 528 const BYTE *const iend = ip + isize; 529 const BYTE *const oend = (BYTE *) dest + osize; 530 const BYTE *const mflimit = iend - MFLIMIT; 531 #define matchlimit (iend - LASTLITERALS) 532 533 BYTE *op = (BYTE *) dest; 534 535 int len, length; 536 const int skipStrength = SKIPSTRENGTH; 537 U32 forwardH; 538 539 540 /* Init */ 541 if (isize < MINLENGTH) 542 goto _last_literals; 543 544 /* First Byte */ 545 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 546 ip++; 547 forwardH = LZ4_HASH_VALUE(ip); 548 549 /* Main Loop */ 550 for (;;) { 551 int findMatchAttempts = (1U << skipStrength) + 3; 552 const BYTE *forwardIp = ip; 553 const BYTE *ref; 554 BYTE *token; 555 556 /* Find a match */ 557 do { 558 U32 h = forwardH; 559 int step = findMatchAttempts++ >> skipStrength; 560 ip = forwardIp; 561 forwardIp = ip + step; 562 563 if unlikely(forwardIp > mflimit) { 564 goto _last_literals; 565 } 566 567 forwardH = LZ4_HASH_VALUE(forwardIp); 568 ref = base + HashTable[h]; 569 HashTable[h] = ip - base; 570 571 } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip))); 572 573 /* Catch up */ 574 while ((ip > anchor) && (ref > (BYTE *) source) && 575 unlikely(ip[-1] == ref[-1])) { 576 ip--; 577 ref--; 578 } 579 580 /* Encode Literal length */ 581 length = ip - anchor; 582 token = op++; 583 584 /* Check output limit */ 585 if unlikely(op + length + (2 + 1 + LASTLITERALS) + 586 (length >> 8) > oend) 587 return (0); 588 589 if (length >= (int)RUN_MASK) { 590 *token = (RUN_MASK << ML_BITS); 591 len = length - RUN_MASK; 592 for (; len > 254; len -= 255) 593 *op++ = 255; 594 *op++ = (BYTE)len; 595 } else 596 *token = (length << ML_BITS); 597 598 /* Copy Literals */ 599 LZ4_BLINDCOPY(anchor, op, length); 600 601 _next_match: 602 /* Encode Offset */ 603 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 604 605 /* Start Counting */ 606 ip += MINMATCH; 607 ref += MINMATCH; /* MinMatch verified */ 608 anchor = ip; 609 while likely(ip < matchlimit - (STEPSIZE - 1)) { 610 UARCH diff = AARCH(ref) ^ AARCH(ip); 611 if (!diff) { 612 ip += STEPSIZE; 613 ref += STEPSIZE; 614 continue; 615 } 616 ip += LZ4_NbCommonBytes(diff); 617 goto _endCount; 618 } 619 #if LZ4_ARCH64 620 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 621 ip += 4; 622 ref += 4; 623 } 624 #endif 625 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 626 ip += 2; 627 ref += 2; 628 } 629 if ((ip < matchlimit) && (*ref == *ip)) 630 ip++; 631 _endCount: 632 633 /* Encode MatchLength */ 634 len = (ip - anchor); 635 /* Check output limit */ 636 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) 637 return (0); 638 if (len >= (int)ML_MASK) { 639 *token += ML_MASK; 640 len -= ML_MASK; 641 for (; len > 509; len -= 510) { 642 *op++ = 255; 643 *op++ = 255; 644 } 645 if (len > 254) { 646 len -= 255; 647 *op++ = 255; 648 } 649 *op++ = (BYTE)len; 650 } else 651 *token += len; 652 653 /* Test end of chunk */ 654 if (ip > mflimit) { 655 anchor = ip; 656 break; 657 } 658 /* Fill table */ 659 HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base; 660 661 /* Test next position */ 662 ref = base + HashTable[LZ4_HASH_VALUE(ip)]; 663 HashTable[LZ4_HASH_VALUE(ip)] = ip - base; 664 if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) { 665 token = op++; 666 *token = 0; 667 goto _next_match; 668 } 669 /* Prepare next loop */ 670 anchor = ip++; 671 forwardH = LZ4_HASH_VALUE(ip); 672 } 673 674 _last_literals: 675 /* Encode Last Literals */ 676 { 677 int lastRun = iend - anchor; 678 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 679 oend) 680 return (0); 681 if (lastRun >= (int)RUN_MASK) { 682 *op++ = (RUN_MASK << ML_BITS); 683 lastRun -= RUN_MASK; 684 for (; lastRun > 254; lastRun -= 255) { 685 *op++ = 255; 686 } 687 *op++ = (BYTE)lastRun; 688 } else 689 *op++ = (lastRun << ML_BITS); 690 (void) memcpy(op, anchor, iend - anchor); 691 op += iend - anchor; 692 } 693 694 /* End */ 695 return (int)(((char *)op) - dest); 696 } 697 698 699 700 /* Note : this function is valid only if isize < LZ4_64KLIMIT */ 701 #define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1)) 702 #define HASHLOG64K (HASH_LOG + 1) 703 #define HASH64KTABLESIZE (1U << HASHLOG64K) 704 #define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \ 705 HASHLOG64K)) 706 #define LZ4_HASH64K_VALUE(p) LZ4_HASH64K_FUNCTION(A32(p)) 707 708 /*ARGSUSED*/ 709 static int 710 LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize, 711 int osize) 712 { 713 #if HEAPMODE 714 struct refTables *srt = (struct refTables *)ctx; 715 U16 *HashTable = (U16 *) (srt->hashTable); 716 #else 717 U16 HashTable[HASH64KTABLESIZE] = { 0 }; 718 #endif 719 720 const BYTE *ip = (BYTE *) source; 721 const BYTE *anchor = ip; 722 const BYTE *const base = ip; 723 const BYTE *const iend = ip + isize; 724 const BYTE *const oend = (BYTE *) dest + osize; 725 const BYTE *const mflimit = iend - MFLIMIT; 726 #define matchlimit (iend - LASTLITERALS) 727 728 BYTE *op = (BYTE *) dest; 729 730 int len, length; 731 const int skipStrength = SKIPSTRENGTH; 732 U32 forwardH; 733 734 /* Init */ 735 if (isize < MINLENGTH) 736 goto _last_literals; 737 738 /* First Byte */ 739 ip++; 740 forwardH = LZ4_HASH64K_VALUE(ip); 741 742 /* Main Loop */ 743 for (;;) { 744 int findMatchAttempts = (1U << skipStrength) + 3; 745 const BYTE *forwardIp = ip; 746 const BYTE *ref; 747 BYTE *token; 748 749 /* Find a match */ 750 do { 751 U32 h = forwardH; 752 int step = findMatchAttempts++ >> skipStrength; 753 ip = forwardIp; 754 forwardIp = ip + step; 755 756 if (forwardIp > mflimit) { 757 goto _last_literals; 758 } 759 760 forwardH = LZ4_HASH64K_VALUE(forwardIp); 761 ref = base + HashTable[h]; 762 HashTable[h] = ip - base; 763 764 } while (A32(ref) != A32(ip)); 765 766 /* Catch up */ 767 while ((ip > anchor) && (ref > (BYTE *) source) && 768 (ip[-1] == ref[-1])) { 769 ip--; 770 ref--; 771 } 772 773 /* Encode Literal length */ 774 length = ip - anchor; 775 token = op++; 776 777 /* Check output limit */ 778 if unlikely(op + length + (2 + 1 + LASTLITERALS) + 779 (length >> 8) > oend) 780 return (0); 781 782 if (length >= (int)RUN_MASK) { 783 *token = (RUN_MASK << ML_BITS); 784 len = length - RUN_MASK; 785 for (; len > 254; len -= 255) 786 *op++ = 255; 787 *op++ = (BYTE)len; 788 } else 789 *token = (length << ML_BITS); 790 791 /* Copy Literals */ 792 LZ4_BLINDCOPY(anchor, op, length); 793 794 _next_match: 795 /* Encode Offset */ 796 LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref); 797 798 /* Start Counting */ 799 ip += MINMATCH; 800 ref += MINMATCH; /* MinMatch verified */ 801 anchor = ip; 802 while (ip < matchlimit - (STEPSIZE - 1)) { 803 UARCH diff = AARCH(ref) ^ AARCH(ip); 804 if (!diff) { 805 ip += STEPSIZE; 806 ref += STEPSIZE; 807 continue; 808 } 809 ip += LZ4_NbCommonBytes(diff); 810 goto _endCount; 811 } 812 #if LZ4_ARCH64 813 if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) { 814 ip += 4; 815 ref += 4; 816 } 817 #endif 818 if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) { 819 ip += 2; 820 ref += 2; 821 } 822 if ((ip < matchlimit) && (*ref == *ip)) 823 ip++; 824 _endCount: 825 826 /* Encode MatchLength */ 827 len = (ip - anchor); 828 /* Check output limit */ 829 if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend) 830 return (0); 831 if (len >= (int)ML_MASK) { 832 *token += ML_MASK; 833 len -= ML_MASK; 834 for (; len > 509; len -= 510) { 835 *op++ = 255; 836 *op++ = 255; 837 } 838 if (len > 254) { 839 len -= 255; 840 *op++ = 255; 841 } 842 *op++ = (BYTE)len; 843 } else 844 *token += len; 845 846 /* Test end of chunk */ 847 if (ip > mflimit) { 848 anchor = ip; 849 break; 850 } 851 /* Fill table */ 852 HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base; 853 854 /* Test next position */ 855 ref = base + HashTable[LZ4_HASH64K_VALUE(ip)]; 856 HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base; 857 if (A32(ref) == A32(ip)) { 858 token = op++; 859 *token = 0; 860 goto _next_match; 861 } 862 /* Prepare next loop */ 863 anchor = ip++; 864 forwardH = LZ4_HASH64K_VALUE(ip); 865 } 866 867 _last_literals: 868 /* Encode Last Literals */ 869 { 870 int lastRun = iend - anchor; 871 if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) > 872 oend) 873 return (0); 874 if (lastRun >= (int)RUN_MASK) { 875 *op++ = (RUN_MASK << ML_BITS); 876 lastRun -= RUN_MASK; 877 for (; lastRun > 254; lastRun -= 255) 878 *op++ = 255; 879 *op++ = (BYTE)lastRun; 880 } else 881 *op++ = (lastRun << ML_BITS); 882 (void) memcpy(op, anchor, iend - anchor); 883 op += iend - anchor; 884 } 885 886 /* End */ 887 return (int)(((char *)op) - dest); 888 } 889 890 static int 891 real_LZ4_compress(const char *source, char *dest, int isize, int osize) 892 { 893 #if HEAPMODE 894 void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP); 895 int result; 896 897 /* 898 * out of kernel memory, gently fall through - this will disable 899 * compression in zio_compress_data 900 */ 901 if (ctx == NULL) 902 return (0); 903 904 if (isize < LZ4_64KLIMIT) 905 result = LZ4_compress64kCtx(ctx, source, dest, isize, osize); 906 else 907 result = LZ4_compressCtx(ctx, source, dest, isize, osize); 908 909 kmem_free(ctx, sizeof (struct refTables)); 910 return (result); 911 #else 912 if (isize < (int)LZ4_64KLIMIT) 913 return (LZ4_compress64kCtx(NULL, source, dest, isize, osize)); 914 return (LZ4_compressCtx(NULL, source, dest, isize, osize)); 915 #endif 916 } 917 918 /* Decompression functions */ 919 920 /* 921 * Note: The decoding functions real_LZ4_uncompress() and 922 * LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow" 923 * attack type. They will never write nor read outside of the provided 924 * output buffers. LZ4_uncompress_unknownOutputSize() also insures that 925 * it will never read outside of the input buffer. A corrupted input 926 * will produce an error result, a negative int, indicating the position 927 * of the error within input stream. 928 */ 929 930 static int 931 real_LZ4_uncompress(const char *source, char *dest, int osize) 932 { 933 /* Local Variables */ 934 const BYTE *restrict ip = (const BYTE *) source; 935 const BYTE *ref; 936 937 BYTE *op = (BYTE *) dest; 938 BYTE *const oend = op + osize; 939 BYTE *cpy; 940 941 unsigned token; 942 943 size_t length; 944 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 945 #if LZ4_ARCH64 946 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 947 #endif 948 949 /* Main Loop */ 950 for (;;) { 951 /* get runlength */ 952 token = *ip++; 953 if ((length = (token >> ML_BITS)) == RUN_MASK) { 954 size_t len; 955 for (; (len = *ip++) == 255; length += 255) { 956 } 957 length += len; 958 } 959 /* copy literals */ 960 cpy = op + length; 961 if unlikely(cpy > oend - COPYLENGTH) { 962 if (cpy != oend) 963 /* Error: we must necessarily stand at EOF */ 964 goto _output_error; 965 (void) memcpy(op, ip, length); 966 ip += length; 967 break; /* EOF */ 968 } 969 LZ4_WILDCOPY(ip, op, cpy); 970 ip -= (op - cpy); 971 op = cpy; 972 973 /* get offset */ 974 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); 975 ip += 2; 976 if unlikely(ref < (BYTE * const) dest) 977 /* 978 * Error: offset create reference outside destination 979 * buffer 980 */ 981 goto _output_error; 982 983 /* get matchlength */ 984 if ((length = (token & ML_MASK)) == ML_MASK) { 985 for (; *ip == 255; length += 255) { 986 ip++; 987 } 988 length += *ip++; 989 } 990 /* copy repeated sequence */ 991 if unlikely(op - ref < STEPSIZE) { 992 #if LZ4_ARCH64 993 size_t dec64 = dec64table[op-ref]; 994 #else 995 const int dec64 = 0; 996 #endif 997 op[0] = ref[0]; 998 op[1] = ref[1]; 999 op[2] = ref[2]; 1000 op[3] = ref[3]; 1001 op += 4; 1002 ref += 4; 1003 ref -= dec32table[op-ref]; 1004 A32(op) = A32(ref); 1005 op += STEPSIZE - 4; 1006 ref -= dec64; 1007 } else { 1008 LZ4_COPYSTEP(ref, op); 1009 } 1010 cpy = op + length - (STEPSIZE - 4); 1011 if (cpy > oend - COPYLENGTH) { 1012 if (cpy > oend) 1013 /* 1014 * Error: request to write beyond destination 1015 * buffer 1016 */ 1017 goto _output_error; 1018 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 1019 while (op < cpy) 1020 *op++ = *ref++; 1021 op = cpy; 1022 if (op == oend) 1023 /* 1024 * Check EOF (should never happen, since last 1025 * 5 bytes are supposed to be literals) 1026 */ 1027 goto _output_error; 1028 continue; 1029 } 1030 LZ4_SECURECOPY(ref, op, cpy); 1031 op = cpy; /* correction */ 1032 } 1033 1034 /* end of decoding */ 1035 return (int)(((char *)ip) - source); 1036 1037 /* write overflow error detected */ 1038 _output_error: 1039 return (int)(-(((char *)ip) - source)); 1040 } 1041 1042 static int 1043 LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize, 1044 int maxOutputSize) 1045 { 1046 /* Local Variables */ 1047 const BYTE *restrict ip = (const BYTE *) source; 1048 const BYTE *const iend = ip + isize; 1049 const BYTE *ref; 1050 1051 BYTE *op = (BYTE *) dest; 1052 BYTE *const oend = op + maxOutputSize; 1053 BYTE *cpy; 1054 1055 size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; 1056 #if LZ4_ARCH64 1057 size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3}; 1058 #endif 1059 1060 /* Main Loop */ 1061 while (ip < iend) { 1062 unsigned token; 1063 size_t length; 1064 1065 /* get runlength */ 1066 token = *ip++; 1067 if ((length = (token >> ML_BITS)) == RUN_MASK) { 1068 int s = 255; 1069 while ((ip < iend) && (s == 255)) { 1070 s = *ip++; 1071 length += s; 1072 } 1073 } 1074 /* copy literals */ 1075 cpy = op + length; 1076 if ((cpy > oend - COPYLENGTH) || 1077 (ip + length > iend - COPYLENGTH)) { 1078 if (cpy > oend) 1079 /* Error: writes beyond output buffer */ 1080 goto _output_error; 1081 if (ip + length != iend) 1082 /* 1083 * Error: LZ4 format requires to consume all 1084 * input at this stage 1085 */ 1086 goto _output_error; 1087 (void) memcpy(op, ip, length); 1088 op += length; 1089 /* Necessarily EOF, due to parsing restrictions */ 1090 break; 1091 } 1092 LZ4_WILDCOPY(ip, op, cpy); 1093 ip -= (op - cpy); 1094 op = cpy; 1095 1096 /* get offset */ 1097 LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip); 1098 ip += 2; 1099 if (ref < (BYTE * const) dest) 1100 /* 1101 * Error: offset creates reference outside of 1102 * destination buffer 1103 */ 1104 goto _output_error; 1105 1106 /* get matchlength */ 1107 if ((length = (token & ML_MASK)) == ML_MASK) { 1108 while (ip < iend) { 1109 int s = *ip++; 1110 length += s; 1111 if (s == 255) 1112 continue; 1113 break; 1114 } 1115 } 1116 /* copy repeated sequence */ 1117 if unlikely(op - ref < STEPSIZE) { 1118 #if LZ4_ARCH64 1119 size_t dec64 = dec64table[op-ref]; 1120 #else 1121 const int dec64 = 0; 1122 #endif 1123 op[0] = ref[0]; 1124 op[1] = ref[1]; 1125 op[2] = ref[2]; 1126 op[3] = ref[3]; 1127 op += 4; 1128 ref += 4; 1129 ref -= dec32table[op-ref]; 1130 A32(op) = A32(ref); 1131 op += STEPSIZE - 4; 1132 ref -= dec64; 1133 } else { 1134 LZ4_COPYSTEP(ref, op); 1135 } 1136 cpy = op + length - (STEPSIZE - 4); 1137 if (cpy > oend - COPYLENGTH) { 1138 if (cpy > oend) 1139 /* 1140 * Error: request to write outside of 1141 * destination buffer 1142 */ 1143 goto _output_error; 1144 LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); 1145 while (op < cpy) 1146 *op++ = *ref++; 1147 op = cpy; 1148 if (op == oend) 1149 /* 1150 * Check EOF (should never happen, since 1151 * last 5 bytes are supposed to be literals) 1152 */ 1153 goto _output_error; 1154 continue; 1155 } 1156 LZ4_SECURECOPY(ref, op, cpy); 1157 op = cpy; /* correction */ 1158 } 1159 1160 /* end of decoding */ 1161 return (int)(((char *)op) - dest); 1162 1163 /* write overflow error detected */ 1164 _output_error: 1165 return (int)(-(((char *)ip) - source)); 1166 }