/*
 * LZ4 - Fast LZ compression algorithm
 * Header File
 * Copyright (C) 2011-2013, Yann Collet.
 * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * You can contact the author at :
 * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
 * - LZ4 source repository : http://code.google.com/p/lz4/
 */

#include <sys/zfs_context.h>

static int real_LZ4_compress(const char *source, char *dest, int isize,
    int osize);
static int real_LZ4_uncompress(const char *source, char *dest, int osize);
static int LZ4_compressBound(int isize);
static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
    int isize, int maxOutputSize);
static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);
static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
    int isize, int osize);

/*ARGSUSED*/
size_t
lz4_compress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
        uint32_t bufsiz;
        char *dest = d_start;

        ASSERT(d_len >= sizeof (bufsiz));

        bufsiz = real_LZ4_compress(s_start, &dest[sizeof (bufsiz)], s_len,
            d_len - sizeof (bufsiz));

        /* Signal an error if the compression routine returned zero. */
        if (bufsiz == 0)
                return (s_len);

        /*
         * Encode the compressed buffer size at the start. We'll need this in
         * decompression to counter the effects of padding which might be
         * added to the compressed buffer and which, if unhandled, would
         * confuse the hell out of our decompression function.
         */
        *(uint32_t *)dest = BE_32(bufsiz);

        return (bufsiz + sizeof (bufsiz));
}
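
/*
 * Illustrative round-trip sketch (hypothetical caller code, not part of
 * the original source; the names src, dst, tmp and the 4096-byte sizes
 * are assumptions). The buffer built above is a 4-byte big-endian length
 * header followed by the LZ4 payload:
 *
 *      char src[4096], dst[4096], tmp[4096];
 *      size_t c_len = lz4_compress(src, dst, sizeof (src), sizeof (dst), 0);
 *      if (c_len == sizeof (src)) {
 *              ...        incompressible: caller keeps the original buffer
 *      } else {
 *              ...        dst[0..3] = BE_32(payload size), payload follows;
 *              ...        lz4_decompress() below returns 0 on success
 *              (void) lz4_decompress(dst, tmp, c_len, sizeof (tmp), 0);
 *      }
 */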

/*ARGSUSED*/
int
lz4_decompress(void *s_start, void *d_start, size_t s_len, size_t d_len, int n)
{
        const char *src = s_start;
        uint32_t bufsiz = BE_IN32(src);

        /* invalid compressed buffer size encoded at start */
        if (bufsiz + sizeof (bufsiz) > s_len)
                return (1);

        /*
         * Returns 0 on success (decompression function returned non-negative)
         * and non-zero on failure (decompression function returned negative).
         */
        return (LZ4_uncompress_unknownOutputSize(&src[sizeof (bufsiz)],
            d_start, bufsiz, d_len) < 0);
}

/*
 * LZ4 API Description:
 *
 * Simple Functions:
 * real_LZ4_compress() :
 *      isize  : is the input size. Max supported value is ~1.9GB
 *      return : the number of bytes written in buffer dest
 *               or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 *      note : destination buffer must be already allocated.
 *              destination buffer must be sized to handle worst-case
 *              situations (input data not compressible); worst-case size
 *              evaluation is provided by function LZ4_compressBound().
 *
 * real_LZ4_uncompress() :
 *      osize  : is the output size, therefore the original size
 *      return : the number of bytes read in the source buffer.
 *              If the source stream is malformed, the function will stop
 *              decoding and return a negative result, indicating the byte
 *              position of the faulty instruction. This function never
 *              writes beyond dest + osize, and is therefore protected
 *              against malicious data packets.
 *      note : destination buffer must be already allocated
 *
 * Advanced Functions
 *
 * LZ4_compressBound() :
 *      Provides the maximum size that LZ4 may output in a "worst case"
 *      scenario (input data not compressible), primarily useful for memory
 *      allocation of the output buffer.
 *
 *      isize  : is the input size. Max supported value is ~1.9GB
 *      return : maximum output size in a "worst case" scenario
 *      note : this function is limited by "int" range (2^31-1)
 *
 * LZ4_uncompress_unknownOutputSize() :
 *      isize  : is the input size, therefore the compressed size
 *      maxOutputSize : is the size of the destination buffer (which must be
 *              already allocated)
 *      return : the number of bytes decoded in the destination buffer
 *              (necessarily <= maxOutputSize). If the source stream is
 *              malformed, the function will stop decoding and return a
 *              negative result, indicating the byte position of the faulty
 *              instruction. This function never writes beyond dest +
 *              maxOutputSize, and is therefore protected against malicious
 *              data packets.
 *      note   : Destination buffer must be already allocated.
 *              This version is slightly slower than real_LZ4_uncompress()
 *
 * LZ4_compressCtx() :
 *      This function explicitly handles the CTX memory structure.
 *
 *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *      isn't valid.
 *
 * LZ4_compress64kCtx() :
 *      Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
 *      isize *Must* be <64KB, otherwise the output will be corrupted.
 *
 *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 *      isn't valid.
 */
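
/*
 * Hypothetical usage sketch of the API above (illustrative only; the
 * buffers in, out, back and the 8192-byte input size are assumptions):
 *
 *      int isize = 8192;
 *      int bound = LZ4_compressBound(isize);   ... worst-case output size
 *      char *out = kmem_alloc(bound, KM_SLEEP);
 *      int c_len = real_LZ4_compress(in, out, isize, bound);
 *      if (c_len != 0) {
 *              ... decoding needs the original size, isize
 *              int rd = real_LZ4_uncompress(out, back, isize);
 *              ... rd < 0 => malformed stream, -rd is the faulty position
 *      }
 *      kmem_free(out, bound);
 */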

/*
 * Tuning parameters
 */

/*
 * COMPRESSIONLEVEL: Increasing this value improves compression ratio.
 *      Lowering this value reduces memory usage. Reduced memory usage
 *      typically improves speed, due to cache effects (ex: L1 32KB for Intel,
 *      L1 64KB for AMD). Memory usage formula : N->2^(N+2) Bytes
 *      (examples : 12 -> 16KB ; 17 -> 512KB)
 */
#define COMPRESSIONLEVEL 12

/*
 * NOTCOMPRESSIBLE_CONFIRMATION: Decreasing this value will make the
 *      algorithm skip data segments considered "incompressible" more
 *      quickly. This may decrease compression ratio dramatically, but
 *      will be faster on incompressible data. Increasing this value will
 *      make the algorithm search longer before declaring a segment
 *      "incompressible". This could improve compression a bit, but will
 *      be slower on incompressible data. The default value (6) is
 *      recommended.
 */
#define NOTCOMPRESSIBLE_CONFIRMATION 6

/*
 * BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE: This will provide a performance
 * boost on big-endian CPUs, but the resulting compressed stream will be
 * incompatible with little-endian CPUs. You can set this option to 1 in
 * situations where data will stay within a closed environment. This
 * option is useless on little-endian CPUs (such as x86).
 */
/* #define      BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE 1 */

/*
 * CPU Feature Detection
 */

/* 32 or 64 bits ? */
#if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
    defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
    defined(__LP64__) || defined(_LP64))
#define LZ4_ARCH64 1
#else
#define LZ4_ARCH64 0
#endif

/*
 * Limits the amount of stack space that the algorithm may consume to hold
 * the compression lookup table. The value `9' here means we'll never use
 * more than 2k of stack (see above for a description of COMPRESSIONLEVEL).
 * If more memory is needed, it is allocated from the heap.
 */
#define STACKLIMIT 9

/*
 * Little Endian or Big Endian?
 * Note: override the below #define if you know your architecture's
 * endianness.
 */
#if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
        defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
        defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
        defined(__powerpc) || defined(powerpc) || \
        ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
#define LZ4_BIG_ENDIAN 1
#else
/*
 * Little Endian assumed. PDP Endian and other very rare endian formats
 * are unsupported.
 */
#endif

/*
 * Unaligned memory access is automatically enabled for "common" CPUs,
 * such as x86. For other CPUs, the compiler will be more cautious and
 * insert extra code to ensure aligned access is respected. If you know
 * your target CPU supports unaligned memory access, you may want to
 * force this option manually to improve performance.
 */
#if defined(__ARM_FEATURE_UNALIGNED)
#define LZ4_FORCE_UNALIGNED_ACCESS 1
#endif

/* #define      LZ4_FORCE_SW_BITCOUNT */

/*
 * Compiler Options
 */
#if __STDC_VERSION__ >= 199901L      /* C99 */
/* "restrict" is a known keyword */
#else
/* Disable restrict */
#define restrict
#endif

#define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)

#ifdef _MSC_VER
/* Visual Studio */
/* Visual is not C99, but supports some kind of inline */
#define inline __forceinline
#if LZ4_ARCH64
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward64)
#pragma intrinsic(_BitScanReverse64)
#else /* !LZ4_ARCH64 */
/* For Visual 2005 */
#pragma intrinsic(_BitScanForward)
#pragma intrinsic(_BitScanReverse)
#endif /* !LZ4_ARCH64 */
#endif /* _MSC_VER */

#ifdef _MSC_VER
#define lz4_bswap16(x) _byteswap_ushort(x)
#else /* !_MSC_VER */
#define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
        (((x) & 0xffu) << 8)))
#endif /* !_MSC_VER */

#if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
#define expect(expr, value)    (__builtin_expect((expr), (value)))
#else
#define expect(expr, value)    (expr)
#endif

#define likely(expr)    expect((expr) != 0, 1)
#define unlikely(expr)  expect((expr) != 0, 0)

/* Basic types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define BYTE    unsigned __int8
#define U16     unsigned __int16
#define U32     unsigned __int32
#define S32     __int32
#define U64     unsigned __int64
#else /* !defined(_MSC_VER) */
#define BYTE    uint8_t
#define U16     uint16_t
#define U32     uint32_t
#define S32     int32_t
#define U64     uint64_t
#endif /* !defined(_MSC_VER) */

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
        U16 v;
} U16_S;
typedef struct _U32_S {
        U32 v;
} U32_S;
typedef struct _U64_S {
        U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

#define A64(x) (((U64_S *)(x))->v)
#define A32(x) (((U32_S *)(x))->v)
#define A16(x) (((U16_S *)(x))->v)

/*
 * Constants
 */
#define MINMATCH 4

#define HASH_LOG COMPRESSIONLEVEL
#define HASHTABLESIZE (1 << HASH_LOG)
#define HASH_MASK (HASHTABLESIZE - 1)

#define SKIPSTRENGTH (NOTCOMPRESSIBLE_CONFIRMATION > 2 ? \
        NOTCOMPRESSIBLE_CONFIRMATION : 2)

/*
 * Defines whether memory is allocated on the stack (local variable)
 * or from the heap (kmem_alloc()).
 */
#define HEAPMODE (HASH_LOG > STACKLIMIT)
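/*
 * For instance, with the default COMPRESSIONLEVEL of 12, HASH_LOG (12)
 * exceeds STACKLIMIT (9), so HEAPMODE evaluates to 1 and the
 * 2^(12+2) = 16KB hash table is taken from the heap instead of the
 * 2KB-limited stack.
 */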
#define COPYLENGTH 8
#define LASTLITERALS 5
#define MFLIMIT (COPYLENGTH + MINMATCH)
#define MINLENGTH (MFLIMIT + 1)

#define MAXD_LOG 16
#define MAX_DISTANCE ((1 << MAXD_LOG) - 1)

#define ML_BITS 4
#define ML_MASK ((1U<<ML_BITS)-1)
#define RUN_BITS (8-ML_BITS)
#define RUN_MASK ((1U<<RUN_BITS)-1)
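
/*
 * For reference, every sequence in an LZ4 stream begins with a one-byte
 * token split by the masks above: the upper RUN_BITS (4) bits hold the
 * literal-run length and the lower ML_BITS (4) bits the match length,
 * with the all-ones value (RUN_MASK/ML_MASK, i.e. 15) signalling that
 * additional length bytes follow.
 */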

/*
 * Architecture-specific macros
 */
#if LZ4_ARCH64
#define STEPSIZE 8
#define UARCH U64
#define AARCH A64
#define LZ4_COPYSTEP(s, d)      A64(d) = A64(s); d += 8; s += 8;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d)
#define LZ4_SECURECOPY(s, d, e) if (d < e) LZ4_WILDCOPY(s, d, e)
#define HTYPE U32
#define INITBASE(base)          const BYTE* const base = ip
#else /* !LZ4_ARCH64 */
#define STEPSIZE 4
#define UARCH U32
#define AARCH A32
#define LZ4_COPYSTEP(s, d)      A32(d) = A32(s); d += 4; s += 4;
#define LZ4_COPYPACKET(s, d)    LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define LZ4_SECURECOPY          LZ4_WILDCOPY
#define HTYPE const BYTE *
#define INITBASE(base)          const int base = 0
#endif /* !LZ4_ARCH64 */

#if (defined(LZ4_BIG_ENDIAN) && !defined(BIG_ENDIAN_NATIVE_BUT_INCOMPATIBLE))
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) \
        { U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define LZ4_WRITE_LITTLEENDIAN_16(p, i) \
        { U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else
#define LZ4_READ_LITTLEENDIAN_16(d, s, p) { d = (s) - A16(p); }
#define LZ4_WRITE_LITTLEENDIAN_16(p, v)  { A16(p) = v; p += 2; }
#endif


/* Local structures */
struct refTables {
        HTYPE hashTable[HASHTABLESIZE];
};


/* Macros */
#define LZ4_HASH_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH * 8) - \
        HASH_LOG))
#define LZ4_HASH_VALUE(p) LZ4_HASH_FUNCTION(A32(p))
#define LZ4_WILDCOPY(s, d, e) do { LZ4_COPYPACKET(s, d) } while (d < e);
#define LZ4_BLINDCOPY(s, d, l) { BYTE* e = (d) + l; LZ4_WILDCOPY(s, d, e); \
        d = e; }
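
/*
 * Note: 2654435761 is a prime close to 2^32 divided by the golden ratio,
 * i.e. Knuth-style multiplicative hashing: the first MINMATCH (4) bytes
 * of input are read as a U32, multiplied, and only the top HASH_LOG bits
 * are kept. With the default HASH_LOG of 12 the shift is
 * (4 * 8) - 12 = 20, producing table indices in [0, 4095].
 */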


/* Private functions */
#if LZ4_ARCH64

static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanReverse64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_clzll(val) >> 3);
#else
        int r;
        if (!(val >> 32)) {
                r = 4;
        } else {
                r = 0;
                val >>= 32;
        }
        if (!(val >> 16)) {
                r += 2;
                val >>= 8;
        } else {
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanForward64(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_ctzll(val) >> 3);
#else
        static const int DeBruijnBytePos[64] = {
                0, 0, 0, 0, 0, 1, 1, 2, 0, 3, 1, 3, 1, 4, 2, 7,
                0, 2, 3, 6, 1, 5, 3, 5, 1, 3, 4, 4, 2, 5, 6, 7,
                7, 0, 1, 2, 3, 3, 4, 6, 2, 6, 5, 5, 3, 4, 5, 6,
                7, 1, 2, 4, 6, 4, 4, 5, 7, 2, 6, 5, 7, 6, 7, 7
        };
        return DeBruijnBytePos[((U64) ((val & -val) * 0x0218A392CDABBD3F)) >>
            58];
#endif
#endif
}

#else

static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanReverse(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_clz(val) >> 3);
#else
        int r;
        if (!(val >> 16)) {
                r = 2;
                val >>= 8;
        } else {
                r = 0;
                val >>= 24;
        }
        r += (!val);
        return (r);
#endif
#else
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
        unsigned long r = 0;
        _BitScanForward(&r, val);
        return (int)(r >> 3);
#elif defined(__GNUC__) && (GCC_VERSION >= 304) && \
        !defined(LZ4_FORCE_SW_BITCOUNT)
        return (__builtin_ctz(val) >> 3);
#else
        static const int DeBruijnBytePos[32] = {
                0, 0, 3, 0, 3, 1, 3, 0,
                3, 2, 2, 1, 3, 2, 0, 1,
                3, 3, 1, 2, 2, 2, 2, 0,
                3, 1, 2, 0, 1, 0, 1, 1
        };
        return DeBruijnBytePos[((U32) ((val & -(S32) val) * 0x077CB531U)) >>
            27];
#endif
#endif
}

#endif

/* Public functions */

static int
LZ4_compressBound(int isize)
{
        return (isize + (isize / 255) + 16);
}
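
/*
 * For example, a 4096-byte input yields a bound of
 * 4096 + (4096 / 255) + 16 = 4096 + 16 + 16 = 4128 bytes, i.e. a
 * worst-case expansion of less than 1%.
 */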

/* Compression functions */

/*ARGSUSED*/
static int
LZ4_compressCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
        struct refTables *srt = (struct refTables *)ctx;
        HTYPE *HashTable = (HTYPE *) (srt->hashTable);
#else
        HTYPE HashTable[HASHTABLESIZE] = { 0 };
#endif

        const BYTE *ip = (BYTE *) source;
        INITBASE(base);
        const BYTE *anchor = ip;
        const BYTE *const iend = ip + isize;
        const BYTE *const oend = (BYTE *) dest + osize;
        const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

        BYTE *op = (BYTE *) dest;

        int len, length;
        const int skipStrength = SKIPSTRENGTH;
        U32 forwardH;


        /* Init */
        if (isize < MINLENGTH)
                goto _last_literals;

        /* First Byte */
        HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
        ip++;
        forwardH = LZ4_HASH_VALUE(ip);

        /* Main Loop */
        for (;;) {
                int findMatchAttempts = (1U << skipStrength) + 3;
                const BYTE *forwardIp = ip;
                const BYTE *ref;
                BYTE *token;

                /* Find a match */
                do {
                        U32 h = forwardH;
                        int step = findMatchAttempts++ >> skipStrength;
                        ip = forwardIp;
                        forwardIp = ip + step;

                        if unlikely(forwardIp > mflimit) {
                                goto _last_literals;
                        }

                        forwardH = LZ4_HASH_VALUE(forwardIp);
                        ref = base + HashTable[h];
                        HashTable[h] = ip - base;

                } while ((ref < ip - MAX_DISTANCE) || (A32(ref) != A32(ip)));

                /* Catch up */
                while ((ip > anchor) && (ref > (BYTE *) source) &&
                    unlikely(ip[-1] == ref[-1])) {
                        ip--;
                        ref--;
                }

                /* Encode Literal length */
                length = ip - anchor;
                token = op++;

                /* Check output limit */
                if unlikely(op + length + (2 + 1 + LASTLITERALS) +
                    (length >> 8) > oend)
                        return (0);

                if (length >= (int)RUN_MASK) {
                        *token = (RUN_MASK << ML_BITS);
                        len = length - RUN_MASK;
                        for (; len > 254; len -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)len;
                } else
                        *token = (length << ML_BITS);

                /* Copy Literals */
                LZ4_BLINDCOPY(anchor, op, length);

                _next_match:
                /* Encode Offset */
                LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

                /* Start Counting */
                ip += MINMATCH;
                ref += MINMATCH;        /* MinMatch verified */
                anchor = ip;
                while likely(ip < matchlimit - (STEPSIZE - 1)) {
                        UARCH diff = AARCH(ref) ^ AARCH(ip);
                        if (!diff) {
                                ip += STEPSIZE;
                                ref += STEPSIZE;
                                continue;
                        }
                        ip += LZ4_NbCommonBytes(diff);
                        goto _endCount;
                }
#if LZ4_ARCH64
                if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
                        ip += 4;
                        ref += 4;
                }
#endif
                if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
                        ip += 2;
                        ref += 2;
                }
                if ((ip < matchlimit) && (*ref == *ip))
                        ip++;
                _endCount:

                /* Encode MatchLength */
                len = (ip - anchor);
                /* Check output limit */
                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
                        return (0);
                if (len >= (int)ML_MASK) {
                        *token += ML_MASK;
                        len -= ML_MASK;
                        for (; len > 509; len -= 510) {
                                *op++ = 255;
                                *op++ = 255;
                        }
                        if (len > 254) {
                                len -= 255;
                                *op++ = 255;
                        }
                        *op++ = (BYTE)len;
                } else
                        *token += len;

                /* Test end of chunk */
                if (ip > mflimit) {
                        anchor = ip;
                        break;
                }
                /* Fill table */
                HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;

                /* Test next position */
                ref = base + HashTable[LZ4_HASH_VALUE(ip)];
                HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
                if ((ref > ip - (MAX_DISTANCE + 1)) && (A32(ref) == A32(ip))) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
                /* Prepare next loop */
                anchor = ip++;
                forwardH = LZ4_HASH_VALUE(ip);
        }

        _last_literals:
        /* Encode Last Literals */
        {
                int lastRun = iend - anchor;
                if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
                    oend)
                        return (0);
                if (lastRun >= (int)RUN_MASK) {
                        *op++ = (RUN_MASK << ML_BITS);
                        lastRun -= RUN_MASK;
                        for (; lastRun > 254; lastRun -= 255) {
                                *op++ = 255;
                        }
                        *op++ = (BYTE)lastRun;
                } else
                        *op++ = (lastRun << ML_BITS);
                (void) memcpy(op, anchor, iend - anchor);
                op += iend - anchor;
        }

        /* End */
        return (int)(((char *)op) - dest);
}



/* Note : this function is valid only if isize < LZ4_64KLIMIT */
#define LZ4_64KLIMIT ((1 << 16) + (MFLIMIT - 1))
#define HASHLOG64K (HASH_LOG + 1)
#define HASH64KTABLESIZE (1U << HASHLOG64K)
#define LZ4_HASH64K_FUNCTION(i) (((i) * 2654435761U) >> ((MINMATCH*8) - \
        HASHLOG64K))
#define LZ4_HASH64K_VALUE(p)    LZ4_HASH64K_FUNCTION(A32(p))
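
/*
 * Because every offset fits in 16 bits when isize < 64KB, the 64K
 * variant below stores U16 table entries instead of HTYPE ones; the
 * space saved allows twice as many hash slots (HASHLOG64K = HASH_LOG
 * + 1) in the same refTables allocation.
 */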

/*ARGSUSED*/
static int
LZ4_compress64kCtx(void *ctx, const char *source, char *dest, int isize,
    int osize)
{
#if HEAPMODE
        struct refTables *srt = (struct refTables *)ctx;
        U16 *HashTable = (U16 *) (srt->hashTable);
#else
        U16 HashTable[HASH64KTABLESIZE] = { 0 };
#endif

        const BYTE *ip = (BYTE *) source;
        const BYTE *anchor = ip;
        const BYTE *const base = ip;
        const BYTE *const iend = ip + isize;
        const BYTE *const oend = (BYTE *) dest + osize;
        const BYTE *const mflimit = iend - MFLIMIT;
#define matchlimit (iend - LASTLITERALS)

        BYTE *op = (BYTE *) dest;

        int len, length;
        const int skipStrength = SKIPSTRENGTH;
        U32 forwardH;

        /* Init */
        if (isize < MINLENGTH)
                goto _last_literals;

        /* First Byte */
        ip++;
        forwardH = LZ4_HASH64K_VALUE(ip);

        /* Main Loop */
        for (;;) {
                int findMatchAttempts = (1U << skipStrength) + 3;
                const BYTE *forwardIp = ip;
                const BYTE *ref;
                BYTE *token;

                /* Find a match */
                do {
                        U32 h = forwardH;
                        int step = findMatchAttempts++ >> skipStrength;
                        ip = forwardIp;
                        forwardIp = ip + step;

                        if (forwardIp > mflimit) {
                                goto _last_literals;
                        }

                        forwardH = LZ4_HASH64K_VALUE(forwardIp);
                        ref = base + HashTable[h];
                        HashTable[h] = ip - base;

                } while (A32(ref) != A32(ip));

                /* Catch up */
                while ((ip > anchor) && (ref > (BYTE *) source) &&
                    (ip[-1] == ref[-1])) {
                        ip--;
                        ref--;
                }

                /* Encode Literal length */
                length = ip - anchor;
                token = op++;

                /* Check output limit */
                if unlikely(op + length + (2 + 1 + LASTLITERALS) +
                    (length >> 8) > oend)
                        return (0);

                if (length >= (int)RUN_MASK) {
                        *token = (RUN_MASK << ML_BITS);
                        len = length - RUN_MASK;
                        for (; len > 254; len -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)len;
                } else
                        *token = (length << ML_BITS);

                /* Copy Literals */
                LZ4_BLINDCOPY(anchor, op, length);

                _next_match:
                /* Encode Offset */
                LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);

                /* Start Counting */
                ip += MINMATCH;
                ref += MINMATCH;        /* MinMatch verified */
                anchor = ip;
                while (ip < matchlimit - (STEPSIZE - 1)) {
                        UARCH diff = AARCH(ref) ^ AARCH(ip);
                        if (!diff) {
                                ip += STEPSIZE;
                                ref += STEPSIZE;
                                continue;
                        }
                        ip += LZ4_NbCommonBytes(diff);
                        goto _endCount;
                }
#if LZ4_ARCH64
                if ((ip < (matchlimit - 3)) && (A32(ref) == A32(ip))) {
                        ip += 4;
                        ref += 4;
                }
#endif
                if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
                        ip += 2;
                        ref += 2;
                }
                if ((ip < matchlimit) && (*ref == *ip))
                        ip++;
                _endCount:

                /* Encode MatchLength */
                len = (ip - anchor);
                /* Check output limit */
                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
                        return (0);
                if (len >= (int)ML_MASK) {
                        *token += ML_MASK;
                        len -= ML_MASK;
                        for (; len > 509; len -= 510) {
                                *op++ = 255;
                                *op++ = 255;
                        }
                        if (len > 254) {
                                len -= 255;
                                *op++ = 255;
                        }
                        *op++ = (BYTE)len;
                } else
                        *token += len;

                /* Test end of chunk */
                if (ip > mflimit) {
                        anchor = ip;
                        break;
                }
                /* Fill table */
                HashTable[LZ4_HASH64K_VALUE(ip - 2)] = ip - 2 - base;

                /* Test next position */
                ref = base + HashTable[LZ4_HASH64K_VALUE(ip)];
                HashTable[LZ4_HASH64K_VALUE(ip)] = ip - base;
                if (A32(ref) == A32(ip)) {
                        token = op++;
                        *token = 0;
                        goto _next_match;
                }
                /* Prepare next loop */
                anchor = ip++;
                forwardH = LZ4_HASH64K_VALUE(ip);
        }

        _last_literals:
        /* Encode Last Literals */
        {
                int lastRun = iend - anchor;
                if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
                    oend)
                        return (0);
                if (lastRun >= (int)RUN_MASK) {
                        *op++ = (RUN_MASK << ML_BITS);
                        lastRun -= RUN_MASK;
                        for (; lastRun > 254; lastRun -= 255)
                                *op++ = 255;
                        *op++ = (BYTE)lastRun;
                } else
                        *op++ = (lastRun << ML_BITS);
                (void) memcpy(op, anchor, iend - anchor);
                op += iend - anchor;
        }

        /* End */
        return (int)(((char *)op) - dest);
}

static int
real_LZ4_compress(const char *source, char *dest, int isize, int osize)
{
#if HEAPMODE
        void *ctx = kmem_zalloc(sizeof (struct refTables), KM_NOSLEEP);
        int result;

        /*
         * out of kernel memory, gently fall through - this will disable
         * compression in zio_compress_data
         */
        if (ctx == NULL)
                return (0);

        if (isize < LZ4_64KLIMIT)
                result = LZ4_compress64kCtx(ctx, source, dest, isize, osize);
        else
                result = LZ4_compressCtx(ctx, source, dest, isize, osize);

        kmem_free(ctx, sizeof (struct refTables));
        return (result);
#else
        if (isize < (int)LZ4_64KLIMIT)
                return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
        return (LZ4_compressCtx(NULL, source, dest, isize, osize));
#endif
}

/* Decompression functions */

/*
 * Note: The decoding functions real_LZ4_uncompress() and
 *      LZ4_uncompress_unknownOutputSize() are safe against "buffer
 *      overflow" attacks. They will never write nor read outside of the
 *      provided output buffers. LZ4_uncompress_unknownOutputSize() also
 *      ensures that it will never read outside of the input buffer. A
 *      corrupted input will produce an error result, a negative int,
 *      indicating the position of the error within the input stream.
 */
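
/*
 * A minimal decoding sketch under the guarantees above (hypothetical
 * caller code; comp, c_len and the output size are assumptions):
 *
 *      char plain[65536];
 *      int n = LZ4_uncompress_unknownOutputSize(comp, plain, c_len,
 *          sizeof (plain));
 *      if (n < 0) {
 *              ... corrupted input; -n is the offending input position
 *      } else {
 *              ... n bytes were decoded, with n <= sizeof (plain)
 *      }
 */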

static int
real_LZ4_uncompress(const char *source, char *dest, int osize)
{
        /* Local Variables */
        const BYTE *restrict ip = (const BYTE *) source;
        const BYTE *ref;

        BYTE *op = (BYTE *) dest;
        BYTE *const oend = op + osize;
        BYTE *cpy;

        unsigned token;

        size_t length;
        size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
        size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

        /* Main Loop */
        for (;;) {
                /* get runlength */
                token = *ip++;
                if ((length = (token >> ML_BITS)) == RUN_MASK) {
                        size_t len;
                        for (; (len = *ip++) == 255; length += 255) {
                        }
                        length += len;
                }
                /* copy literals */
                cpy = op + length;
                if unlikely(cpy > oend - COPYLENGTH) {
                        if (cpy != oend)
                                /* Error: we must necessarily stand at EOF */
                                goto _output_error;
                        (void) memcpy(op, ip, length);
                        ip += length;
                        break;  /* EOF */
                }
                LZ4_WILDCOPY(ip, op, cpy);
                ip -= (op - cpy);
                op = cpy;

                /* get offset */
                LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
                ip += 2;
                if unlikely(ref < (BYTE * const) dest)
                        /*
                         * Error: offset creates reference outside of
                         * destination buffer
                         */
                        goto _output_error;

                /* get matchlength */
                if ((length = (token & ML_MASK)) == ML_MASK) {
                        for (; *ip == 255; length += 255) {
                                ip++;
                        }
                        length += *ip++;
                }
                /* copy repeated sequence */
                if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
                        size_t dec64 = dec64table[op-ref];
#else
                        const int dec64 = 0;
#endif
                        op[0] = ref[0];
                        op[1] = ref[1];
                        op[2] = ref[2];
                        op[3] = ref[3];
                        op += 4;
                        ref += 4;
                        ref -= dec32table[op-ref];
                        A32(op) = A32(ref);
                        op += STEPSIZE - 4;
                        ref -= dec64;
                } else {
                        LZ4_COPYSTEP(ref, op);
                }
                cpy = op + length - (STEPSIZE - 4);
                if (cpy > oend - COPYLENGTH) {
                        if (cpy > oend)
                                /*
                                 * Error: request to write beyond destination
                                 * buffer
                                 */
                                goto _output_error;
                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
                        while (op < cpy)
                                *op++ = *ref++;
                        op = cpy;
                        if (op == oend)
                                /*
                                 * Check EOF (should never happen, since last
                                 * 5 bytes are supposed to be literals)
                                 */
                                goto _output_error;
                        continue;
                }
                LZ4_SECURECOPY(ref, op, cpy);
                op = cpy;       /* correction */
        }

        /* end of decoding */
        return (int)(((char *)ip) - source);

        /* write overflow error detected */
        _output_error:
        return (int)(-(((char *)ip) - source));
}

static int
LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
    int maxOutputSize)
{
        /* Local Variables */
        const BYTE *restrict ip = (const BYTE *) source;
        const BYTE *const iend = ip + isize;
        const BYTE *ref;

        BYTE *op = (BYTE *) dest;
        BYTE *const oend = op + maxOutputSize;
        BYTE *cpy;

        size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
#if LZ4_ARCH64
        size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
#endif

        /* Main Loop */
        while (ip < iend) {
                unsigned token;
                size_t length;

                /* get runlength */
                token = *ip++;
                if ((length = (token >> ML_BITS)) == RUN_MASK) {
                        int s = 255;
                        while ((ip < iend) && (s == 255)) {
                                s = *ip++;
                                length += s;
                        }
                }
                /* copy literals */
                cpy = op + length;
                if ((cpy > oend - COPYLENGTH) ||
                    (ip + length > iend - COPYLENGTH)) {
                        if (cpy > oend)
                                /* Error: writes beyond output buffer */
                                goto _output_error;
                        if (ip + length != iend)
                                /*
                                 * Error: LZ4 format requires consuming all
                                 * input at this stage
                                 */
                                goto _output_error;
                        (void) memcpy(op, ip, length);
                        op += length;
                        /* Necessarily EOF, due to parsing restrictions */
                        break;
                }
                LZ4_WILDCOPY(ip, op, cpy);
                ip -= (op - cpy);
                op = cpy;

                /* get offset */
                LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
                ip += 2;
                if (ref < (BYTE * const) dest)
                        /*
                         * Error: offset creates reference outside of
                         * destination buffer
                         */
                        goto _output_error;

                /* get matchlength */
                if ((length = (token & ML_MASK)) == ML_MASK) {
                        while (ip < iend) {
                                int s = *ip++;
                                length += s;
                                if (s == 255)
                                        continue;
                                break;
                        }
                }
                /* copy repeated sequence */
                if unlikely(op - ref < STEPSIZE) {
#if LZ4_ARCH64
                        size_t dec64 = dec64table[op-ref];
#else
                        const int dec64 = 0;
#endif
                        op[0] = ref[0];
                        op[1] = ref[1];
                        op[2] = ref[2];
                        op[3] = ref[3];
                        op += 4;
                        ref += 4;
                        ref -= dec32table[op-ref];
                        A32(op) = A32(ref);
                        op += STEPSIZE - 4;
                        ref -= dec64;
                } else {
                        LZ4_COPYSTEP(ref, op);
                }
                cpy = op + length - (STEPSIZE - 4);
                if (cpy > oend - COPYLENGTH) {
                        if (cpy > oend)
                                /*
                                 * Error: request to write outside of
                                 * destination buffer
                                 */
                                goto _output_error;
                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
                        while (op < cpy)
                                *op++ = *ref++;
                        op = cpy;
                        if (op == oend)
                                /*
                                 * Check EOF (should never happen, since
                                 * last 5 bytes are supposed to be literals)
                                 */
                                goto _output_error;
                        continue;
                }
                LZ4_SECURECOPY(ref, op, cpy);
                op = cpy;       /* correction */
        }

        /* end of decoding */
        return (int)(((char *)op) - dest);

        /* write overflow error detected */
        _output_error:
        return (int)(-(((char *)ip) - source));
}