1 /*
   2  * LZ4 HC - High Compression Mode of LZ4
   3  * Copyright (C) 2011-2012, Yann Collet.
   4  * BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are
   8  * met:
   9  *
  10  *     * Redistributions of source code must retain the above copyright
  11  * notice, this list of conditions and the following disclaimer.
  12  *     * Redistributions in binary form must reproduce the above
  13  * copyright notice, this list of conditions and the following disclaimer
  14  * in the documentation and/or other materials provided with the
  15  * distribution.
  16  *
  17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  18  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  19  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  20  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  21  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  22  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  23  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  24  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  25  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  26  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  27  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  28  *
  29  * You can contact the author at :
  30  * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  31  * - LZ4 source repository : http://code.google.com/p/lz4/
  32  */
  33 
  34 
  35 /*
  36  * CPU Feature Detection
  37  */
  38 
  39 /* 32 or 64 bits ? */
  40 #if (defined(__x86_64__) || defined(__x86_64) || defined(__amd64__) || \
  41     defined(__amd64) || defined(__ppc64__) || defined(_WIN64) || \
  42     defined(__LP64__) || defined(_LP64))
  43 #define LZ4_ARCH64 1
  44 #else
  45 #define LZ4_ARCH64 0
  46 #endif
  47 
  48 /*
  49  * Little Endian or Big Endian?
  50  * Note: overwrite the below #define if you know your architecture endianess.
  51  */
  52 #if (defined(__BIG_ENDIAN__) || defined(__BIG_ENDIAN) || \
  53         defined(_BIG_ENDIAN) || defined(_ARCH_PPC) || defined(__PPC__) || \
  54         defined(__PPC) || defined(PPC) || defined(__powerpc__) || \
  55         defined(__powerpc) || defined(powerpc) || \
  56         ((defined(__BYTE_ORDER__)&&(__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__))))
  57 #define LZ4_BIG_ENDIAN 1
  58 #else
  59 /*
  60  * Little Endian assumed. PDP Endian and other very rare endian format
  61  * are unsupported.
  62  */
  63 #endif
  64 
  65 /*
  66  * Unaligned memory access is automatically enabled for "common" CPU,
  67  * such as x86. For others CPU, the compiler will be more cautious, and
  68  * insert extra code to ensure aligned access is respected. If you know
  69  * your target CPU supports unaligned memory access, you may want to
  70  * force this option manually to improve performance
  71  */
  72 #if defined(__ARM_FEATURE_UNALIGNED)
  73 #define LZ4_FORCE_UNALIGNED_ACCESS 1
  74 #endif
  75 
  76 /*
  77  * Illumos: we can't use GCC's __builtin_ctz family of builtins in the
  78  * kernel
  79  */
  80 #define LZ4_FORCE_SW_BITCOUNT
  81 
  82 /*
  83  * Compiler Options
  84  */
  85 #if __STDC_VERSION__ >= 199901L /* C99 */
  86 /* "restrict" is a known keyword */
  87 #else
  88 /* Disable restrict */
  89 #define restrict
  90 #endif
  91 
  92 #define GCC_VERSION (__GNUC__ * 100 + __GNUC_MINOR__)
  93 
  94 #ifdef _MSC_VER
  95 #define lz4_bswap16(x) _byteswap_ushort(x)
  96 #else /* !_MSC_VER */
  97 #define lz4_bswap16(x) ((unsigned short int) ((((x) >> 8) & 0xffu) | \
  98         (((x) & 0xffu) << 8)))
  99 #endif /* !_MSC_VER */
 100 
 101 #if (GCC_VERSION >= 302) || (__INTEL_COMPILER >= 800) || defined(__clang__)
 102 #define expect(expr, value)     (__builtin_expect((expr), (value)))
 103 #else
 104 #define expect(expr, value)     (expr)
 105 #endif
 106 
 107 #define likely(expr)    expect((expr) != 0, 1)
 108 #define unlikely(expr)  expect((expr) != 0, 0)
 109 
 110 /*
 111  * Includes
 112  */
 113 #include <sys/zfs_context.h>
 114 
 115 #define ALLOCATOR(s) kmem_alloc(s, KM_NOSLEEP)
 116 #define FREEMEM kmem_free
 117 #define MEM_INIT (void) memset
 118 
/* Basic Types */
#if defined(_MSC_VER)
/* Visual Studio does not support 'stdint' natively */
#define	BYTE	unsigned __int8
#define	U16	unsigned __int16
#define	U32	unsigned __int32
#define	S32	__int32
#define	U64	unsigned __int64
#else /* !defined(_MSC_VER) */
#define	BYTE	uint8_t
#define	U16	uint16_t
#define	U32	uint32_t
#define	S32	int32_t
#define	U64	uint64_t
#endif /* !defined(_MSC_VER) */

/*
 * Wrapper structs for word-sized loads/stores at arbitrary byte
 * addresses (see A16/A32/A64 below).  When unaligned access is not
 * known to be safe, pack(1) forces the compiler to emit code that
 * tolerates misaligned members.
 */
#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack(1)
#endif

typedef struct _U16_S {
	U16 v;
} U16_S;
typedef struct _U32_S {
	U32 v;
} U32_S;
typedef struct _U64_S {
	U64 v;
} U64_S;

#ifndef LZ4_FORCE_UNALIGNED_ACCESS
#pragma pack()
#endif

/* Access a 64/32/16-bit quantity at an arbitrary (byte) address */
#define	A64(x) (((U64_S *)(x))->v)
#define	A32(x) (((U32_S *)(x))->v)
#define	A16(x) (((U16_S *)(x))->v)
 156 
 157 
/* Constants */
/* Minimum match length the LZ4 format can encode */
#define	MINMATCH 4

/* Search window: 64 KB, so match offsets fit in 16 bits */
#define	DICTIONARY_LOGSIZE 16
#define	MAXD (1<<DICTIONARY_LOGSIZE)
#define	MAXD_MASK ((U32)(MAXD - 1))
#define	MAX_DISTANCE (MAXD - 1)

#define	HASH_LOG (DICTIONARY_LOGSIZE - 1)
#define	HASHTABLESIZE (1 << HASH_LOG)
#define	HASH_MASK (HASHTABLESIZE - 1)

/* Cap on hash-chain entries visited per search (bounds compression effort) */
#define	MAX_NB_ATTEMPTS 256

/* Token layout: low ML_BITS = match length, high RUN_BITS = literal run */
#define	ML_BITS  4
#define	ML_MASK  (size_t)((1U << ML_BITS) - 1)
#define	RUN_BITS (8 - ML_BITS)
#define	RUN_MASK ((1U << RUN_BITS) - 1)

#define	COPYLENGTH 8
/* Match search stops LASTLITERALS bytes before input end (see matchlimit) */
#define	LASTLITERALS 5
#define	MFLIMIT (COPYLENGTH + MINMATCH)
#define	MINLENGTH (MFLIMIT + 1)
#define	OPTIMAL_ML (int)((ML_MASK - 1) + MINMATCH)
 182 
 183 
/* Architecture-specific macros */
#if LZ4_ARCH64
#define	STEPSIZE 8
#define	LZ4_COPYSTEP(s, d)	A64(d) = A64(s); d += 8; s += 8;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d)
#define	UARCH U64
#define	AARCH A64
/* 64-bit: hash table stores 32-bit offsets relative to a real base pointer */
#define	HTYPE			U32
#define	INITBASE(b, s)		const BYTE* const b = s
#else /* !LZ4_ARCH64 */
#define	STEPSIZE 4
#define	LZ4_COPYSTEP(s, d)	A32(d) = A32(s); d += 4; s += 4;
#define	LZ4_COPYPACKET(s, d)	LZ4_COPYSTEP(s, d); LZ4_COPYSTEP(s, d);
#define	UARCH U32
#define	AARCH A32
/* 32-bit: hash table stores pointers directly; "base" is the constant 0 */
#define	HTYPE			const BYTE*
#define	INITBASE(b, s)		const int b = 0
#endif /* !LZ4_ARCH64 */

/* Read/write the 16-bit match offset in little-endian byte order */
#if defined(LZ4_BIG_ENDIAN)
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ U16 v = A16(p); v = lz4_bswap16(v); d = (s) - v; }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, i) \
	{ U16 v = (U16)(i); v = lz4_bswap16(v); A16(p) = v; p += 2; }
#else /* !defined(LZ4_BIG_ENDIAN) */
#define	LZ4_READ_LITTLEENDIAN_16(d, s, p) \
	{ d = (s) - A16(p); }
#define	LZ4_WRITE_LITTLEENDIAN_16(p, v) \
	{ A16(p) = v; p += 2; }
#endif /* !defined(LZ4_BIG_ENDIAN) */


/* Local Types */
/*
 * HC compression context.  hashTable maps the hash of 4 input bytes to
 * the most recent position that hashed there; chainTable links each
 * window position to the previous position with the same hash via a
 * backward delta (see DELTANEXT/ADD_HASH below).
 */
typedef struct {
	const BYTE* base;
	HTYPE hashTable[HASHTABLESIZE];
	U16 chainTable[MAXD];
	const BYTE* nextToUpdate;
} LZ4HC_Data_Structure;


/* Macros */
#define	LZ4_WILDCOPY(s, d, e)	do { LZ4_COPYPACKET(s, d) } while (d < e);
#define	LZ4_BLINDCOPY(s, d, l)	{ BYTE* e = d + l; LZ4_WILDCOPY(s, d, e); \
	d = e; }
#define	HASH_FUNCTION(i)	(((i) * 2654435761U) >> ((MINMATCH * 8) - \
	HASH_LOG))
#define	HASH_VALUE(p)		HASH_FUNCTION(A32(p))
/* Most recent position whose first 4 bytes hash like *p */
#define	HASH_POINTER(p)		(HashTable[HASH_VALUE(p)] + base)
/* Backward delta from p to the previous same-hash position */
#define	DELTANEXT(p)		chainTable[(size_t)(p) & MAXD_MASK]
#define	GETNEXT(p)		((p) - (size_t)DELTANEXT(p))
/* Record p in the tables; deltas are clamped to MAX_DISTANCE */
#define	ADD_HASH(p) \
	{ \
		size_t delta = (p) - HASH_POINTER(p); \
		if (delta > MAX_DISTANCE) \
			delta = MAX_DISTANCE; \
		DELTANEXT(p) = (U16)delta; \
		HashTable[HASH_VALUE(p)] = (p) - base; \
	}
 243 
 244 
/* Private functions */
#if LZ4_ARCH64

/*
 * Given the non-zero XOR of two 64-bit words (callers only invoke this
 * when diff != 0), return how many bytes the words had in common before
 * their first differing byte: counted from the most significant byte on
 * big-endian, from the least significant byte on little-endian, to
 * match in-memory comparison order.
 */
static inline int
LZ4_NbCommonBytes(register U64 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clzll(val) >> 3);
#else
	/* Software fallback: binary-search for the highest non-zero byte */
	int r;
	if (!(val >> 32))
		r = 4;
	else {
		r = 0;
		val >>= 32;
	}
	if (!(val >> 16)) {
		r += 2;
		val >>= 8;
	} else
		val >>= 24;
	r += (!val);
	return (r);
#endif
#else /* !defined(LZ4_BIG_ENDIAN) */
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward64(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctzll(val) >> 3);
#else
	/*
	 * Software fallback: (val & -val) isolates the lowest set bit;
	 * the de Bruijn multiply/shift maps it to its byte index.
	 */
	static const int DeBruijnBytePos[64] = {
	    0, 0, 0, 0, 0, 1, 1, 2,
	    0, 3, 1, 3, 1, 4, 2, 7,
	    0, 2, 3, 6, 1, 5, 3, 5,
	    1, 3, 4, 4, 2, 5, 6, 7,
	    7, 0, 1, 2, 3, 3, 4, 6,
	    2, 6, 5, 5, 3, 4, 5, 6,
	    7, 1, 2, 4, 6, 4, 4, 5,
	    7, 2, 6, 5, 7, 6, 7, 7
	};
	return DeBruijnBytePos[((U64)((val & -val) * 0x0218A392CDABBD3F)) >>
	    58];
#endif
#endif /* !defined(LZ4_BIG_ENDIAN) */
}
 299 
#else /* !LZ4_ARCH64 */

/*
 * 32-bit variant: given the non-zero XOR of two 32-bit words, return
 * the number of identical bytes before the first difference (counted
 * from the MSB on big-endian, from the LSB on little-endian).
 */
static inline int
LZ4_NbCommonBytes(register U32 val)
{
#if defined(LZ4_BIG_ENDIAN)
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanReverse(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_clz(val) >> 3);
#else
	/* Software fallback: locate the highest non-zero byte */
	int r;
	if (!(val >> 16)) {
		r = 2;
		val >>= 8;
	} else {
		r = 0;
		val >>= 24;
	}
	r += (!val);
	return (r);
#endif
#else /* !defined(LZ4_BIG_ENDIAN) */
#if defined(_MSC_VER) && !defined(LZ4_FORCE_SW_BITCOUNT)
	unsigned long r = 0;
	_BitScanForward(&r, val);
	return (int)(r >> 3);
#elif defined(__GNUC__) && ((__GNUC__ * 100 + __GNUC_MINOR__) >= 304) && \
	!defined(LZ4_FORCE_SW_BITCOUNT)
	return (__builtin_ctz(val) >> 3);
#else
	/*
	 * Software fallback: (val & -val) isolates the lowest set bit;
	 * the de Bruijn multiply/shift maps it to its byte index.
	 */
	static const int DeBruijnBytePos[32] = {
	    0, 0, 3, 0, 3, 1, 3, 0,
	    3, 2, 2, 1, 3, 2, 0, 1,
	    3, 3, 1, 2, 2, 2, 2, 0,
	    3, 1, 2, 0, 1, 0, 1, 1
	};
	return (DeBruijnBytePos[((U32)((val & -(S32)val) * 0x077CB531U)) >>
	    27]);
#endif
#endif /* !defined(LZ4_BIG_ENDIAN) */
}

#endif /* !LZ4_ARCH64 */
 347 
 348 static inline void
 349 LZ4HC_Init(LZ4HC_Data_Structure *hc4, const BYTE *base)
 350 {
 351         MEM_INIT((void *)hc4->hashTable, 0, sizeof (hc4->hashTable));
 352         MEM_INIT(hc4->chainTable, 0xFF, sizeof (hc4->chainTable));
 353         hc4->nextToUpdate = base + LZ4_ARCH64;
 354         hc4->base = base;
 355 }
 356 
 357 static inline void *
 358 LZ4HC_Create(const BYTE *base)
 359 {
 360         LZ4HC_Data_Structure *hc4 = ALLOCATOR(sizeof (LZ4HC_Data_Structure));
 361 
 362         LZ4HC_Init(hc4, base);
 363         return (hc4);
 364 }
 365 
 366 static inline void
 367 LZ4HC_Free(LZ4HC_Data_Structure **LZ4HC_Data)
 368 {
 369         FREEMEM(*LZ4HC_Data, sizeof (LZ4HC_Data_Structure));
 370         *LZ4HC_Data = NULL;
 371 }
 372 
 373 
/*
 * Register every position from hc4->nextToUpdate up to (but excluding)
 * ip in the hash/chain tables so subsequent searches can find them.
 */
static inline void
LZ4HC_Insert(LZ4HC_Data_Structure *hc4, const BYTE *ip)
{
	/* chainTable, HashTable and base are referenced by ADD_HASH() */
	U16 *chainTable = hc4->chainTable;
	HTYPE *HashTable = hc4->hashTable;
	INITBASE(base, hc4->base);

	while (hc4->nextToUpdate < ip) {
		ADD_HASH(hc4->nextToUpdate);
		hc4->nextToUpdate++;
	}
}
 386 
 387 
/*
 * Insert ip into the tables, then walk its hash chain looking for the
 * longest match ending before matchlimit.  Visits at most
 * MAX_NB_ATTEMPTS chain entries within MAX_DISTANCE of ip.  Returns
 * the best match length found (0 if none of at least MINMATCH bytes),
 * and stores its location in *matchpos.
 */
static inline int
LZ4HC_InsertAndFindBestMatch(LZ4HC_Data_Structure *hc4, const BYTE *ip,
    const BYTE *const matchlimit, const BYTE **matchpos)
{
	U16 *const chainTable = hc4->chainTable;
	HTYPE *const HashTable = hc4->hashTable;
	const BYTE *ref;
	INITBASE(base, hc4->base);
	int nbAttempts = MAX_NB_ATTEMPTS;
	int ml = 0;

	/* HC4 match finder */
	LZ4HC_Insert(hc4, ip);
	ref = HASH_POINTER(ip);
	while ((ref >= (ip - MAX_DISTANCE)) && (nbAttempts)) {
		nbAttempts--;
		/*
		 * Cheap pre-filter: a candidate can only beat the current
		 * best if it agrees at offset ml; only then do the full
		 * 4-byte check and length count.
		 */
		if (*(ref + ml) == *(ip + ml))
		if (A32(ref) == A32(ip)) {
			const BYTE *reft = ref + MINMATCH;
			const BYTE *ipt = ip + MINMATCH;

			/* Extend word-at-a-time; XOR diff finds mismatch */
			while (ipt < matchlimit - (STEPSIZE - 1)) {
				UARCH diff = AARCH(reft) ^ AARCH(ipt);
				if (!diff) {
					ipt += STEPSIZE;
					reft += STEPSIZE;
					continue;
				}
				ipt += LZ4_NbCommonBytes(diff);
				goto _endCount;
			}
			/* Tail: finish with 4/2/1-byte comparisons */
#if LZ4_ARCH64
			if ((ipt < (matchlimit - 3)) &&
			    (A32(reft) == A32(ipt))) {
				ipt += 4;
				reft += 4;
			}
#endif /* LZ4_ARCH64 */
			if ((ipt < (matchlimit - 1)) &&
			    (A16(reft) == A16(ipt))) {
				ipt += 2;
				reft += 2;
			}
			if ((ipt < matchlimit) && (*reft == *ipt))
				ipt++;
		_endCount:
			if (ipt - ip > ml) {
				ml = (int)(ipt - ip);
				*matchpos = ref;
			}
		}
		/* Follow the chain to the previous same-hash position */
		ref = GETNEXT(ref);
	}

	return (ml);
}
 444 
 445 
/*
 * Like LZ4HC_InsertAndFindBestMatch(), but also extends candidate
 * matches backwards (down to startLimit) so a match may start before
 * ip.  "longest" is the length to beat.  Returns the best total length
 * found; on improvement stores the match source in *matchpos and the
 * (possibly earlier) match start in *startpos.
 */
static inline int
LZ4HC_InsertAndGetWiderMatch(LZ4HC_Data_Structure *hc4, const BYTE *ip,
    const BYTE *startLimit, const BYTE *matchlimit, int longest,
    const BYTE **matchpos, const BYTE **startpos)
{
	U16 *const chainTable = hc4->chainTable;
	HTYPE *const HashTable = hc4->hashTable;
	INITBASE(base, hc4->base);
	const BYTE *ref;
	int nbAttempts = MAX_NB_ATTEMPTS;
	int delta = (int)(ip - startLimit);

	/* First Match */
	LZ4HC_Insert(hc4, ip);
	ref = HASH_POINTER(ip);

	while ((ref >= ip-MAX_DISTANCE) && (ref >= hc4->base) && (nbAttempts)) {
		nbAttempts--;
		/*
		 * Pre-filter: candidate can only beat "longest" if the
		 * bytes at the deciding offset agree.
		 */
		if (*(startLimit + longest) == *(ref - delta + longest))
			if (A32(ref) == A32(ip)) {
				const BYTE *reft = ref + MINMATCH;
				const BYTE *ipt = ip + MINMATCH;
				const BYTE *startt = ip;

				/* Extend forward word-at-a-time */
				while (ipt < matchlimit - (STEPSIZE - 1)) {
					UARCH diff = AARCH(reft) ^ AARCH(ipt);
					if (!diff) {
						ipt += STEPSIZE;
						reft += STEPSIZE;
						continue;
					}
					ipt += LZ4_NbCommonBytes(diff);
					goto _endCount;
				}
#if LZ4_ARCH64
				if ((ipt < (matchlimit - 3)) &&
				    (A32(reft) == A32(ipt))) {
					ipt += 4;
					reft += 4;
				}
#endif /* LZ4_ARCH64 */
				if ((ipt < (matchlimit - 1)) &&
				    (A16(reft) == A16(ipt))) {
					ipt += 2;
					reft += 2;
				}
				if ((ipt < matchlimit) && (*reft == *ipt))
					ipt++;

			_endCount:
				/* Extend backwards while bytes keep agreeing */
				reft = ref;
				while ((startt > startLimit) &&
				    (reft > hc4->base) &&
				    (startt[-1] == reft[-1])) {
					startt--;
					reft--;
				}

				if ((ipt-startt) > longest) {
					longest = (int)(ipt-startt);
					*matchpos = reft;
					*startpos = startt;
				}
			}
		ref = GETNEXT(ref);
	}

	return (longest);
}
 515 
/*
 * Emit one LZ4 sequence at *op: token, literal-run length, the literals
 * from *anchor to *ip, the 2-byte little-endian offset, and the match
 * length.  On success advances *ip past the match, sets *anchor = *ip,
 * and returns 1.  Returns 0 (with *op possibly advanced past the
 * token/length bytes) if the sequence might not fit before oend.
 */
static inline int
LZ4_encodeSequence(const BYTE **ip, BYTE **op, const BYTE *oend,
    const BYTE **anchor, int ml, const BYTE *ref)
{
	int length, len;
	BYTE *token;

	/* Encode Literal length */
	length = (int)(*ip - *anchor);
	token = (*op)++;

	/* Check output limit */
	if unlikely((*op) + length + (2 + 1 + LASTLITERALS) + (length >> 8) >
	    oend)
		return (0);

	/* Runs >= RUN_MASK spill into extra 255-valued length bytes */
	if (length >= (int) RUN_MASK) {
		*token = (RUN_MASK << ML_BITS);
		len = length - RUN_MASK;
		for (; len > 254; len -= 255)
			*(*op)++ = 255;
		*(*op)++ = (BYTE)len;
	} else
		*token = (length << ML_BITS);

	/* Copy Literals */
	LZ4_BLINDCOPY(*anchor, *op, length);

	/* Encode Offset */
	LZ4_WRITE_LITTLEENDIAN_16(*op, (U16)(*ip - ref));

	/* Encode MatchLength */
	len = (int)(ml - MINMATCH);
	/* Check output limit */
	if unlikely((*op) + (1 + LASTLITERALS) + (len >> 8) > oend)
		return (0);
	/* Lengths >= ML_MASK spill into extra 255-valued bytes */
	if (len >= (int)ML_MASK) {
		*token += ML_MASK;
		len -= ML_MASK;
		for (; len > 509; len -= 510) {
			*(*op)++ = 255;
			*(*op)++ = 255;
		}
		if (len > 254) {
			len -= 255;
			*(*op)++ = 255;
		}
		*(*op)++ = (BYTE)len;
	} else
		*token += len;

	/* Prepare next loop */
	*ip += ml;
	*anchor = *ip;

	return (1);
}
 573 
 574 /* Compression CODE */
 575 
/*
 * Compress isize bytes from source into dest (capacity osize) using the
 * HC match finder in ctx.  Returns the number of bytes written to dest,
 * or 0 if the compressed output would not fit.
 *
 * The main loop speculatively searches for up to three overlapping
 * matches (ip/ref/ml, start2/ref2/ml2, start3/ref3/ml3) and only emits
 * a sequence once no later, wider match can improve on it.
 */
static int
LZ4_compressHCCtx(LZ4HC_Data_Structure *ctx, const char *source, char *dest,
    int isize, int osize)
{
	const BYTE *ip = (const BYTE*) source;
	const BYTE *anchor = ip;
	const BYTE *const iend = ip + isize;
	const BYTE *const oend = (BYTE *) dest + osize;
	const BYTE *const mflimit = iend - MFLIMIT;
	const BYTE *const matchlimit = (iend - LASTLITERALS);

	BYTE *op = (BYTE*) dest;

	int ml, ml2, ml3, ml0;
	const BYTE *ref = NULL;
	const BYTE *start2 = NULL;
	const BYTE *ref2 = NULL;
	const BYTE *start3 = NULL;
	const BYTE *ref3 = NULL;
	const BYTE *start0;
	const BYTE *ref0;

	/*
	 * Skip the first byte for match search; anchor still covers it,
	 * so it is emitted as a literal.
	 */
	ip++;

	/* Main Loop */
	while (ip < mflimit) {
		ml = LZ4HC_InsertAndFindBestMatch(ctx, ip, matchlimit, (&ref));
		if (!ml) {
			ip++;
			continue;
		}

		/* saved, in case we would skip too much */
		start0 = ip;
		ref0 = ref;
		ml0 = ml;

	_Search2:
		/* Look for a wider match overlapping the end of match 1 */
		if (ip+ml < mflimit)
			ml2 = LZ4HC_InsertAndGetWiderMatch(ctx, ip + ml - 2,
			    ip + 1, matchlimit, ml, &ref2, &start2);
		else
			ml2 = ml;

		/* No better match */
		if (ml2 == ml) {
			if unlikely(!LZ4_encodeSequence(&ip, &op, oend,
			    &anchor, ml, ref))
				return (0);
			continue;
		}

		if (start0 < ip) {
			/* empirical */
			if (start2 < ip + ml0) {
				ip = start0;
				ref = ref0;
				ml = ml0;
			}
		}

		/* Here, start0 == ip */
		if ((start2 - ip) < 3) {
			/* First Match too small : removed */
			ml = ml2;
			ip = start2;
			ref = ref2;
			goto _Search2;
		}

	_Search3:
		/*
		 * Currently we have:
		 * ml2 > ml1, and
		 * ip1+3 <= ip2 (usually < ip1+ml1)
		 */
		if ((start2 - ip) < OPTIMAL_ML) {
			int correction;
			int new_ml = ml;
			if (new_ml > OPTIMAL_ML)
				new_ml = OPTIMAL_ML;
			if (ip+new_ml > start2 + ml2 - MINMATCH)
				new_ml = (int)(start2 - ip) + ml2 - MINMATCH;
			correction = new_ml - (int)(start2 - ip);
			if (correction > 0) {
				start2 += correction;
				ref2 += correction;
				ml2 -= correction;
			}
		}
		/*
		 * Now, we have start2 = ip+new_ml, with
		 * new_ml=min(ml, OPTIMAL_ML=18)
		 */

		if (start2 + ml2 < mflimit)
			ml3 = LZ4HC_InsertAndGetWiderMatch(ctx, start2 + ml2 -
			    3, start2, matchlimit, ml2, &ref3, &start3);
		else
			ml3 = ml2;

		/* No better match: 2 sequences to encode */
		if (ml3 == ml2) {
			/* ip & ref are known; Now for ml */
			if (start2 < ip+ml) {
				if ((start2 - ip) < OPTIMAL_ML) {
					int correction;
					if (ml > OPTIMAL_ML) ml = OPTIMAL_ML;
					if (ip+ml > start2 + ml2 - MINMATCH)
						ml = (int)(start2 - ip) +
						    ml2 - MINMATCH;
					correction = ml - (int)(start2 - ip);
					if (correction > 0) {
						start2 += correction;
						ref2 += correction;
						ml2 -= correction;
					}
				} else {
					ml = (int)(start2 - ip);
				}
			}
			/* Now, encode 2 sequences */
			if unlikely(!LZ4_encodeSequence(&ip, &op, oend,
			    &anchor, ml, ref))
				return (0);
			ip = start2;
			if unlikely(!LZ4_encodeSequence(&ip, &op, oend,
			    &anchor, ml2, ref2))
				return (0);
			continue;
		}

		/* Not enough space for match 2: remove it */
		if (start3 < ip + ml + 3) {
			/*
			 * can write Seq1 immediately ==> Seq2 is removed,
			 * so Seq3 becomes Seq1
			 */
			if (start3 >= (ip+ml)) {
				if (start2 < ip+ml) {
					int correction = (int)(ip+ml - start2);
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
					if (ml2 < MINMATCH) {
						start2 = start3;
						ref2 = ref3;
						ml2 = ml3;
					}
				}

				if unlikely(!LZ4_encodeSequence(&ip, &op, oend,
				    &anchor, ml, ref))
					return (0);
				ip  = start3;
				ref = ref3;
				ml  = ml3;

				start0 = start2;
				ref0 = ref2;
				ml0 = ml2;
				goto _Search2;
			}

			start2 = start3;
			ref2 = ref3;
			ml2 = ml3;
			goto _Search3;
		}

		/*
		 * OK, now we have 3 ascending matches; let's write at least
		 * the first one
		 * ip & ref are known; Now for ml
		 */
		if (start2 < ip+ml) {
			if ((start2 - ip) < (int)ML_MASK) {
				int correction;
				if (ml > OPTIMAL_ML)
					ml = OPTIMAL_ML;
				if (ip + ml > start2 + ml2 - MINMATCH)
					ml = (int)(start2 - ip) + ml2 -
					    MINMATCH;
				correction = ml - (int)(start2 - ip);
				if (correction > 0) {
					start2 += correction;
					ref2 += correction;
					ml2 -= correction;
				}
			} else {
				ml = (int)(start2 - ip);
			}
		}
		if unlikely(!LZ4_encodeSequence(&ip, &op, oend, &anchor, ml,
		    ref))
			return (0);

		/* Promote match 2 -> 1 and match 3 -> 2, then re-search */
		ip = start2;
		ref = ref2;
		ml = ml2;

		start2 = start3;
		ref2 = ref3;
		ml2 = ml3;

		goto _Search3;

	}

	/* Encode Last Literals */
	{
		int lastRun = (int)(iend - anchor);
		if (op + lastRun + 1 + ((lastRun + 255 - RUN_MASK) / 255) >
		    oend)
			return (0);
		if (lastRun >= (int) RUN_MASK) {
			*op++ = (RUN_MASK << ML_BITS);
			lastRun -= RUN_MASK;
			for (; lastRun > 254; lastRun -= 255) {
				*op++ = 255;
			}
			*op++ = (BYTE) lastRun;
		} else
			*op++ = (lastRun << ML_BITS);
		(void) memcpy(op, anchor, iend - anchor);
		op += iend - anchor;
	}

	/* End */
	return (int) (((char *) op) - dest);
}
 807 
 808 int
 809 zfs_LZ4_compressHC(const char *source, char *dest, int isize, int osize)
 810 {
 811         LZ4HC_Data_Structure *ctx = LZ4HC_Create((const BYTE*) source);
 812         int result = LZ4_compressHCCtx(ctx, source, dest, isize, osize);
 813         LZ4HC_Free(&ctx);
 814 
 815         return (result);
 816 }