Print this page
Integrated r91 LZ4.

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/lz4.c
          +++ new/usr/src/uts/common/fs/zfs/lz4.c
↓ open down ↓ 22 lines elided ↑ open up ↑
  23   23   * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  24   24   * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  25   25   * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  26   26   * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  27   27   * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28   28   * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29   29   *
  30   30   * You can contact the author at :
  31   31   * - LZ4 homepage : http://fastcompression.blogspot.com/p/lz4.html
  32   32   * - LZ4 source repository : http://code.google.com/p/lz4/
       33 + * Upstream release : r91
  33   34   */
  34   35  
  35   36  #include <sys/zfs_context.h>
  36   37  
  37   38  static int real_LZ4_compress(const char *source, char *dest, int isize,
  38   39      int osize);
  39      -static int real_LZ4_uncompress(const char *source, char *dest, int osize);
  40   40  static int LZ4_compressBound(int isize);
  41   41  static int LZ4_uncompress_unknownOutputSize(const char *source, char *dest,
  42   42      int isize, int maxOutputSize);
  43   43  static int LZ4_compressCtx(void *ctx, const char *source, char *dest,
  44   44      int isize, int osize);
  45   45  static int LZ4_compress64kCtx(void *ctx, const char *source, char *dest,
  46   46      int isize, int osize);
  47   47  
  48   48  /*ARGSUSED*/
  49   49  size_t
↓ open down ↓ 47 lines elided ↑ open up ↑
  97   97   * Simple Functions:
  98   98   * real_LZ4_compress() :
  99   99   *      isize  : is the input size. Max supported value is ~1.9GB
 100  100   *      return : the number of bytes written in buffer dest
 101  101   *               or 0 if the compression fails (if LZ4_COMPRESSMIN is set).
 102  102   *      note : destination buffer must be already allocated.
 103  103   *              destination buffer must be sized to handle worst-case
 104  104   *              situations (input data not compressible); worst-case size
 105  105   *              evaluation is provided by function LZ4_compressBound().
 106  106   *
 107      - * real_LZ4_uncompress() :
 108      - *      osize  : is the output size, therefore the original size
 109      - *      return : the number of bytes read in the source buffer.
 110      - *              If the source stream is malformed, the function will stop
 111      - *              decoding and return a negative result, indicating the byte
 112      - *              position of the faulty instruction. This function never
 113      - *              writes beyond dest + osize, and is therefore protected
 114      - *              against malicious data packets.
 115      - *      note : destination buffer must be already allocated
 116      - *
 117  107   * Advanced Functions
 118  108   *
 119  109   * LZ4_compressBound() :
 120  110   *      Provides the maximum size that LZ4 may output in a "worst case"
 121  111   *      scenario (input data not compressible) primarily useful for memory
 122  112   *      allocation of output buffer.
 123  113   *
 124  114   *      isize  : is the input size. Max supported value is ~1.9GB
 125  115   *      return : maximum output size in a "worst case" scenario
 126  116   *      note : this function is limited by "int" range (2^31-1)
↓ open down ↓ 3 lines elided ↑ open up ↑
 130  120   *      maxOutputSize : is the size of the destination buffer (which must be
 131  121   *              already allocated)
 132  122   *      return : the number of bytes decoded in the destination buffer
 133  123   *              (necessarily <= maxOutputSize). If the source stream is
 134  124   *              malformed, the function will stop decoding and return a
 135  125   *              negative result, indicating the byte position of the faulty
 136  126   *              instruction. This function never writes beyond dest +
 137  127   *              maxOutputSize, and is therefore protected against malicious
 138  128   *              data packets.
 139  129   *      note   : Destination buffer must be already allocated.
 140      - *              This version is slightly slower than real_LZ4_uncompress()
 141  130   *
 142  131   * LZ4_compressCtx() :
 143  132   *      This function explicitly handles the CTX memory structure.
 144  133   *
 145  134   *      ILLUMOS CHANGES: the CTX memory structure must be explicitly allocated
 146  135   *      by the caller (either on the stack or using kmem_zalloc). Passing NULL
 147  136   *      isn't valid.
 148  137   *
 149  138   * LZ4_compress64kCtx() :
 150  139   *      Same as LZ4_compressCtx(), but specific to small inputs (<64KB).
↓ open down ↓ 374 lines elided ↑ open up ↑
 525  514          const BYTE *ip = (BYTE *) source;
 526  515          INITBASE(base);
 527  516          const BYTE *anchor = ip;
 528  517          const BYTE *const iend = ip + isize;
 529  518          const BYTE *const oend = (BYTE *) dest + osize;
 530  519          const BYTE *const mflimit = iend - MFLIMIT;
 531  520  #define matchlimit (iend - LASTLITERALS)
 532  521  
 533  522          BYTE *op = (BYTE *) dest;
 534  523  
 535      -        int len, length;
      524 +        int length;
 536  525          const int skipStrength = SKIPSTRENGTH;
 537  526          U32 forwardH;
 538  527  
 539  528  
 540  529          /* Init */
 541  530          if (isize < MINLENGTH)
 542  531                  goto _last_literals;
 543  532  
 544  533          /* First Byte */
 545  534          HashTable[LZ4_HASH_VALUE(ip)] = ip - base;
↓ open down ↓ 34 lines elided ↑ open up ↑
 580  569                  /* Encode Literal length */
 581  570                  length = ip - anchor;
 582  571                  token = op++;
 583  572  
 584  573                  /* Check output limit */
 585  574                  if unlikely(op + length + (2 + 1 + LASTLITERALS) +
 586  575                      (length >> 8) > oend)
 587  576                          return (0);
 588  577  
 589  578                  if (length >= (int)RUN_MASK) {
      579 +                        int len;
 590  580                          *token = (RUN_MASK << ML_BITS);
 591  581                          len = length - RUN_MASK;
 592  582                          for (; len > 254; len -= 255)
 593  583                                  *op++ = 255;
 594  584                          *op++ = (BYTE)len;
 595  585                  } else
 596  586                          *token = (length << ML_BITS);
 597  587  
 598  588                  /* Copy Literals */
 599  589                  LZ4_BLINDCOPY(anchor, op, length);
 600  590  
 601  591                  _next_match:
 602  592                  /* Encode Offset */
 603  593                  LZ4_WRITE_LITTLEENDIAN_16(op, ip - ref);
 604  594  
 605  595                  /* Start Counting */
 606  596                  ip += MINMATCH;
 607      -                ref += MINMATCH;        /* MinMatch verified */
      597 +                ref += MINMATCH;        /* MinMatch already verified */
 608  598                  anchor = ip;
 609  599                  while likely(ip < matchlimit - (STEPSIZE - 1)) {
 610  600                          UARCH diff = AARCH(ref) ^ AARCH(ip);
 611  601                          if (!diff) {
 612  602                                  ip += STEPSIZE;
 613  603                                  ref += STEPSIZE;
 614  604                                  continue;
 615  605                          }
 616  606                          ip += LZ4_NbCommonBytes(diff);
 617  607                          goto _endCount;
↓ open down ↓ 6 lines elided ↑ open up ↑
 624  614  #endif
 625  615                  if ((ip < (matchlimit - 1)) && (A16(ref) == A16(ip))) {
 626  616                          ip += 2;
 627  617                          ref += 2;
 628  618                  }
 629  619                  if ((ip < matchlimit) && (*ref == *ip))
 630  620                          ip++;
 631  621                  _endCount:
 632  622  
 633  623                  /* Encode MatchLength */
 634      -                len = (ip - anchor);
      624 +                length = (int)(ip - anchor);
 635  625                  /* Check output limit */
 636      -                if unlikely(op + (1 + LASTLITERALS) + (len >> 8) > oend)
      626 +                if unlikely(op + (1 + LASTLITERALS) + (length >> 8) > oend)
 637  627                          return (0);
 638      -                if (len >= (int)ML_MASK) {
      628 +                if (length >= (int)ML_MASK) {
 639  629                          *token += ML_MASK;
 640      -                        len -= ML_MASK;
 641      -                        for (; len > 509; len -= 510) {
      630 +                        length -= ML_MASK;
      631 +                        for (; length > 509; length -= 510) {
 642  632                                  *op++ = 255;
 643  633                                  *op++ = 255;
 644  634                          }
 645      -                        if (len > 254) {
 646      -                                len -= 255;
      635 +                        if (length > 254) {
      636 +                                length -= 255;
 647  637                                  *op++ = 255;
 648  638                          }
 649      -                        *op++ = (BYTE)len;
      639 +                        *op++ = (BYTE)length;
 650  640                  } else
 651      -                        *token += len;
      641 +                        *token += length;
 652  642  
 653  643                  /* Test end of chunk */
 654  644                  if (ip > mflimit) {
 655  645                          anchor = ip;
 656  646                          break;
 657  647                  }
 658  648                  /* Fill table */
 659  649                  HashTable[LZ4_HASH_VALUE(ip - 2)] = ip - 2 - base;
 660  650  
 661  651                  /* Test next position */
↓ open down ↓ 248 lines elided ↑ open up ↑
 910  900          return (result);
 911  901  #else
 912  902          if (isize < (int)LZ4_64KLIMIT)
 913  903                  return (LZ4_compress64kCtx(NULL, source, dest, isize, osize));
 914  904          return (LZ4_compressCtx(NULL, source, dest, isize, osize));
 915  905  #endif
 916  906  }
 917  907  
 918  908  /* Decompression functions */
 919  909  
 920      -/*
 921      - * Note: The decoding functions real_LZ4_uncompress() and
 922      - *      LZ4_uncompress_unknownOutputSize() are safe against "buffer overflow"
 923      - *      attack type. They will never write nor read outside of the provided
 924      - *      output buffers. LZ4_uncompress_unknownOutputSize() also insures that
 925      - *      it will never read outside of the input buffer. A corrupted input
 926      - *      will produce an error result, a negative int, indicating the position
 927      - *      of the error within input stream.
 928      - */
 929      -
 930  910  static int
 931      -real_LZ4_uncompress(const char *source, char *dest, int osize)
 932      -{
 933      -        /* Local Variables */
 934      -        const BYTE *restrict ip = (const BYTE *) source;
 935      -        const BYTE *ref;
 936      -
 937      -        BYTE *op = (BYTE *) dest;
 938      -        BYTE *const oend = op + osize;
 939      -        BYTE *cpy;
 940      -
 941      -        unsigned token;
 942      -
 943      -        size_t length;
 944      -        size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
 945      -#if LZ4_ARCH64
 946      -        size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
 947      -#endif
 948      -
 949      -        /* Main Loop */
 950      -        for (;;) {
 951      -                /* get runlength */
 952      -                token = *ip++;
 953      -                if ((length = (token >> ML_BITS)) == RUN_MASK) {
 954      -                        size_t len;
 955      -                        for (; (len = *ip++) == 255; length += 255) {
 956      -                        }
 957      -                        length += len;
 958      -                }
 959      -                /* copy literals */
 960      -                cpy = op + length;
 961      -                if unlikely(cpy > oend - COPYLENGTH) {
 962      -                        if (cpy != oend)
 963      -                                /* Error: we must necessarily stand at EOF */
 964      -                                goto _output_error;
 965      -                        (void) memcpy(op, ip, length);
 966      -                        ip += length;
 967      -                        break;  /* EOF */
 968      -                        }
 969      -                LZ4_WILDCOPY(ip, op, cpy);
 970      -                ip -= (op - cpy);
 971      -                op = cpy;
 972      -
 973      -                /* get offset */
 974      -                LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
 975      -                ip += 2;
 976      -                if unlikely(ref < (BYTE * const) dest)
 977      -                        /*
 978      -                         * Error: offset create reference outside destination
 979      -                         * buffer
 980      -                         */
 981      -                        goto _output_error;
 982      -
 983      -                /* get matchlength */
 984      -                if ((length = (token & ML_MASK)) == ML_MASK) {
 985      -                        for (; *ip == 255; length += 255) {
 986      -                                ip++;
 987      -                        }
 988      -                        length += *ip++;
 989      -                }
 990      -                /* copy repeated sequence */
 991      -                if unlikely(op - ref < STEPSIZE) {
 992      -#if LZ4_ARCH64
 993      -                        size_t dec64 = dec64table[op-ref];
 994      -#else
 995      -                        const int dec64 = 0;
 996      -#endif
 997      -                        op[0] = ref[0];
 998      -                        op[1] = ref[1];
 999      -                        op[2] = ref[2];
1000      -                        op[3] = ref[3];
1001      -                        op += 4;
1002      -                        ref += 4;
1003      -                        ref -= dec32table[op-ref];
1004      -                        A32(op) = A32(ref);
1005      -                        op += STEPSIZE - 4;
1006      -                        ref -= dec64;
1007      -                } else {
1008      -                        LZ4_COPYSTEP(ref, op);
1009      -                }
1010      -                cpy = op + length - (STEPSIZE - 4);
1011      -                if (cpy > oend - COPYLENGTH) {
1012      -                        if (cpy > oend)
1013      -                                /*
1014      -                                 * Error: request to write beyond destination
1015      -                                 * buffer
1016      -                                 */
1017      -                                goto _output_error;
1018      -                        LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1019      -                        while (op < cpy)
1020      -                                *op++ = *ref++;
1021      -                        op = cpy;
1022      -                        if (op == oend)
1023      -                                /*
1024      -                                 * Check EOF (should never happen, since last
1025      -                                 * 5 bytes are supposed to be literals)
1026      -                                 */
1027      -                                goto _output_error;
1028      -                        continue;
1029      -                }
1030      -                LZ4_SECURECOPY(ref, op, cpy);
1031      -                op = cpy;       /* correction */
1032      -        }
1033      -
1034      -        /* end of decoding */
1035      -        return (int)(((char *)ip) - source);
1036      -
1037      -        /* write overflow error detected */
1038      -        _output_error:
1039      -        return (int)(-(((char *)ip) - source));
1040      -}
1041      -
1042      -static int
1043  911  LZ4_uncompress_unknownOutputSize(const char *source, char *dest, int isize,
1044  912      int maxOutputSize)
1045  913  {
1046  914          /* Local Variables */
1047  915          const BYTE *restrict ip = (const BYTE *) source;
1048  916          const BYTE *const iend = ip + isize;
1049  917          const BYTE *ref;
1050  918  
1051  919          BYTE *op = (BYTE *) dest;
1052  920          BYTE *const oend = op + maxOutputSize;
1053  921          BYTE *cpy;
1054  922  
1055  923          size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0};
1056  924  #if LZ4_ARCH64
1057  925          size_t dec64table[] = {0, 0, 0, (size_t)-1, 0, 1, 2, 3};
1058  926  #endif
1059  927  
      928 +        /*
      929 +         * Special case
      930 +         * A correctly formed null-compressed LZ4 must have at least
      931 +         * one byte (token=0)
      932 +         */
      933 +        if (unlikely(ip == iend))
      934 +                goto _output_error;
      935 +
1060  936          /* Main Loop */
1061      -        while (ip < iend) {
      937 +        /*LINTED E_CONSTANT_CONDITION*/
      938 +        while (1) {
1062  939                  unsigned token;
1063  940                  size_t length;
1064  941  
1065  942                  /* get runlength */
1066  943                  token = *ip++;
1067  944                  if ((length = (token >> ML_BITS)) == RUN_MASK) {
1068  945                          int s = 255;
1069  946                          while ((ip < iend) && (s == 255)) {
1070  947                                  s = *ip++;
1071  948                                  length += s;
1072  949                          }
1073  950                  }
1074  951                  /* copy literals */
1075  952                  cpy = op + length;
1076      -                if ((cpy > oend - COPYLENGTH) ||
1077      -                    (ip + length > iend - COPYLENGTH)) {
      953 +                if ((cpy > oend - MFLIMIT) ||
      954 +                    (ip + length > iend - (2 + 1 + LASTLITERALS))) {
1078  955                          if (cpy > oend)
1079  956                                  /* Error: writes beyond output buffer */
1080  957                                  goto _output_error;
1081  958                          if (ip + length != iend)
1082  959                                  /*
1083  960                                   * Error: LZ4 format requires to consume all
1084      -                                 * input at this stage
      961 +                                 * input at this stage (no match within the
      962 +                                 * last 11 bytes, and at least 8 remaining
      963 +                                 * input bytes for another match + literals)
1085  964                                   */
1086  965                                  goto _output_error;
1087  966                          (void) memcpy(op, ip, length);
1088  967                          op += length;
1089  968                          /* Necessarily EOF, due to parsing restrictions */
1090  969                          break;
1091  970                  }
1092  971                  LZ4_WILDCOPY(ip, op, cpy);
1093  972                  ip -= (op - cpy);
1094  973                  op = cpy;
1095  974  
1096  975                  /* get offset */
1097  976                  LZ4_READ_LITTLEENDIAN_16(ref, cpy, ip);
1098  977                  ip += 2;
1099      -                if (ref < (BYTE * const) dest)
      978 +                if (unlikely(ref < (BYTE * const) dest))
1100  979                          /*
1101  980                           * Error: offset creates reference outside of
1102  981                           * destination buffer
1103  982                           */
1104  983                          goto _output_error;
1105  984  
1106  985                  /* get matchlength */
1107  986                  if ((length = (token & ML_MASK)) == ML_MASK) {
1108      -                        while (ip < iend) {
      987 +                        while (likely(ip < iend - (LASTLITERALS + 1))) {
1109  988                                  int s = *ip++;
1110  989                                  length += s;
1111  990                                  if (s == 255)
1112  991                                          continue;
1113  992                                  break;
1114  993                          }
1115  994                  }
1116  995                  /* copy repeated sequence */
1117  996                  if unlikely(op - ref < STEPSIZE) {
1118  997  #if LZ4_ARCH64
↓ open down ↓ 8 lines elided ↑ open up ↑
1127 1006                          op += 4;
1128 1007                          ref += 4;
1129 1008                          ref -= dec32table[op-ref];
1130 1009                          A32(op) = A32(ref);
1131 1010                          op += STEPSIZE - 4;
1132 1011                          ref -= dec64;
1133 1012                  } else {
1134 1013                          LZ4_COPYSTEP(ref, op);
1135 1014                  }
1136 1015                  cpy = op + length - (STEPSIZE - 4);
1137      -                if (cpy > oend - COPYLENGTH) {
1138      -                        if (cpy > oend)
     1016 +                if (unlikely(cpy > oend - (COPYLENGTH + (STEPSIZE - 4)))) {
     1017 +                        if (cpy > oend - LASTLITERALS)
1139 1018                                  /*
1140      -                                 * Error: request to write outside of
1141      -                                 * destination buffer
     1019 +                                 * Error: last 5 bytes must be literals
1142 1020                                   */
1143 1021                                  goto _output_error;
1144 1022                          LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH));
1145 1023                          while (op < cpy)
1146 1024                                  *op++ = *ref++;
1147 1025                          op = cpy;
1148 1026                          if (op == oend)
1149 1027                                  /*
1150 1028                                   * Check EOF (should never happen, since
1151 1029                                   * last 5 bytes are supposed to be literals)
1152 1030                                   */
1153 1031                                  goto _output_error;
1154 1032                          continue;
1155 1033                  }
1156      -                LZ4_SECURECOPY(ref, op, cpy);
     1034 +                LZ4_WILDCOPY(ref, op, cpy);
1157 1035                  op = cpy;       /* correction */
1158 1036          }
1159 1037  
1160 1038          /* end of decoding */
1161 1039          return (int)(((char *)op) - dest);
1162 1040  
1163 1041          /* write overflow error detected */
1164 1042          _output_error:
1165 1043          return (int)(-(((char *)ip) - source));
1166 1044  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX