Print this page
4896 Performance improvements for KCF AES modes

Split Close
Expand all
Collapse all
          --- old/usr/src/common/crypto/modes/cbc.c
          +++ new/usr/src/common/crypto/modes/cbc.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
       25 +/*
       26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
       27 + */
  25   28  
  26   29  #ifndef _KERNEL
  27   30  #include <strings.h>
  28   31  #include <limits.h>
  29   32  #include <assert.h>
  30   33  #include <security/cryptoki.h>
  31   34  #endif
  32   35  
  33   36  #include <sys/types.h>
       37 +#define INLINE_CRYPTO_GET_PTRS
  34   38  #include <modes/modes.h>
  35   39  #include <sys/crypto/common.h>
  36   40  #include <sys/crypto/impl.h>
  37   41  
       42 +boolean_t cbc_fastpath_enabled = B_TRUE; /* checked before either CBC fastpath is taken */
       43 +
       44 +static void
       45 +cbc_decrypt_fastpath(cbc_ctx_t *ctx, const uint8_t *data, size_t length,
       46 +    uint8_t *out, size_t block_size,
       47 +    int (*decrypt)(const void *, const uint8_t *, uint8_t *),
       48 +    int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
       49 +    void (*xor_block)(const uint8_t *, uint8_t *),
       50 +    void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t))
       51 +{
       52 +        const uint8_t *iv = (uint8_t *)ctx->cbc_iv;
       53 +
       54 +        /*
          +         * Pass 1 of 2: run the raw block decryption over all `length'
          +         * bytes of `data', writing each result to the same offset in
          +         * `out'. Use bulk decryption when available; otherwise call
          +         * `decrypt' once per `block_size'-byte block.
          +         *
          +         * The int results of decrypt/decrypt_ecb are not checked here.
          +         *
          +         * NOTE(review): the per-block loop steps by block_size until
          +         * i >= length, so it assumes length is a multiple of block_size;
          +         * the caller visible in this file only takes this path for
          +         * non-zero, block-aligned lengths.
          +         */
       55 +        if (decrypt_ecb != NULL) {
       56 +                decrypt_ecb(ctx->cbc_keysched, data, out, length);
       57 +        } else {
       58 +                for (size_t i = 0; i < length; i += block_size)
       59 +                        decrypt(ctx->cbc_keysched, &data[i], &out[i]);
       60 +        }
       61 +
       62 +        /*
          +         * Pass 2 of 2: XOR every decrypted block with the input block
          +         * that precedes it; the first block is XORed with ctx->cbc_iv.
          +         * Use bulk XOR when available and there are at least two blocks:
          +         * the first block is handled by xor_block, the rest by one
          +         * xor_block_range call (presumably it XORs its first argument
          +         * into its second over the given byte count -- TODO confirm).
          +         *
          +         * This pass reads `data' after `out' has already been written,
          +         * so `out' must not overlap `data'. ctx->cbc_iv is only read by
          +         * this function; it is not updated here.
          +         */
       63 +        if (xor_block_range != NULL && length >= 2 * block_size) {
       64 +                xor_block(iv, out);
       65 +                xor_block_range(data, &out[block_size], length - block_size);
       66 +        } else {
       67 +                for (size_t i = 0; i < length; i += block_size) {
       68 +                        xor_block(iv, &out[i]);
       69 +                        iv = &data[i]; /* next block chains off this input block */
       70 +                }
       71 +        }
       72 +}
       73 +
  38   74  /*
  39   75   * Algorithm independent CBC functions.
  40   76   */
  41   77  int
  42   78  cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
  43   79      crypto_data_t *out, size_t block_size,
  44   80      int (*encrypt)(const void *, const uint8_t *, uint8_t *),
  45      -    void (*copy_block)(uint8_t *, uint8_t *),
  46      -    void (*xor_block)(uint8_t *, uint8_t *))
       81 +    void (*copy_block)(const uint8_t *, uint8_t *),
       82 +    void (*xor_block)(const uint8_t *, uint8_t *),
       83 +    int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *,
       84 +    const uint8_t *, uint64_t))
  47   85  {
  48   86          size_t remainder = length;
  49   87          size_t need;
  50   88          uint8_t *datap = (uint8_t *)data;
  51   89          uint8_t *blockp;
  52   90          uint8_t *lastp;
  53   91          void *iov_or_mp;
  54   92          offset_t offset;
  55   93          uint8_t *out_data_1;
  56   94          uint8_t *out_data_2;
  57   95          size_t out_data_1_len;
  58   96  
       97 +        /*
       98 +         * CBC encryption fastpath requirements:
       99 +         * - fastpath is enabled
      100 +         * - algorithm-specific acceleration function is available
      101 +         * - input is block-aligned
      102 +         * - output is a single contiguous region or the user requested that
      103 +         *   we overwrite their input buffer (input/output aliasing allowed)
      104 +         */
      105 +        if (cbc_fastpath_enabled && encrypt_cbc != NULL && length != 0 &&
      106 +            ctx->cbc_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
      107 +            (out == NULL || CRYPTO_DATA_IS_SINGLE_BLOCK(out))) {
      108 +                if (out == NULL) {
      109 +                        encrypt_cbc(ctx->cbc_keysched, (uint8_t *)data,
      110 +                            (uint8_t *)data, (uint8_t *)ctx->cbc_iv, length);
      111 +                        ctx->cbc_lastp = (uint8_t *)&data[length - block_size];
      112 +                } else {
      113 +                        uint8_t *outp = CRYPTO_DATA_FIRST_BLOCK(out);
      114 +                        encrypt_cbc(ctx->cbc_keysched, (uint8_t *)data, outp,
      115 +                            (uint8_t *)ctx->cbc_iv, length);
      116 +                        out->cd_offset += length;
      117 +                        ctx->cbc_lastp = &outp[length - block_size];
      118 +                }
      119 +                goto out;
      120 +        }
      121 +
  59  122          if (length + ctx->cbc_remainder_len < block_size) {
  60  123                  /* accumulate bytes here and return */
  61  124                  bcopy(datap,
  62  125                      (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
  63  126                      length);
  64  127                  ctx->cbc_remainder_len += length;
  65  128                  ctx->cbc_copy_to = datap;
  66  129                  return (CRYPTO_SUCCESS);
  67  130          }
  68  131  
↓ open down ↓ 93 lines elided ↑ open up ↑
 162  225  }
 163  226  
 164  227  #define OTHER(a, ctx) \
 165  228          (((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock)
 166  229  
 167  230  /* ARGSUSED */
 168  231  int
 169  232  cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
 170  233      crypto_data_t *out, size_t block_size,
 171  234      int (*decrypt)(const void *, const uint8_t *, uint8_t *),
 172      -    void (*copy_block)(uint8_t *, uint8_t *),
 173      -    void (*xor_block)(uint8_t *, uint8_t *))
      235 +    void (*copy_block)(const uint8_t *, uint8_t *),
      236 +    void (*xor_block)(const uint8_t *, uint8_t *),
      237 +    int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
      238 +    void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t))
 174  239  {
 175  240          size_t remainder = length;
 176  241          size_t need;
 177  242          uint8_t *datap = (uint8_t *)data;
 178  243          uint8_t *blockp;
 179  244          uint8_t *lastp;
 180  245          void *iov_or_mp;
 181  246          offset_t offset;
 182  247          uint8_t *out_data_1;
 183  248          uint8_t *out_data_2;
 184  249          size_t out_data_1_len;
 185  250  
      251 +        /*
      252 +         * CBC decryption fastpath requirements:
      253 +         * - fastpath is enabled
      254 +         * - input is block-aligned
      255 +         * - output is a single contiguous region and doesn't alias input
      256 +         */
      257 +        if (cbc_fastpath_enabled && ctx->cbc_remainder_len == 0 &&
      258 +            length != 0 && (length & (block_size - 1)) == 0 &&
      259 +            CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
      260 +                uint8_t *outp = CRYPTO_DATA_FIRST_BLOCK(out);
      261 +
      262 +                cbc_decrypt_fastpath(ctx, (uint8_t *)data, length, outp,
      263 +                    block_size, decrypt, decrypt_ecb, xor_block,
      264 +                    xor_block_range);
      265 +                out->cd_offset += length;
      266 +                bcopy(&data[length - block_size], ctx->cbc_iv, block_size);
      267 +                ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv;
      268 +                return (CRYPTO_SUCCESS);
      269 +        }
      270 +
 186  271          if (length + ctx->cbc_remainder_len < block_size) {
 187  272                  /* accumulate bytes here and return */
 188  273                  bcopy(datap,
 189  274                      (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
 190  275                      length);
 191  276                  ctx->cbc_remainder_len += length;
 192  277                  ctx->cbc_copy_to = datap;
 193  278                  return (CRYPTO_SUCCESS);
 194  279          }
 195  280  
↓ open down ↓ 77 lines elided ↑ open up ↑
 273  358                  ctx->cbc_copy_to = NULL;
 274  359  
 275  360          } while (remainder > 0);
 276  361  
 277  362          ctx->cbc_lastp = lastp;
 278  363          return (CRYPTO_SUCCESS);
 279  364  }
 280  365  
 281  366  int
 282  367  cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len,
 283      -    size_t block_size, void (*copy_block)(uint8_t *, uint64_t *))
      368 +    size_t block_size, void (*copy_block)(const uint8_t *, uint64_t *))
 284  369  {
 285  370          /*
 286  371           * Copy IV into context.
 287  372           *
 288  373           * If cm_param == NULL then the IV comes from the
 289  374           * cd_miscdata field in the crypto_data structure.
 290  375           */
 291  376          if (param != NULL) {
 292  377  #ifdef _KERNEL
 293  378                  ASSERT(param_len == block_size);
↓ open down ↓ 27 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX