4896 Performance improvements for KCF AES modes

          --- old/usr/src/common/crypto/modes/gcm.c
          +++ new/usr/src/common/crypto/modes/gcm.c
... 12 lines elided ...
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
       23 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
  23   24   */
  24   25  
  25   26  
  26   27  #ifndef _KERNEL
  27   28  #include <strings.h>
  28   29  #include <limits.h>
  29   30  #include <assert.h>
  30   31  #include <security/cryptoki.h>
  31   32  #endif  /* _KERNEL */
  32   33  
  33      -
       34 +#include <sys/cmn_err.h>
  34   35  #include <sys/types.h>
  35   36  #include <sys/kmem.h>
       37 +#define INLINE_CRYPTO_GET_PTRS
  36   38  #include <modes/modes.h>
  37   39  #include <sys/crypto/common.h>
  38   40  #include <sys/crypto/impl.h>
  39   41  #include <sys/byteorder.h>
  40   42  
       43 +#define COUNTER_MASK    0x00000000ffffffffULL
       44 +
       45 +#ifdef  _KERNEL
       46 +#include <sys/sdt.h>            /* SET_ERROR */
       47 +#endif  /* _KERNEL */
       48 +
  41   49  #ifdef __amd64
  42   50  
  43   51  #ifdef _KERNEL
  44   52  #include <sys/cpuvar.h>         /* cpu_t, CPU */
  45   53  #include <sys/x86_archext.h>    /* x86_featureset, X86FSET_*, CPUID_* */
  46   54  #include <sys/disp.h>           /* kpreempt_disable(), kpreempt_enable */
  47   55  /* Workaround for no XMM kernel thread save/restore */
  48      -#define KPREEMPT_DISABLE        kpreempt_disable()
  49      -#define KPREEMPT_ENABLE         kpreempt_enable()
       56 +extern void gcm_accel_save(void *savestate);
       57 +extern void gcm_accel_restore(void *savestate);
  50   58  
       59 +#if     defined(lint) || defined(__lint)
       60 +#define GCM_ACCEL_SAVESTATE(name)       uint8_t name[16 * 16 + 8]
  51   61  #else
       62 +#define GCM_ACCEL_SAVESTATE(name) \
       63 +        /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
       64 +        uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
       65 +#endif
       66 +
       67 +/*
        68 + * Disables kernel thread preemption and conditionally calls gcm_accel_save()
        69 + * iff Intel PCLMULQDQ support is present. Must be balanced by GCM_ACCEL_EXIT.
       70 + * This must be present in all externally callable GCM functions which
       71 + * invoke GHASH operations using FPU-accelerated implementations, or call
       72 + * static functions which do (such as gcm_encrypt_fastpath128()).
       73 + */
       74 +#define GCM_ACCEL_ENTER \
       75 +        GCM_ACCEL_SAVESTATE(savestate); \
       76 +        do { \
       77 +                if (intel_pclmulqdq_instruction_present()) { \
       78 +                        kpreempt_disable(); \
       79 +                        gcm_accel_save(savestate); \
       80 +                } \
       81 +                _NOTE(CONSTCOND) \
       82 +        } while (0)
       83 +#define GCM_ACCEL_EXIT \
       84 +        do { \
       85 +                if (intel_pclmulqdq_instruction_present()) { \
       86 +                        gcm_accel_restore(savestate); \
       87 +                        kpreempt_enable(); \
       88 +                } \
       89 +                _NOTE(CONSTCOND) \
       90 +        } while (0)
       91 +
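
A minimal usage sketch (not part of the webrev) of the discipline the comment above describes: every externally callable GCM function that can reach the FPU-accelerated GHASH path brackets its body with this macro pair, so the XMM state is saved and kernel preemption stays disabled across all FPU use, including any error paths. The function name and body below are hypothetical.

        static int
        gcm_some_entry_point(gcm_ctx_t *ctx)
        {
                int rv = CRYPTO_SUCCESS;

                GCM_ACCEL_ENTER;        /* declares savestate, saves XMM state */

                /* ... work that may call gcm_mul() or gcm_ghash_clmul() ... */

                GCM_ACCEL_EXIT;         /* restores XMM state, re-enables preemption */
                return (rv);
        }
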
       92 +#else   /* _KERNEL */
  52   93  #include <sys/auxv.h>           /* getisax() */
  53   94  #include <sys/auxv_386.h>       /* AV_386_PCLMULQDQ bit */
  54      -#define KPREEMPT_DISABLE
  55      -#define KPREEMPT_ENABLE
       95 +#define SET_ERROR(x)    (x)
  56   96  #endif  /* _KERNEL */
  57   97  
  58   98  extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
  59      -static int intel_pclmulqdq_instruction_present(void);
  60      -#endif  /* __amd64 */
       99 +extern void gcm_init_clmul(const uint64_t hash_init[2], uint8_t Htable[256]);
      100 +extern void gcm_ghash_clmul(uint64_t ghash[2], const uint8_t Htable[256],
      101 +    const uint8_t *inp, size_t length);
      102 +static inline int intel_pclmulqdq_instruction_present(void);
      103 +#else   /* !__amd64 */
      104 +#define GCM_ACCEL_ENTER
      105 +#define GCM_ACCEL_EXIT
      106 +#endif  /* !__amd64 */
  61  107  
  62  108  struct aes_block {
  63  109          uint64_t a;
  64  110          uint64_t b;
  65  111  };
  66  112  
  67  113  
  68  114  /*
  69  115   * gcm_mul()
  70  116   * Perform a carry-less multiplication (that is, use XOR instead of the
  71  117   * multiply operator) on *x_in and *y and place the result in *res.
  72  118   *
  73  119   * Byte swap the input (*x_in and *y) and the output (*res).
  74  120   *
  75  121   * Note: x_in, y, and res all point to 16-byte numbers (an array of two
  76  122   * 64-bit integers).
  77  123   */
  78      -void
      124 +static inline void
  79  125  gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
  80  126  {
  81  127  #ifdef __amd64
  82  128          if (intel_pclmulqdq_instruction_present()) {
  83      -                KPREEMPT_DISABLE;
      129 +                /*
      130 +                 * FPU context will have been saved and kernel thread
      131 +                 * preemption disabled already.
      132 +                 */
  84  133                  gcm_mul_pclmulqdq(x_in, y, res);
  85      -                KPREEMPT_ENABLE;
  86  134          } else
  87  135  #endif  /* __amd64 */
  88  136          {
  89  137                  static const uint64_t R = 0xe100000000000000ULL;
  90  138                  struct aes_block z = {0, 0};
  91  139                  struct aes_block v;
  92  140                  uint64_t x;
  93  141                  int i, j;
  94  142  
  95  143                  v.a = ntohll(y[0]);
... 13 lines elided ...
 109  157                                          v.b = (v.a << 63)|(v.b >> 1);
 110  158                                          v.a = v.a >> 1;
 111  159                                  }
 112  160                          }
 113  161                  }
 114  162                  res[0] = htonll(z.a);
 115  163                  res[1] = htonll(z.b);
 116  164          }
 117  165  }
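
The software fallback above is a bit-serial shift-and-XOR multiply in GF(2^128), reduced by the polynomial constant R. As a standalone illustration of the "carry-less" part only (a hypothetical helper on 8-bit operands, with no reduction step):

        #include <stdint.h>

        /*
         * Multiply a and b without carries: XOR the shifted partial
         * products instead of adding them, the same idea gcm_mul()
         * applies to 128-bit operands.
         */
        static uint16_t
        clmul8(uint8_t a, uint8_t b)
        {
                uint16_t res = 0;

                for (int i = 0; i < 8; i++) {
                        if (b & (1u << i))
                                res ^= (uint16_t)a << i;
                }
                return (res);
        }
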
 118  166  
 119      -
 120  167  #define GHASH(c, d, t) \
 121      -        xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
 122      -        gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
 123      -        (uint64_t *)(void *)(t));
      168 +        do { \
      169 +                xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
      170 +                gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
      171 +                    (uint64_t *)(void *)(t)); \
      172 +                _NOTE(CONSTCOND) \
      173 +        } while (0)
 124  174  
      175 +boolean_t gcm_fastpath_enabled = B_TRUE;
 125  176  
 126      -/*
 127      - * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 128      - * is done in another function.
 129      - */
 130      -int
 131      -gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 132      -    crypto_data_t *out, size_t block_size,
      177 +static void
      178 +gcm_fastpath128(gcm_ctx_t *ctx, const uint8_t *data, size_t length,
      179 +    uint8_t *out, boolean_t encrypt,
 133  180      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 134      -    void (*copy_block)(uint8_t *, uint8_t *),
 135      -    void (*xor_block)(uint8_t *, uint8_t *))
      181 +    void (*xor_block)(const uint8_t *, uint8_t *),
      182 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      183 +    uint64_t *))
 136  184  {
      185 +        /* When decrypting, `data' holds the ciphertext we need to GHASH. */
      186 +        if (!encrypt) {
      187 +#ifdef  __amd64
      188 +                if (intel_pclmulqdq_instruction_present())
      189 +                        gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
      190 +                            data, length);
      191 +                else
      192 +#endif  /* __amd64 */
      193 +                        for (size_t i = 0; i < length; i += 16)
      194 +                                GHASH(ctx, &data[i], ctx->gcm_ghash);
      195 +        }
      196 +
      197 +        if (cipher_ctr != NULL) {
      198 +                /*
      199 +                 * GCM is almost but not quite like CTR. GCM increments the
      200 +                 * counter value *before* processing the first input block,
      201 +                 * whereas CTR does so afterwards. So we need to increment
      202 +                 * the counter before calling CTR and decrement it afterwards.
      203 +                 */
      204 +                uint64_t counter = ntohll(ctx->gcm_cb[1]);
      205 +
      206 +                ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
      207 +                    ((counter & COUNTER_MASK) + 1));
      208 +                cipher_ctr(ctx->gcm_keysched, data, out, length, ctx->gcm_cb);
      209 +                counter = ntohll(ctx->gcm_cb[1]);
      210 +                ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
      211 +                    ((counter & COUNTER_MASK) - 1));
      212 +        } else {
      213 +                uint64_t counter = ntohll(ctx->gcm_cb[1]);
      214 +
      215 +                for (size_t i = 0; i < length; i += 16) {
      216 +                        /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
      217 +                        *(uint64_t *)&out[i] = ctx->gcm_cb[0];
      218 +                        /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
      219 +                        *(uint64_t *)&out[i + 8] = htonll(counter++);
      220 +                        encrypt_block(ctx->gcm_keysched, &out[i], &out[i]);
      221 +                        xor_block(&data[i], &out[i]);
      222 +                }
      223 +
      224 +                ctx->gcm_cb[1] = htonll(counter);
      225 +        }
      226 +
      227 +        /* When encrypting, `out' holds the ciphertext we need to GHASH. */
      228 +        if (encrypt) {
      229 +#ifdef  __amd64
      230 +                if (intel_pclmulqdq_instruction_present())
      231 +                        gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
      232 +                            out, length);
      233 +                else
      234 +#endif  /* __amd64 */
      235 +                        for (size_t i = 0; i < length; i += 16)
      236 +                                GHASH(ctx, &out[i], ctx->gcm_ghash);
      237 +
      238 +                /* If no more data comes in, the last block is the auth tag. */
      239 +                bcopy(&out[length - 16], ctx->gcm_tmp, 16);
      240 +        }
      241 +
      242 +        ctx->gcm_processed_data_len += length;
      243 +}
      244 +
      245 +static int
      246 +gcm_process_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
      247 +    crypto_data_t *out, size_t block_size, boolean_t encrypt,
      248 +    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
      249 +    void (*copy_block)(const uint8_t *, uint8_t *),
      250 +    void (*xor_block)(const uint8_t *, uint8_t *),
      251 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      252 +    uint64_t *))
      253 +{
 137  254          size_t remainder = length;
 138  255          size_t need;
 139  256          uint8_t *datap = (uint8_t *)data;
 140  257          uint8_t *blockp;
 141  258          uint8_t *lastp;
 142  259          void *iov_or_mp;
 143  260          offset_t offset;
 144  261          uint8_t *out_data_1;
 145  262          uint8_t *out_data_2;
 146  263          size_t out_data_1_len;
 147  264          uint64_t counter;
 148  265          uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
      266 +        int rv = CRYPTO_SUCCESS;
 149  267  
      268 +        GCM_ACCEL_ENTER;
      269 +
      270 +        /*
      271 +         * GCM mode fastpath requirements:
      272 +         * - fastpath is enabled
      273 +         * - block size is 128 bits
      274 +         * - input is block-aligned
      275 +         * - the counter value won't overflow
      276 +         * - output is a single contiguous region and doesn't alias input
      277 +         */
      278 +        if (gcm_fastpath_enabled && block_size == 16 &&
      279 +            ctx->gcm_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
      280 +            ntohll(ctx->gcm_cb[1] & counter_mask) <= ntohll(counter_mask) -
      281 +            length / block_size && CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
      282 +                gcm_fastpath128(ctx, (uint8_t *)data, length,
      283 +                    CRYPTO_DATA_FIRST_BLOCK(out), encrypt, encrypt_block,
      284 +                    xor_block, cipher_ctr);
      285 +                out->cd_offset += length;
      286 +                goto out;
      287 +        }
      288 +
 150  289          if (length + ctx->gcm_remainder_len < block_size) {
 151  290                  /* accumulate bytes here and return */
 152  291                  bcopy(datap,
 153  292                      (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
 154  293                      length);
 155  294                  ctx->gcm_remainder_len += length;
 156  295                  ctx->gcm_copy_to = datap;
 157      -                return (CRYPTO_SUCCESS);
      296 +                goto out;
 158  297          }
 159  298  
 160  299          lastp = (uint8_t *)ctx->gcm_cb;
 161  300          if (out != NULL)
 162  301                  crypto_init_ptrs(out, &iov_or_mp, &offset);
 163  302  
 164  303          do {
 165  304                  /* Unprocessed data from last call. */
 166  305                  if (ctx->gcm_remainder_len > 0) {
 167  306                          need = block_size - ctx->gcm_remainder_len;
 168  307  
 169      -                        if (need > remainder)
 170      -                                return (CRYPTO_DATA_LEN_RANGE);
      308 +                        if (need > remainder) {
      309 +                                rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
      310 +                                goto out;
      311 +                        }
 171  312  
 172  313                          bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
 173  314                              [ctx->gcm_remainder_len], need);
 174  315  
 175  316                          blockp = (uint8_t *)ctx->gcm_remainder;
 176  317                  } else {
 177  318                          blockp = datap;
 178  319                  }
 179  320  
      321 +                /* add ciphertext to the hash */
      322 +                if (!encrypt)
      323 +                        GHASH(ctx, blockp, ctx->gcm_ghash);
      324 +
 180  325                  /*
 181  326                   * Increment counter. Counter bits are confined
 182  327                   * to the bottom 32 bits of the counter block.
 183  328                   */
 184  329                  counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 185  330                  counter = htonll(counter + 1);
 186  331                  counter &= counter_mask;
 187  332                  ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 188  333  
 189  334                  encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
... 24 lines elided ...
 214  359                                          bcopy(lastp + out_data_1_len,
 215  360                                              out_data_2,
 216  361                                              block_size - out_data_1_len);
 217  362                                  }
 218  363                          }
 219  364                          /* update offset */
 220  365                          out->cd_offset += block_size;
 221  366                  }
 222  367  
 223  368                  /* add ciphertext to the hash */
 224      -                GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
      369 +                if (encrypt)
      370 +                        GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 225  371  
 226  372                  /* Update pointer to next block of data to be processed. */
 227  373                  if (ctx->gcm_remainder_len != 0) {
 228  374                          datap += need;
 229  375                          ctx->gcm_remainder_len = 0;
 230  376                  } else {
 231  377                          datap += block_size;
 232  378                  }
 233  379  
 234  380                  remainder = (size_t)&data[length] - (size_t)datap;
... 2 lines elided ...
 237  383                  if (remainder > 0 && remainder < block_size) {
 238  384                          bcopy(datap, ctx->gcm_remainder, remainder);
 239  385                          ctx->gcm_remainder_len = remainder;
 240  386                          ctx->gcm_copy_to = datap;
 241  387                          goto out;
 242  388                  }
 243  389                  ctx->gcm_copy_to = NULL;
 244  390  
 245  391          } while (remainder > 0);
 246  392  out:
 247      -        return (CRYPTO_SUCCESS);
      393 +        GCM_ACCEL_EXIT;
      394 +
      395 +        return (rv);
 248  396  }
 249  397  
      398 +
      399 +/*
      400 + * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
      401 + * is done in another function.
      402 + */
      403 +/*ARGSUSED*/
      404 +int
      405 +gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
      406 +    crypto_data_t *out, size_t block_size,
      407 +    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
      408 +    void (*copy_block)(const uint8_t *, uint8_t *),
      409 +    void (*xor_block)(const uint8_t *, uint8_t *),
      410 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      411 +    uint64_t *))
      412 +{
      413 +        return (gcm_process_contiguous_blocks(ctx, data, length, out,
      414 +            block_size, B_TRUE, encrypt_block, copy_block, xor_block,
      415 +            cipher_ctr));
      416 +}
      417 +
 250  418  /* ARGSUSED */
 251  419  int
 252  420  gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 253  421      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 254      -    void (*copy_block)(uint8_t *, uint8_t *),
 255      -    void (*xor_block)(uint8_t *, uint8_t *))
      422 +    void (*copy_block)(const uint8_t *, uint8_t *),
      423 +    void (*xor_block)(const uint8_t *, uint8_t *))
 256  424  {
 257  425          uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 258  426          uint8_t *ghash, *macp;
 259  427          int i, rv;
 260  428  
 261      -        if (out->cd_length <
 262      -            (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
 263      -                return (CRYPTO_DATA_LEN_RANGE);
      429 +        GCM_ACCEL_ENTER;
      430 +
      431 +        if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
      432 +                rv = CRYPTO_DATA_LEN_RANGE;
      433 +                goto out;
 264  434          }
 265  435  
 266  436          ghash = (uint8_t *)ctx->gcm_ghash;
 267  437  
 268  438          if (ctx->gcm_remainder_len > 0) {
 269  439                  uint64_t counter;
 270  440                  uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
 271  441  
 272  442                  /*
 273  443                   * Here is where we deal with data that is not a
... 29 lines elided ...
 303  473          ctx->gcm_len_a_len_c[1] =
 304  474              htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 305  475          GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
 306  476          encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 307  477              (uint8_t *)ctx->gcm_J0);
 308  478          xor_block((uint8_t *)ctx->gcm_J0, ghash);
 309  479  
 310  480          if (ctx->gcm_remainder_len > 0) {
 311  481                  rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
 312  482                  if (rv != CRYPTO_SUCCESS)
 313      -                        return (rv);
      483 +                        goto out;
 314  484          }
 315  485          out->cd_offset += ctx->gcm_remainder_len;
 316  486          ctx->gcm_remainder_len = 0;
 317  487          rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 318  488          if (rv != CRYPTO_SUCCESS)
 319      -                return (rv);
      489 +                goto out;
 320  490          out->cd_offset += ctx->gcm_tag_len;
 321      -
 322      -        return (CRYPTO_SUCCESS);
      491 +out:
      492 +        GCM_ACCEL_EXIT;
      493 +        return (rv);
 323  494  }
 324  495  
 325  496  /*
 326  497   * This will only deal with decrypting the last block of the input that
 327  498   * might not be a multiple of block length.
 328  499   */
      500 +/*ARGSUSED*/
 329  501  static void
 330      -gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
      502 +gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, uint8_t *data, size_t length,
      503 +    size_t block_size, crypto_data_t *out,
 331  504      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 332      -    void (*xor_block)(uint8_t *, uint8_t *))
      505 +    void (*xor_block)(const uint8_t *, uint8_t *))
 333  506  {
 334      -        uint8_t *datap, *outp, *counterp;
 335  507          uint64_t counter;
 336  508          uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 337      -        int i;
 338  509  
       510 +        /* pad last block and add to GHASH */
      511 +        bcopy(data, ctx->gcm_tmp, length);
      512 +        bzero(((uint8_t *)ctx->gcm_tmp) + length,
      513 +            sizeof (ctx->gcm_tmp) - length);
      514 +        GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
      515 +
 339  516          /*
 340  517           * Increment counter.
 341      -         * Counter bits are confined to the bottom 32 bits
      518 +         * Counter bits are confined to the bottom 32 bits.
 342  519           */
 343  520          counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 344  521          counter = htonll(counter + 1);
 345  522          counter &= counter_mask;
 346  523          ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 347  524  
 348      -        datap = (uint8_t *)ctx->gcm_remainder;
 349      -        outp = &((ctx->gcm_pt_buf)[index]);
 350      -        counterp = (uint8_t *)ctx->gcm_tmp;
      525 +        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
      526 +            (uint8_t *)ctx->gcm_tmp);
 351  527  
 352      -        /* authentication tag */
 353      -        bzero((uint8_t *)ctx->gcm_tmp, block_size);
 354      -        bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
 355      -
 356      -        /* add ciphertext to the hash */
 357      -        GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 358      -
 359      -        /* decrypt remaining ciphertext */
 360      -        encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
 361      -
 362  528          /* XOR with counter block */
 363      -        for (i = 0; i < ctx->gcm_remainder_len; i++) {
 364      -                outp[i] = datap[i] ^ counterp[i];
      529 +        for (size_t i = 0; i < length; i++)
      530 +                ((uint8_t *)ctx->gcm_tmp)[i] ^= data[i];
      531 +
      532 +        if (out != NULL) {
      533 +                (void) crypto_put_output_data((uchar_t *)ctx->gcm_tmp, out,
      534 +                    length);
      535 +                out->cd_offset += length;
      536 +        } else {
      537 +                bcopy(ctx->gcm_tmp, data, length);
 365  538          }
 366  539  }
 367  540  
 368  541  /* ARGSUSED */
 369  542  int
 370  543  gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 371  544      crypto_data_t *out, size_t block_size,
 372  545      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 373      -    void (*copy_block)(uint8_t *, uint8_t *),
 374      -    void (*xor_block)(uint8_t *, uint8_t *))
      546 +    void (*copy_block)(const uint8_t *, uint8_t *),
      547 +    void (*xor_block)(const uint8_t *, uint8_t *),
      548 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      549 +    uint64_t *))
 375  550  {
 376      -        size_t new_len;
 377      -        uint8_t *new;
      551 +        int rv = CRYPTO_SUCCESS;
 378  552  
      553 +        GCM_ACCEL_ENTER;
      554 +
 379  555          /*
 380      -         * Copy contiguous ciphertext input blocks to plaintext buffer.
 381      -         * Ciphertext will be decrypted in the final.
      556 +         * Previous calls accumulate data in the input buffer to make sure
      557 +         * we have the auth tag (the last part of the ciphertext) when we
      558 +         * receive a final() call.
 382  559           */
 383      -        if (length > 0) {
 384      -                new_len = ctx->gcm_pt_buf_len + length;
 385      -#ifdef _KERNEL
 386      -                new = kmem_alloc(new_len, ctx->gcm_kmflag);
 387      -                bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
 388      -                kmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
 389      -#else
 390      -                new = malloc(new_len);
 391      -                bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
 392      -                free(ctx->gcm_pt_buf);
 393      -#endif
 394      -                if (new == NULL)
 395      -                        return (CRYPTO_HOST_MEMORY);
      560 +        if (ctx->gcm_last_input_fill > 0) {
      561 +                /* Try to complete the input buffer */
      562 +                size_t to_copy = MIN(length,
      563 +                    sizeof (ctx->gcm_last_input) - ctx->gcm_last_input_fill);
 396  564  
 397      -                ctx->gcm_pt_buf = new;
 398      -                ctx->gcm_pt_buf_len = new_len;
 399      -                bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
 400      -                    length);
 401      -                ctx->gcm_processed_data_len += length;
      565 +                bcopy(data, ctx->gcm_last_input + ctx->gcm_last_input_fill,
      566 +                    to_copy);
      567 +                data += to_copy;
      568 +                ctx->gcm_last_input_fill += to_copy;
      569 +                length -= to_copy;
      570 +
      571 +                if (ctx->gcm_last_input_fill < sizeof (ctx->gcm_last_input))
      572 +                        /* Not enough input data to continue */
      573 +                        goto out;
      574 +
      575 +                if (length < ctx->gcm_tag_len) {
      576 +                        /*
      577 +                         * There isn't enough data ahead to constitute a full
      578 +                         * auth tag, so only crunch one input block and copy
      579 +                         * the remainder of the input into our buffer.
      580 +                         */
      581 +                        rv = gcm_process_contiguous_blocks(ctx,
      582 +                            (char *)ctx->gcm_last_input, block_size, out,
      583 +                            block_size, B_FALSE, encrypt_block, copy_block,
      584 +                            xor_block, cipher_ctr);
      585 +                        if (rv != CRYPTO_SUCCESS)
      586 +                                goto out;
      587 +                        ctx->gcm_last_input_fill -= block_size;
      588 +                        bcopy(ctx->gcm_last_input + block_size,
      589 +                            ctx->gcm_last_input, ctx->gcm_last_input_fill);
      590 +                        bcopy(data, ctx->gcm_last_input +
      591 +                            ctx->gcm_last_input_fill, length);
      592 +                        ctx->gcm_last_input_fill += length;
      593 +                        /* No more input left */
      594 +                        goto out;
      595 +                }
      596 +                /*
      597 +                 * There is enough data ahead for the auth tag, so crunch
      598 +                 * everything in our buffer now and empty it.
      599 +                 */
      600 +                rv = gcm_process_contiguous_blocks(ctx,
      601 +                    (char *)ctx->gcm_last_input, ctx->gcm_last_input_fill,
      602 +                    out, block_size, B_FALSE, encrypt_block, copy_block,
      603 +                    xor_block, cipher_ctr);
      604 +                if (rv != CRYPTO_SUCCESS)
      605 +                        goto out;
      606 +                ctx->gcm_last_input_fill = 0;
 402  607          }
      608 +        /*
      609 +         * Last input buffer is empty, so what's left ahead is block-aligned.
      610 +         * Crunch all the blocks up until the near end, which might be our
      611 +         * auth tag and we must NOT decrypt.
      612 +         */
      613 +        ASSERT(ctx->gcm_last_input_fill == 0);
      614 +        if (length >= block_size + ctx->gcm_tag_len) {
      615 +                size_t to_decrypt = (length - ctx->gcm_tag_len) &
      616 +                    ~(block_size - 1);
 403  617  
 404      -        ctx->gcm_remainder_len = 0;
 405      -        return (CRYPTO_SUCCESS);
      618 +                rv = gcm_process_contiguous_blocks(ctx, data, to_decrypt, out,
      619 +                    block_size, B_FALSE, encrypt_block, copy_block, xor_block,
      620 +                    cipher_ctr);
      621 +                if (rv != CRYPTO_SUCCESS)
      622 +                        goto out;
      623 +                data += to_decrypt;
      624 +                length -= to_decrypt;
      625 +        }
      626 +        /*
      627 +         * Copy the remainder into our input buffer, it's potentially
      628 +         * the auth tag and a last partial block.
      629 +         */
      630 +        ASSERT(length < sizeof (ctx->gcm_last_input));
      631 +        bcopy(data, ctx->gcm_last_input, length);
      632 +        ctx->gcm_last_input_fill += length;
      633 +out:
      634 +        GCM_ACCEL_EXIT;
      635 +
      636 +        return (rv);
 406  637  }
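
A sketch (not from the webrev) of the hold-back rule the decrypt path above follows: the trailing gcm_tag_len bytes of the ciphertext stream are the auth tag, so only the block-aligned prefix that cannot overlap the tag is decrypted immediately; everything else is buffered in gcm_last_input until final(). The helper name is hypothetical.

        #include <stddef.h>

        /* Largest length safe to decrypt now; block_size is a power of two. */
        static size_t
        gcm_decryptable_now(size_t avail, size_t block_size, size_t tag_len)
        {
                if (avail < block_size + tag_len)
                        return (0);
                return ((avail - tag_len) & ~(block_size - 1));
        }
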
 407  638  
 408  639  int
 409  640  gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 410  641      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 411      -    void (*xor_block)(uint8_t *, uint8_t *))
      642 +    void (*copy_block)(const uint8_t *, uint8_t *),
      643 +    void (*xor_block)(const uint8_t *, uint8_t *),
      644 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      645 +    uint64_t *))
 412  646  {
 413      -        size_t pt_len;
 414      -        size_t remainder;
 415      -        uint8_t *ghash;
 416      -        uint8_t *blockp;
 417      -        uint8_t *cbp;
 418      -        uint64_t counter;
 419      -        uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 420      -        int processed = 0, rv;
      647 +        int rv = CRYPTO_SUCCESS;
 421  648  
 422      -        ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
      649 +        /* Check there's enough data to at least compute a tag */
      650 +        if (ctx->gcm_last_input_fill < ctx->gcm_tag_len)
      651 +                return (SET_ERROR(CRYPTO_DATA_LEN_RANGE));
 423  652  
 424      -        pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
 425      -        ghash = (uint8_t *)ctx->gcm_ghash;
 426      -        blockp = ctx->gcm_pt_buf;
 427      -        remainder = pt_len;
 428      -        while (remainder > 0) {
 429      -                /* Incomplete last block */
 430      -                if (remainder < block_size) {
 431      -                        bcopy(blockp, ctx->gcm_remainder, remainder);
 432      -                        ctx->gcm_remainder_len = remainder;
 433      -                        /*
 434      -                         * not expecting anymore ciphertext, just
 435      -                         * compute plaintext for the remaining input
 436      -                         */
 437      -                        gcm_decrypt_incomplete_block(ctx, block_size,
 438      -                            processed, encrypt_block, xor_block);
 439      -                        ctx->gcm_remainder_len = 0;
 440      -                        goto out;
 441      -                }
 442      -                /* add ciphertext to the hash */
 443      -                GHASH(ctx, blockp, ghash);
      653 +        GCM_ACCEL_ENTER;
 444  654  
 445      -                /*
 446      -                 * Increment counter.
 447      -                 * Counter bits are confined to the bottom 32 bits
 448      -                 */
 449      -                counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 450      -                counter = htonll(counter + 1);
 451      -                counter &= counter_mask;
 452      -                ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
      655 +        /* Finish any unprocessed input */
      656 +        if (ctx->gcm_last_input_fill > ctx->gcm_tag_len) {
      657 +                size_t last_blk_len = MIN(block_size,
      658 +                    ctx->gcm_last_input_fill - ctx->gcm_tag_len);
 453  659  
 454      -                cbp = (uint8_t *)ctx->gcm_tmp;
 455      -                encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
      660 +                /* Finish last full block */
      661 +                if (last_blk_len >= block_size) {
      662 +                        rv = gcm_process_contiguous_blocks(ctx,
      663 +                            (char *)ctx->gcm_last_input, block_size, out,
      664 +                            block_size, B_FALSE, encrypt_block, copy_block,
      665 +                            xor_block, cipher_ctr);
      666 +                        if (rv != CRYPTO_SUCCESS)
      667 +                                goto errout;
 456  668  
 457      -                /* XOR with ciphertext */
 458      -                xor_block(cbp, blockp);
      669 +                        last_blk_len -= block_size;
      670 +                        ctx->gcm_processed_data_len += block_size;
      671 +                        ctx->gcm_last_input_fill -= block_size;
 459  672  
 460      -                processed += block_size;
 461      -                blockp += block_size;
 462      -                remainder -= block_size;
      673 +                        /* Shift what remains in the input buffer forward */
      674 +                        bcopy(ctx->gcm_last_input + block_size,
      675 +                            ctx->gcm_last_input, ctx->gcm_last_input_fill);
      676 +                }
      677 +                /* Finish last incomplete block before auth tag */
      678 +                if (last_blk_len > 0) {
      679 +                        gcm_decrypt_incomplete_block(ctx, ctx->gcm_last_input,
      680 +                            last_blk_len, block_size, out, encrypt_block,
      681 +                            xor_block);
      682 +
      683 +                        ctx->gcm_processed_data_len += last_blk_len;
      684 +                        ctx->gcm_last_input_fill -= last_blk_len;
      685 +
      686 +                        /* Shift what remains in the input buffer forward */
      687 +                        bcopy(ctx->gcm_last_input + last_blk_len,
      688 +                            ctx->gcm_last_input, ctx->gcm_last_input_fill);
      689 +                }
      690 +                /* Now the last_input buffer holds just the auth tag */
 463  691          }
 464      -out:
 465      -        ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
 466      -        GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
      692 +
      693 +        ASSERT(ctx->gcm_last_input_fill == ctx->gcm_tag_len);
      694 +
      695 +        ctx->gcm_len_a_len_c[1] =
      696 +            htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
      697 +        GHASH(ctx, ctx->gcm_len_a_len_c, ctx->gcm_ghash);
 467  698          encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 468  699              (uint8_t *)ctx->gcm_J0);
 469      -        xor_block((uint8_t *)ctx->gcm_J0, ghash);
      700 +        xor_block((uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_ghash);
 470  701  
      702 +        GCM_ACCEL_EXIT;
      703 +
 471  704          /* compare the input authentication tag with what we calculated */
 472      -        if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
 473      -                /* They don't match */
 474      -                return (CRYPTO_INVALID_MAC);
 475      -        } else {
 476      -                rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
 477      -                if (rv != CRYPTO_SUCCESS)
 478      -                        return (rv);
 479      -                out->cd_offset += pt_len;
 480      -        }
      705 +        if (bcmp(&ctx->gcm_last_input, ctx->gcm_ghash, ctx->gcm_tag_len) != 0)
      706 +                return (SET_ERROR(CRYPTO_INVALID_MAC));
      707 +
 481  708          return (CRYPTO_SUCCESS);
      709 +
      710 +errout:
      711 +        GCM_ACCEL_EXIT;
      712 +        return (rv);
 482  713  }
 483  714  
 484  715  static int
 485  716  gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
 486  717  {
 487  718          size_t tag_len;
 488  719  
 489  720          /*
 490  721           * Check the length of the authentication tag (in bits).
 491  722           */
... 1 line elided ...
 493  724          switch (tag_len) {
 494  725          case 32:
 495  726          case 64:
 496  727          case 96:
 497  728          case 104:
 498  729          case 112:
 499  730          case 120:
 500  731          case 128:
 501  732                  break;
 502  733          default:
 503      -                return (CRYPTO_MECHANISM_PARAM_INVALID);
      734 +                return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
 504  735          }
 505  736  
 506  737          if (gcm_param->ulIvLen == 0)
 507      -                return (CRYPTO_MECHANISM_PARAM_INVALID);
      738 +                return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
 508  739  
 509  740          return (CRYPTO_SUCCESS);
 510  741  }
 511  742  
      743 +/*ARGSUSED*/
 512  744  static void
 513  745  gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
 514  746      gcm_ctx_t *ctx, size_t block_size,
 515      -    void (*copy_block)(uint8_t *, uint8_t *),
 516      -    void (*xor_block)(uint8_t *, uint8_t *))
      747 +    void (*copy_block)(const uint8_t *, uint8_t *),
      748 +    void (*xor_block)(const uint8_t *, uint8_t *))
 517  749  {
 518  750          uint8_t *cb;
 519  751          ulong_t remainder = iv_len;
 520  752          ulong_t processed = 0;
 521  753          uint8_t *datap, *ghash;
 522  754          uint64_t len_a_len_c[2];
 523  755  
 524  756          ghash = (uint8_t *)ctx->gcm_ghash;
 525  757          cb = (uint8_t *)ctx->gcm_cb;
 526  758          if (iv_len == 12) {
... 30 lines elided ...
 557  789  }
 558  790  
 559  791  /*
 560  792   * The following function is called at encrypt or decrypt init time
 561  793   * for AES GCM mode.
 562  794   */
 563  795  int
 564  796  gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
 565  797      unsigned char *auth_data, size_t auth_data_len, size_t block_size,
 566  798      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 567      -    void (*copy_block)(uint8_t *, uint8_t *),
 568      -    void (*xor_block)(uint8_t *, uint8_t *))
      799 +    void (*copy_block)(const uint8_t *, uint8_t *),
      800 +    void (*xor_block)(const uint8_t *, uint8_t *))
 569  801  {
 570  802          uint8_t *ghash, *datap, *authp;
 571  803          size_t remainder, processed;
 572  804  
      805 +        GCM_ACCEL_ENTER;
      806 +
 573  807          /* encrypt zero block to get subkey H */
 574  808          bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
 575  809          encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
 576  810              (uint8_t *)ctx->gcm_H);
 577  811  
 578  812          gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 579  813              copy_block, xor_block);
 580  814  
      815 +#ifdef  __amd64
      816 +        if (intel_pclmulqdq_instruction_present()) {
      817 +                uint64_t H_bswap64[2] = {
      818 +                    ntohll(ctx->gcm_H[0]), ntohll(ctx->gcm_H[1])
      819 +                };
      820 +
      821 +                gcm_init_clmul(H_bswap64, ctx->gcm_H_table);
      822 +        }
      823 +#endif
      824 +
 581  825          authp = (uint8_t *)ctx->gcm_tmp;
 582  826          ghash = (uint8_t *)ctx->gcm_ghash;
 583  827          bzero(authp, block_size);
 584  828          bzero(ghash, block_size);
 585  829  
 586  830          processed = 0;
 587  831          remainder = auth_data_len;
 588  832          do {
 589  833                  if (remainder < block_size) {
 590  834                          /*
... 8 lines elided ...
 599  843                          datap = (uint8_t *)(&(auth_data[processed]));
 600  844                          processed += block_size;
 601  845                          remainder -= block_size;
 602  846                  }
 603  847  
 604  848                  /* add auth data to the hash */
 605  849                  GHASH(ctx, datap, ghash);
 606  850  
 607  851          } while (remainder > 0);
 608  852  
      853 +        GCM_ACCEL_EXIT;
      854 +
 609  855          return (CRYPTO_SUCCESS);
 610  856  }
 611  857  
 612  858  int
 613  859  gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 614  860      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 615      -    void (*copy_block)(uint8_t *, uint8_t *),
 616      -    void (*xor_block)(uint8_t *, uint8_t *))
      861 +    void (*copy_block)(const uint8_t *, uint8_t *),
      862 +    void (*xor_block)(const uint8_t *, uint8_t *))
 617  863  {
      864 +        /*
      865 +         * No GHASH invocations in this function and gcm_init does its own
      866 +         * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
      867 +         */
 618  868          int rv;
 619  869          CK_AES_GCM_PARAMS *gcm_param;
 620  870  
 621  871          if (param != NULL) {
 622  872                  gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
 623  873  
 624  874                  if ((rv = gcm_validate_args(gcm_param)) != 0) {
 625  875                          return (rv);
 626  876                  }
 627  877  
... 17 lines elided ...
 645  895              encrypt_block, copy_block, xor_block) != 0) {
 646  896                  rv = CRYPTO_MECHANISM_PARAM_INVALID;
 647  897          }
 648  898  out:
 649  899          return (rv);
 650  900  }
 651  901  
 652  902  int
 653  903  gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 654  904      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 655      -    void (*copy_block)(uint8_t *, uint8_t *),
 656      -    void (*xor_block)(uint8_t *, uint8_t *))
      905 +    void (*copy_block)(const uint8_t *, uint8_t *),
      906 +    void (*xor_block)(const uint8_t *, uint8_t *))
 657  907  {
      908 +        /*
      909 +         * No GHASH invocations in this function and gcm_init does its own
      910 +         * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
      911 +         */
 658  912          int rv;
 659  913          CK_AES_GMAC_PARAMS *gmac_param;
 660  914  
 661  915          if (param != NULL) {
 662  916                  gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
 663  917  
 664  918                  gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
 665  919                  gcm_ctx->gcm_processed_data_len = 0;
 666  920  
 667  921                  /* these values are in bits */
... 57 lines elided ...
 725  979  
 726  980  #ifdef __amd64
 727  981  /*
 728  982   * Return 1 if executing on Intel with PCLMULQDQ instructions,
 729  983   * otherwise 0 (i.e., Intel without PCLMULQDQ or AMD64).
 730  984   * Cache the result, as the CPU can't change.
 731  985   *
 732  986   * Note: the userland version uses getisax().  The kernel version uses
 733  987   * is_x86_featureset().
 734  988   */
 735      -static int
      989 +static inline int
 736  990  intel_pclmulqdq_instruction_present(void)
 737  991  {
 738  992          static int      cached_result = -1;
 739  993  
 740  994          if (cached_result == -1) { /* first time */
 741  995  #ifdef _KERNEL
 742  996                  cached_result =
 743  997                      is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ);
 744  998  #else
 745  999                  uint_t          ui = 0;
 746 1000  
 747 1001                  (void) getisax(&ui, 1);
 748 1002                  cached_result = (ui & AV_386_PCLMULQDQ) != 0;
 749 1003  #endif  /* _KERNEL */
 750 1004          }
 751 1005  
 752 1006          return (cached_result);
 753 1007  }
 754 1008  #endif  /* __amd64 */
    