/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2015 by Saso Kiselkov. All rights reserved.
 */

#ifndef _KERNEL
#include <strings.h>
#include <limits.h>
#include <assert.h>
#include <security/cryptoki.h>
#endif	/* _KERNEL */

#include <sys/cmn_err.h>
#include <sys/types.h>
#include <sys/kmem.h>
#define	INLINE_CRYPTO_GET_PTRS
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>

#define	COUNTER_MASK	0x00000000ffffffffULL

#ifdef	_KERNEL
#include <sys/sdt.h>		/* SET_ERROR */
#endif	/* _KERNEL */

#ifdef __amd64

#ifdef _KERNEL
#include <sys/cpuvar.h>		/* cpu_t, CPU */
#include <sys/x86_archext.h>	/* x86_featureset, X86FSET_*, CPUID_* */
#include <sys/disp.h>		/* kpreempt_disable(), kpreempt_enable() */
/* Workaround for no XMM kernel thread save/restore */
extern void gcm_accel_save(void *savestate);
extern void gcm_accel_restore(void *savestate);

#if	defined(lint) || defined(__lint)
#define	GCM_ACCEL_SAVESTATE(name)	uint8_t name[16 * 16 + 8]
#else
#define	GCM_ACCEL_SAVESTATE(name) \
	/* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
	uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
#endif

/*
 * Disables kernel thread preemption and calls gcm_accel_save() iff
 * Intel PCLMULQDQ support is present. Must be balanced by GCM_ACCEL_EXIT.
 * This must be present in all externally callable GCM functions which
 * invoke GHASH operations using FPU-accelerated implementations, or call
 * static functions which do (such as gcm_fastpath128()).
 */
#define	GCM_ACCEL_ENTER \
	GCM_ACCEL_SAVESTATE(savestate); \
	do { \
		if (intel_pclmulqdq_instruction_present()) { \
			kpreempt_disable(); \
			gcm_accel_save(savestate); \
		} \
		_NOTE(CONSTCOND) \
	} while (0)
#define	GCM_ACCEL_EXIT \
	do { \
		if (intel_pclmulqdq_instruction_present()) { \
			gcm_accel_restore(savestate); \
			kpreempt_enable(); \
		} \
		_NOTE(CONSTCOND) \
	} while (0)

#else	/* _KERNEL */
#include <sys/auxv.h>		/* getisax() */
#include <sys/auxv_386.h>	/* AV_386_PCLMULQDQ bit */
#define	SET_ERROR(x)	(x)
#endif	/* _KERNEL */

extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
extern void gcm_init_clmul(const uint64_t hash_init[2], uint8_t Htable[256]);
extern void gcm_ghash_clmul(uint64_t ghash[2], const uint8_t Htable[256],
    const uint8_t *inp, size_t length);
static inline int intel_pclmulqdq_instruction_present(void);
#else	/* !__amd64 */
#define	GCM_ACCEL_ENTER
#define	GCM_ACCEL_EXIT
#endif	/* !__amd64 */

struct aes_block {
	uint64_t a;
	uint64_t b;
};

/*
 * gcm_mul()
 * Perform a carry-less multiplication (i.e. one in which the partial
 * products are combined with XOR instead of addition) of *x_in and *y
 * and place the result in *res.
 *
 * Byte swap the input (*x_in and *y) and the output (*res).
 *
 * Note: x_in, y, and res all point to 16-byte numbers (an array of two
 * 64-bit integers).
 */
static inline void
gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
{
#ifdef __amd64
	if (intel_pclmulqdq_instruction_present()) {
		/*
		 * FPU context will have been saved and kernel thread
		 * preemption disabled already.
		 */
		gcm_mul_pclmulqdq(x_in, y, res);
	} else
#endif	/* __amd64 */
	{
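		/*
		 * R below is the bit-reflected form of the GCM reduction
		 * polynomial x^128 + x^7 + x^2 + x + 1 (NIST SP 800-38D);
		 * shifting v right by one bit and conditionally XOR-ing in
		 * R implements the multiply-by-x step in GF(2^128).
		 */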
		static const uint64_t R = 0xe100000000000000ULL;
		struct aes_block z = {0, 0};
		struct aes_block v;
		uint64_t x;
		int i, j;

		v.a = ntohll(y[0]);
		v.b = ntohll(y[1]);

		for (j = 0; j < 2; j++) {
			x = ntohll(x_in[j]);
			for (i = 0; i < 64; i++, x <<= 1) {
				if (x & 0x8000000000000000ULL) {
					z.a ^= v.a;
					z.b ^= v.b;
				}
				if (v.b & 1ULL) {
					v.b = (v.a << 63)|(v.b >> 1);
					v.a = (v.a >> 1) ^ R;
				} else {
					v.b = (v.a << 63)|(v.b >> 1);
					v.a = v.a >> 1;
				}
			}
		}
		res[0] = htonll(z.a);
		res[1] = htonll(z.b);
	}
}

#define	GHASH(c, d, t) \
	do { \
		xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
		gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
		    (uint64_t *)(void *)(t)); \
		_NOTE(CONSTCOND) \
	} while (0)
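
/*
 * GHASH folds each 16-byte block X_i into the running digest as
 * Y_i = (Y_{i-1} ^ X_i) * H over GF(2^128). For example, a block-aligned
 * buffer is hashed by chaining the macro above across blocks, as the
 * non-accelerated paths below do:
 *
 *	for (size_t i = 0; i < length; i += 16)
 *		GHASH(ctx, &buf[i], ctx->gcm_ghash);
 */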

boolean_t gcm_fastpath_enabled = B_TRUE;

static void
gcm_fastpath128(gcm_ctx_t *ctx, const uint8_t *data, size_t length,
    uint8_t *out, boolean_t encrypt,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	/* When decrypting, `data' holds the ciphertext we need to GHASH. */
	if (!encrypt) {
#ifdef	__amd64
		if (intel_pclmulqdq_instruction_present())
			gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
			    data, length);
		else
#endif	/* __amd64 */
			for (size_t i = 0; i < length; i += 16)
				GHASH(ctx, &data[i], ctx->gcm_ghash);
	}

	if (cipher_ctr != NULL) {
		/*
		 * GCM is almost but not quite like CTR. GCM increments the
		 * counter value *before* processing the first input block,
		 * whereas CTR does so afterwards. So we need to increment
		 * the counter before calling CTR and decrement it afterwards.
		 */
		uint64_t counter = ntohll(ctx->gcm_cb[1]);

		ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
		    ((counter & COUNTER_MASK) + 1));
		cipher_ctr(ctx->gcm_keysched, data, out, length, ctx->gcm_cb);
		counter = ntohll(ctx->gcm_cb[1]);
		ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
		    ((counter & COUNTER_MASK) - 1));
	} else {
		uint64_t counter = ntohll(ctx->gcm_cb[1]);

		for (size_t i = 0; i < length; i += 16) {
			/*LINTED(E_BAD_PTR_CAST_ALIGN)*/
			*(uint64_t *)&out[i] = ctx->gcm_cb[0];
			/*LINTED(E_BAD_PTR_CAST_ALIGN)*/
			*(uint64_t *)&out[i + 8] = htonll(counter++);
			encrypt_block(ctx->gcm_keysched, &out[i], &out[i]);
			xor_block(&data[i], &out[i]);
		}

		ctx->gcm_cb[1] = htonll(counter);
	}

	/* When encrypting, `out' holds the ciphertext we need to GHASH. */
	if (encrypt) {
#ifdef	__amd64
		if (intel_pclmulqdq_instruction_present())
			gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
			    out, length);
		else
#endif	/* __amd64 */
			for (size_t i = 0; i < length; i += 16)
				GHASH(ctx, &out[i], ctx->gcm_ghash);

		/* If no more data comes in, the last block is the auth tag. */
		bcopy(&out[length - 16], ctx->gcm_tmp, 16);
	}

	ctx->gcm_processed_data_len += length;
}
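
/*
 * The counter manipulation above relies on the GCM counter-block layout:
 * gcm_cb[1] holds the last 8 bytes of the counter block, of which the low
 * 32 bits (in big-endian byte order) are the actual block counter. A
 * sketch of the masked increment used throughout this file:
 *
 *	uint64_t c = ntohll(ctx->gcm_cb[1]);
 *	c = (c & ~COUNTER_MASK) | ((c & COUNTER_MASK) + 1);
 *	ctx->gcm_cb[1] = htonll(c);
 */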

static int
gcm_process_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size, boolean_t encrypt,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	size_t remainder = length;
	size_t need;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int rv = CRYPTO_SUCCESS;

	GCM_ACCEL_ENTER;

	/*
	 * GCM mode fastpath requirements:
	 * - fastpath is enabled
	 * - block size is 128 bits
	 * - input is block-aligned
	 * - the counter value won't overflow
	 * - output is a single contiguous region and doesn't alias input
	 */
	if (gcm_fastpath_enabled && block_size == 16 &&
	    ctx->gcm_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
	    ntohll(ctx->gcm_cb[1] & counter_mask) <= ntohll(counter_mask) -
	    length / block_size && CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
		gcm_fastpath128(ctx, (uint8_t *)data, length,
		    CRYPTO_DATA_FIRST_BLOCK(out), encrypt, encrypt_block,
		    xor_block, cipher_ctr);
		out->cd_offset += length;
		goto out;
	}

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    length);
		ctx->gcm_remainder_len += length;
		ctx->gcm_copy_to = datap;
		goto out;
	}

	lastp = (uint8_t *)ctx->gcm_cb;
	if (out != NULL)
		crypto_init_ptrs(out, &iov_or_mp, &offset);

	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder) {
				rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
				goto out;
			}

			bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/* add ciphertext to the hash */
		if (!encrypt)
			GHASH(ctx, blockp, ctx->gcm_ghash);

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		if (out == NULL) {
			if (ctx->gcm_remainder_len > 0) {
				bcopy(blockp, ctx->gcm_copy_to,
				    ctx->gcm_remainder_len);
				bcopy(blockp + ctx->gcm_remainder_len, datap,
				    need);
			}
		} else {
			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
			    &out_data_1_len, &out_data_2, block_size);

			/* copy block to where it belongs */
			if (out_data_1_len == block_size) {
				copy_block(lastp, out_data_1);
			} else {
				bcopy(lastp, out_data_1, out_data_1_len);
				if (out_data_2 != NULL) {
					bcopy(lastp + out_data_1_len,
					    out_data_2,
					    block_size - out_data_1_len);
				}
			}
			/* update offset */
			out->cd_offset += block_size;
		}

		/* add ciphertext to the hash */
		if (encrypt)
			GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	GCM_ACCEL_EXIT;

	return (rv);
}
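
/*
 * A note on the fastpath counter-overflow check above: interpreting
 * gcm_cb[1] as a big-endian integer, its low 32 bits hold the running
 * block counter, so the fastpath is taken only while
 * counter + length / block_size still fits in 32 bits, i.e. the masked
 * counter value is <= 0xffffffff minus the number of blocks to process.
 */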

/*
 * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 * is done in another function.
 */
/*ARGSUSED*/
int
gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	return (gcm_process_contiguous_blocks(ctx, data, length, out,
	    block_size, B_TRUE, encrypt_block, copy_block, xor_block,
	    cipher_ctr));
}

/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	uint8_t *ghash, *macp;
	int i, rv;

	GCM_ACCEL_ENTER;

	if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
		rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
		goto out;
	}

	ghash = (uint8_t *)ctx->gcm_ghash;

	if (ctx->gcm_remainder_len > 0) {
		uint64_t counter;
		uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;

		/*
		 * Here is where we deal with data that is not a
		 * multiple of the block size.
		 */

		/*
		 * Increment counter.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);

		macp = (uint8_t *)ctx->gcm_remainder;
		bzero(macp + ctx->gcm_remainder_len,
		    block_size - ctx->gcm_remainder_len);

		/* XOR with counter block */
		for (i = 0; i < ctx->gcm_remainder_len; i++) {
			macp[i] ^= tmpp[i];
		}

		/* add ciphertext to the hash */
		GHASH(ctx, macp, ghash);

		ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
	}

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, ghash);

	if (ctx->gcm_remainder_len > 0) {
		rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
		if (rv != CRYPTO_SUCCESS)
			goto out;
	}
	out->cd_offset += ctx->gcm_remainder_len;
	ctx->gcm_remainder_len = 0;
	rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
	if (rv != CRYPTO_SUCCESS)
		goto out;
	out->cd_offset += ctx->gcm_tag_len;
out:
	GCM_ACCEL_EXIT;
	return (rv);
}
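
/*
 * Per NIST SP 800-38D, the tag computed above is
 * T = MSB_tag_len(E_K(J0) ^ S), where S = GHASH(A || C ||
 * [len(A)]_64 || [len(C)]_64). The code reuses gcm_J0 to hold E_K(J0)
 * and gcm_ghash to hold S, so after the final xor_block() the tag sits
 * in gcm_ghash.
 */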

/*
 * This will only deal with decrypting the last block of the input that
 * might not be a multiple of block length.
 */
/*ARGSUSED*/
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, uint8_t *data, size_t length,
    size_t block_size, crypto_data_t *out,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	/* pad last block and add to GHASH */
	bcopy(data, ctx->gcm_tmp, length);
	bzero(((uint8_t *)ctx->gcm_tmp) + length,
	    sizeof (ctx->gcm_tmp) - length);
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits.
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
	    (uint8_t *)ctx->gcm_tmp);

	/* XOR with counter block */
	for (size_t i = 0; i < length; i++)
		((uint8_t *)ctx->gcm_tmp)[i] ^= data[i];

	if (out != NULL) {
		(void) crypto_put_output_data((uchar_t *)ctx->gcm_tmp, out,
		    length);
		out->cd_offset += length;
	} else {
		bcopy(ctx->gcm_tmp, data, length);
	}
}

/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	int rv = CRYPTO_SUCCESS;

	GCM_ACCEL_ENTER;

	/*
	 * Previous calls accumulate data in the input buffer to make sure
	 * we have the auth tag (the last part of the ciphertext) when we
	 * receive a final() call.
	 */
	if (ctx->gcm_last_input_fill > 0) {
		/* Try to complete the input buffer */
		size_t to_copy = MIN(length,
		    sizeof (ctx->gcm_last_input) - ctx->gcm_last_input_fill);

		bcopy(data, ctx->gcm_last_input + ctx->gcm_last_input_fill,
		    to_copy);
		data += to_copy;
		ctx->gcm_last_input_fill += to_copy;
		length -= to_copy;

		if (ctx->gcm_last_input_fill < sizeof (ctx->gcm_last_input))
			/* Not enough input data to continue */
			goto out;

		if (length < ctx->gcm_tag_len) {
			/*
			 * There isn't enough data ahead to constitute a full
			 * auth tag, so only crunch one input block and copy
			 * the remainder of the input into our buffer.
			 */
			rv = gcm_process_contiguous_blocks(ctx,
			    (char *)ctx->gcm_last_input, block_size, out,
			    block_size, B_FALSE, encrypt_block, copy_block,
			    xor_block, cipher_ctr);
			if (rv != CRYPTO_SUCCESS)
				goto out;
			ctx->gcm_last_input_fill -= block_size;
			bcopy(ctx->gcm_last_input + block_size,
			    ctx->gcm_last_input, ctx->gcm_last_input_fill);
			bcopy(data, ctx->gcm_last_input +
			    ctx->gcm_last_input_fill, length);
			ctx->gcm_last_input_fill += length;
			/* No more input left */
			goto out;
		}
		/*
		 * There is enough data ahead for the auth tag, so crunch
		 * everything in our buffer now and empty it.
		 */
		rv = gcm_process_contiguous_blocks(ctx,
		    (char *)ctx->gcm_last_input, ctx->gcm_last_input_fill,
		    out, block_size, B_FALSE, encrypt_block, copy_block,
		    xor_block, cipher_ctr);
		if (rv != CRYPTO_SUCCESS)
			goto out;
		ctx->gcm_last_input_fill = 0;
	}
	/*
	 * The last-input buffer is empty, so what's left ahead is
	 * block-aligned. Crunch all the blocks up to the near end, which
	 * might be our auth tag and must NOT be decrypted.
	 */
	ASSERT(ctx->gcm_last_input_fill == 0);
	if (length >= block_size + ctx->gcm_tag_len) {
		size_t to_decrypt = (length - ctx->gcm_tag_len) &
		    ~(block_size - 1);

		rv = gcm_process_contiguous_blocks(ctx, data, to_decrypt, out,
		    block_size, B_FALSE, encrypt_block, copy_block, xor_block,
		    cipher_ctr);
		if (rv != CRYPTO_SUCCESS)
			goto out;
		data += to_decrypt;
		length -= to_decrypt;
	}
	/*
	 * Copy the remainder into our input buffer; it's potentially
	 * the auth tag and a last partial block.
	 */
	ASSERT(length < sizeof (ctx->gcm_last_input));
	bcopy(data, ctx->gcm_last_input, length);
	ctx->gcm_last_input_fill += length;
out:
	GCM_ACCEL_EXIT;

	return (rv);
}

int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	int rv = CRYPTO_SUCCESS;

	/* Check there's enough data to at least compute a tag */
	if (ctx->gcm_last_input_fill < ctx->gcm_tag_len)
		return (SET_ERROR(CRYPTO_DATA_LEN_RANGE));

	GCM_ACCEL_ENTER;

	/* Finish any unprocessed input */
	if (ctx->gcm_last_input_fill > ctx->gcm_tag_len) {
		size_t last_blk_len = MIN(block_size,
		    ctx->gcm_last_input_fill - ctx->gcm_tag_len);

		/* Finish last full block */
		if (last_blk_len >= block_size) {
			rv = gcm_process_contiguous_blocks(ctx,
			    (char *)ctx->gcm_last_input, block_size, out,
			    block_size, B_FALSE, encrypt_block, copy_block,
			    xor_block, cipher_ctr);
			if (rv != CRYPTO_SUCCESS)
				goto errout;

			/*
			 * gcm_process_contiguous_blocks() already bumped
			 * gcm_processed_data_len for this block.
			 */
			last_blk_len -= block_size;
			ctx->gcm_last_input_fill -= block_size;

			/* Shift what remains in the input buffer forward */
			bcopy(ctx->gcm_last_input + block_size,
			    ctx->gcm_last_input, ctx->gcm_last_input_fill);
		}
		/* Finish last incomplete block before auth tag */
		if (last_blk_len > 0) {
			gcm_decrypt_incomplete_block(ctx, ctx->gcm_last_input,
			    last_blk_len, block_size, out, encrypt_block,
			    xor_block);

			ctx->gcm_processed_data_len += last_blk_len;
			ctx->gcm_last_input_fill -= last_blk_len;

			/* Shift what remains in the input buffer forward */
			bcopy(ctx->gcm_last_input + last_blk_len,
			    ctx->gcm_last_input, ctx->gcm_last_input_fill);
		}
		/* Now the last_input buffer holds just the auth tag */
	}

	ASSERT(ctx->gcm_last_input_fill == ctx->gcm_tag_len);

	ctx->gcm_len_a_len_c[1] =
	    htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
	GHASH(ctx, ctx->gcm_len_a_len_c, ctx->gcm_ghash);
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
	    (uint8_t *)ctx->gcm_J0);
	xor_block((uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_ghash);

	GCM_ACCEL_EXIT;

	/* compare the input authentication tag with what we calculated */
	if (bcmp(&ctx->gcm_last_input, ctx->gcm_ghash, ctx->gcm_tag_len) != 0)
		return (SET_ERROR(CRYPTO_INVALID_MAC));

	return (CRYPTO_SUCCESS);

errout:
	GCM_ACCEL_EXIT;
	return (rv);
}
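
/*
 * Worked example of the decrypt buffering (16-byte blocks, 16-byte tag):
 * for a 36-byte ciphertext delivered in one update() call, the update
 * decrypts the first 16-byte block (to_decrypt = (36 - 16) & ~15 = 16)
 * and buffers the remaining 20 bytes; final() then decrypts the 4-byte
 * partial block via gcm_decrypt_incomplete_block() and verifies the
 * trailing 16 bytes against the computed tag.
 */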

static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
	size_t tag_len;

	/*
	 * Check the length of the authentication tag (in bits).
	 */
	tag_len = gcm_param->ulTagBits;
	switch (tag_len) {
	case 32:
	case 64:
	case 96:
	case 104:
	case 112:
	case 120:
	case 128:
		break;
	default:
		return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
	}

	if (gcm_param->ulIvLen == 0)
		return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));

	return (CRYPTO_SUCCESS);
}

/*ARGSUSED*/
static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				bzero(cb, block_size);
				bcopy(&(iv[processed]), cb, remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash);
		} while (remainder > 0);

		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
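
/*
 * The function above implements the J0 derivation from NIST SP 800-38D:
 * for a 96-bit IV, J0 = IV || 0^31 || 1; for any other IV length,
 * J0 = GHASH(IV zero-padded to a block boundary || [0]_64 || [len(IV)]_64).
 */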

/*
 * The following function is called at encrypt or decrypt init time
 * for AES GCM mode.
 */
int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	GCM_ACCEL_ENTER;

	/* encrypt zero block to get subkey H */
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

#ifdef	__amd64
	if (intel_pclmulqdq_instruction_present()) {
		uint64_t H_bswap64[2] = {
		    ntohll(ctx->gcm_H[0]), ntohll(ctx->gcm_H[1])
		};

		gcm_init_clmul(H_bswap64, ctx->gcm_H_table);
	}
#endif

	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	bzero(authp, block_size);
	bzero(ghash, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There isn't a full block of data left, so pad
			 * the rest of the buffer with zeros.
			 */
			bzero(authp, block_size);
			bcopy(&(auth_data[processed]), authp, remainder);
			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash);

	} while (remainder > 0);

	GCM_ACCEL_EXIT;

	return (CRYPTO_SUCCESS);
}
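
/*
 * Known-answer check (from the widely published AES-GCM reference test
 * vectors; shown here for illustration only): with an all-zero 128-bit
 * AES key, H = E_K(0^128) = 66e94bd4ef8a2c3b884cfa59ca342b2e, and
 * encrypting an empty plaintext with a zero 96-bit IV and no AAD yields
 * the tag 58e2fccefa7e3061367f1d57a4e7455a.
 */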

int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	/*
	 * No GHASH invocations in this function and gcm_init does its own
	 * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
	 */
	int rv;
	CK_AES_GCM_PARAMS *gcm_param;

	if (param != NULL) {
		gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;

		if ((rv = gcm_validate_args(gcm_param)) != 0) {
			return (rv);
		}

		gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
		gcm_ctx->gcm_tag_len >>= 3;
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GCM_MODE;
	} else {
		rv = SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID);
		goto out;
	}

	if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
	    gcm_param->pAAD, gcm_param->ulAADLen, block_size,
	    encrypt_block, copy_block, xor_block) != 0) {
		rv = SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID);
	}
out:
	return (rv);
}
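
/*
 * Hypothetical caller sketch (the variable and callback names below are
 * illustrative, not defined in this file; the AES provider supplies the
 * real block callbacks and key schedule). Only the CK_AES_GCM_PARAMS
 * fields this file actually reads are shown:
 *
 *	CK_AES_GCM_PARAMS params = { 0 };
 *
 *	params.pIv = iv;		params.ulIvLen = 12;
 *	params.pAAD = aad;		params.ulAADLen = aad_len;
 *	params.ulTagBits = 128;
 *	rv = gcm_init_ctx(gcm_ctx, (char *)&params, 16,
 *	    aes_encrypt_block, aes_copy_block, aes_xor_block);
 */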

int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	/*
	 * No GHASH invocations in this function and gcm_init does its own
	 * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
	 */
	int rv;
	CK_AES_GMAC_PARAMS *gmac_param;

	if (param != NULL) {
		gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;

		gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
		gcm_ctx->gcm_processed_data_len = 0;

		/* these values are in bits */
		gcm_ctx->gcm_len_a_len_c[0]
		    = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));

		rv = CRYPTO_SUCCESS;
		gcm_ctx->gcm_flags |= GMAC_MODE;
	} else {
		rv = SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID);
		goto out;
	}

	if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
	    gmac_param->pAAD, gmac_param->ulAADLen, block_size,
	    encrypt_block, copy_block, xor_block) != 0) {
		rv = SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID);
	}
out:
	return (rv);
}

void *
gcm_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

#ifdef _KERNEL
	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
#else
	if ((gcm_ctx = calloc(1, sizeof (gcm_ctx_t))) == NULL)
#endif
		return (NULL);

	gcm_ctx->gcm_flags = GCM_MODE;
	return (gcm_ctx);
}

void *
gmac_alloc_ctx(int kmflag)
{
	gcm_ctx_t *gcm_ctx;

#ifdef _KERNEL
	if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
#else
	if ((gcm_ctx = calloc(1, sizeof (gcm_ctx_t))) == NULL)
#endif
		return (NULL);

	gcm_ctx->gcm_flags = GMAC_MODE;
	return (gcm_ctx);
}

void
gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
{
	ctx->gcm_kmflag = kmflag;
}

#ifdef __amd64
/*
 * Return 1 if the PCLMULQDQ instruction is available, otherwise 0
 * (the check is feature-based, not vendor-based, despite the function's
 * name). Cache the result, as the CPU can't change.
 *
 * Note: the userland version uses getisax(); the kernel version uses
 * is_x86_feature().
 */
static inline int
intel_pclmulqdq_instruction_present(void)
{
	static int	cached_result = -1;

	if (cached_result == -1) { /* first time */
#ifdef _KERNEL
		cached_result =
		    is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ);
#else
		uint_t		ui = 0;

		(void) getisax(&ui, 1);
		cached_result = (ui & AV_386_PCLMULQDQ) != 0;
#endif	/* _KERNEL */
	}

	return (cached_result);
}
#endif	/* __amd64 */