4896 Performance improvements for KCF AES modes


   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  */
  24 
  25 
  26 #ifndef _KERNEL
  27 #include <strings.h>
  28 #include <limits.h>
  29 #include <assert.h>
  30 #include <security/cryptoki.h>
  31 #endif  /* _KERNEL */
  32 
  33 
  34 #include <sys/types.h>
  35 #include <sys/kmem.h>
  36 #include <modes/modes.h>
  37 #include <sys/crypto/common.h>
  38 #include <sys/crypto/impl.h>
  39 #include <sys/byteorder.h>
  40 
  41 #ifdef __amd64
  42 
  43 #ifdef _KERNEL
  44 #include <sys/cpuvar.h>           /* cpu_t, CPU */
  45 #include <sys/x86_archext.h>      /* x86_featureset, X86FSET_*, CPUID_* */
  46 #include <sys/disp.h>             /* kpreempt_disable(), kpreempt_enable */
  47 /* Workaround for no XMM kernel thread save/restore */
  48 #define KPREEMPT_DISABLE        kpreempt_disable()
  49 #define KPREEMPT_ENABLE         kpreempt_enable()
  50 
  51 #else
  52 #include <sys/auxv.h>             /* getisax() */
  53 #include <sys/auxv_386.h> /* AV_386_PCLMULQDQ bit */
  54 #define KPREEMPT_DISABLE
  55 #define KPREEMPT_ENABLE
  56 #endif  /* _KERNEL */
  57 
  58 extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
  59 static int intel_pclmulqdq_instruction_present(void);
  60 #endif  /* __amd64 */
  61 
  62 struct aes_block {
  63         uint64_t a;
  64         uint64_t b;
  65 };
  66 
  67 
  68 /*
  69  * gcm_mul()
  70  * Perform a carry-less multiplication (that is, use XOR instead of the
  71  * multiply operator) on *x_in and *y and place the result in *res.
  72  *
  73  * Byte swap the input (*x_in and *y) and the output (*res).
  74  *
  75  * Note: x_in, y, and res all point to 16-byte numbers (an array of two
  76  * 64-bit integers).
  77  */
  78 void
  79 gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
  80 {
  81 #ifdef __amd64
  82         if (intel_pclmulqdq_instruction_present()) {
  83                 KPREEMPT_DISABLE;
  84                 gcm_mul_pclmulqdq(x_in, y, res);
  85                 KPREEMPT_ENABLE;
  86         } else
  87 #endif  /* __amd64 */
  88         {
  89                 static const uint64_t R = 0xe100000000000000ULL;
  90                 struct aes_block z = {0, 0};
  91                 struct aes_block v;
  92                 uint64_t x;
  93                 int i, j;
  94 
  95                 v.a = ntohll(y[0]);
  96                 v.b = ntohll(y[1]);
  97 
  98                 for (j = 0; j < 2; j++) {
  99                         x = ntohll(x_in[j]);
 100                         for (i = 0; i < 64; i++, x <<= 1) {
 101                                 if (x & 0x8000000000000000ULL) {
 102                                         z.a ^= v.a;
 103                                         z.b ^= v.b;
 104                                 }
 105                                 if (v.b & 1ULL) {
 106                                         v.b = (v.a << 63)|(v.b >> 1);
 107                                         v.a = (v.a >> 1) ^ R;
 108                                 } else {
 109                                         v.b = (v.a << 63)|(v.b >> 1);
 110                                         v.a = v.a >> 1;
 111                                 }
 112                         }
 113                 }
 114                 res[0] = htonll(z.a);
 115                 res[1] = htonll(z.b);
 116         }
 117 }
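/*
 * The software fallback above is the standard bit-serial shift-and-XOR
 * multiply in GF(2^128).  GCM's reduction polynomial is
 * x^128 + x^7 + x^2 + x + 1; the constant R = 0xe100000000000000 (the bit
 * string 11100001 followed by zeros) encodes that reduction, and one bit
 * of x_in is consumed per inner-loop iteration (128 iterations in all).
 */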
 118 
 119 
 120 #define GHASH(c, d, t) \
 121         xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
 122         gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
 123         (uint64_t *)(void *)(t));
 124 
 125 
 126 /*
 127  * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 128  * is done in another function.
 129  */
 130 int
 131 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 132     crypto_data_t *out, size_t block_size,
 133     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 134     void (*copy_block)(uint8_t *, uint8_t *),
 135     void (*xor_block)(uint8_t *, uint8_t *))
 136 {
 137         size_t remainder = length;
 138         size_t need;
 139         uint8_t *datap = (uint8_t *)data;
 140         uint8_t *blockp;
 141         uint8_t *lastp;
 142         void *iov_or_mp;
 143         offset_t offset;
 144         uint8_t *out_data_1;
 145         uint8_t *out_data_2;
 146         size_t out_data_1_len;
 147         uint64_t counter;
 148         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 149 
 150         if (length + ctx->gcm_remainder_len < block_size) {
 151                 /* accumulate bytes here and return */
 152                 bcopy(datap,
 153                     (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
 154                     length);
 155                 ctx->gcm_remainder_len += length;
 156                 ctx->gcm_copy_to = datap;
 157                 return (CRYPTO_SUCCESS);
 158         }
 159 
 160         lastp = (uint8_t *)ctx->gcm_cb;
 161         if (out != NULL)
 162                 crypto_init_ptrs(out, &iov_or_mp, &offset);
 163 
 164         do {
 165                 /* Unprocessed data from last call. */
 166                 if (ctx->gcm_remainder_len > 0) {
 167                         need = block_size - ctx->gcm_remainder_len;
 168 
 169                         if (need > remainder)
 170                                 return (CRYPTO_DATA_LEN_RANGE);
 171 
 172                         bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
 173                             [ctx->gcm_remainder_len], need);
 174 
 175                         blockp = (uint8_t *)ctx->gcm_remainder;
 176                 } else {
 177                         blockp = datap;
 178                 }
 179 
 180                 /*
 181                  * Increment counter. Counter bits are confined
 182                  * to the bottom 32 bits of the counter block.
 183                  */
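                /*
                 * For example, with a 96-bit IV the counter block starts
                 * out ending in ...00000001 (this is J0); the first pass
                 * through this update leaves it ending in ...00000002,
                 * and only those low 32 bits ever change.
                 */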
 184                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 185                 counter = htonll(counter + 1);
 186                 counter &= counter_mask;
 187                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 188 
 189                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
 190                     (uint8_t *)ctx->gcm_tmp);
 191                 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
 192 
 193                 lastp = (uint8_t *)ctx->gcm_tmp;
 194 
 195                 ctx->gcm_processed_data_len += block_size;
 196 
 197                 if (out == NULL) {
 198                         if (ctx->gcm_remainder_len > 0) {
 199                                 bcopy(blockp, ctx->gcm_copy_to,


 204                 } else {
 205                         crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
 206                             &out_data_1_len, &out_data_2, block_size);
 207 
 208                         /* copy block to where it belongs */
 209                         if (out_data_1_len == block_size) {
 210                                 copy_block(lastp, out_data_1);
 211                         } else {
 212                                 bcopy(lastp, out_data_1, out_data_1_len);
 213                                 if (out_data_2 != NULL) {
 214                                         bcopy(lastp + out_data_1_len,
 215                                             out_data_2,
 216                                             block_size - out_data_1_len);
 217                                 }
 218                         }
 219                         /* update offset */
 220                         out->cd_offset += block_size;
 221                 }
 222 
 223                 /* add ciphertext to the hash */
 224                 GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 225 
 226                 /* Update pointer to next block of data to be processed. */
 227                 if (ctx->gcm_remainder_len != 0) {
 228                         datap += need;
 229                         ctx->gcm_remainder_len = 0;
 230                 } else {
 231                         datap += block_size;
 232                 }
 233 
 234                 remainder = (size_t)&data[length] - (size_t)datap;
 235 
 236                 /* Incomplete last block. */
 237                 if (remainder > 0 && remainder < block_size) {
 238                         bcopy(datap, ctx->gcm_remainder, remainder);
 239                         ctx->gcm_remainder_len = remainder;
 240                         ctx->gcm_copy_to = datap;
 241                         goto out;
 242                 }
 243                 ctx->gcm_copy_to = NULL;
 244 
 245         } while (remainder > 0);
 246 out:
 247         return (CRYPTO_SUCCESS);
 248 }
 249 
 250 /* ARGSUSED */
 251 int
 252 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 253     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 254     void (*copy_block)(uint8_t *, uint8_t *),
 255     void (*xor_block)(uint8_t *, uint8_t *))
 256 {
 257         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 258         uint8_t *ghash, *macp;
 259         int i, rv;
 260 
 261         if (out->cd_length <
 262             (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
 263                 return (CRYPTO_DATA_LEN_RANGE);
 264         }
 265 
 266         ghash = (uint8_t *)ctx->gcm_ghash;
 267 
 268         if (ctx->gcm_remainder_len > 0) {
 269                 uint64_t counter;
 270                 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
 271 
 272                 /*
 273                  * Here is where we deal with data that is not a
 274                  * multiple of the block size.
 275                  */
 276 
 277                 /*
 278                  * Increment counter.
 279                  */
 280                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 281                 counter = htonll(counter + 1);
 282                 counter &= counter_mask;
 283                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;


 293                 for (i = 0; i < ctx->gcm_remainder_len; i++) {
 294                         macp[i] ^= tmpp[i];
 295                 }
 296 
 297                 /* add ciphertext to the hash */
 298                 GHASH(ctx, macp, ghash);
 299 
 300                 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
 301         }
 302 
 303         ctx->gcm_len_a_len_c[1] =
 304             htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 305         GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
 306         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 307             (uint8_t *)ctx->gcm_J0);
 308         xor_block((uint8_t *)ctx->gcm_J0, ghash);
 309 
 310         if (ctx->gcm_remainder_len > 0) {
 311                 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
 312                 if (rv != CRYPTO_SUCCESS)
 313                         return (rv);
 314         }
 315         out->cd_offset += ctx->gcm_remainder_len;
 316         ctx->gcm_remainder_len = 0;
 317         rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 318         if (rv != CRYPTO_SUCCESS)
 319                 return (rv);
 320         out->cd_offset += ctx->gcm_tag_len;
 321 
 322         return (CRYPTO_SUCCESS);
 323 }
 324 
 325 /*
 326  * This will only deal with decrypting the last block of the input that
 327  * might not be a multiple of block length.
 328  */
 329 static void
 330 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
 331     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 332     void (*xor_block)(uint8_t *, uint8_t *))
 333 {
 334         uint8_t *datap, *outp, *counterp;
 335         uint64_t counter;
 336         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 337         int i;
 338 
 339         /*
 340          * Increment counter.
 341          * Counter bits are confined to the bottom 32 bits
 342          */
 343         counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 344         counter = htonll(counter + 1);
 345         counter &= counter_mask;
 346         ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 347 
 348         datap = (uint8_t *)ctx->gcm_remainder;
 349         outp = &((ctx->gcm_pt_buf)[index]);
 350         counterp = (uint8_t *)ctx->gcm_tmp;
 351 
 352         /* authentication tag */
 353         bzero((uint8_t *)ctx->gcm_tmp, block_size);
 354         bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
 355 
 356         /* add ciphertext to the hash */
 357         GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 358 
 359         /* decrypt remaining ciphertext */
 360         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
 361 
 362         /* XOR with counter block */
 363         for (i = 0; i < ctx->gcm_remainder_len; i++) {
 364                 outp[i] = datap[i] ^ counterp[i];
 365         }
 366 }
 367 
 368 /* ARGSUSED */
 369 int
 370 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 371     crypto_data_t *out, size_t block_size,
 372     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 373     void (*copy_block)(uint8_t *, uint8_t *),
 374     void (*xor_block)(uint8_t *, uint8_t *))
 375 {
 376         size_t new_len;
 377         uint8_t *new;
 378 
 379         /*
 380          * Copy contiguous ciphertext input blocks to plaintext buffer.
 381          * Ciphertext will be decrypted in the final.
 382          */
 383         if (length > 0) {
 384                 new_len = ctx->gcm_pt_buf_len + length;
 385 #ifdef _KERNEL
 386                 new = kmem_alloc(new_len, ctx->gcm_kmflag);
 387                 bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
 388                 kmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
 389 #else
 390                 new = malloc(new_len);
 391                 bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
 392                 free(ctx->gcm_pt_buf);
 393 #endif
 394                 if (new == NULL)
 395                         return (CRYPTO_HOST_MEMORY);
 396 
 397                 ctx->gcm_pt_buf = new;
 398                 ctx->gcm_pt_buf_len = new_len;
 399                 bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
 400                     length);
 401                 ctx->gcm_processed_data_len += length;
 402         }
 403 
 404         ctx->gcm_remainder_len = 0;
 405         return (CRYPTO_SUCCESS);
 406 }
 407 
 408 int
 409 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 410     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 411     void (*xor_block)(uint8_t *, uint8_t *))
 412 {
 413         size_t pt_len;
 414         size_t remainder;
 415         uint8_t *ghash;
 416         uint8_t *blockp;
 417         uint8_t *cbp;
 418         uint64_t counter;
 419         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 420         int processed = 0, rv;
 421 
 422         ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
 423 
 424         pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
 425         ghash = (uint8_t *)ctx->gcm_ghash;
 426         blockp = ctx->gcm_pt_buf;
 427         remainder = pt_len;
 428         while (remainder > 0) {
 429                 /* Incomplete last block */
 430                 if (remainder < block_size) {
 431                         bcopy(blockp, ctx->gcm_remainder, remainder);
 432                         ctx->gcm_remainder_len = remainder;
 433                         /*
  434                          * not expecting any more ciphertext, just
 435                          * compute plaintext for the remaining input
 436                          */
 437                         gcm_decrypt_incomplete_block(ctx, block_size,
 438                             processed, encrypt_block, xor_block);
 439                         ctx->gcm_remainder_len = 0;
 440                         goto out;
 441                 }
 442                 /* add ciphertext to the hash */
 443                 GHASH(ctx, blockp, ghash);
 444 
 445                 /*
 446                  * Increment counter.
 447                  * Counter bits are confined to the bottom 32 bits
 448                  */
 449                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 450                 counter = htonll(counter + 1);
 451                 counter &= counter_mask;
 452                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 453 
 454                 cbp = (uint8_t *)ctx->gcm_tmp;
 455                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
 456 
 457                 /* XOR with ciphertext */
 458                 xor_block(cbp, blockp);
 459 
 460                 processed += block_size;
 461                 blockp += block_size;
 462                 remainder -= block_size;
 463         }
 464 out:
 465         ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
 466         GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
 467         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 468             (uint8_t *)ctx->gcm_J0);
 469         xor_block((uint8_t *)ctx->gcm_J0, ghash);
 470 
 471         /* compare the input authentication tag with what we calculated */
 472         if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
 473                 /* They don't match */
 474                 return (CRYPTO_INVALID_MAC);
 475         } else {
 476                 rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
 477                 if (rv != CRYPTO_SUCCESS)
 478                         return (rv);
 479                 out->cd_offset += pt_len;
 480         }
 481         return (CRYPTO_SUCCESS);
 482 }
 483 
 484 static int
 485 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
 486 {
 487         size_t tag_len;
 488 
 489         /*
 490          * Check the length of the authentication tag (in bits).
 491          */
 492         tag_len = gcm_param->ulTagBits;
 493         switch (tag_len) {
 494         case 32:
 495         case 64:
 496         case 96:
 497         case 104:
 498         case 112:
 499         case 120:
 500         case 128:
 501                 break;
 502         default:
 503                 return (CRYPTO_MECHANISM_PARAM_INVALID);
 504         }
 505 
 506         if (gcm_param->ulIvLen == 0)
 507                 return (CRYPTO_MECHANISM_PARAM_INVALID);
 508 
 509         return (CRYPTO_SUCCESS);
 510 }
 511 
 512 static void
 513 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
 514     gcm_ctx_t *ctx, size_t block_size,
 515     void (*copy_block)(uint8_t *, uint8_t *),
 516     void (*xor_block)(uint8_t *, uint8_t *))
 517 {
 518         uint8_t *cb;
 519         ulong_t remainder = iv_len;
 520         ulong_t processed = 0;
 521         uint8_t *datap, *ghash;
 522         uint64_t len_a_len_c[2];
 523 
 524         ghash = (uint8_t *)ctx->gcm_ghash;
 525         cb = (uint8_t *)ctx->gcm_cb;
 526         if (iv_len == 12) {
 527                 bcopy(iv, cb, 12);
 528                 cb[12] = 0;
 529                 cb[13] = 0;
 530                 cb[14] = 0;
 531                 cb[15] = 1;
 532                 /* J0 will be used again in the final */
 533                 copy_block(cb, (uint8_t *)ctx->gcm_J0);
 534         } else {
 535                 /* GHASH the IV */
 536                 do {


 547                         GHASH(ctx, datap, ghash);
 548                 } while (remainder > 0);
 549 
 550                 len_a_len_c[0] = 0;
 551                 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
 552                 GHASH(ctx, len_a_len_c, ctx->gcm_J0);
 553 
 554                 /* J0 will be used again in the final */
 555                 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
 556         }
 557 }
 558 
 559 /*
 560  * The following function is called at encrypt or decrypt init time
 561  * for AES GCM mode.
 562  */
 563 int
 564 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
 565     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
 566     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 567     void (*copy_block)(uint8_t *, uint8_t *),
 568     void (*xor_block)(uint8_t *, uint8_t *))
 569 {
 570         uint8_t *ghash, *datap, *authp;
 571         size_t remainder, processed;
 572 
 573         /* encrypt zero block to get subkey H */
 574         bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
 575         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
 576             (uint8_t *)ctx->gcm_H);
 577 
 578         gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 579             copy_block, xor_block);
 580 
 581         authp = (uint8_t *)ctx->gcm_tmp;
 582         ghash = (uint8_t *)ctx->gcm_ghash;
 583         bzero(authp, block_size);
 584         bzero(ghash, block_size);
 585 
 586         processed = 0;
 587         remainder = auth_data_len;
 588         do {
 589                 if (remainder < block_size) {
 590                         /*
 591                          * There's not a block full of data, pad rest of
 592                          * buffer with zero
 593                          */
 594                         bzero(authp, block_size);
 595                         bcopy(&(auth_data[processed]), authp, remainder);
 596                         datap = (uint8_t *)authp;
 597                         remainder = 0;
 598                 } else {
 599                         datap = (uint8_t *)(&(auth_data[processed]));
 600                         processed += block_size;
 601                         remainder -= block_size;
 602                 }
 603 
 604                 /* add auth data to the hash */
 605                 GHASH(ctx, datap, ghash);
 606 
 607         } while (remainder > 0);
 608 
 609         return (CRYPTO_SUCCESS);
 610 }
 611 
 612 int
 613 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 614     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 615     void (*copy_block)(uint8_t *, uint8_t *),
 616     void (*xor_block)(uint8_t *, uint8_t *))
 617 {
 618         int rv;
 619         CK_AES_GCM_PARAMS *gcm_param;
 620 
 621         if (param != NULL) {
 622                 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
 623 
 624                 if ((rv = gcm_validate_args(gcm_param)) != 0) {
 625                         return (rv);
 626                 }
 627 
 628                 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
 629                 gcm_ctx->gcm_tag_len >>= 3;
 630                 gcm_ctx->gcm_processed_data_len = 0;
 631 
 632                 /* these values are in bits */
 633                 gcm_ctx->gcm_len_a_len_c[0]
 634                     = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
 635 
 636                 rv = CRYPTO_SUCCESS;
 637                 gcm_ctx->gcm_flags |= GCM_MODE;
 638         } else {
 639                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 640                 goto out;
 641         }
 642 
 643         if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 644             gcm_param->pAAD, gcm_param->ulAADLen, block_size,
 645             encrypt_block, copy_block, xor_block) != 0) {
 646                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 647         }
 648 out:
 649         return (rv);
 650 }
 651 
 652 int
 653 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 654     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 655     void (*copy_block)(uint8_t *, uint8_t *),
 656     void (*xor_block)(uint8_t *, uint8_t *))
 657 {
 658         int rv;
 659         CK_AES_GMAC_PARAMS *gmac_param;
 660 
 661         if (param != NULL) {
 662                 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
 663 
 664                 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
 665                 gcm_ctx->gcm_processed_data_len = 0;
 666 
 667                 /* these values are in bits */
 668                 gcm_ctx->gcm_len_a_len_c[0]
 669                     = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
 670 
 671                 rv = CRYPTO_SUCCESS;
 672                 gcm_ctx->gcm_flags |= GMAC_MODE;
 673         } else {
 674                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 675                 goto out;
 676         }
 677 


 715         gcm_ctx->gcm_flags = GMAC_MODE;
 716         return (gcm_ctx);
 717 }
 718 
 719 void
 720 gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
 721 {
 722         ctx->gcm_kmflag = kmflag;
 723 }
 724 
 725 
 726 #ifdef __amd64
 727 /*
 728  * Return 1 if executing on Intel with PCLMULQDQ instructions,
 729  * otherwise 0 (i.e., Intel without PCLMULQDQ or AMD64).
 730  * Cache the result, as the CPU can't change.
 731  *
 732  * Note: the userland version uses getisax().  The kernel version uses
 733  * is_x86_featureset().
 734  */
 735 static int
 736 intel_pclmulqdq_instruction_present(void)
 737 {
 738         static int      cached_result = -1;
 739 
 740         if (cached_result == -1) { /* first time */
 741 #ifdef _KERNEL
 742                 cached_result =
 743                     is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ);
 744 #else
 745                 uint_t          ui = 0;
 746 
 747                 (void) getisax(&ui, 1);
 748                 cached_result = (ui & AV_386_PCLMULQDQ) != 0;
 749 #endif  /* _KERNEL */
 750         }
 751 
 752         return (cached_result);
 753 }
 754 #endif  /* __amd64 */

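For reference, a minimal stand-alone sketch of the bit-serial GF(2^128) multiply
that the software fallback above implements (the PCLMULQDQ path replaces exactly
this step).  The operands are taken as two already byte-swapped 64-bit halves,
so the ntohll()/htonll() conversions done by gcm_mul() are assumed to have
happened; the helper name gf128_mul and the sample operand in main() are
illustrative only, not part of the reviewed source.

#include <stdio.h>
#include <stdint.h>

/*
 * Multiply two elements of GF(2^128) using GCM's bit ordering and the
 * reduction constant R = 0xe100000000000000 (x^128 + x^7 + x^2 + x + 1).
 * Operands and result are {high half, low half}, already byte-swapped.
 */
static void
gf128_mul(const uint64_t x[2], const uint64_t y[2], uint64_t res[2])
{
	const uint64_t R = 0xe100000000000000ULL;
	uint64_t za = 0, zb = 0;		/* accumulator z */
	uint64_t va = y[0], vb = y[1];		/* running multiple of y */

	for (int j = 0; j < 2; j++) {
		uint64_t xw = x[j];

		for (int i = 0; i < 64; i++, xw <<= 1) {
			/* If the current bit of x is set, z ^= v. */
			if (xw & 0x8000000000000000ULL) {
				za ^= va;
				zb ^= vb;
			}
			/* v >>= 1, reducing by R when a bit falls off. */
			if (vb & 1ULL) {
				vb = (va << 63) | (vb >> 1);
				va = (va >> 1) ^ R;
			} else {
				vb = (va << 63) | (vb >> 1);
				va >>= 1;
			}
		}
	}
	res[0] = za;
	res[1] = zb;
}

int
main(void)
{
	/* Arbitrary sample operand; squaring it exercises the routine. */
	uint64_t h[2] = { 0x66e94bd4ef8a2c3bULL, 0x884cfa59ca342b2eULL };
	uint64_t out[2];

	gf128_mul(h, h, out);
	(void) printf("%016llx%016llx\n",
	    (unsigned long long)out[0], (unsigned long long)out[1]);
	return (0);
}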

   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 by Saso Kiselkov. All rights reserved.
  24  */
  25 
  26 
  27 #ifndef _KERNEL
  28 #include <strings.h>
  29 #include <limits.h>
  30 #include <assert.h>
  31 #include <security/cryptoki.h>
  32 #endif  /* _KERNEL */
  33 
  34 #include <sys/cmn_err.h>
  35 #include <sys/types.h>
  36 #include <sys/kmem.h>
  37 #define INLINE_CRYPTO_GET_PTRS
  38 #include <modes/modes.h>
  39 #include <sys/crypto/common.h>
  40 #include <sys/crypto/impl.h>
  41 #include <sys/byteorder.h>
  42 
  43 #define COUNTER_MASK    0x00000000ffffffffULL
  44 
  45 #ifdef  _KERNEL
  46 #include <sys/sdt.h>              /* SET_ERROR */
  47 #endif  /* _KERNEL */
  48 
  49 #ifdef __amd64
  50 
  51 #ifdef _KERNEL
  52 #include <sys/cpuvar.h>           /* cpu_t, CPU */
  53 #include <sys/x86_archext.h>      /* x86_featureset, X86FSET_*, CPUID_* */
  54 #include <sys/disp.h>             /* kpreempt_disable(), kpreempt_enable */
  55 /* Workaround for no XMM kernel thread save/restore */
  56 extern void gcm_accel_save(void *savestate);
  57 extern void gcm_accel_restore(void *savestate);
  58 
  59 #if     defined(lint) || defined(__lint)
  60 #define GCM_ACCEL_SAVESTATE(name)       uint8_t name[16 * 16 + 8]
  61 #else
  62 #define GCM_ACCEL_SAVESTATE(name) \
  63         /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
  64         uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
  65 #endif
  66 
  67 /*
   68  * Disables kernel thread preemption and conditionally calls gcm_accel_save()
   69  * iff Intel PCLMULQDQ support is present. Must be balanced by GCM_ACCEL_EXIT.
  70  * This must be present in all externally callable GCM functions which
  71  * invoke GHASH operations using FPU-accelerated implementations, or call
  72  * static functions which do (such as gcm_encrypt_fastpath128()).
  73  */
  74 #define GCM_ACCEL_ENTER \
  75         GCM_ACCEL_SAVESTATE(savestate); \
  76         do { \
  77                 if (intel_pclmulqdq_instruction_present()) { \
  78                         kpreempt_disable(); \
  79                         gcm_accel_save(savestate); \
  80                 } \
  81                 _NOTE(CONSTCOND) \
  82         } while (0)
  83 #define GCM_ACCEL_EXIT \
  84         do { \
  85                 if (intel_pclmulqdq_instruction_present()) { \
  86                         gcm_accel_restore(savestate); \
  87                         kpreempt_enable(); \
  88                 } \
  89                 _NOTE(CONSTCOND) \
  90         } while (0)
  91 
  92 #else   /* _KERNEL */
  93 #include <sys/auxv.h>             /* getisax() */
  94 #include <sys/auxv_386.h> /* AV_386_PCLMULQDQ bit */
  95 #define SET_ERROR(x)    (x)
  96 #endif  /* _KERNEL */
  97 
  98 extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
  99 extern void gcm_init_clmul(const uint64_t hash_init[2], uint8_t Htable[256]);
 100 extern void gcm_ghash_clmul(uint64_t ghash[2], const uint8_t Htable[256],
 101     const uint8_t *inp, size_t length);
 102 static inline int intel_pclmulqdq_instruction_present(void);
 103 #else   /* !__amd64 */
 104 #define GCM_ACCEL_ENTER
 105 #define GCM_ACCEL_EXIT
 106 #endif  /* !__amd64 */
 107 
 108 struct aes_block {
 109         uint64_t a;
 110         uint64_t b;
 111 };
 112 
 113 
 114 /*
 115  * gcm_mul()
 116  * Perform a carry-less multiplication (that is, use XOR instead of the
 117  * multiply operator) on *x_in and *y and place the result in *res.
 118  *
 119  * Byte swap the input (*x_in and *y) and the output (*res).
 120  *
 121  * Note: x_in, y, and res all point to 16-byte numbers (an array of two
 122  * 64-bit integers).
 123  */
 124 static inline void
 125 gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
 126 {
 127 #ifdef __amd64
 128         if (intel_pclmulqdq_instruction_present()) {
 129                 /*
 130                  * FPU context will have been saved and kernel thread
 131                  * preemption disabled already.
 132                  */
 133                 gcm_mul_pclmulqdq(x_in, y, res);
 134         } else
 135 #endif  /* __amd64 */
 136         {
 137                 static const uint64_t R = 0xe100000000000000ULL;
 138                 struct aes_block z = {0, 0};
 139                 struct aes_block v;
 140                 uint64_t x;
 141                 int i, j;
 142 
 143                 v.a = ntohll(y[0]);
 144                 v.b = ntohll(y[1]);
 145 
 146                 for (j = 0; j < 2; j++) {
 147                         x = ntohll(x_in[j]);
 148                         for (i = 0; i < 64; i++, x <<= 1) {
 149                                 if (x & 0x8000000000000000ULL) {
 150                                         z.a ^= v.a;
 151                                         z.b ^= v.b;
 152                                 }
 153                                 if (v.b & 1ULL) {
 154                                         v.b = (v.a << 63)|(v.b >> 1);
 155                                         v.a = (v.a >> 1) ^ R;
 156                                 } else {
 157                                         v.b = (v.a << 63)|(v.b >> 1);
 158                                         v.a = v.a >> 1;
 159                                 }
 160                         }
 161                 }
 162                 res[0] = htonll(z.a);
 163                 res[1] = htonll(z.b);
 164         }
 165 }
 166 
 167 #define GHASH(c, d, t) \
 168         do { \
 169                 xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
 170                 gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
 171                     (uint64_t *)(void *)(t)); \
 172                 _NOTE(CONSTCOND) \
 173         } while (0)
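/*
 * One GHASH step as defined in NIST SP 800-38D: the running hash is updated
 * as ghash = (ghash ^ block) * H over GF(2^128), where H is the hash subkey
 * (the AES encryption of the all-zero block).
 */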
 174 
 175 boolean_t gcm_fastpath_enabled = B_TRUE;
 176 
 177 static void
 178 gcm_fastpath128(gcm_ctx_t *ctx, const uint8_t *data, size_t length,
 179     uint8_t *out, boolean_t encrypt,
 180     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 181     void (*xor_block)(const uint8_t *, uint8_t *),
 182     int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
 183     uint64_t *))
 184 {
 185         /* When decrypting, `data' holds the ciphertext we need to GHASH. */
 186         if (!encrypt) {
 187 #ifdef  __amd64
 188                 if (intel_pclmulqdq_instruction_present())
 189                         gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
 190                             data, length);
 191                 else
 192 #endif  /* __amd64 */
 193                         for (size_t i = 0; i < length; i += 16)
 194                                 GHASH(ctx, &data[i], ctx->gcm_ghash);
 195         }
 196 
 197         if (cipher_ctr != NULL) {
 198                 /*
 199                  * GCM is almost but not quite like CTR. GCM increments the
 200                  * counter value *before* processing the first input block,
 201                  * whereas CTR does so afterwards. So we need to increment
 202                  * the counter before calling CTR and decrement it afterwards.
 203                  */
 204                 uint64_t counter = ntohll(ctx->gcm_cb[1]);
 205 
 206                 ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
 207                     ((counter & COUNTER_MASK) + 1));
 208                 cipher_ctr(ctx->gcm_keysched, data, out, length, ctx->gcm_cb);
 209                 counter = ntohll(ctx->gcm_cb[1]);
 210                 ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
 211                     ((counter & COUNTER_MASK) - 1));
 212         } else {
 213                 uint64_t counter = ntohll(ctx->gcm_cb[1]);
 214 
 215                 for (size_t i = 0; i < length; i += 16) {
 216                         /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
 217                         *(uint64_t *)&out[i] = ctx->gcm_cb[0];
 218                         /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
 219                         *(uint64_t *)&out[i + 8] = htonll(counter++);
 220                         encrypt_block(ctx->gcm_keysched, &out[i], &out[i]);
 221                         xor_block(&data[i], &out[i]);
 222                 }
 223 
 224                 ctx->gcm_cb[1] = htonll(counter);
 225         }
 226 
 227         /* When encrypting, `out' holds the ciphertext we need to GHASH. */
 228         if (encrypt) {
 229 #ifdef  __amd64
 230                 if (intel_pclmulqdq_instruction_present())
 231                         gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
 232                             out, length);
 233                 else
 234 #endif  /* __amd64 */
 235                         for (size_t i = 0; i < length; i += 16)
 236                                 GHASH(ctx, &out[i], ctx->gcm_ghash);
 237 
 238                 /* If no more data comes in, the last block is the auth tag. */
 239                 bcopy(&out[length - 16], ctx->gcm_tmp, 16);
 240         }
 241 
 242         ctx->gcm_processed_data_len += length;
 243 }
 244 
 245 static int
 246 gcm_process_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 247     crypto_data_t *out, size_t block_size, boolean_t encrypt,
 248     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 249     void (*copy_block)(const uint8_t *, uint8_t *),
 250     void (*xor_block)(const uint8_t *, uint8_t *),
 251     int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
 252     uint64_t *))
 253 {
 254         size_t remainder = length;
 255         size_t need;
 256         uint8_t *datap = (uint8_t *)data;
 257         uint8_t *blockp;
 258         uint8_t *lastp;
 259         void *iov_or_mp;
 260         offset_t offset;
 261         uint8_t *out_data_1;
 262         uint8_t *out_data_2;
 263         size_t out_data_1_len;
 264         uint64_t counter;
 265         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 266         int rv = CRYPTO_SUCCESS;
 267 
 268         GCM_ACCEL_ENTER;
 269 
 270         /*
 271          * GCM mode fastpath requirements:
 272          * - fastpath is enabled
 273          * - block size is 128 bits
 274          * - input is block-aligned
 275          * - the counter value won't overflow
 276          * - output is a single contiguous region and doesn't alias input
 277          */
 278         if (gcm_fastpath_enabled && block_size == 16 &&
 279             ctx->gcm_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
 280             ntohll(ctx->gcm_cb[1] & counter_mask) <= ntohll(counter_mask) -
 281             length / block_size && CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
 282                 gcm_fastpath128(ctx, (uint8_t *)data, length,
 283                     CRYPTO_DATA_FIRST_BLOCK(out), encrypt, encrypt_block,
 284                     xor_block, cipher_ctr);
 285                 out->cd_offset += length;
 286                 goto out;
 287         }
 288 
 289         if (length + ctx->gcm_remainder_len < block_size) {
 290                 /* accumulate bytes here and return */
 291                 bcopy(datap,
 292                     (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
 293                     length);
 294                 ctx->gcm_remainder_len += length;
 295                 ctx->gcm_copy_to = datap;
 296                 goto out;
 297         }
 298 
 299         lastp = (uint8_t *)ctx->gcm_cb;
 300         if (out != NULL)
 301                 crypto_init_ptrs(out, &iov_or_mp, &offset);
 302 
 303         do {
 304                 /* Unprocessed data from last call. */
 305                 if (ctx->gcm_remainder_len > 0) {
 306                         need = block_size - ctx->gcm_remainder_len;
 307 
 308                         if (need > remainder) {
 309                                 rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
 310                                 goto out;
 311                         }
 312 
 313                         bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
 314                             [ctx->gcm_remainder_len], need);
 315 
 316                         blockp = (uint8_t *)ctx->gcm_remainder;
 317                 } else {
 318                         blockp = datap;
 319                 }
 320 
 321                 /* add ciphertext to the hash */
 322                 if (!encrypt)
 323                         GHASH(ctx, blockp, ctx->gcm_ghash);
 324 
 325                 /*
 326                  * Increment counter. Counter bits are confined
 327                  * to the bottom 32 bits of the counter block.
 328                  */
 329                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 330                 counter = htonll(counter + 1);
 331                 counter &= counter_mask;
 332                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 333 
 334                 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
 335                     (uint8_t *)ctx->gcm_tmp);
 336                 xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
 337 
 338                 lastp = (uint8_t *)ctx->gcm_tmp;
 339 
 340                 ctx->gcm_processed_data_len += block_size;
 341 
 342                 if (out == NULL) {
 343                         if (ctx->gcm_remainder_len > 0) {
 344                                 bcopy(blockp, ctx->gcm_copy_to,


 349                 } else {
 350                         crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
 351                             &out_data_1_len, &out_data_2, block_size);
 352 
 353                         /* copy block to where it belongs */
 354                         if (out_data_1_len == block_size) {
 355                                 copy_block(lastp, out_data_1);
 356                         } else {
 357                                 bcopy(lastp, out_data_1, out_data_1_len);
 358                                 if (out_data_2 != NULL) {
 359                                         bcopy(lastp + out_data_1_len,
 360                                             out_data_2,
 361                                             block_size - out_data_1_len);
 362                                 }
 363                         }
 364                         /* update offset */
 365                         out->cd_offset += block_size;
 366                 }
 367 
 368                 /* add ciphertext to the hash */
 369                 if (encrypt)
 370                         GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 371 
 372                 /* Update pointer to next block of data to be processed. */
 373                 if (ctx->gcm_remainder_len != 0) {
 374                         datap += need;
 375                         ctx->gcm_remainder_len = 0;
 376                 } else {
 377                         datap += block_size;
 378                 }
 379 
 380                 remainder = (size_t)&data[length] - (size_t)datap;
 381 
 382                 /* Incomplete last block. */
 383                 if (remainder > 0 && remainder < block_size) {
 384                         bcopy(datap, ctx->gcm_remainder, remainder);
 385                         ctx->gcm_remainder_len = remainder;
 386                         ctx->gcm_copy_to = datap;
 387                         goto out;
 388                 }
 389                 ctx->gcm_copy_to = NULL;
 390 
 391         } while (remainder > 0);
 392 out:
 393         GCM_ACCEL_EXIT;
 394 
 395         return (rv);
 396 }
 397 
 398 
 399 /*
 400  * Encrypt multiple blocks of data in GCM mode.  Decrypt for GCM mode
 401  * is done in another function.
 402  */
 403 /*ARGSUSED*/
 404 int
 405 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 406     crypto_data_t *out, size_t block_size,
 407     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 408     void (*copy_block)(const uint8_t *, uint8_t *),
 409     void (*xor_block)(const uint8_t *, uint8_t *),
 410     int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
 411     uint64_t *))
 412 {
 413         return (gcm_process_contiguous_blocks(ctx, data, length, out,
 414             block_size, B_TRUE, encrypt_block, copy_block, xor_block,
 415             cipher_ctr));
 416 }
 417 
 418 /* ARGSUSED */
 419 int
 420 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 421     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 422     void (*copy_block)(const uint8_t *, uint8_t *),
 423     void (*xor_block)(const uint8_t *, uint8_t *))
 424 {
 425         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 426         uint8_t *ghash, *macp;
 427         int i, rv;
 428 
 429         GCM_ACCEL_ENTER;
 430 
 431         if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
 432                 rv = CRYPTO_DATA_LEN_RANGE;
 433                 goto out;
 434         }
 435 
 436         ghash = (uint8_t *)ctx->gcm_ghash;
 437 
 438         if (ctx->gcm_remainder_len > 0) {
 439                 uint64_t counter;
 440                 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
 441 
 442                 /*
 443                  * Here is where we deal with data that is not a
 444                  * multiple of the block size.
 445                  */
 446 
 447                 /*
 448                  * Increment counter.
 449                  */
 450                 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 451                 counter = htonll(counter + 1);
 452                 counter &= counter_mask;
 453                 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;


 463                 for (i = 0; i < ctx->gcm_remainder_len; i++) {
 464                         macp[i] ^= tmpp[i];
 465                 }
 466 
 467                 /* add ciphertext to the hash */
 468                 GHASH(ctx, macp, ghash);
 469 
 470                 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
 471         }
 472 
 473         ctx->gcm_len_a_len_c[1] =
 474             htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 475         GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
 476         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 477             (uint8_t *)ctx->gcm_J0);
 478         xor_block((uint8_t *)ctx->gcm_J0, ghash);
 479 
 480         if (ctx->gcm_remainder_len > 0) {
 481                 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
 482                 if (rv != CRYPTO_SUCCESS)
 483                         goto out;
 484         }
 485         out->cd_offset += ctx->gcm_remainder_len;
 486         ctx->gcm_remainder_len = 0;
 487         rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
 488         if (rv != CRYPTO_SUCCESS)
 489                 goto out;
 490         out->cd_offset += ctx->gcm_tag_len;
 491 out:
 492         GCM_ACCEL_EXIT;
 493         return (rv);
 494 }
 495 
 496 /*
 497  * This will only deal with decrypting the last block of the input that
 498  * might not be a multiple of block length.
 499  */
 500 /*ARGSUSED*/
 501 static void
 502 gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, uint8_t *data, size_t length,
 503     size_t block_size, crypto_data_t *out,
 504     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 505     void (*xor_block)(const uint8_t *, uint8_t *))
 506 {
 507         uint64_t counter;
 508         uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
 509 
  510         /* pad last block and add to GHASH */
 511         bcopy(data, ctx->gcm_tmp, length);
 512         bzero(((uint8_t *)ctx->gcm_tmp) + length,
 513             sizeof (ctx->gcm_tmp) - length);
 514         GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
 515 
 516         /*
 517          * Increment counter.
 518          * Counter bits are confined to the bottom 32 bits.
 519          */
 520         counter = ntohll(ctx->gcm_cb[1] & counter_mask);
 521         counter = htonll(counter + 1);
 522         counter &= counter_mask;
 523         ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
 524 
 525         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
 526             (uint8_t *)ctx->gcm_tmp);
 527 
 528         /* XOR with counter block */
 529         for (size_t i = 0; i < length; i++)
 530                 ((uint8_t *)ctx->gcm_tmp)[i] ^= data[i];
 531 
 532         if (out != NULL) {
 533                 (void) crypto_put_output_data((uchar_t *)ctx->gcm_tmp, out,
 534                     length);
 535                 out->cd_offset += length;
 536         } else {
 537                 bcopy(ctx->gcm_tmp, data, length);
 538         }
 539 }
 540 
 541 /* ARGSUSED */
 542 int
 543 gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
 544     crypto_data_t *out, size_t block_size,
 545     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 546     void (*copy_block)(const uint8_t *, uint8_t *),
 547     void (*xor_block)(const uint8_t *, uint8_t *),
 548     int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
 549     uint64_t *))
 550 {
 551         int rv = CRYPTO_SUCCESS;
 552 
 553         GCM_ACCEL_ENTER;
 554 
 555         /*
 556          * Previous calls accumulate data in the input buffer to make sure
 557          * we have the auth tag (the last part of the ciphertext) when we
 558          * receive a final() call.
 559          */
 560         if (ctx->gcm_last_input_fill > 0) {
 561                 /* Try to complete the input buffer */
 562                 size_t to_copy = MIN(length,
 563                     sizeof (ctx->gcm_last_input) - ctx->gcm_last_input_fill);
 564 
 565                 bcopy(data, ctx->gcm_last_input + ctx->gcm_last_input_fill,
 566                     to_copy);
 567                 data += to_copy;
 568                 ctx->gcm_last_input_fill += to_copy;
 569                 length -= to_copy;
 570 
 571                 if (ctx->gcm_last_input_fill < sizeof (ctx->gcm_last_input))
 572                         /* Not enough input data to continue */
 573                         goto out;
 574 
 575                 if (length < ctx->gcm_tag_len) {
 576                         /*
 577                          * There isn't enough data ahead to constitute a full
 578                          * auth tag, so only crunch one input block and copy
 579                          * the remainder of the input into our buffer.
 580                          */
 581                         rv = gcm_process_contiguous_blocks(ctx,
 582                             (char *)ctx->gcm_last_input, block_size, out,
 583                             block_size, B_FALSE, encrypt_block, copy_block,
 584                             xor_block, cipher_ctr);
 585                         if (rv != CRYPTO_SUCCESS)
 586                                 goto out;
 587                         ctx->gcm_last_input_fill -= block_size;
 588                         bcopy(ctx->gcm_last_input + block_size,
 589                             ctx->gcm_last_input, ctx->gcm_last_input_fill);
 590                         bcopy(data, ctx->gcm_last_input +
 591                             ctx->gcm_last_input_fill, length);
 592                         ctx->gcm_last_input_fill += length;
 593                         /* No more input left */
 594                         goto out;
 595                 }
 596                 /*
 597                  * There is enough data ahead for the auth tag, so crunch
 598                  * everything in our buffer now and empty it.
 599                  */
 600                 rv = gcm_process_contiguous_blocks(ctx,
 601                     (char *)ctx->gcm_last_input, ctx->gcm_last_input_fill,
 602                     out, block_size, B_FALSE, encrypt_block, copy_block,
 603                     xor_block, cipher_ctr);
 604                 if (rv != CRYPTO_SUCCESS)
 605                         goto out;
 606                 ctx->gcm_last_input_fill = 0;
 607         }
 608         /*
 609          * Last input buffer is empty, so what's left ahead is block-aligned.
  610          * Crunch all the blocks up to near the end, which might hold our
  611          * auth tag and therefore must NOT be decrypted.
 612          */
 613         ASSERT(ctx->gcm_last_input_fill == 0);
 614         if (length >= block_size + ctx->gcm_tag_len) {
 615                 size_t to_decrypt = (length - ctx->gcm_tag_len) &
 616                     ~(block_size - 1);
 617 
 618                 rv = gcm_process_contiguous_blocks(ctx, data, to_decrypt, out,
 619                     block_size, B_FALSE, encrypt_block, copy_block, xor_block,
 620                     cipher_ctr);
 621                 if (rv != CRYPTO_SUCCESS)
 622                         goto out;
 623                 data += to_decrypt;
 624                 length -= to_decrypt;
 625         }
 626         /*
 627          * Copy the remainder into our input buffer, it's potentially
 628          * the auth tag and a last partial block.
 629          */
 630         ASSERT(length < sizeof (ctx->gcm_last_input));
 631         bcopy(data, ctx->gcm_last_input, length);
 632         ctx->gcm_last_input_fill += length;
 633 out:
 634         GCM_ACCEL_EXIT;
 635 
 636         return (rv);
 637 }
 638 
 639 int
 640 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
 641     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 642     void (*copy_block)(const uint8_t *, uint8_t *),
 643     void (*xor_block)(const uint8_t *, uint8_t *),
 644     int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
 645     uint64_t *))
 646 {
 647         int rv = CRYPTO_SUCCESS;
 648 
 649         /* Check there's enough data to at least compute a tag */
 650         if (ctx->gcm_last_input_fill < ctx->gcm_tag_len)
 651                 return (SET_ERROR(CRYPTO_DATA_LEN_RANGE));
 652 
 653         GCM_ACCEL_ENTER;
 654 
 655         /* Finish any unprocessed input */
 656         if (ctx->gcm_last_input_fill > ctx->gcm_tag_len) {
 657                 size_t last_blk_len = MIN(block_size,
 658                     ctx->gcm_last_input_fill - ctx->gcm_tag_len);
 659 
 660                 /* Finish last full block */
 661                 if (last_blk_len >= block_size) {
 662                         rv = gcm_process_contiguous_blocks(ctx,
 663                             (char *)ctx->gcm_last_input, block_size, out,
 664                             block_size, B_FALSE, encrypt_block, copy_block,
 665                             xor_block, cipher_ctr);
 666                         if (rv != CRYPTO_SUCCESS)
 667                                 goto errout;
 668 
 669                         last_blk_len -= block_size;
 670                         ctx->gcm_processed_data_len += block_size;
 671                         ctx->gcm_last_input_fill -= block_size;
 672 
 673                         /* Shift what remains in the input buffer forward */
 674                         bcopy(ctx->gcm_last_input + block_size,
 675                             ctx->gcm_last_input, ctx->gcm_last_input_fill);
 676                 }
 677                 /* Finish last incomplete block before auth tag */
 678                 if (last_blk_len > 0) {
 679                         gcm_decrypt_incomplete_block(ctx, ctx->gcm_last_input,
 680                             last_blk_len, block_size, out, encrypt_block,
 681                             xor_block);
 682 
 683                         ctx->gcm_processed_data_len += last_blk_len;
 684                         ctx->gcm_last_input_fill -= last_blk_len;
 685 
 686                         /* Shift what remains in the input buffer forward */
 687                         bcopy(ctx->gcm_last_input + last_blk_len,
 688                             ctx->gcm_last_input, ctx->gcm_last_input_fill);
 689                 }
 690                 /* Now the last_input buffer holds just the auth tag */
 691         }
 692 
 693         ASSERT(ctx->gcm_last_input_fill == ctx->gcm_tag_len);
 694 
 695         ctx->gcm_len_a_len_c[1] =
 696             htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
 697         GHASH(ctx, ctx->gcm_len_a_len_c, ctx->gcm_ghash);
 698         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
 699             (uint8_t *)ctx->gcm_J0);
 700         xor_block((uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_ghash);
 701 
 702         GCM_ACCEL_EXIT;
 703 
 704         /* compare the input authentication tag with what we calculated */
 705         if (bcmp(&ctx->gcm_last_input, ctx->gcm_ghash, ctx->gcm_tag_len) != 0)
 706                 return (SET_ERROR(CRYPTO_INVALID_MAC));
 707 
 708         return (CRYPTO_SUCCESS);
 709 
 710 errout:
 711         GCM_ACCEL_EXIT;
 712         return (rv);
 713 }
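
/*
 * A short recap of the tag math implemented above, per NIST SP 800-38D
 * (a worked restatement of what the code does, not an additional API):
 * after all ciphertext blocks have been folded into gcm_ghash, the final
 * GHASH input is the 128-bit block len(A) || len(C) (lengths in bits), and
 * the expected tag is
 *
 *	T = E_K(J0) XOR GHASH(H, A, C)
 *
 * truncated to gcm_tag_len bytes.  The code above folds in gcm_len_a_len_c,
 * encrypts the saved J0 counter block, XORs it into the hash, and then
 * bcmp()s the first gcm_tag_len bytes against the tag that remained at the
 * tail of gcm_last_input.
 */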
 714 
 715 static int
 716 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
 717 {
 718         size_t tag_len;
 719 
 720         /*
 721          * Check the length of the authentication tag (in bits).
 722          */
 723         tag_len = gcm_param->ulTagBits;
 724         switch (tag_len) {
 725         case 32:
 726         case 64:
 727         case 96:
 728         case 104:
 729         case 112:
 730         case 120:
 731         case 128:
 732                 break;
 733         default:
 734                 return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
 735         }
 736 
 737         if (gcm_param->ulIvLen == 0)
 738                 return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
 739 
 740         return (CRYPTO_SUCCESS);
 741 }
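
/*
 * The ulTagBits values accepted above appear to track the tag lengths
 * permitted by NIST SP 800-38D (128 down to 96 bits, plus the short 64-
 * and 32-bit tags its Appendix C allows).  As a worked example of the
 * conversion done later in gcm_init_ctx(): ulTagBits = 96 gives
 * gcm_tag_len = 96 >> 3 = 12 bytes of tag to store and compare.
 */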
 742 
 743 /*ARGSUSED*/
 744 static void
 745 gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
 746     gcm_ctx_t *ctx, size_t block_size,
 747     void (*copy_block)(const uint8_t *, uint8_t *),
 748     void (*xor_block)(const uint8_t *, uint8_t *))
 749 {
 750         uint8_t *cb;
 751         ulong_t remainder = iv_len;
 752         ulong_t processed = 0;
 753         uint8_t *datap, *ghash;
 754         uint64_t len_a_len_c[2];
 755 
 756         ghash = (uint8_t *)ctx->gcm_ghash;
 757         cb = (uint8_t *)ctx->gcm_cb;
 758         if (iv_len == 12) {
 759                 bcopy(iv, cb, 12);
 760                 cb[12] = 0;
 761                 cb[13] = 0;
 762                 cb[14] = 0;
 763                 cb[15] = 1;
 764                 /* J0 will be used again in the final */
 765                 copy_block(cb, (uint8_t *)ctx->gcm_J0);
 766         } else {
 767                 /* GHASH the IV */
 768                 do {
 769                         if (remainder < block_size) {
 770                                 bzero(cb, block_size);
 771                                 bcopy(&(iv[processed]), cb, remainder);
 772                                 datap = (uint8_t *)cb;
 773                                 remainder = 0;
 774                         } else {
 775                                 datap = (uint8_t *)(&(iv[processed]));
 776                                 processed += block_size;
 777                                 remainder -= block_size;
 778                         }
 779                         GHASH(ctx, datap, ghash);
 780                 } while (remainder > 0);
 781 
 782                 len_a_len_c[0] = 0;
 783                 len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
 784                 GHASH(ctx, len_a_len_c, ctx->gcm_J0);
 785 
 786                 /* J0 will be used again in the final */
 787                 copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
 788         }
 789 }
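
/*
 * J0 construction recap (a restatement of the two cases above, following
 * NIST SP 800-38D): for the common 96-bit IV, the pre-counter block is
 * simply the IV with a 32-bit big-endian counter of 1 appended:
 *
 *	byte     0 ............ 11   12  13  14  15
 *	J0   =   IV                    0   0   0   1
 *
 * For any other IV length, J0 = GHASH(H, {}, IV): the IV is hashed in
 * block_size chunks (zero-padded at the end) and then a final block of
 * 0^64 || [len(IV) in bits]_64 is folded in, which is what len_a_len_c
 * carries above.
 */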
 790 
 791 /*
 792  * The following function is called at encrypt or decrypt init time
 793  * for AES GCM mode.
 794  */
 795 int
 796 gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
 797     unsigned char *auth_data, size_t auth_data_len, size_t block_size,
 798     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 799     void (*copy_block)(const uint8_t *, uint8_t *),
 800     void (*xor_block)(const uint8_t *, uint8_t *))
 801 {
 802         uint8_t *ghash, *datap, *authp;
 803         size_t remainder, processed;
 804 
 805         GCM_ACCEL_ENTER;
 806 
 807         /* encrypt zero block to get subkey H */
 808         bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
 809         encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
 810             (uint8_t *)ctx->gcm_H);
 811 
 812         gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
 813             copy_block, xor_block);
 814 
 815 #ifdef  __amd64
 816         if (intel_pclmulqdq_instruction_present()) {
 817                 uint64_t H_bswap64[2] = {
 818                     ntohll(ctx->gcm_H[0]), ntohll(ctx->gcm_H[1])
 819                 };
 820 
 821                 gcm_init_clmul(H_bswap64, ctx->gcm_H_table);
 822         }
 823 #endif
 824 
 825         authp = (uint8_t *)ctx->gcm_tmp;
 826         ghash = (uint8_t *)ctx->gcm_ghash;
 827         bzero(authp, block_size);
 828         bzero(ghash, block_size);
 829 
 830         processed = 0;
 831         remainder = auth_data_len;
 832         do {
 833                 if (remainder < block_size) {
 834                         /*
 835                          * There's not a block full of data, pad rest of
 836                          * buffer with zero
 837                          */
 838                         bzero(authp, block_size);
 839                         bcopy(&(auth_data[processed]), authp, remainder);
 840                         datap = (uint8_t *)authp;
 841                         remainder = 0;
 842                 } else {
 843                         datap = (uint8_t *)(&(auth_data[processed]));
 844                         processed += block_size;
 845                         remainder -= block_size;
 846                 }
 847 
 848                 /* add auth data to the hash */
 849                 GHASH(ctx, datap, ghash);
 850 
 851         } while (remainder > 0);
 852 
 853         GCM_ACCEL_EXIT;
 854 
 855         return (CRYPTO_SUCCESS);
 856 }
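
/*
 * Summary of what gcm_init() sets up: the hash subkey is H = E_K(0^128),
 * the pre-counter block J0 is derived from the IV as described above, and
 * the AAD is folded into gcm_ghash one block at a time with the last
 * partial block zero-padded.  On amd64 with PCLMULQDQ, H is additionally
 * byte-swapped to little-endian and handed to gcm_init_clmul() to
 * precompute gcm_H_table for the accelerated GHASH path (the table layout
 * is internal to that routine).
 */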
 857 
 858 int
 859 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 860     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 861     void (*copy_block)(const uint8_t *, uint8_t *),
 862     void (*xor_block)(const uint8_t *, uint8_t *))
 863 {
 864         /*
 865          * No GHASH invocations in this function and gcm_init does its own
 866          * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
 867          */
 868         int rv;
 869         CK_AES_GCM_PARAMS *gcm_param;
 870 
 871         if (param != NULL) {
 872                 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
 873 
 874                 if ((rv = gcm_validate_args(gcm_param)) != 0) {
 875                         return (rv);
 876                 }
 877 
 878                 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
 879                 gcm_ctx->gcm_tag_len >>= 3;
 880                 gcm_ctx->gcm_processed_data_len = 0;
 881 
 882                 /* these values are in bits */
 883                 gcm_ctx->gcm_len_a_len_c[0]
 884                     = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
 885 
 886                 rv = CRYPTO_SUCCESS;
 887                 gcm_ctx->gcm_flags |= GCM_MODE;
 888         } else {
 889                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 890                 goto out;
 891         }
 892 
 893         if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
 894             gcm_param->pAAD, gcm_param->ulAADLen, block_size,
 895             encrypt_block, copy_block, xor_block) != 0) {
 896                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 897         }
 898 out:
 899         return (rv);
 900 }
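
/*
 * A minimal caller sketch (hypothetical, for illustration only; the real
 * consumer is the aes provider): fill in a CK_AES_GCM_PARAMS with a 96-bit
 * IV and a full 128-bit tag and hand it to gcm_init_ctx().  It assumes
 * ctx->gcm_keysched already holds an expanded AES key, and that
 * AES_BLOCK_LEN and the aes_encrypt_block/aes_copy_block/aes_xor_block
 * callbacks are supplied by that provider; none of them are defined in
 * this file.
 */
#if 0
static int
example_gcm_setup(gcm_ctx_t *ctx, uint8_t *iv, uint8_t *aad, size_t aad_len)
{
        CK_AES_GCM_PARAMS param;

        param.pIv = iv;
        param.ulIvLen = 12;             /* 96-bit IV: J0 = IV || 00 00 00 01 */
        param.pAAD = aad;
        param.ulAADLen = aad_len;
        param.ulTagBits = 128;          /* must pass gcm_validate_args() */

        return (gcm_init_ctx(ctx, (char *)&param, AES_BLOCK_LEN,
            aes_encrypt_block, aes_copy_block, aes_xor_block));
}
#endif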
 901 
 902 int
 903 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
 904     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 905     void (*copy_block)(const uint8_t *, uint8_t *),
 906     void (*xor_block)(const uint8_t *, uint8_t *))
 907 {
 908         /*
 909          * No GHASH invocations in this function and gcm_init does its own
 910          * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
 911          */
 912         int rv;
 913         CK_AES_GMAC_PARAMS *gmac_param;
 914 
 915         if (param != NULL) {
 916                 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
 917 
 918                 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
 919                 gcm_ctx->gcm_processed_data_len = 0;
 920 
 921                 /* these values are in bits */
 922                 gcm_ctx->gcm_len_a_len_c[0]
 923                     = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
 924 
 925                 rv = CRYPTO_SUCCESS;
 926                 gcm_ctx->gcm_flags |= GMAC_MODE;
 927         } else {
 928                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 929                 goto out;
 930         }
 931 
 932 
 933         if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
 934             gmac_param->pAAD, gmac_param->ulAADLen, block_size,
 935             encrypt_block, copy_block, xor_block) != 0) {
 936                 rv = CRYPTO_MECHANISM_PARAM_INVALID;
 937         }
 938 out:
 939         return (rv);
 940 }

 969         gcm_ctx->gcm_flags = GMAC_MODE;
 970         return (gcm_ctx);
 971 }
 972 
 973 void
 974 gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
 975 {
 976         ctx->gcm_kmflag = kmflag;
 977 }
 978 
 979 
 980 #ifdef __amd64
 981 /*
 982  * Return 1 if the CPU supports the PCLMULQDQ instruction, otherwise 0
 983  * (e.g., an Intel or AMD processor without PCLMULQDQ support).
 984  * Cache the result, as the CPU can't change.
 985  *
 986  * Note: the userland version uses getisax().  The kernel version uses
 987  * is_x86_feature().
 988  */
 989 static inline int
 990 intel_pclmulqdq_instruction_present(void)
 991 {
 992         static int      cached_result = -1;
 993 
 994         if (cached_result == -1) { /* first time */
 995 #ifdef _KERNEL
 996                 cached_result =
 997                     is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ);
 998 #else
 999                 uint_t          ui = 0;
1000 
1001                 (void) getisax(&ui, 1);
1002                 cached_result = (ui & AV_386_PCLMULQDQ) != 0;
1003 #endif  /* _KERNEL */
1004         }
1005 
1006         return (cached_result);
1007 }
1008 #endif  /* __amd64 */