1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 by Saso Kiselkov. All rights reserved.
24 */
25
26
27 #ifndef _KERNEL
28 #include <strings.h>
29 #include <limits.h>
30 #include <assert.h>
31 #include <security/cryptoki.h>
32 #endif /* _KERNEL */
33
34 #include <sys/cmn_err.h>
35 #include <sys/types.h>
36 #include <sys/kmem.h>
37 #define INLINE_CRYPTO_GET_PTRS
38 #include <modes/modes.h>
39 #include <sys/crypto/common.h>
40 #include <sys/crypto/impl.h>
41 #include <sys/byteorder.h>
42
43 #define COUNTER_MASK 0x00000000ffffffffULL
44
45 #ifdef _KERNEL
46 #include <sys/sdt.h> /* SET_ERROR */
47 #endif /* _KERNEL */
48
49 #ifdef __amd64
50
51 #ifdef _KERNEL
52 #include <sys/cpuvar.h> /* cpu_t, CPU */
53 #include <sys/x86_archext.h> /* x86_featureset, X86FSET_*, CPUID_* */
54 #include <sys/disp.h> /* kpreempt_disable(), kpreempt_enable */
55 /* Workaround for no XMM kernel thread save/restore */
56 extern void gcm_accel_save(void *savestate);
57 extern void gcm_accel_restore(void *savestate);
58
59 #if defined(lint) || defined(__lint)
60 #define GCM_ACCEL_SAVESTATE(name) uint8_t name[16 * 16 + 8]
61 #else
62 #define GCM_ACCEL_SAVESTATE(name) \
63 /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
64 uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
65 #endif
66
67 /*
68 * Disables kernel thread preemption and conditionally gcm_accel_save() iff
69 * Intel PCLMULQDQ support is present. Must be balanced by GCM_ACCEL_EXIT.
70 * This must be present in all externally callable GCM functions which
71 * invoke GHASH operations using FPU-accelerated implementations, or call
72 * static functions which do (such as gcm_encrypt_fastpath128()).
73 */
74 #define GCM_ACCEL_ENTER \
75 GCM_ACCEL_SAVESTATE(savestate); \
76 do { \
77 if (intel_pclmulqdq_instruction_present()) { \
78 kpreempt_disable(); \
79 gcm_accel_save(savestate); \
80 } \
81 _NOTE(CONSTCOND) \
82 } while (0)
/*
 * Balances GCM_ACCEL_ENTER: restores the FPU state saved into `savestate'
 * and re-enables kernel thread preemption iff the PCLMULQDQ path is active.
 * Must appear in the same lexical scope as GCM_ACCEL_ENTER (it references
 * the `savestate' buffer that macro declares).
 */
#define	GCM_ACCEL_EXIT \
	do { \
		if (intel_pclmulqdq_instruction_present()) { \
			gcm_accel_restore(savestate); \
			kpreempt_enable(); \
		} \
		_NOTE(CONSTCOND) \
	} while (0)
91
92 #else /* _KERNEL */
93 #include <sys/auxv.h> /* getisax() */
94 #include <sys/auxv_386.h> /* AV_386_PCLMULQDQ bit */
95 #define SET_ERROR(x) (x)
96 #endif /* _KERNEL */
97
98 extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
99 extern void gcm_init_clmul(const uint64_t hash_init[2], uint8_t Htable[256]);
100 extern void gcm_ghash_clmul(uint64_t ghash[2], const uint8_t Htable[256],
101 const uint8_t *inp, size_t length);
102 static inline int intel_pclmulqdq_instruction_present(void);
103 #else /* !__amd64 */
104 #define GCM_ACCEL_ENTER
105 #define GCM_ACCEL_EXIT
106 #endif /* !__amd64 */
107
/*
 * A 128-bit value held as two 64-bit halves; used by the software GHASH
 * fallback in gcm_mul().  `a' holds the most-significant 64 bits (the
 * halves are loaded/stored with ntohll/htonll around the computation).
 */
struct aes_block {
	uint64_t a;
	uint64_t b;
};
112
113
114 /*
115 * gcm_mul()
116 * Perform a carry-less multiplication (that is, use XOR instead of the
117 * multiply operator) on *x_in and *y and place the result in *res.
118 *
119 * Byte swap the input (*x_in and *y) and the output (*res).
120 *
121 * Note: x_in, y, and res all point to 16-byte numbers (an array of two
122 * 64-bit integers).
123 */
static inline void
gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
{
#ifdef __amd64
	if (intel_pclmulqdq_instruction_present()) {
		/*
		 * FPU context will have been saved and kernel thread
		 * preemption disabled already.
		 */
		gcm_mul_pclmulqdq(x_in, y, res);
	} else
#endif /* __amd64 */
	{
		/* Software fallback: bitwise multiply in GF(2^128). */
		/* R folds in the GHASH field reduction polynomial. */
		static const uint64_t R = 0xe100000000000000ULL;
		struct aes_block z = {0, 0};	/* accumulated product */
		struct aes_block v;		/* shifted multiplicand */
		uint64_t x;
		int i, j;

		v.a = ntohll(y[0]);
		v.b = ntohll(y[1]);

		/* Shift-and-XOR multiply, one bit of x_in at a time. */
		for (j = 0; j < 2; j++) {
			x = ntohll(x_in[j]);
			for (i = 0; i < 64; i++, x <<= 1) {
				/* If the current bit of x is set, z ^= v. */
				if (x & 0x8000000000000000ULL) {
					z.a ^= v.a;
					z.b ^= v.b;
				}
				/*
				 * 128-bit right shift of v; when a bit
				 * drops off the low end, reduce by R.
				 */
				if (v.b & 1ULL) {
					v.b = (v.a << 63)|(v.b >> 1);
					v.a = (v.a >> 1) ^ R;
				} else {
					v.b = (v.a << 63)|(v.b >> 1);
					v.a = v.a >> 1;
				}
			}
		}
		/* Byte-swap the result back to big-endian order. */
		res[0] = htonll(z.a);
		res[1] = htonll(z.b);
	}
}
166
/*
 * GHASH(c, d, t): XOR the 16-byte block `d' into the running hash of
 * context `c', multiply the result by the hash subkey c->gcm_H in
 * GF(2^128) via gcm_mul(), and store it through `t' (typically
 * c->gcm_ghash itself).  On amd64, callers must already hold the FPU
 * (GCM_ACCEL_ENTER) when the PCLMULQDQ path is active.  Expects an
 * `xor_block' callback to be in scope at the expansion site.
 */
#define GHASH(c, d, t) \
	do { \
		xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
		gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
		(uint64_t *)(void *)(t)); \
		_NOTE(CONSTCOND) \
	} while (0)
174
/*
 * Tunable: when set to B_FALSE, gcm_process_contiguous_blocks() never
 * takes the fastpath below and always uses the generic per-block path.
 */
boolean_t gcm_fastpath_enabled = B_TRUE;

/*
 * Fast path for 128-bit block ciphers over block-aligned input written
 * to a single contiguous output buffer.  GHASHes the ciphertext (the
 * input when decrypting, the output when encrypting), generates the CTR
 * keystream either through the optional `cipher_ctr' callback or a
 * simple per-block loop, and leaves the last ciphertext block in
 * ctx->gcm_tmp for the final-tag computation.
 *
 * NOTE(review): the tail bcopy below reads out[length - 16], which
 * assumes length >= 16; the caller's fastpath guard only guarantees
 * length is a multiple of 16 — confirm length == 0 cannot reach here.
 */
static void
gcm_fastpath128(gcm_ctx_t *ctx, const uint8_t *data, size_t length,
    uint8_t *out, boolean_t encrypt,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	/* When decrypting, `data' holds the ciphertext we need to GHASH. */
	if (!encrypt) {
#ifdef __amd64
		if (intel_pclmulqdq_instruction_present())
			gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
			    data, length);
		else
#endif /* __amd64 */
			for (size_t i = 0; i < length; i += 16)
				GHASH(ctx, &data[i], ctx->gcm_ghash);
	}

	if (cipher_ctr != NULL) {
		/*
		 * GCM is almost but not quite like CTR. GCM increments the
		 * counter value *before* processing the first input block,
		 * whereas CTR does so afterwards. So we need to increment
		 * the counter before calling CTR and decrement it afterwards.
		 */
		uint64_t counter = ntohll(ctx->gcm_cb[1]);

		ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
		    ((counter & COUNTER_MASK) + 1));
		cipher_ctr(ctx->gcm_keysched, data, out, length, ctx->gcm_cb);
		counter = ntohll(ctx->gcm_cb[1]);
		ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
		    ((counter & COUNTER_MASK) - 1));
	} else {
		/*
		 * Generic CTR loop: build each counter block directly in
		 * `out' (upper half from gcm_cb[0], lower half the running
		 * counter), encrypt it in place, then XOR with the input.
		 */
		uint64_t counter = ntohll(ctx->gcm_cb[1]);

		for (size_t i = 0; i < length; i += 16) {
			/*LINTED(E_BAD_PTR_CAST_ALIGN)*/
			*(uint64_t *)&out[i] = ctx->gcm_cb[0];
			/*LINTED(E_BAD_PTR_CAST_ALIGN)*/
			*(uint64_t *)&out[i + 8] = htonll(counter++);
			encrypt_block(ctx->gcm_keysched, &out[i], &out[i]);
			xor_block(&data[i], &out[i]);
		}

		ctx->gcm_cb[1] = htonll(counter);
	}

	/* When encrypting, `out' holds the ciphertext we need to GHASH. */
	if (encrypt) {
#ifdef __amd64
		if (intel_pclmulqdq_instruction_present())
			gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
			    out, length);
		else
#endif /* __amd64 */
			for (size_t i = 0; i < length; i += 16)
				GHASH(ctx, &out[i], ctx->gcm_ghash);

		/* If no more data comes in, the last block is the auth tag. */
		bcopy(&out[length - 16], ctx->gcm_tmp, 16);
	}

	ctx->gcm_processed_data_len += length;
}
244
/*
 * Core GCM engine: encrypts (encrypt == B_TRUE) or decrypts a contiguous
 * run of `length' input bytes, updating the running GHASH and the counter
 * block held in `ctx'.  Partial blocks are buffered in ctx->gcm_remainder
 * across calls.  Block-aligned requests meeting the fastpath criteria are
 * dispatched to gcm_fastpath128().  When `out' is NULL the result is
 * written back over the input (`data') in place.
 *
 * Returns CRYPTO_SUCCESS, or CRYPTO_DATA_LEN_RANGE on inconsistent
 * buffered-remainder state.
 */
static int
gcm_process_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size, boolean_t encrypt,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	size_t remainder = length;	/* input bytes not yet consumed */
	size_t need;			/* bytes needed to fill a block */
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
	int rv = CRYPTO_SUCCESS;

	GCM_ACCEL_ENTER;

	/*
	 * GCM mode fastpath requirements:
	 * - fastpath is enabled
	 * - block size is 128 bits
	 * - input is block-aligned
	 * - the counter value won't overflow
	 * - output is a single contiguous region and doesn't alias input
	 */
	if (gcm_fastpath_enabled && block_size == 16 &&
	    ctx->gcm_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
	    ntohll(ctx->gcm_cb[1] & counter_mask) <= ntohll(counter_mask) -
	    length / block_size && CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
		gcm_fastpath128(ctx, (uint8_t *)data, length,
		    CRYPTO_DATA_FIRST_BLOCK(out), encrypt, encrypt_block,
		    xor_block, cipher_ctr);
		out->cd_offset += length;
		goto out;
	}

	if (length + ctx->gcm_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
		    length);
		ctx->gcm_remainder_len += length;
		ctx->gcm_copy_to = datap;
		goto out;
	}

	lastp = (uint8_t *)ctx->gcm_cb;
	if (out != NULL)
		crypto_init_ptrs(out, &iov_or_mp, &offset);

	do {
		/* Unprocessed data from last call. */
		if (ctx->gcm_remainder_len > 0) {
			/* Top up the buffered partial block from datap. */
			need = block_size - ctx->gcm_remainder_len;

			if (need > remainder) {
				rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
				goto out;
			}

			bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
			    [ctx->gcm_remainder_len], need);

			blockp = (uint8_t *)ctx->gcm_remainder;
		} else {
			blockp = datap;
		}

		/* add ciphertext to the hash */
		if (!encrypt)
			GHASH(ctx, blockp, ctx->gcm_ghash);

		/*
		 * Increment counter. Counter bits are confined
		 * to the bottom 32 bits of the counter block.
		 */
		counter = ntohll(ctx->gcm_cb[1] & counter_mask);
		counter = htonll(counter + 1);
		counter &= counter_mask;
		ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

		/* E(K, CB) into gcm_tmp, then XOR in the input block. */
		encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
		    (uint8_t *)ctx->gcm_tmp);
		xor_block(blockp, (uint8_t *)ctx->gcm_tmp);

		lastp = (uint8_t *)ctx->gcm_tmp;

		ctx->gcm_processed_data_len += block_size;

		if (out == NULL) {
			/* In-place operation: write back over the input. */
			if (ctx->gcm_remainder_len > 0) {
				bcopy(blockp, ctx->gcm_copy_to,
				    ctx->gcm_remainder_len);
				bcopy(blockp + ctx->gcm_remainder_len, datap,
				    need);
			}
		} else {
			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
			    &out_data_1_len, &out_data_2, block_size);

			/* copy block to where it belongs */
			if (out_data_1_len == block_size) {
				copy_block(lastp, out_data_1);
			} else {
				/* Output block straddles two regions. */
				bcopy(lastp, out_data_1, out_data_1_len);
				if (out_data_2 != NULL) {
					bcopy(lastp + out_data_1_len,
					    out_data_2,
					    block_size - out_data_1_len);
				}
			}
			/* update offset */
			out->cd_offset += block_size;
		}

		/* add ciphertext to the hash */
		if (encrypt)
			GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);

		/* Update pointer to next block of data to be processed. */
		if (ctx->gcm_remainder_len != 0) {
			datap += need;
			ctx->gcm_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->gcm_remainder, remainder);
			ctx->gcm_remainder_len = remainder;
			ctx->gcm_copy_to = datap;
			goto out;
		}
		ctx->gcm_copy_to = NULL;

	} while (remainder > 0);
out:
	GCM_ACCEL_EXIT;

	return (rv);
}
397
398
399 /*
400 * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
401 * is done in another function.
402 */
403 /*ARGSUSED*/
404 int
405 gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
406 crypto_data_t *out, size_t block_size,
407 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
408 void (*copy_block)(const uint8_t *, uint8_t *),
409 void (*xor_block)(const uint8_t *, uint8_t *),
410 int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
411 uint64_t *))
412 {
413 return (gcm_process_contiguous_blocks(ctx, data, length, out,
414 block_size, B_TRUE, encrypt_block, copy_block, xor_block,
415 cipher_ctr));
416 }
417
418 /* ARGSUSED */
419 int
420 gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
421 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
422 void (*copy_block)(const uint8_t *, uint8_t *),
423 void (*xor_block)(const uint8_t *, uint8_t *))
424 {
425 uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
426 uint8_t *ghash, *macp;
427 int i, rv;
428
429 GCM_ACCEL_ENTER;
430
431 if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
432 rv = CRYPTO_DATA_LEN_RANGE;
433 goto out;
434 }
435
436 ghash = (uint8_t *)ctx->gcm_ghash;
437
438 if (ctx->gcm_remainder_len > 0) {
439 uint64_t counter;
440 uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
441
442 /*
443 * Here is where we deal with data that is not a
444 * multiple of the block size.
445 */
446
447 /*
448 * Increment counter.
449 */
450 counter = ntohll(ctx->gcm_cb[1] & counter_mask);
451 counter = htonll(counter + 1);
452 counter &= counter_mask;
453 ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
454
455 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
456 (uint8_t *)ctx->gcm_tmp);
457
458 macp = (uint8_t *)ctx->gcm_remainder;
459 bzero(macp + ctx->gcm_remainder_len,
460 block_size - ctx->gcm_remainder_len);
461
462 /* XOR with counter block */
463 for (i = 0; i < ctx->gcm_remainder_len; i++) {
464 macp[i] ^= tmpp[i];
465 }
466
467 /* add ciphertext to the hash */
468 GHASH(ctx, macp, ghash);
469
470 ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
471 }
472
473 ctx->gcm_len_a_len_c[1] =
474 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
475 GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
476 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
477 (uint8_t *)ctx->gcm_J0);
478 xor_block((uint8_t *)ctx->gcm_J0, ghash);
479
480 if (ctx->gcm_remainder_len > 0) {
481 rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
482 if (rv != CRYPTO_SUCCESS)
483 goto out;
484 }
485 out->cd_offset += ctx->gcm_remainder_len;
486 ctx->gcm_remainder_len = 0;
487 rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
488 if (rv != CRYPTO_SUCCESS)
489 goto out;
490 out->cd_offset += ctx->gcm_tag_len;
491 out:
492 GCM_ACCEL_EXIT;
493 return (rv);
494 }
495
496 /*
497 * This will only deal with decrypting the last block of the input that
498 * might not be a multiple of block length.
499 */
500 /*ARGSUSED*/
static void
gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, uint8_t *data, size_t length,
    size_t block_size, crypto_data_t *out,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint64_t counter;
	uint64_t counter_mask = ntohll(0x00000000ffffffffULL);

	/* pad last block with zeros and add it to GHASH */
	bcopy(data, ctx->gcm_tmp, length);
	bzero(((uint8_t *)ctx->gcm_tmp) + length,
	    sizeof (ctx->gcm_tmp) - length);
	GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);

	/*
	 * Increment counter.
	 * Counter bits are confined to the bottom 32 bits.
	 */
	counter = ntohll(ctx->gcm_cb[1] & counter_mask);
	counter = htonll(counter + 1);
	counter &= counter_mask;
	ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;

	/* Generate the keystream block for this counter value. */
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
	    (uint8_t *)ctx->gcm_tmp);

	/* XOR with counter block */
	for (size_t i = 0; i < length; i++)
		((uint8_t *)ctx->gcm_tmp)[i] ^= data[i];

	/* Deliver the plaintext; out == NULL means decrypt in place. */
	if (out != NULL) {
		(void) crypto_put_output_data((uchar_t *)ctx->gcm_tmp, out,
		    length);
		out->cd_offset += length;
	} else {
		bcopy(ctx->gcm_tmp, data, length);
	}
}
540
541 /* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
    uint64_t *))
{
	int rv = CRYPTO_SUCCESS;

	GCM_ACCEL_ENTER;

	/*
	 * Previous calls accumulate data in the input buffer to make sure
	 * we have the auth tag (the last part of the ciphertext) when we
	 * receive a final() call.
	 */
	if (ctx->gcm_last_input_fill > 0) {
		/* Try to complete the input buffer */
		size_t to_copy = MIN(length,
		    sizeof (ctx->gcm_last_input) - ctx->gcm_last_input_fill);

		bcopy(data, ctx->gcm_last_input + ctx->gcm_last_input_fill,
		    to_copy);
		data += to_copy;
		ctx->gcm_last_input_fill += to_copy;
		length -= to_copy;

		if (ctx->gcm_last_input_fill < sizeof (ctx->gcm_last_input))
			/* Not enough input data to continue */
			goto out;

		if (length < ctx->gcm_tag_len) {
			/*
			 * There isn't enough data ahead to constitute a full
			 * auth tag, so only crunch one input block and copy
			 * the remainder of the input into our buffer.
			 */
			rv = gcm_process_contiguous_blocks(ctx,
			    (char *)ctx->gcm_last_input, block_size, out,
			    block_size, B_FALSE, encrypt_block, copy_block,
			    xor_block, cipher_ctr);
			if (rv != CRYPTO_SUCCESS)
				goto out;
			/* Shift the unconsumed buffer tail forward. */
			ctx->gcm_last_input_fill -= block_size;
			bcopy(ctx->gcm_last_input + block_size,
			    ctx->gcm_last_input, ctx->gcm_last_input_fill);
			bcopy(data, ctx->gcm_last_input +
			    ctx->gcm_last_input_fill, length);
			ctx->gcm_last_input_fill += length;
			/* No more input left */
			goto out;
		}
		/*
		 * There is enough data ahead for the auth tag, so crunch
		 * everything in our buffer now and empty it.
		 */
		rv = gcm_process_contiguous_blocks(ctx,
		    (char *)ctx->gcm_last_input, ctx->gcm_last_input_fill,
		    out, block_size, B_FALSE, encrypt_block, copy_block,
		    xor_block, cipher_ctr);
		if (rv != CRYPTO_SUCCESS)
			goto out;
		ctx->gcm_last_input_fill = 0;
	}
	/*
	 * Last input buffer is empty, so what's left ahead is block-aligned.
	 * Crunch all the blocks up until the near end, which might be our
	 * auth tag and we must NOT decrypt.
	 */
	ASSERT(ctx->gcm_last_input_fill == 0);
	if (length >= block_size + ctx->gcm_tag_len) {
		size_t to_decrypt = (length - ctx->gcm_tag_len) &
		    ~(block_size - 1);

		rv = gcm_process_contiguous_blocks(ctx, data, to_decrypt, out,
		    block_size, B_FALSE, encrypt_block, copy_block, xor_block,
		    cipher_ctr);
		if (rv != CRYPTO_SUCCESS)
			goto out;
		data += to_decrypt;
		length -= to_decrypt;
	}
	/*
	 * Copy the remainder into our input buffer, it's potentially
	 * the auth tag and a last partial block.
	 */
	ASSERT(length < sizeof (ctx->gcm_last_input));
	bcopy(data, ctx->gcm_last_input, length);
	ctx->gcm_last_input_fill += length;
out:
	GCM_ACCEL_EXIT;

	return (rv);
}
638
639 int
640 gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
641 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
642 void (*copy_block)(const uint8_t *, uint8_t *),
643 void (*xor_block)(const uint8_t *, uint8_t *),
644 int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
645 uint64_t *))
646 {
647 int rv = CRYPTO_SUCCESS;
648
649 /* Check there's enough data to at least compute a tag */
650 if (ctx->gcm_last_input_fill < ctx->gcm_tag_len)
651 return (SET_ERROR(CRYPTO_DATA_LEN_RANGE));
652
653 GCM_ACCEL_ENTER;
654
655 /* Finish any unprocessed input */
656 if (ctx->gcm_last_input_fill > ctx->gcm_tag_len) {
657 size_t last_blk_len = MIN(block_size,
658 ctx->gcm_last_input_fill - ctx->gcm_tag_len);
659
660 /* Finish last full block */
661 if (last_blk_len >= block_size) {
662 rv = gcm_process_contiguous_blocks(ctx,
663 (char *)ctx->gcm_last_input, block_size, out,
664 block_size, B_FALSE, encrypt_block, copy_block,
665 xor_block, cipher_ctr);
666 if (rv != CRYPTO_SUCCESS)
667 goto errout;
668
669 last_blk_len -= block_size;
670 ctx->gcm_processed_data_len += block_size;
671 ctx->gcm_last_input_fill -= block_size;
672
673 /* Shift what remains in the input buffer forward */
674 bcopy(ctx->gcm_last_input + block_size,
675 ctx->gcm_last_input, ctx->gcm_last_input_fill);
676 }
677 /* Finish last incomplete block before auth tag */
678 if (last_blk_len > 0) {
679 gcm_decrypt_incomplete_block(ctx, ctx->gcm_last_input,
680 last_blk_len, block_size, out, encrypt_block,
681 xor_block);
682
683 ctx->gcm_processed_data_len += last_blk_len;
684 ctx->gcm_last_input_fill -= last_blk_len;
685
686 /* Shift what remains in the input buffer forward */
687 bcopy(ctx->gcm_last_input + last_blk_len,
688 ctx->gcm_last_input, ctx->gcm_last_input_fill);
689 }
690 /* Now the last_input buffer holds just the auth tag */
691 }
692
693 ASSERT(ctx->gcm_last_input_fill == ctx->gcm_tag_len);
694
695 ctx->gcm_len_a_len_c[1] =
696 htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
697 GHASH(ctx, ctx->gcm_len_a_len_c, ctx->gcm_ghash);
698 encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
699 (uint8_t *)ctx->gcm_J0);
700 xor_block((uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_ghash);
701
702 GCM_ACCEL_EXIT;
703
704 /* compare the input authentication tag with what we calculated */
705 if (bcmp(&ctx->gcm_last_input, ctx->gcm_ghash, ctx->gcm_tag_len) != 0)
706 return (SET_ERROR(CRYPTO_INVALID_MAC));
707
708 return (CRYPTO_SUCCESS);
709
710 errout:
711 GCM_ACCEL_EXIT;
712 return (rv);
713 }
714
715 static int
716 gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
717 {
718 size_t tag_len;
719
720 /*
721 * Check the length of the authentication tag (in bits).
722 */
723 tag_len = gcm_param->ulTagBits;
724 switch (tag_len) {
725 case 32:
726 case 64:
727 case 96:
728 case 104:
729 case 112:
730 case 120:
731 case 128:
732 break;
733 default:
734 return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
735 }
736
737 if (gcm_param->ulIvLen == 0)
738 return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
739
740 return (CRYPTO_SUCCESS);
741 }
742
743 /*ARGSUSED*/
static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
    gcm_ctx_t *ctx, size_t block_size,
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint8_t *cb;
	ulong_t remainder = iv_len;
	ulong_t processed = 0;
	uint8_t *datap, *ghash;
	uint64_t len_a_len_c[2];

	ghash = (uint8_t *)ctx->gcm_ghash;
	cb = (uint8_t *)ctx->gcm_cb;
	if (iv_len == 12) {
		/* 96-bit IV: J0 = IV || 0^31 || 1, no hashing needed. */
		bcopy(iv, cb, 12);
		cb[12] = 0;
		cb[13] = 0;
		cb[14] = 0;
		cb[15] = 1;
		/* J0 will be used again in the final */
		copy_block(cb, (uint8_t *)ctx->gcm_J0);
	} else {
		/* GHASH the IV */
		do {
			if (remainder < block_size) {
				/* Zero-pad the trailing partial block. */
				bzero(cb, block_size);
				bcopy(&(iv[processed]), cb, remainder);
				datap = (uint8_t *)cb;
				remainder = 0;
			} else {
				datap = (uint8_t *)(&(iv[processed]));
				processed += block_size;
				remainder -= block_size;
			}
			GHASH(ctx, datap, ghash);
		} while (remainder > 0);

		/* Fold in 0 || len(IV) in bits to finish J0. */
		len_a_len_c[0] = 0;
		len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
		GHASH(ctx, len_a_len_c, ctx->gcm_J0);

		/* J0 will be used again in the final */
		copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
	}
}
790
791 /*
792 * The following function is called at encrypt or decrypt init time
793 * for AES GCM mode.
794 */
int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
    unsigned char *auth_data, size_t auth_data_len, size_t block_size,
    int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *))
{
	uint8_t *ghash, *datap, *authp;
	size_t remainder, processed;

	GCM_ACCEL_ENTER;

	/* encrypt zero block to get subkey H */
	bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
	encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
	    (uint8_t *)ctx->gcm_H);

	/* Derive J0 and the initial counter block from the IV. */
	gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
	    copy_block, xor_block);

#ifdef __amd64
	if (intel_pclmulqdq_instruction_present()) {
		/* Precompute the byte-swapped H table for the clmul path. */
		uint64_t H_bswap64[2] = {
			ntohll(ctx->gcm_H[0]), ntohll(ctx->gcm_H[1])
		};

		gcm_init_clmul(H_bswap64, ctx->gcm_H_table);
	}
#endif

	/* GHASH the AAD, staging partial blocks through gcm_tmp. */
	authp = (uint8_t *)ctx->gcm_tmp;
	ghash = (uint8_t *)ctx->gcm_ghash;
	bzero(authp, block_size);
	bzero(ghash, block_size);

	processed = 0;
	remainder = auth_data_len;
	do {
		if (remainder < block_size) {
			/*
			 * There's not a block full of data, pad rest of
			 * buffer with zero
			 */
			bzero(authp, block_size);
			bcopy(&(auth_data[processed]), authp, remainder);
			datap = (uint8_t *)authp;
			remainder = 0;
		} else {
			datap = (uint8_t *)(&(auth_data[processed]));
			processed += block_size;
			remainder -= block_size;
		}

		/* add auth data to the hash */
		GHASH(ctx, datap, ghash);

	} while (remainder > 0);

	GCM_ACCEL_EXIT;

	return (CRYPTO_SUCCESS);
}
857
858 int
859 gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
860 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
861 void (*copy_block)(const uint8_t *, uint8_t *),
862 void (*xor_block)(const uint8_t *, uint8_t *))
863 {
864 /*
865 * No GHASH invocations in this function and gcm_init does its own
866 * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
867 */
868 int rv;
869 CK_AES_GCM_PARAMS *gcm_param;
870
871 if (param != NULL) {
872 gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
873
874 if ((rv = gcm_validate_args(gcm_param)) != 0) {
875 return (rv);
876 }
877
878 gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
879 gcm_ctx->gcm_tag_len >>= 3;
880 gcm_ctx->gcm_processed_data_len = 0;
881
882 /* these values are in bits */
883 gcm_ctx->gcm_len_a_len_c[0]
884 = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
885
886 rv = CRYPTO_SUCCESS;
887 gcm_ctx->gcm_flags |= GCM_MODE;
888 } else {
889 rv = CRYPTO_MECHANISM_PARAM_INVALID;
890 goto out;
891 }
892
893 if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
894 gcm_param->pAAD, gcm_param->ulAADLen, block_size,
895 encrypt_block, copy_block, xor_block) != 0) {
896 rv = CRYPTO_MECHANISM_PARAM_INVALID;
897 }
898 out:
899 return (rv);
900 }
901
902 int
903 gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
904 int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
905 void (*copy_block)(const uint8_t *, uint8_t *),
906 void (*xor_block)(const uint8_t *, uint8_t *))
907 {
908 /*
909 * No GHASH invocations in this function and gcm_init does its own
910 * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
911 */
912 int rv;
913 CK_AES_GMAC_PARAMS *gmac_param;
914
915 if (param != NULL) {
916 gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
917
918 gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
919 gcm_ctx->gcm_processed_data_len = 0;
920
921 /* these values are in bits */
922 gcm_ctx->gcm_len_a_len_c[0]
923 = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
924
925 rv = CRYPTO_SUCCESS;
926 gcm_ctx->gcm_flags |= GMAC_MODE;
927 } else {
928 rv = CRYPTO_MECHANISM_PARAM_INVALID;
929 goto out;
930 }
931
932 if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
933 gmac_param->pAAD, gmac_param->ulAADLen, block_size,
934 encrypt_block, copy_block, xor_block) != 0) {
935 rv = CRYPTO_MECHANISM_PARAM_INVALID;
936 }
937 out:
938 return (rv);
939 }
940
941 void *
942 gcm_alloc_ctx(int kmflag)
943 {
944 gcm_ctx_t *gcm_ctx;
945
946 #ifdef _KERNEL
947 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
948 #else
949 if ((gcm_ctx = calloc(1, sizeof (gcm_ctx_t))) == NULL)
950 #endif
951 return (NULL);
952
953 gcm_ctx->gcm_flags = GCM_MODE;
954 return (gcm_ctx);
955 }
956
957 void *
958 gmac_alloc_ctx(int kmflag)
959 {
960 gcm_ctx_t *gcm_ctx;
961
962 #ifdef _KERNEL
963 if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
964 #else
965 if ((gcm_ctx = calloc(1, sizeof (gcm_ctx_t))) == NULL)
966 #endif
967 return (NULL);
968
969 gcm_ctx->gcm_flags = GMAC_MODE;
970 return (gcm_ctx);
971 }
972
/*
 * Record the kmem allocation flag to use for later allocations made on
 * behalf of this context (presumably KM_SLEEP/KM_NOSLEEP — confirm with
 * callers; this file only stores the value).
 */
void
gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
{
	ctx->gcm_kmflag = kmflag;
}
978
979
980 #ifdef __amd64
981 /*
982 * Return 1 if executing on Intel with PCLMULQDQ instructions,
983 * otherwise 0 (i.e., Intel without PCLMULQDQ or AMD64).
984 * Cache the result, as the CPU can't change.
985 *
986 * Note: the userland version uses getisax(). The kernel version uses
987 * is_x86_featureset().
988 */
989 static inline int
990 intel_pclmulqdq_instruction_present(void)
991 {
992 static int cached_result = -1;
993
994 if (cached_result == -1) { /* first time */
995 #ifdef _KERNEL
996 cached_result =
997 is_x86_feature(x86_featureset, X86FSET_PCLMULQDQ);
998 #else
999 uint_t ui = 0;
1000
1001 (void) getisax(&ui, 1);
1002 cached_result = (ui & AV_386_PCLMULQDQ) != 0;
1003 #endif /* _KERNEL */
1004 }
1005
1006 return (cached_result);
1007 }
1008 #endif /* __amd64 */