4896 Performance improvements for KCF AES modes
*** 18,27 ****
--- 18,28 ----
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ * Copyright 2015 by Saso Kiselkov. All rights reserved.
*/
#ifndef _KERNEL
#include <strings.h>
*** 28,65 ****
#include <limits.h>
#include <assert.h>
#include <security/cryptoki.h>
#endif /* _KERNEL */
!
#include <sys/types.h>
#include <sys/kmem.h>
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
#ifdef __amd64
#ifdef _KERNEL
#include <sys/cpuvar.h> /* cpu_t, CPU */
#include <sys/x86_archext.h> /* x86_featureset, X86FSET_*, CPUID_* */
#include <sys/disp.h> /* kpreempt_disable(), kpreempt_enable */
/* Workaround for no XMM kernel thread save/restore */
! #define KPREEMPT_DISABLE kpreempt_disable()
! #define KPREEMPT_ENABLE kpreempt_enable()
#else
#include <sys/auxv.h> /* getisax() */
#include <sys/auxv_386.h> /* AV_386_PCLMULQDQ bit */
! #define KPREEMPT_DISABLE
! #define KPREEMPT_ENABLE
#endif /* _KERNEL */
extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
! static int intel_pclmulqdq_instruction_present(void);
! #endif /* __amd64 */
struct aes_block {
uint64_t a;
uint64_t b;
};
--- 29,111 ----
#include <limits.h>
#include <assert.h>
#include <security/cryptoki.h>
#endif /* _KERNEL */
! #include <sys/cmn_err.h>
#include <sys/types.h>
#include <sys/kmem.h>
+ #define INLINE_CRYPTO_GET_PTRS
#include <modes/modes.h>
#include <sys/crypto/common.h>
#include <sys/crypto/impl.h>
#include <sys/byteorder.h>
+ #define COUNTER_MASK 0x00000000ffffffffULL
+
+ #ifdef _KERNEL
+ #include <sys/sdt.h> /* SET_ERROR */
+ #endif /* _KERNEL */
+
#ifdef __amd64
#ifdef _KERNEL
#include <sys/cpuvar.h> /* cpu_t, CPU */
#include <sys/x86_archext.h> /* x86_featureset, X86FSET_*, CPUID_* */
#include <sys/disp.h> /* kpreempt_disable(), kpreempt_enable */
/* Workaround for no XMM kernel thread save/restore */
! extern void gcm_accel_save(void *savestate);
! extern void gcm_accel_restore(void *savestate);
+ #if defined(lint) || defined(__lint)
+ #define GCM_ACCEL_SAVESTATE(name) uint8_t name[16 * 16 + 8]
#else
+ #define GCM_ACCEL_SAVESTATE(name) \
+ /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
+ uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
+ #endif
+
+ /*
+ * Disables kernel thread preemption and calls gcm_accel_save() iff
+ * Intel PCLMULQDQ support is present. Must be balanced by GCM_ACCEL_EXIT.
+ * This must be present in all externally callable GCM functions which
+ * invoke GHASH operations using FPU-accelerated implementations, or call
+ * static functions which do (such as gcm_fastpath128()).
+ */
+ #define GCM_ACCEL_ENTER \
+ GCM_ACCEL_SAVESTATE(savestate); \
+ do { \
+ if (intel_pclmulqdq_instruction_present()) { \
+ kpreempt_disable(); \
+ gcm_accel_save(savestate); \
+ } \
+ _NOTE(CONSTCOND) \
+ } while (0)
+ #define GCM_ACCEL_EXIT \
+ do { \
+ if (intel_pclmulqdq_instruction_present()) { \
+ gcm_accel_restore(savestate); \
+ kpreempt_enable(); \
+ } \
+ _NOTE(CONSTCOND) \
+ } while (0)
+
+ #else /* _KERNEL */
#include <sys/auxv.h> /* getisax() */
#include <sys/auxv_386.h> /* AV_386_PCLMULQDQ bit */
! #define SET_ERROR(x) (x)
#endif /* _KERNEL */
extern void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res);
! extern void gcm_init_clmul(const uint64_t hash_init[2], uint8_t Htable[256]);
! extern void gcm_ghash_clmul(uint64_t ghash[2], const uint8_t Htable[256],
! const uint8_t *inp, size_t length);
! static inline int intel_pclmulqdq_instruction_present(void);
! #else /* !__amd64 */
! #define GCM_ACCEL_ENTER
! #define GCM_ACCEL_EXIT
! #endif /* !__amd64 */
struct aes_block {
uint64_t a;
uint64_t b;
};
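A minimal usage sketch of the GCM_ACCEL_ENTER/GCM_ACCEL_EXIT bracketing described in the block comment above. The function name is hypothetical; the real call sites appear in the hunks below.

int
gcm_example_entry_point(gcm_ctx_t *ctx)	/* hypothetical caller */
{
	int rv = CRYPTO_SUCCESS;

	/*
	 * Declares savestate[]; iff PCLMULQDQ is present, disables kernel
	 * preemption and saves the XMM registers the GHASH code will use.
	 */
	GCM_ACCEL_ENTER;

	/* ... GHASH work (gcm_mul(), gcm_ghash_clmul()) happens here ... */

	/* Restores the saved XMM state and re-enables preemption. */
	GCM_ACCEL_EXIT;
	return (rv);
}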
*** 73,90 ****
* Byte swap the input (*x_in and *y) and the output (*res).
*
* Note: x_in, y, and res all point to 16-byte numbers (an array of two
* 64-bit integers).
*/
! void
gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
{
#ifdef __amd64
if (intel_pclmulqdq_instruction_present()) {
! KPREEMPT_DISABLE;
gcm_mul_pclmulqdq(x_in, y, res);
- KPREEMPT_ENABLE;
} else
#endif /* __amd64 */
{
static const uint64_t R = 0xe100000000000000ULL;
struct aes_block z = {0, 0};
--- 119,138 ----
* Byte swap the input (*x_in and *y) and the output (*res).
*
* Note: x_in, y, and res all point to 16-byte numbers (an array of two
* 64-bit integers).
*/
! static inline void
gcm_mul(uint64_t *x_in, uint64_t *y, uint64_t *res)
{
#ifdef __amd64
if (intel_pclmulqdq_instruction_present()) {
! /*
! * FPU context will have been saved and kernel thread
! * preemption disabled already.
! */
gcm_mul_pclmulqdq(x_in, y, res);
} else
#endif /* __amd64 */
{
static const uint64_t R = 0xe100000000000000ULL;
struct aes_block z = {0, 0};
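For reference, a self-contained sketch of the GF(2^128) shift-and-reduce multiply that the software fallback in gcm_mul() performs (NIST SP 800-38D, Algorithm 1). The helper name and the host-order word convention are mine; gcm_mul() additionally byte swaps its inputs and output, which is omitted here.

#include <stdint.h>

/* GF(2^128) multiply, MSB-first bit order; words hold bits 0..63, 64..127. */
static void
ghash_gfmul(const uint64_t x[2], const uint64_t y[2], uint64_t res[2])
{
	const uint64_t R = 0xe100000000000000ULL; /* x^128 + x^7 + x^2 + x + 1 */
	uint64_t z0 = 0, z1 = 0;
	uint64_t v0 = y[0], v1 = y[1];
	int i;

	for (i = 0; i < 128; i++) {
		/* If bit i of x is set (bit 0 = MSB of x[0]), z ^= v. */
		uint64_t xw = (i < 64) ? x[0] : x[1];

		if ((xw >> (63 - (i & 63))) & 1) {
			z0 ^= v0;
			z1 ^= v1;
		}
		/* v >>= 1 across both words; reduce by R if a bit fell off. */
		int lsb = (int)(v1 & 1);

		v1 = (v1 >> 1) | (v0 << 63);
		v0 >>= 1;
		if (lsb)
			v0 ^= R;
	}
	res[0] = z0;
	res[1] = z1;
}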
*** 114,140 ****
res[0] = htonll(z.a);
res[1] = htonll(z.b);
}
}
-
#define GHASH(c, d, t) \
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
! (uint64_t *)(void *)(t));
! /*
! * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
! * is done in another function.
*/
! int
! gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
! crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
size_t remainder = length;
size_t need;
uint8_t *datap = (uint8_t *)data;
uint8_t *blockp;
--- 162,257 ----
res[0] = htonll(z.a);
res[1] = htonll(z.b);
}
}
#define GHASH(c, d, t) \
+ do { \
xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
gcm_mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
! (uint64_t *)(void *)(t)); \
! _NOTE(CONSTCOND) \
! } while (0)
+ boolean_t gcm_fastpath_enabled = B_TRUE;
! static void
! gcm_fastpath128(gcm_ctx_t *ctx, const uint8_t *data, size_t length,
! uint8_t *out, boolean_t encrypt,
! int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *),
! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
! uint64_t *))
! {
! /* When decrypting, `data' holds the ciphertext we need to GHASH. */
! if (!encrypt) {
! #ifdef __amd64
! if (intel_pclmulqdq_instruction_present())
! gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
! data, length);
! else
! #endif /* __amd64 */
! for (size_t i = 0; i < length; i += 16)
! GHASH(ctx, &data[i], ctx->gcm_ghash);
! }
!
! if (cipher_ctr != NULL) {
! /*
! * GCM is almost but not quite like CTR. GCM increments the
! * counter value *before* processing the first input block,
! * whereas CTR does so afterwards. So we need to increment
! * the counter before calling CTR and decrement it afterwards.
*/
! uint64_t counter = ntohll(ctx->gcm_cb[1]);
!
! ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
! ((counter & COUNTER_MASK) + 1));
! cipher_ctr(ctx->gcm_keysched, data, out, length, ctx->gcm_cb);
! counter = ntohll(ctx->gcm_cb[1]);
! ctx->gcm_cb[1] = htonll((counter & ~COUNTER_MASK) |
! ((counter & COUNTER_MASK) - 1));
! } else {
! uint64_t counter = ntohll(ctx->gcm_cb[1]);
!
! for (size_t i = 0; i < length; i += 16) {
! /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
! *(uint64_t *)&out[i] = ctx->gcm_cb[0];
! /*LINTED(E_BAD_PTR_CAST_ALIGN)*/
! *(uint64_t *)&out[i + 8] = htonll(counter++);
! encrypt_block(ctx->gcm_keysched, &out[i], &out[i]);
! xor_block(&data[i], &out[i]);
! }
!
! ctx->gcm_cb[1] = htonll(counter);
! }
!
! /* When encrypting, `out' holds the ciphertext we need to GHASH. */
! if (encrypt) {
! #ifdef __amd64
! if (intel_pclmulqdq_instruction_present())
! gcm_ghash_clmul(ctx->gcm_ghash, ctx->gcm_H_table,
! out, length);
! else
! #endif /* __amd64 */
! for (size_t i = 0; i < length; i += 16)
! GHASH(ctx, &out[i], ctx->gcm_ghash);
!
! /* If no more data comes in, the last block is the auth tag. */
! bcopy(&out[length - 16], ctx->gcm_tmp, 16);
! }
!
! ctx->gcm_processed_data_len += length;
! }
!
! static int
! gcm_process_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
! crypto_data_t *out, size_t block_size, boolean_t encrypt,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *),
! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
! uint64_t *))
{
size_t remainder = length;
size_t need;
uint8_t *datap = (uint8_t *)data;
uint8_t *blockp;
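A worked illustration of the counter fixup that gcm_fastpath128() performs around cipher_ctr(), per the comment above (illustrative only, not part of the webrev):

/*
 * With the low 32 counter bits at n and three blocks to process:
 *
 *	GCM keystream blocks:	E(K, n+1), E(K, n+2), E(K, n+3)
 *	CTR keystream blocks:	E(K, n),   E(K, n+1), E(K, n+2)
 *
 * Pre-incrementing the counter to n+1 makes cipher_ctr() emit the GCM
 * sequence; CTR then leaves the counter at n+4, and decrementing it back
 * to n+3 matches what the increment-before-use loop in
 * gcm_process_contiguous_blocks() would have left behind.
 */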
*** 144,162 ****
uint8_t *out_data_1;
uint8_t *out_data_2;
size_t out_data_1_len;
uint64_t counter;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
if (length + ctx->gcm_remainder_len < block_size) {
/* accumulate bytes here and return */
bcopy(datap,
(uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
length);
ctx->gcm_remainder_len += length;
ctx->gcm_copy_to = datap;
! return (CRYPTO_SUCCESS);
}
lastp = (uint8_t *)ctx->gcm_cb;
if (out != NULL)
crypto_init_ptrs(out, &iov_or_mp, &offset);
--- 261,301 ----
uint8_t *out_data_1;
uint8_t *out_data_2;
size_t out_data_1_len;
uint64_t counter;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int rv = CRYPTO_SUCCESS;
+ GCM_ACCEL_ENTER;
+
+ /*
+ * GCM mode fastpath requirements:
+ * - fastpath is enabled
+ * - block size is 128 bits
+ * - input is block-aligned
+ * - the counter value won't overflow
+ * - output is a single contiguous region and doesn't alias input
+ */
+ if (gcm_fastpath_enabled && block_size == 16 &&
+ ctx->gcm_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
+ ntohll(ctx->gcm_cb[1] & counter_mask) <= ntohll(counter_mask) -
+ length / block_size && CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
+ gcm_fastpath128(ctx, (uint8_t *)data, length,
+ CRYPTO_DATA_FIRST_BLOCK(out), encrypt, encrypt_block,
+ xor_block, cipher_ctr);
+ out->cd_offset += length;
+ goto out;
+ }
+
if (length + ctx->gcm_remainder_len < block_size) {
/* accumulate bytes here and return */
bcopy(datap,
(uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
length);
ctx->gcm_remainder_len += length;
ctx->gcm_copy_to = datap;
! goto out;
}
lastp = (uint8_t *)ctx->gcm_cb;
if (out != NULL)
crypto_init_ptrs(out, &iov_or_mp, &offset);
*** 164,184 ****
do {
/* Unprocessed data from last call. */
if (ctx->gcm_remainder_len > 0) {
need = block_size - ctx->gcm_remainder_len;
! if (need > remainder)
! return (CRYPTO_DATA_LEN_RANGE);
bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
[ctx->gcm_remainder_len], need);
blockp = (uint8_t *)ctx->gcm_remainder;
} else {
blockp = datap;
}
/*
* Increment counter. Counter bits are confined
* to the bottom 32 bits of the counter block.
*/
counter = ntohll(ctx->gcm_cb[1] & counter_mask);
--- 303,329 ----
do {
/* Unprocessed data from last call. */
if (ctx->gcm_remainder_len > 0) {
need = block_size - ctx->gcm_remainder_len;
! if (need > remainder) {
! rv = SET_ERROR(CRYPTO_DATA_LEN_RANGE);
! goto out;
! }
bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
[ctx->gcm_remainder_len], need);
blockp = (uint8_t *)ctx->gcm_remainder;
} else {
blockp = datap;
}
+ /* add ciphertext to the hash */
+ if (!encrypt)
+ GHASH(ctx, blockp, ctx->gcm_ghash);
+
/*
* Increment counter. Counter bits are confined
* to the bottom 32 bits of the counter block.
*/
counter = ntohll(ctx->gcm_cb[1] & counter_mask);
*** 219,228 ****
--- 364,374 ----
/* update offset */
out->cd_offset += block_size;
}
/* add ciphertext to the hash */
+ if (encrypt)
GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
/* Update pointer to next block of data to be processed. */
if (ctx->gcm_remainder_len != 0) {
datap += need;
*** 242,268 ****
}
ctx->gcm_copy_to = NULL;
} while (remainder > 0);
out:
! return (CRYPTO_SUCCESS);
}
/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
uint8_t *ghash, *macp;
int i, rv;
! if (out->cd_length <
! (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
! return (CRYPTO_DATA_LEN_RANGE);
}
ghash = (uint8_t *)ctx->gcm_ghash;
if (ctx->gcm_remainder_len > 0) {
--- 388,438 ----
}
ctx->gcm_copy_to = NULL;
} while (remainder > 0);
out:
! GCM_ACCEL_EXIT;
!
! return (rv);
}
+
+ /*
+ * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode
+ * is done in another function.
+ */
+ /*ARGSUSED*/
+ int
+ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+ uint64_t *))
+ {
+ return (gcm_process_contiguous_blocks(ctx, data, length, out,
+ block_size, B_TRUE, encrypt_block, copy_block, xor_block,
+ cipher_ctr));
+ }
+
/* ARGSUSED */
int
gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
uint8_t *ghash, *macp;
int i, rv;
! GCM_ACCEL_ENTER;
!
! if (out->cd_length < (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
! rv = CRYPTO_DATA_LEN_RANGE;
! goto out;
}
ghash = (uint8_t *)ctx->gcm_ghash;
if (ctx->gcm_remainder_len > 0) {
*** 308,486 ****
xor_block((uint8_t *)ctx->gcm_J0, ghash);
if (ctx->gcm_remainder_len > 0) {
rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
if (rv != CRYPTO_SUCCESS)
! return (rv);
}
out->cd_offset += ctx->gcm_remainder_len;
ctx->gcm_remainder_len = 0;
rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
if (rv != CRYPTO_SUCCESS)
! return (rv);
out->cd_offset += ctx->gcm_tag_len;
!
! return (CRYPTO_SUCCESS);
}
/*
* This will only deal with decrypting the last block of the input that
* might not be a multiple of block length.
*/
static void
! gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
- uint8_t *datap, *outp, *counterp;
uint64_t counter;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
- int i;
/*
* Increment counter.
! * Counter bits are confined to the bottom 32 bits
*/
counter = ntohll(ctx->gcm_cb[1] & counter_mask);
counter = htonll(counter + 1);
counter &= counter_mask;
ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
! datap = (uint8_t *)ctx->gcm_remainder;
! outp = &((ctx->gcm_pt_buf)[index]);
! counterp = (uint8_t *)ctx->gcm_tmp;
- /* authentication tag */
- bzero((uint8_t *)ctx->gcm_tmp, block_size);
- bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
-
- /* add ciphertext to the hash */
- GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
-
- /* decrypt remaining ciphertext */
- encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
-
/* XOR with counter block */
! for (i = 0; i < ctx->gcm_remainder_len; i++) {
! outp[i] = datap[i] ^ counterp[i];
}
}
/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
! size_t new_len;
! uint8_t *new;
/*
! * Copy contiguous ciphertext input blocks to plaintext buffer.
! * Ciphertext will be decrypted in the final.
*/
! if (length > 0) {
! new_len = ctx->gcm_pt_buf_len + length;
! #ifdef _KERNEL
! new = kmem_alloc(new_len, ctx->gcm_kmflag);
! bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
! kmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
! #else
! new = malloc(new_len);
! bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
! free(ctx->gcm_pt_buf);
! #endif
! if (new == NULL)
! return (CRYPTO_HOST_MEMORY);
! ctx->gcm_pt_buf = new;
! ctx->gcm_pt_buf_len = new_len;
! bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
! length);
! ctx->gcm_processed_data_len += length;
}
! ctx->gcm_remainder_len = 0;
! return (CRYPTO_SUCCESS);
}
int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
! size_t pt_len;
! size_t remainder;
! uint8_t *ghash;
! uint8_t *blockp;
! uint8_t *cbp;
! uint64_t counter;
! uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
! int processed = 0, rv;
! ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
! pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
! ghash = (uint8_t *)ctx->gcm_ghash;
! blockp = ctx->gcm_pt_buf;
! remainder = pt_len;
! while (remainder > 0) {
! /* Incomplete last block */
! if (remainder < block_size) {
! bcopy(blockp, ctx->gcm_remainder, remainder);
! ctx->gcm_remainder_len = remainder;
! /*
! * not expecting anymore ciphertext, just
! * compute plaintext for the remaining input
! */
! gcm_decrypt_incomplete_block(ctx, block_size,
! processed, encrypt_block, xor_block);
! ctx->gcm_remainder_len = 0;
! goto out;
! }
! /* add ciphertext to the hash */
! GHASH(ctx, blockp, ghash);
! /*
! * Increment counter.
! * Counter bits are confined to the bottom 32 bits
! */
! counter = ntohll(ctx->gcm_cb[1] & counter_mask);
! counter = htonll(counter + 1);
! counter &= counter_mask;
! ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
! cbp = (uint8_t *)ctx->gcm_tmp;
! encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
! /* XOR with ciphertext */
! xor_block(cbp, blockp);
! processed += block_size;
! blockp += block_size;
! remainder -= block_size;
}
! out:
! ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
! GHASH(ctx, ctx->gcm_len_a_len_c, ghash);
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
(uint8_t *)ctx->gcm_J0);
! xor_block((uint8_t *)ctx->gcm_J0, ghash);
/* compare the input authentication tag with what we calculated */
! if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
! /* They don't match */
! return (CRYPTO_INVALID_MAC);
! } else {
! rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
! if (rv != CRYPTO_SUCCESS)
! return (rv);
! out->cd_offset += pt_len;
! }
return (CRYPTO_SUCCESS);
}
static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
--- 478,717 ----
xor_block((uint8_t *)ctx->gcm_J0, ghash);
if (ctx->gcm_remainder_len > 0) {
rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
if (rv != CRYPTO_SUCCESS)
! goto out;
}
out->cd_offset += ctx->gcm_remainder_len;
ctx->gcm_remainder_len = 0;
rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
if (rv != CRYPTO_SUCCESS)
! goto out;
out->cd_offset += ctx->gcm_tag_len;
! out:
! GCM_ACCEL_EXIT;
! return (rv);
}
/*
* This will only deal with decrypting the last block of the input that
* might not be a multiple of block length.
*/
+ /*ARGSUSED*/
static void
! gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, uint8_t *data, size_t length,
! size_t block_size, crypto_data_t *out,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
uint64_t counter;
uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ /* pad last block and add to GHASH */
+ bcopy(data, ctx->gcm_tmp, length);
+ bzero(((uint8_t *)ctx->gcm_tmp) + length,
+ sizeof (ctx->gcm_tmp) - length);
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash);
+
/*
* Increment counter.
! * Counter bits are confined to the bottom 32 bits.
*/
counter = ntohll(ctx->gcm_cb[1] & counter_mask);
counter = htonll(counter + 1);
counter &= counter_mask;
ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
! encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
! (uint8_t *)ctx->gcm_tmp);
/* XOR with counter block */
! for (size_t i = 0; i < length; i++)
! ((uint8_t *)ctx->gcm_tmp)[i] ^= data[i];
!
! if (out != NULL) {
! (void) crypto_put_output_data((uchar_t *)ctx->gcm_tmp, out,
! length);
! out->cd_offset += length;
! } else {
! bcopy(ctx->gcm_tmp, data, length);
}
}
/* ARGSUSED */
int
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *),
! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
! uint64_t *))
{
! int rv = CRYPTO_SUCCESS;
+ GCM_ACCEL_ENTER;
+
/*
! * Previous calls accumulate data in the input buffer to make sure
! * we have the auth tag (the last part of the ciphertext) when we
! * receive a final() call.
*/
! if (ctx->gcm_last_input_fill > 0) {
! /* Try to complete the input buffer */
! size_t to_copy = MIN(length,
! sizeof (ctx->gcm_last_input) - ctx->gcm_last_input_fill);
! bcopy(data, ctx->gcm_last_input + ctx->gcm_last_input_fill,
! to_copy);
! data += to_copy;
! ctx->gcm_last_input_fill += to_copy;
! length -= to_copy;
!
! if (ctx->gcm_last_input_fill < sizeof (ctx->gcm_last_input))
! /* Not enough input data to continue */
! goto out;
!
! if (length < ctx->gcm_tag_len) {
! /*
! * There isn't enough data ahead to constitute a full
! * auth tag, so only crunch one input block and copy
! * the remainder of the input into our buffer.
! */
! rv = gcm_process_contiguous_blocks(ctx,
! (char *)ctx->gcm_last_input, block_size, out,
! block_size, B_FALSE, encrypt_block, copy_block,
! xor_block, cipher_ctr);
! if (rv != CRYPTO_SUCCESS)
! goto out;
! ctx->gcm_last_input_fill -= block_size;
! bcopy(ctx->gcm_last_input + block_size,
! ctx->gcm_last_input, ctx->gcm_last_input_fill);
! bcopy(data, ctx->gcm_last_input +
! ctx->gcm_last_input_fill, length);
! ctx->gcm_last_input_fill += length;
! /* No more input left */
! goto out;
}
+ /*
+ * There is enough data ahead for the auth tag, so crunch
+ * everything in our buffer now and empty it.
+ */
+ rv = gcm_process_contiguous_blocks(ctx,
+ (char *)ctx->gcm_last_input, ctx->gcm_last_input_fill,
+ out, block_size, B_FALSE, encrypt_block, copy_block,
+ xor_block, cipher_ctr);
+ if (rv != CRYPTO_SUCCESS)
+ goto out;
+ ctx->gcm_last_input_fill = 0;
+ }
+ /*
+ * Last input buffer is empty, so what's left ahead is block-aligned.
+ * Crunch all the blocks up until the near end, which might be our
+ * auth tag and we must NOT decrypt.
+ */
+ ASSERT(ctx->gcm_last_input_fill == 0);
+ if (length >= block_size + ctx->gcm_tag_len) {
+ size_t to_decrypt = (length - ctx->gcm_tag_len) &
+ ~(block_size - 1);
! rv = gcm_process_contiguous_blocks(ctx, data, to_decrypt, out,
! block_size, B_FALSE, encrypt_block, copy_block, xor_block,
! cipher_ctr);
! if (rv != CRYPTO_SUCCESS)
! goto out;
! data += to_decrypt;
! length -= to_decrypt;
! }
! /*
! * Copy the remainder into our input buffer, it's potentially
! * the auth tag and a last partial block.
! */
! ASSERT(length < sizeof (ctx->gcm_last_input));
! bcopy(data, ctx->gcm_last_input, length);
! ctx->gcm_last_input_fill += length;
! out:
! GCM_ACCEL_EXIT;
!
! return (rv);
}
int
gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *),
! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
! uint64_t *))
{
! int rv = CRYPTO_SUCCESS;
! /* Check there's enough data to at least compute a tag */
! if (ctx->gcm_last_input_fill < ctx->gcm_tag_len)
! return (SET_ERROR(CRYPTO_DATA_LEN_RANGE));
! GCM_ACCEL_ENTER;
! /* Finish any unprocessed input */
! if (ctx->gcm_last_input_fill > ctx->gcm_tag_len) {
! size_t last_blk_len = MIN(block_size,
! ctx->gcm_last_input_fill - ctx->gcm_tag_len);
! /* Finish last full block */
! if (last_blk_len >= block_size) {
! rv = gcm_process_contiguous_blocks(ctx,
! (char *)ctx->gcm_last_input, block_size, out,
! block_size, B_FALSE, encrypt_block, copy_block,
! xor_block, cipher_ctr);
! if (rv != CRYPTO_SUCCESS)
! goto errout;
! last_blk_len -= block_size;
! ctx->gcm_processed_data_len += block_size;
! ctx->gcm_last_input_fill -= block_size;
! /* Shift what remains in the input buffer forward */
! bcopy(ctx->gcm_last_input + block_size,
! ctx->gcm_last_input, ctx->gcm_last_input_fill);
}
! /* Finish last incomplete block before auth tag */
! if (last_blk_len > 0) {
! gcm_decrypt_incomplete_block(ctx, ctx->gcm_last_input,
! last_blk_len, block_size, out, encrypt_block,
! xor_block);
!
! ctx->gcm_processed_data_len += last_blk_len;
! ctx->gcm_last_input_fill -= last_blk_len;
!
! /* Shift what remains in the input buffer forward */
! bcopy(ctx->gcm_last_input + last_blk_len,
! ctx->gcm_last_input, ctx->gcm_last_input_fill);
! }
! /* Now the last_input buffer holds just the auth tag */
! }
!
! ASSERT(ctx->gcm_last_input_fill == ctx->gcm_tag_len);
!
! ctx->gcm_len_a_len_c[1] =
! htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
! GHASH(ctx, ctx->gcm_len_a_len_c, ctx->gcm_ghash);
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
(uint8_t *)ctx->gcm_J0);
! xor_block((uint8_t *)ctx->gcm_J0, (uint8_t *)ctx->gcm_ghash);
+ GCM_ACCEL_EXIT;
+
/* compare the input authentication tag with what we calculated */
! if (bcmp(&ctx->gcm_last_input, ctx->gcm_ghash, ctx->gcm_tag_len) != 0)
! return (SET_ERROR(CRYPTO_INVALID_MAC));
!
return (CRYPTO_SUCCESS);
+
+ errout:
+ GCM_ACCEL_EXIT;
+ return (rv);
}
static int
gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
{
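Restating the buffering rule applied to the block-aligned bulk of the input in gcm_mode_decrypt_contiguous_blocks() above, as a hypothetical helper (names are mine, not part of the webrev): of the bytes seen so far, everything except the trailing gcm_tag_len bytes is ciphertext, and only whole blocks of it are decrypted before final().

static size_t
gcm_decryptable_now(size_t avail, size_t tag_len, size_t block_size)
{
	/* block_size is assumed to be a power of two (16 for AES) */
	if (avail <= tag_len)
		return (0);
	return ((avail - tag_len) & ~(block_size - 1));
}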
*** 498,521 ****
case 112:
case 120:
case 128:
break;
default:
! return (CRYPTO_MECHANISM_PARAM_INVALID);
}
if (gcm_param->ulIvLen == 0)
! return (CRYPTO_MECHANISM_PARAM_INVALID);
return (CRYPTO_SUCCESS);
}
static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
gcm_ctx_t *ctx, size_t block_size,
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
uint8_t *cb;
ulong_t remainder = iv_len;
ulong_t processed = 0;
uint8_t *datap, *ghash;
--- 729,753 ----
case 112:
case 120:
case 128:
break;
default:
! return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
}
if (gcm_param->ulIvLen == 0)
! return (SET_ERROR(CRYPTO_MECHANISM_PARAM_INVALID));
return (CRYPTO_SUCCESS);
}
+ /*ARGSUSED*/
static void
gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
gcm_ctx_t *ctx, size_t block_size,
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
uint8_t *cb;
ulong_t remainder = iv_len;
ulong_t processed = 0;
uint8_t *datap, *ghash;
*** 562,585 ****
*/
int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
unsigned char *auth_data, size_t auth_data_len, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
uint8_t *ghash, *datap, *authp;
size_t remainder, processed;
/* encrypt zero block to get subkey H */
bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
(uint8_t *)ctx->gcm_H);
gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
copy_block, xor_block);
authp = (uint8_t *)ctx->gcm_tmp;
ghash = (uint8_t *)ctx->gcm_ghash;
bzero(authp, block_size);
bzero(ghash, block_size);
--- 794,829 ----
*/
int
gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
unsigned char *auth_data, size_t auth_data_len, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
uint8_t *ghash, *datap, *authp;
size_t remainder, processed;
+ GCM_ACCEL_ENTER;
+
/* encrypt zero block to get subkey H */
bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
(uint8_t *)ctx->gcm_H);
gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
copy_block, xor_block);
+ #ifdef __amd64
+ if (intel_pclmulqdq_instruction_present()) {
+ uint64_t H_bswap64[2] = {
+ ntohll(ctx->gcm_H[0]), ntohll(ctx->gcm_H[1])
+ };
+
+ gcm_init_clmul(H_bswap64, ctx->gcm_H_table);
+ }
+ #endif
+
authp = (uint8_t *)ctx->gcm_tmp;
ghash = (uint8_t *)ctx->gcm_ghash;
bzero(authp, block_size);
bzero(ghash, block_size);
*** 604,622 ****
/* add auth data to the hash */
GHASH(ctx, datap, ghash);
} while (remainder > 0);
return (CRYPTO_SUCCESS);
}
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
int rv;
CK_AES_GCM_PARAMS *gcm_param;
if (param != NULL) {
gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
--- 848,872 ----
/* add auth data to the hash */
GHASH(ctx, datap, ghash);
} while (remainder > 0);
+ GCM_ACCEL_EXIT;
+
return (CRYPTO_SUCCESS);
}
int
gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
+ /*
+ * No GHASH invocations in this function and gcm_init does its own
+ * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
+ */
int rv;
CK_AES_GCM_PARAMS *gcm_param;
if (param != NULL) {
gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
*** 650,662 ****
}
int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(uint8_t *, uint8_t *),
! void (*xor_block)(uint8_t *, uint8_t *))
{
int rv;
CK_AES_GMAC_PARAMS *gmac_param;
if (param != NULL) {
gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
--- 900,916 ----
}
int
gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
! void (*copy_block)(const uint8_t *, uint8_t *),
! void (*xor_block)(const uint8_t *, uint8_t *))
{
+ /*
+ * No GHASH invocations in this function and gcm_init does its own
+ * FPU saving, so no need to GCM_ACCEL_ENTER/GCM_ACCEL_EXIT here.
+ */
int rv;
CK_AES_GMAC_PARAMS *gmac_param;
if (param != NULL) {
gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
*** 730,740 ****
* Cache the result, as the CPU can't change.
*
* Note: the userland version uses getisax(). The kernel version uses
* is_x86_featureset().
*/
! static int
intel_pclmulqdq_instruction_present(void)
{
static int cached_result = -1;
if (cached_result == -1) { /* first time */
--- 984,994 ----
* Cache the result, as the CPU can't change.
*
* Note: the userland version uses getisax(). The kernel version uses
* is_x86_featureset().
*/
! static inline int
intel_pclmulqdq_instruction_present(void)
{
static int cached_result = -1;
if (cached_result == -1) { /* first time */
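The remainder of the function falls outside this hunk. Based on the headers included above, the userland detection presumably follows getisax()/AV_386_PCLMULQDQ, roughly as in this sketch (kernel builds would consult is_x86_featureset(x86_featureset, X86FSET_PCLMULQDQ) instead):

#ifndef _KERNEL
static inline int
intel_pclmulqdq_instruction_present(void)
{
	static int cached_result = -1;

	if (cached_result == -1) {	/* first time */
		uint32_t ui = 0;

		(void) getisax(&ui, 1);
		cached_result = ((ui & AV_386_PCLMULQDQ) != 0);
	}
	return (cached_result);
}
#endif	/* !_KERNEL */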