4896 Performance improvements for KCF AES modes
@@ -20,10 +20,13 @@
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
+/*
+ * Copyright 2015 by Saso Kiselkov. All rights reserved.
+ */
#ifndef _COMMON_CRYPTO_MODES_H
#define _COMMON_CRYPTO_MODES_H
#ifdef __cplusplus
@@ -185,38 +188,48 @@
*
* gcm_processed_data_len:
* Length of processed plaintext (encrypt) or
* length of processed ciphertext (decrypt).
*
- * gcm_pt_buf: Stores the decrypted plaintext returned by
- * decrypt_final when the computed authentication
- * tag matches the user supplied tag.
- *
- * gcm_pt_buf_len: Length of the plaintext buffer.
- *
* gcm_H: Subkey.
*
+ * gcm_H_table: Pipelined Karatsuba multipliers.
+ *
* gcm_J0: Pre-counter block generated from the IV.
*
+ * gcm_tmp: Temp storage for ciphertext when padding is needed.
+ *
* gcm_len_a_len_c: 64-bit representations of the bit lengths of
* AAD and ciphertext.
*
* gcm_kmflag: Current value of kmflag. Used only for allocating
* the plaintext buffer during decryption.
+ *
+ * gcm_last_input: Buffer of (up to) two last blocks. This is used when
+ * input is not block-aligned and to temporarily hold
+ * the end of the ciphertext stream during decryption,
+ * since that could potentially be the GHASH auth tag
+ * which we must check in the final() call instead of
+ * decrypting it.
+ *
+ * gcm_last_input_fill: Number of bytes actually stored in gcm_last_input.
*/
typedef struct gcm_ctx {
struct common_ctx gcm_common;
size_t gcm_tag_len;
size_t gcm_processed_data_len;
- size_t gcm_pt_buf_len;
- uint32_t gcm_tmp[4];
uint64_t gcm_ghash[2];
uint64_t gcm_H[2];
+#ifdef __amd64
+ uint8_t gcm_H_table[256];
+#endif
uint64_t gcm_J0[2];
+ uint64_t gcm_tmp[2];
uint64_t gcm_len_a_len_c[2];
- uint8_t *gcm_pt_buf;
int gcm_kmflag;
+ uint8_t gcm_last_input[32];
+ size_t gcm_last_input_fill;
} gcm_ctx_t;
#define gcm_keysched gcm_common.cc_keysched
#define gcm_keysched_len gcm_common.cc_keysched_len
#define gcm_cb gcm_common.cc_iv
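
The gcm_last_input/gcm_last_input_fill pair replaces the old gcm_pt_buf accumulation buffer: instead of collecting all ciphertext and decrypting it in decrypt_final, the context now withholds only the trailing bytes of the stream, since those bytes may turn out to be the GHASH auth tag rather than ciphertext. A minimal sketch of that tail-withholding idea (hypothetical helper, not the gcm.c implementation; block-alignment handling is ignored):

#include <string.h>
#include <stddef.h>
#include <stdint.h>

#define	GCM_TAIL_MAX	32	/* same capacity as gcm_last_input[32] */

/*
 * Hypothetical sketch: stash incoming ciphertext into the tail buffer and
 * report how many buffered bytes are now safe to decrypt, always holding
 * back tag_len bytes until final() can decide whether they are the tag.
 */
static size_t
gcm_stash_tail(uint8_t tail[GCM_TAIL_MAX], size_t *fill, size_t tag_len,
    const uint8_t *in, size_t in_len)
{
	if (*fill + in_len > GCM_TAIL_MAX)
		in_len = GCM_TAIL_MAX - *fill;	/* sketch: cap to capacity */
	memcpy(&tail[*fill], in, in_len);
	*fill += in_len;
	return (*fill > tag_len ? *fill - tag_len : 0);
}
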
@@ -281,103 +294,113 @@
#define dc_keysched_len dcu.dcu_ecb.ecb_common.cc_keysched_len
#define dc_iv dcu.dcu_ecb.ecb_common.cc_iv
#define dc_lastp dcu.dcu_ecb.ecb_common.cc_lastp
extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t,
- crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *,
- uint8_t *));
+ crypto_data_t *, size_t,
+ int (*cipher)(const void *, const uint8_t *, uint8_t *),
+ int (*cipher_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t));
extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*encrypt)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *,
+ const uint8_t *, uint64_t));
extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*decrypt)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
+ void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t));
extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*cipher)(const void *, const uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+ uint64_t *));
extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+ uint64_t *));
extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+ uint64_t *));
int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*xor_block)(const uint8_t *, uint8_t *));
int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *),
+ int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+ uint64_t *));
extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t,
- void (*copy_block)(uint8_t *, uint64_t *));
+ void (*copy_block)(const uint8_t *, uint64_t *));
extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *,
- void (*copy_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *));
extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
- void (*copy_block)(uint8_t *, uint8_t *),
- void (*xor_block)(uint8_t *, uint8_t *));
+ void (*copy_block)(const uint8_t *, uint8_t *),
+ void (*xor_block)(const uint8_t *, uint8_t *));
extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *,
int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
-extern void gcm_mul(uint64_t *, uint64_t *, uint64_t *);
-
extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *);
-extern void crypto_get_ptrs(crypto_data_t *, void **, offset_t *,
- uint8_t **, size_t *, uint8_t **, size_t);
extern void *ecb_alloc_ctx(int);
extern void *cbc_alloc_ctx(int);
extern void *ctr_alloc_ctx(int);
extern void *ccm_alloc_ctx(int);
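
The extra trailing callbacks added to these prototypes (cipher_ecb, encrypt_cbc, decrypt_ecb, cipher_ctr, xor_block_range) let a provider hand the mode code a routine that processes a whole contiguous run of blocks per call instead of one block at a time. A hedged sketch of what such a bulk callback might look like when built from a plain single-block cipher; my_block_cipher is a hypothetical stand-in, and the trailing uint64_t is assumed here to be the run length in bytes, which the prototypes alone do not spell out:

#include <stdint.h>

#define	MY_BLOCK_LEN	16

/* Hypothetical single-block cipher with the usual one-block signature. */
extern int my_block_cipher(const void *ks, const uint8_t *in, uint8_t *out);

/*
 * Sketch of a bulk callback matching
 *	int (*cipher_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t);
 * A real provider would substitute a pipelined or AES-NI implementation;
 * the point of the extra argument is to amortize call overhead over many
 * blocks.
 */
static int
my_cipher_ecb(const void *ks, const uint8_t *in, uint8_t *out, uint64_t len)
{
	uint64_t i;
	int rv = 0;

	for (i = 0; i + MY_BLOCK_LEN <= len && rv == 0; i += MY_BLOCK_LEN)
		rv = my_block_cipher(ks, &in[i], &out[i]);
	return (rv);
}
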
@@ -384,10 +407,119 @@
extern void *gcm_alloc_ctx(int);
extern void *gmac_alloc_ctx(int);
extern void crypto_free_mode_ctx(void *);
extern void gcm_set_kmflag(gcm_ctx_t *, int);
+#ifdef INLINE_CRYPTO_GET_PTRS
+/*
+ * Get pointers for where in the output to copy a block of encrypted or
+ * decrypted data. The iov_or_mp argument stores a pointer to the current
+ * iovec or mp, and offset stores an offset into the current iovec or mp.
+ */
+static inline void
+crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
+ uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2,
+ size_t amt)
+{
+ offset_t offset;
+
+ switch (out->cd_format) {
+ case CRYPTO_DATA_RAW: {
+ iovec_t *iov;
+
+ offset = *current_offset;
+ iov = &out->cd_raw;
+ if ((offset + amt) <= iov->iov_len) {
+ /* one block fits */
+ *out_data_1 = (uint8_t *)iov->iov_base + offset;
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ }
+ break;
+ }
+
+ case CRYPTO_DATA_UIO: {
+ uio_t *uio = out->cd_uio;
+ iovec_t *iov;
+ offset_t offset;
+ uintptr_t vec_idx;
+ uint8_t *p;
+
+ offset = *current_offset;
+ vec_idx = (uintptr_t)(*iov_or_mp);
+ iov = &uio->uio_iov[vec_idx];
+ p = (uint8_t *)iov->iov_base + offset;
+ *out_data_1 = p;
+
+ if (offset + amt <= iov->iov_len) {
+ /* can fit one block into this iov */
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ } else {
+ /* one block spans two iovecs */
+ *out_data_1_len = iov->iov_len - offset;
+ if (vec_idx == uio->uio_iovcnt)
+ return;
+ vec_idx++;
+ iov = &uio->uio_iov[vec_idx];
+ *out_data_2 = (uint8_t *)iov->iov_base;
+ *current_offset = amt - *out_data_1_len;
+ }
+ *iov_or_mp = (void *)vec_idx;
+ break;
+ }
+
+ case CRYPTO_DATA_MBLK: {
+ mblk_t *mp;
+ uint8_t *p;
+
+ offset = *current_offset;
+ mp = (mblk_t *)*iov_or_mp;
+ p = mp->b_rptr + offset;
+ *out_data_1 = p;
+ if ((p + amt) <= mp->b_wptr) {
+ /* can fit one block into this mblk */
+ *out_data_1_len = amt;
+ *out_data_2 = NULL;
+ *current_offset = offset + amt;
+ } else {
+ /* one block spans two mblks */
+ *out_data_1_len = _PTRDIFF(mp->b_wptr, p);
+ if ((mp = mp->b_cont) == NULL)
+ return;
+ *out_data_2 = mp->b_rptr;
+ *current_offset = (amt - *out_data_1_len);
+ }
+ *iov_or_mp = mp;
+ break;
+ }
+ } /* end switch */
+}
+#endif /* INLINE_CRYPTO_GET_PTRS */
+
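Typical call pattern for the now-inlined crypto_get_ptrs(): the caller primes iov_or_mp/offset with crypto_init_ptrs() and then asks for destination pointers one block at a time, copying across the split when a block straddles two iovecs or mblks. The fragment below is illustrative kernel-context code, not copied from modes.c:

/* Illustrative only; assumes kernel KCF headers and a 16-byte block. */
static void
emit_block(crypto_data_t *out, void **iov_or_mp, offset_t *offset,
    const uint8_t block[16])
{
	uint8_t *dst1, *dst2;
	size_t dst1_len;

	/* crypto_init_ptrs(out, iov_or_mp, offset) must have run earlier. */
	crypto_get_ptrs(out, iov_or_mp, offset, &dst1, &dst1_len, &dst2, 16);
	bcopy(block, dst1, dst1_len);
	if (dst2 != NULL)
		bcopy(block + dst1_len, dst2, 16 - dst1_len);
}
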
+/*
+ * Checks whether a crypto_data_t object is composed of a single contiguous
+ * buffer. This is used in all fastpath detection code to avoid the
+ * possibility of having to do partial block splicing.
+ */
+#define CRYPTO_DATA_IS_SINGLE_BLOCK(cd) \
+ (cd != NULL && (cd->cd_format == CRYPTO_DATA_RAW || \
+ (cd->cd_format == CRYPTO_DATA_UIO && cd->cd_uio->uio_iovcnt == 1) || \
+ (cd->cd_format == CRYPTO_DATA_MBLK && cd->cd_mp->b_next == NULL)))
+
+/*
+ * Returns the first contiguous data buffer in a crypto_data_t object.
+ */
+#define CRYPTO_DATA_FIRST_BLOCK(cd) \
+ (cd->cd_format == CRYPTO_DATA_RAW ? \
+ (void *)(cd->cd_raw.iov_base + cd->cd_offset) : \
+ (cd->cd_format == CRYPTO_DATA_UIO ? \
+ (void *)(cd->cd_uio->uio_iov[0].iov_base + cd->cd_offset) : \
+ (void *)(cd->cd_mp->b_rptr + cd->cd_offset)))
+
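A hedged example of how these two macros combine into a fastpath gate; the function name and the surrounding policy (block-aligned length, single contiguous output) are hypothetical and only illustrate the intended "avoid partial block splicing" check:

#define	MY_BLOCK_LEN	16

/*
 * Hypothetical gate: return the contiguous output buffer when the
 * accelerated single-buffer path may be used, or NULL to force the
 * generic block-at-a-time path. A real caller would also verify that
 * enough space remains in the output.
 */
static uint8_t *
my_fastpath_buf(crypto_data_t *out, size_t length)
{
	if (CRYPTO_DATA_IS_SINGLE_BLOCK(out) &&
	    (length & (MY_BLOCK_LEN - 1)) == 0)
		return (CRYPTO_DATA_FIRST_BLOCK(out));
	return (NULL);
}
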
#ifdef __cplusplus
}
#endif
#endif /* _COMMON_CRYPTO_MODES_H */