Print this page
4896 Performance improvements for KCF AES modes

*** 20,29 **** --- 20,32 ---- */ /* * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ + /* + * Copyright 2015 by Saso Kiselkov. All rights reserved. + */ #ifndef _COMMON_CRYPTO_MODES_H #define _COMMON_CRYPTO_MODES_H #ifdef __cplusplus
*** 185,222 **** * * gcm_processed_data_len: * Length of processed plaintext (encrypt) or * length of processed ciphertext (decrypt). * - * gcm_pt_buf: Stores the decrypted plaintext returned by - * decrypt_final when the computed authentication - * tag matches the user supplied tag. - * - * gcm_pt_buf_len: Length of the plaintext buffer. - * * gcm_H: Subkey. * * gcm_J0: Pre-counter block generated from the IV. * * gcm_len_a_len_c: 64-bit representations of the bit lengths of * AAD and ciphertext. * * gcm_kmflag: Current value of kmflag. Used only for allocating * the plaintext buffer during decryption. */ typedef struct gcm_ctx { struct common_ctx gcm_common; size_t gcm_tag_len; size_t gcm_processed_data_len; - size_t gcm_pt_buf_len; - uint32_t gcm_tmp[4]; uint64_t gcm_ghash[2]; uint64_t gcm_H[2]; uint64_t gcm_J0[2]; uint64_t gcm_len_a_len_c[2]; - uint8_t *gcm_pt_buf; int gcm_kmflag; } gcm_ctx_t; #define gcm_keysched gcm_common.cc_keysched #define gcm_keysched_len gcm_common.cc_keysched_len #define gcm_cb gcm_common.cc_iv --- 188,235 ---- * * gcm_processed_data_len: * Length of processed plaintext (encrypt) or * length of processed ciphertext (decrypt). * * gcm_H: Subkey. * + * gcm_H_table: Pipelined Karatsuba multipliers. + * * gcm_J0: Pre-counter block generated from the IV. * + * gcm_tmp: Temp storage for ciphertext when padding is needed. + * * gcm_len_a_len_c: 64-bit representations of the bit lengths of * AAD and ciphertext. * * gcm_kmflag: Current value of kmflag. Used only for allocating * the plaintext buffer during decryption. + * + * gcm_last_input: Buffer of (up to) two last blocks. This is used when + * input is not block-aligned and to temporarily hold + * the end of the ciphertext stream during decryption, + * since that could potentially be the GHASH auth tag + * which we must check in the final() call instead of + * decrypting it. + * + * gcm_last_input_fill: Number of bytes actually stored in gcm_last_input. 
*/ typedef struct gcm_ctx { struct common_ctx gcm_common; size_t gcm_tag_len; size_t gcm_processed_data_len; uint64_t gcm_ghash[2]; uint64_t gcm_H[2]; + #ifdef __amd64 + uint8_t gcm_H_table[256]; /* compiled in only when __amd64 is defined, so the struct layout differs per architecture */ + #endif uint64_t gcm_J0[2]; + uint64_t gcm_tmp[2]; /* declared as uint32_t[4] on the old side of this hunk; both forms are 16 bytes */ uint64_t gcm_len_a_len_c[2]; int gcm_kmflag; /* NOTE(review): the field description still says this is used for allocating the plaintext buffer, but this change removes gcm_pt_buf and gcm_pt_buf_len - confirm what kmflag is used for now */ + uint8_t gcm_last_input[32]; /* described above as holding up to two blocks */ + size_t gcm_last_input_fill; } gcm_ctx_t; #define gcm_keysched gcm_common.cc_keysched #define gcm_keysched_len gcm_common.cc_keysched_len #define gcm_cb gcm_common.cc_iv
*** 281,383 **** #define dc_keysched_len dcu.dcu_ecb.ecb_common.cc_keysched_len #define dc_iv dcu.dcu_ecb.ecb_common.cc_iv #define dc_lastp dcu.dcu_ecb.ecb_common.cc_lastp extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t, ! crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *, ! uint8_t *)); extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*decrypt)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! 
void (*xor_block)(uint8_t *, uint8_t *)); int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)); extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t, ! void (*copy_block)(uint8_t *, uint64_t *)); extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *, ! void (*copy_block)(uint8_t *, uint8_t *)); extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! void (*xor_block)(uint8_t *, uint8_t *)); extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(uint8_t *, uint8_t *), ! 
void (*xor_block)(uint8_t *, uint8_t *)); extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)); - extern void gcm_mul(uint64_t *, uint64_t *, uint64_t *); - extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *); - extern void crypto_get_ptrs(crypto_data_t *, void **, offset_t *, - uint8_t **, size_t *, uint8_t **, size_t); extern void *ecb_alloc_ctx(int); extern void *cbc_alloc_ctx(int); extern void *ctr_alloc_ctx(int); extern void *ccm_alloc_ctx(int); --- 294,406 ---- /* New side of the hunk: every copy_block/xor_block callback now takes a const-qualified source pointer, and the ECB, CBC, CTR and GCM entry points gain extra callback parameters (cipher_ecb, encrypt_cbc, decrypt_ecb, xor_block_range, cipher_ctr) that each take a uint64_t argument - presumably a byte or block count; confirm against the implementations. */ #define dc_keysched_len dcu.dcu_ecb.ecb_common.cc_keysched_len #define dc_iv dcu.dcu_ecb.ecb_common.cc_iv #define dc_lastp dcu.dcu_ecb.ecb_common.cc_lastp extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t, ! crypto_data_t *, size_t, ! int (*cipher)(const void *, const uint8_t *, uint8_t *), ! int (*cipher_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t)); extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *, ! const uint8_t *, uint64_t)); extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*decrypt)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t), ! 
void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t)); extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t, ! 
uint64_t *)); extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t, ! uint64_t *)); extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t, ! uint64_t *)); /* NOTE(review): ccm_encrypt_final and gcm_encrypt_final are declared without 'extern', unlike their neighbours; a function declaration has external linkage either way, so this is only a style inconsistency. */ int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! 
void (*xor_block)(const uint8_t *, uint8_t *)); extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *), ! int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t, ! uint64_t *)); extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)); extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t, ! void (*copy_block)(const uint8_t *, uint64_t *)); /* NOTE(review): this copy_block takes a uint64_t * destination, while every other copy_block in this header takes uint8_t * - the old side has the same mismatch; confirm it is intentional */ extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *, ! void (*copy_block)(const uint8_t *, uint8_t *)); extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), ! void (*copy_block)(const uint8_t *, uint8_t *), ! void (*xor_block)(const uint8_t *, uint8_t *)); extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *)); extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *); /* the old side also declared gcm_mul and crypto_get_ptrs next to this prototype; both declarations are dropped by this change */ extern void *ecb_alloc_ctx(int); extern void *cbc_alloc_ctx(int); extern void *ctr_alloc_ctx(int); extern void *ccm_alloc_ctx(int);
*** 384,393 **** --- 407,525 ---- extern void *gcm_alloc_ctx(int); extern void *gmac_alloc_ctx(int); extern void crypto_free_mode_ctx(void *); extern void gcm_set_kmflag(gcm_ctx_t *, int); + #ifdef INLINE_CRYPTO_GET_PTRS + /* + * Get pointers for where in the output to copy a block of encrypted or + * decrypted data. The iov_or_mp argument stores a pointer to the current + * iovec or mp, and offset stores an offset into the current iovec or mp. + */ + static inline void + crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset, + uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2, + size_t amt) + { + offset_t offset; + + switch (out->cd_format) { + case CRYPTO_DATA_RAW: { + iovec_t *iov; + + offset = *current_offset; + iov = &out->cd_raw; + if ((offset + amt) <= iov->iov_len) { + /* one block fits */ + *out_data_1 = (uint8_t *)iov->iov_base + offset; + *out_data_1_len = amt; + *out_data_2 = NULL; + *current_offset = offset + amt; + } + break; + } + + case CRYPTO_DATA_UIO: { + uio_t *uio = out->cd_uio; + iovec_t *iov; + offset_t offset; + uintptr_t vec_idx; + uint8_t *p; + + offset = *current_offset; + vec_idx = (uintptr_t)(*iov_or_mp); + iov = &uio->uio_iov[vec_idx]; + p = (uint8_t *)iov->iov_base + offset; + *out_data_1 = p; + + if (offset + amt <= iov->iov_len) { + /* can fit one block into this iov */ + *out_data_1_len = amt; + *out_data_2 = NULL; + *current_offset = offset + amt; + } else { + /* one block spans two iovecs */ + *out_data_1_len = iov->iov_len - offset; + if (vec_idx == uio->uio_iovcnt) + return; + vec_idx++; + iov = &uio->uio_iov[vec_idx]; + *out_data_2 = (uint8_t *)iov->iov_base; + *current_offset = amt - *out_data_1_len; + } + *iov_or_mp = (void *)vec_idx; + break; + } + + case CRYPTO_DATA_MBLK: { + mblk_t *mp; + uint8_t *p; + + offset = *current_offset; + mp = (mblk_t *)*iov_or_mp; + p = mp->b_rptr + offset; + *out_data_1 = p; + if ((p + amt) <= mp->b_wptr) { + /* can fit one block into this mblk */ + 
*out_data_1_len = amt; + *out_data_2 = NULL; + *current_offset = offset + amt; + } else { + /* one block spans two mblks */ + *out_data_1_len = _PTRDIFF(mp->b_wptr, p); + if ((mp = mp->b_cont) == NULL) + return; + *out_data_2 = mp->b_rptr; + *current_offset = (amt - *out_data_1_len); + } + *iov_or_mp = mp; + break; + } + } /* end switch */ + } + #endif /* INLINE_CRYPTO_GET_PTRS */ + + /* + * Checks whether a crypto_data_t object is composed of a single contiguous + * buffer. This is used in all fastpath detection code to avoid the + * possibility of having to do partial block splicing. + */ + #define CRYPTO_DATA_IS_SINGLE_BLOCK(cd) \ + (cd != NULL && (cd->cd_format == CRYPTO_DATA_RAW || \ + (cd->cd_format == CRYPTO_DATA_UIO && cd->cd_uio->uio_iovcnt == 1) || \ + (cd->cd_format == CRYPTO_DATA_MBLK && cd->cd_mp->b_next == NULL))) + + /* + * Returns the first contiguous data buffer in a crypto_data_t object. + */ + #define CRYPTO_DATA_FIRST_BLOCK(cd) \ + (cd->cd_format == CRYPTO_DATA_RAW ? \ + (void *)(cd->cd_raw.iov_base + cd->cd_offset) : \ + (cd->cd_format == CRYPTO_DATA_UIO ? \ + (void *)(cd->cd_uio->uio_iov[0].iov_base + cd->cd_offset) : \ + (void *)(cd->cd_mp->b_rptr + cd->cd_offset))) + #ifdef __cplusplus } #endif #endif /* _COMMON_CRYPTO_MODES_H */