Print this page
4896 Performance improvements for KCF AES modes

@@ -20,10 +20,13 @@
  */
 /*
  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  * Use is subject to license terms.
  */
+/*
+ * Copyright 2015 by Saso Kiselkov. All rights reserved.
+ */
 
 #ifndef _COMMON_CRYPTO_MODES_H
 #define _COMMON_CRYPTO_MODES_H
 
 #ifdef  __cplusplus

@@ -185,38 +188,48 @@
  *
  * gcm_processed_data_len:
  *                      Length of processed plaintext (encrypt) or
  *                      length of processed ciphertext (decrypt).
  *
- * gcm_pt_buf:          Stores the decrypted plaintext returned by
- *                      decrypt_final when the computed authentication
- *                      tag matches the user supplied tag.
- *
- * gcm_pt_buf_len:      Length of the plaintext buffer.
- *
  * gcm_H:               Subkey.
  *
+ * gcm_H_table:         Pipelined Karatsuba multipliers (amd64 only).
+ *
  * gcm_J0:              Pre-counter block generated from the IV.
  *
+ * gcm_tmp:             Temp storage for ciphertext when padding is needed.
+ *
  * gcm_len_a_len_c:     64-bit representations of the bit lengths of
  *                      AAD and ciphertext.
  *
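- * gcm_kmflag:          Current value of kmflag. Used only for allocating
- *                      the plaintext buffer during decryption.
+ * gcm_kmflag:          Current value of kmflag. Previously used only for
+ *                      allocating gcm_pt_buf, which has been removed.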
+ *
+ * gcm_last_input:      Buffer of (up to) the last two blocks. Used when
+ *                      input is not block-aligned and to temporarily hold
+ *                      the end of the ciphertext stream during decryption,
+ *                      since that could potentially be the GHASH auth tag
+ *                      which we must check in the final() call instead of
+ *                      decrypting it.
+ *
+ * gcm_last_input_fill: Number of bytes actually stored in gcm_last_input.
  */
 typedef struct gcm_ctx {
         struct common_ctx gcm_common;
         size_t gcm_tag_len;
         size_t gcm_processed_data_len;
-        size_t gcm_pt_buf_len;
-        uint32_t gcm_tmp[4];
         uint64_t gcm_ghash[2];
         uint64_t gcm_H[2];
+#ifdef  __amd64
+        uint8_t gcm_H_table[256];       /* Karatsuba multipliers */
+#endif
         uint64_t gcm_J0[2];
+        uint64_t gcm_tmp[2];            /* ciphertext scratch for padding */
         uint64_t gcm_len_a_len_c[2];
-        uint8_t *gcm_pt_buf;
         int gcm_kmflag;
+        uint8_t gcm_last_input[32];     /* up to the last two blocks */
+        size_t gcm_last_input_fill;     /* bytes used in gcm_last_input */
 } gcm_ctx_t;
 
 #define gcm_keysched            gcm_common.cc_keysched
 #define gcm_keysched_len        gcm_common.cc_keysched_len
 #define gcm_cb                  gcm_common.cc_iv

@@ -281,103 +294,113 @@
 #define dc_keysched_len         dcu.dcu_ecb.ecb_common.cc_keysched_len
 #define dc_iv                   dcu.dcu_ecb.ecb_common.cc_iv
 #define dc_lastp                dcu.dcu_ecb.ecb_common.cc_lastp
 
 extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t,
-    crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *,
-    uint8_t *));
+    crypto_data_t *, size_t,
+    int (*cipher)(const void *, const uint8_t *, uint8_t *),
+    int (*cipher_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t));
 
 extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*encrypt)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *,
+    const uint8_t *, uint64_t));
 
 extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*decrypt)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
+    void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t));
 
 extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*cipher)(const void *, const uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+    uint64_t *));
 
 extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+    uint64_t *));
 
 extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
     crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+    uint64_t *));
 
 int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *),
+    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
+    uint64_t *));
 
 extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
 
 extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t,
-    void (*copy_block)(uint8_t *, uint64_t *));
+    void (*copy_block)(const uint8_t *, uint64_t *));
 
 extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *,
-    void (*copy_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *));
 
 extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
-    void (*copy_block)(uint8_t *, uint8_t *),
-    void (*xor_block)(uint8_t *, uint8_t *));
+    void (*copy_block)(const uint8_t *, uint8_t *),
+    void (*xor_block)(const uint8_t *, uint8_t *));
 
 extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *,
     int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
 
-extern void gcm_mul(uint64_t *, uint64_t *, uint64_t *);
-
 extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *);
-extern void crypto_get_ptrs(crypto_data_t *, void **, offset_t *,
-    uint8_t **, size_t *, uint8_t **, size_t);
 
 extern void *ecb_alloc_ctx(int);
 extern void *cbc_alloc_ctx(int);
 extern void *ctr_alloc_ctx(int);
 extern void *ccm_alloc_ctx(int);

@@ -384,10 +407,119 @@
 extern void *gcm_alloc_ctx(int);
 extern void *gmac_alloc_ctx(int);
 extern void crypto_free_mode_ctx(void *);
 extern void gcm_set_kmflag(gcm_ctx_t *, int);
 
+#ifdef  INLINE_CRYPTO_GET_PTRS
+/*
+ * Get pointers for where in the output to copy amt bytes of encrypted or
+ * decrypted data.  *iov_or_mp holds the current iovec index (UIO) or mblk
+ * (MBLK); *current_offset is the offset into it, or into cd_raw for RAW.
+ * This cursor is moved past the returned span on success.
+ *
+ * *out_data_1 and *out_data_1_len describe the first piece.  *out_data_2 is
+ * the start of the remainder when the span crosses into the next iovec or
+ * mblk, else NULL; if no next iovec/mblk exists it is NULL and the cursor
+ * is not moved.  A RAW span that does not fit leaves all outputs as is.
+ */
+static inline void
+crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
+    uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2,
+    size_t amt)
+{
+        offset_t offset = *current_offset;
+
+        switch (out->cd_format) {
+        case CRYPTO_DATA_RAW: {
+                iovec_t *iov = &out->cd_raw;
+
+                if ((offset + amt) <= iov->iov_len) {
+                        /* one block fits */
+                        *out_data_1 = (uint8_t *)iov->iov_base + offset;
+                        *out_data_1_len = amt;
+                        *out_data_2 = NULL;
+                        *current_offset = offset + amt;
+                }
+                break;
+        }
+
+        case CRYPTO_DATA_UIO: {
+                uio_t *uio = out->cd_uio;
+                uintptr_t vec_idx = (uintptr_t)(*iov_or_mp);
+                iovec_t *iov = &uio->uio_iov[vec_idx];
+
+                *out_data_1 = (uint8_t *)iov->iov_base + offset;
+                if (offset + amt <= iov->iov_len) {
+                        /* can fit one block into this iov */
+                        *out_data_1_len = amt;
+                        *out_data_2 = NULL;
+                        *current_offset = offset + amt;
+                } else {
+                        /* one block spans two iovecs */
+                        *out_data_1_len = iov->iov_len - offset;
+                        /*
+                         * Valid indices are 0 .. uio_iovcnt - 1.  Clear
+                         * *out_data_2 first so a bail-out is never stale.
+                         */
+                        *out_data_2 = NULL;
+                        if (vec_idx + 1 >= (uintptr_t)uio->uio_iovcnt)
+                                return;
+                        vec_idx++;
+                        iov = &uio->uio_iov[vec_idx];
+                        *out_data_2 = (uint8_t *)iov->iov_base;
+                        *current_offset = amt - *out_data_1_len;
+                }
+                *iov_or_mp = (void *)vec_idx;
+                break;
+        }
+
+        case CRYPTO_DATA_MBLK: {
+                mblk_t *mp = (mblk_t *)*iov_or_mp;
+                uint8_t *p = mp->b_rptr + offset;
+
+                *out_data_1 = p;
+                if ((p + amt) <= mp->b_wptr) {
+                        /* can fit one block into this mblk */
+                        *out_data_1_len = amt;
+                        *out_data_2 = NULL;
+                        *current_offset = offset + amt;
+                } else {
+                        /* one block spans two mblks */
+                        *out_data_1_len = _PTRDIFF(mp->b_wptr, p);
+                        *out_data_2 = NULL;     /* in case b_cont is NULL */
+                        if ((mp = mp->b_cont) == NULL)
+                                return;
+                        *out_data_2 = mp->b_rptr;
+                        *current_offset = (amt - *out_data_1_len);
+                }
+                *iov_or_mp = mp;
+                break;
+        }
+        } /* end switch */
+}
+#endif  /* INLINE_CRYPTO_GET_PTRS */
+
+/*
+ * True if a crypto_data_t is one contiguous buffer (lets fastpaths skip
+ * partial block splicing).  mblks chain via b_cont.  Evaluates cd repeatedly.
+ */
+#define CRYPTO_DATA_IS_SINGLE_BLOCK(cd) \
+        ((cd) != NULL && ((cd)->cd_format == CRYPTO_DATA_RAW || \
+        ((cd)->cd_format == CRYPTO_DATA_UIO && \
+        (cd)->cd_uio->uio_iovcnt == 1) || \
+        ((cd)->cd_format == CRYPTO_DATA_MBLK && (cd)->cd_mp->b_cont == NULL)))
+
+/* First data buffer of cd, at cd_offset.  Evaluates cd more than once. */
+#define CRYPTO_DATA_FIRST_BLOCK(cd) \
+        ((cd)->cd_format == CRYPTO_DATA_RAW ? \
+        (void *)((uint8_t *)(cd)->cd_raw.iov_base + \
+        (cd)->cd_offset) : \
+        ((cd)->cd_format == CRYPTO_DATA_UIO ? \
+        (void *)((uint8_t *)(cd)->cd_uio->uio_iov[0].iov_base + \
+        (cd)->cd_offset) : \
+        (void *)((cd)->cd_mp->b_rptr + (cd)->cd_offset)))
+
 #ifdef  __cplusplus
 }
 #endif
 
 #endif  /* _COMMON_CRYPTO_MODES_H */