Print this page
4896 Performance improvements for KCF AES modes


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */



  25 
  26 #ifndef _AES_IMPL_H
  27 #define _AES_IMPL_H
  28 
  29 /*
  30  * Common definitions used by AES.
  31  */
  32 
  33 #ifdef  __cplusplus
  34 extern "C" {
  35 #endif
  36 
  37 #include <sys/types.h>
  38 #include <sys/crypto/common.h>
  39 
  40 /*
      * Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers at once.
      * The addresses v and w are OR-ed together and masked with (a - 1); the
      * macro evaluates to 1 only when no masked bit is set in either address.
      * The mask is only an alignment test when a is a power of two.
      */
  41 #define IS_P2ALIGNED2(v, w, a) \
  42         ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)
  43 
  44 #define AES_BLOCK_LEN   16      /* AES block size, in bytes */
  45 /*
      * Round constant length, in number of 32-bit elements.
      * With AES_BLOCK_LEN == 16 this evaluates to 5 * (16 / 4 - 2) = 10.
      */
  46 #define RC_LENGTH       (5 * ((AES_BLOCK_LEN) / 4 - 2))
  47 
  /*
   * Copy a 16-element block (16 bytes for uint8_t buffers) from src to dst
   * one element at a time, so neither pointer needs any particular alignment.
   *
   * The expansion is one comma expression cast to void instead of sixteen
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated 16 times; pass side-effect-free arguments.
   */
  #define AES_COPY_BLOCK(src, dst) \
          ((void)((dst)[0] = (src)[0], \
          (dst)[1] = (src)[1], \
          (dst)[2] = (src)[2], \
          (dst)[3] = (src)[3], \
          (dst)[4] = (src)[4], \
          (dst)[5] = (src)[5], \
          (dst)[6] = (src)[6], \
          (dst)[7] = (src)[7], \
          (dst)[8] = (src)[8], \
          (dst)[9] = (src)[9], \
          (dst)[10] = (src)[10], \
          (dst)[11] = (src)[11], \
          (dst)[12] = (src)[12], \
          (dst)[13] = (src)[13], \
          (dst)[14] = (src)[14], \
          (dst)[15] = (src)[15]))
  65 
  /*
   * XOR a 16-element block (16 bytes for uint8_t buffers) from src into dst
   * one element at a time, so neither pointer needs any particular alignment.
   *
   * The expansion is one comma expression cast to void instead of sixteen
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated 16 times; pass side-effect-free arguments.
   */
  #define AES_XOR_BLOCK(src, dst) \
          ((void)((dst)[0] ^= (src)[0], \
          (dst)[1] ^= (src)[1], \
          (dst)[2] ^= (src)[2], \
          (dst)[3] ^= (src)[3], \
          (dst)[4] ^= (src)[4], \
          (dst)[5] ^= (src)[5], \
          (dst)[6] ^= (src)[6], \
          (dst)[7] ^= (src)[7], \
          (dst)[8] ^= (src)[8], \
          (dst)[9] ^= (src)[9], \
          (dst)[10] ^= (src)[10], \
          (dst)[11] ^= (src)[11], \
          (dst)[12] ^= (src)[12], \
          (dst)[13] ^= (src)[13], \
          (dst)[14] ^= (src)[14], \
          (dst)[15] ^= (src)[15]))
  83 








  84 /*
      * AES key size definitions.
      * The *BITS values are key lengths in bits; the *BYTES values divide
      * them by 8 with ">> 3", giving 16 and 32.
      */
  85 #define AES_MINBITS             128
  86 #define AES_MINBYTES            ((AES_MINBITS) >> 3)
  87 #define AES_MAXBITS             256
  88 #define AES_MAXBYTES            ((AES_MAXBITS) >> 3)
  89 
     /*
      * AES_MIN_KEY_BYTES and AES_MAX_KEY_BYTES expand to the same expressions
      * as AES_MINBYTES and AES_MAXBYTES above.
      */
  90 #define AES_MIN_KEY_BYTES       ((AES_MINBITS) >> 3)
  91 #define AES_MAX_KEY_BYTES       ((AES_MAXBITS) >> 3)
  92 #define AES_192_KEY_BYTES       24
     /* Same value as AES_BLOCK_LEN; presumably also in bytes -- TODO confirm. */
  93 #define AES_IV_LEN              16
  94 
  95 /*
      * AES key schedule may be implemented with 32- or 64-bit elements.
      * These values match the "32 or 64 bits" noted on aes_key.type.
      */
  96 #define AES_32BIT_KS            32
  97 #define AES_64BIT_KS            64
  98 
  99 #define MAX_AES_NR              14 /* Maximum number of rounds */
 100 #define MAX_AES_NB              4  /* Number of columns comprising a state */
 101 





































 102 typedef union {
             /*
              * Key schedule storage, sized from the maximum round count:
              * (MAX_AES_NR + 1) * MAX_AES_NB = 60 elements.  The 64-bit
              * element view is only compiled in when sun4u is defined; the
              * 32-bit view is always present.
              */
 103 #ifdef  sun4u
 104         uint64_t        ks64[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 105 #endif
 106         uint32_t        ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 107 } aes_ks_t;
 108 
 109 /* aes_key.flags value; the flags field itself only exists under __amd64: */
 110 #define INTEL_AES_NI_CAPABLE    0x1     /* AES-NI instructions present */
 111 
     /*
      * Expanded AES key: one schedule for each direction, plus the round
      * count and the element width of the schedules.
      */
 112 typedef struct aes_key aes_key_t;
 113 struct aes_key {
 114         aes_ks_t        encr_ks;  /* encryption key schedule */
 115         aes_ks_t        decr_ks;  /* decryption key schedule */
 116 #ifdef __amd64
             /*
              * NOTE(review): a long double member presumably raises the
              * alignment of the whole structure to 16 bytes, and with it that
              * of encr_ks at offset 0 -- confirm the allocator honors it.
              */
 117         long double     align128; /* Align fields above for Intel AES-NI */
 118         int             flags;    /* implementation-dependent flags */
 119 #endif  /* __amd64 */
 120         int             nr;       /* number of rounds (10, 12, or 14) */
             /* presumably AES_32BIT_KS or AES_64BIT_KS -- TODO confirm */
 121         int             type;     /* key schedule size (32 or 64 bits) */
 122 };
 123 
 124 /*
 125  * Core AES functions.
 126  * ks and keysched are pointers to aes_key_t.
 127  * They are declared void* as they are intended to be opaque types.
 128  * Use function aes_alloc_keysched() to allocate memory for ks and keysched.
      *
      * In the block functions the const-qualified buffer is the input and the
      * other buffer is the output; pt and ct presumably stand for plaintext
      * and ciphertext.  The definitions live outside this header, so the
      * meaning of the int return values and of *size is not visible here.
 129  */
 130 extern void *aes_alloc_keysched(size_t *size, int kmflag);
 131 extern void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits,
 132         void *keysched);
 133 extern int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct);
 134 extern int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt);








 135 
 136 /*
 137  * AES mode functions.
 138  * The first 2 functions operate on 16-byte AES blocks.
      * Neither pointer is const-qualified in these two prototypes, so callers
      * cannot pass a pointer to const data without a cast.
 139  */
 140 extern void aes_copy_block(uint8_t *in, uint8_t *out);
 141 extern void aes_xor_block(uint8_t *data, uint8_t *dst);








 142 



 143 /*
      * Note: ctx is a pointer to aes_ctx_t defined in modes.h.
      * Both functions take an input buffer (data, length) and a crypto_data_t
      * output descriptor.  The meaning of the int return value is not visible
      * in this header.
      */
 144 extern int aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 145     crypto_data_t *out);
 146 extern int aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 147     crypto_data_t *out);
 148 

 149 /*









 150  * The following definitions and declarations are only used by AES FIPS POST
 151  */
 152 #ifdef _AES_IMPL
 153 
 154 #ifdef _KERNEL
     /*
      * AES mechanism selectors.  No explicit values are given, so the
      * enumerators count up from 0 in the order listed; each trailing comment
      * names the mechanism it stands for.  The type is only declared when
      * both _AES_IMPL and _KERNEL are defined.
      */
 155 typedef enum aes_mech_type {
 156         AES_ECB_MECH_INFO_TYPE,         /* SUN_CKM_AES_ECB */
 157         AES_CBC_MECH_INFO_TYPE,         /* SUN_CKM_AES_CBC */
 158         AES_CBC_PAD_MECH_INFO_TYPE,     /* SUN_CKM_AES_CBC_PAD */
 159         AES_CTR_MECH_INFO_TYPE,         /* SUN_CKM_AES_CTR */
 160         AES_CCM_MECH_INFO_TYPE,         /* SUN_CKM_AES_CCM */
 161         AES_GCM_MECH_INFO_TYPE,         /* SUN_CKM_AES_GCM */
 162         AES_GMAC_MECH_INFO_TYPE         /* SUN_CKM_AES_GMAC */
 163 } aes_mech_type_t;
 164 
 165 #endif  /* _KERNEL */
 166 #endif /* _AES_IMPL */
 167 
 168 #ifdef  __cplusplus
 169 }


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23  * Use is subject to license terms.
  24  */
  25 /*
  26  * Copyright 2015 by Saso Kiselkov. All rights reserved.
  27  */
  28 
  29 #ifndef _AES_IMPL_H
  30 #define _AES_IMPL_H
  31 
  32 /*
  33  * Common definitions used by AES.
  34  */
  35 
  36 #ifdef  __cplusplus
  37 extern "C" {
  38 #endif
  39 
  40 #include <sys/types.h>
  41 #include <sys/crypto/common.h>
  42 
  43 /*
      * Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers at once.
      * The addresses v and w are OR-ed together and masked with (a - 1); the
      * macro evaluates to 1 only when no masked bit is set in either address.
      * The mask is only an alignment test when a is a power of two.
      */
  44 #define IS_P2ALIGNED2(v, w, a) \
  45         ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)
  46 
  47 #define AES_BLOCK_LEN   16      /* AES block size, in bytes */
  48 /*
      * Round constant length, in number of 32-bit elements.
      * With AES_BLOCK_LEN == 16 this evaluates to 5 * (16 / 4 - 2) = 10.
      */
  49 #define RC_LENGTH       (5 * ((AES_BLOCK_LEN) / 4 - 2))
  50 
  /*
   * Copy a 16-element block (16 bytes for uint8_t buffers) from src to dst
   * one element at a time, so neither pointer needs any particular alignment.
   *
   * The expansion is one comma expression cast to void instead of sixteen
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated 16 times; pass side-effect-free arguments.
   */
  #define AES_COPY_BLOCK_UNALIGNED(src, dst) \
          ((void)((dst)[0] = (src)[0], \
          (dst)[1] = (src)[1], \
          (dst)[2] = (src)[2], \
          (dst)[3] = (src)[3], \
          (dst)[4] = (src)[4], \
          (dst)[5] = (src)[5], \
          (dst)[6] = (src)[6], \
          (dst)[7] = (src)[7], \
          (dst)[8] = (src)[8], \
          (dst)[9] = (src)[9], \
          (dst)[10] = (src)[10], \
          (dst)[11] = (src)[11], \
          (dst)[12] = (src)[12], \
          (dst)[13] = (src)[13], \
          (dst)[14] = (src)[14], \
          (dst)[15] = (src)[15]))
  68 
  /*
   * XOR a 16-element block (16 bytes for uint8_t buffers) from src into dst
   * one element at a time, so neither pointer needs any particular alignment.
   *
   * The expansion is one comma expression cast to void instead of sixteen
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated 16 times; pass side-effect-free arguments.
   */
  #define AES_XOR_BLOCK_UNALIGNED(src, dst) \
          ((void)((dst)[0] ^= (src)[0], \
          (dst)[1] ^= (src)[1], \
          (dst)[2] ^= (src)[2], \
          (dst)[3] ^= (src)[3], \
          (dst)[4] ^= (src)[4], \
          (dst)[5] ^= (src)[5], \
          (dst)[6] ^= (src)[6], \
          (dst)[7] ^= (src)[7], \
          (dst)[8] ^= (src)[8], \
          (dst)[9] ^= (src)[9], \
          (dst)[10] ^= (src)[10], \
          (dst)[11] ^= (src)[11], \
          (dst)[12] ^= (src)[12], \
          (dst)[13] ^= (src)[13], \
          (dst)[14] ^= (src)[14], \
          (dst)[15] ^= (src)[15]))
  86 
  /*
   * Copy a 16-byte block from src to dst as two 64-bit words.
   *
   * Both pointers are accessed through uint64_t, so the caller must make
   * sure they are suitably aligned for 64-bit loads and stores.  src is
   * only read and is cast to a pointer to const, so const input buffers do
   * not lose their qualifier inside the macro.
   *
   * The expansion is one comma expression cast to void instead of two
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated twice.
   */
  #define AES_COPY_BLOCK_ALIGNED(src, dst) \
          ((void)(((uint64_t *)(void *)(dst))[0] = \
              ((const uint64_t *)(const void *)(src))[0], \
          ((uint64_t *)(void *)(dst))[1] = \
              ((const uint64_t *)(const void *)(src))[1]))
  90 
  /*
   * XOR a 16-byte block from src into dst as two 64-bit words.
   *
   * Both pointers are accessed through uint64_t, so the caller must make
   * sure they are suitably aligned for 64-bit loads and stores.  src is
   * only read and is cast to a pointer to const, so const input buffers do
   * not lose their qualifier inside the macro.
   *
   * The expansion is one comma expression cast to void instead of two
   * separate statements, so the macro is a single statement even as the
   * unbraced body of an if/else or loop.  Terminate each use with ';'.
   * src and dst are each evaluated twice.
   */
  #define AES_XOR_BLOCK_ALIGNED(src, dst) \
          ((void)(((uint64_t *)(void *)(dst))[0] ^= \
              ((const uint64_t *)(const void *)(src))[0], \
          ((uint64_t *)(void *)(dst))[1] ^= \
              ((const uint64_t *)(const void *)(src))[1]))
  94 
  95 /*
      * AES key size definitions.
      * The *BITS values are key lengths in bits; the *BYTES values divide
      * them by 8 with ">> 3", giving 16 and 32.
      */
  96 #define AES_MINBITS             128
  97 #define AES_MINBYTES            ((AES_MINBITS) >> 3)
  98 #define AES_MAXBITS             256
  99 #define AES_MAXBYTES            ((AES_MAXBITS) >> 3)
 100 
     /*
      * AES_MIN_KEY_BYTES and AES_MAX_KEY_BYTES expand to the same expressions
      * as AES_MINBYTES and AES_MAXBYTES above.
      */
 101 #define AES_MIN_KEY_BYTES       ((AES_MINBITS) >> 3)
 102 #define AES_MAX_KEY_BYTES       ((AES_MAXBITS) >> 3)
 103 #define AES_192_KEY_BYTES       24
     /* Same value as AES_BLOCK_LEN; presumably also in bytes -- TODO confirm. */
 104 #define AES_IV_LEN              16
 105 
 106 /*
      * AES key schedule may be implemented with 32- or 64-bit elements.
      * These values match the "32 or 64 bits" noted on aes_key.type.
      */
 107 #define AES_32BIT_KS            32
 108 #define AES_64BIT_KS            64
 109 
 110 #define MAX_AES_NR              14 /* Maximum number of rounds */
 111 #define MAX_AES_NB              4  /* Number of columns comprising a state */
 112 
 113 /*
 114  * Architecture-specific acceleration support autodetection.
 115  * Some architectures provide hardware-assisted acceleration using floating
 116  * point registers, which need special handling inside of the kernel, so the
 117  * macros below define the chunk size (AES_OPSZ) and the register save area
      * (AES_ACCEL_SAVESTATE) used around accelerated code.  Where no such
      * handling applies they also stub out aes_accel_enter()/aes_accel_exit().
 118  */
 119 #if     defined(__amd64) && defined(_KERNEL)
 120 /*
 121  * Using floating point registers requires temporarily disabling kernel
 122  * thread preemption, so we need to operate on small-enough chunks to
 123  * prevent scheduling latency bubbles.
 124  * A typical 64-bit CPU can sustain around 300-400MB/s/core even in the
 125  * slowest encryption modes (CBC), which with 32k per run works out to ~100us
 126  * per run. CPUs with AES-NI in fast modes (ECB, CTR, CBC decryption) can
 127  * easily sustain 3GB/s/core, so the latency potential essentially vanishes.
 128  */
 129 #define AES_OPSZ        32768
 130 
     /*
      * Declares the save buffer "name": 16 * 16 + 8 = 264 bytes.  The non-lint
      * form additionally requests 16-byte alignment through a compiler
      * attribute, which the lint form leaves out.
      */
 131 #if     defined(lint) || defined(__lint)
 132 #define AES_ACCEL_SAVESTATE(name)       uint8_t name[16 * 16 + 8]
 133 #else   /* lint || __lint */
 134 #define AES_ACCEL_SAVESTATE(name) \
 135         /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
 136         uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
 137 #endif  /* lint || __lint */
 138 
 139 #else   /* !defined(__amd64) || !defined(_KERNEL) */
 140 /*
 141  * Other architectures, and amd64 outside of the kernel: no save area or
      * enter/exit calls are needed, and AES_OPSZ becomes the largest size_t
      * value.
 142  */
 143 #define AES_OPSZ        ((size_t)-1)
 144 /*
      * On other architectures or outside of the kernel these get stubbed out.
      * All three expand to nothing, so a use followed by ';' leaves only an
      * empty statement.
      */
 145 #define AES_ACCEL_SAVESTATE(name)
 146 #define aes_accel_enter(savestate)
 147 #define aes_accel_exit(savestate)
 148 #endif  /* !defined(__amd64) || !defined(_KERNEL) */
 149 
 150 typedef union {
             /*
              * Key schedule storage, sized from the maximum round count:
              * (MAX_AES_NR + 1) * MAX_AES_NB = 60 elements.  The 64-bit
              * element view is only compiled in when sun4u is defined; the
              * 32-bit view is always present.
              */
 151 #ifdef  sun4u
 152         uint64_t        ks64[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 153 #endif
 154         uint32_t        ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 155 } aes_ks_t;
 156 



 157 typedef struct aes_key aes_key_t;
     /*
      * Expanded AES key: one schedule for each direction, plus the round
      * count and the element width of the schedules.
      */
 158 struct aes_key {
 159         aes_ks_t        encr_ks;  /* encryption key schedule */
 160         aes_ks_t        decr_ks;  /* decryption key schedule */
 161 #ifdef __amd64
             /*
              * NOTE(review): a long double member presumably raises the
              * alignment of the whole structure to 16 bytes, and with it that
              * of encr_ks at offset 0 -- confirm the allocator honors it.
              */
 162         long double     align128; /* Align fields above for Intel AES-NI */

 163 #endif  /* __amd64 */
 164         int             nr;       /* number of rounds (10, 12, or 14) */
             /* presumably AES_32BIT_KS or AES_64BIT_KS -- TODO confirm */
 165         int             type;     /* key schedule size (32 or 64 bits) */
 166 };
 167 
 168 /*
 169  * Core AES functions.
 170  * ks and keysched are pointers to aes_key_t.
 171  * They are declared void* as they are intended to be opaque types.
 172  * Use function aes_alloc_keysched() to allocate memory for ks and keysched.
      *
      * In the block and ECB functions the const-qualified buffer is the input
      * and the other buffer is the output; pt and ct presumably stand for
      * plaintext and ciphertext.  The definitions live outside this header,
      * so the meaning of the int return values, of *size, and the unit of
      * length are not visible here.
 173  */
 174 extern void *aes_alloc_keysched(size_t *size, int kmflag);
 175 extern void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits,
 176         void *keysched);
 177 extern int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct);
 178 extern int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt);
 179 extern int aes_encrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct,
 180     uint64_t length);
 /*
  * ECB-mode decryption entry point.
  *
  * The const-qualified buffer is the input, which for decryption is the
  * ciphertext, so it is named ct and the output buffer pt, the same way
  * aes_decrypt_block() names them.  Only the parameter names differ from
  * the earlier prototype; the types and their order are unchanged.
  *
  * NOTE(review): the unit of length and the meaning of the return value
  * are not visible in this header -- confirm against the implementation.
  */
 extern int aes_decrypt_ecb(const void *ks, const uint8_t *ct, uint8_t *pt,
     uint64_t length);
 183 extern int aes_encrypt_cbc(const void *ks, const uint8_t *pt, uint8_t *ct,
 184     const uint8_t *iv, uint64_t length);
     /*
      * aes_encrypt_cbc() above takes its iv as a pointer to const.  The
      * counter of aes_ctr_mode() below is a non-const array of two 64-bit
      * words, which the prototype permits the callee to modify; whether it
      * does so is not visible in this header.
      */
 185 extern int aes_ctr_mode(const void *ks, const uint8_t *pt, uint8_t *ct,
 186     uint64_t length, uint64_t counter[2]);
 187 
 188 /*
 189  * AES mode functions.
 190  * AES_COPY_BLOCK and AES_XOR_BLOCK name the block copy and xor routines
      * chosen at compile time: the *_intel functions when __amd64 is defined,
      * otherwise aes_copy_block() and aes_xor_block().  The latter two are
      * declared unconditionally and, per the original note here, operate on
      * 16-byte AES blocks.
      * NOTE(review): how aes_xor_intel8() differs from aes_xor_intel() is not
      * visible in this header -- confirm against the implementation.
 191  */
 192 #ifdef  __amd64
 193 #define AES_COPY_BLOCK  aes_copy_intel
 194 #define AES_XOR_BLOCK   aes_xor_intel
 195 extern void aes_copy_intel(const uint8_t *src, uint8_t *dst);
 196 extern void aes_xor_intel(const uint8_t *src, uint8_t *dst);
 197 extern void aes_xor_intel8(const uint8_t *src, uint8_t *dst);
 198 #else   /* !__amd64 */
 199 #define AES_COPY_BLOCK  aes_copy_block
 200 #define AES_XOR_BLOCK   aes_xor_block
 201 #endif  /* !__amd64 */
 202 
 203 extern void aes_copy_block(const uint8_t *src, uint8_t *dst);
 204 extern void aes_xor_block(const uint8_t *src, uint8_t *dst);
 205 
 206 /*
      * Note: ctx is a pointer to aes_ctx_t defined in modes.h.
      * Both functions take an input buffer (data, length) and a crypto_data_t
      * output descriptor.  The meaning of the int return value is not visible
      * in this header.
      */
 207 extern int aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 208     crypto_data_t *out);
 209 extern int aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 210     crypto_data_t *out);
 211 
 212 #if     defined(__amd64) && defined(_KERNEL)
 213 /*
 214  * When AES floating-point acceleration is available, these will be called
 215  * by the worker functions to clear and restore floating point state and
 216  * control kernel thread preemption.
      * savestate is presumably the buffer declared with AES_ACCEL_SAVESTATE()
      * -- confirm against the implementation.  In every other configuration
      * both names are macros that expand to nothing.
 217  */
 218 extern void aes_accel_enter(void *savestate);
 219 extern void aes_accel_exit(void *savestate);
 220 #endif  /* __amd64 && _KERNEL */
 221 
 222 /*
 223  * The following definitions and declarations are only used by AES FIPS POST
 224  */
 225 #ifdef _AES_IMPL
 226 
 227 #ifdef _KERNEL
     /*
      * AES mechanism selectors.  No explicit values are given, so the
      * enumerators count up from 0 in the order listed; each trailing comment
      * names the mechanism it stands for.  The type is only declared when
      * both _AES_IMPL and _KERNEL are defined.
      */
 228 typedef enum aes_mech_type {
 229         AES_ECB_MECH_INFO_TYPE,         /* SUN_CKM_AES_ECB */
 230         AES_CBC_MECH_INFO_TYPE,         /* SUN_CKM_AES_CBC */
 231         AES_CBC_PAD_MECH_INFO_TYPE,     /* SUN_CKM_AES_CBC_PAD */
 232         AES_CTR_MECH_INFO_TYPE,         /* SUN_CKM_AES_CTR */
 233         AES_CCM_MECH_INFO_TYPE,         /* SUN_CKM_AES_CCM */
 234         AES_GCM_MECH_INFO_TYPE,         /* SUN_CKM_AES_GCM */
 235         AES_GMAC_MECH_INFO_TYPE         /* SUN_CKM_AES_GMAC */
 236 } aes_mech_type_t;
 237 
 238 #endif  /* _KERNEL */
 239 #endif /* _AES_IMPL */
 240 
 241 #ifdef  __cplusplus
 242 }