4896 Performance improvements for KCF AES modes
--- old/usr/src/common/crypto/aes/aes_impl.h
+++ new/usr/src/common/crypto/aes/aes_impl.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
27 + */
25 28
26 29 #ifndef _AES_IMPL_H
27 30 #define _AES_IMPL_H
28 31
29 32 /*
30 33 * Common definitions used by AES.
31 34 */
32 35
33 36 #ifdef __cplusplus
34 37 extern "C" {
35 38 #endif
36 39
37 40 #include <sys/types.h>
38 41 #include <sys/crypto/common.h>
39 42
40 43 /* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */
41 44 #define IS_P2ALIGNED2(v, w, a) \
42 45 ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)
43 46
44 47 #define AES_BLOCK_LEN 16 /* bytes */
45 48 /* Round constant length, in number of 32-bit elements: */
46 49 #define RC_LENGTH (5 * ((AES_BLOCK_LEN) / 4 - 2))
47 50
48 -#define AES_COPY_BLOCK(src, dst) \
51 +#define AES_COPY_BLOCK_UNALIGNED(src, dst) \
49 52 (dst)[0] = (src)[0]; \
50 53 (dst)[1] = (src)[1]; \
51 54 (dst)[2] = (src)[2]; \
52 55 (dst)[3] = (src)[3]; \
53 56 (dst)[4] = (src)[4]; \
54 57 (dst)[5] = (src)[5]; \
55 58 (dst)[6] = (src)[6]; \
56 59 (dst)[7] = (src)[7]; \
57 60 (dst)[8] = (src)[8]; \
58 61 (dst)[9] = (src)[9]; \
59 62 (dst)[10] = (src)[10]; \
60 63 (dst)[11] = (src)[11]; \
61 64 (dst)[12] = (src)[12]; \
62 65 (dst)[13] = (src)[13]; \
63 66 (dst)[14] = (src)[14]; \
64 67 (dst)[15] = (src)[15]
65 68
66 -#define AES_XOR_BLOCK(src, dst) \
69 +#define AES_XOR_BLOCK_UNALIGNED(src, dst) \
67 70 (dst)[0] ^= (src)[0]; \
68 71 (dst)[1] ^= (src)[1]; \
69 72 (dst)[2] ^= (src)[2]; \
70 73 (dst)[3] ^= (src)[3]; \
71 74 (dst)[4] ^= (src)[4]; \
72 75 (dst)[5] ^= (src)[5]; \
73 76 (dst)[6] ^= (src)[6]; \
74 77 (dst)[7] ^= (src)[7]; \
75 78 (dst)[8] ^= (src)[8]; \
76 79 (dst)[9] ^= (src)[9]; \
77 80 (dst)[10] ^= (src)[10]; \
78 81 (dst)[11] ^= (src)[11]; \
79 82 (dst)[12] ^= (src)[12]; \
80 83 (dst)[13] ^= (src)[13]; \
81 84 (dst)[14] ^= (src)[14]; \
82 85 (dst)[15] ^= (src)[15]
83 86
87 +#define AES_COPY_BLOCK_ALIGNED(src, dst) \
88 + ((uint64_t *)(void *)(dst))[0] = ((uint64_t *)(void *)(src))[0]; \
89 + ((uint64_t *)(void *)(dst))[1] = ((uint64_t *)(void *)(src))[1]
90 +
91 +#define AES_XOR_BLOCK_ALIGNED(src, dst) \
92 + ((uint64_t *)(void *)(dst))[0] ^= ((uint64_t *)(void *)(src))[0]; \
93 + ((uint64_t *)(void *)(dst))[1] ^= ((uint64_t *)(void *)(src))[1]
94 +
84 95 /* AES key size definitions */
85 96 #define AES_MINBITS 128
86 97 #define AES_MINBYTES ((AES_MINBITS) >> 3)
87 98 #define AES_MAXBITS 256
88 99 #define AES_MAXBYTES ((AES_MAXBITS) >> 3)
89 100
90 101 #define AES_MIN_KEY_BYTES ((AES_MINBITS) >> 3)
91 102 #define AES_MAX_KEY_BYTES ((AES_MAXBITS) >> 3)
92 103 #define AES_192_KEY_BYTES 24
93 104 #define AES_IV_LEN 16
94 105
95 106 /* AES key schedule may be implemented with 32- or 64-bit elements: */
96 107 #define AES_32BIT_KS 32
97 108 #define AES_64BIT_KS 64
98 109
99 110 #define MAX_AES_NR 14 /* Maximum number of rounds */
100 111 #define MAX_AES_NB 4 /* Number of columns comprising a state */
101 112
113 +/*
114 + * Architecture-specific acceleration support autodetection.
115 + * Some architectures provide hardware-assisted acceleration using floating
116 + * point registers, which need special handling inside of the kernel, so the
117 + * macros below define the auxiliary functions needed to utilize them.
118 + */
119 +#if defined(__amd64) && defined(_KERNEL)
120 +/*
121 + * Using floating point registers requires temporarily disabling kernel
122 + * thread preemption, so we need to operate on small-enough chunks to
123 + * prevent scheduling latency bubbles.
124 + * A typical 64-bit CPU can sustain around 300-400MB/s/core even in the
125 + * slowest encryption modes (CBC), which with 32k per run works out to ~100us
126 + * per run. CPUs with AES-NI in fast modes (ECB, CTR, CBC decryption) can
127 + * easily sustain 3GB/s/core, so the latency potential essentially vanishes.
128 + */
129 +#define AES_OPSZ 32768
130 +
131 +#if defined(lint) || defined(__lint)
132 +#define AES_ACCEL_SAVESTATE(name) uint8_t name[16 * 16 + 8]
133 +#else /* lint || __lint */
134 +#define AES_ACCEL_SAVESTATE(name) \
135 + /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
136 + uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
137 +#endif /* lint || __lint */
138 +
139 +#else /* !defined(__amd64) || !defined(_KERNEL) */
140 +/*
141 + * All other configurations: no hardware-assisted acceleration support.
142 + */
143 +#define AES_OPSZ ((size_t)-1)
144 +/* On other architectures or outside of the kernel these get stubbed out */
145 +#define AES_ACCEL_SAVESTATE(name)
146 +#define aes_accel_enter(savestate)
147 +#define aes_accel_exit(savestate)
148 +#endif /* !defined(__amd64) || !defined(_KERNEL) */
149 +
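
For illustration only, a hedged sketch of how AES_OPSZ, AES_ACCEL_SAVESTATE and the aes_accel_enter/exit hooks are meant to compose: a bulk operation is broken into runs of at most AES_OPSZ bytes so preemption is never disabled for longer than roughly one run. The names example_chunked_op and do_one_run are hypothetical placeholders, not part of this change:

static void
example_chunked_op(const uint8_t *in, uint8_t *out, size_t len)
{
	AES_ACCEL_SAVESTATE(savestate);

	while (len > 0) {
		size_t run = (len < AES_OPSZ) ? len : AES_OPSZ;

		aes_accel_enter(savestate);	/* save FPU state, disable preemption */
		do_one_run(in, out, run);	/* placeholder for the accelerated work */
		aes_accel_exit(savestate);	/* restore FPU state, re-enable preemption */

		in += run;
		out += run;
		len -= run;
	}
}

Outside the amd64 kernel the save-state macro and the enter/exit hooks expand to nothing and AES_OPSZ is unlimited, so the same loop degenerates to a single run.
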
102 150 typedef union {
103 151 #ifdef sun4u
104 152 uint64_t ks64[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
105 153 #endif
106 154 uint32_t ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
107 155 } aes_ks_t;
108 156
109 -/* aes_key.flags value: */
110 -#define INTEL_AES_NI_CAPABLE 0x1 /* AES-NI instructions present */
111 -
112 157 typedef struct aes_key aes_key_t;
113 158 struct aes_key {
114 159 aes_ks_t encr_ks; /* encryption key schedule */
115 160 aes_ks_t decr_ks; /* decryption key schedule */
116 161 #ifdef __amd64
117 162 long double align128; /* Align fields above for Intel AES-NI */
118 - int flags; /* implementation-dependent flags */
119 163 #endif /* __amd64 */
120 164 int nr; /* number of rounds (10, 12, or 14) */
121 165 int type; /* key schedule size (32 or 64 bits) */
122 166 };
123 167
124 168 /*
125 169 * Core AES functions.
126 170 * ks and keysched are pointers to aes_key_t.
127 171 * They are declared void* as they are intended to be opaque types.
128 172 * Use function aes_alloc_keysched() to allocate memory for ks and keysched.
129 173 */
130 174 extern void *aes_alloc_keysched(size_t *size, int kmflag);
131 175 extern void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits,
132 176 void *keysched);
133 177 extern int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct);
134 178 extern int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt);
179 +extern int aes_encrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct,
180 + uint64_t length);
181 +extern int aes_decrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct,
182 + uint64_t length);
183 +extern int aes_encrypt_cbc(const void *ks, const uint8_t *pt, uint8_t *ct,
184 + const uint8_t *iv, uint64_t length);
185 +extern int aes_ctr_mode(const void *ks, const uint8_t *pt, uint8_t *ct,
186 + uint64_t length, uint64_t counter[2]);
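
A minimal usage sketch (hypothetical wrapper, not part of the webrev) for the new bulk ECB entry point, assuming length is a byte count that must be a multiple of AES_BLOCK_LEN and that the key schedule was prepared with aes_init_keysched(); any kernel FPU/preemption bracketing is omitted here:

static int
example_ecb_encrypt(const void *keysched, const uint8_t *pt, uint8_t *ct,
    uint64_t length)
{
	/* ECB operates on whole 16-byte blocks only. */
	if ((length % AES_BLOCK_LEN) != 0)
		return (-1);
	return (aes_encrypt_ecb(keysched, pt, ct, length));
}
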
135 187
136 188 /*
137 189 * AES mode functions.
138 190 * The first 2 functions operate on 16-byte AES blocks.
139 191 */
140 -extern void aes_copy_block(uint8_t *in, uint8_t *out);
141 -extern void aes_xor_block(uint8_t *data, uint8_t *dst);
192 +#ifdef __amd64
193 +#define AES_COPY_BLOCK aes_copy_intel
194 +#define AES_XOR_BLOCK aes_xor_intel
195 +extern void aes_copy_intel(const uint8_t *src, uint8_t *dst);
196 +extern void aes_xor_intel(const uint8_t *src, uint8_t *dst);
197 +extern void aes_xor_intel8(const uint8_t *src, uint8_t *dst);
198 +#else /* !__amd64 */
199 +#define AES_COPY_BLOCK aes_copy_block
200 +#define AES_XOR_BLOCK aes_xor_block
201 +#endif /* !__amd64 */
142 202
203 +extern void aes_copy_block(const uint8_t *src, uint8_t *dst);
204 +extern void aes_xor_block(const uint8_t *src, uint8_t *dst);
205 +
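
And a sketch of how mode code might use the AES_COPY_BLOCK/AES_XOR_BLOCK names, which resolve to aes_copy_intel/aes_xor_intel on amd64 and to the generic C routines elsewhere. This hypothetical example shows one CBC encryption step with a caller-managed IV and ignores the FPU/preemption bracketing discussed above:

static void
example_cbc_encrypt_step(const void *ks, uint8_t *iv, const uint8_t *pt,
    uint8_t *ct)
{
	uint8_t tmp[AES_BLOCK_LEN];

	AES_COPY_BLOCK(pt, tmp);		/* tmp = plaintext block */
	AES_XOR_BLOCK(iv, tmp);			/* tmp ^= IV (or previous ciphertext) */
	(void) aes_encrypt_block(ks, tmp, ct);	/* ct = AES(tmp) */
	AES_COPY_BLOCK(ct, iv);			/* chain: next IV = this ciphertext */
}
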
143 206 /* Note: ctx is a pointer to aes_ctx_t defined in modes.h */
144 207 extern int aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
145 208 crypto_data_t *out);
146 209 extern int aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
147 210 crypto_data_t *out);
148 211
212 +#if defined(__amd64) && defined(_KERNEL)
149 213 /*
214 + * When AES floating-point acceleration is available, these will be called
215 + * by the worker functions to save and restore floating point state and
216 + * control kernel thread preemption.
217 + */
218 +extern void aes_accel_enter(void *savestate);
219 +extern void aes_accel_exit(void *savestate);
220 +#endif /* __amd64 && _KERNEL */
221 +
222 +/*
150 223 * The following definitions and declarations are only used by AES FIPS POST
151 224 */
152 225 #ifdef _AES_IMPL
153 226
154 227 #ifdef _KERNEL
155 228 typedef enum aes_mech_type {
156 229 AES_ECB_MECH_INFO_TYPE, /* SUN_CKM_AES_ECB */
157 230 AES_CBC_MECH_INFO_TYPE, /* SUN_CKM_AES_CBC */
158 231 AES_CBC_PAD_MECH_INFO_TYPE, /* SUN_CKM_AES_CBC_PAD */
159 232 AES_CTR_MECH_INFO_TYPE, /* SUN_CKM_AES_CTR */
160 233 AES_CCM_MECH_INFO_TYPE, /* SUN_CKM_AES_CCM */
161 234 AES_GCM_MECH_INFO_TYPE, /* SUN_CKM_AES_GCM */
162 235 AES_GMAC_MECH_INFO_TYPE /* SUN_CKM_AES_GMAC */
163 236 } aes_mech_type_t;
164 237
165 238 #endif /* _KERNEL */
166 239 #endif /* _AES_IMPL */
167 240
168 241 #ifdef __cplusplus
169 242 }
170 243 #endif
171 244
172 245 #endif /* _AES_IMPL_H */