Print this page
4896 Performance improvements for KCF AES modes
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/common/crypto/modes/cbc.c
+++ new/usr/src/common/crypto/modes/cbc.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
27 + */
25 28
26 29 #ifndef _KERNEL
27 30 #include <strings.h>
28 31 #include <limits.h>
29 32 #include <assert.h>
30 33 #include <security/cryptoki.h>
31 34 #endif
32 35
33 36 #include <sys/types.h>
37 +#define INLINE_CRYPTO_GET_PTRS
34 38 #include <modes/modes.h>
35 39 #include <sys/crypto/common.h>
36 40 #include <sys/crypto/impl.h>
37 41
/*
 * Tunable: set to B_FALSE to disable the accelerated (fastpath) CBC code
 * in this file and force the generic block-at-a-time paths below, e.g.
 * for debugging or performance comparison.
 */
boolean_t cbc_fastpath_enabled = B_TRUE;
43 +
44 +static void
45 +cbc_decrypt_fastpath(cbc_ctx_t *ctx, const uint8_t *data, size_t length,
46 + uint8_t *out, size_t block_size,
47 + int (*decrypt)(const void *, const uint8_t *, uint8_t *),
48 + int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
49 + void (*xor_block)(const uint8_t *, uint8_t *),
50 + void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t))
51 +{
52 + const uint8_t *iv = (uint8_t *)ctx->cbc_iv;
53 +
54 + /* Use bulk decryption when available. */
55 + if (decrypt_ecb != NULL) {
56 + decrypt_ecb(ctx->cbc_keysched, data, out, length);
57 + } else {
58 + for (size_t i = 0; i < length; i += block_size)
59 + decrypt(ctx->cbc_keysched, &data[i], &out[i]);
60 + }
61 +
62 + /* Use bulk XOR when available. */
63 + if (xor_block_range != NULL && length >= 2 * block_size) {
64 + xor_block(iv, out);
65 + xor_block_range(data, &out[block_size], length - block_size);
66 + } else {
67 + for (size_t i = 0; i < length; i += block_size) {
68 + xor_block(iv, &out[i]);
69 + iv = &data[i];
70 + }
71 + }
72 +}
73 +
/*
 * Algorithm independent CBC functions.
 */

/*
 * Encrypt `length' bytes from `data' in CBC mode. Partial trailing
 * blocks are buffered in ctx->cbc_remainder across calls. If `out' is
 * NULL, encryption is performed in place over `data'; otherwise the
 * ciphertext is written (possibly scatter-gather) into `out'. On
 * return the last ciphertext block is saved in ctx->cbc_iv so the
 * chain continues correctly on the next call.
 *
 * `encrypt_cbc' is an optional algorithm-specific accelerated routine
 * that encrypts a whole block-aligned run in one call; when usable, it
 * bypasses the generic loop entirely.
 */
int
cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*encrypt)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *,
    const uint8_t *, uint64_t))
{
	size_t remainder = length;
	size_t need;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;

	/*
	 * CBC encryption fastpath requirements:
	 * - fastpath is enabled
	 * - algorithm-specific acceleration function is available
	 * - input is block-aligned
	 * - output is a single contiguous region or the user requested that
	 *   we overwrite their input buffer (input/output aliasing allowed)
	 *
	 * NOTE(review): block_size is assumed to be a power of two here
	 * (the `length & (block_size - 1)' alignment test) — true for AES,
	 * confirm for all callers.
	 */
	if (cbc_fastpath_enabled && encrypt_cbc != NULL && length != 0 &&
	    ctx->cbc_remainder_len == 0 && (length & (block_size - 1)) == 0 &&
	    (out == NULL || CRYPTO_DATA_IS_SINGLE_BLOCK(out))) {
		if (out == NULL) {
			/* In-place: ciphertext overwrites the input. */
			encrypt_cbc(ctx->cbc_keysched, (uint8_t *)data,
			    (uint8_t *)data, (uint8_t *)ctx->cbc_iv, length);
			ctx->cbc_lastp = (uint8_t *)&data[length - block_size];
		} else {
			uint8_t *outp = CRYPTO_DATA_FIRST_BLOCK(out);
			encrypt_cbc(ctx->cbc_keysched, (uint8_t *)data, outp,
			    (uint8_t *)ctx->cbc_iv, length);
			out->cd_offset += length;
			ctx->cbc_lastp = &outp[length - block_size];
		}
		/* Still need to latch the last block into cbc_iv below. */
		goto out;
	}

	if (length + ctx->cbc_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
		    length);
		ctx->cbc_remainder_len += length;
		ctx->cbc_copy_to = datap;
		return (CRYPTO_SUCCESS);
	}

	lastp = (uint8_t *)ctx->cbc_iv;
	if (out != NULL)
		crypto_init_ptrs(out, &iov_or_mp, &offset);

	do {
		/* Unprocessed data from last call. */
		if (ctx->cbc_remainder_len > 0) {
			/* Top up the buffered partial block to a full one. */
			need = block_size - ctx->cbc_remainder_len;

			if (need > remainder)
				return (CRYPTO_DATA_LEN_RANGE);

			bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
			    [ctx->cbc_remainder_len], need);

			blockp = (uint8_t *)ctx->cbc_remainder;
		} else {
			blockp = datap;
		}

		if (out == NULL) {
			/*
			 * XOR the previous cipher block or IV with the
			 * current clear block.
			 */
			xor_block(lastp, blockp);
			encrypt(ctx->cbc_keysched, blockp, blockp);

			ctx->cbc_lastp = blockp;
			lastp = blockp;

			/*
			 * If this block was assembled from a buffered
			 * remainder, copy the ciphertext back out over the
			 * original input fragments.
			 */
			if (ctx->cbc_remainder_len > 0) {
				bcopy(blockp, ctx->cbc_copy_to,
				    ctx->cbc_remainder_len);
				bcopy(blockp + ctx->cbc_remainder_len, datap,
				    need);
			}
		} else {
			/*
			 * XOR the previous cipher block or IV with the
			 * current clear block.
			 */
			xor_block(blockp, lastp);
			encrypt(ctx->cbc_keysched, lastp, lastp);
			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
			    &out_data_1_len, &out_data_2, block_size);

			/* copy block to where it belongs */
			if (out_data_1_len == block_size) {
				copy_block(lastp, out_data_1);
			} else {
				/* Output block straddles two iovecs. */
				bcopy(lastp, out_data_1, out_data_1_len);
				if (out_data_2 != NULL) {
					bcopy(lastp + out_data_1_len,
					    out_data_2,
					    block_size - out_data_1_len);
				}
			}
			/* update offset */
			out->cd_offset += block_size;
		}

		/* Update pointer to next block of data to be processed. */
		if (ctx->cbc_remainder_len != 0) {
			datap += need;
			ctx->cbc_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->cbc_remainder, remainder);
			ctx->cbc_remainder_len = remainder;
			ctx->cbc_copy_to = datap;
			goto out;
		}
		ctx->cbc_copy_to = NULL;

	} while (remainder > 0);

out:
	/*
	 * Save the last encrypted block in the context.
	 */
	if (ctx->cbc_lastp != NULL) {
		copy_block((uint8_t *)ctx->cbc_lastp, (uint8_t *)ctx->cbc_iv);
		ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv;
	}

	return (CRYPTO_SUCCESS);
}
163 226
/*
 * Ping-pong between cbc_lastblock and cbc_iv: given one of the two
 * scratch buffers, yield the other. Used to keep the previous
 * ciphertext block around while decrypting the current one.
 */
#define OTHER(a, ctx) \
	(((a) == (ctx)->cbc_lastblock) ? (ctx)->cbc_iv : (ctx)->cbc_lastblock)

/*
 * Decrypt `length' bytes of ciphertext from `data' in CBC mode.
 * Partial trailing blocks are buffered in ctx->cbc_remainder across
 * calls. If `out' is NULL, decryption is performed in place over
 * `data'; otherwise plaintext is written (possibly scatter-gather)
 * into `out'. The last ciphertext block is preserved in the context
 * so chaining continues correctly on the next call.
 *
 * `decrypt_ecb' and `xor_block_range' are optional bulk routines used
 * by the fastpath; either may be NULL.
 */
/* ARGSUSED */
int
cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length,
    crypto_data_t *out, size_t block_size,
    int (*decrypt)(const void *, const uint8_t *, uint8_t *),
    void (*copy_block)(const uint8_t *, uint8_t *),
    void (*xor_block)(const uint8_t *, uint8_t *),
    int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
    void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t))
{
	size_t remainder = length;
	size_t need;
	uint8_t *datap = (uint8_t *)data;
	uint8_t *blockp;
	uint8_t *lastp;
	void *iov_or_mp;
	offset_t offset;
	uint8_t *out_data_1;
	uint8_t *out_data_2;
	size_t out_data_1_len;

	/*
	 * CBC decryption fastpath requirements:
	 * - fastpath is enabled
	 * - input is block-aligned
	 * - output is a single contiguous region and doesn't alias input
	 *
	 * NOTE(review): unlike the encrypt fastpath, there is no explicit
	 * `out != NULL' test before CRYPTO_DATA_IS_SINGLE_BLOCK(out), yet
	 * the legacy path below handles out == NULL (in-place decrypt).
	 * Confirm the macro safely rejects a NULL crypto_data_t.
	 */
	if (cbc_fastpath_enabled && ctx->cbc_remainder_len == 0 &&
	    length != 0 && (length & (block_size - 1)) == 0 &&
	    CRYPTO_DATA_IS_SINGLE_BLOCK(out)) {
		uint8_t *outp = CRYPTO_DATA_FIRST_BLOCK(out);

		cbc_decrypt_fastpath(ctx, (uint8_t *)data, length, outp,
		    block_size, decrypt, decrypt_ecb, xor_block,
		    xor_block_range);
		out->cd_offset += length;
		/* Preserve the last ciphertext block for chaining. */
		bcopy(&data[length - block_size], ctx->cbc_iv, block_size);
		ctx->cbc_lastp = (uint8_t *)ctx->cbc_iv;
		return (CRYPTO_SUCCESS);
	}

	if (length + ctx->cbc_remainder_len < block_size) {
		/* accumulate bytes here and return */
		bcopy(datap,
		    (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len,
		    length);
		ctx->cbc_remainder_len += length;
		ctx->cbc_copy_to = datap;
		return (CRYPTO_SUCCESS);
	}

	lastp = ctx->cbc_lastp;
	if (out != NULL)
		crypto_init_ptrs(out, &iov_or_mp, &offset);

	do {
		/* Unprocessed data from last call. */
		if (ctx->cbc_remainder_len > 0) {
			/* Top up the buffered partial block to a full one. */
			need = block_size - ctx->cbc_remainder_len;

			if (need > remainder)
				return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE);

			bcopy(datap, &((uint8_t *)ctx->cbc_remainder)
			    [ctx->cbc_remainder_len], need);

			blockp = (uint8_t *)ctx->cbc_remainder;
		} else {
			blockp = datap;
		}

		/* Stash this ciphertext block before it may be overwritten. */
		/* LINTED: pointer alignment */
		copy_block(blockp, (uint8_t *)OTHER((uint64_t *)lastp, ctx));

		if (out != NULL) {
			decrypt(ctx->cbc_keysched, blockp,
			    (uint8_t *)ctx->cbc_remainder);
			blockp = (uint8_t *)ctx->cbc_remainder;
		} else {
			decrypt(ctx->cbc_keysched, blockp, blockp);
		}

		/*
		 * XOR the previous cipher block or IV with the
		 * currently decrypted block.
		 */
		xor_block(lastp, blockp);

		/* Flip to the buffer holding the just-saved ciphertext. */
		/* LINTED: pointer alignment */
		lastp = (uint8_t *)OTHER((uint64_t *)lastp, ctx);

		if (out != NULL) {
			crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
			    &out_data_1_len, &out_data_2, block_size);

			bcopy(blockp, out_data_1, out_data_1_len);
			if (out_data_2 != NULL) {
				/* Output block straddles two iovecs. */
				bcopy(blockp + out_data_1_len, out_data_2,
				    block_size - out_data_1_len);
			}

			/* update offset */
			out->cd_offset += block_size;

		} else if (ctx->cbc_remainder_len > 0) {
			/* copy temporary block to where it belongs */
			bcopy(blockp, ctx->cbc_copy_to, ctx->cbc_remainder_len);
			bcopy(blockp + ctx->cbc_remainder_len, datap, need);
		}

		/* Update pointer to next block of data to be processed. */
		if (ctx->cbc_remainder_len != 0) {
			datap += need;
			ctx->cbc_remainder_len = 0;
		} else {
			datap += block_size;
		}

		remainder = (size_t)&data[length] - (size_t)datap;

		/* Incomplete last block. */
		if (remainder > 0 && remainder < block_size) {
			bcopy(datap, ctx->cbc_remainder, remainder);
			ctx->cbc_remainder_len = remainder;
			ctx->cbc_lastp = lastp;
			ctx->cbc_copy_to = datap;
			return (CRYPTO_SUCCESS);
		}
		ctx->cbc_copy_to = NULL;

	} while (remainder > 0);

	ctx->cbc_lastp = lastp;
	return (CRYPTO_SUCCESS);
}
280 365
281 366 int
282 367 cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len,
283 - size_t block_size, void (*copy_block)(uint8_t *, uint64_t *))
368 + size_t block_size, void (*copy_block)(const uint8_t *, uint64_t *))
284 369 {
285 370 /*
286 371 * Copy IV into context.
287 372 *
288 373 * If cm_param == NULL then the IV comes from the
289 374 * cd_miscdata field in the crypto_data structure.
290 375 */
291 376 if (param != NULL) {
292 377 #ifdef _KERNEL
293 378 ASSERT(param_len == block_size);
294 379 #else
295 380 assert(param_len == block_size);
296 381 #endif
297 382 copy_block((uchar_t *)param, cbc_ctx->cbc_iv);
298 383 }
299 384
300 385 cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0];
301 386 cbc_ctx->cbc_flags |= CBC_MODE;
302 387 return (CRYPTO_SUCCESS);
303 388 }
304 389
305 390 /* ARGSUSED */
306 391 void *
307 392 cbc_alloc_ctx(int kmflag)
308 393 {
309 394 cbc_ctx_t *cbc_ctx;
310 395
311 396 #ifdef _KERNEL
312 397 if ((cbc_ctx = kmem_zalloc(sizeof (cbc_ctx_t), kmflag)) == NULL)
313 398 #else
314 399 if ((cbc_ctx = calloc(1, sizeof (cbc_ctx_t))) == NULL)
315 400 #endif
316 401 return (NULL);
317 402
318 403 cbc_ctx->cbc_flags = CBC_MODE;
319 404 return (cbc_ctx);
320 405 }
↓ open down ↓ |
27 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX