4896 Performance improvements for KCF AES modes
--- old/usr/src/common/crypto/aes/aes_modes.c
+++ new/usr/src/common/crypto/aes/aes_modes.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 23 * Use is subject to license terms.
24 24 */
25 +/*
26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
27 + */
25 28
26 29 #include <sys/types.h>
27 30 #include <sys/sysmacros.h>
28 31 #include <modes/modes.h>
29 32 #include "aes_impl.h"
30 33 #ifndef _KERNEL
31 34 #include <stdlib.h>
32 35 #endif /* !_KERNEL */
33 36
37 +#if defined(__amd64)
34 38
39 +/*
40 + * XORs a range of contiguous AES blocks in `data' with blocks in `dst'
41 + * and places the result in `dst'. On x86-64 this exploits the 128-bit
42 + * %xmm (SSE) registers to maximize performance.
43 + */
44 +static void
45 +aes_xor_range(const uint8_t *data, uint8_t *dst, uint64_t length)
46 +{
47 + uint64_t i = 0;
48 +
49 + /* First use the unrolled version. */
50 + for (; i + 8 * AES_BLOCK_LEN <= length; i += 8 * AES_BLOCK_LEN)
51 + aes_xor_intel8(&data[i], &dst[i]);
52 + /* Finish the rest in single blocks. */
53 + for (; i < length; i += AES_BLOCK_LEN)
54 + aes_xor_intel(&data[i], &dst[i]);
55 +}
56 +
57 +#else /* !__amd64 */
58 +
59 +/*
60 + * XORs a range of contiguous AES blocks in `data' with blocks in `dst'
61 + * and places the result in `dst'.
62 + */
63 +static void
64 +aes_xor_range(const uint8_t *data, uint8_t *dst, uint64_t length)
65 +{
66 + uint64_t i = 0;
67 +
68 + if (IS_P2ALIGNED2(dst, data, sizeof (uint64_t))) {
69 +		/* Unroll the loop for efficiency. */
70 +		for (; i + 8 * AES_BLOCK_LEN <= length; i += 8 * AES_BLOCK_LEN) {
71 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x00], &dst[i + 0x00]);
72 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x10], &dst[i + 0x10]);
73 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x20], &dst[i + 0x20]);
74 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x30], &dst[i + 0x30]);
75 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x40], &dst[i + 0x40]);
76 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x50], &dst[i + 0x50]);
77 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x60], &dst[i + 0x60]);
78 + AES_XOR_BLOCK_ALIGNED(&data[i + 0x70], &dst[i + 0x70]);
79 + }
80 + }
81 + /* Finish the rest in single blocks. */
82 + for (; i < length; i += AES_BLOCK_LEN)
83 + AES_XOR_BLOCK(&data[i], &dst[i]);
84 +}
85 +
86 +#endif /* !__amd64 */
87 +
35 88 /* Copy a 16-byte AES block from "in" to "out" */
36 89 void
37 -aes_copy_block(uint8_t *in, uint8_t *out)
90 +aes_copy_block(const uint8_t *in, uint8_t *out)
38 91 {
39 92 if (IS_P2ALIGNED2(in, out, sizeof (uint32_t))) {
40 - /* LINTED: pointer alignment */
41 - *(uint32_t *)&out[0] = *(uint32_t *)&in[0];
42 - /* LINTED: pointer alignment */
43 - *(uint32_t *)&out[4] = *(uint32_t *)&in[4];
44 - /* LINTED: pointer alignment */
45 - *(uint32_t *)&out[8] = *(uint32_t *)&in[8];
46 - /* LINTED: pointer alignment */
47 - *(uint32_t *)&out[12] = *(uint32_t *)&in[12];
93 + AES_COPY_BLOCK_ALIGNED(in, out);
48 94 } else {
49 - AES_COPY_BLOCK(in, out);
95 + AES_COPY_BLOCK_UNALIGNED(in, out);
50 96 }
51 97 }
52 98
53 -
54 99 /* XOR a 16-byte AES block of data into dst */
55 100 void
56 -aes_xor_block(uint8_t *data, uint8_t *dst)
101 +aes_xor_block(const uint8_t *data, uint8_t *dst)
57 102 {
58 103 if (IS_P2ALIGNED2(dst, data, sizeof (uint32_t))) {
59 - /* LINTED: pointer alignment */
60 - *(uint32_t *)&dst[0] ^= *(uint32_t *)&data[0];
61 - /* LINTED: pointer alignment */
62 - *(uint32_t *)&dst[4] ^= *(uint32_t *)&data[4];
63 - /* LINTED: pointer alignment */
64 - *(uint32_t *)&dst[8] ^= *(uint32_t *)&data[8];
65 - /* LINTED: pointer alignment */
66 - *(uint32_t *)&dst[12] ^= *(uint32_t *)&data[12];
104 + AES_XOR_BLOCK_ALIGNED(data, dst);
67 105 } else {
68 - AES_XOR_BLOCK(data, dst);
106 + AES_XOR_BLOCK_UNALIGNED(data, dst);
69 107 }
70 108 }
71 109
72 -
73 110 /*
74 111 * Encrypt multiple blocks of data according to mode.
75 112 */
76 113 int
77 114 aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
78 115 crypto_data_t *out)
79 116 {
80 117 aes_ctx_t *aes_ctx = ctx;
81 - int rv;
118 + int rv = CRYPTO_SUCCESS;
82 119
83 - if (aes_ctx->ac_flags & CTR_MODE) {
84 - rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
85 - AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
120 + for (size_t i = 0; i < length; i += AES_OPSZ) {
121 + size_t opsz = MIN(length - i, AES_OPSZ);
122 + AES_ACCEL_SAVESTATE(savestate);
123 + aes_accel_enter(savestate);
124 +
125 + if (aes_ctx->ac_flags & CTR_MODE) {
126 + rv = ctr_mode_contiguous_blocks(ctx, &data[i], opsz,
127 + out, AES_BLOCK_LEN, aes_encrypt_block,
128 + AES_XOR_BLOCK, aes_ctr_mode);
86 129 #ifdef _KERNEL
87 - } else if (aes_ctx->ac_flags & CCM_MODE) {
88 - rv = ccm_mode_encrypt_contiguous_blocks(ctx, data, length,
89 - out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
90 - aes_xor_block);
91 - } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
92 - rv = gcm_mode_encrypt_contiguous_blocks(ctx, data, length,
93 - out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
94 - aes_xor_block);
130 + } else if (aes_ctx->ac_flags & CCM_MODE) {
131 + rv = ccm_mode_encrypt_contiguous_blocks(ctx, &data[i],
132 + opsz, out, AES_BLOCK_LEN, aes_encrypt_block,
133 + AES_COPY_BLOCK, AES_XOR_BLOCK);
134 + } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
135 + rv = gcm_mode_encrypt_contiguous_blocks(ctx, &data[i],
136 + opsz, out, AES_BLOCK_LEN, aes_encrypt_block,
137 + AES_COPY_BLOCK, AES_XOR_BLOCK, aes_ctr_mode);
95 138 #endif
96 - } else if (aes_ctx->ac_flags & CBC_MODE) {
97 - rv = cbc_encrypt_contiguous_blocks(ctx,
98 - data, length, out, AES_BLOCK_LEN, aes_encrypt_block,
99 - aes_copy_block, aes_xor_block);
100 - } else {
101 - rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
102 - AES_BLOCK_LEN, aes_encrypt_block);
139 + } else if (aes_ctx->ac_flags & CBC_MODE) {
140 + rv = cbc_encrypt_contiguous_blocks(ctx, &data[i], opsz,
141 + out, AES_BLOCK_LEN, aes_encrypt_block,
142 + AES_COPY_BLOCK, AES_XOR_BLOCK, aes_encrypt_cbc);
143 + } else {
144 + rv = ecb_cipher_contiguous_blocks(ctx, &data[i], opsz,
145 + out, AES_BLOCK_LEN, aes_encrypt_block,
146 + aes_encrypt_ecb);
147 + }
148 +
149 + aes_accel_exit(savestate);
150 +
151 + if (rv != CRYPTO_SUCCESS)
152 + break;
103 153 }
154 +
104 155 return (rv);
105 156 }
106 157
107 -
108 158 /*
109 159 * Decrypt multiple blocks of data according to mode.
110 160 */
111 161 int
112 162 aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
113 163 crypto_data_t *out)
114 164 {
115 165 aes_ctx_t *aes_ctx = ctx;
116 - int rv;
166 + int rv = CRYPTO_SUCCESS;
117 167
118 - if (aes_ctx->ac_flags & CTR_MODE) {
119 - rv = ctr_mode_contiguous_blocks(ctx, data, length, out,
120 - AES_BLOCK_LEN, aes_encrypt_block, aes_xor_block);
121 - if (rv == CRYPTO_DATA_LEN_RANGE)
122 - rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
168 +
169 + for (size_t i = 0; i < length; i += AES_OPSZ) {
170 + size_t opsz = MIN(length - i, AES_OPSZ);
171 + AES_ACCEL_SAVESTATE(savestate);
172 + aes_accel_enter(savestate);
173 +
174 + if (aes_ctx->ac_flags & CTR_MODE) {
175 + rv = ctr_mode_contiguous_blocks(ctx, &data[i], opsz,
176 + out, AES_BLOCK_LEN, aes_encrypt_block,
177 + AES_XOR_BLOCK, aes_ctr_mode);
178 + if (rv == CRYPTO_DATA_LEN_RANGE)
179 + rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
123 180 #ifdef _KERNEL
124 - } else if (aes_ctx->ac_flags & CCM_MODE) {
125 - rv = ccm_mode_decrypt_contiguous_blocks(ctx, data, length,
126 - out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
127 - aes_xor_block);
128 - } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
129 - rv = gcm_mode_decrypt_contiguous_blocks(ctx, data, length,
130 - out, AES_BLOCK_LEN, aes_encrypt_block, aes_copy_block,
131 - aes_xor_block);
181 + } else if (aes_ctx->ac_flags & CCM_MODE) {
182 + rv = ccm_mode_decrypt_contiguous_blocks(ctx, &data[i],
183 + opsz, out, AES_BLOCK_LEN, aes_encrypt_block,
184 + AES_COPY_BLOCK, AES_XOR_BLOCK);
185 + } else if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) {
186 + rv = gcm_mode_decrypt_contiguous_blocks(ctx, &data[i],
187 + opsz, out, AES_BLOCK_LEN, aes_encrypt_block,
188 + AES_COPY_BLOCK, AES_XOR_BLOCK, aes_ctr_mode);
132 189 #endif
133 - } else if (aes_ctx->ac_flags & CBC_MODE) {
134 - rv = cbc_decrypt_contiguous_blocks(ctx, data, length, out,
135 - AES_BLOCK_LEN, aes_decrypt_block, aes_copy_block,
136 - aes_xor_block);
137 - } else {
138 - rv = ecb_cipher_contiguous_blocks(ctx, data, length, out,
139 - AES_BLOCK_LEN, aes_decrypt_block);
140 - if (rv == CRYPTO_DATA_LEN_RANGE)
141 - rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
190 + } else if (aes_ctx->ac_flags & CBC_MODE) {
191 + rv = cbc_decrypt_contiguous_blocks(ctx, &data[i],
192 + opsz, out, AES_BLOCK_LEN, aes_decrypt_block,
193 + AES_COPY_BLOCK, AES_XOR_BLOCK, aes_decrypt_ecb,
194 + aes_xor_range);
195 + } else {
196 + rv = ecb_cipher_contiguous_blocks(ctx, &data[i],
197 + opsz, out, AES_BLOCK_LEN, aes_decrypt_block,
198 + aes_decrypt_ecb);
199 + if (rv == CRYPTO_DATA_LEN_RANGE)
200 + rv = CRYPTO_ENCRYPTED_DATA_LEN_RANGE;
201 + }
202 +
203 + aes_accel_exit(savestate);
204 +
205 + if (rv != CRYPTO_SUCCESS)
206 + break;
142 207 }
208 +
143 209 return (rv);
144 210 }
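
For reference, the new control flow in aes_encrypt_contiguous_blocks() and aes_decrypt_contiguous_blocks() boils down to the pattern sketched below: the input is walked in AES_OPSZ-sized chunks, and each chunk is bracketed by aes_accel_enter()/aes_accel_exit(), so the saved FPU/xmm state is only held across a bounded burst of SIMD work rather than across the whole request. This is a minimal stand-alone sketch, not the patched source; MAX_CHUNK, fpu_save_t, fpu_enter(), fpu_exit() and process_chunk() are hypothetical stand-ins for AES_OPSZ, AES_ACCEL_SAVESTATE(), aes_accel_enter(), aes_accel_exit() and the per-mode handlers.

/*
 * Minimal, self-contained sketch of the chunked-acceleration pattern.
 * MAX_CHUNK, fpu_save_t, fpu_enter(), fpu_exit() and process_chunk()
 * are hypothetical stand-ins; they are NOT the names used in the patch.
 */
#include <stddef.h>
#include <string.h>

#define	MAX_CHUNK	(32 * 1024)	/* assumed chunk size, analogous to AES_OPSZ */

typedef struct {
	unsigned char	regs[512];	/* placeholder for saved xmm state */
} fpu_save_t;

/* Placeholders: a real implementation would save/restore the xmm registers. */
static void
fpu_enter(fpu_save_t *s)
{
	(void) memset(s, 0, sizeof (*s));
}

static void
fpu_exit(const fpu_save_t *s)
{
	(void) s;
}

/* Stand-in for the per-mode cipher routine; 0 means success. */
static int
process_chunk(const unsigned char *buf, size_t len)
{
	(void) buf;
	(void) len;
	return (0);
}

int
process_contiguous(const unsigned char *data, size_t length)
{
	int rv = 0;

	for (size_t i = 0; i < length; i += MAX_CHUNK) {
		size_t opsz = (length - i < MAX_CHUNK) ? length - i : MAX_CHUNK;
		fpu_save_t save;

		fpu_enter(&save);		/* make the xmm registers usable */
		rv = process_chunk(&data[i], opsz);
		fpu_exit(&save);		/* hand the FPU back promptly */

		if (rv != 0)
			break;
	}

	return (rv);
}

The chunking presumably exists because kernel code should not hold on to the FPU indefinitely; saving and restoring the xmm state around each AES_OPSZ chunk keeps the window during which the thread owns the FPU bounded regardless of request size.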