3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 */
24
25 #include <sys/types.h>
26 #include <sys/systm.h>
27 #include <sys/sysmacros.h>
28 #include <netinet/in.h>
29 #include "aes_impl.h"
30 #ifndef _KERNEL
31 #include <strings.h>
32 #include <stdlib.h>
33 #endif /* !_KERNEL */
34
35 #ifdef __amd64
36
37 #ifdef _KERNEL
38 #include <sys/cpuvar.h> /* cpu_t, CPU */
39 #include <sys/x86_archext.h> /* x86_featureset, X86FSET_AES */
40 #include <sys/disp.h> /* kpreempt_disable(), kpreempt_enable() */
41
42 /* Workaround for no XMM kernel thread save/restore */
43 #define KPREEMPT_DISABLE kpreempt_disable()
44 #define KPREEMPT_ENABLE kpreempt_enable()
45
46 #else
47 #include <sys/auxv.h> /* getisax() */
48 #include <sys/auxv_386.h> /* AV_386_AES bit */
49 #define KPREEMPT_DISABLE
50 #define KPREEMPT_ENABLE
51 #endif /* _KERNEL */
52 #endif /* __amd64 */
77 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
78 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
79 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
81 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
82 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
83 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
84 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
85 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
86 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
87 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
88 */
89
90 #if defined(sun4u)
91 /* External assembly functions: */
92 extern void aes_encrypt_impl(const uint32_t rk[], int Nr, const uint32_t pt[4],
93 uint32_t ct[4]);
94 extern void aes_decrypt_impl(const uint32_t rk[], int Nr, const uint32_t ct[4],
95 uint32_t pt[4]);
96
97 #define AES_ENCRYPT_IMPL(a, b, c, d, e) aes_encrypt_impl(a, b, c, d)
98 #define AES_DECRYPT_IMPL(a, b, c, d, e) aes_decrypt_impl(a, b, c, d)
99
100 #elif defined(__amd64)
101
102 /* These functions are used to execute amd64 instructions for AMD or Intel: */
103 extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
104 const uint32_t cipherKey[], int keyBits);
105 extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
106 const uint32_t cipherKey[], int keyBits);
107 extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
108 const uint32_t pt[4], uint32_t ct[4]);
109 extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
110 const uint32_t ct[4], uint32_t pt[4]);
111
112 /* These functions are used to execute Intel-specific AES-NI instructions: */
113 extern int rijndael_key_setup_enc_intel(uint32_t rk[],
114 const uint32_t cipherKey[], uint64_t keyBits);
115 extern int rijndael_key_setup_dec_intel(uint32_t rk[],
116 const uint32_t cipherKey[], uint64_t keyBits);
117 extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
118 const uint32_t pt[4], uint32_t ct[4]);
119 extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
120 const uint32_t ct[4], uint32_t pt[4]);
121
122 static int intel_aes_instructions_present(void);
123
124 #define AES_ENCRYPT_IMPL(a, b, c, d, e) rijndael_encrypt(a, b, c, d, e)
125 #define AES_DECRYPT_IMPL(a, b, c, d, e) rijndael_decrypt(a, b, c, d, e)
126
127 #else /* Generic C implementation */
128
129 #define AES_ENCRYPT_IMPL(a, b, c, d, e) rijndael_encrypt(a, b, c, d)
130 #define AES_DECRYPT_IMPL(a, b, c, d, e) rijndael_decrypt(a, b, c, d)
131 #define rijndael_key_setup_enc_raw rijndael_key_setup_enc
132 #endif /* sun4u || __amd64 */
133
134 #if defined(_LITTLE_ENDIAN) && !defined(__amd64)
135 #define AES_BYTE_SWAP
136 #endif
137
138
139 #if !defined(__amd64)
140 /*
141 * Constant tables
142 */
143
144 /*
145 * Te0[x] = S [x].[02, 01, 01, 03];
146 * Te1[x] = S [x].[03, 02, 01, 01];
147 * Te2[x] = S [x].[01, 03, 02, 01];
148 * Te3[x] = S [x].[01, 01, 03, 02];
149 * Te4[x] = S [x].[01, 01, 01, 01];
150 *
151 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
1123 key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks64[0]), keyarr32,
1124 keybits);
1125 key->type = AES_64BIT_KS;
1126 }
1127
1128
1129 #elif defined(__amd64)
1130
1131 /*
1132 * Expand the 32-bit AES cipher key array into the encryption and decryption
1133 * key schedules.
1134 *
1135 * Parameters:
1136 * key AES key schedule to be initialized
1137 * keyarr32 User key
1138 * keybits AES key size (128, 192, or 256 bits)
1139 */
1140 static void
1141 aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
1142 {
1143 if (intel_aes_instructions_present()) {
1144 key->flags = INTEL_AES_NI_CAPABLE;
1145 KPREEMPT_DISABLE;
1146 key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
1147 keyarr32, keybits);
1148 key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
1149 keyarr32, keybits);
1150 KPREEMPT_ENABLE;
1151 } else {
1152 key->flags = 0;
1153 key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
1154 keyarr32, keybits);
1155 key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
1156 keyarr32, keybits);
1157 }
1158
1159 key->type = AES_32BIT_KS;
1160 }
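/*
 * Worked example of the key-schedule sizing used in the comments below
 * (informational only): a schedule holds 4 * (Nr + 1) 32-bit words, where
 * Nr is the round count returned by the key-setup routines:
 *
 *	128-bit key: Nr = 10 -> 44 words
 *	192-bit key: Nr = 12 -> 52 words
 *	256-bit key: Nr = 14 -> 60 words
 *
 * The 256-bit case is the maximum, which is why the schedules are
 * described elsewhere in this file as 60 32-bit integers.
 */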
1161
1162 /*
1163 * Encrypt one block of data. The block is assumed to be an array
1164 * of four uint32_t values, so a copy for alignment (and byte-order
1165 * reversal for little-endian systems) might be necessary on the
1166 * input and output byte streams.
1167 * The size of the key schedule depends on the number of rounds
1168 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1169 *
1170 * Parameters:
1171 * rk Key schedule, of aes_ks_t (60 32-bit integers)
1172 * Nr Number of rounds
1173 * pt Input block (plain text)
1174 * ct Output block (crypto text). Can overlap with pt
1175 * flags Indicates whether we're on Intel AES-NI-capable hardware
1176 */
1177 static void
1178 rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
1179 uint32_t ct[4], int flags) {
1180 if (flags & INTEL_AES_NI_CAPABLE) {
1181 KPREEMPT_DISABLE;
1182 aes_encrypt_intel(rk, Nr, pt, ct);
1183 KPREEMPT_ENABLE;
1184 } else {
1185 aes_encrypt_amd64(rk, Nr, pt, ct);
1186 }
1187 }
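/*
 * Illustration (informational only): with the amd64 definitions near the
 * top of this file, a caller's
 *
 *	AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr, pt, ct,
 *	    ksch->flags);
 *
 * expands to rijndael_encrypt(), which dispatches to aes_encrypt_intel()
 * when INTEL_AES_NI_CAPABLE was recorded at key-setup time, and to
 * aes_encrypt_amd64() otherwise.
 */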
1188
1189 /*
1190 * Decrypt one block of data. The block is assumed to be an array
1191 * of four uint32_t values, so a copy for alignment (and byte-order
1192 * reversal for little-endian systems) might be necessary on the
1193 * input and output byte streams.
1194 * The size of the key schedule depends on the number of rounds
1195 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1196 *
1197 * Parameters:
1198 * rk Key schedule, of aes_ks_t (60 32-bit integers)
1199 * Nr Number of rounds
1200 * ct Input block (crypto text)
1201 * pt Output block (plain text). Can overlap with ct
1202 * flags Indicates whether we're on Intel AES-NI-capable hardware
1203 */
1204 static void
1205 rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
1206 uint32_t pt[4], int flags) {
1207 if (flags & INTEL_AES_NI_CAPABLE) {
1208 KPREEMPT_DISABLE;
1209 aes_decrypt_intel(rk, Nr, ct, pt);
1210 KPREEMPT_ENABLE;
1211 } else {
1212 aes_decrypt_amd64(rk, Nr, ct, pt);
1213 }
1214 }
1215
1216
1217 #else /* generic C implementation */
1218
1219 /*
1220 * Expand the cipher key into the decryption key schedule.
1221 * Return the number of rounds for the given cipher key size.
1222 * The size of the key schedule depends on the number of rounds
1223 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1224 *
1225 * Parameters:
1226 * rk AES key schedule 32-bit array to be initialized
1227 * cipherKey User key
1228 * keyBits AES key size (128, 192, or 256 bits)
1229 */
1230 static int
1231 rijndael_key_setup_dec(uint32_t rk[], const uint32_t cipherKey[], int keyBits)
1232 {
1233 int Nr, i, j;
1234 uint32_t temp;
1235
1236 /* expand the cipher key: */
1605 */
1606 #ifndef AES_BYTE_SWAP
1607 if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) {
1608 for (i = 0, j = 0; j < keysize; i++, j += 8) {
1609 /* LINTED: pointer alignment */
1610 keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
1611 }
1612 } else {
1613 bcopy(cipherKey, keyarr.ka32, keysize);
1614 }
1615
1616 #else /* byte swap */
1617 for (i = 0, j = 0; j < keysize; i++, j += 4) {
1618 keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]);
1619 }
1620 #endif
1621
1622 aes_setupkeys(newbie, keyarr.ka32, keyBits);
1623 }
1624
1625
1626 /*
1627 * Encrypt one block using AES.
1628 * Align if needed and (for x86 32-bit only) byte-swap.
1629 *
1630 * Parameters:
1631 * ks Key schedule, of type aes_key_t
1632 * pt Input block (plain text)
1633 * ct Output block (crypto text). Can overlap with pt
1634 */
1635 int
1636 aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
1637 {
1638 aes_key_t *ksch = (aes_key_t *)ks;
1639
1640 #ifndef AES_BYTE_SWAP
1641 if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t))) {
1642 /* LINTED: pointer alignment */
1643 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1644 /* LINTED: pointer alignment */
1645 (uint32_t *)pt, (uint32_t *)ct, ksch->flags);
1646 } else {
1647 #endif
1648 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1649
1650 /* Copy input block into buffer */
1651 #ifndef AES_BYTE_SWAP
1652 bcopy(pt, &buffer, AES_BLOCK_LEN);
1653
1654 #else /* byte swap */
1655 buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
1656 buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
1657 buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
1658 buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
1659 #endif
1660
1661 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1662 buffer, buffer, ksch->flags);
1663
1664 /* Copy result from buffer to output block */
1665 #ifndef AES_BYTE_SWAP
1666 bcopy(&buffer, ct, AES_BLOCK_LEN);
1667 }
1668
1669 #else /* byte swap */
1670 *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
1671 *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
1672 *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
1673 *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
1674 #endif
1675 return (CRYPTO_SUCCESS);
1676 }
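/*
 * Byte-order note for the AES_BYTE_SWAP path above (a sketch, not part of
 * the build): on a little-endian non-amd64 machine the table-based code
 * expects each 4-byte group of the block as a big-endian word, hence the
 * htonl() calls. For example, the input bytes 00 11 22 33 are read from
 * memory as the little-endian word 0x33221100 and stored in buffer[] as
 * 0x00112233.
 */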
1677
1678
1679 /*
1680 * Decrypt one block using AES.
1681 * Align and byte-swap if needed.
1682 *
1683 * Parameters:
1684 * ks Key schedule, of type aes_key_t
1685 * ct Input block (crypto text)
1686 * pt Output block (plain text). Can overlap with ct
1687 */
1688 int
1689 aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
1690 {
1691 aes_key_t *ksch = (aes_key_t *)ks;
1692
1693 #ifndef AES_BYTE_SWAP
1694 if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t))) {
1695 /* LINTED: pointer alignment */
1696 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1697 /* LINTED: pointer alignment */
1698 (uint32_t *)ct, (uint32_t *)pt, ksch->flags);
1699 } else {
1700 #endif
1701 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1702
1703 /* Copy input block into buffer */
1704 #ifndef AES_BYTE_SWAP
1705 bcopy(ct, &buffer, AES_BLOCK_LEN);
1706
1707 #else /* byte swap */
1708 buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
1709 buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
1710 buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
1711 buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
1712 #endif
1713
1714 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1715 buffer, buffer, ksch->flags);
1716
1717 /* Copy result from buffer to output block */
1718 #ifndef AES_BYTE_SWAP
1719 bcopy(&buffer, pt, AES_BLOCK_LEN);
1720 }
1721
1722 #else /* byte swap */
1723 *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
1724 *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
1725 *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
1726 *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
1727 #endif
1728
1729 return (CRYPTO_SUCCESS);
1730 }
1731
1732
1733 /*
1734 * Allocate key schedule for AES.
1735 *
1736 * Return the pointer and set size to the number of bytes allocated.
1737 * Memory allocated must be freed by the caller when done.
1738 *
1739 * Parameters:
1740 * size Size of key schedule allocated, in bytes
1741 * kmflag Flag passed to kmem_alloc(9F); ignored in userland.
1742 */
1743 /* ARGSUSED */
1744 void *
1745 aes_alloc_keysched(size_t *size, int kmflag)
1746 {
1747 aes_key_t *keysched;
1748
1749 #ifdef _KERNEL
1750 keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
1751 #else /* !_KERNEL */
1752 keysched = (aes_key_t *)malloc(sizeof (aes_key_t));
1753 #endif /* _KERNEL */
1754
1755 if (keysched != NULL) {
1756 *size = sizeof (aes_key_t);
1757 return (keysched);
1758 }
1759 return (NULL);
1760 }
1761
1762
1763 #ifdef __amd64
1764 /*
1765 * Return 1 if executing on x86-64 with AES-NI instructions,
1766 * otherwise 0.
1767 * Cache the result, as the CPU can't change.
1768 *
1769 * Note: the userland version uses getisax(). The kernel version uses
1770 * global variable x86_featureset.
1771 */
1772 static int
1773 intel_aes_instructions_present(void)
1774 {
1775 static int cached_result = -1;
1776
1777 if (cached_result == -1) { /* first time */
1778 #ifdef _KERNEL
1779 cached_result = is_x86_feature(x86_featureset, X86FSET_AES);
1780 #else
1781 uint_t ui = 0;
1782
1783 (void) getisax(&ui, 1);
1784 cached_result = (ui & AV_386_AES) != 0;
1785 #endif /* _KERNEL */
1786 }
1787
1788 return (cached_result);
1789 }
1790 #endif /* __amd64 */
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
23 * Copyright 2015 by Saso Kiselkov. All rights reserved.
24 */
25
26 #include <sys/types.h>
27 #include <sys/systm.h>
28 #include <sys/sysmacros.h>
29 #include <netinet/in.h>
30 #include "aes_impl.h"
31 #ifndef _KERNEL
32 #include <strings.h>
33 #include <stdlib.h>
34 #include <sys/note.h>
35 #endif /* !_KERNEL */
36
37 #ifdef __amd64
38
39 #ifdef _KERNEL
40 #include <sys/cpuvar.h> /* cpu_t, CPU */
41 #include <sys/x86_archext.h> /* x86_featureset, X86FSET_AES */
42 #include <sys/disp.h> /* kpreempt_disable(), kpreempt_enable() */
43
44 /* Workaround for no XMM kernel thread save/restore */
45 #define KPREEMPT_DISABLE kpreempt_disable()
46 #define KPREEMPT_ENABLE kpreempt_enable()
47
48 #else
49 #include <sys/auxv.h> /* getisax() */
50 #include <sys/auxv_386.h> /* AV_386_AES bit */
51 #define KPREEMPT_DISABLE
52 #define KPREEMPT_ENABLE
53 #endif /* _KERNEL */
54 #endif /* __amd64 */
79 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
80 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
81 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
82 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
83 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
84 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
85 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
86 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
87 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
88 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
89 * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
90 */
91
92 #if defined(sun4u)
93 /* External assembly functions: */
94 extern void aes_encrypt_impl(const uint32_t rk[], int Nr, const uint32_t pt[4],
95 uint32_t ct[4]);
96 extern void aes_decrypt_impl(const uint32_t rk[], int Nr, const uint32_t ct[4],
97 uint32_t pt[4]);
98
99 #define AES_ENCRYPT_IMPL(a, b, c, d) aes_encrypt_impl(a, b, c, d)
100 #define AES_DECRYPT_IMPL(a, b, c, d) aes_decrypt_impl(a, b, c, d)
101
102 #elif defined(__amd64)
103
104 /* These functions are used to execute amd64 instructions for AMD or Intel: */
105 extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
106 const uint32_t cipherKey[], int keyBits);
107 extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
108 const uint32_t cipherKey[], int keyBits);
109 extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
110 const uint32_t pt[4], uint32_t ct[4]);
111 extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
112 const uint32_t ct[4], uint32_t pt[4]);
113
114 /* These functions are used to execute Intel-specific AES-NI instructions: */
115 extern int rijndael_key_setup_enc_intel(uint32_t rk[],
116 const uint32_t cipherKey[], uint64_t keyBits);
117 extern int rijndael_key_setup_dec_intel(uint32_t rk[],
118 const uint32_t cipherKey[], uint64_t keyBits);
119 extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
120 const uint32_t pt[4], uint32_t ct[4]);
121 extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
122 const uint32_t ct[4], uint32_t pt[4]);
123 extern void aes_encrypt_intel8(const uint32_t rk[], int Nr,
124 const void *pt, void *ct);
125 extern void aes_decrypt_intel8(const uint32_t rk[], int Nr,
126 const void *ct, void *pt);
127 extern void aes_encrypt_cbc_intel8(const uint32_t rk[], int Nr,
128 const void *pt, void *ct, const void *iv);
129 extern void aes_ctr_intel8(const uint32_t rk[], int Nr,
130 const void *input, void *output, uint64_t counter_upper_BE,
131 uint64_t counter_lower_LE);
132 extern void aes_xor_intel(const uint8_t *, uint8_t *);
133
134 static inline int intel_aes_instructions_present(void);
135
136 #ifdef _KERNEL
137 /*
138 * Some form of floating-point acceleration is available, so declare these.
139 * The implementations will be in a platform-specific assembly file (e.g.
140 * amd64/aes_intel.s for SSE2/AES-NI).
141 */
142 extern void aes_accel_save(void *savestate);
143 extern void aes_accel_restore(void *savestate);
144 #endif /* _KERNEL */
145
146 #else /* Generic C implementation */
147 static void rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
148 uint32_t ct[4]);
149 static void rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
150 uint32_t ct[4]);
151 #define rijndael_key_setup_enc_raw rijndael_key_setup_enc
152 #define AES_ENCRYPT_IMPL(a, b, c, d) rijndael_encrypt(a, b, c, d)
153 #define AES_DECRYPT_IMPL(a, b, c, d) rijndael_decrypt(a, b, c, d)
154 #endif /* sun4u || __amd64 */
155
156 #if defined(_LITTLE_ENDIAN) && !defined(__amd64)
157 #define AES_BYTE_SWAP
158 #endif
159
160
161 #if !defined(__amd64)
162 /*
163 * Constant tables
164 */
165
166 /*
167 * Te0[x] = S [x].[02, 01, 01, 03];
168 * Te1[x] = S [x].[03, 02, 01, 01];
169 * Te2[x] = S [x].[01, 03, 02, 01];
170 * Te3[x] = S [x].[01, 01, 03, 02];
171 * Te4[x] = S [x].[01, 01, 01, 01];
172 *
173 * Td0[x] = Si[x].[0e, 09, 0d, 0b];
1145 key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks64[0]), keyarr32,
1146 keybits);
1147 key->type = AES_64BIT_KS;
1148 }
1149
1150
1151 #elif defined(__amd64)
1152
1153 /*
1154 * Expand the 32-bit AES cipher key array into the encryption and decryption
1155 * key schedules.
1156 *
1157 * Parameters:
1158 * key AES key schedule to be initialized
1159 * keyarr32 User key
1160 * keybits AES key size (128, 192, or 256 bits)
1161 */
1162 static void
1163 aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
1164 {
1165 AES_ACCEL_SAVESTATE(savestate);
1166 aes_accel_enter(savestate);
1167
1168 if (intel_aes_instructions_present()) {
1169 key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
1170 keyarr32, keybits);
1171 key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
1172 keyarr32, keybits);
1173 } else {
1174 key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
1175 keyarr32, keybits);
1176 key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
1177 keyarr32, keybits);
1178 }
1179
1180 aes_accel_exit(savestate);
1181 key->type = AES_32BIT_KS;
1182 }
1183
1184 #else /* generic C implementation */
1185
1186 /*
1187 * Expand the cipher key into the decryption key schedule.
1188 * Return the number of rounds for the given cipher key size.
1189 * The size of the key schedule depends on the number of rounds
1190 * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1191 *
1192 * Parameters:
1193 * rk AES key schedule 32-bit array to be initialized
1194 * cipherKey User key
1195 * keyBits AES key size (128, 192, or 256 bits)
1196 */
1197 static int
1198 rijndael_key_setup_dec(uint32_t rk[], const uint32_t cipherKey[], int keyBits)
1199 {
1200 int Nr, i, j;
1201 uint32_t temp;
1202
1203 /* expand the cipher key: */
1572 */
1573 #ifndef AES_BYTE_SWAP
1574 if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) {
1575 for (i = 0, j = 0; j < keysize; i++, j += 8) {
1576 /* LINTED: pointer alignment */
1577 keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
1578 }
1579 } else {
1580 bcopy(cipherKey, keyarr.ka32, keysize);
1581 }
1582
1583 #else /* byte swap */
1584 for (i = 0, j = 0; j < keysize; i++, j += 4) {
1585 keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]);
1586 }
1587 #endif
1588
1589 aes_setupkeys(newbie, keyarr.ka32, keyBits);
1590 }
1591
1592 #if defined(__amd64) && defined(_KERNEL)
1593 void
1594 aes_accel_enter(void *savestate)
1595 {
1596 KPREEMPT_DISABLE;
1597 aes_accel_save(savestate);
1598 }
1599
1600 void
1601 aes_accel_exit(void *savestate)
1602 {
1603 aes_accel_restore(savestate);
1604 KPREEMPT_ENABLE;
1605 }
1606 #endif /* defined(__amd64) && defined(_KERNEL) */
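/*
 * Usage sketch for the bracketing functions above (informational only,
 * mirroring aes_setupkeys()): callers that invoke the assembly routines
 * directly are expected to save and restore the FPU/XMM state around the
 * calls, e.g.
 *
 *	AES_ACCEL_SAVESTATE(savestate);
 *	aes_accel_enter(savestate);
 *	aes_encrypt_intel(rk, Nr, pt, ct);
 *	aes_accel_exit(savestate);
 *
 * AES_ACCEL_SAVESTATE is assumed to be provided by aes_impl.h; in userland,
 * aes_accel_enter()/aes_accel_exit() are presumably no-ops or provided
 * elsewhere, since only the kernel versions are defined here.
 */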
1607
1608 /*
1609 * Encrypt one block using AES.
1610 * Align if needed and (for x86 32-bit only) byte-swap.
1611 *
1612 * Parameters:
1613 * ks Key schedule, of type aes_key_t
1614 * pt Input block (plain text)
1615 * ct Output block (crypto text). Can overlap with pt
1616 */
1617 int
1618 aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
1619 {
1620 aes_key_t *ksch = (aes_key_t *)ks;
1621
1622 #ifdef __amd64
1623 if (intel_aes_instructions_present())
1624 aes_encrypt_intel(&ksch->encr_ks.ks32[0], ksch->nr,
1625 /* LINTED: pointer alignment */
1626 (uint32_t *)pt, (uint32_t *)ct);
1627 else
1628 aes_encrypt_amd64(&ksch->encr_ks.ks32[0], ksch->nr,
1629 /* LINTED: pointer alignment */
1630 (uint32_t *)pt, (uint32_t *)ct);
1631 #else /* !__amd64 */
1632 #ifndef AES_BYTE_SWAP
1633 if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t))) {
1634 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1635 /* LINTED: pointer alignment */
1636 (uint32_t *)pt, (uint32_t *)ct);
1637 } else {
1638 #endif
1639 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1640
1641 /* Copy input block into buffer */
1642 #ifndef AES_BYTE_SWAP
1643 bcopy(pt, &buffer, AES_BLOCK_LEN);
1644
1645 #else /* byte swap */
1646 buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
1647 buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
1648 buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
1649 buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
1650 #endif /* byte swap */
1651
1652 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1653 buffer, buffer);
1654
1655 /* Copy result from buffer to output block */
1656 #ifndef AES_BYTE_SWAP
1657 bcopy(&buffer, ct, AES_BLOCK_LEN);
1658 }
1659
1660 #else /* byte swap */
1661 *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
1662 *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
1663 *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
1664 *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
1665 #endif /* byte swap */
1666 #endif /* !__amd64 */
1667
1668 return (CRYPTO_SUCCESS);
1669 }
1670
1671
1672 /*
1673 * Decrypt one block using AES.
1674 * Align and byte-swap if needed.
1675 *
1676 * Parameters:
1677 * ks Key schedule, of type aes_key_t
1678 * ct Input block (crypto text)
1679 * pt Output block (plain text). Can overlap with ct
1680 */
1681 int
1682 aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
1683 {
1684 aes_key_t *ksch = (aes_key_t *)ks;
1685
1686 #ifdef __amd64
1687 if (intel_aes_instructions_present())
1688 aes_decrypt_intel(&ksch->decr_ks.ks32[0], ksch->nr,
1689 /* LINTED: pointer alignment */
1690 (uint32_t *)ct, (uint32_t *)pt);
1691 else
1692 aes_decrypt_amd64(&ksch->decr_ks.ks32[0], ksch->nr,
1693 /* LINTED: pointer alignment */
1694 (uint32_t *)ct, (uint32_t *)pt);
1695 #else /* !__amd64 */
1696 #ifndef AES_BYTE_SWAP
1697 if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t))) {
1698 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1699 /* LINTED: pointer alignment */
1700 (uint32_t *)ct, (uint32_t *)pt);
1701 } else {
1702 #endif
1703 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1704
1705 /* Copy input block into buffer */
1706 #ifndef AES_BYTE_SWAP
1707 bcopy(ct, &buffer, AES_BLOCK_LEN);
1708
1709 #else /* byte swap */
1710 buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
1711 buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
1712 buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
1713 buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
1714 #endif /* byte swap */
1715
1716 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1717 buffer, buffer);
1718
1719 /* Copy result from buffer to output block */
1720 #ifndef AES_BYTE_SWAP
1721 bcopy(&buffer, pt, AES_BLOCK_LEN);
1722 }
1723
1724 #else /* byte swap */
1725 *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
1726 *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
1727 *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
1728 *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
1729 #endif /* byte swap */
1730 #endif /* !__amd64 */
1731
1732 return (CRYPTO_SUCCESS);
1733 }
1734
1735 #define ECB_LOOP(ciph_func) \
1736 do { \
1737 for (; i < length; i += AES_BLOCK_LEN) \
1738 ciph_func; \
1739 _NOTE(CONSTCOND) \
1740 } while (0)
1741 #define ECB_LOOP_4P(ciph_func, enc_or_dec, in, out) \
1742 ECB_LOOP(ciph_func(&ksch->enc_or_dec ## r_ks.ks32[0], \
1743 ksch->nr, (void *)&in[i], (void *)&out[i]))
1744 #define ECB_LOOP_3P(ciph_func, in, out) \
1745 ECB_LOOP(ciph_func(ksch, (void *)&in[i], (void *)&out[i]))
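/*
 * Expansion example (informational only): the token-pasting in ECB_LOOP_4P
 * selects the encryption or decryption schedule, so
 *
 *	ECB_LOOP_4P(aes_encrypt_intel, enc, pt, ct);
 *
 * becomes a loop over the remaining blocks calling
 *
 *	aes_encrypt_intel(&ksch->encr_ks.ks32[0], ksch->nr,
 *	    (void *)&pt[i], (void *)&ct[i]);
 */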
1746
1747 #ifdef __amd64
1748 #define ECB_INTEL_IMPL(enc_or_dec, in, out) \
1749 do { \
1750 if (intel_aes_instructions_present()) { \
1751 /* first use the accelerated function */ \
1752 for (; i + 8 * AES_BLOCK_LEN <= length; \
1753 i += 8 * AES_BLOCK_LEN) \
1754 aes_ ## enc_or_dec ## rypt_intel8( \
1755 &ksch->enc_or_dec ## r_ks.ks32[0], \
1756 ksch->nr, &in[i], &out[i]); \
1757 /* finish off the remainder per-block */ \
1758 ECB_LOOP_4P(aes_ ## enc_or_dec ## rypt_intel, \
1759 enc_or_dec, in, out); \
1760 } else { \
1761 ECB_LOOP_4P(aes_ ## enc_or_dec ## rypt_amd64, \
1762 enc_or_dec, in, out); \
1763 } \
1764 _NOTE(CONSTCOND) \
1765 } while (0)
1766 #endif /* __amd64 */
1767
1768 /*
1769 * Perform AES ECB encryption on a sequence of blocks. On x86-64 CPUs with
1770 * the AES-NI extension, this performs the encryption in increments of 8
1771 * blocks at a time, exploiting instruction parallelism more efficiently.
1772 * On other platforms, this simply encrypts the blocks in sequence.
1773 */
1774 int
1775 aes_encrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct, uint64_t length)
1776 {
1777 aes_key_t *ksch = (aes_key_t *)ks;
1778 uint64_t i = 0;
1779
1780 #ifdef __amd64
1781 ECB_INTEL_IMPL(enc, pt, ct);
1782 #elif defined(sun4u)
1783 ECB_LOOP_4P(aes_encrypt_impl, enc, pt, ct);
1784 #else /* Generic C implementation */
1785 ECB_LOOP_3P((void) aes_encrypt_block, pt, ct);
1786 #endif /* Generic C implementation */
1787
1788 return (CRYPTO_SUCCESS);
1789 }
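/*
 * Worked example (informational only): for length = 1600 bytes (100 blocks)
 * on an AES-NI capable CPU, the loop in ECB_INTEL_IMPL handles the first
 * 96 blocks in twelve 8-block calls to aes_encrypt_intel8(), and the
 * trailing 4 blocks fall through to the per-block ECB_LOOP_4P() path.
 */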
1790
1791 /*
1792 * Same as aes_encrypt_ecb, but performs decryption.
1793 */
1794 int
1795 aes_decrypt_ecb(const void *ks, const uint8_t *ct, uint8_t *pt, uint64_t length)
1796 {
1797 aes_key_t *ksch = (aes_key_t *)ks;
1798 uint64_t i = 0;
1799
1800 #ifdef __amd64
1801 ECB_INTEL_IMPL(dec, ct, pt);
1802 #elif defined(sun4u)
1803 ECB_LOOP_4P(aes_decrypt_impl, dec, ct, pt);
1804 #else /* Generic C implementation */
1805 ECB_LOOP_3P((void) aes_decrypt_block, ct, pt);
1806 #endif /* Generic C implementation */
1807
1808 return (CRYPTO_SUCCESS);
1809 }
1810 #ifdef __amd64
1811 #undef ECB_INTEL_IMPL
1812 #endif /* __amd64 */
1813
1814 #undef ECB_LOOP
1815 #undef ECB_LOOP_4P
1816 #undef ECB_LOOP_3P
1817
1818 #define CBC_LOOP(enc_func, xor_func) \
1819 do { \
1820 for (; i < length; i += AES_BLOCK_LEN) { \
1821 /* copy IV to ciphertext */ \
1822 bcopy(iv, &ct[i], AES_BLOCK_LEN); \
1823 /* XOR plaintext into the IV copy */ \
1824 xor_func(&pt[i], &ct[i]); \
1825 /* encrypt the block in place in the output region */ \
1826 enc_func; \
1827 iv = &ct[i]; \
1828 } \
1829 _NOTE(CONSTCOND) \
1830 } while (0)
1831 #define CBC_LOOP_4P(enc_func, xor_func) \
1832 CBC_LOOP(enc_func(&ksch->encr_ks.ks32[0], \
1833 ksch->nr, (void *)&ct[i], (void *)&ct[i]), xor_func)
1834 #define CBC_LOOP_3P(enc_func, xor_func) \
1835 CBC_LOOP(enc_func(ksch, (void *)&ct[i], (void *)&ct[i]), xor_func)
1836
1837 /*
1838 * Encrypts a sequence of consecutive AES blocks in CBC mode. On x86-64
1839 * with the AES-NI extension, the encryption is performed on 8 blocks at
1840 * a time using an optimized assembly implementation, giving a speed boost
1841 * of around 75%. On other platforms, this simply performs CBC encryption
1842 * in sequence on the blocks.
1843 *
1844 * Decryption acceleration is implemented in the kernel kcf block cipher
1845 * modes code (cbc.c), because that doesn't require a complete hand-tuned
1846 * CBC implementation in assembly.
1847 */
1848 int
1849 aes_encrypt_cbc(const void *ks, const uint8_t *pt, uint8_t *ct,
1850 const uint8_t *iv, uint64_t length)
1851 {
1852 aes_key_t *ksch = (aes_key_t *)ks;
1853 size_t i = 0;
1854
1855 #ifdef __amd64
1856 if (intel_aes_instructions_present()) {
1857 for (; i + 8 * AES_BLOCK_LEN <= length;
1858 i += 8 * AES_BLOCK_LEN) {
1859 aes_encrypt_cbc_intel8(&ksch->encr_ks.ks32[0],
1860 ksch->nr, &pt[i], &ct[i], iv);
1861 iv = &ct[i + 7 * AES_BLOCK_LEN];
1862 }
1863 CBC_LOOP_4P(aes_encrypt_intel, aes_xor_intel);
1864 } else {
1865 CBC_LOOP_4P(aes_encrypt_amd64, aes_xor_intel);
1866 }
1867 #elif defined(sun4u)
1868 CBC_LOOP_4P(aes_encrypt_impl, aes_xor_block);
1869 #else /* Generic C implementation */
1870 CBC_LOOP_3P((void) aes_encrypt_block, aes_xor_block);
1871 #endif /* Generic C implementation */
1872
1873 return (CRYPTO_SUCCESS);
1874 }
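/*
 * For reference (informational only), the loop above implements standard
 * CBC chaining:
 *
 *	C[0] = E_K(P[0] ^ IV)
 *	C[n] = E_K(P[n] ^ C[n-1]),	n > 0
 *
 * which is why each iteration copies the running IV into the output block,
 * XORs the plaintext into it, encrypts in place, and then advances the IV
 * pointer to the ciphertext block just produced.
 */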
1875 #undef CBC_LOOP
1876 #undef CBC_LOOP_4P
1877 #undef CBC_LOOP_3P
1878
1879 #define CTR_LOOP(enc_func, xor_func) \
1880 do { \
1881 for (; i < length; i += AES_BLOCK_LEN) { \
1882 /* set up counter in output region */ \
1883 *(uint64_t *)(void *)&output[i] = counter[0]; \
1884 *(uint64_t *)(void *)&output[i + 8] = \
1885 htonll(counter[1]++); \
1886 /* encrypt counter in output region */ \
1887 enc_func; \
1888 /* XOR encrypted counter with input */ \
1889 xor_func(&input[i], &output[i]); \
1890 } \
1891 _NOTE(CONSTCOND) \
1892 } while (0)
1893 #define CTR_LOOP_4P(enc_func, xor_func) \
1894 CTR_LOOP(enc_func(&ksch->encr_ks.ks32[0], ksch->nr, \
1895 (void *)&output[i], (void *)&output[i]), xor_func)
1896 #define CTR_LOOP_3P(enc_func, xor_func) \
1897 CTR_LOOP(enc_func(ksch, (void *)&output[i], (void *)&output[i]),\
1898 xor_func)
1899 /*
1900 * Performs high-performance counter mode encryption and decryption on
1901 * a sequence of blocks. In CTR mode, encryption and decryption are the
1902 * same operation, just with the plaintext and ciphertext reversed:
1903 * plaintext = CTR(CTR(plaintext, K), K)
1904 * Blocks also do not depend on one another, so it is an excellent
1905 * mode when high performance is required and data authentication/integrity
1906 * checking is provided via some other means, or isn't necessary.
1907 *
1908 * On x86-64 with the AES-NI extension, this code performs CTR mode
1909 * encryption in parallel on 8 blocks at a time and can provide in
1910 * excess of 3GB/s/core of encryption/decryption performance (<1 CPB).
1911 */
1912 int
1913 aes_ctr_mode(const void *ks, const uint8_t *input, uint8_t *output,
1914 uint64_t length, uint64_t counter[2])
1915 {
1916 aes_key_t *ksch = (aes_key_t *)ks;
1917 uint64_t i = 0;
1918
1919 /* swap lower part to host order for computations */
1920 counter[1] = ntohll(counter[1]);
1921
1922 #ifdef __amd64
1923 if (intel_aes_instructions_present()) {
1924 /* first use the wide-register accelerated function */
1925 for (; i + 8 * AES_BLOCK_LEN <= length;
1926 i += 8 * AES_BLOCK_LEN) {
1927 aes_ctr_intel8(&ksch->encr_ks.ks32[0], ksch->nr,
1928 &input[i], &output[i], counter[0], counter[1]);
1929 counter[1] += 8;
1930 }
1931 /* finish off the remainder using the slow per-block method */
1932 CTR_LOOP_4P(aes_encrypt_intel, aes_xor_intel);
1933 } else {
1934 CTR_LOOP_4P(aes_encrypt_amd64, aes_xor_intel);
1935 }
1936 #elif defined(sun4u)
1937 CTR_LOOP_4P(aes_encrypt_impl, aes_xor_block);
1938 #else /* Generic C implementation */
1939 CTR_LOOP_3P((void) aes_encrypt_block, aes_xor_block);
1940 #endif /* Generic C implementation */
1941
1942 /* swap lower part back to big endian */
1943 counter[1] = htonll(counter[1]);
1944
1945 return (CRYPTO_SUCCESS);
1946 }
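/*
 * Counter layout example (informational only): counter[] holds the 128-bit
 * big-endian counter block, counter[0] being the upper 64 bits and
 * counter[1] the lower 64 bits. The lower half is byte-swapped to host
 * order so it can simply be incremented once per block (or by 8 after each
 * aes_ctr_intel8() call), and it is swapped back to big-endian both when it
 * is written into the output block and before returning to the caller.
 */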
1947 #undef CTR_LOOP
1948
1949 /*
1950 * Allocate key schedule for AES.
1951 *
1952 * Return the pointer and set size to the number of bytes allocated.
1953 * Memory allocated must be freed by the caller when done.
1954 *
1955 * Parameters:
1956 * size Size of key schedule allocated, in bytes
1957 * kmflag Flag passed to kmem_alloc(9F); ignored in userland.
1958 */
1959 /* ARGSUSED */
1960 void *
1961 aes_alloc_keysched(size_t *size, int kmflag)
1962 {
1963 aes_key_t *keysched;
1964
1965 #ifdef _KERNEL
1966 keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
1967 #else /* !_KERNEL */
1968 keysched = (aes_key_t *)malloc(sizeof (aes_key_t));
1969 #endif /* _KERNEL */
1970
1971 if (keysched != NULL) {
1972 *size = sizeof (aes_key_t);
1973 return (keysched);
1974 }
1975 return (NULL);
1976 }
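/*
 * Typical usage sketch (informational only; aes_init_keysched() is assumed
 * to be the key-schedule initializer declared in aes_impl.h):
 *
 *	size_t size;
 *	void *ks = aes_alloc_keysched(&size, KM_SLEEP);
 *
 *	if (ks != NULL) {
 *		aes_init_keysched(rawkey, keybits, ks);
 *		(void) aes_encrypt_block(ks, pt, ct);
 *		bzero(ks, size);
 *		kmem_free(ks, size);	(or free(ks) in userland)
 *	}
 */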
1977
1978
1979 #ifdef __amd64
1980 /*
1981 * Return 1 if executing on x86-64 with AES-NI instructions, otherwise 0.
1982 * Cache the result, as the CPU can't change.
1983 *
1984 * Note: the userland version uses getisax(). The kernel version uses
1985 * global variable x86_featureset.
1986 */
1987 static inline int
1988 intel_aes_instructions_present(void)
1989 {
1990 static int cached_result = -1;
1991
1992 if (cached_result == -1) { /* first time */
1993 #ifdef _KERNEL
1994 cached_result = is_x86_feature(x86_featureset, X86FSET_AES);
1995 #else
1996 uint_t ui = 0;
1997
1998 (void) getisax(&ui, 1);
1999 cached_result = (ui & AV_386_AES) != 0;
2000 #endif /* _KERNEL */
2001 }
2002
2003 return (cached_result);
2004 }
2005 #endif /* __amd64 */