1 /* 2 * ==================================================================== 3 * Written by Intel Corporation for the OpenSSL project to add support 4 * for Intel AES-NI instructions. Rights for redistribution and usage 5 * in source and binary forms are granted according to the OpenSSL 6 * license. 7 * 8 * Author: Huang Ying <ying.huang at intel dot com> 9 * Vinodh Gopal <vinodh.gopal at intel dot com> 10 * Kahraman Akdemir 11 * 12 * Intel AES-NI is a new set of Single Instruction Multiple Data (SIMD) 13 * instructions that are going to be introduced in the next generation 14 * of Intel processor, as of 2009. These instructions enable fast and 15 * secure data encryption and decryption, using the Advanced Encryption 16 * Standard (AES), defined by FIPS Publication number 197. The 17 * architecture introduces six instructions that offer full hardware 18 * support for AES. Four of them support high performance data 19 * encryption and decryption, and the other two instructions support 20 * the AES key expansion procedure. 21 * ==================================================================== 22 */ 23 24 /* 25 * ==================================================================== 26 * Copyright (c) 1998-2008 The OpenSSL Project. All rights reserved. 27 * 28 * Redistribution and use in source and binary forms, with or without 29 * modification, are permitted provided that the following conditions 30 * are met: 31 * 32 * 1. Redistributions of source code must retain the above copyright 33 * notice, this list of conditions and the following disclaimer. 34 * 35 * 2. Redistributions in binary form must reproduce the above copyright 36 * notice, this list of conditions and the following disclaimer in 37 * the documentation and/or other materials provided with the 38 * distribution. 39 * 40 * 3. 
All advertising materials mentioning features or use of this 41 * software must display the following acknowledgment: 42 * "This product includes software developed by the OpenSSL Project 43 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 44 * 45 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 46 * endorse or promote products derived from this software without 47 * prior written permission. For written permission, please contact 48 * openssl-core@openssl.org. 49 * 50 * 5. Products derived from this software may not be called "OpenSSL" 51 * nor may "OpenSSL" appear in their names without prior written 52 * permission of the OpenSSL Project. 53 * 54 * 6. Redistributions of any form whatsoever must retain the following 55 * acknowledgment: 56 * "This product includes software developed by the OpenSSL Project 57 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 58 * 59 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 60 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 61 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 62 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 63 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 64 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 65 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 66 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 67 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 68 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 69 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 70 * OF THE POSSIBILITY OF SUCH DAMAGE. 
 * ====================================================================
 */

/*
 * ====================================================================
 * OpenSolaris OS modifications
 *
 * This source originates as files aes-intel.S and eng_aesni_asm.pl, in
 * patches sent Dec. 9, 2008 and Dec. 24, 2008, respectively, by
 * Huang Ying of Intel to the openssl-dev mailing list under the subject
 * of "Add support to Intel AES-NI instruction set for x86_64 platform".
 *
 * This OpenSolaris version has these major changes from the original source:
 *
 * 1. Added OpenSolaris ENTRY_NP/SET_SIZE macros from
 * /usr/include/sys/asm_linkage.h, lint(1B) guards, and dummy C function
 * definitions for lint.
 *
 * 2. Formatted code, added comments, and added #includes and #defines.
 *
 * 3. If bit CR0.TS is set, clear and set the TS bit, after and before
 * calling kpreempt_disable() and kpreempt_enable().
 * If the TS bit is not set, save and restore %xmm registers at the beginning
 * and end of function calls (%xmm* registers are not saved and restored
 * during kernel thread preemption).
 *
 * 4. Renamed functions, reordered parameters, and changed return value
 * to match OpenSolaris:
 *
 * OpenSSL interface:
 *	int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *		const int bits, AES_KEY *key);
 *	Return values for above are non-zero on error, 0 on success.
 *
 *	void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *		const AES_KEY *key);
 *	typedef struct aes_key_st {
 *		unsigned int	rd_key[4 *(AES_MAXNR + 1)];
 *		int		rounds;
 *		unsigned int	pad[3];
 *	} AES_KEY;
 * Note: AES_LONG is undefined (that is, Intel uses 32-bit key schedules
 * (ks32) instead of 64-bit (ks64)).
 * Number of rounds (aka round count) is at offset 240 of AES_KEY.
 *
 * OpenSolaris OS interface (#ifdefs removed for readability):
 *	int rijndael_key_setup_dec_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	int rijndael_key_setup_enc_intel(uint32_t rk[],
 *		const uint32_t cipherKey[], uint64_t keyBits);
 *	Return values for above are 0 on error, number of rounds on success.
 *
 *	void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *		const uint32_t pt[4], uint32_t ct[4]);
 *	typedef union {uint64_t ks64[(MAX_AES_NR + 1) * 4];
 *		 uint32_t ks32[(MAX_AES_NR + 1) * 4]; } aes_ks_t;
 *
 *	typedef union {
 *		uint32_t	ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 *	} aes_ks_t;
 *	typedef struct aes_key {
 *		aes_ks_t	encr_ks, decr_ks;
 *		long double	align128;
 *		int		flags, nr, type;
 *	} aes_key_t;
 *
 * Note: ks is the AES key schedule, Nr is number of rounds, pt is plain text,
 * ct is crypto text, and MAX_AES_NR is 14.
 * For the x86 64-bit architecture, OpenSolaris OS uses ks32 instead of ks64.
 *
 * Note2: aes_ks_t must be aligned on a 0 mod 128 byte boundary.
 *
 * ====================================================================
 */

#if defined(lint) || defined(__lint)

#include <sys/types.h>

/*
 * Empty stand-in definitions that are compiled only when lint is defined.
 * The real implementations are the assembly routines in the #else branch.
 */

/* ARGSUSED */
void
aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4],
    uint32_t ct[4]) {
}
/* ARGSUSED */
void
aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4],
    uint32_t pt[4]) {
}
/* ARGSUSED */
int
rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits) {
	return (0);
}
/* ARGSUSED */
int
rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
    uint64_t keyBits) {
	return (0);
}


#else	/* lint */

#include <sys/asm_linkage.h>
#include <sys/controlregs.h>
#ifdef _KERNEL
#include <sys/machprivregs.h>
#endif

#ifdef _KERNEL
	/*
	 * Note: the CLTS macro clobbers P2 (%rsi) under i86xpv.  That is,
	 * it calls HYPERVISOR_fpu_taskswitch() which modifies %rsi when it
	 * uses it to pass P2 to syscall.
	 * This also occurs with the STTS macro, but we don't care if
	 * P2 (%rsi) is modified just before function exit.
	 * The CLTS and STTS macros push and pop P1 (%rdi) already.
	 */
#ifdef __xpv
#define	PROTECTED_CLTS \
	push	%rsi; \
	CLTS; \
	pop	%rsi
#else
#define	PROTECTED_CLTS \
	CLTS
#endif	/* __xpv */

	/*
	 * Set up a frame (push %rbp, copy %rsp to %rbp) and read %cr0 into
	 * tmpreg.  If CR0_TS is not set, align the stack and store %xmm0 and
	 * %xmm1 on it; otherwise clear CR0_TS with PROTECTED_CLTS.
	 * tmpreg keeps the %cr0 value on exit from the macro; the matching
	 * SET_TS_OR_POP_XMM0_XMM1(tmpreg) tests it again.
	 * NOTE(review): XMM_ALIGN and XMM_SIZE are not defined in this file;
	 * presumably they come from one of the headers included above.
	 */
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 2], %rsp; \
	movaps	%xmm0, 16(%rsp); \
	movaps	%xmm1, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:

	/*
	 * If CR0_TS was not set above, pop %xmm0 and %xmm1 off stack,
	 * otherwise set CR0_TS.
	 * tmpreg must still hold the %cr0 value that
	 * CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg) read; it is tested here, not
	 * reloaded.  Both paths end by restoring %rsp from %rbp and
	 * popping %rbp.
	 */
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm1; \
	movaps	16(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp

	/*
	 * If CR0_TS is not set, align stack (with push %rbp) and push
	 * %xmm0 - %xmm6 on stack, otherwise clear CR0_TS.
	 * tmpreg keeps the %cr0 value for the matching
	 * SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg).
	 */
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg) \
	push	%rbp; \
	mov	%rsp, %rbp; \
	movq	%cr0, tmpreg; \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	and	$-XMM_ALIGN, %rsp; \
	sub	$[XMM_SIZE * 7], %rsp; \
	movaps	%xmm0, 96(%rsp); \
	movaps	%xmm1, 80(%rsp); \
	movaps	%xmm2, 64(%rsp); \
	movaps	%xmm3, 48(%rsp); \
	movaps	%xmm4, 32(%rsp); \
	movaps	%xmm5, 16(%rsp); \
	movaps	%xmm6, (%rsp); \
	jmp	2f; \
1: \
	PROTECTED_CLTS; \
2:


	/*
	 * If CR0_TS was not set above, pop %xmm0 - %xmm6 off stack,
	 * otherwise set CR0_TS.
	 * The registers are reloaded from the same stack offsets at which
	 * CLEAR_TS_OR_PUSH_XMM0_TO_XMM6 stored them.
	 */
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg) \
	testq	$CR0_TS, tmpreg; \
	jnz	1f; \
	movaps	(%rsp), %xmm6; \
	movaps	16(%rsp), %xmm5; \
	movaps	32(%rsp), %xmm4; \
	movaps	48(%rsp), %xmm3; \
	movaps	64(%rsp), %xmm2; \
	movaps	80(%rsp), %xmm1; \
	movaps	96(%rsp), %xmm0; \
	jmp	2f; \
1: \
	STTS(tmpreg); \
2: \
	mov	%rbp, %rsp; \
	pop	%rbp


#else
/* When _KERNEL is not defined, all of the macros expand to nothing. */
#define	PROTECTED_CLTS
#define	CLEAR_TS_OR_PUSH_XMM0_XMM1(tmpreg)
#define	SET_TS_OR_POP_XMM0_XMM1(tmpreg)
#define	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(tmpreg)
#define	SET_TS_OR_POP_XMM0_TO_XMM6(tmpreg)
#endif	/* _KERNEL */


/*
 * _key_expansion_128(), _key_expansion_192a(), _key_expansion_192b(),
 * _key_expansion_256a(), _key_expansion_256b()
 *
 * Helper functions called by rijndael_key_setup_enc_intel().
 * Also used indirectly by rijndael_key_setup_dec_intel().
 *
 * Input:
 * %xmm0	User-provided cipher key
 * %xmm1	Round constant
 * Output:
 * (%rcx)	AES key
 *
 * Additional register usage visible in the code below:
 * %xmm2	Second part of the key; read and updated by the 192a, 192b
 *		and 256b helpers
 * %xmm4	Used as shufps scratch; the caller zeroes it with pxor
 *		before the first call
 * %xmm3, %xmm5, %xmm6	Scratch in the 192-bit helpers
 * %rcx		Advanced past the round key(s) each helper stores
 *		(0x20 for _key_expansion_192a, 0x10 for the others)
 */

.align	16
_key_expansion_128:
_key_expansion_256a:
	pshufd	$0b11111111, %xmm1, %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0
	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_128)
	SET_SIZE(_key_expansion_256a)

/*
 * _key_expansion_192a: updates %xmm0 and %xmm2, then stores two 16-byte
 * blocks at (%rcx) and 0x10(%rcx) and advances %rcx by 0x20.
 */
.align 16
_key_expansion_192a:
	pshufd	$0b01010101, %xmm1, %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	movaps	%xmm2, %xmm6
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, %xmm1
	shufps	$0b01000100, %xmm0, %xmm6
	movaps	%xmm6, (%rcx)
	shufps	$0b01001110, %xmm2, %xmm1
	movaps	%xmm1, 0x10(%rcx)
	add	$0x20, %rcx
	ret
	SET_SIZE(_key_expansion_192a)

/*
 * _key_expansion_192b: updates %xmm0 and %xmm2 as _key_expansion_192a does,
 * but stores only %xmm0 at (%rcx) and advances %rcx by 0x10.
 */
.align 16
_key_expansion_192b:
	pshufd	$0b01010101, %xmm1, %xmm1
	shufps	$0b00010000, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	shufps	$0b10001100, %xmm0, %xmm4
	pxor	%xmm4, %xmm0
	pxor	%xmm1, %xmm0

	movaps	%xmm2, %xmm5
	pslldq	$4, %xmm5
	pshufd	$0b11111111, %xmm0, %xmm3
	pxor	%xmm3, %xmm2
	pxor	%xmm5, %xmm2

	movaps	%xmm0, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_192b)

/*
 * _key_expansion_256b: same instruction pattern as _key_expansion_256a, but
 * it operates on %xmm2 instead of %xmm0 and uses a different pshufd selector.
 */
.align 16
_key_expansion_256b:
	pshufd	$0b10101010, %xmm1, %xmm1
	shufps	$0b00010000, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	shufps	$0b10001100, %xmm2, %xmm4
	pxor	%xmm4, %xmm2
	pxor	%xmm1, %xmm2
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx
	ret
	SET_SIZE(_key_expansion_256b)


/*
 * rijndael_key_setup_enc_intel()
 * Expand the cipher key into the encryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_encrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 *
 * Notes on the code below:
 * - The user key is loaded with movups, but the schedule is written with
 *   movaps, so the key schedule pointer must be 16-byte aligned.
 * - Key size is compared against 256, then 192, then 128; any other value
 *   takes the invalid-key-bits exit.
 * - %rcx, %r10 and %xmm0 - %xmm6 are used as temporaries.
 */

#ifdef	OPENSSL_INTERFACE
#define	rijndael_key_setup_enc_intel	intel_AES_set_encrypt_key
#define	rijndael_key_setup_dec_intel	intel_AES_set_decrypt_key

#define	USERCIPHERKEY	rdi	/* P1, 64 bits */
#define	KEYSIZE32	esi	/* P2, 32 bits */
#define	KEYSIZE64	rsi	/* P2, 64 bits */
#define	AESKEY		rdx	/* P3, 64 bits */

#else	/* OpenSolaris Interface */
#define	AESKEY		rdi	/* P1, 64 bits */
#define	USERCIPHERKEY	rsi	/* P2, 64 bits */
#define	KEYSIZE32	edx	/* P3, 32 bits */
#define	KEYSIZE64	rdx	/* P3, 64 bits */
#endif	/* OPENSSL_INTERFACE */

/* The temporaries below reuse parameter registers once they are consumed. */
#define	ROUNDS32	KEYSIZE32	/* temp */
#define	ROUNDS64	KEYSIZE64	/* temp */
#define	ENDAESKEY	USERCIPHERKEY	/* temp */


ENTRY_NP(rijndael_key_setup_enc_intel)
	CLEAR_TS_OR_PUSH_XMM0_TO_XMM6(%r10)

	/ NULL pointer sanity check
	test	%USERCIPHERKEY, %USERCIPHERKEY
	jz	.Lenc_key_invalid_param
	test	%AESKEY, %AESKEY
	jz	.Lenc_key_invalid_param

	/* Round key 0 is the first 16 bytes of the user key, copied as is. */
	movups	(%USERCIPHERKEY), %xmm0	/ user key (first 16 bytes)
	movaps	%xmm0, (%AESKEY)
	lea	0x10(%AESKEY), %rcx	/ key addr
	pxor	%xmm4, %xmm4		/ xmm4 is assumed 0 in _key_expansion_x

	cmp	$256, %KEYSIZE32
	jnz	.Lenc_key192

	/ AES 256: 14 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$14, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 14
#endif	/* OPENSSL_INTERFACE */

	/* Round key 1 is the second 16 bytes of the user key, copied as is. */
	movups	0x10(%USERCIPHERKEY), %xmm2	/ other user key (2nd 16 bytes)
	movaps	%xmm2, (%rcx)
	add	$0x10, %rcx

	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x1, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x2, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x4, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x8, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x10, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a
	aeskeygenassist $0x20, %xmm0, %xmm1
	call	_key_expansion_256b
	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_256a

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* Open Solaris Interface */
	mov	$14, %rax			/ return # rounds = 14
#endif
	ret

.align 4
.Lenc_key192:
	cmp	$192, %KEYSIZE32
	jnz	.Lenc_key128

	/ AES 192: 12 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$12, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)	/ key.rounds = 12
#endif	/* OPENSSL_INTERFACE */

	/* movq loads only the remaining 8 bytes of the 24-byte user key. */
	movq	0x10(%USERCIPHERKEY), %xmm2	/ other user key
	aeskeygenassist $0x1, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x2, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x4, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x8, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x10, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x20, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b
	aeskeygenassist $0x40, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192a
	aeskeygenassist $0x80, %xmm2, %xmm1	/ expand the key
	call	_key_expansion_192b

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$12, %rax			/ return # rounds = 12
#endif
	ret

.align 4
.Lenc_key128:
	cmp	$128, %KEYSIZE32
	jnz	.Lenc_key_invalid_key_bits

	/ AES 128: 10 rounds in encryption key schedule
#ifdef OPENSSL_INTERFACE
	mov	$10, %ROUNDS32
	movl	%ROUNDS32, 240(%AESKEY)		/ key.rounds = 10
#endif	/* OPENSSL_INTERFACE */

	aeskeygenassist $0x1, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x2, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x4, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x8, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x10, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x20, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x40, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x80, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x1b, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128
	aeskeygenassist $0x36, %xmm0, %xmm1	/ expand the key
	call	_key_expansion_128

	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	xor	%rax, %rax			/ return 0 (OK)
#else	/* OpenSolaris Interface */
	mov	$10, %rax			/ return # rounds = 10
#endif
	ret

/*
 * Error exits.  For the OpenSolaris interface a NULL pointer falls through
 * to the invalid-key-bits exit and both return 0; for the OpenSSL interface
 * they return -1 and -2 respectively.
 */
.Lenc_key_invalid_param:
#ifdef	OPENSSL_INTERFACE
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
	mov	$-1, %rax	/ user key or AES key pointer is NULL
	ret
#else
	/* FALLTHROUGH */
#endif	/* OPENSSL_INTERFACE */

.Lenc_key_invalid_key_bits:
	SET_TS_OR_POP_XMM0_TO_XMM6(%r10)
#ifdef	OPENSSL_INTERFACE
	mov	$-2, %rax	/ keysize is invalid
#else	/* Open Solaris Interface */
	xor	%rax, %rax	/ a key pointer is NULL or invalid keysize
#endif	/* OPENSSL_INTERFACE */

	ret
	SET_SIZE(rijndael_key_setup_enc_intel)


/*
 * rijndael_key_setup_dec_intel()
 * Expand the cipher key into the decryption key schedule.
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * OpenSolaris interface:
 * int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[],
 *	uint64_t keyBits);
 * Return value is 0 on error, number of rounds on success.
 * P1->P2, P2->P3, P3->P1
 *
 * Original Intel OpenSSL interface:
 * int intel_AES_set_decrypt_key(const unsigned char *userKey,
 *	const int bits, AES_KEY *key);
 * Return value is non-zero on error, 0 on success.
 *
 * Implementation: the encryption key schedule is generated first by calling
 * rijndael_key_setup_enc_intel().  The round keys are then swapped
 * end-for-end in place, and aesimc is applied to every round key except
 * the first and the last.
 */
ENTRY_NP(rijndael_key_setup_dec_intel)
	/ Generate round keys used for encryption
	call	rijndael_key_setup_enc_intel
	test	%rax, %rax
#ifdef	OPENSSL_INTERFACE
	jnz	.Ldec_key_exit	/ Failed if returned non-0
#else	/* OpenSolaris Interface */
	jz	.Ldec_key_exit	/ Failed if returned 0
#endif	/* OPENSSL_INTERFACE */

	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	/*
	 * Convert round keys used for encryption
	 * to a form usable for decryption
	 */
#ifndef	OPENSSL_INTERFACE		/* OpenSolaris Interface */
	mov	%rax, %ROUNDS64		/ set # rounds (10, 12, or 14)
					/ (already set for OpenSSL)
#endif

	/*
	 * %rcx = address of round key 1.
	 * ROUNDS64 and ENDAESKEY = address of the last round key
	 * (key schedule base + 16 * number of rounds).
	 */
	lea	0x10(%AESKEY), %rcx	/ key addr
	shl	$4, %ROUNDS32
	add	%AESKEY, %ROUNDS64
	mov	%ROUNDS64, %ENDAESKEY

	/*
	 * Swap round keys pairwise, walking AESKEY up and ROUNDS64 down
	 * until the two pointers meet.
	 */
.align 4
.Ldec_key_reorder_loop:
	movaps	(%AESKEY), %xmm0
	movaps	(%ROUNDS64), %xmm1
	movaps	%xmm0, (%ROUNDS64)
	movaps	%xmm1, (%AESKEY)
	lea	0x10(%AESKEY), %AESKEY
	lea	-0x10(%ROUNDS64), %ROUNDS64
	cmp	%AESKEY, %ROUNDS64
	ja	.Ldec_key_reorder_loop

	/* Walk %rcx from round key 1 up to, but not including, ENDAESKEY. */
.align 4
.Ldec_key_inv_loop:
	movaps	(%rcx), %xmm0
	/ Convert an encryption round key to a form usable for decryption
	/ with the "AES Inverse Mix Columns" instruction
	aesimc	%xmm0, %xmm1
	movaps	%xmm1, (%rcx)
	lea	0x10(%rcx), %rcx
	cmp	%ENDAESKEY, %rcx
	jnz	.Ldec_key_inv_loop

	SET_TS_OR_POP_XMM0_XMM1(%r10)

.Ldec_key_exit:
	/ OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error
	/ OpenSSL: rax = 0 for OK, or non-zero for error
	ret
	SET_SIZE(rijndael_key_setup_dec_intel)


/*
 * aes_encrypt_intel()
 * Encrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.
 * Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_encrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4])
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_encrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key)
 *
 * Notes on the code below:
 * - The input and output blocks are accessed with movups; the key schedule
 *   is read with movaps and so must be 16-byte aligned.
 * - KEYP is advanced according to the round count so that all key sizes
 *   share the final ten rounds, with the last round key at 0x70(%KEYP).
 * - A round count below 12 takes the 128-bit path, exactly 12 the 192-bit
 *   path, and anything larger the 256-bit path; the count is not validated.
 */

#ifdef	OPENSSL_INTERFACE
#define	aes_encrypt_intel	intel_AES_encrypt
#define	aes_decrypt_intel	intel_AES_decrypt

#define	INP		rdi	/* P1, 64 bits */
#define	OUTP		rsi	/* P2, 64 bits */
#define	KEYP		rdx	/* P3, 64 bits */

/* No NROUNDS parameter--offset 240 from KEYP saved in %ecx:  */
#define	NROUNDS32	ecx	/* temporary, 32 bits */
#define	NROUNDS		cl	/* temporary,  8 bits */

#else	/* OpenSolaris Interface */
#define	KEYP		rdi	/* P1, 64 bits */
#define	NROUNDS		esi	/* P2, 32 bits */
#define	INP		rdx	/* P3, 64 bits */
#define	OUTP		rcx	/* P4, 64 bits */
#endif	/* OPENSSL_INTERFACE */

#define	STATE		xmm0	/* temporary, 128 bits */
#define	KEY		xmm1	/* temporary, 128 bits */

ENTRY_NP(aes_encrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Lenc128
	/* lea does not modify flags, so je still tests the cmp result. */
	lea	0x20(%KEYP), %KEYP
	je	.Lenc192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE

.align 4
.Lenc128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesenc	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesenclast	 %KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_encrypt_intel)


/*
 * aes_decrypt_intel()
 * Decrypt a single block (in and out can overlap).
 *
 * For kernel code, caller is responsible for ensuring kpreempt_disable()
 * has been called.  This is because %xmm registers are not saved/restored.
 * Clear and set the CR0.TS bit on entry and exit, respectively,  if TS is set
 * on entry.  Otherwise, if TS is not set, save and restore %xmm registers
 * on the stack.
 *
 * Temporary register usage:
 * %xmm0	State
 * %xmm1	Key
 *
 * Original OpenSolaris Interface:
 * void aes_decrypt_intel(const aes_ks_t *ks, int Nr,
 *	const uint32_t pt[4], uint32_t ct[4]);
 *
 * Original Intel OpenSSL Interface:
 * void intel_AES_decrypt(const unsigned char *in, unsigned char *out,
 *	const AES_KEY *key);
 *
 * Notes on the code below:
 * - Same structure as aes_encrypt_intel(), with aesdec/aesdeclast in place
 *   of aesenc/aesenclast; it uses the INP, OUTP, KEYP, NROUNDS, STATE and
 *   KEY register definitions made for aes_encrypt_intel().
 * - NOTE(review): presumably the key schedule passed in is the one built
 *   by rijndael_key_setup_dec_intel() -- confirm against callers.
 */
ENTRY_NP(aes_decrypt_intel)
	CLEAR_TS_OR_PUSH_XMM0_XMM1(%r10)

	movups	(%INP), %STATE			/ input
	movaps	(%KEYP), %KEY			/ key
#ifdef	OPENSSL_INTERFACE
	mov	240(%KEYP), %NROUNDS32		/ round count
#else	/* OpenSolaris Interface */
	/* Round count is already present as P2 in %rsi/%esi */
#endif	/* OPENSSL_INTERFACE */

	pxor	%KEY, %STATE			/ round 0
	lea	0x30(%KEYP), %KEYP
	cmp	$12, %NROUNDS
	jb	.Ldec128
	/* lea does not modify flags, so je still tests the cmp result. */
	lea	0x20(%KEYP), %KEYP
	je	.Ldec192

	/ AES 256
	lea	0x20(%KEYP), %KEYP
	movaps	-0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec192:
	/ AES 192 and 256
	movaps	-0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE

.align 4
.Ldec128:
	/ AES 128, 192, and 256
	movaps	-0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	-0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x10(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x20(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x30(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x40(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x50(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x60(%KEYP), %KEY
	aesdec	%KEY, %STATE
	movaps	0x70(%KEYP), %KEY
	aesdeclast	%KEY, %STATE		/ last round
	movups	%STATE, (%OUTP)			/ output

	SET_TS_OR_POP_XMM0_XMM1(%r10)
	ret
	SET_SIZE(aes_decrypt_intel)

#endif	/* lint || __lint */