1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2003, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2015 by Saso Kiselkov. All rights reserved.
  24  */
  25 
  26 #include <sys/types.h>
  27 #include <sys/systm.h>
  28 #include <sys/sysmacros.h>
  29 #include <netinet/in.h>
  30 #include "aes_impl.h"
  31 #ifndef _KERNEL
  32 #include <strings.h>
  33 #include <stdlib.h>
  34 #include <sys/note.h>
  35 #endif  /* !_KERNEL */
  36 
  37 #ifdef __amd64
  38 
  39 #ifdef _KERNEL
  40 #include <sys/cpuvar.h>           /* cpu_t, CPU */
  41 #include <sys/x86_archext.h>      /* x86_featureset, X86FSET_AES */
  42 #include <sys/disp.h>             /* kpreempt_disable(), kpreempt_enable() */
  43 
  44 /* Workaround for no XMM kernel thread save/restore: toggle preemption */
  45 #define KPREEMPT_DISABLE        kpreempt_disable()
  46 #define KPREEMPT_ENABLE         kpreempt_enable()
  47 
  48 #else
  49 #include <sys/auxv.h>             /* getisax() */
  50 #include <sys/auxv_386.h> /* AV_386_AES bit */
  51 #define KPREEMPT_DISABLE        /* expands to nothing outside the kernel */
  52 #define KPREEMPT_ENABLE         /* expands to nothing outside the kernel */
  53 #endif  /* _KERNEL */
  54 #endif  /* __amd64 */
  55 
  56 
  57 /*
  58  * This file is derived from the file rijndael-alg-fst.c, taken from the
  59  * "optimized C code v3.0" on the "rijndael home page"
  60  * http://www.iaik.tu-graz.ac.at/research/krypto/AES/old/~rijmen/rijndael/
  61  * pointed to by the NIST web-site http://csrc.nist.gov/archive/aes/
  62  *
  63  * The following note is from the original file:
  64  */
  65 
  66 /*
  67  * rijndael-alg-fst.c
  68  *
  69  * @version 3.0 (December 2000)
  70  *
  71  * Optimised ANSI C code for the Rijndael cipher (now AES)
  72  *
  73  * @author Vincent Rijmen <vincent.rijmen@esat.kuleuven.ac.be>
  74  * @author Antoon Bosselaers <antoon.bosselaers@esat.kuleuven.ac.be>
  75  * @author Paulo Barreto <paulo.barreto@terra.com.br>
  76  *
  77  * This code is hereby placed in the public domain.
  78  *
  79  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS ''AS IS'' AND ANY EXPRESS
  80  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
  81  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  82  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE
  83  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  84  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  85  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  86  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  87  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  88  * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
  89  * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  90  */
  91 
  92 #if defined(sun4u)
  93 /* External assembly functions (pt/ct are four 32-bit words each): */
  94 extern void aes_encrypt_impl(const uint32_t rk[], int Nr, const uint32_t pt[4],
  95         uint32_t ct[4]);
  96 extern void aes_decrypt_impl(const uint32_t rk[], int Nr, const uint32_t ct[4],
  97         uint32_t pt[4]);
  98 
  99 #define AES_ENCRYPT_IMPL(a, b, c, d)    aes_encrypt_impl(a, b, c, d)
 100 #define AES_DECRYPT_IMPL(a, b, c, d)    aes_decrypt_impl(a, b, c, d)
 101 
 102 #elif defined(__amd64)
 103 
 104 /* These functions are used to execute amd64 instructions for AMD or Intel: */
 105 extern int rijndael_key_setup_enc_amd64(uint32_t rk[],
 106         const uint32_t cipherKey[], int keyBits);
 107 extern int rijndael_key_setup_dec_amd64(uint32_t rk[],
 108         const uint32_t cipherKey[], int keyBits);
 109 extern void aes_encrypt_amd64(const uint32_t rk[], int Nr,
 110         const uint32_t pt[4], uint32_t ct[4]);
 111 extern void aes_decrypt_amd64(const uint32_t rk[], int Nr,
 112         const uint32_t ct[4], uint32_t pt[4]);
 113 
 114 /* These functions are used to execute Intel-specific AES-NI instructions: */
 115 extern int rijndael_key_setup_enc_intel(uint32_t rk[],
 116         const uint32_t cipherKey[], uint64_t keyBits);  /* NB: 64-bit keyBits */
 117 extern int rijndael_key_setup_dec_intel(uint32_t rk[],
 118         const uint32_t cipherKey[], uint64_t keyBits);  /* NB: 64-bit keyBits */
 119 extern void aes_encrypt_intel(const uint32_t rk[], int Nr,
 120         const uint32_t pt[4], uint32_t ct[4]);
 121 extern void aes_decrypt_intel(const uint32_t rk[], int Nr,
 122         const uint32_t ct[4], uint32_t pt[4]);
 123 extern void aes_encrypt_intel8(const uint32_t rk[], int Nr,
 124         const void *pt, void *ct);
 125 extern void aes_decrypt_intel8(const uint32_t rk[], int Nr,
 126         const void *ct, void *pt);
 127 extern void aes_encrypt_cbc_intel8(const uint32_t rk[], int Nr,
 128         const void *pt, void *ct, const void *iv);
 129 extern void aes_ctr_intel8(const uint32_t rk[], int Nr,
 130         const void *input, void *output, uint64_t counter_upper_BE,
 131         uint64_t counter_lower_LE);
 132 extern void aes_xor_intel(const uint8_t *, uint8_t *);
 133 
 134 static inline int intel_aes_instructions_present(void);
 135 
 136 #ifdef  _KERNEL
 137 /*
 138  * Some form of floating-point acceleration is available, so declare these.
 139  * The implementations will be in a platform-specific assembly file (e.g.
 140  * amd64/aes_intel.s for SSE2/AES-NI).
 141  */
 142 extern void aes_accel_save(void *savestate);
 143 extern void aes_accel_restore(void *savestate);
 144 #endif  /* _KERNEL */
 145 
 146 #else /* Generic C implementation */
 147 static void rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
 148     uint32_t ct[4]);
 149 static void rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
 150     uint32_t ct[4]);  /* NOTE(review): pt/ct names look swapped vs. L96 */
 151 #define rijndael_key_setup_enc_raw      rijndael_key_setup_enc
 152 #define AES_ENCRYPT_IMPL(a, b, c, d)    rijndael_encrypt(a, b, c, d)
 153 #define AES_DECRYPT_IMPL(a, b, c, d)    rijndael_decrypt(a, b, c, d)
 154 #endif  /* sun4u || __amd64 */
 155 
 156 #if defined(_LITTLE_ENDIAN) && !defined(__amd64)
 157 #define AES_BYTE_SWAP   /* set only on little-endian, non-amd64 builds */
 158 #endif  /* _LITTLE_ENDIAN && !__amd64 */
 159 
 160 
 161 #if !defined(__amd64)
 162 /*
 163  * Constant tables: each word packs four GF(2^8) multiples of an S-box byte.
 164  */
 165 
 166 /*
 167  * Te0[x] = S [x].[02, 01, 01, 03];
 168  * Te1[x] = S [x].[03, 02, 01, 01];
 169  * Te2[x] = S [x].[01, 03, 02, 01];
 170  * Te3[x] = S [x].[01, 01, 03, 02];
 171  * Te4[x] = S [x].[01, 01, 01, 01];
 172  *
 173  * Td0[x] = Si[x].[0e, 09, 0d, 0b];
 174  * Td1[x] = Si[x].[0b, 0e, 09, 0d];
 175  * Td2[x] = Si[x].[0d, 0b, 0e, 09];
 176  * Td3[x] = Si[x].[09, 0d, 0b, 0e];
 177  * Td4[x] = Si[x].[01, 01, 01, 01];
 178  */
 179 
 180 /* Encrypt Sbox constants (for the substitute bytes operation) */
 181 
 182 #ifndef sun4u
 183 
 184 static const uint32_t Te0[256] =   /* bytes, MSB first: 02*S[x], S[x], S[x], 03*S[x] */
 185 {
 186         0xc66363a5U, 0xf87c7c84U, 0xee777799U, 0xf67b7b8dU,
 187         0xfff2f20dU, 0xd66b6bbdU, 0xde6f6fb1U, 0x91c5c554U,
 188         0x60303050U, 0x02010103U, 0xce6767a9U, 0x562b2b7dU,
 189         0xe7fefe19U, 0xb5d7d762U, 0x4dababe6U, 0xec76769aU,
 190         0x8fcaca45U, 0x1f82829dU, 0x89c9c940U, 0xfa7d7d87U,
 191         0xeffafa15U, 0xb25959ebU, 0x8e4747c9U, 0xfbf0f00bU,
 192         0x41adadecU, 0xb3d4d467U, 0x5fa2a2fdU, 0x45afafeaU,
 193         0x239c9cbfU, 0x53a4a4f7U, 0xe4727296U, 0x9bc0c05bU,
 194         0x75b7b7c2U, 0xe1fdfd1cU, 0x3d9393aeU, 0x4c26266aU,
 195         0x6c36365aU, 0x7e3f3f41U, 0xf5f7f702U, 0x83cccc4fU,
 196         0x6834345cU, 0x51a5a5f4U, 0xd1e5e534U, 0xf9f1f108U,
 197         0xe2717193U, 0xabd8d873U, 0x62313153U, 0x2a15153fU,
 198         0x0804040cU, 0x95c7c752U, 0x46232365U, 0x9dc3c35eU,
 199         0x30181828U, 0x379696a1U, 0x0a05050fU, 0x2f9a9ab5U,
 200         0x0e070709U, 0x24121236U, 0x1b80809bU, 0xdfe2e23dU,
 201         0xcdebeb26U, 0x4e272769U, 0x7fb2b2cdU, 0xea75759fU,
 202         0x1209091bU, 0x1d83839eU, 0x582c2c74U, 0x341a1a2eU,
 203         0x361b1b2dU, 0xdc6e6eb2U, 0xb45a5aeeU, 0x5ba0a0fbU,
 204         0xa45252f6U, 0x763b3b4dU, 0xb7d6d661U, 0x7db3b3ceU,
 205         0x5229297bU, 0xdde3e33eU, 0x5e2f2f71U, 0x13848497U,
 206         0xa65353f5U, 0xb9d1d168U, 0x00000000U, 0xc1eded2cU,
 207         0x40202060U, 0xe3fcfc1fU, 0x79b1b1c8U, 0xb65b5bedU,
 208         0xd46a6abeU, 0x8dcbcb46U, 0x67bebed9U, 0x7239394bU,
 209         0x944a4adeU, 0x984c4cd4U, 0xb05858e8U, 0x85cfcf4aU,
 210         0xbbd0d06bU, 0xc5efef2aU, 0x4faaaae5U, 0xedfbfb16U,
 211         0x864343c5U, 0x9a4d4dd7U, 0x66333355U, 0x11858594U,
 212         0x8a4545cfU, 0xe9f9f910U, 0x04020206U, 0xfe7f7f81U,
 213         0xa05050f0U, 0x783c3c44U, 0x259f9fbaU, 0x4ba8a8e3U,
 214         0xa25151f3U, 0x5da3a3feU, 0x804040c0U, 0x058f8f8aU,
 215         0x3f9292adU, 0x219d9dbcU, 0x70383848U, 0xf1f5f504U,
 216         0x63bcbcdfU, 0x77b6b6c1U, 0xafdada75U, 0x42212163U,
 217         0x20101030U, 0xe5ffff1aU, 0xfdf3f30eU, 0xbfd2d26dU,
 218         0x81cdcd4cU, 0x180c0c14U, 0x26131335U, 0xc3ecec2fU,
 219         0xbe5f5fe1U, 0x359797a2U, 0x884444ccU, 0x2e171739U,
 220         0x93c4c457U, 0x55a7a7f2U, 0xfc7e7e82U, 0x7a3d3d47U,
 221         0xc86464acU, 0xba5d5de7U, 0x3219192bU, 0xe6737395U,
 222         0xc06060a0U, 0x19818198U, 0x9e4f4fd1U, 0xa3dcdc7fU,
 223         0x44222266U, 0x542a2a7eU, 0x3b9090abU, 0x0b888883U,
 224         0x8c4646caU, 0xc7eeee29U, 0x6bb8b8d3U, 0x2814143cU,
 225         0xa7dede79U, 0xbc5e5ee2U, 0x160b0b1dU, 0xaddbdb76U,
 226         0xdbe0e03bU, 0x64323256U, 0x743a3a4eU, 0x140a0a1eU,
 227         0x924949dbU, 0x0c06060aU, 0x4824246cU, 0xb85c5ce4U,
 228         0x9fc2c25dU, 0xbdd3d36eU, 0x43acacefU, 0xc46262a6U,
 229         0x399191a8U, 0x319595a4U, 0xd3e4e437U, 0xf279798bU,
 230         0xd5e7e732U, 0x8bc8c843U, 0x6e373759U, 0xda6d6db7U,
 231         0x018d8d8cU, 0xb1d5d564U, 0x9c4e4ed2U, 0x49a9a9e0U,
 232         0xd86c6cb4U, 0xac5656faU, 0xf3f4f407U, 0xcfeaea25U,
 233         0xca6565afU, 0xf47a7a8eU, 0x47aeaee9U, 0x10080818U,
 234         0x6fbabad5U, 0xf0787888U, 0x4a25256fU, 0x5c2e2e72U,
 235         0x381c1c24U, 0x57a6a6f1U, 0x73b4b4c7U, 0x97c6c651U,
 236         0xcbe8e823U, 0xa1dddd7cU, 0xe874749cU, 0x3e1f1f21U,
 237         0x964b4bddU, 0x61bdbddcU, 0x0d8b8b86U, 0x0f8a8a85U,
 238         0xe0707090U, 0x7c3e3e42U, 0x71b5b5c4U, 0xcc6666aaU,
 239         0x904848d8U, 0x06030305U, 0xf7f6f601U, 0x1c0e0e12U,
 240         0xc26161a3U, 0x6a35355fU, 0xae5757f9U, 0x69b9b9d0U,
 241         0x17868691U, 0x99c1c158U, 0x3a1d1d27U, 0x279e9eb9U,
 242         0xd9e1e138U, 0xebf8f813U, 0x2b9898b3U, 0x22111133U,
 243         0xd26969bbU, 0xa9d9d970U, 0x078e8e89U, 0x339494a7U,
 244         0x2d9b9bb6U, 0x3c1e1e22U, 0x15878792U, 0xc9e9e920U,
 245         0x87cece49U, 0xaa5555ffU, 0x50282878U, 0xa5dfdf7aU,
 246         0x038c8c8fU, 0x59a1a1f8U, 0x09898980U, 0x1a0d0d17U,
 247         0x65bfbfdaU, 0xd7e6e631U, 0x844242c6U, 0xd06868b8U,
 248         0x824141c3U, 0x299999b0U, 0x5a2d2d77U, 0x1e0f0f11U,
 249         0x7bb0b0cbU, 0xa85454fcU, 0x6dbbbbd6U, 0x2c16163aU
 250 };
 251 
 252 
 253 static const uint32_t Te1[256] =   /* bytes, MSB first: 03*S[x], 02*S[x], S[x], S[x] */
 254 {
 255         0xa5c66363U, 0x84f87c7cU, 0x99ee7777U, 0x8df67b7bU,
 256         0x0dfff2f2U, 0xbdd66b6bU, 0xb1de6f6fU, 0x5491c5c5U,
 257         0x50603030U, 0x03020101U, 0xa9ce6767U, 0x7d562b2bU,
 258         0x19e7fefeU, 0x62b5d7d7U, 0xe64dababU, 0x9aec7676U,
 259         0x458fcacaU, 0x9d1f8282U, 0x4089c9c9U, 0x87fa7d7dU,
 260         0x15effafaU, 0xebb25959U, 0xc98e4747U, 0x0bfbf0f0U,
 261         0xec41adadU, 0x67b3d4d4U, 0xfd5fa2a2U, 0xea45afafU,
 262         0xbf239c9cU, 0xf753a4a4U, 0x96e47272U, 0x5b9bc0c0U,
 263         0xc275b7b7U, 0x1ce1fdfdU, 0xae3d9393U, 0x6a4c2626U,
 264         0x5a6c3636U, 0x417e3f3fU, 0x02f5f7f7U, 0x4f83ccccU,
 265         0x5c683434U, 0xf451a5a5U, 0x34d1e5e5U, 0x08f9f1f1U,
 266         0x93e27171U, 0x73abd8d8U, 0x53623131U, 0x3f2a1515U,
 267         0x0c080404U, 0x5295c7c7U, 0x65462323U, 0x5e9dc3c3U,
 268         0x28301818U, 0xa1379696U, 0x0f0a0505U, 0xb52f9a9aU,
 269         0x090e0707U, 0x36241212U, 0x9b1b8080U, 0x3ddfe2e2U,
 270         0x26cdebebU, 0x694e2727U, 0xcd7fb2b2U, 0x9fea7575U,
 271         0x1b120909U, 0x9e1d8383U, 0x74582c2cU, 0x2e341a1aU,
 272         0x2d361b1bU, 0xb2dc6e6eU, 0xeeb45a5aU, 0xfb5ba0a0U,
 273         0xf6a45252U, 0x4d763b3bU, 0x61b7d6d6U, 0xce7db3b3U,
 274         0x7b522929U, 0x3edde3e3U, 0x715e2f2fU, 0x97138484U,
 275         0xf5a65353U, 0x68b9d1d1U, 0x00000000U, 0x2cc1ededU,
 276         0x60402020U, 0x1fe3fcfcU, 0xc879b1b1U, 0xedb65b5bU,
 277         0xbed46a6aU, 0x468dcbcbU, 0xd967bebeU, 0x4b723939U,
 278         0xde944a4aU, 0xd4984c4cU, 0xe8b05858U, 0x4a85cfcfU,
 279         0x6bbbd0d0U, 0x2ac5efefU, 0xe54faaaaU, 0x16edfbfbU,
 280         0xc5864343U, 0xd79a4d4dU, 0x55663333U, 0x94118585U,
 281         0xcf8a4545U, 0x10e9f9f9U, 0x06040202U, 0x81fe7f7fU,
 282         0xf0a05050U, 0x44783c3cU, 0xba259f9fU, 0xe34ba8a8U,
 283         0xf3a25151U, 0xfe5da3a3U, 0xc0804040U, 0x8a058f8fU,
 284         0xad3f9292U, 0xbc219d9dU, 0x48703838U, 0x04f1f5f5U,
 285         0xdf63bcbcU, 0xc177b6b6U, 0x75afdadaU, 0x63422121U,
 286         0x30201010U, 0x1ae5ffffU, 0x0efdf3f3U, 0x6dbfd2d2U,
 287         0x4c81cdcdU, 0x14180c0cU, 0x35261313U, 0x2fc3ececU,
 288         0xe1be5f5fU, 0xa2359797U, 0xcc884444U, 0x392e1717U,
 289         0x5793c4c4U, 0xf255a7a7U, 0x82fc7e7eU, 0x477a3d3dU,
 290         0xacc86464U, 0xe7ba5d5dU, 0x2b321919U, 0x95e67373U,
 291         0xa0c06060U, 0x98198181U, 0xd19e4f4fU, 0x7fa3dcdcU,
 292         0x66442222U, 0x7e542a2aU, 0xab3b9090U, 0x830b8888U,
 293         0xca8c4646U, 0x29c7eeeeU, 0xd36bb8b8U, 0x3c281414U,
 294         0x79a7dedeU, 0xe2bc5e5eU, 0x1d160b0bU, 0x76addbdbU,
 295         0x3bdbe0e0U, 0x56643232U, 0x4e743a3aU, 0x1e140a0aU,
 296         0xdb924949U, 0x0a0c0606U, 0x6c482424U, 0xe4b85c5cU,
 297         0x5d9fc2c2U, 0x6ebdd3d3U, 0xef43acacU, 0xa6c46262U,
 298         0xa8399191U, 0xa4319595U, 0x37d3e4e4U, 0x8bf27979U,
 299         0x32d5e7e7U, 0x438bc8c8U, 0x596e3737U, 0xb7da6d6dU,
 300         0x8c018d8dU, 0x64b1d5d5U, 0xd29c4e4eU, 0xe049a9a9U,
 301         0xb4d86c6cU, 0xfaac5656U, 0x07f3f4f4U, 0x25cfeaeaU,
 302         0xafca6565U, 0x8ef47a7aU, 0xe947aeaeU, 0x18100808U,
 303         0xd56fbabaU, 0x88f07878U, 0x6f4a2525U, 0x725c2e2eU,
 304         0x24381c1cU, 0xf157a6a6U, 0xc773b4b4U, 0x5197c6c6U,
 305         0x23cbe8e8U, 0x7ca1ddddU, 0x9ce87474U, 0x213e1f1fU,
 306         0xdd964b4bU, 0xdc61bdbdU, 0x860d8b8bU, 0x850f8a8aU,
 307         0x90e07070U, 0x427c3e3eU, 0xc471b5b5U, 0xaacc6666U,
 308         0xd8904848U, 0x05060303U, 0x01f7f6f6U, 0x121c0e0eU,
 309         0xa3c26161U, 0x5f6a3535U, 0xf9ae5757U, 0xd069b9b9U,
 310         0x91178686U, 0x5899c1c1U, 0x273a1d1dU, 0xb9279e9eU,
 311         0x38d9e1e1U, 0x13ebf8f8U, 0xb32b9898U, 0x33221111U,
 312         0xbbd26969U, 0x70a9d9d9U, 0x89078e8eU, 0xa7339494U,
 313         0xb62d9b9bU, 0x223c1e1eU, 0x92158787U, 0x20c9e9e9U,
 314         0x4987ceceU, 0xffaa5555U, 0x78502828U, 0x7aa5dfdfU,
 315         0x8f038c8cU, 0xf859a1a1U, 0x80098989U, 0x171a0d0dU,
 316         0xda65bfbfU, 0x31d7e6e6U, 0xc6844242U, 0xb8d06868U,
 317         0xc3824141U, 0xb0299999U, 0x775a2d2dU, 0x111e0f0fU,
 318         0xcb7bb0b0U, 0xfca85454U, 0xd66dbbbbU, 0x3a2c1616U
 319 };
 320 
 321 
 322 static const uint32_t Te2[256] =   /* bytes, MSB first: S[x], 03*S[x], 02*S[x], S[x] */
 323 {
 324         0x63a5c663U, 0x7c84f87cU, 0x7799ee77U, 0x7b8df67bU,
 325         0xf20dfff2U, 0x6bbdd66bU, 0x6fb1de6fU, 0xc55491c5U,
 326         0x30506030U, 0x01030201U, 0x67a9ce67U, 0x2b7d562bU,
 327         0xfe19e7feU, 0xd762b5d7U, 0xabe64dabU, 0x769aec76U,
 328         0xca458fcaU, 0x829d1f82U, 0xc94089c9U, 0x7d87fa7dU,
 329         0xfa15effaU, 0x59ebb259U, 0x47c98e47U, 0xf00bfbf0U,
 330         0xadec41adU, 0xd467b3d4U, 0xa2fd5fa2U, 0xafea45afU,
 331         0x9cbf239cU, 0xa4f753a4U, 0x7296e472U, 0xc05b9bc0U,
 332         0xb7c275b7U, 0xfd1ce1fdU, 0x93ae3d93U, 0x266a4c26U,
 333         0x365a6c36U, 0x3f417e3fU, 0xf702f5f7U, 0xcc4f83ccU,
 334         0x345c6834U, 0xa5f451a5U, 0xe534d1e5U, 0xf108f9f1U,
 335         0x7193e271U, 0xd873abd8U, 0x31536231U, 0x153f2a15U,
 336         0x040c0804U, 0xc75295c7U, 0x23654623U, 0xc35e9dc3U,
 337         0x18283018U, 0x96a13796U, 0x050f0a05U, 0x9ab52f9aU,
 338         0x07090e07U, 0x12362412U, 0x809b1b80U, 0xe23ddfe2U,
 339         0xeb26cdebU, 0x27694e27U, 0xb2cd7fb2U, 0x759fea75U,
 340         0x091b1209U, 0x839e1d83U, 0x2c74582cU, 0x1a2e341aU,
 341         0x1b2d361bU, 0x6eb2dc6eU, 0x5aeeb45aU, 0xa0fb5ba0U,
 342         0x52f6a452U, 0x3b4d763bU, 0xd661b7d6U, 0xb3ce7db3U,
 343         0x297b5229U, 0xe33edde3U, 0x2f715e2fU, 0x84971384U,
 344         0x53f5a653U, 0xd168b9d1U, 0x00000000U, 0xed2cc1edU,
 345         0x20604020U, 0xfc1fe3fcU, 0xb1c879b1U, 0x5bedb65bU,
 346         0x6abed46aU, 0xcb468dcbU, 0xbed967beU, 0x394b7239U,
 347         0x4ade944aU, 0x4cd4984cU, 0x58e8b058U, 0xcf4a85cfU,
 348         0xd06bbbd0U, 0xef2ac5efU, 0xaae54faaU, 0xfb16edfbU,
 349         0x43c58643U, 0x4dd79a4dU, 0x33556633U, 0x85941185U,
 350         0x45cf8a45U, 0xf910e9f9U, 0x02060402U, 0x7f81fe7fU,
 351         0x50f0a050U, 0x3c44783cU, 0x9fba259fU, 0xa8e34ba8U,
 352         0x51f3a251U, 0xa3fe5da3U, 0x40c08040U, 0x8f8a058fU,
 353         0x92ad3f92U, 0x9dbc219dU, 0x38487038U, 0xf504f1f5U,
 354         0xbcdf63bcU, 0xb6c177b6U, 0xda75afdaU, 0x21634221U,
 355         0x10302010U, 0xff1ae5ffU, 0xf30efdf3U, 0xd26dbfd2U,
 356         0xcd4c81cdU, 0x0c14180cU, 0x13352613U, 0xec2fc3ecU,
 357         0x5fe1be5fU, 0x97a23597U, 0x44cc8844U, 0x17392e17U,
 358         0xc45793c4U, 0xa7f255a7U, 0x7e82fc7eU, 0x3d477a3dU,
 359         0x64acc864U, 0x5de7ba5dU, 0x192b3219U, 0x7395e673U,
 360         0x60a0c060U, 0x81981981U, 0x4fd19e4fU, 0xdc7fa3dcU,
 361         0x22664422U, 0x2a7e542aU, 0x90ab3b90U, 0x88830b88U,
 362         0x46ca8c46U, 0xee29c7eeU, 0xb8d36bb8U, 0x143c2814U,
 363         0xde79a7deU, 0x5ee2bc5eU, 0x0b1d160bU, 0xdb76addbU,
 364         0xe03bdbe0U, 0x32566432U, 0x3a4e743aU, 0x0a1e140aU,
 365         0x49db9249U, 0x060a0c06U, 0x246c4824U, 0x5ce4b85cU,
 366         0xc25d9fc2U, 0xd36ebdd3U, 0xacef43acU, 0x62a6c462U,
 367         0x91a83991U, 0x95a43195U, 0xe437d3e4U, 0x798bf279U,
 368         0xe732d5e7U, 0xc8438bc8U, 0x37596e37U, 0x6db7da6dU,
 369         0x8d8c018dU, 0xd564b1d5U, 0x4ed29c4eU, 0xa9e049a9U,
 370         0x6cb4d86cU, 0x56faac56U, 0xf407f3f4U, 0xea25cfeaU,
 371         0x65afca65U, 0x7a8ef47aU, 0xaee947aeU, 0x08181008U,
 372         0xbad56fbaU, 0x7888f078U, 0x256f4a25U, 0x2e725c2eU,
 373         0x1c24381cU, 0xa6f157a6U, 0xb4c773b4U, 0xc65197c6U,
 374         0xe823cbe8U, 0xdd7ca1ddU, 0x749ce874U, 0x1f213e1fU,
 375         0x4bdd964bU, 0xbddc61bdU, 0x8b860d8bU, 0x8a850f8aU,
 376         0x7090e070U, 0x3e427c3eU, 0xb5c471b5U, 0x66aacc66U,
 377         0x48d89048U, 0x03050603U, 0xf601f7f6U, 0x0e121c0eU,
 378         0x61a3c261U, 0x355f6a35U, 0x57f9ae57U, 0xb9d069b9U,
 379         0x86911786U, 0xc15899c1U, 0x1d273a1dU, 0x9eb9279eU,
 380         0xe138d9e1U, 0xf813ebf8U, 0x98b32b98U, 0x11332211U,
 381         0x69bbd269U, 0xd970a9d9U, 0x8e89078eU, 0x94a73394U,
 382         0x9bb62d9bU, 0x1e223c1eU, 0x87921587U, 0xe920c9e9U,
 383         0xce4987ceU, 0x55ffaa55U, 0x28785028U, 0xdf7aa5dfU,
 384         0x8c8f038cU, 0xa1f859a1U, 0x89800989U, 0x0d171a0dU,
 385         0xbfda65bfU, 0xe631d7e6U, 0x42c68442U, 0x68b8d068U,
 386         0x41c38241U, 0x99b02999U, 0x2d775a2dU, 0x0f111e0fU,
 387         0xb0cb7bb0U, 0x54fca854U, 0xbbd66dbbU, 0x163a2c16U
 388 };
 389 
 390 
 391 static const uint32_t Te3[256] =   /* bytes, MSB first: S[x], S[x], 03*S[x], 02*S[x] */
 392 {
 393         0x6363a5c6U, 0x7c7c84f8U, 0x777799eeU, 0x7b7b8df6U,
 394         0xf2f20dffU, 0x6b6bbdd6U, 0x6f6fb1deU, 0xc5c55491U,
 395         0x30305060U, 0x01010302U, 0x6767a9ceU, 0x2b2b7d56U,
 396         0xfefe19e7U, 0xd7d762b5U, 0xababe64dU, 0x76769aecU,
 397         0xcaca458fU, 0x82829d1fU, 0xc9c94089U, 0x7d7d87faU,
 398         0xfafa15efU, 0x5959ebb2U, 0x4747c98eU, 0xf0f00bfbU,
 399         0xadadec41U, 0xd4d467b3U, 0xa2a2fd5fU, 0xafafea45U,
 400         0x9c9cbf23U, 0xa4a4f753U, 0x727296e4U, 0xc0c05b9bU,
 401         0xb7b7c275U, 0xfdfd1ce1U, 0x9393ae3dU, 0x26266a4cU,
 402         0x36365a6cU, 0x3f3f417eU, 0xf7f702f5U, 0xcccc4f83U,
 403         0x34345c68U, 0xa5a5f451U, 0xe5e534d1U, 0xf1f108f9U,
 404         0x717193e2U, 0xd8d873abU, 0x31315362U, 0x15153f2aU,
 405         0x04040c08U, 0xc7c75295U, 0x23236546U, 0xc3c35e9dU,
 406         0x18182830U, 0x9696a137U, 0x05050f0aU, 0x9a9ab52fU,
 407         0x0707090eU, 0x12123624U, 0x80809b1bU, 0xe2e23ddfU,
 408         0xebeb26cdU, 0x2727694eU, 0xb2b2cd7fU, 0x75759feaU,
 409         0x09091b12U, 0x83839e1dU, 0x2c2c7458U, 0x1a1a2e34U,
 410         0x1b1b2d36U, 0x6e6eb2dcU, 0x5a5aeeb4U, 0xa0a0fb5bU,
 411         0x5252f6a4U, 0x3b3b4d76U, 0xd6d661b7U, 0xb3b3ce7dU,
 412         0x29297b52U, 0xe3e33eddU, 0x2f2f715eU, 0x84849713U,
 413         0x5353f5a6U, 0xd1d168b9U, 0x00000000U, 0xeded2cc1U,
 414         0x20206040U, 0xfcfc1fe3U, 0xb1b1c879U, 0x5b5bedb6U,
 415         0x6a6abed4U, 0xcbcb468dU, 0xbebed967U, 0x39394b72U,
 416         0x4a4ade94U, 0x4c4cd498U, 0x5858e8b0U, 0xcfcf4a85U,
 417         0xd0d06bbbU, 0xefef2ac5U, 0xaaaae54fU, 0xfbfb16edU,
 418         0x4343c586U, 0x4d4dd79aU, 0x33335566U, 0x85859411U,
 419         0x4545cf8aU, 0xf9f910e9U, 0x02020604U, 0x7f7f81feU,
 420         0x5050f0a0U, 0x3c3c4478U, 0x9f9fba25U, 0xa8a8e34bU,
 421         0x5151f3a2U, 0xa3a3fe5dU, 0x4040c080U, 0x8f8f8a05U,
 422         0x9292ad3fU, 0x9d9dbc21U, 0x38384870U, 0xf5f504f1U,
 423         0xbcbcdf63U, 0xb6b6c177U, 0xdada75afU, 0x21216342U,
 424         0x10103020U, 0xffff1ae5U, 0xf3f30efdU, 0xd2d26dbfU,
 425         0xcdcd4c81U, 0x0c0c1418U, 0x13133526U, 0xecec2fc3U,
 426         0x5f5fe1beU, 0x9797a235U, 0x4444cc88U, 0x1717392eU,
 427         0xc4c45793U, 0xa7a7f255U, 0x7e7e82fcU, 0x3d3d477aU,
 428         0x6464acc8U, 0x5d5de7baU, 0x19192b32U, 0x737395e6U,
 429         0x6060a0c0U, 0x81819819U, 0x4f4fd19eU, 0xdcdc7fa3U,
 430         0x22226644U, 0x2a2a7e54U, 0x9090ab3bU, 0x8888830bU,
 431         0x4646ca8cU, 0xeeee29c7U, 0xb8b8d36bU, 0x14143c28U,
 432         0xdede79a7U, 0x5e5ee2bcU, 0x0b0b1d16U, 0xdbdb76adU,
 433         0xe0e03bdbU, 0x32325664U, 0x3a3a4e74U, 0x0a0a1e14U,
 434         0x4949db92U, 0x06060a0cU, 0x24246c48U, 0x5c5ce4b8U,
 435         0xc2c25d9fU, 0xd3d36ebdU, 0xacacef43U, 0x6262a6c4U,
 436         0x9191a839U, 0x9595a431U, 0xe4e437d3U, 0x79798bf2U,
 437         0xe7e732d5U, 0xc8c8438bU, 0x3737596eU, 0x6d6db7daU,
 438         0x8d8d8c01U, 0xd5d564b1U, 0x4e4ed29cU, 0xa9a9e049U,
 439         0x6c6cb4d8U, 0x5656faacU, 0xf4f407f3U, 0xeaea25cfU,
 440         0x6565afcaU, 0x7a7a8ef4U, 0xaeaee947U, 0x08081810U,
 441         0xbabad56fU, 0x787888f0U, 0x25256f4aU, 0x2e2e725cU,
 442         0x1c1c2438U, 0xa6a6f157U, 0xb4b4c773U, 0xc6c65197U,
 443         0xe8e823cbU, 0xdddd7ca1U, 0x74749ce8U, 0x1f1f213eU,
 444         0x4b4bdd96U, 0xbdbddc61U, 0x8b8b860dU, 0x8a8a850fU,
 445         0x707090e0U, 0x3e3e427cU, 0xb5b5c471U, 0x6666aaccU,
 446         0x4848d890U, 0x03030506U, 0xf6f601f7U, 0x0e0e121cU,
 447         0x6161a3c2U, 0x35355f6aU, 0x5757f9aeU, 0xb9b9d069U,
 448         0x86869117U, 0xc1c15899U, 0x1d1d273aU, 0x9e9eb927U,
 449         0xe1e138d9U, 0xf8f813ebU, 0x9898b32bU, 0x11113322U,
 450         0x6969bbd2U, 0xd9d970a9U, 0x8e8e8907U, 0x9494a733U,
 451         0x9b9bb62dU, 0x1e1e223cU, 0x87879215U, 0xe9e920c9U,
 452         0xcece4987U, 0x5555ffaaU, 0x28287850U, 0xdfdf7aa5U,
 453         0x8c8c8f03U, 0xa1a1f859U, 0x89898009U, 0x0d0d171aU,
 454         0xbfbfda65U, 0xe6e631d7U, 0x4242c684U, 0x6868b8d0U,
 455         0x4141c382U, 0x9999b029U, 0x2d2d775aU, 0x0f0f111eU,
 456         0xb0b0cb7bU, 0x5454fca8U, 0xbbbbd66dU, 0x16163a2cU
 457 };
 458 
 459 #endif /* !sun4u */
 460 
 461 static const uint32_t Te4[256] =   /* S[x] replicated into all four bytes */
 462 {
 463         0x63636363U, 0x7c7c7c7cU, 0x77777777U, 0x7b7b7b7bU,
 464         0xf2f2f2f2U, 0x6b6b6b6bU, 0x6f6f6f6fU, 0xc5c5c5c5U,
 465         0x30303030U, 0x01010101U, 0x67676767U, 0x2b2b2b2bU,
 466         0xfefefefeU, 0xd7d7d7d7U, 0xababababU, 0x76767676U,
 467         0xcacacacaU, 0x82828282U, 0xc9c9c9c9U, 0x7d7d7d7dU,
 468         0xfafafafaU, 0x59595959U, 0x47474747U, 0xf0f0f0f0U,
 469         0xadadadadU, 0xd4d4d4d4U, 0xa2a2a2a2U, 0xafafafafU,
 470         0x9c9c9c9cU, 0xa4a4a4a4U, 0x72727272U, 0xc0c0c0c0U,
 471         0xb7b7b7b7U, 0xfdfdfdfdU, 0x93939393U, 0x26262626U,
 472         0x36363636U, 0x3f3f3f3fU, 0xf7f7f7f7U, 0xccccccccU,
 473         0x34343434U, 0xa5a5a5a5U, 0xe5e5e5e5U, 0xf1f1f1f1U,
 474         0x71717171U, 0xd8d8d8d8U, 0x31313131U, 0x15151515U,
 475         0x04040404U, 0xc7c7c7c7U, 0x23232323U, 0xc3c3c3c3U,
 476         0x18181818U, 0x96969696U, 0x05050505U, 0x9a9a9a9aU,
 477         0x07070707U, 0x12121212U, 0x80808080U, 0xe2e2e2e2U,
 478         0xebebebebU, 0x27272727U, 0xb2b2b2b2U, 0x75757575U,
 479         0x09090909U, 0x83838383U, 0x2c2c2c2cU, 0x1a1a1a1aU,
 480         0x1b1b1b1bU, 0x6e6e6e6eU, 0x5a5a5a5aU, 0xa0a0a0a0U,
 481         0x52525252U, 0x3b3b3b3bU, 0xd6d6d6d6U, 0xb3b3b3b3U,
 482         0x29292929U, 0xe3e3e3e3U, 0x2f2f2f2fU, 0x84848484U,
 483         0x53535353U, 0xd1d1d1d1U, 0x00000000U, 0xededededU,
 484         0x20202020U, 0xfcfcfcfcU, 0xb1b1b1b1U, 0x5b5b5b5bU,
 485         0x6a6a6a6aU, 0xcbcbcbcbU, 0xbebebebeU, 0x39393939U,
 486         0x4a4a4a4aU, 0x4c4c4c4cU, 0x58585858U, 0xcfcfcfcfU,
 487         0xd0d0d0d0U, 0xefefefefU, 0xaaaaaaaaU, 0xfbfbfbfbU,
 488         0x43434343U, 0x4d4d4d4dU, 0x33333333U, 0x85858585U,
 489         0x45454545U, 0xf9f9f9f9U, 0x02020202U, 0x7f7f7f7fU,
 490         0x50505050U, 0x3c3c3c3cU, 0x9f9f9f9fU, 0xa8a8a8a8U,
 491         0x51515151U, 0xa3a3a3a3U, 0x40404040U, 0x8f8f8f8fU,
 492         0x92929292U, 0x9d9d9d9dU, 0x38383838U, 0xf5f5f5f5U,
 493         0xbcbcbcbcU, 0xb6b6b6b6U, 0xdadadadaU, 0x21212121U,
 494         0x10101010U, 0xffffffffU, 0xf3f3f3f3U, 0xd2d2d2d2U,
 495         0xcdcdcdcdU, 0x0c0c0c0cU, 0x13131313U, 0xececececU,
 496         0x5f5f5f5fU, 0x97979797U, 0x44444444U, 0x17171717U,
 497         0xc4c4c4c4U, 0xa7a7a7a7U, 0x7e7e7e7eU, 0x3d3d3d3dU,
 498         0x64646464U, 0x5d5d5d5dU, 0x19191919U, 0x73737373U,
 499         0x60606060U, 0x81818181U, 0x4f4f4f4fU, 0xdcdcdcdcU,
 500         0x22222222U, 0x2a2a2a2aU, 0x90909090U, 0x88888888U,
 501         0x46464646U, 0xeeeeeeeeU, 0xb8b8b8b8U, 0x14141414U,
 502         0xdedededeU, 0x5e5e5e5eU, 0x0b0b0b0bU, 0xdbdbdbdbU,
 503         0xe0e0e0e0U, 0x32323232U, 0x3a3a3a3aU, 0x0a0a0a0aU,
 504         0x49494949U, 0x06060606U, 0x24242424U, 0x5c5c5c5cU,
 505         0xc2c2c2c2U, 0xd3d3d3d3U, 0xacacacacU, 0x62626262U,
 506         0x91919191U, 0x95959595U, 0xe4e4e4e4U, 0x79797979U,
 507         0xe7e7e7e7U, 0xc8c8c8c8U, 0x37373737U, 0x6d6d6d6dU,
 508         0x8d8d8d8dU, 0xd5d5d5d5U, 0x4e4e4e4eU, 0xa9a9a9a9U,
 509         0x6c6c6c6cU, 0x56565656U, 0xf4f4f4f4U, 0xeaeaeaeaU,
 510         0x65656565U, 0x7a7a7a7aU, 0xaeaeaeaeU, 0x08080808U,
 511         0xbabababaU, 0x78787878U, 0x25252525U, 0x2e2e2e2eU,
 512         0x1c1c1c1cU, 0xa6a6a6a6U, 0xb4b4b4b4U, 0xc6c6c6c6U,
 513         0xe8e8e8e8U, 0xddddddddU, 0x74747474U, 0x1f1f1f1fU,
 514         0x4b4b4b4bU, 0xbdbdbdbdU, 0x8b8b8b8bU, 0x8a8a8a8aU,
 515         0x70707070U, 0x3e3e3e3eU, 0xb5b5b5b5U, 0x66666666U,
 516         0x48484848U, 0x03030303U, 0xf6f6f6f6U, 0x0e0e0e0eU,
 517         0x61616161U, 0x35353535U, 0x57575757U, 0xb9b9b9b9U,
 518         0x86868686U, 0xc1c1c1c1U, 0x1d1d1d1dU, 0x9e9e9e9eU,
 519         0xe1e1e1e1U, 0xf8f8f8f8U, 0x98989898U, 0x11111111U,
 520         0x69696969U, 0xd9d9d9d9U, 0x8e8e8e8eU, 0x94949494U,
 521         0x9b9b9b9bU, 0x1e1e1e1eU, 0x87878787U, 0xe9e9e9e9U,
 522         0xcecececeU, 0x55555555U, 0x28282828U, 0xdfdfdfdfU,
 523         0x8c8c8c8cU, 0xa1a1a1a1U, 0x89898989U, 0x0d0d0d0dU,
 524         0xbfbfbfbfU, 0xe6e6e6e6U, 0x42424242U, 0x68686868U,
 525         0x41414141U, 0x99999999U, 0x2d2d2d2dU, 0x0f0f0f0fU,
 526         0xb0b0b0b0U, 0x54545454U, 0xbbbbbbbbU, 0x16161616U
 527 };
 528 
 529 /* Decrypt Sbox constants (for the substitute bytes operation) */
 530 
 531 static const uint32_t Td0[256] =   /* bytes, MSB first: Si[x] times 0e, 09, 0d, 0b */
 532 {
 533         0x51f4a750U, 0x7e416553U, 0x1a17a4c3U, 0x3a275e96U,
 534         0x3bab6bcbU, 0x1f9d45f1U, 0xacfa58abU, 0x4be30393U,
 535         0x2030fa55U, 0xad766df6U, 0x88cc7691U, 0xf5024c25U,
 536         0x4fe5d7fcU, 0xc52acbd7U, 0x26354480U, 0xb562a38fU,
 537         0xdeb15a49U, 0x25ba1b67U, 0x45ea0e98U, 0x5dfec0e1U,
 538         0xc32f7502U, 0x814cf012U, 0x8d4697a3U, 0x6bd3f9c6U,
 539         0x038f5fe7U, 0x15929c95U, 0xbf6d7aebU, 0x955259daU,
 540         0xd4be832dU, 0x587421d3U, 0x49e06929U, 0x8ec9c844U,
 541         0x75c2896aU, 0xf48e7978U, 0x99583e6bU, 0x27b971ddU,
 542         0xbee14fb6U, 0xf088ad17U, 0xc920ac66U, 0x7dce3ab4U,
 543         0x63df4a18U, 0xe51a3182U, 0x97513360U, 0x62537f45U,
 544         0xb16477e0U, 0xbb6bae84U, 0xfe81a01cU, 0xf9082b94U,
 545         0x70486858U, 0x8f45fd19U, 0x94de6c87U, 0x527bf8b7U,
 546         0xab73d323U, 0x724b02e2U, 0xe31f8f57U, 0x6655ab2aU,
 547         0xb2eb2807U, 0x2fb5c203U, 0x86c57b9aU, 0xd33708a5U,
 548         0x302887f2U, 0x23bfa5b2U, 0x02036abaU, 0xed16825cU,
 549         0x8acf1c2bU, 0xa779b492U, 0xf307f2f0U, 0x4e69e2a1U,
 550         0x65daf4cdU, 0x0605bed5U, 0xd134621fU, 0xc4a6fe8aU,
 551         0x342e539dU, 0xa2f355a0U, 0x058ae132U, 0xa4f6eb75U,
 552         0x0b83ec39U, 0x4060efaaU, 0x5e719f06U, 0xbd6e1051U,
 553         0x3e218af9U, 0x96dd063dU, 0xdd3e05aeU, 0x4de6bd46U,
 554         0x91548db5U, 0x71c45d05U, 0x0406d46fU, 0x605015ffU,
 555         0x1998fb24U, 0xd6bde997U, 0x894043ccU, 0x67d99e77U,
 556         0xb0e842bdU, 0x07898b88U, 0xe7195b38U, 0x79c8eedbU,
 557         0xa17c0a47U, 0x7c420fe9U, 0xf8841ec9U, 0x00000000U,
 558         0x09808683U, 0x322bed48U, 0x1e1170acU, 0x6c5a724eU,
 559         0xfd0efffbU, 0x0f853856U, 0x3daed51eU, 0x362d3927U,
 560         0x0a0fd964U, 0x685ca621U, 0x9b5b54d1U, 0x24362e3aU,
 561         0x0c0a67b1U, 0x9357e70fU, 0xb4ee96d2U, 0x1b9b919eU,
 562         0x80c0c54fU, 0x61dc20a2U, 0x5a774b69U, 0x1c121a16U,
 563         0xe293ba0aU, 0xc0a02ae5U, 0x3c22e043U, 0x121b171dU,
 564         0x0e090d0bU, 0xf28bc7adU, 0x2db6a8b9U, 0x141ea9c8U,
 565         0x57f11985U, 0xaf75074cU, 0xee99ddbbU, 0xa37f60fdU,
 566         0xf701269fU, 0x5c72f5bcU, 0x44663bc5U, 0x5bfb7e34U,
 567         0x8b432976U, 0xcb23c6dcU, 0xb6edfc68U, 0xb8e4f163U,
 568         0xd731dccaU, 0x42638510U, 0x13972240U, 0x84c61120U,
 569         0x854a247dU, 0xd2bb3df8U, 0xaef93211U, 0xc729a16dU,
 570         0x1d9e2f4bU, 0xdcb230f3U, 0x0d8652ecU, 0x77c1e3d0U,
 571         0x2bb3166cU, 0xa970b999U, 0x119448faU, 0x47e96422U,
 572         0xa8fc8cc4U, 0xa0f03f1aU, 0x567d2cd8U, 0x223390efU,
 573         0x87494ec7U, 0xd938d1c1U, 0x8ccaa2feU, 0x98d40b36U,
 574         0xa6f581cfU, 0xa57ade28U, 0xdab78e26U, 0x3fadbfa4U,
 575         0x2c3a9de4U, 0x5078920dU, 0x6a5fcc9bU, 0x547e4662U,
 576         0xf68d13c2U, 0x90d8b8e8U, 0x2e39f75eU, 0x82c3aff5U,
 577         0x9f5d80beU, 0x69d0937cU, 0x6fd52da9U, 0xcf2512b3U,
 578         0xc8ac993bU, 0x10187da7U, 0xe89c636eU, 0xdb3bbb7bU,
 579         0xcd267809U, 0x6e5918f4U, 0xec9ab701U, 0x834f9aa8U,
 580         0xe6956e65U, 0xaaffe67eU, 0x21bccf08U, 0xef15e8e6U,
 581         0xbae79bd9U, 0x4a6f36ceU, 0xea9f09d4U, 0x29b07cd6U,
 582         0x31a4b2afU, 0x2a3f2331U, 0xc6a59430U, 0x35a266c0U,
 583         0x744ebc37U, 0xfc82caa6U, 0xe090d0b0U, 0x33a7d815U,
 584         0xf104984aU, 0x41ecdaf7U, 0x7fcd500eU, 0x1791f62fU,
 585         0x764dd68dU, 0x43efb04dU, 0xccaa4d54U, 0xe49604dfU,
 586         0x9ed1b5e3U, 0x4c6a881bU, 0xc12c1fb8U, 0x4665517fU,
 587         0x9d5eea04U, 0x018c355dU, 0xfa877473U, 0xfb0b412eU,
 588         0xb3671d5aU, 0x92dbd252U, 0xe9105633U, 0x6dd64713U,
 589         0x9ad7618cU, 0x37a10c7aU, 0x59f8148eU, 0xeb133c89U,
 590         0xcea927eeU, 0xb761c935U, 0xe11ce5edU, 0x7a47b13cU,
 591         0x9cd2df59U, 0x55f2733fU, 0x1814ce79U, 0x73c737bfU,
 592         0x53f7cdeaU, 0x5ffdaa5bU, 0xdf3d6f14U, 0x7844db86U,
 593         0xcaaff381U, 0xb968c43eU, 0x3824342cU, 0xc2a3405fU,
 594         0x161dc372U, 0xbce2250cU, 0x283c498bU, 0xff0d9541U,
 595         0x39a80171U, 0x080cb3deU, 0xd8b4e49cU, 0x6456c190U,
 596         0x7bcb8461U, 0xd532b670U, 0x486c5c74U, 0xd0b85742U
 597 };
 598 
 599 static const uint32_t Td1[256] =
 600 {
 601         0x5051f4a7U, 0x537e4165U, 0xc31a17a4U, 0x963a275eU,
 602         0xcb3bab6bU, 0xf11f9d45U, 0xabacfa58U, 0x934be303U,
 603         0x552030faU, 0xf6ad766dU, 0x9188cc76U, 0x25f5024cU,
 604         0xfc4fe5d7U, 0xd7c52acbU, 0x80263544U, 0x8fb562a3U,
 605         0x49deb15aU, 0x6725ba1bU, 0x9845ea0eU, 0xe15dfec0U,
 606         0x02c32f75U, 0x12814cf0U, 0xa38d4697U, 0xc66bd3f9U,
 607         0xe7038f5fU, 0x9515929cU, 0xebbf6d7aU, 0xda955259U,
 608         0x2dd4be83U, 0xd3587421U, 0x2949e069U, 0x448ec9c8U,
 609         0x6a75c289U, 0x78f48e79U, 0x6b99583eU, 0xdd27b971U,
 610         0xb6bee14fU, 0x17f088adU, 0x66c920acU, 0xb47dce3aU,
 611         0x1863df4aU, 0x82e51a31U, 0x60975133U, 0x4562537fU,
 612         0xe0b16477U, 0x84bb6baeU, 0x1cfe81a0U, 0x94f9082bU,
 613         0x58704868U, 0x198f45fdU, 0x8794de6cU, 0xb7527bf8U,
 614         0x23ab73d3U, 0xe2724b02U, 0x57e31f8fU, 0x2a6655abU,
 615         0x07b2eb28U, 0x032fb5c2U, 0x9a86c57bU, 0xa5d33708U,
 616         0xf2302887U, 0xb223bfa5U, 0xba02036aU, 0x5ced1682U,
 617         0x2b8acf1cU, 0x92a779b4U, 0xf0f307f2U, 0xa14e69e2U,
 618         0xcd65daf4U, 0xd50605beU, 0x1fd13462U, 0x8ac4a6feU,
 619         0x9d342e53U, 0xa0a2f355U, 0x32058ae1U, 0x75a4f6ebU,
 620         0x390b83ecU, 0xaa4060efU, 0x065e719fU, 0x51bd6e10U,
 621         0xf93e218aU, 0x3d96dd06U, 0xaedd3e05U, 0x464de6bdU,
 622         0xb591548dU, 0x0571c45dU, 0x6f0406d4U, 0xff605015U,
 623         0x241998fbU, 0x97d6bde9U, 0xcc894043U, 0x7767d99eU,
 624         0xbdb0e842U, 0x8807898bU, 0x38e7195bU, 0xdb79c8eeU,
 625         0x47a17c0aU, 0xe97c420fU, 0xc9f8841eU, 0x00000000U,
 626         0x83098086U, 0x48322bedU, 0xac1e1170U, 0x4e6c5a72U,
 627         0xfbfd0effU, 0x560f8538U, 0x1e3daed5U, 0x27362d39U,
 628         0x640a0fd9U, 0x21685ca6U, 0xd19b5b54U, 0x3a24362eU,
 629         0xb10c0a67U, 0x0f9357e7U, 0xd2b4ee96U, 0x9e1b9b91U,
 630         0x4f80c0c5U, 0xa261dc20U, 0x695a774bU, 0x161c121aU,
 631         0x0ae293baU, 0xe5c0a02aU, 0x433c22e0U, 0x1d121b17U,
 632         0x0b0e090dU, 0xadf28bc7U, 0xb92db6a8U, 0xc8141ea9U,
 633         0x8557f119U, 0x4caf7507U, 0xbbee99ddU, 0xfda37f60U,
 634         0x9ff70126U, 0xbc5c72f5U, 0xc544663bU, 0x345bfb7eU,
 635         0x768b4329U, 0xdccb23c6U, 0x68b6edfcU, 0x63b8e4f1U,
 636         0xcad731dcU, 0x10426385U, 0x40139722U, 0x2084c611U,
 637         0x7d854a24U, 0xf8d2bb3dU, 0x11aef932U, 0x6dc729a1U,
 638         0x4b1d9e2fU, 0xf3dcb230U, 0xec0d8652U, 0xd077c1e3U,
 639         0x6c2bb316U, 0x99a970b9U, 0xfa119448U, 0x2247e964U,
 640         0xc4a8fc8cU, 0x1aa0f03fU, 0xd8567d2cU, 0xef223390U,
 641         0xc787494eU, 0xc1d938d1U, 0xfe8ccaa2U, 0x3698d40bU,
 642         0xcfa6f581U, 0x28a57adeU, 0x26dab78eU, 0xa43fadbfU,
 643         0xe42c3a9dU, 0x0d507892U, 0x9b6a5fccU, 0x62547e46U,
 644         0xc2f68d13U, 0xe890d8b8U, 0x5e2e39f7U, 0xf582c3afU,
 645         0xbe9f5d80U, 0x7c69d093U, 0xa96fd52dU, 0xb3cf2512U,
 646         0x3bc8ac99U, 0xa710187dU, 0x6ee89c63U, 0x7bdb3bbbU,
 647         0x09cd2678U, 0xf46e5918U, 0x01ec9ab7U, 0xa8834f9aU,
 648         0x65e6956eU, 0x7eaaffe6U, 0x0821bccfU, 0xe6ef15e8U,
 649         0xd9bae79bU, 0xce4a6f36U, 0xd4ea9f09U, 0xd629b07cU,
 650         0xaf31a4b2U, 0x312a3f23U, 0x30c6a594U, 0xc035a266U,
 651         0x37744ebcU, 0xa6fc82caU, 0xb0e090d0U, 0x1533a7d8U,
 652         0x4af10498U, 0xf741ecdaU, 0x0e7fcd50U, 0x2f1791f6U,
 653         0x8d764dd6U, 0x4d43efb0U, 0x54ccaa4dU, 0xdfe49604U,
 654         0xe39ed1b5U, 0x1b4c6a88U, 0xb8c12c1fU, 0x7f466551U,
 655         0x049d5eeaU, 0x5d018c35U, 0x73fa8774U, 0x2efb0b41U,
 656         0x5ab3671dU, 0x5292dbd2U, 0x33e91056U, 0x136dd647U,
 657         0x8c9ad761U, 0x7a37a10cU, 0x8e59f814U, 0x89eb133cU,
 658         0xeecea927U, 0x35b761c9U, 0xede11ce5U, 0x3c7a47b1U,
 659         0x599cd2dfU, 0x3f55f273U, 0x791814ceU, 0xbf73c737U,
 660         0xea53f7cdU, 0x5b5ffdaaU, 0x14df3d6fU, 0x867844dbU,
 661         0x81caaff3U, 0x3eb968c4U, 0x2c382434U, 0x5fc2a340U,
 662         0x72161dc3U, 0x0cbce225U, 0x8b283c49U, 0x41ff0d95U,
 663         0x7139a801U, 0xde080cb3U, 0x9cd8b4e4U, 0x906456c1U,
 664         0x617bcb84U, 0x70d532b6U, 0x74486c5cU, 0x42d0b857U
 665 };
 666 
 667 static const uint32_t Td2[256] =
 668 {
 669         0xa75051f4U, 0x65537e41U, 0xa4c31a17U, 0x5e963a27U,
 670         0x6bcb3babU, 0x45f11f9dU, 0x58abacfaU, 0x03934be3U,
 671         0xfa552030U, 0x6df6ad76U, 0x769188ccU, 0x4c25f502U,
 672         0xd7fc4fe5U, 0xcbd7c52aU, 0x44802635U, 0xa38fb562U,
 673         0x5a49deb1U, 0x1b6725baU, 0x0e9845eaU, 0xc0e15dfeU,
 674         0x7502c32fU, 0xf012814cU, 0x97a38d46U, 0xf9c66bd3U,
 675         0x5fe7038fU, 0x9c951592U, 0x7aebbf6dU, 0x59da9552U,
 676         0x832dd4beU, 0x21d35874U, 0x692949e0U, 0xc8448ec9U,
 677         0x896a75c2U, 0x7978f48eU, 0x3e6b9958U, 0x71dd27b9U,
 678         0x4fb6bee1U, 0xad17f088U, 0xac66c920U, 0x3ab47dceU,
 679         0x4a1863dfU, 0x3182e51aU, 0x33609751U, 0x7f456253U,
 680         0x77e0b164U, 0xae84bb6bU, 0xa01cfe81U, 0x2b94f908U,
 681         0x68587048U, 0xfd198f45U, 0x6c8794deU, 0xf8b7527bU,
 682         0xd323ab73U, 0x02e2724bU, 0x8f57e31fU, 0xab2a6655U,
 683         0x2807b2ebU, 0xc2032fb5U, 0x7b9a86c5U, 0x08a5d337U,
 684         0x87f23028U, 0xa5b223bfU, 0x6aba0203U, 0x825ced16U,
 685         0x1c2b8acfU, 0xb492a779U, 0xf2f0f307U, 0xe2a14e69U,
 686         0xf4cd65daU, 0xbed50605U, 0x621fd134U, 0xfe8ac4a6U,
 687         0x539d342eU, 0x55a0a2f3U, 0xe132058aU, 0xeb75a4f6U,
 688         0xec390b83U, 0xefaa4060U, 0x9f065e71U, 0x1051bd6eU,
 689         0x8af93e21U, 0x063d96ddU, 0x05aedd3eU, 0xbd464de6U,
 690         0x8db59154U, 0x5d0571c4U, 0xd46f0406U, 0x15ff6050U,
 691         0xfb241998U, 0xe997d6bdU, 0x43cc8940U, 0x9e7767d9U,
 692         0x42bdb0e8U, 0x8b880789U, 0x5b38e719U, 0xeedb79c8U,
 693         0x0a47a17cU, 0x0fe97c42U, 0x1ec9f884U, 0x00000000U,
 694         0x86830980U, 0xed48322bU, 0x70ac1e11U, 0x724e6c5aU,
 695         0xfffbfd0eU, 0x38560f85U, 0xd51e3daeU, 0x3927362dU,
 696         0xd9640a0fU, 0xa621685cU, 0x54d19b5bU, 0x2e3a2436U,
 697         0x67b10c0aU, 0xe70f9357U, 0x96d2b4eeU, 0x919e1b9bU,
 698         0xc54f80c0U, 0x20a261dcU, 0x4b695a77U, 0x1a161c12U,
 699         0xba0ae293U, 0x2ae5c0a0U, 0xe0433c22U, 0x171d121bU,
 700         0x0d0b0e09U, 0xc7adf28bU, 0xa8b92db6U, 0xa9c8141eU,
 701         0x198557f1U, 0x074caf75U, 0xddbbee99U, 0x60fda37fU,
 702         0x269ff701U, 0xf5bc5c72U, 0x3bc54466U, 0x7e345bfbU,
 703         0x29768b43U, 0xc6dccb23U, 0xfc68b6edU, 0xf163b8e4U,
 704         0xdccad731U, 0x85104263U, 0x22401397U, 0x112084c6U,
 705         0x247d854aU, 0x3df8d2bbU, 0x3211aef9U, 0xa16dc729U,
 706         0x2f4b1d9eU, 0x30f3dcb2U, 0x52ec0d86U, 0xe3d077c1U,
 707         0x166c2bb3U, 0xb999a970U, 0x48fa1194U, 0x642247e9U,
 708         0x8cc4a8fcU, 0x3f1aa0f0U, 0x2cd8567dU, 0x90ef2233U,
 709         0x4ec78749U, 0xd1c1d938U, 0xa2fe8ccaU, 0x0b3698d4U,
 710         0x81cfa6f5U, 0xde28a57aU, 0x8e26dab7U, 0xbfa43fadU,
 711         0x9de42c3aU, 0x920d5078U, 0xcc9b6a5fU, 0x4662547eU,
 712         0x13c2f68dU, 0xb8e890d8U, 0xf75e2e39U, 0xaff582c3U,
 713         0x80be9f5dU, 0x937c69d0U, 0x2da96fd5U, 0x12b3cf25U,
 714         0x993bc8acU, 0x7da71018U, 0x636ee89cU, 0xbb7bdb3bU,
 715         0x7809cd26U, 0x18f46e59U, 0xb701ec9aU, 0x9aa8834fU,
 716         0x6e65e695U, 0xe67eaaffU, 0xcf0821bcU, 0xe8e6ef15U,
 717         0x9bd9bae7U, 0x36ce4a6fU, 0x09d4ea9fU, 0x7cd629b0U,
 718         0xb2af31a4U, 0x23312a3fU, 0x9430c6a5U, 0x66c035a2U,
 719         0xbc37744eU, 0xcaa6fc82U, 0xd0b0e090U, 0xd81533a7U,
 720         0x984af104U, 0xdaf741ecU, 0x500e7fcdU, 0xf62f1791U,
 721         0xd68d764dU, 0xb04d43efU, 0x4d54ccaaU, 0x04dfe496U,
 722         0xb5e39ed1U, 0x881b4c6aU, 0x1fb8c12cU, 0x517f4665U,
 723         0xea049d5eU, 0x355d018cU, 0x7473fa87U, 0x412efb0bU,
 724         0x1d5ab367U, 0xd25292dbU, 0x5633e910U, 0x47136dd6U,
 725         0x618c9ad7U, 0x0c7a37a1U, 0x148e59f8U, 0x3c89eb13U,
 726         0x27eecea9U, 0xc935b761U, 0xe5ede11cU, 0xb13c7a47U,
 727         0xdf599cd2U, 0x733f55f2U, 0xce791814U, 0x37bf73c7U,
 728         0xcdea53f7U, 0xaa5b5ffdU, 0x6f14df3dU, 0xdb867844U,
 729         0xf381caafU, 0xc43eb968U, 0x342c3824U, 0x405fc2a3U,
 730         0xc372161dU, 0x250cbce2U, 0x498b283cU, 0x9541ff0dU,
 731         0x017139a8U, 0xb3de080cU, 0xe49cd8b4U, 0xc1906456U,
 732         0x84617bcbU, 0xb670d532U, 0x5c74486cU, 0x5742d0b8U
 733 };
 734 
 735 static const uint32_t Td3[256] =
 736 {
 737         0xf4a75051U, 0x4165537eU, 0x17a4c31aU, 0x275e963aU,
 738         0xab6bcb3bU, 0x9d45f11fU, 0xfa58abacU, 0xe303934bU,
 739         0x30fa5520U, 0x766df6adU, 0xcc769188U, 0x024c25f5U,
 740         0xe5d7fc4fU, 0x2acbd7c5U, 0x35448026U, 0x62a38fb5U,
 741         0xb15a49deU, 0xba1b6725U, 0xea0e9845U, 0xfec0e15dU,
 742         0x2f7502c3U, 0x4cf01281U, 0x4697a38dU, 0xd3f9c66bU,
 743         0x8f5fe703U, 0x929c9515U, 0x6d7aebbfU, 0x5259da95U,
 744         0xbe832dd4U, 0x7421d358U, 0xe0692949U, 0xc9c8448eU,
 745         0xc2896a75U, 0x8e7978f4U, 0x583e6b99U, 0xb971dd27U,
 746         0xe14fb6beU, 0x88ad17f0U, 0x20ac66c9U, 0xce3ab47dU,
 747         0xdf4a1863U, 0x1a3182e5U, 0x51336097U, 0x537f4562U,
 748         0x6477e0b1U, 0x6bae84bbU, 0x81a01cfeU, 0x082b94f9U,
 749         0x48685870U, 0x45fd198fU, 0xde6c8794U, 0x7bf8b752U,
 750         0x73d323abU, 0x4b02e272U, 0x1f8f57e3U, 0x55ab2a66U,
 751         0xeb2807b2U, 0xb5c2032fU, 0xc57b9a86U, 0x3708a5d3U,
 752         0x2887f230U, 0xbfa5b223U, 0x036aba02U, 0x16825cedU,
 753         0xcf1c2b8aU, 0x79b492a7U, 0x07f2f0f3U, 0x69e2a14eU,
 754         0xdaf4cd65U, 0x05bed506U, 0x34621fd1U, 0xa6fe8ac4U,
 755         0x2e539d34U, 0xf355a0a2U, 0x8ae13205U, 0xf6eb75a4U,
 756         0x83ec390bU, 0x60efaa40U, 0x719f065eU, 0x6e1051bdU,
 757         0x218af93eU, 0xdd063d96U, 0x3e05aeddU, 0xe6bd464dU,
 758         0x548db591U, 0xc45d0571U, 0x06d46f04U, 0x5015ff60U,
 759         0x98fb2419U, 0xbde997d6U, 0x4043cc89U, 0xd99e7767U,
 760         0xe842bdb0U, 0x898b8807U, 0x195b38e7U, 0xc8eedb79U,
 761         0x7c0a47a1U, 0x420fe97cU, 0x841ec9f8U, 0x00000000U,
 762         0x80868309U, 0x2bed4832U, 0x1170ac1eU, 0x5a724e6cU,
 763         0x0efffbfdU, 0x8538560fU, 0xaed51e3dU, 0x2d392736U,
 764         0x0fd9640aU, 0x5ca62168U, 0x5b54d19bU, 0x362e3a24U,
 765         0x0a67b10cU, 0x57e70f93U, 0xee96d2b4U, 0x9b919e1bU,
 766         0xc0c54f80U, 0xdc20a261U, 0x774b695aU, 0x121a161cU,
 767         0x93ba0ae2U, 0xa02ae5c0U, 0x22e0433cU, 0x1b171d12U,
 768         0x090d0b0eU, 0x8bc7adf2U, 0xb6a8b92dU, 0x1ea9c814U,
 769         0xf1198557U, 0x75074cafU, 0x99ddbbeeU, 0x7f60fda3U,
 770         0x01269ff7U, 0x72f5bc5cU, 0x663bc544U, 0xfb7e345bU,
 771         0x4329768bU, 0x23c6dccbU, 0xedfc68b6U, 0xe4f163b8U,
 772         0x31dccad7U, 0x63851042U, 0x97224013U, 0xc6112084U,
 773         0x4a247d85U, 0xbb3df8d2U, 0xf93211aeU, 0x29a16dc7U,
 774         0x9e2f4b1dU, 0xb230f3dcU, 0x8652ec0dU, 0xc1e3d077U,
 775         0xb3166c2bU, 0x70b999a9U, 0x9448fa11U, 0xe9642247U,
 776         0xfc8cc4a8U, 0xf03f1aa0U, 0x7d2cd856U, 0x3390ef22U,
 777         0x494ec787U, 0x38d1c1d9U, 0xcaa2fe8cU, 0xd40b3698U,
 778         0xf581cfa6U, 0x7ade28a5U, 0xb78e26daU, 0xadbfa43fU,
 779         0x3a9de42cU, 0x78920d50U, 0x5fcc9b6aU, 0x7e466254U,
 780         0x8d13c2f6U, 0xd8b8e890U, 0x39f75e2eU, 0xc3aff582U,
 781         0x5d80be9fU, 0xd0937c69U, 0xd52da96fU, 0x2512b3cfU,
 782         0xac993bc8U, 0x187da710U, 0x9c636ee8U, 0x3bbb7bdbU,
 783         0x267809cdU, 0x5918f46eU, 0x9ab701ecU, 0x4f9aa883U,
 784         0x956e65e6U, 0xffe67eaaU, 0xbccf0821U, 0x15e8e6efU,
 785         0xe79bd9baU, 0x6f36ce4aU, 0x9f09d4eaU, 0xb07cd629U,
 786         0xa4b2af31U, 0x3f23312aU, 0xa59430c6U, 0xa266c035U,
 787         0x4ebc3774U, 0x82caa6fcU, 0x90d0b0e0U, 0xa7d81533U,
 788         0x04984af1U, 0xecdaf741U, 0xcd500e7fU, 0x91f62f17U,
 789         0x4dd68d76U, 0xefb04d43U, 0xaa4d54ccU, 0x9604dfe4U,
 790         0xd1b5e39eU, 0x6a881b4cU, 0x2c1fb8c1U, 0x65517f46U,
 791         0x5eea049dU, 0x8c355d01U, 0x877473faU, 0x0b412efbU,
 792         0x671d5ab3U, 0xdbd25292U, 0x105633e9U, 0xd647136dU,
 793         0xd7618c9aU, 0xa10c7a37U, 0xf8148e59U, 0x133c89ebU,
 794         0xa927eeceU, 0x61c935b7U, 0x1ce5ede1U, 0x47b13c7aU,
 795         0xd2df599cU, 0xf2733f55U, 0x14ce7918U, 0xc737bf73U,
 796         0xf7cdea53U, 0xfdaa5b5fU, 0x3d6f14dfU, 0x44db8678U,
 797         0xaff381caU, 0x68c43eb9U, 0x24342c38U, 0xa3405fc2U,
 798         0x1dc37216U, 0xe2250cbcU, 0x3c498b28U, 0x0d9541ffU,
 799         0xa8017139U, 0x0cb3de08U, 0xb4e49cd8U, 0x56c19064U,
 800         0xcb84617bU, 0x32b670d5U, 0x6c5c7448U, 0xb85742d0U
 801 };
 802 
 803 #ifndef sun4u
 804 
 805 static const uint32_t Td4[256] =
 806 {
 807         0x52525252U, 0x09090909U, 0x6a6a6a6aU, 0xd5d5d5d5U,
 808         0x30303030U, 0x36363636U, 0xa5a5a5a5U, 0x38383838U,
 809         0xbfbfbfbfU, 0x40404040U, 0xa3a3a3a3U, 0x9e9e9e9eU,
 810         0x81818181U, 0xf3f3f3f3U, 0xd7d7d7d7U, 0xfbfbfbfbU,
 811         0x7c7c7c7cU, 0xe3e3e3e3U, 0x39393939U, 0x82828282U,
 812         0x9b9b9b9bU, 0x2f2f2f2fU, 0xffffffffU, 0x87878787U,
 813         0x34343434U, 0x8e8e8e8eU, 0x43434343U, 0x44444444U,
 814         0xc4c4c4c4U, 0xdedededeU, 0xe9e9e9e9U, 0xcbcbcbcbU,
 815         0x54545454U, 0x7b7b7b7bU, 0x94949494U, 0x32323232U,
 816         0xa6a6a6a6U, 0xc2c2c2c2U, 0x23232323U, 0x3d3d3d3dU,
 817         0xeeeeeeeeU, 0x4c4c4c4cU, 0x95959595U, 0x0b0b0b0bU,
 818         0x42424242U, 0xfafafafaU, 0xc3c3c3c3U, 0x4e4e4e4eU,
 819         0x08080808U, 0x2e2e2e2eU, 0xa1a1a1a1U, 0x66666666U,
 820         0x28282828U, 0xd9d9d9d9U, 0x24242424U, 0xb2b2b2b2U,
 821         0x76767676U, 0x5b5b5b5bU, 0xa2a2a2a2U, 0x49494949U,
 822         0x6d6d6d6dU, 0x8b8b8b8bU, 0xd1d1d1d1U, 0x25252525U,
 823         0x72727272U, 0xf8f8f8f8U, 0xf6f6f6f6U, 0x64646464U,
 824         0x86868686U, 0x68686868U, 0x98989898U, 0x16161616U,
 825         0xd4d4d4d4U, 0xa4a4a4a4U, 0x5c5c5c5cU, 0xccccccccU,
 826         0x5d5d5d5dU, 0x65656565U, 0xb6b6b6b6U, 0x92929292U,
 827         0x6c6c6c6cU, 0x70707070U, 0x48484848U, 0x50505050U,
 828         0xfdfdfdfdU, 0xededededU, 0xb9b9b9b9U, 0xdadadadaU,
 829         0x5e5e5e5eU, 0x15151515U, 0x46464646U, 0x57575757U,
 830         0xa7a7a7a7U, 0x8d8d8d8dU, 0x9d9d9d9dU, 0x84848484U,
 831         0x90909090U, 0xd8d8d8d8U, 0xababababU, 0x00000000U,
 832         0x8c8c8c8cU, 0xbcbcbcbcU, 0xd3d3d3d3U, 0x0a0a0a0aU,
 833         0xf7f7f7f7U, 0xe4e4e4e4U, 0x58585858U, 0x05050505U,
 834         0xb8b8b8b8U, 0xb3b3b3b3U, 0x45454545U, 0x06060606U,
 835         0xd0d0d0d0U, 0x2c2c2c2cU, 0x1e1e1e1eU, 0x8f8f8f8fU,
 836         0xcacacacaU, 0x3f3f3f3fU, 0x0f0f0f0fU, 0x02020202U,
 837         0xc1c1c1c1U, 0xafafafafU, 0xbdbdbdbdU, 0x03030303U,
 838         0x01010101U, 0x13131313U, 0x8a8a8a8aU, 0x6b6b6b6bU,
 839         0x3a3a3a3aU, 0x91919191U, 0x11111111U, 0x41414141U,
 840         0x4f4f4f4fU, 0x67676767U, 0xdcdcdcdcU, 0xeaeaeaeaU,
 841         0x97979797U, 0xf2f2f2f2U, 0xcfcfcfcfU, 0xcecececeU,
 842         0xf0f0f0f0U, 0xb4b4b4b4U, 0xe6e6e6e6U, 0x73737373U,
 843         0x96969696U, 0xacacacacU, 0x74747474U, 0x22222222U,
 844         0xe7e7e7e7U, 0xadadadadU, 0x35353535U, 0x85858585U,
 845         0xe2e2e2e2U, 0xf9f9f9f9U, 0x37373737U, 0xe8e8e8e8U,
 846         0x1c1c1c1cU, 0x75757575U, 0xdfdfdfdfU, 0x6e6e6e6eU,
 847         0x47474747U, 0xf1f1f1f1U, 0x1a1a1a1aU, 0x71717171U,
 848         0x1d1d1d1dU, 0x29292929U, 0xc5c5c5c5U, 0x89898989U,
 849         0x6f6f6f6fU, 0xb7b7b7b7U, 0x62626262U, 0x0e0e0e0eU,
 850         0xaaaaaaaaU, 0x18181818U, 0xbebebebeU, 0x1b1b1b1bU,
 851         0xfcfcfcfcU, 0x56565656U, 0x3e3e3e3eU, 0x4b4b4b4bU,
 852         0xc6c6c6c6U, 0xd2d2d2d2U, 0x79797979U, 0x20202020U,
 853         0x9a9a9a9aU, 0xdbdbdbdbU, 0xc0c0c0c0U, 0xfefefefeU,
 854         0x78787878U, 0xcdcdcdcdU, 0x5a5a5a5aU, 0xf4f4f4f4U,
 855         0x1f1f1f1fU, 0xddddddddU, 0xa8a8a8a8U, 0x33333333U,
 856         0x88888888U, 0x07070707U, 0xc7c7c7c7U, 0x31313131U,
 857         0xb1b1b1b1U, 0x12121212U, 0x10101010U, 0x59595959U,
 858         0x27272727U, 0x80808080U, 0xececececU, 0x5f5f5f5fU,
 859         0x60606060U, 0x51515151U, 0x7f7f7f7fU, 0xa9a9a9a9U,
 860         0x19191919U, 0xb5b5b5b5U, 0x4a4a4a4aU, 0x0d0d0d0dU,
 861         0x2d2d2d2dU, 0xe5e5e5e5U, 0x7a7a7a7aU, 0x9f9f9f9fU,
 862         0x93939393U, 0xc9c9c9c9U, 0x9c9c9c9cU, 0xefefefefU,
 863         0xa0a0a0a0U, 0xe0e0e0e0U, 0x3b3b3b3bU, 0x4d4d4d4dU,
 864         0xaeaeaeaeU, 0x2a2a2a2aU, 0xf5f5f5f5U, 0xb0b0b0b0U,
 865         0xc8c8c8c8U, 0xebebebebU, 0xbbbbbbbbU, 0x3c3c3c3cU,
 866         0x83838383U, 0x53535353U, 0x99999999U, 0x61616161U,
 867         0x17171717U, 0x2b2b2b2bU, 0x04040404U, 0x7e7e7e7eU,
 868         0xbabababaU, 0x77777777U, 0xd6d6d6d6U, 0x26262626U,
 869         0xe1e1e1e1U, 0x69696969U, 0x14141414U, 0x63636363U,
 870         0x55555555U, 0x21212121U, 0x0c0c0c0cU, 0x7d7d7d7dU
 871 };
 872 
 873 #endif /* !sun4u */
 874 
 875 /* Rcon is Round Constant; used for encryption key expansion */
 876 static const uint32_t rcon[RC_LENGTH] =
 877 {
 878         /* for 128-bit blocks, Rijndael never uses more than 10 rcon values */
 879         0x01000000, 0x02000000, 0x04000000, 0x08000000,
 880         0x10000000, 0x20000000, 0x40000000, 0x80000000,
 881         0x1B000000, 0x36000000
 882 };
 883 
 884 
 885 /*
 886  * Expand the cipher key into the encryption key schedule.
 887  *
 888  * Return the number of rounds for the given cipher key size.
 889  * The size of the key schedule depends on the number of rounds
 890  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
 891  *
 892  * Parameters:
 893  * rk           AES key schedule 32-bit array to be initialized
 894  * cipherKey    User key
 895  * keyBits      AES key size (128, 192, or 256 bits)
 896  */
 897 static int
 898 rijndael_key_setup_enc_raw(uint32_t rk[], const uint32_t cipherKey[],
 899     int keyBits)
 900 {
 901         int             i = 0;
 902         uint32_t        temp;
 903 
 904         rk[0] = cipherKey[0];
 905         rk[1] = cipherKey[1];
 906         rk[2] = cipherKey[2];
 907         rk[3] = cipherKey[3];
 908 
 909         if (keyBits == 128) {
 910                 for (;;) {
 911                         temp  = rk[3];
 912                         rk[4] = rk[0] ^
 913                             (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
 914                             (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
 915                             (Te4[temp & 0xff] & 0x0000ff00) ^
 916                             (Te4[temp >> 24] & 0x000000ff) ^
 917                             rcon[i];
 918                         rk[5] = rk[1] ^ rk[4];
 919                         rk[6] = rk[2] ^ rk[5];
 920                         rk[7] = rk[3] ^ rk[6];
 921 
 922                         if (++i == 10) {
 923                                 return (10);
 924                         }
 925                         rk += 4;
 926                 }
 927         }
 928 
 929         rk[4] = cipherKey[4];
 930         rk[5] = cipherKey[5];
 931 
 932         if (keyBits == 192) {
 933                 for (;;) {
 934                         temp = rk[5];
 935                         rk[6] = rk[0] ^
 936                             (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
 937                             (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
 938                             (Te4[temp & 0xff] & 0x0000ff00) ^
 939                             (Te4[temp >> 24] & 0x000000ff) ^
 940                             rcon[i];
 941                         rk[7] = rk[1] ^ rk[6];
 942                         rk[8] = rk[2] ^ rk[7];
 943                         rk[9] = rk[3] ^ rk[8];
 944 
 945                         if (++i == 8) {
 946                                 return (12);
 947                         }
 948 
 949                         rk[10] = rk[4] ^ rk[9];
 950                         rk[11] = rk[5] ^ rk[10];
 951                         rk += 6;
 952                 }
 953         }
 954 
 955         rk[6] = cipherKey[6];
 956         rk[7] = cipherKey[7];
 957 
 958         if (keyBits == 256) {
 959                 for (;;) {
 960                         temp = rk[7];
 961                         rk[8] = rk[0] ^
 962                             (Te4[(temp >> 16) & 0xff] & 0xff000000) ^
 963                             (Te4[(temp >> 8) & 0xff] & 0x00ff0000) ^
 964                             (Te4[temp & 0xff] & 0x0000ff00) ^
 965                             (Te4[temp >> 24] & 0x000000ff) ^
 966                             rcon[i];
 967                         rk[9] = rk[1] ^ rk[8];
 968                         rk[10] = rk[2] ^ rk[9];
 969                         rk[11] = rk[3] ^ rk[10];
 970 
 971                         if (++i == 7) {
 972                                 return (14);
 973                         }
 974                         temp = rk[11];
 975                         rk[12] = rk[4] ^
 976                             (Te4[temp >> 24] & 0xff000000) ^
 977                             (Te4[(temp >> 16) & 0xff] & 0x00ff0000) ^
 978                             (Te4[(temp >> 8) & 0xff] & 0x0000ff00) ^
 979                             (Te4[temp & 0xff] & 0x000000ff);
 980                         rk[13] = rk[5] ^ rk[12];
 981                         rk[14] = rk[6] ^ rk[13];
 982                         rk[15] = rk[7] ^ rk[14];
 983 
 984                         rk += 8;
 985                 }
 986         }
 987 
 988         return (0);
 989 }
 990 #endif  /* !__amd64 */
 991 
 992 
 993 #ifdef  sun4u
 994 
 995 /*
 996  * Expand the cipher key into the encryption key schedule.
 997  * by the sun4u optimized assembly implementation.
 998  *
 999  * Return the number of rounds for the given cipher key size.
1000  * The size of the key schedule depends on the number of rounds
1001  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1002  *
1003  * Parameters:
1004  * rk           AES key schedule 64-bit array to be initialized
1005  * cipherKey    User key
1006  * keyBits      AES key size (128, 192, or 256 bits)
1007  */
1008 static int
1009 rijndael_key_setup_enc(uint64_t rk[], const uint32_t cipherKey[], int keyBits)
1010 {
1011         uint32_t        rk1[4 * (MAX_AES_NR + 1)];
1012         uint64_t        *rk64 = (uint64_t *)rk;
1013         uint32_t        *rkt;
1014         uint64_t        t;
1015         int             i, Nr;
1016 
1017         Nr = rijndael_key_setup_enc_raw(rk1, cipherKey, keyBits);
1018 
1019         for (i = 0; i < 4 * Nr; i++) {
1020                 t = (uint64_t)(rk1[i]);
1021                 rk64[i] = ((t & 0xff000000) << 11) |
1022                     ((t & 0xff0000) << 8) |
1023                     ((t & 0xffff) << 3);
1024         }
1025 
1026         rkt = (uint32_t *)(&(rk64[4 * Nr]));
1027 
1028         for (i = 0; i < 4; i++) {
1029                 rkt[i] = rk1[4 * Nr+i];
1030         }
1031 
1032         return (Nr);
1033 }
1034 
1035 
1036 /*
1037  * Expand the cipher key into the decryption key schedule as used
1038  * by the sun4u optimized assembly implementation.
1039  *
1040  * Return the number of rounds for the given cipher key size.
1041  * The size of the key schedule depends on the number of rounds
1042  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1043  *
1044  * Parameters:
1045  * rk           AES key schedule 32-bit array to be initialized
1046  * cipherKey    User key
1047  * keyBits      AES key size (128, 192, or 256 bits)
1048  */
1049 static int
1050 rijndael_key_setup_dec_raw(uint32_t rk[], const uint32_t cipherKey[],
1051     int keyBits)
1052 {
1053         int             Nr, i;
1054         uint32_t        temp;
1055 
1056         /* expand the cipher key: */
1057         Nr = rijndael_key_setup_enc_raw(rk, cipherKey, keyBits);
1058 
1059         /* invert the order of the round keys: */
1060 
1061         for (i = 0; i < 2 * Nr + 2; i++) {
1062                 temp = rk[i];
1063                 rk[i] = rk[4 * Nr - i + 3];
1064                 rk[4 * Nr - i + 3] = temp;
1065         }
1066 
1067         /*
1068          * apply the inverse MixColumn transform to all
1069          * round keys but the first and the last:
1070          */
1071         for (i = 1; i < Nr; i++) {
1072                 rk += 4;
1073                 rk[0] = Td0[Te4[rk[0] >> 24] & 0xff] ^
1074                     Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
1075                     Td2[Te4[(rk[0] >>  8) & 0xff] & 0xff] ^
1076                     Td3[Te4[rk[0] & 0xff] & 0xff];
1077                 rk[1] = Td0[Te4[rk[1] >> 24] & 0xff] ^
1078                     Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
1079                     Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
1080                     Td3[Te4[rk[1] & 0xff] & 0xff];
1081                 rk[2] = Td0[Te4[rk[2] >> 24] & 0xff] ^
1082                     Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
1083                     Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
1084                     Td3[Te4[rk[2] & 0xff] & 0xff];
1085                 rk[3] = Td0[Te4[rk[3] >> 24] & 0xff] ^
1086                     Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
1087                     Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
1088                     Td3[Te4[rk[3] & 0xff] & 0xff];
1089         }
1090 
1091         return (Nr);
1092 }
1093 
1094 
1095 /*
1096  * The size of the key schedule depends on the number of rounds
1097  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1098  *
1099  * Parameters:
1100  * rk           AES key schedule 64-bit array to be initialized
1101  * cipherKey    User key
1102  * keyBits      AES key size (128, 192, or 256 bits)
1103  */
1104 static int
1105 rijndael_key_setup_dec(uint64_t rk[], const uint32_t cipherKey[], int keyBits)
1106 {
1107         uint32_t        rk1[4 * (MAX_AES_NR + 1)];
1108         uint64_t        *rk64 = (uint64_t *)rk;
1109         uint32_t        *rkt;
1110         uint64_t        t;
1111         int             i, Nr;
1112 
1113         Nr = rijndael_key_setup_dec_raw(rk1, cipherKey, keyBits);
1114         for (i = 0; i < 4 * Nr; i++) {
1115                 t = (uint64_t)(rk1[i]);
1116                 rk64[i] = ((t & 0xff000000) << 11) |
1117                     ((t & 0xff0000) << 8) |
1118                     ((t & 0xffff) << 3);
1119         }
1120 
1121         rkt = (uint32_t *)(&(rk64[4 * Nr]));
1122 
1123         for (i = 0; i < 4; i++) {
1124                 rkt[i] = rk1[4 * Nr + i];
1125         }
1126 
1127         return (Nr);
1128 }
1129 
1130 
1131 /*
1132  * Expand the 64-bit AES cipher key array into the encryption and decryption
1133  * key schedules.
1134  *
1135  * Parameters:
1136  * key          AES key schedule to be initialized
1137  * keyarr32     User key
1138  * keyBits      AES key size (128, 192, or 256 bits)
1139  */
1140 static void
1141 aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
1142 {
1143         key->nr = rijndael_key_setup_enc(&(key->encr_ks.ks64[0]), keyarr32,
1144             keybits);
1145         key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks64[0]), keyarr32,
1146             keybits);
1147         key->type = AES_64BIT_KS;
1148 }
1149 
1150 
1151 #elif defined(__amd64)
1152 
1153 /*
1154  * Expand the 32-bit AES cipher key array into the encryption and decryption
1155  * key schedules.
1156  *
1157  * Parameters:
1158  * key          AES key schedule to be initialized
1159  * keyarr32     User key
1160  * keyBits      AES key size (128, 192, or 256 bits)
1161  */
1162 static void
1163 aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
1164 {
1165         AES_ACCEL_SAVESTATE(savestate);
1166         aes_accel_enter(savestate);
1167 
1168         if (intel_aes_instructions_present()) {
1169                 key->nr = rijndael_key_setup_enc_intel(&(key->encr_ks.ks32[0]),
1170                     keyarr32, keybits);
1171                 key->nr = rijndael_key_setup_dec_intel(&(key->decr_ks.ks32[0]),
1172                     keyarr32, keybits);
1173         } else {
1174                 key->nr = rijndael_key_setup_enc_amd64(&(key->encr_ks.ks32[0]),
1175                     keyarr32, keybits);
1176                 key->nr = rijndael_key_setup_dec_amd64(&(key->decr_ks.ks32[0]),
1177                     keyarr32, keybits);
1178         }
1179 
1180         aes_accel_exit(savestate);
1181         key->type = AES_32BIT_KS;
1182 }
1183 
1184 #else /* generic C implementation */
1185 
1186 /*
1187  *  Expand the cipher key into the decryption key schedule.
1188  *  Return the number of rounds for the given cipher key size.
1189  *  The size of the key schedule depends on the number of rounds
1190  *  (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1191  *
1192  * Parameters:
1193  * rk           AES key schedule 32-bit array to be initialized
1194  * cipherKey    User key
1195  * keyBits      AES key size (128, 192, or 256 bits)
1196  */
1197 static int
1198 rijndael_key_setup_dec(uint32_t rk[], const uint32_t cipherKey[], int keyBits)
1199 {
1200         int      Nr, i, j;
1201         uint32_t temp;
1202 
1203         /* expand the cipher key: */
1204         Nr = rijndael_key_setup_enc_raw(rk, cipherKey, keyBits);
1205 
1206         /* invert the order of the round keys: */
1207         for (i = 0, j = 4 * Nr; i < j; i += 4, j -= 4) {
1208                 temp = rk[i];
1209                 rk[i] = rk[j];
1210                 rk[j] = temp;
1211                 temp = rk[i + 1];
1212                 rk[i + 1] = rk[j + 1];
1213                 rk[j + 1] = temp;
1214                 temp = rk[i + 2];
1215                 rk[i + 2] = rk[j + 2];
1216                 rk[j + 2] = temp;
1217                 temp = rk[i + 3];
1218                 rk[i + 3] = rk[j + 3];
1219                 rk[j + 3] = temp;
1220         }
1221 
1222         /*
1223          * apply the inverse MixColumn transform to all
1224          * round keys but the first and the last:
1225          */
1226         for (i = 1; i < Nr; i++) {
1227                 rk += 4;
1228                 rk[0] = Td0[Te4[rk[0] >> 24] & 0xff] ^
1229                     Td1[Te4[(rk[0] >> 16) & 0xff] & 0xff] ^
1230                     Td2[Te4[(rk[0] >> 8) & 0xff] & 0xff] ^
1231                     Td3[Te4[rk[0] & 0xff] & 0xff];
1232                 rk[1] = Td0[Te4[rk[1] >> 24] & 0xff] ^
1233                     Td1[Te4[(rk[1] >> 16) & 0xff] & 0xff] ^
1234                     Td2[Te4[(rk[1] >> 8) & 0xff] & 0xff] ^
1235                     Td3[Te4[rk[1] & 0xff] & 0xff];
1236                 rk[2] = Td0[Te4[rk[2] >> 24] & 0xff] ^
1237                     Td1[Te4[(rk[2] >> 16) & 0xff] & 0xff] ^
1238                     Td2[Te4[(rk[2] >> 8) & 0xff] & 0xff] ^
1239                     Td3[Te4[rk[2] & 0xff] & 0xff];
1240                 rk[3] = Td0[Te4[rk[3] >> 24] & 0xff] ^
1241                     Td1[Te4[(rk[3] >> 16) & 0xff] & 0xff] ^
1242                     Td2[Te4[(rk[3] >> 8) & 0xff] & 0xff] ^
1243                     Td3[Te4[rk[3] & 0xff] & 0xff];
1244         }
1245 
1246         return (Nr);
1247 }
1248 
1249 
1250 /*
1251  * Expand the 32-bit AES cipher key array into the encryption and decryption
1252  * key schedules.
1253  *
1254  * Parameters:
1255  * key          AES key schedule to be initialized
1256  * keyarr32     User key
1257  * keyBits      AES key size (128, 192, or 256 bits)
1258  */
1259 static void
1260 aes_setupkeys(aes_key_t *key, const uint32_t *keyarr32, int keybits)
1261 {
1262         key->nr = rijndael_key_setup_enc(&(key->encr_ks.ks32[0]), keyarr32,
1263             keybits);
1264         key->nr = rijndael_key_setup_dec(&(key->decr_ks.ks32[0]), keyarr32,
1265             keybits);
1266         key->type = AES_32BIT_KS;
1267 }
1268 
1269 
1270 /*
1271  * Encrypt one block of data. The block is assumed to be an array
1272  * of four uint32_t values, so copy for alignment (and byte-order
1273  * reversal for little endian systems might be necessary on the
1274  * input and output byte streams.
1275  * The size of the key schedule depends on the number of rounds
1276  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1277  *
1278  * Parameters:
1279  * rk   Key schedule, of aes_ks_t (60 32-bit integers)
1280  * Nr   Number of rounds
1281  * pt   Input block (plain text)
1282  * ct   Output block (crypto text).  Can overlap with pt
1283  */
1284 static void
1285 rijndael_encrypt(const uint32_t rk[], int Nr, const uint32_t pt[4],
1286     uint32_t ct[4])
1287 {
1288         uint32_t        s0, s1, s2, s3, t0, t1, t2, t3;
1289         int             r;
1290 
1291         /*
1292          * map byte array block to cipher state
1293          * and add initial round key:
1294          */
1295 
1296         s0 = pt[0] ^ rk[0];
1297         s1 = pt[1] ^ rk[1];
1298         s2 = pt[2] ^ rk[2];
1299         s3 = pt[3] ^ rk[3];
1300 
1301         /*
1302          * Nr - 1 full rounds:
1303          */
1304 
1305         r = Nr >> 1;
1306 
1307         for (;;) {
1308                 t0 = Te0[s0 >> 24] ^
1309                     Te1[(s1 >> 16) & 0xff] ^
1310                     Te2[(s2 >>  8) & 0xff] ^
1311                     Te3[s3 & 0xff] ^
1312                     rk[4];
1313 
1314                 t1 = Te0[s1 >> 24] ^
1315                     Te1[(s2 >> 16) & 0xff] ^
1316                     Te2[(s3 >>  8) & 0xff] ^
1317                     Te3[s0 & 0xff] ^
1318                     rk[5];
1319 
1320                 t2 = Te0[s2 >> 24] ^
1321                     Te1[(s3 >> 16) & 0xff] ^
1322                     Te2[(s0 >>  8) & 0xff] ^
1323                     Te3[s1 & 0xff] ^
1324                     rk[6];
1325 
1326                 t3 = Te0[s3 >> 24] ^
1327                     Te1[(s0 >> 16) & 0xff] ^
1328                     Te2[(s1 >>  8) & 0xff] ^
1329                     Te3[s2 & 0xff] ^
1330                     rk[7];
1331 
1332                 rk += 8;
1333 
1334                 if (--r == 0) {
1335                         break;
1336                 }
1337 
1338                 s0 = Te0[t0 >> 24] ^
1339                     Te1[(t1 >> 16) & 0xff] ^
1340                     Te2[(t2 >>  8) & 0xff] ^
1341                     Te3[t3 & 0xff] ^
1342                     rk[0];
1343 
1344                 s1 = Te0[t1 >> 24] ^
1345                     Te1[(t2 >> 16) & 0xff] ^
1346                     Te2[(t3 >>  8) & 0xff] ^
1347                     Te3[t0 & 0xff] ^
1348                     rk[1];
1349 
1350                 s2 = Te0[t2 >> 24] ^
1351                     Te1[(t3 >> 16) & 0xff] ^
1352                     Te2[(t0 >>  8) & 0xff] ^
1353                     Te3[t1 & 0xff] ^
1354                     rk[2];
1355 
1356                 s3 = Te0[t3 >> 24] ^
1357                     Te1[(t0 >> 16) & 0xff] ^
1358                     Te2[(t1 >>  8) & 0xff] ^
1359                     Te3[t2 & 0xff] ^
1360                     rk[3];
1361         }
1362 
1363         /*
1364          * apply last round and
1365          * map cipher state to byte array block:
1366          */
1367 
1368         s0 = (Te4[(t0 >> 24)] & 0xff000000) ^
1369             (Te4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
1370             (Te4[(t2 >>  8) & 0xff] & 0x0000ff00) ^
1371             (Te4[t3 & 0xff] & 0x000000ff) ^
1372             rk[0];
1373         ct[0] = s0;
1374 
1375         s1 = (Te4[(t1 >> 24)] & 0xff000000) ^
1376             (Te4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
1377             (Te4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
1378             (Te4[t0 & 0xff] & 0x000000ff) ^
1379             rk[1];
1380         ct[1] = s1;
1381 
1382         s2 = (Te4[(t2 >> 24)] & 0xff000000) ^
1383             (Te4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
1384             (Te4[(t0 >>  8) & 0xff] & 0x0000ff00) ^
1385             (Te4[t1 & 0xff] & 0x000000ff) ^
1386             rk[2];
1387         ct[2] = s2;
1388 
1389         s3 = (Te4[(t3 >> 24)] & 0xff000000) ^
1390             (Te4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
1391             (Te4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
1392             (Te4[t2 & 0xff] & 0x000000ff) ^
1393             rk[3];
1394         ct[3] = s3;
1395 }
1396 
1397 
1398 /*
1399  * Decrypt one block of data. The block is assumed to be an array
1400  * of four uint32_t values, so copy for alignment (and byte-order
1401  * reversal for little endian systems might be necessary on the
1402  * input and output byte streams.
1403  * The size of the key schedule depends on the number of rounds
1404  * (which can be computed from the size of the key), i.e. 4*(Nr + 1).
1405  *
1406  * Parameters:
1407  * rk   Key schedule, of aes_ks_t (60 32-bit integers)
1408  * Nr   Number of rounds
1409  * ct   Input block (crypto text)
1410  * pt   Output block (plain text). Can overlap with pt
1411  */
1412 static void
1413 rijndael_decrypt(const uint32_t rk[], int Nr, const uint32_t ct[4],
1414     uint32_t pt[4])
1415 {
1416         uint32_t s0, s1, s2, s3, t0, t1, t2, t3;
1417         int      r;
1418 
1419         /*
1420          * map byte array block to cipher state
1421          * and add initial round key:
1422          */
1423         s0 = ct[0] ^ rk[0];
1424         s1 = ct[1] ^ rk[1];
1425         s2 = ct[2] ^ rk[2];
1426         s3 = ct[3] ^ rk[3];
1427 
1428         /*
1429          * Nr - 1 full rounds:
1430          */
1431 
1432         r = Nr >> 1;
1433 
1434         for (;;) {
1435                 t0 = Td0[s0 >> 24] ^
1436                     Td1[(s3 >> 16) & 0xff] ^
1437                     Td2[(s2 >> 8) & 0xff] ^
1438                     Td3[s1 & 0xff] ^
1439                     rk[4];
1440 
1441                 t1 = Td0[s1 >> 24] ^
1442                     Td1[(s0 >> 16) & 0xff] ^
1443                     Td2[(s3 >>  8) & 0xff] ^
1444                     Td3[s2 & 0xff] ^
1445                     rk[5];
1446 
1447                 t2 = Td0[s2 >> 24] ^
1448                     Td1[(s1 >> 16) & 0xff] ^
1449                     Td2[(s0 >>  8) & 0xff] ^
1450                     Td3[s3 & 0xff] ^
1451                     rk[6];
1452 
1453                 t3 = Td0[s3 >> 24] ^
1454                     Td1[(s2 >> 16) & 0xff] ^
1455                     Td2[(s1 >> 8) & 0xff] ^
1456                     Td3[s0 & 0xff] ^
1457                     rk[7];
1458 
1459                 rk += 8;
1460 
1461                 if (--r == 0) {
1462                         break;
1463                 }
1464 
1465                 s0 = Td0[t0 >> 24] ^
1466                     Td1[(t3 >> 16) & 0xff] ^
1467                     Td2[(t2 >> 8) & 0xff] ^
1468                     Td3[t1 & 0xff] ^
1469                     rk[0];
1470 
1471                 s1 = Td0[t1 >> 24] ^
1472                     Td1[(t0 >> 16) & 0xff] ^
1473                     Td2[(t3 >> 8) & 0xff] ^
1474                     Td3[t2 & 0xff] ^
1475                     rk[1];
1476 
1477                 s2 = Td0[t2 >> 24] ^
1478                     Td1[(t1 >> 16) & 0xff] ^
1479                     Td2[(t0 >> 8) & 0xff] ^
1480                     Td3[t3 & 0xff] ^
1481                     rk[2];
1482 
1483                 s3 = Td0[t3 >> 24] ^
1484                     Td1[(t2 >> 16) & 0xff] ^
1485                     Td2[(t1 >> 8) & 0xff] ^
1486                     Td3[t0 & 0xff] ^
1487                     rk[3];
1488         }
1489 
1490         /*
1491          * apply last round and
1492          * map cipher state to byte array block:
1493          */
1494 
1495         s0 = (Td4[t0 >> 24] & 0xff000000) ^
1496             (Td4[(t3 >> 16) & 0xff] & 0x00ff0000) ^
1497             (Td4[(t2 >> 8) & 0xff] & 0x0000ff00) ^
1498             (Td4[t1 & 0xff] & 0x000000ff) ^
1499             rk[0];
1500         pt[0] = s0;
1501 
1502         s1 = (Td4[t1 >> 24] & 0xff000000) ^
1503             (Td4[(t0 >> 16) & 0xff] & 0x00ff0000) ^
1504             (Td4[(t3 >>  8) & 0xff] & 0x0000ff00) ^
1505             (Td4[t2 & 0xff] & 0x000000ff) ^
1506             rk[1];
1507         pt[1] = s1;
1508 
1509         s2 = (Td4[t2 >> 24] & 0xff000000) ^
1510             (Td4[(t1 >> 16) & 0xff] & 0x00ff0000) ^
1511             (Td4[(t0 >> 8) & 0xff] & 0x0000ff00) ^
1512             (Td4[t3 & 0xff] & 0x000000ff) ^
1513             rk[2];
1514         pt[2] = s2;
1515 
1516         s3 = (Td4[t3 >> 24] & 0xff000000) ^
1517             (Td4[(t2 >> 16) & 0xff] & 0x00ff0000) ^
1518             (Td4[(t1 >>  8) & 0xff] & 0x0000ff00) ^
1519             (Td4[t0 & 0xff] & 0x000000ff) ^
1520             rk[3];
1521         pt[3] = s3;
1522 }
1523 #endif  /* sun4u, __amd64 */
1524 
1525 
1526 /*
1527  * Initialize AES encryption and decryption key schedules.
1528  *
1529  * Parameters:
1530  * cipherKey    User key
1531  * keyBits      AES key size (128, 192, or 256 bits)
1532  * keysched     AES key schedule to be initialized, of type aes_key_t.
1533  *              Allocated by aes_alloc_keysched().
1534  */
1535 void
1536 aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched)
1537 {
1538         aes_key_t       *newbie = keysched;
1539         uint_t          keysize, i, j;
1540         union {
1541                 uint64_t        ka64[4];
1542                 uint32_t        ka32[8];
1543                 } keyarr;
1544 
1545         switch (keyBits) {
1546         case 128:
1547                 newbie->nr = 10;
1548                 break;
1549 
1550         case 192:
1551                 newbie->nr = 12;
1552                 break;
1553 
1554         case 256:
1555                 newbie->nr = 14;
1556                 break;
1557 
1558         default:
1559                 /* should never get here */
1560                 return;
1561         }
1562         keysize = CRYPTO_BITS2BYTES(keyBits);
1563 
1564         /*
1565          * For _LITTLE_ENDIAN machines (except AMD64), reverse every
1566          * 4 bytes in the key.  On _BIG_ENDIAN and AMD64, copy the key
1567          * without reversing bytes.
1568          * For AMD64, do not byte swap for aes_setupkeys().
1569          *
1570          * SPARCv8/v9 uses a key schedule array with 64-bit elements.
1571          * X86/AMD64  uses a key schedule array with 32-bit elements.
1572          */
1573 #ifndef AES_BYTE_SWAP
1574         if (IS_P2ALIGNED(cipherKey, sizeof (uint64_t))) {
1575                 for (i = 0, j = 0; j < keysize; i++, j += 8) {
1576                         /* LINTED: pointer alignment */
1577                         keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]);
1578                 }
1579         } else {
1580                 bcopy(cipherKey, keyarr.ka32, keysize);
1581         }
1582 
1583 #else   /* byte swap */
1584         for (i = 0, j = 0; j < keysize; i++, j += 4) {
1585                 keyarr.ka32[i] = htonl(*(uint32_t *)(void *)&cipherKey[j]);
1586         }
1587 #endif
1588 
1589         aes_setupkeys(newbie, keyarr.ka32, keyBits);
1590 }
1591 
1592 #if     defined(__amd64) && defined(_KERNEL)
1593 void
1594 aes_accel_enter(void *savestate)
1595 {
1596         KPREEMPT_DISABLE;
1597         aes_accel_save(savestate);
1598 }
1599 
1600 void
1601 aes_accel_exit(void *savestate)
1602 {
1603         aes_accel_restore(savestate);
1604         KPREEMPT_ENABLE;
1605 }
1606 #endif  /* defined(__amd64) && defined(_KERNEL) */
1607 
1608 /*
1609  * Encrypt one block using AES.
1610  * Align if needed and (for x86 32-bit only) byte-swap.
1611  *
1612  * Parameters:
1613  * ks   Key schedule, of type aes_key_t
1614  * pt   Input block (plain text)
1615  * ct   Output block (crypto text).  Can overlap with pt
1616  */
1617 int
1618 aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct)
1619 {
1620         aes_key_t       *ksch = (aes_key_t *)ks;
1621 
1622 #ifdef  __amd64
1623         if (intel_aes_instructions_present())
1624                 aes_encrypt_intel(&ksch->encr_ks.ks32[0], ksch->nr,
1625                     /* LINTED:  pointer alignment */
1626                     (uint32_t *)pt, (uint32_t *)ct);
1627         else
1628                 aes_encrypt_amd64(&ksch->encr_ks.ks32[0], ksch->nr,
1629                     /* LINTED:  pointer alignment */
1630                     (uint32_t *)pt, (uint32_t *)ct);
1631 #else   /* !__amd64 */
1632 #ifndef AES_BYTE_SWAP
1633         if (IS_P2ALIGNED2(pt, ct, sizeof (uint32_t))) {
1634                 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1635                     /* LINTED:  pointer alignment */
1636                     (uint32_t *)pt, (uint32_t *)ct);
1637         } else {
1638 #endif
1639                 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1640 
1641                 /* Copy input block into buffer */
1642 #ifndef AES_BYTE_SWAP
1643                 bcopy(pt, &buffer, AES_BLOCK_LEN);
1644 
1645 #else   /* byte swap */
1646                 buffer[0] = htonl(*(uint32_t *)(void *)&pt[0]);
1647                 buffer[1] = htonl(*(uint32_t *)(void *)&pt[4]);
1648                 buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]);
1649                 buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]);
1650 #endif  /* byte swap */
1651 
1652                 AES_ENCRYPT_IMPL(&ksch->encr_ks.ks32[0], ksch->nr,
1653                     buffer, buffer);
1654 
1655                 /* Copy result from buffer to output block */
1656 #ifndef AES_BYTE_SWAP
1657                 bcopy(&buffer, ct, AES_BLOCK_LEN);
1658         }
1659 
1660 #else   /* byte swap */
1661                 *(uint32_t *)(void *)&ct[0] = htonl(buffer[0]);
1662                 *(uint32_t *)(void *)&ct[4] = htonl(buffer[1]);
1663                 *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]);
1664                 *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]);
1665 #endif  /* byte swap */
1666 #endif  /* !__amd64 */
1667 
1668         return (CRYPTO_SUCCESS);
1669 }
1670 
1671 
1672 /*
1673  * Decrypt one block using AES.
1674  * Align and byte-swap if needed.
1675  *
1676  * Parameters:
1677  * ks   Key schedule, of type aes_key_t
1678  * ct   Input block (crypto text)
1679  * pt   Output block (plain text). Can overlap with pt
1680  */
1681 int
1682 aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt)
1683 {
1684         aes_key_t       *ksch = (aes_key_t *)ks;
1685 
1686 #ifdef  __amd64
1687         if (intel_aes_instructions_present())
1688                 aes_decrypt_intel(&ksch->decr_ks.ks32[0], ksch->nr,
1689                     /* LINTED:  pointer alignment */
1690                     (uint32_t *)ct, (uint32_t *)pt);
1691         else
1692                 aes_decrypt_amd64(&ksch->decr_ks.ks32[0], ksch->nr,
1693                     /* LINTED:  pointer alignment */
1694                     (uint32_t *)ct, (uint32_t *)pt);
1695 #else   /* !__amd64 */
1696 #ifndef AES_BYTE_SWAP
1697         if (IS_P2ALIGNED2(ct, pt, sizeof (uint32_t))) {
1698                 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1699                     /* LINTED:  pointer alignment */
1700                     (uint32_t *)ct, (uint32_t *)pt);
1701         } else {
1702 #endif
1703                 uint32_t buffer[AES_BLOCK_LEN / sizeof (uint32_t)];
1704 
1705                 /* Copy input block into buffer */
1706 #ifndef AES_BYTE_SWAP
1707                 bcopy(ct, &buffer, AES_BLOCK_LEN);
1708 
1709 #else   /* byte swap */
1710                 buffer[0] = htonl(*(uint32_t *)(void *)&ct[0]);
1711                 buffer[1] = htonl(*(uint32_t *)(void *)&ct[4]);
1712                 buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]);
1713                 buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]);
1714 #endif  /* byte swap */
1715 
1716                 AES_DECRYPT_IMPL(&ksch->decr_ks.ks32[0], ksch->nr,
1717                     buffer, buffer);
1718 
1719                 /* Copy result from buffer to output block */
1720 #ifndef AES_BYTE_SWAP
1721                 bcopy(&buffer, pt, AES_BLOCK_LEN);
1722         }
1723 
1724 #else   /* byte swap */
1725         *(uint32_t *)(void *)&pt[0] = htonl(buffer[0]);
1726         *(uint32_t *)(void *)&pt[4] = htonl(buffer[1]);
1727         *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]);
1728         *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]);
1729 #endif  /* byte swap */
1730 #endif  /* !__amd64 */
1731 
1732         return (CRYPTO_SUCCESS);
1733 }
1734 
/*
 * Helper macros for the ECB routines.  They pick up the following names
 * from the function they are expanded in:
 *      ksch    key schedule (aes_key_t *)
 *      i       byte offset of the next block; advanced by the macros
 *      length  total number of bytes to process
 *
 * ECB_LOOP evaluates ciph_func once per AES_BLOCK_LEN bytes, from the
 * current value of i up to length.
 */
#define ECB_LOOP(ciph_func)                                             \
        do {                                                            \
                for (; i < length; i += AES_BLOCK_LEN) {                \
                        ciph_func;                                      \
                }                                                       \
                _NOTE(CONSTCOND)                                        \
        } while (0)

/*
 * Per-block loop for cipher functions taking four arguments: the 32-bit
 * key schedule, the round count, the input block and the output block.
 * enc_or_dec must be `enc' or `dec'; it selects encr_ks or decr_ks.
 */
#define ECB_LOOP_4P(ciph_func, enc_or_dec, in, out)                     \
        ECB_LOOP(ciph_func(&ksch->enc_or_dec ## r_ks.ks32[0],           \
            ksch->nr, (void *)&in[i], (void *)&out[i]))

/*
 * Per-block loop for cipher functions taking three arguments: the key
 * schedule, the input block and the output block.
 */
#define ECB_LOOP_3P(ciph_func, in, out)                                 \
        ECB_LOOP(ciph_func(ksch, (void *)&in[i], (void *)&out[i]))

#ifdef  __amd64
/*
 * amd64 driver.  With AES-NI present, as many 8-block groups as fit are
 * passed to aes_{enc,dec}rypt_intel8 and the leftover blocks go through
 * aes_{enc,dec}rypt_intel one at a time.  Without AES-NI every block goes
 * through aes_{enc,dec}rypt_amd64.
 */
#define ECB_INTEL_IMPL(enc_or_dec, in, out)                             \
        do {                                                            \
                if (intel_aes_instructions_present()) {                 \
                        /* bulk part: 8 blocks per call */              \
                        for (; i + 8 * AES_BLOCK_LEN <= length;         \
                            i += 8 * AES_BLOCK_LEN)                     \
                                aes_ ## enc_or_dec ## rypt_intel8(      \
                                    &ksch->enc_or_dec ## r_ks.ks32[0],  \
                                    ksch->nr, &in[i], &out[i]);         \
                        /* tail: whatever is left, block by block */    \
                        ECB_LOOP_4P(aes_ ## enc_or_dec ## rypt_intel,   \
                            enc_or_dec, in, out);                       \
                } else {                                                \
                        ECB_LOOP_4P(aes_ ## enc_or_dec ## rypt_amd64,   \
                            enc_or_dec, in, out);                       \
                }                                                       \
                _NOTE(CONSTCOND)                                        \
        } while (0)
#endif  /* __amd64 */
1767 
1768 /*
1769  * Perform AES ECB encryption on a sequence of blocks. On x86-64 CPUs with
1770  * the AES-NI extension, this performs the encryption in increments of 8
1771  * blocks at a time, exploiting instruction parallelism more efficiently.
1772  * On other platforms, this simply encrypts the blocks in sequence.
1773  */
1774 int
1775 aes_encrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct, uint64_t length)
1776 {
1777         aes_key_t *ksch = (aes_key_t *)ks;
1778         uint64_t i = 0;
1779 
1780 #ifdef  __amd64
1781         ECB_INTEL_IMPL(enc, pt, ct);
1782 #elif   defined(sun4u)
1783         ECB_LOOP_4P(aes_encrypt_impl, enc, pt, ct);
1784 #else   /* Generic C implementation */
1785         ECB_LOOP_3P((void) aes_encrypt_block, pt, ct);
1786 #endif  /* Generic C implementation */
1787 
1788         return (CRYPTO_SUCCESS);
1789 }
1790 
1791 /*
1792  * Same as aes_encrypt_ecb, but performs decryption.
1793  */
1794 int
1795 aes_decrypt_ecb(const void *ks, const uint8_t *ct, uint8_t *pt, uint64_t length)
1796 {
1797         aes_key_t *ksch = (aes_key_t *)ks;
1798         uint64_t i = 0;
1799 
1800 #ifdef  __amd64
1801         ECB_INTEL_IMPL(dec, ct, pt);
1802 #elif   defined(sun4u)
1803         ECB_LOOP_4P(aes_decrypt_impl, dec, ct, pt);
1804 #else   /* Generic C implementation */
1805         ECB_LOOP_3P((void) aes_decrypt_block, ct, pt);
1806 #endif  /* Generic C implementation */
1807 
1808         return (CRYPTO_SUCCESS);
1809 }
1810 #ifdef  __amd64
1811 #undef  ECB_INTEL_IMPL
1812 #endif  /* __amd64 */
1813 
1814 #undef  ECB_LOOP
1815 #undef  ECB_LOOP_4P
1816 #undef  ECB_LOOP_3P
1817 
/*
 * Helper macros for CBC encryption.  They pick up the following names from
 * the function they are expanded in:
 *      ksch    key schedule (aes_key_t *)
 *      pt, ct  input and output byte buffers
 *      iv      chaining value for the next block; updated by the loop
 *      i       byte offset of the next block; advanced by the loop
 *      length  total number of bytes to process
 *
 * Each iteration builds the cipher input inside the output block itself,
 * so ct[i] is overwritten before pt[i] is read.
 */
#define CBC_LOOP(enc_func, xor_func)                                    \
        do {                                                            \
                for (; i < length; i += AES_BLOCK_LEN) {                \
                        /* start from the chaining value */             \
                        bcopy(iv, &ct[i], AES_BLOCK_LEN);               \
                        /* combine it with the plaintext block */       \
                        xor_func(&pt[i], &ct[i]);                       \
                        /* encrypt the output block in place */         \
                        enc_func;                                       \
                        /* the result chains into the next block */     \
                        iv = &ct[i];                                    \
                }                                                       \
                _NOTE(CONSTCOND)                                        \
        } while (0)

/*
 * CBC loop for encryption functions taking four arguments: the 32-bit
 * encryption key schedule, the round count, the input and the output.
 */
#define CBC_LOOP_4P(enc_func, xor_func)                                 \
        CBC_LOOP(enc_func(&ksch->encr_ks.ks32[0],                       \
            ksch->nr, (void *)&ct[i], (void *)&ct[i]), xor_func)

/*
 * CBC loop for encryption functions taking three arguments: the key
 * schedule, the input and the output.
 */
#define CBC_LOOP_3P(enc_func, xor_func)                                 \
        CBC_LOOP(enc_func(ksch, (void *)&ct[i], (void *)&ct[i]), xor_func)
1836 
1837 /*
1838  * Encrypts a sequence of consecutive AES blocks in CBC mode. On x86-64
1839  * with the AES-NI extension, the encryption is performed on 8 blocks at
1840  * a time using an optimized assembly implementation, giving a speed boost
1841  * of around 75%. On other platforms, this simply performs CBC encryption
1842  * in sequence on the blocks.
1843  *
1844  * Decryption acceleration is implemented in the kernel kcf block cipher
1845  * modes code (cbc.c), because that doesn't require a complete hand-tuned
1846  * CBC implementation in assembly.
1847  */
1848 int
1849 aes_encrypt_cbc(const void *ks, const uint8_t *pt, uint8_t *ct,
1850     const uint8_t *iv, uint64_t length)
1851 {
1852         aes_key_t *ksch = (aes_key_t *)ks;
1853         size_t i = 0;
1854 
1855 #ifdef  __amd64
1856         if (intel_aes_instructions_present()) {
1857                 for (; i + 8 * AES_BLOCK_LEN <= length;
1858                     i += 8 * AES_BLOCK_LEN) {
1859                         aes_encrypt_cbc_intel8(&ksch->encr_ks.ks32[0],
1860                             ksch->nr, &ct[i], &ct[i], iv);
1861                         iv = &ct[7 * AES_BLOCK_LEN];
1862                 }
1863                 CBC_LOOP_4P(aes_encrypt_intel, aes_xor_intel);
1864         } else {
1865                 CBC_LOOP_4P(aes_encrypt_amd64, aes_xor_intel);
1866         }
1867 #elif   defined(sun4u)
1868         CBC_LOOP_4P(aes_encrypt_impl, aes_xor_block);
1869 #else   /* Generic C implementation */
1870         CBC_LOOP_3P((void) aes_encrypt_block, aes_xor_block);
1871 #endif  /* Generic C implementation */
1872 
1873         return (CRYPTO_SUCCESS);
1874 }
1875 #undef  CBC_LOOP
1876 #undef  CBC_LOOP_4P
1877 #undef  CBC_LOOP_3P
1878 
/*
 * Helper macros for counter mode.  They pick up the following names from
 * the function they are expanded in:
 *      ksch            key schedule (aes_key_t *)
 *      input, output   input and output byte buffers
 *      counter         two 64-bit counter words; counter[1] is incremented
 *                      once per block
 *      i               byte offset of the next block; advanced by the loop
 *      length          total number of bytes to process
 *
 * Each iteration builds the counter block inside the output block itself,
 * so output[i] is overwritten before input[i] is read.
 */
#define CTR_LOOP(enc_func, xor_func)                                    \
        do {                                                            \
                for (; i < length; i += AES_BLOCK_LEN) {                \
                        /* first 8 bytes: counter[0], stored as is */   \
                        *(uint64_t *)(void *)&output[i] = counter[0];   \
                        /* last 8 bytes: counter[1], then bump it */    \
                        *(uint64_t *)(void *)&output[i + 8] =           \
                            htonll(counter[1]++);                       \
                        /* encrypt the counter block in place */        \
                        enc_func;                                       \
                        /* combine the input block with the result */   \
                        xor_func(&input[i], &output[i]);                \
                }                                                       \
                _NOTE(CONSTCOND)                                        \
        } while (0)

/*
 * CTR loop for encryption functions taking four arguments: the 32-bit
 * encryption key schedule, the round count, the input and the output.
 */
#define CTR_LOOP_4P(enc_func, xor_func)                                 \
        CTR_LOOP(enc_func(&ksch->encr_ks.ks32[0], ksch->nr,             \
            (void *)&output[i], (void *)&output[i]), xor_func)

/*
 * CTR loop for encryption functions taking three arguments: the key
 * schedule, the input and the output.
 */
#define CTR_LOOP_3P(enc_func, xor_func)                                 \
        CTR_LOOP(enc_func(ksch, (void *)&output[i], (void *)&output[i]),\
            xor_func)
1899 /*
1900  * Performs high-performance counter mode encryption and decryption on
1901  * a sequence of blocks. In CTR mode, encryption and decryption are the
1902  * same operation, just with the plaintext and ciphertext reversed:
1903  * plaintext = CTR(CTR(plaintext, K), K)
1904  * Blocks also do not interdepend on each other, so it is an excellent
1905  * mode when high performance is required and data authentication/integrity
1906  * checking is provided via some other means, or isn't necessary.
1907  *
1908  * On x86-64 with the AES-NI extension, this code performs CTR mode
1909  * encryption in parallel on 8 blocks at a time and can provide in
1910  * excess of 3GB/s/core of encryption/decryption performance (<1 CPB).
1911  */
1912 int
1913 aes_ctr_mode(const void *ks, const uint8_t *input, uint8_t *output,
1914     uint64_t length, uint64_t counter[2])
1915 {
1916         aes_key_t *ksch = (aes_key_t *)ks;
1917         uint64_t i = 0;
1918 
1919         // swap lower part to host order for computations
1920         counter[1] = ntohll(counter[1]);
1921 
1922 #ifdef  __amd64
1923         if (intel_aes_instructions_present()) {
1924                 /* first use the wide-register accelerated function */
1925                 for (; i + 8 * AES_BLOCK_LEN <= length;
1926                     i += 8 * AES_BLOCK_LEN) {
1927                         aes_ctr_intel8(&ksch->encr_ks.ks32[0], ksch->nr,
1928                             &input[i], &output[i], counter[0], counter[1]);
1929                         counter[1] += 8;
1930                 }
1931                 /* finish off the remainder using the slow per-block method */
1932                 CTR_LOOP_4P(aes_encrypt_intel, aes_xor_intel);
1933         } else {
1934                 CTR_LOOP_4P(aes_encrypt_amd64, aes_xor_intel);
1935         }
1936 #elif   defined(sun4u)
1937         CTR_LOOP_4P(aes_encrypt_impl, aes_xor_block);
1938 #else   /* Generic C implementation */
1939         CTR_LOOP_3P((void) aes_encrypt_block, aes_xor_block);
1940 #endif  /* Generic C implementation */
1941 
1942         // swap lower part back to big endian
1943         counter[1] = htonll(counter[1]);
1944 
1945         return (CRYPTO_SUCCESS);
1946 }
1947 #undef  CTR_LOOP
1948 
1949 /*
1950  * Allocate key schedule for AES.
1951  *
1952  * Return the pointer and set size to the number of bytes allocated.
1953  * Memory allocated must be freed by the caller when done.
1954  *
1955  * Parameters:
1956  * size         Size of key schedule allocated, in bytes
1957  * kmflag       Flag passed to kmem_alloc(9F); ignored in userland.
1958  */
1959 /* ARGSUSED */
1960 void *
1961 aes_alloc_keysched(size_t *size, int kmflag)
1962 {
1963         aes_key_t *keysched;
1964 
1965 #ifdef  _KERNEL
1966         keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag);
1967 #else   /* !_KERNEL */
1968         keysched = (aes_key_t *)malloc(sizeof (aes_key_t));
1969 #endif  /* _KERNEL */
1970 
1971         if (keysched != NULL) {
1972                 *size = sizeof (aes_key_t);
1973                 return (keysched);
1974         }
1975         return (NULL);
1976 }
1977 
1978 
1979 #ifdef __amd64
1980 /*
1981  * Return 1 if executing on x86-64 with AES-NI instructions, otherwise 0.
1982  * Cache the result, as the CPU can't change.
1983  *
1984  * Note: the userland version uses getisax().  The kernel version uses
1985  * global variable x86_featureset.
1986  */
1987 static inline int
1988 intel_aes_instructions_present(void)
1989 {
1990         static int      cached_result = -1;
1991 
1992         if (cached_result == -1) { /* first time */
1993 #ifdef _KERNEL
1994                 cached_result = is_x86_feature(x86_featureset, X86FSET_AES);
1995 #else
1996                 uint_t          ui = 0;
1997 
1998                 (void) getisax(&ui, 1);
1999                 cached_result = (ui & AV_386_AES) != 0;
2000 #endif  /* _KERNEL */
2001         }
2002 
2003         return (cached_result);
2004 }
2005 #endif  /* __amd64 */