4896 Performance improvements for KCF AES modes

          --- old/usr/src/common/crypto/aes/aes_impl.h
          +++ new/usr/src/common/crypto/aes/aes_impl.h
(14 lines elided)
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
       25 +/*
       26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
       27 + */
  25   28  
  26   29  #ifndef _AES_IMPL_H
  27   30  #define _AES_IMPL_H
  28   31  
  29   32  /*
  30   33   * Common definitions used by AES.
  31   34   */
  32   35  
  33   36  #ifdef  __cplusplus
  34   37  extern "C" {
(3 lines elided)
  38   41  #include <sys/crypto/common.h>
  39   42  
  40   43  /* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */
  41   44  #define IS_P2ALIGNED2(v, w, a) \
  42   45          ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)
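A quick illustration of what the combined check buys: OR-ing the two pointers lets a single mask test cover both, so one branch can guard a fast path that requires both source and destination to be aligned. A minimal standalone sketch, repeating the IS_P2ALIGNED2 definition above so it compiles on its own:

    #include <assert.h>
    #include <stdint.h>

    #define IS_P2ALIGNED2(v, w, a) \
            ((((uintptr_t)(v) | (uintptr_t)(w)) & ((uintptr_t)(a) - 1)) == 0)

    int
    main(void)
    {
            uint64_t buf[4] = { 0 };
            uint8_t *p = (uint8_t *)buf;

            /* Both pointers 8-byte aligned: the OR keeps the low 3 bits clear. */
            assert(IS_P2ALIGNED2(p, p + 8, 8));
            /* One misaligned pointer sets low bits in the OR, failing the test. */
            assert(!IS_P2ALIGNED2(p, p + 3, 8));
            return (0);
    }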
  43   46  
  44   47  #define AES_BLOCK_LEN   16      /* bytes */
  45   48  /* Round constant length, in number of 32-bit elements: */
  46   49  #define RC_LENGTH       (5 * ((AES_BLOCK_LEN) / 4 - 2))
  47   50  
  48      -#define AES_COPY_BLOCK(src, dst) \
       51 +#define AES_COPY_BLOCK_UNALIGNED(src, dst) \
  49   52          (dst)[0] = (src)[0]; \
  50   53          (dst)[1] = (src)[1]; \
  51   54          (dst)[2] = (src)[2]; \
  52   55          (dst)[3] = (src)[3]; \
  53   56          (dst)[4] = (src)[4]; \
  54   57          (dst)[5] = (src)[5]; \
  55   58          (dst)[6] = (src)[6]; \
  56   59          (dst)[7] = (src)[7]; \
  57   60          (dst)[8] = (src)[8]; \
  58   61          (dst)[9] = (src)[9]; \
  59   62          (dst)[10] = (src)[10]; \
  60   63          (dst)[11] = (src)[11]; \
  61   64          (dst)[12] = (src)[12]; \
  62   65          (dst)[13] = (src)[13]; \
  63   66          (dst)[14] = (src)[14]; \
  64   67          (dst)[15] = (src)[15]
  65   68  
  66      -#define AES_XOR_BLOCK(src, dst) \
       69 +#define AES_XOR_BLOCK_UNALIGNED(src, dst) \
  67   70          (dst)[0] ^= (src)[0]; \
  68   71          (dst)[1] ^= (src)[1]; \
  69   72          (dst)[2] ^= (src)[2]; \
  70   73          (dst)[3] ^= (src)[3]; \
  71   74          (dst)[4] ^= (src)[4]; \
  72   75          (dst)[5] ^= (src)[5]; \
  73   76          (dst)[6] ^= (src)[6]; \
  74   77          (dst)[7] ^= (src)[7]; \
  75   78          (dst)[8] ^= (src)[8]; \
  76   79          (dst)[9] ^= (src)[9]; \
  77   80          (dst)[10] ^= (src)[10]; \
  78   81          (dst)[11] ^= (src)[11]; \
  79   82          (dst)[12] ^= (src)[12]; \
  80   83          (dst)[13] ^= (src)[13]; \
  81   84          (dst)[14] ^= (src)[14]; \
  82   85          (dst)[15] ^= (src)[15]
  83   86  
       87 +#define AES_COPY_BLOCK_ALIGNED(src, dst) \
       88 +        ((uint64_t *)(void *)(dst))[0] = ((uint64_t *)(void *)(src))[0]; \
       89 +        ((uint64_t *)(void *)(dst))[1] = ((uint64_t *)(void *)(src))[1]
       90 +
       91 +#define AES_XOR_BLOCK_ALIGNED(src, dst) \
       92 +        ((uint64_t *)(void *)(dst))[0] ^= ((uint64_t *)(void *)(src))[0]; \
       93 +        ((uint64_t *)(void *)(dst))[1] ^= ((uint64_t *)(void *)(src))[1]
       94 +
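The aligned variants are where the speedup comes from: two 64-bit operations replace sixteen byte-wide ones. A hedged sketch of the selection a mode loop might make (the wrapper name aes_xor_block_any is hypothetical, not part of this change); note that both macro families expand to multiple statements, so each branch must be braced:

    /* Hypothetical wrapper, for illustration only. */
    static void
    aes_xor_block_any(uint8_t *src, uint8_t *dst)
    {
            if (IS_P2ALIGNED2(src, dst, sizeof (uint64_t))) {
                    AES_XOR_BLOCK_ALIGNED(src, dst);        /* two 64-bit XORs */
            } else {
                    AES_XOR_BLOCK_UNALIGNED(src, dst);      /* sixteen 8-bit XORs */
            }
    }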
  84   95  /* AES key size definitions */
  85   96  #define AES_MINBITS             128
  86   97  #define AES_MINBYTES            ((AES_MINBITS) >> 3)
  87   98  #define AES_MAXBITS             256
  88   99  #define AES_MAXBYTES            ((AES_MAXBITS) >> 3)
  89  100  
  90  101  #define AES_MIN_KEY_BYTES       ((AES_MINBITS) >> 3)
  91  102  #define AES_MAX_KEY_BYTES       ((AES_MAXBITS) >> 3)
  92  103  #define AES_192_KEY_BYTES       24
  93  104  #define AES_IV_LEN              16
  94  105  
  95  106  /* AES key schedule may be implemented with 32- or 64-bit elements: */
  96  107  #define AES_32BIT_KS            32
  97  108  #define AES_64BIT_KS            64
  98  109  
  99  110  #define MAX_AES_NR              14 /* Maximum number of rounds */
 100  111  #define MAX_AES_NB              4  /* Number of columns comprising a state */
 101  112  
      113 +/*
      114 + * Architecture-specific acceleration support autodetection.
      115 + * Some architectures provide hardware-assisted acceleration using floating
       116 + * point registers, which need special handling inside the kernel, so the
       117 + * macros below define the auxiliary functions needed to use them.
      118 + */
      119 +#if     defined(__amd64) && defined(_KERNEL)
      120 +/*
      121 + * Using floating point registers requires temporarily disabling kernel
      122 + * thread preemption, so we need to operate on small-enough chunks to
      123 + * prevent scheduling latency bubbles.
      124 + * A typical 64-bit CPU can sustain around 300-400MB/s/core even in the
       125 + * slowest encryption modes (CBC), which at 32k per run works out to ~100us.
       126 + * CPUs with AES-NI in fast modes (ECB, CTR, CBC decryption) can
      127 + * easily sustain 3GB/s/core, so the latency potential essentially vanishes.
      128 + */
      129 +#define AES_OPSZ        32768
      130 +
      131 +#if     defined(lint) || defined(__lint)
      132 +#define AES_ACCEL_SAVESTATE(name)       uint8_t name[16 * 16 + 8]
      133 +#else   /* lint || __lint */
      134 +#define AES_ACCEL_SAVESTATE(name) \
      135 +        /* stack space for xmm0--xmm15 and cr0 (16 x 128 bits + 64 bits) */ \
      136 +        uint8_t name[16 * 16 + 8] __attribute__((aligned(16)))
      137 +#endif  /* lint || __lint */
      138 +
      139 +#else   /* !defined(__amd64) || !defined(_KERNEL) */
       140 +/*
       141 + * All other configurations: no hardware-assisted acceleration support.
       142 + */
       143 +#define AES_OPSZ        ((size_t)-1)
       144 +/* On other architectures or outside the kernel these get stubbed out. */
      145 +#define AES_ACCEL_SAVESTATE(name)
      146 +#define aes_accel_enter(savestate)
      147 +#define aes_accel_exit(savestate)
      148 +#endif  /* !defined(__amd64) || !defined(_KERNEL) */
      149 +
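Sanity-checking the AES_OPSZ sizing arithmetic above: at the quoted 350 MB/s/core CBC floor, one 32768-byte run holds preemption disabled for roughly 32768 / (350 * 2^20) s ≈ 90 µs, in line with the ~100 µs figure, while at the 3 GB/s/core AES-NI rate the same run finishes in about 10 µs.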
 102  150  typedef union {
 103  151  #ifdef  sun4u
 104  152          uint64_t        ks64[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 105  153  #endif
 106  154          uint32_t        ks32[((MAX_AES_NR) + 1) * (MAX_AES_NB)];
 107  155  } aes_ks_t;
 108  156  
 109      -/* aes_key.flags value: */
 110      -#define INTEL_AES_NI_CAPABLE    0x1     /* AES-NI instructions present */
 111      -
 112  157  typedef struct aes_key aes_key_t;
 113  158  struct aes_key {
 114  159          aes_ks_t        encr_ks;  /* encryption key schedule */
 115  160          aes_ks_t        decr_ks;  /* decryption key schedule */
 116  161  #ifdef __amd64
 117  162          long double     align128; /* Align fields above for Intel AES-NI */
 118      -        int             flags;    /* implementation-dependent flags */
 119  163  #endif  /* __amd64 */
 120  164          int             nr;       /* number of rounds (10, 12, or 14) */
 121  165          int             type;     /* key schedule size (32 or 64 bits) */
 122  166  };
 123  167  
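The align128 member exists because AES-NI loads of the key schedule want 16-byte alignment, and under the amd64 ABI long double carries exactly that alignment, which then propagates to the whole struct and to the key schedules at its head. An illustrative compile-time check, assuming C11 is available (not part of the source):

    #if defined(__amd64)
    /* long double is 16-byte aligned under the amd64 ABI, so the struct,
     * and hence encr_ks/decr_ks at its start, inherit that alignment. */
    _Static_assert(_Alignof(aes_key_t) == 16,
        "aes_key_t must be 16-byte aligned for AES-NI");
    #endif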
 124  168  /*
 125  169   * Core AES functions.
 126  170   * ks and keysched are pointers to aes_key_t.
 127  171   * They are declared void* as they are intended to be opaque types.
 128  172   * Use function aes_alloc_keysched() to allocate memory for ks and keysched.
 129  173   */
 130  174  extern void *aes_alloc_keysched(size_t *size, int kmflag);
 131  175  extern void aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits,
 132  176          void *keysched);
 133  177  extern int aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct);
 134  178  extern int aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt);
      179 +extern int aes_encrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct,
      180 +    uint64_t length);
      181 +extern int aes_decrypt_ecb(const void *ks, const uint8_t *pt, uint8_t *ct,
      182 +    uint64_t length);
      183 +extern int aes_encrypt_cbc(const void *ks, const uint8_t *pt, uint8_t *ct,
      184 +    const uint8_t *iv, uint64_t length);
      185 +extern int aes_ctr_mode(const void *ks, const uint8_t *pt, uint8_t *ct,
      186 +    uint64_t length, uint64_t counter[2]);
 135  187  
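These bulk entry points take a byte length instead of a single block, which is what lets an implementation amortize per-call overhead across whole runs. A rough sketch of a caller splitting work into AES_OPSZ-sized runs to respect the preemption budget discussed earlier (the driver function is hypothetical; only aes_encrypt_ecb() is from this change, and the CRYPTO_* return convention is assumed):

    static int
    encrypt_ecb_all(const void *ks, const uint8_t *pt, uint8_t *ct,
        uint64_t length)
    {
            int rv = CRYPTO_SUCCESS;

            while (length > 0 && rv == CRYPTO_SUCCESS) {
                    /* On non-amd64 builds AES_OPSZ is effectively
                     * unlimited, so chunking vanishes. */
                    uint64_t run = (length < AES_OPSZ) ? length : AES_OPSZ;

                    rv = aes_encrypt_ecb(ks, pt, ct, run);
                    pt += run;
                    ct += run;
                    length -= run;
            }
            return (rv);
    }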
 136  188  /*
 137  189   * AES mode functions.
 138  190   * The first 2 functions operate on 16-byte AES blocks.
 139  191   */
 140      -extern void aes_copy_block(uint8_t *in, uint8_t *out);
 141      -extern void aes_xor_block(uint8_t *data, uint8_t *dst);
      192 +#ifdef  __amd64
      193 +#define AES_COPY_BLOCK  aes_copy_intel
      194 +#define AES_XOR_BLOCK   aes_xor_intel
      195 +extern void aes_copy_intel(const uint8_t *src, uint8_t *dst);
      196 +extern void aes_xor_intel(const uint8_t *src, uint8_t *dst);
      197 +extern void aes_xor_intel8(const uint8_t *src, uint8_t *dst);
      198 +#else   /* !__amd64 */
      199 +#define AES_COPY_BLOCK  aes_copy_block
      200 +#define AES_XOR_BLOCK   aes_xor_block
      201 +#endif  /* !__amd64 */
 142  202  
      203 +extern void aes_copy_block(const uint8_t *src, uint8_t *dst);
      204 +extern void aes_xor_block(const uint8_t *src, uint8_t *dst);
      205 +
 143  206  /* Note: ctx is a pointer to aes_ctx_t defined in modes.h */
 144  207  extern int aes_encrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 145  208      crypto_data_t *out);
 146  209  extern int aes_decrypt_contiguous_blocks(void *ctx, char *data, size_t length,
 147  210      crypto_data_t *out);
 148  211  
      212 +#if     defined(__amd64) && defined(_KERNEL)
 149  213  /*
      214 + * When AES floating-point acceleration is available, these will be called
       215 + * by the worker functions to save and restore floating point state and
       216 + * to disable and re-enable kernel thread preemption.
      217 + */
      218 +extern void aes_accel_enter(void *savestate);
      219 +extern void aes_accel_exit(void *savestate);
      220 +#endif  /* __amd64 && _KERNEL */
      221 +
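Taken together with AES_ACCEL_SAVESTATE and AES_OPSZ, the intended discipline looks roughly like the following skeleton (the worker body is hypothetical; on other architectures all three constructs compile away to nothing):

    static void
    aes_accel_worker(void)
    {
            AES_ACCEL_SAVESTATE(fpstate);   /* 16 XMM registers + %cr0 */

            aes_accel_enter(fpstate);       /* disable preemption, save FP state */
            /* ... at most AES_OPSZ bytes of SSE/AES-NI work ... */
            aes_accel_exit(fpstate);        /* restore FP state, re-enable preemption */
    }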
      222 +/*
 150  223   * The following definitions and declarations are only used by AES FIPS POST
 151  224   */
 152  225  #ifdef _AES_IMPL
 153  226  
 154  227  #ifdef _KERNEL
 155  228  typedef enum aes_mech_type {
 156  229          AES_ECB_MECH_INFO_TYPE,         /* SUN_CKM_AES_ECB */
 157  230          AES_CBC_MECH_INFO_TYPE,         /* SUN_CKM_AES_CBC */
 158  231          AES_CBC_PAD_MECH_INFO_TYPE,     /* SUN_CKM_AES_CBC_PAD */
 159  232          AES_CTR_MECH_INFO_TYPE,         /* SUN_CKM_AES_CTR */
(13 lines elided)