Print this page
4896 Performance improvements for KCF AES modes

Split Close
Expand all
Collapse all
          --- old/usr/src/common/crypto/modes/modes.h
          +++ new/usr/src/common/crypto/modes/modes.h
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
  23   23   * Use is subject to license terms.
  24   24   */
       25 +/*
       26 + * Copyright 2015 by Saso Kiselkov. All rights reserved.
       27 + */
  25   28  
  26   29  #ifndef _COMMON_CRYPTO_MODES_H
  27   30  #define _COMMON_CRYPTO_MODES_H
  28   31  
  29   32  #ifdef  __cplusplus
  30   33  extern "C" {
  31   34  #endif
  32   35  
  33   36  #include <sys/strsun.h>
  34   37  #include <sys/systm.h>
↓ open down ↓ 145 lines elided ↑ open up ↑
 180  183  
 181  184  /*
 182  185   * gcm_tag_len:         Length of authentication tag.
 183  186   *
 184  187   * gcm_ghash:           Stores output from the GHASH function.
 185  188   *
 186  189   * gcm_processed_data_len:
 187  190   *                      Length of processed plaintext (encrypt) or
 188  191   *                      length of processed ciphertext (decrypt).
 189  192   *
 190      - * gcm_pt_buf:          Stores the decrypted plaintext returned by
 191      - *                      decrypt_final when the computed authentication
 192      - *                      tag matches the user supplied tag.
 193      - *
 194      - * gcm_pt_buf_len:      Length of the plaintext buffer.
 195      - *
 196  193   * gcm_H:               Subkey.
 197  194   *
      195 + * gcm_H_table:         Pipelined Karatsuba multipliers.
      196 + *
 198  197   * gcm_J0:              Pre-counter block generated from the IV.
 199  198   *
      199 + * gcm_tmp:             Temp storage for ciphertext when padding is needed.
      200 + *
 200  201   * gcm_len_a_len_c:     64-bit representations of the bit lengths of
 201  202   *                      AAD and ciphertext.
 202  203   *
 203  204   * gcm_kmflag:          Current value of kmflag. Used only for allocating
 204  205   *                      the plaintext buffer during decryption.
      206 + *
      207 + * gcm_last_input:      Buffer of (up to) two last blocks. This is used when
      208 + *                      input is not block-aligned and to temporarily hold
      209 + *                      the end of the ciphertext stream during decryption,
      210 + *                      since that could potentially be the GHASH auth tag
      211 + *                      which we must check in the final() call instead of
      212 + *                      decrypting it.
      213 + *
      214 + * gcm_last_input_fill: Number of bytes actually stored in gcm_last_input.
 205  215   */
 206  216  typedef struct gcm_ctx {
 207  217          struct common_ctx gcm_common;   /* shared mode state, reached via the gcm_* accessor macros */
 208  218          size_t gcm_tag_len;             /* length of the authentication tag */
 209  219          size_t gcm_processed_data_len;  /* plaintext (encrypt) or ciphertext (decrypt) processed so far */
 210      -        size_t gcm_pt_buf_len;
 211      -        uint32_t gcm_tmp[4];
 212  220          uint64_t gcm_ghash[2];          /* output of the GHASH function */
 213  221          uint64_t gcm_H[2];              /* subkey */
      222 +#ifdef  __amd64
      223 +        uint8_t gcm_H_table[256];       /* pipelined Karatsuba multipliers; only present on amd64 */
      224 +#endif
 214  225          uint64_t gcm_J0[2];             /* pre-counter block generated from the IV */
      226 +        uint64_t gcm_tmp[2];            /* temp storage for ciphertext when padding is needed */
 215  227          uint64_t gcm_len_a_len_c[2];    /* 64-bit bit lengths of AAD and ciphertext */
 216      -        uint8_t *gcm_pt_buf;
 217  228          int gcm_kmflag;                 /* current value of kmflag */
      229 +        uint8_t gcm_last_input[32];     /* up to the two last input blocks, held back until final() */
      230 +        size_t gcm_last_input_fill;     /* number of bytes actually stored in gcm_last_input */
 218  231  } gcm_ctx_t;
 219  232  
 220  233  #define gcm_keysched            gcm_common.cc_keysched
 221  234  #define gcm_keysched_len        gcm_common.cc_keysched_len
 222  235  #define gcm_cb                  gcm_common.cc_iv
 223  236  #define gcm_remainder           gcm_common.cc_remainder
 224  237  #define gcm_remainder_len       gcm_common.cc_remainder_len
 225  238  #define gcm_lastp               gcm_common.cc_lastp
 226  239  #define gcm_copy_to             gcm_common.cc_copy_to
 227  240  #define gcm_flags               gcm_common.cc_flags
↓ open down ↓ 48 lines elided ↑ open up ↑
 276  289  } des_ctx_t;
 277  290  
 278  291  #define dc_flags                dcu.dcu_ecb.ecb_common.cc_flags
 279  292  #define dc_remainder_len        dcu.dcu_ecb.ecb_common.cc_remainder_len
 280  293  #define dc_keysched             dcu.dcu_ecb.ecb_common.cc_keysched
 281  294  #define dc_keysched_len         dcu.dcu_ecb.ecb_common.cc_keysched_len
 282  295  #define dc_iv                   dcu.dcu_ecb.ecb_common.cc_iv
 283  296  #define dc_lastp                dcu.dcu_ecb.ecb_common.cc_lastp
 284  297  
 285  298  extern int ecb_cipher_contiguous_blocks(ecb_ctx_t *, char *, size_t,
 286      -    crypto_data_t *, size_t, int (*cipher)(const void *, const uint8_t *,
 287      -    uint8_t *));
      299 +    crypto_data_t *, size_t,
      300 +    int (*cipher)(const void *, const uint8_t *, uint8_t *),
      301 +    int (*cipher_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t));
 288  302  
 289  303  extern int cbc_encrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
 290  304      crypto_data_t *, size_t,
 291  305      int (*encrypt)(const void *, const uint8_t *, uint8_t *),
 292      -    void (*copy_block)(uint8_t *, uint8_t *),
 293      -    void (*xor_block)(uint8_t *, uint8_t *));
      306 +    void (*copy_block)(const uint8_t *, uint8_t *),
      307 +    void (*xor_block)(const uint8_t *, uint8_t *),
      308 +    int (*encrypt_cbc)(const void *, const uint8_t *, uint8_t *,
      309 +    const uint8_t *, uint64_t));
 294  310  
 295  311  extern int cbc_decrypt_contiguous_blocks(cbc_ctx_t *, char *, size_t,
 296  312      crypto_data_t *, size_t,
 297  313      int (*decrypt)(const void *, const uint8_t *, uint8_t *),
 298      -    void (*copy_block)(uint8_t *, uint8_t *),
 299      -    void (*xor_block)(uint8_t *, uint8_t *));
      314 +    void (*copy_block)(const uint8_t *, uint8_t *),
      315 +    void (*xor_block)(const uint8_t *, uint8_t *),
      316 +    int (*decrypt_ecb)(const void *, const uint8_t *, uint8_t *, uint64_t),
      317 +    void (*xor_block_range)(const uint8_t *, uint8_t *, uint64_t));
 300  318  
 301  319  extern int ctr_mode_contiguous_blocks(ctr_ctx_t *, char *, size_t,
 302  320      crypto_data_t *, size_t,
 303  321      int (*cipher)(const void *, const uint8_t *, uint8_t *),
 304      -    void (*xor_block)(uint8_t *, uint8_t *));
      322 +    void (*xor_block)(const uint8_t *, uint8_t *),
      323 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      324 +    uint64_t *));
 305  325  
 306  326  extern int ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
 307  327      crypto_data_t *, size_t,
 308  328      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 309      -    void (*copy_block)(uint8_t *, uint8_t *),
 310      -    void (*xor_block)(uint8_t *, uint8_t *));
      329 +    void (*copy_block)(const uint8_t *, uint8_t *),
      330 +    void (*xor_block)(const uint8_t *, uint8_t *));
 311  331  
 312  332  extern int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *, char *, size_t,
 313  333      crypto_data_t *, size_t,
 314  334      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 315      -    void (*copy_block)(uint8_t *, uint8_t *),
 316      -    void (*xor_block)(uint8_t *, uint8_t *));
      335 +    void (*copy_block)(const uint8_t *, uint8_t *),
      336 +    void (*xor_block)(const uint8_t *, uint8_t *));
 317  337  
 318  338  extern int gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
 319  339      crypto_data_t *, size_t,
 320  340      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 321      -    void (*copy_block)(uint8_t *, uint8_t *),
 322      -    void (*xor_block)(uint8_t *, uint8_t *));
      341 +    void (*copy_block)(const uint8_t *, uint8_t *),
      342 +    void (*xor_block)(const uint8_t *, uint8_t *),
      343 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      344 +    uint64_t *));
 323  345  
 324  346  extern int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *, char *, size_t,
 325  347      crypto_data_t *, size_t,
 326  348      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 327      -    void (*copy_block)(uint8_t *, uint8_t *),
 328      -    void (*xor_block)(uint8_t *, uint8_t *));
      349 +    void (*copy_block)(const uint8_t *, uint8_t *),
      350 +    void (*xor_block)(const uint8_t *, uint8_t *),
      351 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      352 +    uint64_t *));
 329  353  
 330  354  int ccm_encrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
 331  355      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 332      -    void (*xor_block)(uint8_t *, uint8_t *));
      356 +    void (*xor_block)(const uint8_t *, uint8_t *));
 333  357  
 334  358  int gcm_encrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
 335  359      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 336      -    void (*copy_block)(uint8_t *, uint8_t *),
 337      -    void (*xor_block)(uint8_t *, uint8_t *));
      360 +    void (*copy_block)(const uint8_t *, uint8_t *),
      361 +    void (*xor_block)(const uint8_t *, uint8_t *));
 338  362  
 339  363  extern int ccm_decrypt_final(ccm_ctx_t *, crypto_data_t *, size_t,
 340  364      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 341      -    void (*copy_block)(uint8_t *, uint8_t *),
 342      -    void (*xor_block)(uint8_t *, uint8_t *));
      365 +    void (*copy_block)(const uint8_t *, uint8_t *),
      366 +    void (*xor_block)(const uint8_t *, uint8_t *));
 343  367  
 344  368  extern int gcm_decrypt_final(gcm_ctx_t *, crypto_data_t *, size_t,
 345  369      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 346      -    void (*xor_block)(uint8_t *, uint8_t *));
      370 +    void (*copy_block)(const uint8_t *, uint8_t *),
      371 +    void (*xor_block)(const uint8_t *, uint8_t *),
      372 +    int (*cipher_ctr)(const void *, const uint8_t *, uint8_t *, uint64_t,
      373 +    uint64_t *));
 347  374  
 348  375  extern int ctr_mode_final(ctr_ctx_t *, crypto_data_t *,
 349  376      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
 350  377  
 351  378  extern int cbc_init_ctx(cbc_ctx_t *, char *, size_t, size_t,
 352      -    void (*copy_block)(uint8_t *, uint64_t *));
      379 +    void (*copy_block)(const uint8_t *, uint64_t *));
 353  380  
 354  381  extern int ctr_init_ctx(ctr_ctx_t *, ulong_t, uint8_t *,
 355      -    void (*copy_block)(uint8_t *, uint8_t *));
      382 +    void (*copy_block)(const uint8_t *, uint8_t *));
 356  383  
 357  384  extern int ccm_init_ctx(ccm_ctx_t *, char *, int, boolean_t, size_t,
 358  385      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 359      -    void (*xor_block)(uint8_t *, uint8_t *));
      386 +    void (*xor_block)(const uint8_t *, uint8_t *));
 360  387  
 361  388  extern int gcm_init_ctx(gcm_ctx_t *, char *, size_t,
 362  389      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 363      -    void (*copy_block)(uint8_t *, uint8_t *),
 364      -    void (*xor_block)(uint8_t *, uint8_t *));
      390 +    void (*copy_block)(const uint8_t *, uint8_t *),
      391 +    void (*xor_block)(const uint8_t *, uint8_t *));
 365  392  
 366  393  extern int gmac_init_ctx(gcm_ctx_t *, char *, size_t,
 367  394      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
 368      -    void (*copy_block)(uint8_t *, uint8_t *),
 369      -    void (*xor_block)(uint8_t *, uint8_t *));
      395 +    void (*copy_block)(const uint8_t *, uint8_t *),
      396 +    void (*xor_block)(const uint8_t *, uint8_t *));
 370  397  
 371  398  extern void calculate_ccm_mac(ccm_ctx_t *, uint8_t *,
 372  399      int (*encrypt_block)(const void *, const uint8_t *, uint8_t *));
 373  400  
 374      -extern void gcm_mul(uint64_t *, uint64_t *, uint64_t *);
 375      -
 376  401  extern void crypto_init_ptrs(crypto_data_t *, void **, offset_t *);
 377      -extern void crypto_get_ptrs(crypto_data_t *, void **, offset_t *,
 378      -    uint8_t **, size_t *, uint8_t **, size_t);
 379  402  
 380  403  extern void *ecb_alloc_ctx(int);
 381  404  extern void *cbc_alloc_ctx(int);
 382  405  extern void *ctr_alloc_ctx(int);
 383  406  extern void *ccm_alloc_ctx(int);
 384  407  extern void *gcm_alloc_ctx(int);
 385  408  extern void *gmac_alloc_ctx(int);
 386  409  extern void crypto_free_mode_ctx(void *);
 387  410  extern void gcm_set_kmflag(gcm_ctx_t *, int);
 388  411  
      412 +#ifdef  INLINE_CRYPTO_GET_PTRS
      413 +/*
      414 + * Get pointers for where in the output to copy a block of encrypted or
      415 + * decrypted data.  The iov_or_mp argument stores a pointer to the current
      416 + * iovec or mp, and offset stores an offset into the current iovec or mp.
      417 + */
      418 +static inline void
      419 +crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset,
      420 +    uint8_t **out_data_1, size_t *out_data_1_len, uint8_t **out_data_2,
      421 +    size_t amt)
      422 +{
      423 +        offset_t offset;
      424 +
      425 +        switch (out->cd_format) {
      426 +        case CRYPTO_DATA_RAW: {
      427 +                iovec_t *iov;
      428 +
      429 +                offset = *current_offset;
      430 +                iov = &out->cd_raw;
      431 +                if ((offset + amt) <= iov->iov_len) {
      432 +                        /* one block fits */
      433 +                        *out_data_1 = (uint8_t *)iov->iov_base + offset;
      434 +                        *out_data_1_len = amt;
      435 +                        *out_data_2 = NULL;
      436 +                        *current_offset = offset + amt;
      437 +                }
      438 +                break;
      439 +        }
      440 +
      441 +        case CRYPTO_DATA_UIO: {
      442 +                uio_t *uio = out->cd_uio;
      443 +                iovec_t *iov;
      444 +                offset_t offset;
      445 +                uintptr_t vec_idx;
      446 +                uint8_t *p;
      447 +
      448 +                offset = *current_offset;
      449 +                vec_idx = (uintptr_t)(*iov_or_mp);
      450 +                iov = &uio->uio_iov[vec_idx];
      451 +                p = (uint8_t *)iov->iov_base + offset;
      452 +                *out_data_1 = p;
      453 +
      454 +                if (offset + amt <= iov->iov_len) {
      455 +                        /* can fit one block into this iov */
      456 +                        *out_data_1_len = amt;
      457 +                        *out_data_2 = NULL;
      458 +                        *current_offset = offset + amt;
      459 +                } else {
      460 +                        /* one block spans two iovecs */
      461 +                        *out_data_1_len = iov->iov_len - offset;
      462 +                        if (vec_idx == uio->uio_iovcnt)
      463 +                                return;
      464 +                        vec_idx++;
      465 +                        iov = &uio->uio_iov[vec_idx];
      466 +                        *out_data_2 = (uint8_t *)iov->iov_base;
      467 +                        *current_offset = amt - *out_data_1_len;
      468 +                }
      469 +                *iov_or_mp = (void *)vec_idx;
      470 +                break;
      471 +        }
      472 +
      473 +        case CRYPTO_DATA_MBLK: {
      474 +                mblk_t *mp;
      475 +                uint8_t *p;
      476 +
      477 +                offset = *current_offset;
      478 +                mp = (mblk_t *)*iov_or_mp;
      479 +                p = mp->b_rptr + offset;
      480 +                *out_data_1 = p;
      481 +                if ((p + amt) <= mp->b_wptr) {
      482 +                        /* can fit one block into this mblk */
      483 +                        *out_data_1_len = amt;
      484 +                        *out_data_2 = NULL;
      485 +                        *current_offset = offset + amt;
      486 +                } else {
      487 +                        /* one block spans two mblks */
      488 +                        *out_data_1_len = _PTRDIFF(mp->b_wptr, p);
      489 +                        if ((mp = mp->b_cont) == NULL)
      490 +                                return;
      491 +                        *out_data_2 = mp->b_rptr;
      492 +                        *current_offset = (amt - *out_data_1_len);
      493 +                }
      494 +                *iov_or_mp = mp;
      495 +                break;
      496 +        }
      497 +        } /* end switch */
      498 +}
      499 +#endif  /* INLINE_CRYPTO_GET_PTRS */
      500 +
      501 +/*
      502 + * Checks whether a crypto_data_t object is composed of a single contiguous
      503 + * buffer. This is used in all fastpath detection code to avoid the
      504 + * possibility of having to do partial block splicing.
      505 + */
      506 +#define CRYPTO_DATA_IS_SINGLE_BLOCK(cd) \
      507 +        (cd != NULL && (cd->cd_format == CRYPTO_DATA_RAW || \
      508 +        (cd->cd_format == CRYPTO_DATA_UIO && cd->cd_uio->uio_iovcnt == 1) || \
      509 +        (cd->cd_format == CRYPTO_DATA_MBLK && cd->cd_mp->b_next == NULL)))
      510 +
      511 +/*
      512 + * Returns the first contiguous data buffer in a crypto_data_t object.
      513 + */
      514 +#define CRYPTO_DATA_FIRST_BLOCK(cd) \
      515 +        (cd->cd_format == CRYPTO_DATA_RAW ? \
      516 +        (void *)(cd->cd_raw.iov_base + cd->cd_offset) : \
      517 +        (cd->cd_format == CRYPTO_DATA_UIO ? \
      518 +        (void *)(cd->cd_uio->uio_iov[0].iov_base + cd->cd_offset) : \
      519 +        (void *)(cd->cd_mp->b_rptr + cd->cd_offset)))
      520 +
 389  521  #ifdef  __cplusplus
 390  522  }
 391  523  #endif
 392  524  
 393  525  #endif  /* _COMMON_CRYPTO_MODES_H */
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX