illumos-gate Wdiff usr/src/common/crypto/sha1/sha1.c

Print this page

3364 dboot should check boot archive integrity
Reviewed by: Hans Rosenfeld <hans.rosenfeld@nexenta.com>
Reviewed by: Dan McDonald <danmcd@nexenta.com>
Reviewed by: Richard Lowe <richlowe@richlowe.net>
Reviewed by: Garrett D'Amore <garrett@damore.org>

Split	Close
Expand all
Collapse all

          --- old/usr/src/common/crypto/sha1/sha1.c
          +++ new/usr/src/common/crypto/sha1/sha1.c

   1    1  /*
   2    2   * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
   3    3   * Use is subject to license terms.
   4    4   */
   5    5  
   6    6  /*
   7    7   * The basic framework for this code came from the reference
   8    8   * implementation for MD5.  That implementation is Copyright (C)
   9    9   * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved.
  10   10   *
  11   11   * License to copy and use this software is granted provided that it
  12   12   * is identified as the "RSA Data Security, Inc. MD5 Message-Digest
  13   13   * Algorithm" in all material mentioning or referencing this software
  14   14   * or this function.
  15   15   *
  16   16   * License is also granted to make and use derivative works provided
  17   17   * that such works are identified as "derived from the RSA Data
  18   18   * Security, Inc. MD5 Message-Digest Algorithm" in all material
  19   19   * mentioning or referencing the derived work.
  20   20   *
  21   21   * RSA Data Security, Inc. makes no representations concerning either
  22   22   * the merchantability of this software or the suitability of this
  23   23   * software for any particular purpose. It is provided "as is"
  24   24   * without express or implied warranty of any kind.

↓ open down ↓

24 lines elided

↑ open up ↑

  25   25   *
  26   26   * These notices must be retained in any copies of any part of this
  27   27   * documentation and/or software.
  28   28   *
  29   29   * NOTE: Cleaned-up and optimized version of SHA1, based on the FIPS 180-1
  30   30   * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm
  31   31   * Not as fast as one would like -- further optimizations are encouraged
  32   32   * and appreciated.
  33   33   */
  34   34  
  35      -#ifndef _KERNEL
       35 +#if !defined(_KERNEL) && !defined(_BOOT)
  36   36  #include <stdint.h>
  37   37  #include <strings.h>
  38   38  #include <stdlib.h>
  39   39  #include <errno.h>
  40   40  #include <sys/systeminfo.h>
  41      -#endif  /* !_KERNEL */
       41 +#endif  /* !_KERNEL && !_BOOT */
  42   42  
  43   43  #include <sys/types.h>
  44   44  #include <sys/param.h>
  45   45  #include <sys/systm.h>
  46   46  #include <sys/sysmacros.h>
  47   47  #include <sys/sha1.h>
  48   48  #include <sys/sha1_consts.h>
  49   49  
  50   50  #ifdef _LITTLE_ENDIAN
  51   51  #include <sys/byteorder.h>
  52   52  #define HAVE_HTONL
  53   53  #endif
  54   54  
       55 +#ifdef  _BOOT
       56 +#define bcopy(_s, _d, _l)       ((void) memcpy((_d), (_s), (_l)))
       57 +#define bzero(_m, _l)           ((void) memset((_m), 0, (_l)))
       58 +#endif
       59 +
  55   60  static void Encode(uint8_t *, const uint32_t *, size_t);
  56   61  
  57   62  #if     defined(__sparc)
  58   63  
  59   64  #define SHA1_TRANSFORM(ctx, in) \
  60   65          SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \
  61   66                  (ctx)->state[3], (ctx)->state[4], (ctx), (in))
  62   67  
  63   68  static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t,
  64   69      SHA1_CTX *, const uint8_t *);

  65   70  
  66   71  #elif   defined(__amd64)
  67   72  
  68   73  #define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1)
  69   74  #define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \
  70   75                  (in), (num))
  71   76  
  72   77  void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks);
  73   78  
  74   79  #else
  75   80  
  76   81  #define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in))
  77   82  
  78   83  static void SHA1Transform(SHA1_CTX *, const uint8_t *);
  79   84  
  80   85  #endif
  81   86  
  82   87  
  83   88  static uint8_t PADDING[64] = { 0x80, /* all zeros */ };
  84   89  
  85   90  /*
  86   91   * F, G, and H are the basic SHA1 functions.
  87   92   */
  88   93  #define F(b, c, d)      (((b) & (c)) | ((~b) & (d)))
  89   94  #define G(b, c, d)      ((b) ^ (c) ^ (d))
  90   95  #define H(b, c, d)      (((b) & (c)) | (((b)|(c)) & (d)))
  91   96  
  92   97  /*
  93   98   * ROTATE_LEFT rotates x left n bits.  The inline variant rotates only the low 32 bits of its argument (n must be in 1..31 so neither shift count reaches 32); the macro variant rotates across the full width of the type of x.
  94   99   */
  95  100  
  96  101  #if     defined(__GNUC__) && defined(_LP64)
  97  102  static __inline__ uint64_t
  98  103  ROTATE_LEFT(uint64_t value, uint32_t n)
  99  104  {
 100  105          uint32_t t32;
 101  106  
 102  107          t32 = (uint32_t)value;
 103  108          return ((t32 << n) | (t32 >> (32 - n)));
 104  109  }
 105  110  
 106  111  #else
 107  112  
 108  113  #define ROTATE_LEFT(x, n)       \
 109  114          (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n))))
 110  115  
 111  116  #endif
 112  117  
 113  118  
 114  119  /*
 115  120   * SHA1Init()
 116  121   *
 117  122   * purpose: initializes the sha1 context and begins a sha1 digest operation
 118  123   *   input: SHA1_CTX *  : the context to initialize.
 119  124   *  output: void
 120  125   */
 121  126  
 122  127  void
 123  128  SHA1Init(SHA1_CTX *ctx)
 124  129  {
 125  130          ctx->count[0] = ctx->count[1] = 0;
 126  131  
 127  132          /*
 128  133           * load the magic initialization constants into state[0..4].
 129  134           * Tell lint that these constants are unsigned by using U.
 130  135           */
 131  136  
 132  137          ctx->state[0] = 0x67452301U;
 133  138          ctx->state[1] = 0xefcdab89U;
 134  139          ctx->state[2] = 0x98badcfeU;
 135  140          ctx->state[3] = 0x10325476U;
 136  141          ctx->state[4] = 0xc3d2e1f0U;
 137  142  }
 138  143  
 139  144  #ifdef VIS_SHA1
 140  145  #ifdef _KERNEL
 141  146  
 142  147  #include <sys/regset.h>
 143  148  #include <sys/vis.h>
 144  149  #include <sys/fpu/fpusystm.h>
 145  150  
 146  151  /* the alignment for block stores to save fp registers */
 147  152  #define VIS_ALIGN       (64)
 148  153  
 149  154  extern int sha1_savefp(kfpu_t *, int);
 150  155  extern void sha1_restorefp(kfpu_t *);
 151  156  
 152  157  uint32_t        vis_sha1_svfp_threshold = 128;
 153  158  
 154  159  #endif /* _KERNEL */
 155  160  
 156  161  /*
 157  162   * VIS SHA-1 consts.
 158  163   */
 159  164  static uint64_t VIS[] = {
 160  165          0x8000000080000000ULL,
 161  166          0x0002000200020002ULL,
 162  167          0x5a8279996ed9eba1ULL,
 163  168          0x8f1bbcdcca62c1d6ULL,
 164  169          0x012389ab456789abULL};
 165  170  
 166  171  extern void SHA1TransformVIS(uint64_t *, uint32_t *, uint32_t *, uint64_t *);
 167  172  
 168  173  
 169  174  /*
 170  175   * SHA1Update()
 171  176   *
 172  177   * purpose: continues an sha1 digest operation, using the message block
 173  178   *          to update the context.
 174  179   *   input: SHA1_CTX *  : the context to update
 175  180   *          void *      : the message block
 176  181   *          size_t    : the length of the message block in bytes
 177  182   *  output: void
 178  183   */
 179  184  
 180  185  void
 181  186  SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 182  187  {
 183  188          uint32_t i, buf_index, buf_len;
 184  189          uint64_t X0[40], input64[8];
 185  190          const uint8_t *input = inptr;
 186  191  #ifdef _KERNEL
 187  192          int usevis = 0;
 188  193  #else
 189  194          int usevis = 1;
 190  195  #endif /* _KERNEL */
 191  196  
 192  197          /* check for noop */
 193  198          if (input_len == 0)
 194  199                  return;
 195  200  
 196  201          /* compute number of bytes already buffered in ctx (byte count mod 64) */
 197  202          buf_index = (ctx->count[1] >> 3) & 0x3F;
 198  203  
 199  204          /* update number of bits; NOTE(review): the carry test compares at size_t width -- confirm it behaves as intended when size_t is wider than count[1] */
 200  205          if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 201  206                  ctx->count[0]++;
 202  207  
 203  208          ctx->count[0] += (input_len >> 29);
 204  209  
 205  210          buf_len = 64 - buf_index;
 206  211  
 207  212          /* transform as many times as possible; NOTE(review): i is uint32_t while input_len is size_t -- confirm lengths of 4 GiB or more cannot reach here */
 208  213          i = 0;
 209  214          if (input_len >= buf_len) {
 210  215  #ifdef _KERNEL
 211  216                  kfpu_t *fpu;
 212  217                  if (fpu_exists) {
 213  218                          uint8_t fpua[sizeof (kfpu_t) + GSR_SIZE + VIS_ALIGN];
 214  219                          uint32_t len = (input_len + buf_index) & ~0x3f;
 215  220                          int svfp_ok;
 216  221  
 217  222                          fpu = (kfpu_t *)P2ROUNDUP((uintptr_t)fpua, 64);
 218  223                          svfp_ok = ((len >= vis_sha1_svfp_threshold) ? 1 : 0);
 219  224                          usevis = fpu_exists && sha1_savefp(fpu, svfp_ok);
 220  225                  } else {
 221  226                          usevis = 0;
 222  227                  }
 223  228  #endif /* _KERNEL */
 224  229  
 225  230                  /*
 226  231                   * general optimization:
 227  232                   *
 228  233                   * only do initial bcopy() and SHA1Transform() if
 229  234                   * buf_index != 0.  if buf_index == 0, we're just
 230  235                   * wasting our time doing the bcopy() since there
 231  236                   * wasn't any data left over from a previous call to
 232  237                   * SHA1Update().
 233  238                   */
 234  239  
 235  240                  if (buf_index) {
 236  241                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 237  242                          if (usevis) {
 238  243                                  SHA1TransformVIS(X0,
 239  244                                      ctx->buf_un.buf32,
 240  245                                      &ctx->state[0], VIS);
 241  246                          } else {
 242  247                                  SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 243  248                          }
 244  249                          i = buf_len;
 245  250                  }
 246  251  
 247  252                  /*
 248  253                   * VIS SHA-1: uses the VIS 1.0 instructions to accelerate
 249  254                   * SHA-1 processing. This is achieved by "offloading" the
 250  255                   * computation of the message schedule (MS) to the VIS units.
 251  256                   * This allows the VIS computation of the message schedule
 252  257                   * to be performed in parallel with the standard integer
 253  258                   * processing of the remainder of the SHA-1 computation.
 254  259                   * This improves performance by up to around 1.37X, compared to an optimized
 255  260                   * integer-only implementation.
 256  261                   *
 257  262                   * The VIS implementation of SHA1Transform has a different API
 258  263                   * to the standard integer version:
 259  264                   *
 260  265                   * void SHA1TransformVIS(
 261  266                   *       uint64_t *, // Pointer to MS for ith block
 262  267                   *       uint32_t *, // Pointer to ith block of message data
 263  268                   *       uint32_t *, // Pointer to SHA state i.e ctx->state
 264  269                   *       uint64_t *, // Pointer to various VIS constants
 265  270                   * )
 266  271                   *
 267  272                   * Note: the message data must be 4-byte aligned.
 268  273                   *
 269  274                   * Function requires VIS 1.0 support.
 270  275                   *
 271  276                   * Handling is provided to deal with arbitrary byte alignment
 272  277                   * of the input data but the performance gains are reduced
 273  278                   * for alignments other than 4-bytes.
 274  279                   */
 275  280                  if (usevis) {
 276  281                          if (!IS_P2ALIGNED(&input[i], sizeof (uint32_t))) {
 277  282                                  /*
 278  283                                   * Main processing loop - input misaligned
 279  284                                   */
 280  285                                  for (; i + 63 < input_len; i += 64) {
 281  286                                          bcopy(&input[i], input64, 64);
 282  287                                          SHA1TransformVIS(X0,
 283  288                                              (uint32_t *)input64,
 284  289                                              &ctx->state[0], VIS);
 285  290                                  }
 286  291                          } else {
 287  292                                  /*
 288  293                                   * Main processing loop - input 4-byte aligned
 289  294                                   */
 290  295                                  for (; i + 63 < input_len; i += 64) {
 291  296                                          SHA1TransformVIS(X0,
 292  297                                              /* LINTED E_BAD_PTR_CAST_ALIGN */
 293  298                                              (uint32_t *)&input[i], /* CSTYLED */
 294  299                                              &ctx->state[0], VIS);
 295  300                                  }
 296  301  
 297  302                          }
 298  303  #ifdef _KERNEL
 299  304                          sha1_restorefp(fpu);
 300  305  #endif /* _KERNEL */
 301  306                  } else {
 302  307                          for (; i + 63 < input_len; i += 64) {
 303  308                                  SHA1_TRANSFORM(ctx, &input[i]);
 304  309                          }
 305  310                  }
 306  311  
 307  312                  /*
 308  313                   * general optimization:
 309  314                   *
 310  315                   * if i and input_len are the same, return now instead
 311  316                   * of calling bcopy(), since the bcopy() in this case
 312  317                   * will be an expensive nop.
 313  318                   */
 314  319  
 315  320                  if (input_len == i)
 316  321                          return;
 317  322  
 318  323                  buf_index = 0;
 319  324          }
 320  325  
 321  326          /* buffer remaining input (fewer than 64 bytes) for a later call */
 322  327          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 323  328  }
 324  329  
 325  330  #else /* VIS_SHA1 */
 326  331  
 327  332  void
 328  333  SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len)
 329  334  {
 330  335          uint32_t i, buf_index, buf_len;
 331  336          const uint8_t *input = inptr;
 332  337  #if defined(__amd64)
 333  338          uint32_t        block_count;
 334  339  #endif  /* __amd64 */
 335  340  
 336  341          /* check for noop */
 337  342          if (input_len == 0)
 338  343                  return;
 339  344  
 340  345          /* compute number of bytes already buffered in ctx (byte count mod 64) */
 341  346          buf_index = (ctx->count[1] >> 3) & 0x3F;
 342  347  
 343  348          /* update number of bits; NOTE(review): the carry test compares at size_t width -- confirm it behaves as intended when size_t is wider than count[1] */
 344  349          if ((ctx->count[1] += (input_len << 3)) < (input_len << 3))
 345  350                  ctx->count[0]++;
 346  351  
 347  352          ctx->count[0] += (input_len >> 29);
 348  353  
 349  354          buf_len = 64 - buf_index;
 350  355  
 351  356          /* transform as many times as possible; NOTE(review): i and block_count are uint32_t while input_len is size_t -- confirm lengths of 4 GiB or more cannot reach here */
 352  357          i = 0;
 353  358          if (input_len >= buf_len) {
 354  359  
 355  360                  /*
 356  361                   * general optimization:
 357  362                   *
 358  363                   * only do initial bcopy() and SHA1Transform() if
 359  364                   * buf_index != 0.  if buf_index == 0, we're just
 360  365                   * wasting our time doing the bcopy() since there
 361  366                   * wasn't any data left over from a previous call to
 362  367                   * SHA1Update().
 363  368                   */
 364  369  
 365  370                  if (buf_index) {
 366  371                          bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len);
 367  372                          SHA1_TRANSFORM(ctx, ctx->buf_un.buf8);
 368  373                          i = buf_len;
 369  374                  }
 370  375  
 371  376  #if !defined(__amd64)
 372  377                  for (; i + 63 < input_len; i += 64)
 373  378                          SHA1_TRANSFORM(ctx, &input[i]);
 374  379  #else
 375  380                  block_count = (input_len - i) >> 6;
 376  381                  if (block_count > 0) {
 377  382                          SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count);
 378  383                          i += block_count << 6;
 379  384                  }
 380  385  #endif  /* !__amd64 */
 381  386  
 382  387                  /*
 383  388                   * general optimization:
 384  389                   *
 385  390                   * if i and input_len are the same, return now instead
 386  391                   * of calling bcopy(), since the bcopy() in this case
 387  392                   * will be an expensive nop.
 388  393                   */
 389  394  
 390  395                  if (input_len == i)
 391  396                          return;
 392  397  
 393  398                  buf_index = 0;
 394  399          }
 395  400  
 396  401          /* buffer remaining input (fewer than 64 bytes) for a later call */
 397  402          bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i);
 398  403  }
 399  404  
 400  405  #endif /* VIS_SHA1 */
 401  406  
 402  407  /*
 403  408   * SHA1Final()
 404  409   *
 405  410   * purpose: ends an sha1 digest operation, finalizing the message digest and
 406  411   *          zeroing the context.
 407  412   *   input: uchar_t *   : A buffer to store the digest.
 408  413   *                      : The function actually uses void* because many
 409  414   *                      : callers pass things other than uchar_t here.
 410  415   *          SHA1_CTX *  : the context to finalize, save, and zero
 411  416   *  output: void
 412  417   */
 413  418  
 414  419  void
 415  420  SHA1Final(void *digest, SHA1_CTX *ctx)
 416  421  {
 417  422          uint8_t         bitcount_be[sizeof (ctx->count)];
 418  423          uint32_t        index = (ctx->count[1] >> 3) & 0x3f;
 419  424  
 420  425          /* store bit count, big endian; captured now because the SHA1Update() calls below advance ctx->count */
 421  426          Encode(bitcount_be, ctx->count, sizeof (bitcount_be));
 422  427  
 423  428          /* pad out to 56 mod 64; the length expression yields between 1 and 64 bytes of PADDING */
 424  429          SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index);
 425  430  
 426  431          /* append length (before padding) */
 427  432          SHA1Update(ctx, bitcount_be, sizeof (bitcount_be));
 428  433  
 429  434          /* store state in digest */
 430  435          Encode(digest, ctx->state, sizeof (ctx->state));
 431  436  
 432  437          /* zeroize sensitive information; the context must be re-initialized with SHA1Init() before reuse */
 433  438          bzero(ctx, sizeof (*ctx));
 434  439  }
 435  440  
 436  441  
 437  442  #if !defined(__amd64)
 438  443  
 439  444  typedef uint32_t sha1word;
 440  445  
 441  446  /*
 442  447   * sparc optimization:
 443  448   *
 444  449   * on the sparc, we can load big endian 32-bit data easily.  note that
 445  450   * special care must be taken to ensure the address is 32-bit aligned.
 446  451   * in the interest of speed, we don't check to make sure, since
 447  452   * careful programming can guarantee this for us.
 448  453   */
 449  454  
 450  455  #if     defined(_BIG_ENDIAN)
 451  456  #define LOAD_BIG_32(addr)       (*(uint32_t *)(addr))
 452  457  
 453  458  #elif   defined(HAVE_HTONL)
 454  459  #define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr)))
 455  460  
 456  461  #else
 457  462  /* little endian -- will work on big endian, but slowly */
 458  463  #define LOAD_BIG_32(addr)       \
 459  464          (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3])
 460  465  #endif  /* _BIG_ENDIAN */
 461  466  
 462  467  /*
 463  468   * SHA1Transform()
 464  469   */
 465  470  #if     defined(W_ARRAY)
 466  471  #define W(n) w[n]
 467  472  #else   /* !defined(W_ARRAY) */
 468  473  #define W(n) w_ ## n
 469  474  #endif  /* !defined(W_ARRAY) */
 470  475  
 471  476  
 472  477  #if     defined(__sparc)
 473  478  
 474  479  /*
 475  480   * sparc register window optimization:
 476  481   *
 477  482   * `a', `b', `c', `d', and `e' are passed into SHA1Transform
 478  483   * explicitly since it increases the number of registers available to
 479  484   * the compiler.  under this scheme, these variables can be held in
 480  485   * %i0 - %i4, which leaves more local and out registers available.
 481  486   *
 482  487   * purpose: sha1 transformation -- updates the digest based on `block'
 483  488   *   input: uint32_t    : bytes  1 -  4 of the digest
 484  489   *          uint32_t    : bytes  5 -  8 of the digest
 485  490   *          uint32_t    : bytes  9 - 12 of the digest
 486  491   *          uint32_t    : bytes 13 - 16 of the digest
 487  492   *          uint32_t    : bytes 17 - 20 of the digest
 488  493   *          SHA1_CTX *  : the context to update
 489  494   *          uint8_t [64]: the block to use to update the digest
 490  495   *  output: void
 491  496   */
 492  497  
 493  498  void
 494  499  SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e,
 495  500      SHA1_CTX *ctx, const uint8_t blk[64])
 496  501  {
 497  502          /*
 498  503           * sparc optimization:
 499  504           *
 500  505           * while it is somewhat counter-intuitive, on sparc, it is
 501  506           * more efficient to place all the constants used in this
 502  507           * function in an array and load the values out of the array
 503  508           * than to manually load the constants.  this is because
 504  509           * setting a register to a 32-bit value takes two ops in most
 505  510           * cases: a `sethi' and an `or', but loading a 32-bit value
 506  511           * from memory only takes one `ld' (or `lduw' on v9).  while
 507  512           * this increases memory usage, the compiler can find enough
 508  513           * other things to do while waiting so that the pipeline does
 509  514           * not stall.  additionally, it is likely that many of these
 510  515           * constants are cached so that later accesses do not even go
 511  516           * out to the bus.
 512  517           *
 513  518           * this array is declared `static' to keep the compiler from
 514  519           * having to bcopy() this array onto the stack frame of
 515  520           * SHA1Transform() each time it is called -- which is
 516  521           * unacceptably expensive.
 517  522           *
 518  523           * the `const' is to ensure that callers are good citizens and
 519  524           * do not try to munge the array.  since these routines are
 520  525           * going to be called from inside multithreaded kernelland,
 521  526           * this is a good safety check. -- `sha1_consts' will end up in
 522  527           * .rodata.
 523  528           *
 524  529           * unfortunately, loading from an array in this manner hurts
 525  530           * performance under Intel.  So, there is a macro,
 526  531           * SHA1_CONST(), used in SHA1Transform(), that either expands to
 527  532           * a reference to this array, or to the actual constant,
 528  533           * depending on what platform this code is compiled for.
 529  534           */
 530  535  
 531  536          static const uint32_t sha1_consts[] = {
 532  537                  SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3
 533  538          };
 534  539  
 535  540          /*
 536  541           * general optimization:
 537  542           *
 538  543           * use individual integers instead of using an array.  this is a
 539  544           * win, although the amount it wins by seems to vary quite a bit.
 540  545           */
 541  546  
 542  547          uint32_t        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 543  548          uint32_t        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 544  549  
 545  550          /*
 546  551           * sparc optimization:
 547  552           *
 548  553           * if `block' is already aligned on a 4-byte boundary, use
 549  554           * LOAD_BIG_32() directly.  otherwise, bcopy() into a
 550  555           * buffer that *is* aligned on a 4-byte boundary and then do
 551  556           * the LOAD_BIG_32() on that buffer.  benchmarks have shown
 552  557           * that using the bcopy() is better than loading the bytes
 553  558           * individually and doing the endian-swap by hand.
 554  559           *
 555  560           * even though it's quite tempting to do:
 556  561           *
 557  562           * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32));
 558  563           *
 559  564           * and only have one set of LOAD_BIG_32()'s, the compiler
 560  565           * *does not* like that, so please resist the urge.
 561  566           */
 562  567  
 563  568          if ((uintptr_t)blk & 0x3) {             /* not 4-byte aligned? */
 564  569                  bcopy(blk, ctx->buf_un.buf32,  sizeof (ctx->buf_un.buf32));
 565  570                  w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15);
 566  571                  w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14);
 567  572                  w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13);
 568  573                  w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12);
 569  574                  w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11);
 570  575                  w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10);
 571  576                  w_9  = LOAD_BIG_32(ctx->buf_un.buf32 +  9);
 572  577                  w_8  = LOAD_BIG_32(ctx->buf_un.buf32 +  8);
 573  578                  w_7  = LOAD_BIG_32(ctx->buf_un.buf32 +  7);
 574  579                  w_6  = LOAD_BIG_32(ctx->buf_un.buf32 +  6);
 575  580                  w_5  = LOAD_BIG_32(ctx->buf_un.buf32 +  5);
 576  581                  w_4  = LOAD_BIG_32(ctx->buf_un.buf32 +  4);
 577  582                  w_3  = LOAD_BIG_32(ctx->buf_un.buf32 +  3);
 578  583                  w_2  = LOAD_BIG_32(ctx->buf_un.buf32 +  2);
 579  584                  w_1  = LOAD_BIG_32(ctx->buf_un.buf32 +  1);
 580  585                  w_0  = LOAD_BIG_32(ctx->buf_un.buf32 +  0);
 581  586          } else {
 582  587                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 583  588                  w_15 = LOAD_BIG_32(blk + 60);
 584  589                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 585  590                  w_14 = LOAD_BIG_32(blk + 56);
 586  591                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 587  592                  w_13 = LOAD_BIG_32(blk + 52);
 588  593                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 589  594                  w_12 = LOAD_BIG_32(blk + 48);
 590  595                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 591  596                  w_11 = LOAD_BIG_32(blk + 44);
 592  597                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 593  598                  w_10 = LOAD_BIG_32(blk + 40);
 594  599                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 595  600                  w_9  = LOAD_BIG_32(blk + 36);
 596  601                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 597  602                  w_8  = LOAD_BIG_32(blk + 32);
 598  603                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 599  604                  w_7  = LOAD_BIG_32(blk + 28);
 600  605                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 601  606                  w_6  = LOAD_BIG_32(blk + 24);
 602  607                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 603  608                  w_5  = LOAD_BIG_32(blk + 20);
 604  609                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 605  610                  w_4  = LOAD_BIG_32(blk + 16);
 606  611                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 607  612                  w_3  = LOAD_BIG_32(blk + 12);
 608  613                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 609  614                  w_2  = LOAD_BIG_32(blk +  8);
 610  615                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 611  616                  w_1  = LOAD_BIG_32(blk +  4);
 612  617                  /* LINTED E_BAD_PTR_CAST_ALIGN */
 613  618                  w_0  = LOAD_BIG_32(blk +  0);
 614  619          }
 615  620  #else   /* !defined(__sparc) */
 616  621  
 617  622  void /* CSTYLED */
 618  623  SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64])
 619  624  {
 620  625          /* CSTYLED */
 621  626          sha1word a = ctx->state[0];
 622  627          sha1word b = ctx->state[1];
 623  628          sha1word c = ctx->state[2];
 624  629          sha1word d = ctx->state[3];
 625  630          sha1word e = ctx->state[4];
 626  631  
 627  632  #if     defined(W_ARRAY)
 628  633          sha1word        w[16];
 629  634  #else   /* !defined(W_ARRAY) */
 630  635          sha1word        w_0, w_1, w_2,  w_3,  w_4,  w_5,  w_6,  w_7;
 631  636          sha1word        w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15;
 632  637  #endif  /* !defined(W_ARRAY) */
 633  638  
 634  639          W(0)  = LOAD_BIG_32((void *)(blk +  0));
 635  640          W(1)  = LOAD_BIG_32((void *)(blk +  4));
 636  641          W(2)  = LOAD_BIG_32((void *)(blk +  8));
 637  642          W(3)  = LOAD_BIG_32((void *)(blk + 12));
 638  643          W(4)  = LOAD_BIG_32((void *)(blk + 16));
 639  644          W(5)  = LOAD_BIG_32((void *)(blk + 20));
 640  645          W(6)  = LOAD_BIG_32((void *)(blk + 24));
 641  646          W(7)  = LOAD_BIG_32((void *)(blk + 28));
 642  647          W(8)  = LOAD_BIG_32((void *)(blk + 32));
 643  648          W(9)  = LOAD_BIG_32((void *)(blk + 36));
 644  649          W(10) = LOAD_BIG_32((void *)(blk + 40));
 645  650          W(11) = LOAD_BIG_32((void *)(blk + 44));
 646  651          W(12) = LOAD_BIG_32((void *)(blk + 48));
 647  652          W(13) = LOAD_BIG_32((void *)(blk + 52));
 648  653          W(14) = LOAD_BIG_32((void *)(blk + 56));
 649  654          W(15) = LOAD_BIG_32((void *)(blk + 60));
 650  655  
 651  656  #endif  /* !defined(__sparc) */
 652  657  
 653  658          /*
 654  659           * general optimization:
 655  660           *
 656  661           * even though this approach is described in the standard as
 657  662           * being slower algorithmically, it is 30-40% faster than the
 658  663           * "faster" version under SPARC, because this version has more
 659  664           * of the constraints specified at compile-time and uses fewer
 660  665           * variables (and therefore has better register utilization)
 661  666           * than its "speedier" brother.  (i've tried both, trust me)
 662  667           *
 663  668           * for either method given in the spec, there is an "assignment"
 664  669           * phase where the following takes place:
 665  670           *
 666  671           *      tmp = (main_computation);
 667  672           *      e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp;
 668  673           *
 669  674           * we can make the algorithm go faster by not doing this work,
 670  675           * but just pretending that `d' is now `e', etc. this works
 671  676           * really well and obviates the need for a temporary variable.
 672  677           * however, we still explicitly perform the rotate action,
 673  678           * since it is cheaper on SPARC to do it once than to have to
 674  679           * do it over and over again.
 675  680           */
 676  681  
 677  682          /* round 1 */
 678  683          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */
 679  684          b = ROTATE_LEFT(b, 30);
 680  685  
 681  686          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */
 682  687          a = ROTATE_LEFT(a, 30);
 683  688  
 684  689          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */
 685  690          e = ROTATE_LEFT(e, 30);
 686  691  
 687  692          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */
 688  693          d = ROTATE_LEFT(d, 30);
 689  694  
 690  695          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */
 691  696          c = ROTATE_LEFT(c, 30);
 692  697  
 693  698          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */
 694  699          b = ROTATE_LEFT(b, 30);
 695  700  
 696  701          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */
 697  702          a = ROTATE_LEFT(a, 30);
 698  703  
 699  704          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */
 700  705          e = ROTATE_LEFT(e, 30);
 701  706  
 702  707          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */
 703  708          d = ROTATE_LEFT(d, 30);
 704  709  
 705  710          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */
 706  711          c = ROTATE_LEFT(c, 30);
 707  712  
 708  713          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */
 709  714          b = ROTATE_LEFT(b, 30);
 710  715  
 711  716          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */
 712  717          a = ROTATE_LEFT(a, 30);
 713  718  
 714  719          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */
 715  720          e = ROTATE_LEFT(e, 30);
 716  721  
 717  722          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */
 718  723          d = ROTATE_LEFT(d, 30);
 719  724  
 720  725          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */
 721  726          c = ROTATE_LEFT(c, 30);
 722  727  
 723  728          e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */
 724  729          b = ROTATE_LEFT(b, 30);
 725  730  
 726  731          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 16 */
 727  732          d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0);
 728  733          a = ROTATE_LEFT(a, 30);
 729  734  
 730  735          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 17 */
 731  736          c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0);
 732  737          e = ROTATE_LEFT(e, 30);
 733  738  
 734  739          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 18 */
 735  740          b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0);
 736  741          d = ROTATE_LEFT(d, 30);
 737  742  
 738  743          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 19 */
 739  744          a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0);
 740  745          c = ROTATE_LEFT(c, 30);
 741  746  
 742  747          /* round 2 */
 743  748          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 20 */
 744  749          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1);
 745  750          b = ROTATE_LEFT(b, 30);
 746  751  
 747  752          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 21 */
 748  753          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1);
 749  754          a = ROTATE_LEFT(a, 30);
 750  755  
 751  756          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 22 */
 752  757          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1);
 753  758          e = ROTATE_LEFT(e, 30);
 754  759  
 755  760          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 23 */
 756  761          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1);
 757  762          d = ROTATE_LEFT(d, 30);
 758  763  
 759  764          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 24 */
 760  765          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1);
 761  766          c = ROTATE_LEFT(c, 30);
 762  767  
 763  768          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 25 */
 764  769          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1);
 765  770          b = ROTATE_LEFT(b, 30);
 766  771  
 767  772          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 26 */
 768  773          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1);
 769  774          a = ROTATE_LEFT(a, 30);
 770  775  
 771  776          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 27 */
 772  777          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1);
 773  778          e = ROTATE_LEFT(e, 30);
 774  779  
 775  780          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 28 */
 776  781          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1);
 777  782          d = ROTATE_LEFT(d, 30);
 778  783  
 779  784          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */
 780  785          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + SHA1_CONST(1);
 781  786          c = ROTATE_LEFT(c, 30);
 782  787  
 783  788          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 30 */
 784  789          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1);
 785  790          b = ROTATE_LEFT(b, 30);
 786  791  
 787  792          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 31 */
 788  793          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1);
 789  794          a = ROTATE_LEFT(a, 30);
 790  795  
 791  796          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 32 */
 792  797          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1);
 793  798          e = ROTATE_LEFT(e, 30);
 794  799  
 795  800          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 33 */
 796  801          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1);
 797  802          d = ROTATE_LEFT(d, 30);
 798  803  
 799  804          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 34 */
 800  805          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1);
 801  806          c = ROTATE_LEFT(c, 30);
 802  807  
 803  808          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 35 */
 804  809          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1);
 805  810          b = ROTATE_LEFT(b, 30);
 806  811  
 807  812          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 36 */
 808  813          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1);
 809  814          a = ROTATE_LEFT(a, 30);
 810  815  
 811  816          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 37 */
 812  817          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1);
 813  818          e = ROTATE_LEFT(e, 30);
 814  819  
 815  820          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 38 */
 816  821          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1);
 817  822          d = ROTATE_LEFT(d, 30);
 818  823  
 819  824          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 39 */
 820  825          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1);
 821  826          c = ROTATE_LEFT(c, 30);
 822  827  
 823  828          /* round 3 */
 824  829          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 40 */
 825  830          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2);
 826  831          b = ROTATE_LEFT(b, 30);
 827  832  
 828  833          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 41 */
 829  834          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2);
 830  835          a = ROTATE_LEFT(a, 30);
 831  836  
 832  837          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 42 */
 833  838          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2);
 834  839          e = ROTATE_LEFT(e, 30);
 835  840  
 836  841          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 43 */
 837  842          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2);
 838  843          d = ROTATE_LEFT(d, 30);
 839  844  
 840  845          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 44 */
 841  846          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2);
 842  847          c = ROTATE_LEFT(c, 30);
 843  848  
 844  849          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */
 845  850          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2);
 846  851          b = ROTATE_LEFT(b, 30);
 847  852  
 848  853          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 46 */
 849  854          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2);
 850  855          a = ROTATE_LEFT(a, 30);
 851  856  
 852  857          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 47 */
 853  858          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2);
 854  859          e = ROTATE_LEFT(e, 30);
 855  860  
 856  861          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 48 */
 857  862          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2);
 858  863          d = ROTATE_LEFT(d, 30);
 859  864  
 860  865          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 49 */
 861  866          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2);
 862  867          c = ROTATE_LEFT(c, 30);
 863  868  
 864  869          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 50 */
 865  870          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2);
 866  871          b = ROTATE_LEFT(b, 30);
 867  872  
 868  873          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 51 */
 869  874          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2);
 870  875          a = ROTATE_LEFT(a, 30);
 871  876  
 872  877          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 52 */
 873  878          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2);
 874  879          e = ROTATE_LEFT(e, 30);
 875  880  
 876  881          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 53 */
 877  882          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2);
 878  883          d = ROTATE_LEFT(d, 30);
 879  884  
 880  885          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 54 */
 881  886          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2);
 882  887          c = ROTATE_LEFT(c, 30);
 883  888  
 884  889          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 55 */
 885  890          e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2);
 886  891          b = ROTATE_LEFT(b, 30);
 887  892  
 888  893          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 56 */
 889  894          d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2);
 890  895          a = ROTATE_LEFT(a, 30);
 891  896  
 892  897          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 57 */
 893  898          c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2);
 894  899          e = ROTATE_LEFT(e, 30);
 895  900  
 896  901          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 58 */
 897  902          b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2);
 898  903          d = ROTATE_LEFT(d, 30);
 899  904  
 900  905          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 59 */
 901  906          a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2);
 902  907          c = ROTATE_LEFT(c, 30);
 903  908  
 904  909          /* round 4 */
 905  910          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 60 */
 906  911          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3);
 907  912          b = ROTATE_LEFT(b, 30);
 908  913  
 909  914          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */
 910  915          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3);
 911  916          a = ROTATE_LEFT(a, 30);
 912  917  
 913  918          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 62 */
 914  919          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3);
 915  920          e = ROTATE_LEFT(e, 30);
 916  921  
 917  922          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 63 */
 918  923          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3);
 919  924          d = ROTATE_LEFT(d, 30);
 920  925  
 921  926          W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1);            /* 64 */
 922  927          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3);
 923  928          c = ROTATE_LEFT(c, 30);
 924  929  
 925  930          W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1);            /* 65 */
 926  931          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3);
 927  932          b = ROTATE_LEFT(b, 30);
 928  933  
 929  934          W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1);   /* 66 */
 930  935          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3);
 931  936          a = ROTATE_LEFT(a, 30);
 932  937  
 933  938          W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1);            /* 67 */
 934  939          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3);
 935  940          e = ROTATE_LEFT(e, 30);
 936  941  
 937  942          W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1);            /* 68 */
 938  943          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3);
 939  944          d = ROTATE_LEFT(d, 30);
 940  945  
 941  946          W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1);            /* 69 */
 942  947          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3);
 943  948          c = ROTATE_LEFT(c, 30);
 944  949  
 945  950          W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1);            /* 70 */
 946  951          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3);
 947  952          b = ROTATE_LEFT(b, 30);
 948  953  
 949  954          W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1);            /* 71 */
 950  955          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3);
 951  956          a = ROTATE_LEFT(a, 30);
 952  957  
 953  958          W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1);            /* 72 */
 954  959          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3);
 955  960          e = ROTATE_LEFT(e, 30);
 956  961  
 957  962          W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1);            /* 73 */
 958  963          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3);
 959  964          d = ROTATE_LEFT(d, 30);
 960  965  
 961  966          W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1);  /* 74 */
 962  967          a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3);
 963  968          c = ROTATE_LEFT(c, 30);
 964  969  
 965  970          W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1);  /* 75 */
 966  971          e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3);
 967  972          b = ROTATE_LEFT(b, 30);
 968  973  
 969  974          W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1);  /* 76 */
 970  975          d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3);
 971  976          a = ROTATE_LEFT(a, 30);
 972  977  
 973  978          W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */
 974  979          c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3);
 975  980          e = ROTATE_LEFT(e, 30);
 976  981  
 977  982          W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1);  /* 78 */
 978  983          b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3);
 979  984          d = ROTATE_LEFT(d, 30);
 980  985  
 981  986          W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1);  /* 79 */
 982  987  
 983  988          ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) +
 984  989              SHA1_CONST(3);
 985  990          ctx->state[1] += b;
 986  991          ctx->state[2] += ROTATE_LEFT(c, 30);
 987  992          ctx->state[3] += d;
 988  993          ctx->state[4] += e;
 989  994  
 990  995          /* zeroize sensitive information */
 991  996          W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0;
 992  997          W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0;
 993  998  }
 994  999  #endif  /* !__amd64 */
 995 1000  
 996 1001  
 997 1002  /*
 998 1003   * Encode()
 999 1004   *
1000 1005   * purpose: to store a list of host byte order 32-bit numbers as big endian bytes
1001 1006   *   input: uint8_t *   : place to store the converted big endian numbers
1002 1007   *          uint32_t *  : place to get numbers to convert from
1003 1008   *          size_t      : the length of the input in bytes (expected: multiple of 4)
1004 1009   *  output: void
1005 1010   */
1006 1011  
1007 1012  static void
1008 1013  Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input,
1009 1014      size_t len)
1010 1015  {
1011 1016          size_t          i, j;
1012 1017  
1013 1018  #if     defined(__sparc)        /* fast path: whole-word stores when output is aligned */
1014 1019          if (IS_P2ALIGNED(output, sizeof (uint32_t))) {
1015 1020                  for (i = 0, j = 0; j < len; i++, j += 4) {
1016 1021                          /* LINTED E_BAD_PTR_CAST_ALIGN */
1017 1022                          *((uint32_t *)(output + j)) = input[i];
1018 1023                  }
1019 1024          } else {
1020 1025  #endif  /* __sparc -- the byte-wise path below works for any byte order, but slowly */
1021 1026                  for (i = 0, j = 0; j < len; i++, j += 4) {      /* stores 4 bytes per pass */
1022 1027                          output[j]       = (input[i] >> 24) & 0xff;      /* most significant byte first */
1023 1028                          output[j + 1]   = (input[i] >> 16) & 0xff;
1024 1029                          output[j + 2]   = (input[i] >>  8) & 0xff;
1025 1030                          output[j + 3]   = input[i] & 0xff;
1026 1031                  }
1027 1032  #if     defined(__sparc)        /* close the else opened in the fast-path section above */
1028 1033          }
1029 1034  #endif  /* __sparc */
1030 1035  }

↓ open down ↓

966 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX