il_11210 Wdiff usr/src/lib/libm/common/m9x/fmaf.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/common/m9x/fmaf.c
          +++ new/usr/src/lib/libm/common/m9x/fmaf.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

↓ open down ↓

14 lines elided

↑ open up ↑

  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
       25 +
  25   26  /*
  26   27   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   28   * Use is subject to license terms.
  28   29   */
  29   30  
  30   31  #pragma weak fmaf = __fmaf
  31   32  
  32   33  #include "libm.h"
  33   34  #include "fma.h"
  34   35  #include "fenv_inlines.h"
  35   36  
  36   37  #if defined(__sparc)
  37      -
  38   38  /*
  39   39   * fmaf for SPARC: 32-bit single precision, big-endian
  40   40   */
  41   41  float
  42      -__fmaf(float x, float y, float z) {
       42 +__fmaf(float x, float y, float z)
       43 +{
  43   44          union {
  44   45                  unsigned i[2];
  45   46                  double d;
  46   47          } xy, zz;
       48 +
  47   49          unsigned u, s;
  48   50          int exy, ez;
  49   51  
  50   52          /*
  51   53           * the following operations can only raise the invalid exception,
  52   54           * and then only if either x*y is of the form Inf*0 or one of x,
  53   55           * y, or z is a signaling NaN
  54   56           */
  55      -        xy.d = (double) x * y;
  56      -        zz.d = (double) z;
       57 +        xy.d = (double)x * y;
       58 +        zz.d = (double)z;
  57   59  
  58   60          /*
  59   61           * if the sum xy + z will be exact, just compute it and cast the
  60   62           * result to float
  61   63           */
  62   64          exy = (xy.i[0] >> 20) & 0x7ff;
  63   65          ez = (zz.i[0] >> 20) & 0x7ff;
       66 +
  64   67          if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  65      -                ez == 0x7ff || ez == 0) {
  66      -                return ((float) (xy.d + zz.d));
  67      -        }
       68 +            ez == 0x7ff || ez == 0)
       69 +                return ((float)(xy.d + zz.d));
  68   70  
  69   71          /*
  70   72           * collapse the tail of the smaller summand into a "sticky bit"
  71   73           * so that the sum can be computed without error
  72   74           */
  73   75          if (ez > exy) {
  74   76                  if (ez - exy < 31) {
  75   77                          u = xy.i[1];
  76   78                          s = 2 << (ez - exy);
       79 +
  77   80                          if (u & (s - 1))
  78   81                                  u |= s;
       82 +
  79   83                          xy.i[1] = u & ~(s - 1);
  80   84                  } else if (ez - exy < 51) {
  81   85                          u = xy.i[0];
  82   86                          s = 1 << (ez - exy - 31);
       87 +
  83   88                          if ((u & (s - 1)) | xy.i[1])
  84   89                                  u |= s;
       90 +
  85   91                          xy.i[0] = u & ~(s - 1);
  86   92                          xy.i[1] = 0;
  87   93                  } else {
  88   94                          /* collapse all of xy into a single bit */
  89   95                          xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  90   96                          xy.i[1] = 0;
  91   97                  }
  92   98          } else {
  93   99                  if (exy - ez < 31) {
  94  100                          u = zz.i[1];
  95  101                          s = 2 << (exy - ez);
      102 +
  96  103                          if (u & (s - 1))
  97  104                                  u |= s;
      105 +
  98  106                          zz.i[1] = u & ~(s - 1);
  99  107                  } else if (exy - ez < 51) {
 100  108                          u = zz.i[0];
 101  109                          s = 1 << (exy - ez - 31);
      110 +
 102  111                          if ((u & (s - 1)) | zz.i[1])
 103  112                                  u |= s;
      113 +
 104  114                          zz.i[0] = u & ~(s - 1);
 105  115                          zz.i[1] = 0;
 106  116                  } else {
 107  117                          /* collapse all of zz into a single bit */
 108  118                          zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 109  119                          zz.i[1] = 0;
 110  120                  }
 111  121          }
 112  122  
 113      -        return ((float) (xy.d + zz.d));
      123 +        return ((float)(xy.d + zz.d));
 114  124  }
 115      -
 116  125  #elif defined(__x86)
 117      -
 118  126  #if defined(__amd64)
 119      -#define NI      4
      127 +#define NI              4
 120  128  #else
 121      -#define NI      3
      129 +#define NI              3
 122  130  #endif
 123  131  
 124  132  /*
 125  133   * fmaf for x86: 32-bit single precision, little-endian
 126  134   */
 127  135  float
 128      -__fmaf(float x, float y, float z) {
      136 +__fmaf(float x, float y, float z)
      137 +{
 129  138          union {
 130  139                  unsigned i[NI];
 131  140                  long double e;
 132  141          } xy, zz;
      142 +
 133  143          unsigned u, s, cwsw, oldcwsw;
 134  144          int exy, ez;
 135  145  
 136  146          /* set rounding precision to 64 bits */
 137  147          __fenv_getcwsw(&oldcwsw);
 138  148          cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 139  149          __fenv_setcwsw(&cwsw);
 140  150  
 141  151          /*
 142  152           * the following operations can only raise the invalid exception,
 143  153           * and then only if either x*y is of the form Inf*0 or one of x,
 144  154           * y, or z is a signaling NaN
 145  155           */
 146      -        xy.e = (long double) x * y;
 147      -        zz.e = (long double) z;
      156 +        xy.e = (long double)x * y;
      157 +        zz.e = (long double)z;
 148  158  
 149  159          /*
 150  160           * if the sum xy + z will be exact, just compute it and cast the
 151  161           * result to float
 152  162           */
 153  163          exy = xy.i[2] & 0x7fff;
 154  164          ez = zz.i[2] & 0x7fff;
      165 +
 155  166          if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 156      -                ez == 0x7fff || ez == 0) {
      167 +            ez == 0x7fff || ez == 0)
 157  168                  goto cont;
 158      -        }
 159  169  
 160  170          /*
 161  171           * collapse the tail of the smaller summand into a "sticky bit"
 162  172           * so that the sum can be computed without error
 163  173           */
 164  174          if (ez > exy) {
 165  175                  if (ez - exy < 31) {
 166  176                          u = xy.i[0];
 167  177                          s = 2 << (ez - exy);
      178 +
 168  179                          if (u & (s - 1))
 169  180                                  u |= s;
      181 +
 170  182                          xy.i[0] = u & ~(s - 1);
 171  183                  } else if (ez - exy < 62) {
 172  184                          u = xy.i[1];
 173  185                          s = 1 << (ez - exy - 31);
      186 +
 174  187                          if ((u & (s - 1)) | xy.i[0])
 175  188                                  u |= s;
      189 +
 176  190                          xy.i[1] = u & ~(s - 1);
 177  191                          xy.i[0] = 0;
 178  192                  } else {
 179  193                          /* collapse all of xy into a single bit */
 180  194                          xy.i[0] = 0;
 181  195                          xy.i[1] = 0x80000000;
 182  196                          xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 183  197                  }
 184  198          } else {
 185  199                  if (exy - ez < 62) {
 186  200                          u = zz.i[1];
 187  201                          s = 1 << (exy - ez - 31);
      202 +
 188  203                          if ((u & (s - 1)) | zz.i[0])
 189  204                                  u |= s;
      205 +
 190  206                          zz.i[1] = u & ~(s - 1);
 191  207                          zz.i[0] = 0;
 192  208                  } else {
 193  209                          /* collapse all of zz into a single bit */
 194  210                          zz.i[0] = 0;
 195  211                          zz.i[1] = 0x80000000;
 196  212                          zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 197  213                  }
 198  214          }
 199  215  
 200  216  cont:
 201  217          xy.e += zz.e;
 202  218  
 203  219          /* restore the rounding precision */
 204  220          __fenv_getcwsw(&cwsw);
 205  221          cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 206  222          __fenv_setcwsw(&cwsw);
 207  223  
 208      -        return ((float) xy.e);
      224 +        return ((float)xy.e);
 209  225  }
 210  226  
 211  227  #if 0
 212  228  /*
 213  229   * another fmaf for x86: assumes return value will be left in
 214  230   * long double (80-bit double extended) precision
 215  231   */
 216  232  long double
 217      -__fmaf(float x, float y, float z) {
      233 +__fmaf(float x, float y, float z)
      234 +{
 218  235          /*
 219  236           * Note: This implementation assumes the rounding precision mode
 220  237           * is set to the default, rounding to 64 bit precision.  If this
 221  238           * routine must work in non-default rounding precision modes, do
 222  239           * the following instead:
 223  240           *
 224  241           *   long double t;
 225  242           *
 226  243           *   <set rp mode to round to 64 bit precision>
 227  244           *   t = x * y;
 228  245           *   <restore rp mode>
 229  246           *   return t + z;
 230  247           *
 231  248           * Note that the code to change rounding precision must not alter
 232  249           * the exception masks or flags, since the product x * y may raise
 233  250           * an invalid operation exception.
 234  251           */
 235      -        return ((long double) x * y + z);
      252 +        return ((long double)x * y + z);
 236  253  }
 237  254  #endif
 238      -
 239  255  #else
 240  256  #error Unknown architecture
 241  257  #endif

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX