il_alllibm Wdiff usr/src/lib/libm/common/m9x/fmaf.c

Print this page

5262 libm needs to be carefully unifdef'd
5268 libm doesn't need to hide symbols which are already local
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Reviewed by: Igor Kozhukhov <ikozhukhov@gmail.com>
Reviewed by: Gordon Ross <gwr@nexenta.com>
Approved by: Gordon Ross <gwr@nexenta.com>

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/common/m9x/fmaf.c
          +++ new/usr/src/lib/libm/common/m9x/fmaf.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END

↓ open down ↓

19 lines elided

↑ open up ↑

  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   27   * Use is subject to license terms.
  28   28   */
  29   29  
  30      -#if defined(ELFOBJ)
  31   30  #pragma weak fmaf = __fmaf
  32      -#endif
  33   31  
  34   32  #include "libm.h"
  35   33  #include "fma.h"
  36   34  #include "fenv_inlines.h"
  37   35  
  38   36  #if defined(__sparc)
  39   37  
  40   38  /*
  41   39   * fmaf for SPARC: 32-bit single precision, big-endian
  42   40   */

  43   41  float
  44   42  __fmaf(float x, float y, float z) {
  45   43          union {
  46   44                  unsigned i[2];
  47   45                  double d;
  48   46          } xy, zz;
  49   47          unsigned u, s;
  50   48          int exy, ez;
  51   49  
  52   50          /*
  53   51           * the following operations can only raise the invalid exception,
  54   52           * and then only if either x*y is of the form Inf*0 or one of x,
  55   53           * y, or z is a signaling NaN
  56   54           */
  57   55          xy.d = (double) x * y;
  58   56          zz.d = (double) z;
  59   57  
  60   58          /*
  61   59           * if the sum xy + z will be exact, just compute it and cast the
  62   60           * result to float
  63   61           */
  64   62          exy = (xy.i[0] >> 20) & 0x7ff;
  65   63          ez = (zz.i[0] >> 20) & 0x7ff;
  66   64          if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  67   65                  ez == 0x7ff || ez == 0) {
  68   66                  return ((float) (xy.d + zz.d));
  69   67          }
  70   68  
  71   69          /*
  72   70           * collapse the tail of the smaller summand into a "sticky bit"
  73   71           * so that the sum can be computed without error
  74   72           */
  75   73          if (ez > exy) {
  76   74                  if (ez - exy < 31) {
  77   75                          u = xy.i[1];
  78   76                          s = 2 << (ez - exy);
  79   77                          if (u & (s - 1))
  80   78                                  u |= s;
  81   79                          xy.i[1] = u & ~(s - 1);
  82   80                  } else if (ez - exy < 51) {
  83   81                          u = xy.i[0];
  84   82                          s = 1 << (ez - exy - 31);
  85   83                          if ((u & (s - 1)) | xy.i[1])
  86   84                                  u |= s;
  87   85                          xy.i[0] = u & ~(s - 1);
  88   86                          xy.i[1] = 0;
  89   87                  } else {
  90   88                          /* collapse all of xy into a single bit */
  91   89                          xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  92   90                          xy.i[1] = 0;
  93   91                  }
  94   92          } else {
  95   93                  if (exy - ez < 31) {
  96   94                          u = zz.i[1];
  97   95                          s = 2 << (exy - ez);
  98   96                          if (u & (s - 1))
  99   97                                  u |= s;
 100   98                          zz.i[1] = u & ~(s - 1);
 101   99                  } else if (exy - ez < 51) {
 102  100                          u = zz.i[0];
 103  101                          s = 1 << (exy - ez - 31);
 104  102                          if ((u & (s - 1)) | zz.i[1])
 105  103                                  u |= s;
 106  104                          zz.i[0] = u & ~(s - 1);
 107  105                          zz.i[1] = 0;
 108  106                  } else {
 109  107                          /* collapse all of zz into a single bit */
 110  108                          zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 111  109                          zz.i[1] = 0;
 112  110                  }
 113  111          }
 114  112  
 115  113          return ((float) (xy.d + zz.d));
 116  114  }
 117  115  
 118  116  #elif defined(__x86)
 119  117  
 120  118  #if defined(__amd64)
 121  119  #define NI      4
 122  120  #else
 123  121  #define NI      3
 124  122  #endif
 125  123  
 126  124  /*
 127  125   * fmaf for x86: 32-bit single precision, little-endian
 128  126   */
 129  127  float
 130  128  __fmaf(float x, float y, float z) {
 131  129          union {
 132  130                  unsigned i[NI];
 133  131                  long double e;
 134  132          } xy, zz;
 135  133          unsigned u, s, cwsw, oldcwsw;
 136  134          int exy, ez;
 137  135  
 138  136          /* set rounding precision to 64 bits */
 139  137          __fenv_getcwsw(&oldcwsw);
 140  138          cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 141  139          __fenv_setcwsw(&cwsw);
 142  140  
 143  141          /*
 144  142           * the following operations can only raise the invalid exception,
 145  143           * and then only if either x*y is of the form Inf*0 or one of x,
 146  144           * y, or z is a signaling NaN
 147  145           */
 148  146          xy.e = (long double) x * y;
 149  147          zz.e = (long double) z;
 150  148  
 151  149          /*
 152  150           * if the sum xy + z will be exact, just compute it and cast the
 153  151           * result to float
 154  152           */
 155  153          exy = xy.i[2] & 0x7fff;
 156  154          ez = zz.i[2] & 0x7fff;
 157  155          if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 158  156                  ez == 0x7fff || ez == 0) {
 159  157                  goto cont;
 160  158          }
 161  159  
 162  160          /*
 163  161           * collapse the tail of the smaller summand into a "sticky bit"
 164  162           * so that the sum can be computed without error
 165  163           */
 166  164          if (ez > exy) {
 167  165                  if (ez - exy < 31) {
 168  166                          u = xy.i[0];
 169  167                          s = 2 << (ez - exy);
 170  168                          if (u & (s - 1))
 171  169                                  u |= s;
 172  170                          xy.i[0] = u & ~(s - 1);
 173  171                  } else if (ez - exy < 62) {
 174  172                          u = xy.i[1];
 175  173                          s = 1 << (ez - exy - 31);
 176  174                          if ((u & (s - 1)) | xy.i[0])
 177  175                                  u |= s;
 178  176                          xy.i[1] = u & ~(s - 1);
 179  177                          xy.i[0] = 0;
 180  178                  } else {
 181  179                          /* collapse all of xy into a single bit */
 182  180                          xy.i[0] = 0;
 183  181                          xy.i[1] = 0x80000000;
 184  182                          xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 185  183                  }
 186  184          } else {
 187  185                  if (exy - ez < 62) {
 188  186                          u = zz.i[1];
 189  187                          s = 1 << (exy - ez - 31);
 190  188                          if ((u & (s - 1)) | zz.i[0])
 191  189                                  u |= s;
 192  190                          zz.i[1] = u & ~(s - 1);
 193  191                          zz.i[0] = 0;
 194  192                  } else {
 195  193                          /* collapse all of zz into a single bit */
 196  194                          zz.i[0] = 0;
 197  195                          zz.i[1] = 0x80000000;
 198  196                          zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 199  197                  }
 200  198          }
 201  199  
 202  200  cont:
 203  201          xy.e += zz.e;
 204  202  
 205  203          /* restore the rounding precision */
 206  204          __fenv_getcwsw(&cwsw);
 207  205          cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 208  206          __fenv_setcwsw(&cwsw);
 209  207  
 210  208          return ((float) xy.e);
 211  209  }
 212  210  
 213  211  #if 0
 214  212  /*
 215  213   * another fmaf for x86: assumes return value will be left in
 216  214   * long double (80-bit double extended) precision
 217  215   */
 218  216  long double
 219  217  __fmaf(float x, float y, float z) {
 220  218          /*
 221  219           * Note: This implementation assumes the rounding precision mode
 222  220           * is set to the default, rounding to 64 bit precision.  If this
 223  221           * routine must work in non-default rounding precision modes, do
 224  222           * the following instead:
 225  223           *
 226  224           *   long double t;
 227  225           *
 228  226           *   <set rp mode to round to 64 bit precision>
 229  227           *   t = x * y;
 230  228           *   <restore rp mode>
 231  229           *   return t + z;
 232  230           *
 233  231           * Note that the code to change rounding precision must not alter
 234  232           * the exception masks or flags, since the product x * y may raise
 235  233           * an invalid operation exception.
 236  234           */
 237  235          return ((long double) x * y + z);
 238  236  }
 239  237  #endif
 240  238  
 241  239  #else
 242  240  #error Unknown architecture
 243  241  #endif

↓ open down ↓

201 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX