1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #if defined(ELFOBJ)
  31 #pragma weak fmaf = __fmaf
  32 #endif
  33 
  34 #include "libm.h"
  35 #include "fma.h"
  36 #include "fenv_inlines.h"
  37 
  38 #if defined(__sparc)
  39 
  40 /*
  41  * fmaf for SPARC: 32-bit single precision, big-endian
  42  */
  43 float
  44 __fmaf(float x, float y, float z) {
  45         union {
  46                 unsigned i[2];
  47                 double d;
  48         } xy, zz;
  49         unsigned u, s;
  50         int exy, ez;
  51 
  52         /*
  53          * the following operations can only raise the invalid exception,
  54          * and then only if either x*y is of the form Inf*0 or one of x,
  55          * y, or z is a signaling NaN
  56          */
  57         xy.d = (double) x * y;
  58         zz.d = (double) z;
  59 
  60         /*
  61          * if the sum xy + z will be exact, just compute it and cast the
  62          * result to float
  63          */
  64         exy = (xy.i[0] >> 20) & 0x7ff;
  65         ez = (zz.i[0] >> 20) & 0x7ff;
  66         if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  67                 ez == 0x7ff || ez == 0) {
  68                 return ((float) (xy.d + zz.d));
  69         }
  70 
  71         /*
  72          * collapse the tail of the smaller summand into a "sticky bit"
  73          * so that the sum can be computed without error
  74          */
  75         if (ez > exy) {
  76                 if (ez - exy < 31) {
  77                         u = xy.i[1];
  78                         s = 2 << (ez - exy);
  79                         if (u & (s - 1))
  80                                 u |= s;
  81                         xy.i[1] = u & ~(s - 1);
  82                 } else if (ez - exy < 51) {
  83                         u = xy.i[0];
  84                         s = 1 << (ez - exy - 31);
  85                         if ((u & (s - 1)) | xy.i[1])
  86                                 u |= s;
  87                         xy.i[0] = u & ~(s - 1);
  88                         xy.i[1] = 0;
  89                 } else {
  90                         /* collapse all of xy into a single bit */
  91                         xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  92                         xy.i[1] = 0;
  93                 }
  94         } else {
  95                 if (exy - ez < 31) {
  96                         u = zz.i[1];
  97                         s = 2 << (exy - ez);
  98                         if (u & (s - 1))
  99                                 u |= s;
 100                         zz.i[1] = u & ~(s - 1);
 101                 } else if (exy - ez < 51) {
 102                         u = zz.i[0];
 103                         s = 1 << (exy - ez - 31);
 104                         if ((u & (s - 1)) | zz.i[1])
 105                                 u |= s;
 106                         zz.i[0] = u & ~(s - 1);
 107                         zz.i[1] = 0;
 108                 } else {
 109                         /* collapse all of zz into a single bit */
 110                         zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 111                         zz.i[1] = 0;
 112                 }
 113         }
 114 
 115         return ((float) (xy.d + zz.d));
 116 }
 117 
 118 #elif defined(__x86)
 119 
 120 #if defined(__amd64)
 121 #define NI      4
 122 #else
 123 #define NI      3
 124 #endif
 125 
 126 /*
 127  * fmaf for x86: 32-bit single precision, little-endian
 128  */
 129 float
 130 __fmaf(float x, float y, float z) {
 131         union {
 132                 unsigned i[NI];
 133                 long double e;
 134         } xy, zz;
 135         unsigned u, s, cwsw, oldcwsw;
 136         int exy, ez;
 137 
 138         /* set rounding precision to 64 bits */
 139         __fenv_getcwsw(&oldcwsw);
 140         cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 141         __fenv_setcwsw(&cwsw);
 142 
 143         /*
 144          * the following operations can only raise the invalid exception,
 145          * and then only if either x*y is of the form Inf*0 or one of x,
 146          * y, or z is a signaling NaN
 147          */
 148         xy.e = (long double) x * y;
 149         zz.e = (long double) z;
 150 
 151         /*
 152          * if the sum xy + z will be exact, just compute it and cast the
 153          * result to float
 154          */
 155         exy = xy.i[2] & 0x7fff;
 156         ez = zz.i[2] & 0x7fff;
 157         if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 158                 ez == 0x7fff || ez == 0) {
 159                 goto cont;
 160         }
 161 
 162         /*
 163          * collapse the tail of the smaller summand into a "sticky bit"
 164          * so that the sum can be computed without error
 165          */
 166         if (ez > exy) {
 167                 if (ez - exy < 31) {
 168                         u = xy.i[0];
 169                         s = 2 << (ez - exy);
 170                         if (u & (s - 1))
 171                                 u |= s;
 172                         xy.i[0] = u & ~(s - 1);
 173                 } else if (ez - exy < 62) {
 174                         u = xy.i[1];
 175                         s = 1 << (ez - exy - 31);
 176                         if ((u & (s - 1)) | xy.i[0])
 177                                 u |= s;
 178                         xy.i[1] = u & ~(s - 1);
 179                         xy.i[0] = 0;
 180                 } else {
 181                         /* collapse all of xy into a single bit */
 182                         xy.i[0] = 0;
 183                         xy.i[1] = 0x80000000;
 184                         xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 185                 }
 186         } else {
 187                 if (exy - ez < 62) {
 188                         u = zz.i[1];
 189                         s = 1 << (exy - ez - 31);
 190                         if ((u & (s - 1)) | zz.i[0])
 191                                 u |= s;
 192                         zz.i[1] = u & ~(s - 1);
 193                         zz.i[0] = 0;
 194                 } else {
 195                         /* collapse all of zz into a single bit */
 196                         zz.i[0] = 0;
 197                         zz.i[1] = 0x80000000;
 198                         zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 199                 }
 200         }
 201 
 202 cont:
 203         xy.e += zz.e;
 204 
 205         /* restore the rounding precision */
 206         __fenv_getcwsw(&cwsw);
 207         cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 208         __fenv_setcwsw(&cwsw);
 209 
 210         return ((float) xy.e);
 211 }
 212 
 213 #if 0
 214 /*
 215  * another fmaf for x86: assumes return value will be left in
 216  * long double (80-bit double extended) precision
 217  */
 218 long double
 219 __fmaf(float x, float y, float z) {
 220         /*
 221          * Note: This implementation assumes the rounding precision mode
 222          * is set to the default, rounding to 64 bit precision.  If this
 223          * routine must work in non-default rounding precision modes, do
 224          * the following instead:
 225          *
 226          *   long double t;
 227          *
 228          *   <set rp mode to round to 64 bit precision>
 229          *   t = x * y;
 230          *   <restore rp mode>
 231          *   return t + z;
 232          *
 233          * Note that the code to change rounding precision must not alter
 234          * the exception masks or flags, since the product x * y may raise
 235          * an invalid operation exception.
 236          */
 237         return ((long double) x * y + z);
 238 }
 239 #endif
 240 
 241 #else
 242 #error Unknown architecture
 243 #endif