1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak fmaf = __fmaf
  32 
  33 #include "libm.h"
  34 #include "fma.h"
  35 #include "fenv_inlines.h"
  36 
  37 #if defined(__sparc)
  38 /*
  39  * fmaf for SPARC: 32-bit single precision, big-endian
  40  */
  41 float
  42 __fmaf(float x, float y, float z)
  43 {
  44         union {
  45                 unsigned i[2];
  46                 double d;
  47         } xy, zz;
  48 
  49         unsigned u, s;
  50         int exy, ez;
  51 
  52         /*
  53          * the following operations can only raise the invalid exception,
  54          * and then only if either x*y is of the form Inf*0 or one of x,
  55          * y, or z is a signaling NaN
  56          */
  57         xy.d = (double)x * y;
  58         zz.d = (double)z;
  59 
  60         /*
  61          * if the sum xy + z will be exact, just compute it and cast the
  62          * result to float
  63          */
  64         exy = (xy.i[0] >> 20) & 0x7ff;
  65         ez = (zz.i[0] >> 20) & 0x7ff;
  66 
  67         if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  68             ez == 0x7ff || ez == 0)
  69                 return ((float)(xy.d + zz.d));
  70 
  71         /*
  72          * collapse the tail of the smaller summand into a "sticky bit"
  73          * so that the sum can be computed without error
  74          */
  75         if (ez > exy) {
  76                 if (ez - exy < 31) {
  77                         u = xy.i[1];
  78                         s = 2 << (ez - exy);
  79 
  80                         if (u & (s - 1))
  81                                 u |= s;
  82 
  83                         xy.i[1] = u & ~(s - 1);
  84                 } else if (ez - exy < 51) {
  85                         u = xy.i[0];
  86                         s = 1 << (ez - exy - 31);
  87 
  88                         if ((u & (s - 1)) | xy.i[1])
  89                                 u |= s;
  90 
  91                         xy.i[0] = u & ~(s - 1);
  92                         xy.i[1] = 0;
  93                 } else {
  94                         /* collapse all of xy into a single bit */
  95                         xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  96                         xy.i[1] = 0;
  97                 }
  98         } else {
  99                 if (exy - ez < 31) {
 100                         u = zz.i[1];
 101                         s = 2 << (exy - ez);
 102 
 103                         if (u & (s - 1))
 104                                 u |= s;
 105 
 106                         zz.i[1] = u & ~(s - 1);
 107                 } else if (exy - ez < 51) {
 108                         u = zz.i[0];
 109                         s = 1 << (exy - ez - 31);
 110 
 111                         if ((u & (s - 1)) | zz.i[1])
 112                                 u |= s;
 113 
 114                         zz.i[0] = u & ~(s - 1);
 115                         zz.i[1] = 0;
 116                 } else {
 117                         /* collapse all of zz into a single bit */
 118                         zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 119                         zz.i[1] = 0;
 120                 }
 121         }
 122 
 123         return ((float)(xy.d + zz.d));
 124 }
 125 #elif defined(__x86)
 126 #if defined(__amd64)
 127 #define NI              4
 128 #else
 129 #define NI              3
 130 #endif
 131 
 132 /*
 133  * fmaf for x86: 32-bit single precision, little-endian
 134  */
 135 float
 136 __fmaf(float x, float y, float z)
 137 {
 138         union {
 139                 unsigned i[NI];
 140                 long double e;
 141         } xy, zz;
 142 
 143         unsigned u, s, cwsw, oldcwsw;
 144         int exy, ez;
 145 
 146         /* set rounding precision to 64 bits */
 147         __fenv_getcwsw(&oldcwsw);
 148         cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 149         __fenv_setcwsw(&cwsw);
 150 
 151         /*
 152          * the following operations can only raise the invalid exception,
 153          * and then only if either x*y is of the form Inf*0 or one of x,
 154          * y, or z is a signaling NaN
 155          */
 156         xy.e = (long double)x * y;
 157         zz.e = (long double)z;
 158 
 159         /*
 160          * if the sum xy + z will be exact, just compute it and cast the
 161          * result to float
 162          */
 163         exy = xy.i[2] & 0x7fff;
 164         ez = zz.i[2] & 0x7fff;
 165 
 166         if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 167             ez == 0x7fff || ez == 0)
 168                 goto cont;
 169 
 170         /*
 171          * collapse the tail of the smaller summand into a "sticky bit"
 172          * so that the sum can be computed without error
 173          */
 174         if (ez > exy) {
 175                 if (ez - exy < 31) {
 176                         u = xy.i[0];
 177                         s = 2 << (ez - exy);
 178 
 179                         if (u & (s - 1))
 180                                 u |= s;
 181 
 182                         xy.i[0] = u & ~(s - 1);
 183                 } else if (ez - exy < 62) {
 184                         u = xy.i[1];
 185                         s = 1 << (ez - exy - 31);
 186 
 187                         if ((u & (s - 1)) | xy.i[0])
 188                                 u |= s;
 189 
 190                         xy.i[1] = u & ~(s - 1);
 191                         xy.i[0] = 0;
 192                 } else {
 193                         /* collapse all of xy into a single bit */
 194                         xy.i[0] = 0;
 195                         xy.i[1] = 0x80000000;
 196                         xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 197                 }
 198         } else {
 199                 if (exy - ez < 62) {
 200                         u = zz.i[1];
 201                         s = 1 << (exy - ez - 31);
 202 
 203                         if ((u & (s - 1)) | zz.i[0])
 204                                 u |= s;
 205 
 206                         zz.i[1] = u & ~(s - 1);
 207                         zz.i[0] = 0;
 208                 } else {
 209                         /* collapse all of zz into a single bit */
 210                         zz.i[0] = 0;
 211                         zz.i[1] = 0x80000000;
 212                         zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 213                 }
 214         }
 215 
 216 cont:
 217         xy.e += zz.e;
 218 
 219         /* restore the rounding precision */
 220         __fenv_getcwsw(&cwsw);
 221         cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 222         __fenv_setcwsw(&cwsw);
 223 
 224         return ((float)xy.e);
 225 }
 226 
 227 #if 0
 228 /*
 229  * another fmaf for x86: assumes return value will be left in
 230  * long double (80-bit double extended) precision
 231  */
 232 long double
 233 __fmaf(float x, float y, float z)
 234 {
 235         /*
 236          * Note: This implementation assumes the rounding precision mode
 237          * is set to the default, rounding to 64 bit precision.  If this
 238          * routine must work in non-default rounding precision modes, do
 239          * the following instead:
 240          *
 241          *   long double t;
 242          *
 243          *   <set rp mode to round to 64 bit precision>
 244          *   t = x * y;
 245          *   <restore rp mode>
 246          *   return t + z;
 247          *
 248          * Note that the code to change rounding precision must not alter
 249          * the exception masks or flags, since the product x * y may raise
 250          * an invalid operation exception.
 251          */
 252         return ((long double)x * y + z);
 253 }
 254 #endif
 255 #else
 256 #error Unknown architecture
 257 #endif