il_11210 Sdiff usr/src/lib/libm/common/m9x/fmaf.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak fmaf = __fmaf
  31 
  32 #include "libm.h"
  33 #include "fma.h"
  34 #include "fenv_inlines.h"
  35 
  36 #if defined(__sparc)
  37 
  38 /*
  39  * fmaf for SPARC: 32-bit single precision, big-endian
  40  */
  41 float
  42 __fmaf(float x, float y, float z) {

  43         union {
  44                 unsigned i[2];
  45                 double d;
  46         } xy, zz;

  47         unsigned u, s;
  48         int exy, ez;
  49 
  50         /*
  51          * the following operations can only raise the invalid exception,
  52          * and then only if either x*y is of the form Inf*0 or one of x,
  53          * y, or z is a signaling NaN
  54          */
  55         xy.d = (double) x * y;
  56         zz.d = (double) z;
  57 
  58         /*
  59          * if the sum xy + z will be exact, just compute it and cast the
  60          * result to float
  61          */
  62         exy = (xy.i[0] >> 20) & 0x7ff;
  63         ez = (zz.i[0] >> 20) & 0x7ff;

  64         if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  65                 ez == 0x7ff || ez == 0) {
  66                 return ((float) (xy.d + zz.d));
  67         }
  68 
  69         /*
  70          * collapse the tail of the smaller summand into a "sticky bit"
  71          * so that the sum can be computed without error
  72          */
  73         if (ez > exy) {
  74                 if (ez - exy < 31) {
  75                         u = xy.i[1];
  76                         s = 2 << (ez - exy);

  77                         if (u & (s - 1))
  78                                 u |= s;

  79                         xy.i[1] = u & ~(s - 1);
  80                 } else if (ez - exy < 51) {
  81                         u = xy.i[0];
  82                         s = 1 << (ez - exy - 31);

  83                         if ((u & (s - 1)) | xy.i[1])
  84                                 u |= s;

  85                         xy.i[0] = u & ~(s - 1);
  86                         xy.i[1] = 0;
  87                 } else {
  88                         /* collapse all of xy into a single bit */
  89                         xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  90                         xy.i[1] = 0;
  91                 }
  92         } else {
  93                 if (exy - ez < 31) {
  94                         u = zz.i[1];
  95                         s = 2 << (exy - ez);

  96                         if (u & (s - 1))
  97                                 u |= s;

  98                         zz.i[1] = u & ~(s - 1);
  99                 } else if (exy - ez < 51) {
 100                         u = zz.i[0];
 101                         s = 1 << (exy - ez - 31);

 102                         if ((u & (s - 1)) | zz.i[1])
 103                                 u |= s;

 104                         zz.i[0] = u & ~(s - 1);
 105                         zz.i[1] = 0;
 106                 } else {
 107                         /* collapse all of zz into a single bit */
 108                         zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 109                         zz.i[1] = 0;
 110                 }
 111         }
 112 
 113         return ((float) (xy.d + zz.d));
 114 }
 115 
 116 #elif defined(__x86)
 117 
 118 #if defined(__amd64)
 119 #define NI      4
 120 #else
 121 #define NI      3
 122 #endif
 123 
 124 /*
 125  * fmaf for x86: 32-bit single precision, little-endian
 126  */
 127 float
 128 __fmaf(float x, float y, float z) {

 129         union {
 130                 unsigned i[NI];
 131                 long double e;
 132         } xy, zz;

 133         unsigned u, s, cwsw, oldcwsw;
 134         int exy, ez;
 135 
 136         /* set rounding precision to 64 bits */
 137         __fenv_getcwsw(&oldcwsw);
 138         cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 139         __fenv_setcwsw(&cwsw);
 140 
 141         /*
 142          * the following operations can only raise the invalid exception,
 143          * and then only if either x*y is of the form Inf*0 or one of x,
 144          * y, or z is a signaling NaN
 145          */
 146         xy.e = (long double) x * y;
 147         zz.e = (long double) z;
 148 
 149         /*
 150          * if the sum xy + z will be exact, just compute it and cast the
 151          * result to float
 152          */
 153         exy = xy.i[2] & 0x7fff;
 154         ez = zz.i[2] & 0x7fff;

 155         if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 156                 ez == 0x7fff || ez == 0) {
 157                 goto cont;
 158         }
 159 
 160         /*
 161          * collapse the tail of the smaller summand into a "sticky bit"
 162          * so that the sum can be computed without error
 163          */
 164         if (ez > exy) {
 165                 if (ez - exy < 31) {
 166                         u = xy.i[0];
 167                         s = 2 << (ez - exy);

 168                         if (u & (s - 1))
 169                                 u |= s;

 170                         xy.i[0] = u & ~(s - 1);
 171                 } else if (ez - exy < 62) {
 172                         u = xy.i[1];
 173                         s = 1 << (ez - exy - 31);

 174                         if ((u & (s - 1)) | xy.i[0])
 175                                 u |= s;

 176                         xy.i[1] = u & ~(s - 1);
 177                         xy.i[0] = 0;
 178                 } else {
 179                         /* collapse all of xy into a single bit */
 180                         xy.i[0] = 0;
 181                         xy.i[1] = 0x80000000;
 182                         xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 183                 }
 184         } else {
 185                 if (exy - ez < 62) {
 186                         u = zz.i[1];
 187                         s = 1 << (exy - ez - 31);

 188                         if ((u & (s - 1)) | zz.i[0])
 189                                 u |= s;

 190                         zz.i[1] = u & ~(s - 1);
 191                         zz.i[0] = 0;
 192                 } else {
 193                         /* collapse all of zz into a single bit */
 194                         zz.i[0] = 0;
 195                         zz.i[1] = 0x80000000;
 196                         zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 197                 }
 198         }
 199 
 200 cont:
 201         xy.e += zz.e;
 202 
 203         /* restore the rounding precision */
 204         __fenv_getcwsw(&cwsw);
 205         cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 206         __fenv_setcwsw(&cwsw);
 207 
 208         return ((float) xy.e);
 209 }
 210 
 211 #if 0
 212 /*
 213  * another fmaf for x86: assumes return value will be left in
 214  * long double (80-bit double extended) precision
 215  */
 216 long double
 217 __fmaf(float x, float y, float z) {

 218         /*
 219          * Note: This implementation assumes the rounding precision mode
 220          * is set to the default, rounding to 64 bit precision.  If this
 221          * routine must work in non-default rounding precision modes, do
 222          * the following instead:
 223          *
 224          *   long double t;
 225          *
 226          *   <set rp mode to round to 64 bit precision>
 227          *   t = x * y;
 228          *   <restore rp mode>
 229          *   return t + z;
 230          *
 231          * Note that the code to change rounding precision must not alter
 232          * the exception masks or flags, since the product x * y may raise
 233          * an invalid operation exception.
 234          */
 235         return ((long double) x * y + z);
 236 }
 237 #endif
 238 
 239 #else
 240 #error Unknown architecture
 241 #endif

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak fmaf = __fmaf
  32 
  33 #include "libm.h"
  34 #include "fma.h"
  35 #include "fenv_inlines.h"
  36 
  37 #if defined(__sparc)

  38 /*
  39  * fmaf for SPARC: 32-bit single precision, big-endian
  40  */
  41 float
  42 __fmaf(float x, float y, float z)
  43 {
  44         union {
  45                 unsigned i[2];
  46                 double d;
  47         } xy, zz;
  48 
  49         unsigned u, s;
  50         int exy, ez;
  51 
  52         /*
  53          * the following operations can only raise the invalid exception,
  54          * and then only if either x*y is of the form Inf*0 or one of x,
  55          * y, or z is a signaling NaN
  56          */
  57         xy.d = (double)x * y;
  58         zz.d = (double)z;
  59 
  60         /*
  61          * if the sum xy + z will be exact, just compute it and cast the
  62          * result to float
  63          */
  64         exy = (xy.i[0] >> 20) & 0x7ff;
  65         ez = (zz.i[0] >> 20) & 0x7ff;
  66 
  67         if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
  68             ez == 0x7ff || ez == 0)
  69                 return ((float)(xy.d + zz.d));

  70 
  71         /*
  72          * collapse the tail of the smaller summand into a "sticky bit"
  73          * so that the sum can be computed without error
  74          */
  75         if (ez > exy) {
  76                 if (ez - exy < 31) {
  77                         u = xy.i[1];
  78                         s = 2 << (ez - exy);
  79 
  80                         if (u & (s - 1))
  81                                 u |= s;
  82 
  83                         xy.i[1] = u & ~(s - 1);
  84                 } else if (ez - exy < 51) {
  85                         u = xy.i[0];
  86                         s = 1 << (ez - exy - 31);
  87 
  88                         if ((u & (s - 1)) | xy.i[1])
  89                                 u |= s;
  90 
  91                         xy.i[0] = u & ~(s - 1);
  92                         xy.i[1] = 0;
  93                 } else {
  94                         /* collapse all of xy into a single bit */
  95                         xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
  96                         xy.i[1] = 0;
  97                 }
  98         } else {
  99                 if (exy - ez < 31) {
 100                         u = zz.i[1];
 101                         s = 2 << (exy - ez);
 102 
 103                         if (u & (s - 1))
 104                                 u |= s;
 105 
 106                         zz.i[1] = u & ~(s - 1);
 107                 } else if (exy - ez < 51) {
 108                         u = zz.i[0];
 109                         s = 1 << (exy - ez - 31);
 110 
 111                         if ((u & (s - 1)) | zz.i[1])
 112                                 u |= s;
 113 
 114                         zz.i[0] = u & ~(s - 1);
 115                         zz.i[1] = 0;
 116                 } else {
 117                         /* collapse all of zz into a single bit */
 118                         zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
 119                         zz.i[1] = 0;
 120                 }
 121         }
 122 
 123         return ((float)(xy.d + zz.d));
 124 }

 125 #elif defined(__x86)

 126 #if defined(__amd64)
 127 #define NI              4
 128 #else
 129 #define NI              3
 130 #endif
 131 
 132 /*
 133  * fmaf for x86: 32-bit single precision, little-endian
 134  */
 135 float
 136 __fmaf(float x, float y, float z)
 137 {
 138         union {
 139                 unsigned i[NI];
 140                 long double e;
 141         } xy, zz;
 142 
 143         unsigned u, s, cwsw, oldcwsw;
 144         int exy, ez;
 145 
 146         /* set rounding precision to 64 bits */
 147         __fenv_getcwsw(&oldcwsw);
 148         cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
 149         __fenv_setcwsw(&cwsw);
 150 
 151         /*
 152          * the following operations can only raise the invalid exception,
 153          * and then only if either x*y is of the form Inf*0 or one of x,
 154          * y, or z is a signaling NaN
 155          */
 156         xy.e = (long double)x * y;
 157         zz.e = (long double)z;
 158 
 159         /*
 160          * if the sum xy + z will be exact, just compute it and cast the
 161          * result to float
 162          */
 163         exy = xy.i[2] & 0x7fff;
 164         ez = zz.i[2] & 0x7fff;
 165 
 166         if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
 167             ez == 0x7fff || ez == 0)
 168                 goto cont;

 169 
 170         /*
 171          * collapse the tail of the smaller summand into a "sticky bit"
 172          * so that the sum can be computed without error
 173          */
 174         if (ez > exy) {
 175                 if (ez - exy < 31) {
 176                         u = xy.i[0];
 177                         s = 2 << (ez - exy);
 178 
 179                         if (u & (s - 1))
 180                                 u |= s;
 181 
 182                         xy.i[0] = u & ~(s - 1);
 183                 } else if (ez - exy < 62) {
 184                         u = xy.i[1];
 185                         s = 1 << (ez - exy - 31);
 186 
 187                         if ((u & (s - 1)) | xy.i[0])
 188                                 u |= s;
 189 
 190                         xy.i[1] = u & ~(s - 1);
 191                         xy.i[0] = 0;
 192                 } else {
 193                         /* collapse all of xy into a single bit */
 194                         xy.i[0] = 0;
 195                         xy.i[1] = 0x80000000;
 196                         xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
 197                 }
 198         } else {
 199                 if (exy - ez < 62) {
 200                         u = zz.i[1];
 201                         s = 1 << (exy - ez - 31);
 202 
 203                         if ((u & (s - 1)) | zz.i[0])
 204                                 u |= s;
 205 
 206                         zz.i[1] = u & ~(s - 1);
 207                         zz.i[0] = 0;
 208                 } else {
 209                         /* collapse all of zz into a single bit */
 210                         zz.i[0] = 0;
 211                         zz.i[1] = 0x80000000;
 212                         zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
 213                 }
 214         }
 215 
 216 cont:
 217         xy.e += zz.e;
 218 
 219         /* restore the rounding precision */
 220         __fenv_getcwsw(&cwsw);
 221         cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
 222         __fenv_setcwsw(&cwsw);
 223 
 224         return ((float)xy.e);
 225 }
 226 
 227 #if 0
 228 /*
 229  * another fmaf for x86: assumes return value will be left in
 230  * long double (80-bit double extended) precision
 231  */
 232 long double
 233 __fmaf(float x, float y, float z)
 234 {
 235         /*
 236          * Note: This implementation assumes the rounding precision mode
 237          * is set to the default, rounding to 64 bit precision.  If this
 238          * routine must work in non-default rounding precision modes, do
 239          * the following instead:
 240          *
 241          *   long double t;
 242          *
 243          *   <set rp mode to round to 64 bit precision>
 244          *   t = x * y;
 245          *   <restore rp mode>
 246          *   return t + z;
 247          *
 248          * Note that the code to change rounding precision must not alter
 249          * the exception masks or flags, since the product x * y may raise
 250          * an invalid operation exception.
 251          */
 252         return ((long double)x * y + z);
 253 }
 254 #endif

 255 #else
 256 #error Unknown architecture
 257 #endif