1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma weak fmaf = __fmaf 32 33 #include "libm.h" 34 #include "fma.h" 35 #include "fenv_inlines.h" 36 37 #if defined(__sparc) 38 /* 39 * fmaf for SPARC: 32-bit single precision, big-endian 40 */ 41 float 42 __fmaf(float x, float y, float z) 43 { 44 union { 45 unsigned i[2]; 46 double d; 47 } xy, zz; 48 49 unsigned u, s; 50 int exy, ez; 51 52 /* 53 * the following operations can only raise the invalid exception, 54 * and then only if either x*y is of the form Inf*0 or one of x, 55 * y, or z is a signaling NaN 56 */ 57 xy.d = (double)x * y; 58 zz.d = (double)z; 59 60 /* 61 * if the sum xy + z will be exact, just compute it and cast the 62 * result to float 63 */ 64 exy = (xy.i[0] >> 20) & 0x7ff; 65 ez = (zz.i[0] >> 20) & 0x7ff; 66 67 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 || 68 ez == 0x7ff || ez == 0) 69 return ((float)(xy.d + zz.d)); 70 71 /* 72 * collapse the tail of the smaller summand into a "sticky bit" 73 * so that the sum can be computed without error 74 */ 75 if (ez > exy) { 76 if (ez - exy < 31) { 77 u = xy.i[1]; 78 s = 2 << (ez - exy); 79 80 if (u & (s - 1)) 81 u |= s; 82 83 xy.i[1] = u & ~(s - 1); 84 } else if (ez - exy < 51) { 85 u = xy.i[0]; 86 s = 1 << (ez - exy - 31); 87 88 if ((u & (s - 1)) | xy.i[1]) 89 u |= s; 90 91 xy.i[0] = u & ~(s - 1); 92 xy.i[1] = 0; 93 } else { 94 /* collapse all of xy into a single bit */ 95 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20); 96 xy.i[1] = 0; 97 } 98 } else { 99 if (exy - ez < 31) { 100 u = zz.i[1]; 101 s = 2 << (exy - ez); 102 103 if (u & (s - 1)) 104 u |= s; 105 106 zz.i[1] = u & ~(s - 1); 107 } else if (exy - ez < 51) { 108 u = zz.i[0]; 109 s = 1 << (exy - ez - 31); 110 111 if ((u & (s - 1)) | zz.i[1]) 112 u |= s; 113 114 zz.i[0] = u & ~(s - 1); 115 zz.i[1] = 0; 116 } else { 117 /* collapse all of zz into a single bit */ 118 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20); 119 zz.i[1] = 0; 120 } 121 } 122 123 return ((float)(xy.d + zz.d)); 124 } 125 #elif defined(__x86) 126 #if defined(__amd64) 127 #define NI 4 128 #else 129 #define NI 3 130 #endif 131 132 /* 133 * fmaf for x86: 32-bit single precision, little-endian 134 */ 135 float 136 __fmaf(float x, float y, float z) 137 { 138 union { 139 unsigned i[NI]; 140 long double e; 141 } xy, zz; 142 143 unsigned u, s, cwsw, oldcwsw; 144 int exy, ez; 145 146 /* set rounding precision to 64 bits */ 147 __fenv_getcwsw(&oldcwsw); 148 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000; 149 __fenv_setcwsw(&cwsw); 150 151 /* 152 * the following operations can only raise the invalid exception, 153 * and then only if either x*y is of the form Inf*0 or one of x, 154 * y, or z is a signaling NaN 155 */ 156 xy.e = (long double)x * y; 157 zz.e = (long double)z; 158 159 /* 160 * if the sum xy + z will be exact, just compute it and cast the 161 * result to float 162 */ 163 exy = xy.i[2] & 0x7fff; 164 ez = zz.i[2] & 0x7fff; 165 166 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 || 167 ez == 0x7fff || ez == 0) 168 goto cont; 169 170 /* 171 * collapse the tail of the smaller summand into a "sticky bit" 172 * so that the sum can be computed without error 173 */ 174 if (ez > exy) { 175 if (ez - exy < 31) { 176 u = xy.i[0]; 177 s = 2 << (ez - exy); 178 179 if (u & (s - 1)) 180 u |= s; 181 182 xy.i[0] = u & ~(s - 1); 183 } else if (ez - exy < 62) { 184 u = xy.i[1]; 185 s = 1 << (ez - exy - 31); 186 187 if ((u & (s - 1)) | xy.i[0]) 188 u |= s; 189 190 xy.i[1] = u & ~(s - 1); 191 xy.i[0] = 0; 192 } else { 193 /* collapse all of xy into a single bit */ 194 xy.i[0] = 0; 195 xy.i[1] = 0x80000000; 196 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62); 197 } 198 } else { 199 if (exy - ez < 62) { 200 u = zz.i[1]; 201 s = 1 << (exy - ez - 31); 202 203 if ((u & (s - 1)) | zz.i[0]) 204 u |= s; 205 206 zz.i[1] = u & ~(s - 1); 207 zz.i[0] = 0; 208 } else { 209 /* collapse all of zz into a single bit */ 210 zz.i[0] = 0; 211 zz.i[1] = 0x80000000; 212 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62); 213 } 214 } 215 216 cont: 217 xy.e += zz.e; 218 219 /* restore the rounding precision */ 220 __fenv_getcwsw(&cwsw); 221 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000); 222 __fenv_setcwsw(&cwsw); 223 224 return ((float)xy.e); 225 } 226 227 #if 0 228 /* 229 * another fmaf for x86: assumes return value will be left in 230 * long double (80-bit double extended) precision 231 */ 232 long double 233 __fmaf(float x, float y, float z) 234 { 235 /* 236 * Note: This implementation assumes the rounding precision mode 237 * is set to the default, rounding to 64 bit precision. If this 238 * routine must work in non-default rounding precision modes, do 239 * the following instead: 240 * 241 * long double t; 242 * 243 * <set rp mode to round to 64 bit precision> 244 * t = x * y; 245 * <restore rp mode> 246 * return t + z; 247 * 248 * Note that the code to change rounding precision must not alter 249 * the exception masks or flags, since the product x * y may raise 250 * an invalid operation exception. 251 */ 252 return ((long double)x * y + z); 253 } 254 #endif 255 #else 256 #error Unknown architecture 257 #endif