1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #if defined(ELFOBJ) 31 #pragma weak fmaf = __fmaf 32 #endif 33 34 #include "libm.h" 35 #include "fma.h" 36 #include "fenv_inlines.h" 37 38 #if defined(__sparc) 39 40 /* 41 * fmaf for SPARC: 32-bit single precision, big-endian 42 */ 43 float 44 __fmaf(float x, float y, float z) { 45 union { 46 unsigned i[2]; 47 double d; 48 } xy, zz; 49 unsigned u, s; 50 int exy, ez; 51 52 /* 53 * the following operations can only raise the invalid exception, 54 * and then only if either x*y is of the form Inf*0 or one of x, 55 * y, or z is a signaling NaN 56 */ 57 xy.d = (double) x * y; 58 zz.d = (double) z; 59 60 /* 61 * if the sum xy + z will be exact, just compute it and cast the 62 * result to float 63 */ 64 exy = (xy.i[0] >> 20) & 0x7ff; 65 ez = (zz.i[0] >> 20) & 0x7ff; 66 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 || 67 ez == 0x7ff || ez == 0) { 68 return ((float) (xy.d + zz.d)); 69 } 70 71 /* 72 * collapse the tail of the smaller summand into a "sticky bit" 73 * so that the sum can be computed without error 74 */ 75 if (ez > exy) { 76 if (ez - exy < 31) { 77 u = xy.i[1]; 78 s = 2 << (ez - exy); 79 if (u & (s - 1)) 80 u |= s; 81 xy.i[1] = u & ~(s - 1); 82 } else if (ez - exy < 51) { 83 u = xy.i[0]; 84 s = 1 << (ez - exy - 31); 85 if ((u & (s - 1)) | xy.i[1]) 86 u |= s; 87 xy.i[0] = u & ~(s - 1); 88 xy.i[1] = 0; 89 } else { 90 /* collapse all of xy into a single bit */ 91 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20); 92 xy.i[1] = 0; 93 } 94 } else { 95 if (exy - ez < 31) { 96 u = zz.i[1]; 97 s = 2 << (exy - ez); 98 if (u & (s - 1)) 99 u |= s; 100 zz.i[1] = u & ~(s - 1); 101 } else if (exy - ez < 51) { 102 u = zz.i[0]; 103 s = 1 << (exy - ez - 31); 104 if ((u & (s - 1)) | zz.i[1]) 105 u |= s; 106 zz.i[0] = u & ~(s - 1); 107 zz.i[1] = 0; 108 } else { 109 /* collapse all of zz into a single bit */ 110 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20); 111 zz.i[1] = 0; 112 } 113 } 114 115 return ((float) (xy.d + zz.d)); 116 } 117 118 #elif defined(__x86) 119 120 #if defined(__amd64) 121 #define NI 4 122 #else 123 #define NI 3 124 #endif 125 126 /* 127 * fmaf for x86: 32-bit single precision, little-endian 128 */ 129 float 130 __fmaf(float x, float y, float z) { 131 union { 132 unsigned i[NI]; 133 long double e; 134 } xy, zz; 135 unsigned u, s, cwsw, oldcwsw; 136 int exy, ez; 137 138 /* set rounding precision to 64 bits */ 139 __fenv_getcwsw(&oldcwsw); 140 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000; 141 __fenv_setcwsw(&cwsw); 142 143 /* 144 * the following operations can only raise the invalid exception, 145 * and then only if either x*y is of the form Inf*0 or one of x, 146 * y, or z is a signaling NaN 147 */ 148 xy.e = (long double) x * y; 149 zz.e = (long double) z; 150 151 /* 152 * if the sum xy + z will be exact, just compute it and cast the 153 * result to float 154 */ 155 exy = xy.i[2] & 0x7fff; 156 ez = zz.i[2] & 0x7fff; 157 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 || 158 ez == 0x7fff || ez == 0) { 159 goto cont; 160 } 161 162 /* 163 * collapse the tail of the smaller summand into a "sticky bit" 164 * so that the sum can be computed without error 165 */ 166 if (ez > exy) { 167 if (ez - exy < 31) { 168 u = xy.i[0]; 169 s = 2 << (ez - exy); 170 if (u & (s - 1)) 171 u |= s; 172 xy.i[0] = u & ~(s - 1); 173 } else if (ez - exy < 62) { 174 u = xy.i[1]; 175 s = 1 << (ez - exy - 31); 176 if ((u & (s - 1)) | xy.i[0]) 177 u |= s; 178 xy.i[1] = u & ~(s - 1); 179 xy.i[0] = 0; 180 } else { 181 /* collapse all of xy into a single bit */ 182 xy.i[0] = 0; 183 xy.i[1] = 0x80000000; 184 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62); 185 } 186 } else { 187 if (exy - ez < 62) { 188 u = zz.i[1]; 189 s = 1 << (exy - ez - 31); 190 if ((u & (s - 1)) | zz.i[0]) 191 u |= s; 192 zz.i[1] = u & ~(s - 1); 193 zz.i[0] = 0; 194 } else { 195 /* collapse all of zz into a single bit */ 196 zz.i[0] = 0; 197 zz.i[1] = 0x80000000; 198 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62); 199 } 200 } 201 202 cont: 203 xy.e += zz.e; 204 205 /* restore the rounding precision */ 206 __fenv_getcwsw(&cwsw); 207 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000); 208 __fenv_setcwsw(&cwsw); 209 210 return ((float) xy.e); 211 } 212 213 #if 0 214 /* 215 * another fmaf for x86: assumes return value will be left in 216 * long double (80-bit double extended) precision 217 */ 218 long double 219 __fmaf(float x, float y, float z) { 220 /* 221 * Note: This implementation assumes the rounding precision mode 222 * is set to the default, rounding to 64 bit precision. If this 223 * routine must work in non-default rounding precision modes, do 224 * the following instead: 225 * 226 * long double t; 227 * 228 * <set rp mode to round to 64 bit precision> 229 * t = x * y; 230 * <restore rp mode> 231 * return t + z; 232 * 233 * Note that the code to change rounding precision must not alter 234 * the exception masks or flags, since the product x * y may raise 235 * an invalid operation exception. 236 */ 237 return ((long double) x * y + z); 238 } 239 #endif 240 241 #else 242 #error Unknown architecture 243 #endif