1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma weak fma = __fma 32 33 #include "libm.h" 34 #include "fma.h" 35 #include "fenv_inlines.h" 36 37 #if defined(__sparc) 38 static const union { 39 unsigned i[2]; 40 double d; 41 } C[] = { 42 { 0x3fe00000u, 0 }, 43 { 0x40000000u, 0 }, 44 { 0x43300000u, 0 }, 45 { 0x41a00000u, 0 }, 46 { 0x3e500000u, 0 }, 47 { 0x3df00000u, 0 }, 48 { 0x3bf00000u, 0 }, 49 { 0x7fe00000u, 0 }, 50 { 0x00100000u, 0 }, 51 { 0x00100001u, 0 } 52 }; 53 54 #define half C[0].d 55 #define two C[1].d 56 #define two52 C[2].d 57 #define two27 C[3].d 58 #define twom26 C[4].d 59 #define twom32 C[5].d 60 #define twom64 C[6].d 61 #define huge C[7].d 62 #define tiny C[8].d 63 #define tiny2 C[9].d 64 65 static const unsigned int fsr_rm = 0xc0000000u; 66 67 /* 68 * fma for SPARC: 64-bit double precision, big-endian 69 */ 70 double 71 __fma(double x, double y, double z) 72 { 73 union { 74 unsigned i[2]; 75 double d; 76 } xx, yy, zz; 77 78 double xhi, yhi, xlo, ylo, t; 79 unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky; 80 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; 81 volatile double dummy; 82 83 /* extract the high order words of the arguments */ 84 xx.d = x; 85 yy.d = y; 86 zz.d = z; 87 hx = xx.i[0] & ~0x80000000; 88 hy = yy.i[0] & ~0x80000000; 89 hz = zz.i[0] & ~0x80000000; 90 91 /* dispense with inf, nan, and zero cases */ 92 if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 || (hy | 93 yy.i[1]) == 0) /* x or y is inf, nan, or zero */ 94 return (x * y + z); 95 96 if (hz >= 0x7ff00000) /* z is inf or nan */ 97 return (x + z); /* avoid spurious under/overflow in x * y */ 98 99 if ((hz | zz.i[1]) == 0) /* z is zero */ 100 /* 101 * x * y isn't zero but could underflow to zero, 102 * so don't add z, lest we perturb the sign 103 */ 104 return (x * y); 105 106 /* 107 * now x, y, and z are all finite and nonzero; save the fsr and 108 * set round-to-negative-infinity mode (and clear nonstandard 109 * mode before we try to scale subnormal operands) 110 */ 111 __fenv_getfsr32(&fsr); 112 __fenv_setfsr32(&fsr_rm); 113 114 /* extract signs and exponents, and normalize subnormals */ 115 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; 116 sz = zz.i[0] & 0x80000000; 117 ex = hx >> 20; 118 119 if (!ex) { 120 xx.d = x * two52; 121 ex = ((xx.i[0] & ~0x80000000) >> 20) - 52; 122 } 123 124 ey = hy >> 20; 125 126 if (!ey) { 127 yy.d = y * two52; 128 ey = ((yy.i[0] & ~0x80000000) >> 20) - 52; 129 } 130 131 ez = hz >> 20; 132 133 if (!ez) { 134 zz.d = z * two52; 135 ez = ((zz.i[0] & ~0x80000000) >> 20) - 52; 136 } 137 138 /* multiply x*y to 106 bits */ 139 exy = ex + ey - 0x3ff; 140 xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000; 141 yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000; 142 x = xx.d; 143 y = yy.d; 144 xhi = ((x + twom26) + two27) - two27; 145 yhi = ((y + twom26) + two27) - two27; 146 xlo = x - xhi; 147 ylo = y - yhi; 148 x *= y; 149 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; 150 151 if (x >= two) { 152 x *= half; 153 y *= half; 154 exy++; 155 } 156 157 /* extract the significands */ 158 xx.d = x; 159 xy0 = (xx.i[0] & 0xfffff) | 0x100000; 160 xy1 = xx.i[1]; 161 yy.d = t = y + twom32; 162 xy2 = yy.i[1]; 163 yy.d = (y - (t - twom32)) + twom64; 164 xy3 = yy.i[1]; 165 z0 = (zz.i[0] & 0xfffff) | 0x100000; 166 z1 = zz.i[1]; 167 z2 = z3 = 0; 168 169 /* 170 * now x*y is represented by sxy, exy, and xy[0-3], and z is 171 * represented likewise; swap if need be so |xy| <= |z| 172 */ 173 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 || 174 (xy1 == z1 && (xy2 | xy3) != 0)))))) { 175 e = sxy; 176 sxy = sz; 177 sz = e; 178 e = exy; 179 exy = ez; 180 ez = e; 181 e = xy0; 182 xy0 = z0; 183 z0 = e; 184 e = xy1; 185 xy1 = z1; 186 z1 = e; 187 z2 = xy2; 188 xy2 = 0; 189 z3 = xy3; 190 xy3 = 0; 191 } 192 193 /* shift the significand of xy keeping a sticky bit */ 194 e = ez - exy; 195 196 if (e > 116) { 197 xy0 = xy1 = xy2 = 0; 198 xy3 = 1; 199 } else if (e >= 96) { 200 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e)); 201 xy3 = xy0 >> (e - 96); 202 203 if (sticky) 204 xy3 |= 1; 205 206 xy0 = xy1 = xy2 = 0; 207 } else if (e >= 64) { 208 sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e)); 209 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); 210 211 if (sticky) 212 xy3 |= 1; 213 214 xy2 = xy0 >> (e - 64); 215 xy0 = xy1 = 0; 216 } else if (e >= 32) { 217 sticky = xy3 | ((xy2 << 1) << (63 - e)); 218 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); 219 220 if (sticky) 221 xy3 |= 1; 222 223 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); 224 xy1 = xy0 >> (e - 32); 225 xy0 = 0; 226 } else if (e) { 227 sticky = (xy3 << 1) << (31 - e); 228 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); 229 230 if (sticky) 231 xy3 |= 1; 232 233 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); 234 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); 235 xy0 >>= e; 236 } 237 238 /* if this is a magnitude subtract, negate the significand of xy */ 239 if (sxy ^ sz) { 240 xy0 = ~xy0; 241 xy1 = ~xy1; 242 xy2 = ~xy2; 243 xy3 = -xy3; 244 245 if (xy3 == 0) 246 if (++xy2 == 0) 247 if (++xy1 == 0) 248 xy0++; 249 } 250 251 /* add, propagating carries */ 252 z3 += xy3; 253 e = (z3 < xy3); 254 z2 += xy2; 255 256 if (e) { 257 z2++; 258 e = (z2 <= xy2); 259 } else { 260 e = (z2 < xy2); 261 } 262 263 z1 += xy1; 264 265 if (e) { 266 z1++; 267 e = (z1 <= xy1); 268 } else { 269 e = (z1 < xy1); 270 } 271 272 z0 += xy0; 273 274 if (e) 275 z0++; 276 277 /* postnormalize and collect rounding information into z2 */ 278 if (ez < 1) { 279 /* result is tiny; shift right until exponent is within range */ 280 e = 1 - ez; 281 282 if (e > 56) { 283 z2 = 1; /* result can't be exactly zero */ 284 z0 = z1 = 0; 285 } else if (e >= 32) { 286 sticky = z3 | z2 | ((z1 << 1) << (63 - e)); 287 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); 288 289 if (sticky) 290 z2 |= 1; 291 292 z1 = z0 >> (e - 32); 293 z0 = 0; 294 } else { 295 sticky = z3 | (z2 << 1) << (31 - e); 296 z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); 297 298 if (sticky) 299 z2 |= 1; 300 301 z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); 302 z0 >>= e; 303 } 304 305 ez = 1; 306 } else if (z0 >= 0x200000) { 307 /* carry out; shift right by one */ 308 sticky = (z2 & 1) | z3; 309 z2 = (z2 >> 1) | (z1 << 31); 310 311 if (sticky) 312 z2 |= 1; 313 314 z1 = (z1 >> 1) | (z0 << 31); 315 z0 >>= 1; 316 ez++; 317 } else { 318 if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) { 319 /* 320 * borrow/cancellation; shift left as much as 321 * exponent allows 322 */ 323 while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) { 324 z0 = z1; 325 z1 = z2; 326 z2 = z3; 327 z3 = 0; 328 ez -= 32; 329 } 330 331 while (z0 < 0x100000 && ez > 1) { 332 z0 = (z0 << 1) | (z1 >> 31); 333 z1 = (z1 << 1) | (z2 >> 31); 334 z2 = (z2 << 1) | (z3 >> 31); 335 z3 <<= 1; 336 ez--; 337 } 338 } 339 340 if (z3) 341 z2 |= 1; 342 } 343 344 /* get the rounding mode and clear current exceptions */ 345 rm = fsr >> 30; 346 fsr &= ~FSR_CEXC; 347 348 /* strip off the integer bit, if there is one */ 349 ibit = z0 & 0x100000; 350 351 if (ibit) { 352 z0 -= 0x100000; 353 } else { 354 ez = 0; 355 356 if (!(z0 | z1 | z2)) { /* exact zero */ 357 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; 358 zz.i[1] = 0; 359 __fenv_setfsr32(&fsr); 360 return (zz.d); 361 } 362 } 363 364 /* 365 * flip the sense of directed roundings if the result is negative; 366 * the logic below applies to a positive result 367 */ 368 if (sz) 369 rm ^= rm >> 1; 370 371 /* round and raise exceptions */ 372 if (z2) { 373 fsr |= FSR_NXC; 374 375 /* decide whether to round the fraction up */ 376 if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u || (z2 == 377 0x80000000u && (z1 & 1))))) { 378 /* round up and renormalize if necessary */ 379 if (++z1 == 0) { 380 if (++z0 == 0x100000) { 381 z0 = 0; 382 ez++; 383 } 384 } 385 } 386 } 387 388 /* check for under/overflow */ 389 if (ez >= 0x7ff) { 390 if (rm == FSR_RN || rm == FSR_RP) { 391 zz.i[0] = sz | 0x7ff00000; 392 zz.i[1] = 0; 393 } else { 394 zz.i[0] = sz | 0x7fefffff; 395 zz.i[1] = 0xffffffff; 396 } 397 398 fsr |= FSR_OFC | FSR_NXC; 399 } else { 400 zz.i[0] = sz | (ez << 20) | z0; 401 zz.i[1] = z1; 402 403 /* 404 * !ibit => exact result was tiny before rounding, 405 * z2 nonzero => result delivered is inexact 406 */ 407 if (!ibit) { 408 if (z2) 409 fsr |= FSR_UFC | FSR_NXC; 410 else if (fsr & FSR_UFM) 411 fsr |= FSR_UFC; 412 } 413 } 414 415 /* restore the fsr and emulate exceptions as needed */ 416 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 417 __fenv_setfsr32(&fsr); 418 419 if (fsr & FSR_OFC) { 420 dummy = huge; 421 dummy *= huge; 422 } else if (fsr & FSR_UFC) { 423 dummy = tiny; 424 425 if (fsr & FSR_NXC) 426 dummy *= tiny; 427 else 428 dummy -= tiny2; 429 } else { 430 dummy = huge; 431 dummy += tiny; 432 } 433 } else { 434 fsr |= (fsr & 0x1f) << 5; 435 __fenv_setfsr32(&fsr); 436 } 437 438 return (zz.d); 439 } 440 #elif defined(__x86) 441 #if defined(__amd64) 442 #define NI 4 443 #else 444 #define NI 3 445 #endif 446 447 /* 448 * fma for x86: 64-bit double precision, little-endian 449 */ 450 double 451 __fma(double x, double y, double z) 452 { 453 union { 454 unsigned i[NI]; 455 long double e; 456 } xx, yy, zz; 457 458 long double xe, ye, xhi, xlo, yhi, ylo; 459 int ex, ey, ez; 460 unsigned cwsw, oldcwsw, rm; 461 462 /* convert the operands to double extended */ 463 xx.e = (long double)x; 464 yy.e = (long double)y; 465 zz.e = (long double)z; 466 467 /* extract the exponents of the arguments */ 468 ex = xx.i[2] & 0x7fff; 469 ey = yy.i[2] & 0x7fff; 470 ez = zz.i[2] & 0x7fff; 471 472 /* dispense with inf, nan, and zero cases */ 473 if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0) 474 /* x or y is inf, nan, or zero */ 475 return ((double)(xx.e * yy.e + zz.e)); 476 477 if (ez >= 0x7fff) /* z is inf or nan */ 478 return ((double)(xx.e + zz.e)); 479 480 /* avoid spurious inexact in x * y */ 481 482 /* 483 * save the control and status words, mask all exceptions, and 484 * set rounding to 64-bit precision and to-nearest 485 */ 486 __fenv_getcwsw(&oldcwsw); 487 cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000; 488 __fenv_setcwsw(&cwsw); 489 490 /* multiply x*y to 106 bits */ 491 xe = xx.e; 492 xx.i[0] = 0; 493 xhi = xx.e; /* hi 32 bits */ 494 xlo = xe - xhi; /* lo 21 bits */ 495 ye = yy.e; 496 yy.i[0] = 0; 497 yhi = yy.e; 498 ylo = ye - yhi; 499 xe = xe * ye; 500 ye = ((xhi * yhi - xe) + xhi * ylo + xlo * yhi) + xlo * ylo; 501 502 /* distill the sum of xe, ye, and z */ 503 xhi = ye + zz.e; 504 yhi = xhi - ye; 505 xlo = (zz.e - yhi) + (ye - (xhi - yhi)); 506 /* now (xhi,xlo) = ye + z */ 507 508 yhi = xe + xhi; 509 ye = yhi - xe; 510 ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */ 511 512 xhi = xlo + ylo; 513 xe = xhi - xlo; 514 xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */ 515 516 yy.e = yhi + xhi; 517 ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */ 518 519 if (yy.i[1] != 0) { /* yy.e is nonzero */ 520 /* perturb yy.e if its least significant 10 bits are zero */ 521 if (!(yy.i[0] & 0x3ff)) { 522 xx.e = ylo + xlo; 523 524 if (xx.i[1] != 0) { 525 xx.i[2] = (xx.i[2] & 0x8000) | ((yy.i[2] & 526 0x7fff) - 63); 527 xx.i[1] = 0x80000000; 528 xx.i[0] = 0; 529 yy.e += xx.e; 530 } 531 } 532 } else { 533 /* set sign of zero result according to rounding direction */ 534 rm = oldcwsw & 0x0c000000; 535 yy.i[2] = ((rm == FCW_RM) ? 0x8000 : 0); 536 } 537 538 /* 539 * restore the control and status words and convert the result 540 * to double 541 */ 542 __fenv_setcwsw(&oldcwsw); 543 return ((double)yy.e); 544 } 545 #else 546 #error Unknown architecture 547 #endif