1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #if defined(ELFOBJ) 31 #pragma weak fmal = __fmal 32 #endif 33 34 #include "libm.h" 35 #include "fma.h" 36 #include "fenv_inlines.h" 37 38 #if defined(__sparc) 39 40 static const union { 41 unsigned i[2]; 42 double d; 43 } C[] = { 44 { 0x3fe00000u, 0 }, 45 { 0x40000000u, 0 }, 46 { 0x3ef00000u, 0 }, 47 { 0x3e700000u, 0 }, 48 { 0x41300000u, 0 }, 49 { 0x3e300000u, 0 }, 50 { 0x3b300000u, 0 }, 51 { 0x38300000u, 0 }, 52 { 0x42300000u, 0 }, 53 { 0x3df00000u, 0 }, 54 { 0x7fe00000u, 0 }, 55 { 0x00100000u, 0 }, 56 { 0x00100001u, 0 }, 57 { 0, 0 }, 58 { 0x7ff00000u, 0 }, 59 { 0x7ff00001u, 0 } 60 }; 61 62 #define half C[0].d 63 #define two C[1].d 64 #define twom16 C[2].d 65 #define twom24 C[3].d 66 #define two20 C[4].d 67 #define twom28 C[5].d 68 #define twom76 C[6].d 69 #define twom124 C[7].d 70 #define two36 C[8].d 71 #define twom32 C[9].d 72 #define huge C[10].d 73 #define tiny C[11].d 74 #define tiny2 C[12].d 75 #define zero C[13].d 76 #define inf C[14].d 77 #define snan C[15].d 78 79 static const unsigned int fsr_rm = 0xc0000000u; 80 81 /* 82 * fmal for SPARC: 128-bit quad precision, big-endian 83 */ 84 long double 85 __fmal(long double x, long double y, long double z) { 86 union { 87 unsigned i[4]; 88 long double q; 89 } xx, yy, zz; 90 union { 91 unsigned i[2]; 92 double d; 93 } u; 94 double dx[5], dy[5], dxy[9], c, s; 95 unsigned xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7; 96 unsigned z0, z1, z2, z3, z4, z5, z6, z7; 97 unsigned rm, sticky; 98 unsigned int fsr; 99 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit; 100 int cx, cy, cz; 101 volatile double dummy; 102 103 /* extract the high order words of the arguments */ 104 xx.q = x; 105 yy.q = y; 106 zz.q = z; 107 hx = xx.i[0] & ~0x80000000; 108 hy = yy.i[0] & ~0x80000000; 109 hz = zz.i[0] & ~0x80000000; 110 111 /* 112 * distinguish zero, finite nonzero, infinite, and quiet nan 113 * arguments; raise invalid and return for signaling nans 114 */ 115 if (hx >= 0x7fff0000) { 116 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) { 117 if (!(hx & 0x8000)) { 118 /* signaling nan, raise invalid */ 119 dummy = snan; 120 dummy += snan; 121 xx.i[0] |= 0x8000; 122 return (xx.q); 123 } 124 cx = 3; /* quiet nan */ 125 } else 126 cx = 2; /* inf */ 127 } else if (hx == 0) { 128 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0; 129 /* subnormal or zero */ 130 } else 131 cx = 1; /* finite nonzero */ 132 133 if (hy >= 0x7fff0000) { 134 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) { 135 if (!(hy & 0x8000)) { 136 dummy = snan; 137 dummy += snan; 138 yy.i[0] |= 0x8000; 139 return (yy.q); 140 } 141 cy = 3; 142 } else 143 cy = 2; 144 } else if (hy == 0) { 145 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0; 146 } else 147 cy = 1; 148 149 if (hz >= 0x7fff0000) { 150 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) { 151 if (!(hz & 0x8000)) { 152 dummy = snan; 153 dummy += snan; 154 zz.i[0] |= 0x8000; 155 return (zz.q); 156 } 157 cz = 3; 158 } else 159 cz = 2; 160 } else if (hz == 0) { 161 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0; 162 } else 163 cz = 1; 164 165 /* get the fsr and clear current exceptions */ 166 __fenv_getfsr32(&fsr); 167 fsr &= ~FSR_CEXC; 168 169 /* handle all other zero, inf, and nan cases */ 170 if (cx != 1 || cy != 1 || cz != 1) { 171 /* if x or y is a quiet nan, return it */ 172 if (cx == 3) { 173 __fenv_setfsr32(&fsr); 174 return (x); 175 } 176 if (cy == 3) { 177 __fenv_setfsr32(&fsr); 178 return (y); 179 } 180 181 /* if x*y is 0*inf, raise invalid and return the default nan */ 182 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) { 183 dummy = zero; 184 dummy *= inf; 185 zz.i[0] = 0x7fffffff; 186 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; 187 return (zz.q); 188 } 189 190 /* if z is a quiet nan, return it */ 191 if (cz == 3) { 192 __fenv_setfsr32(&fsr); 193 return (z); 194 } 195 196 /* 197 * now none of x, y, or z is nan; handle cases where x or y 198 * is inf 199 */ 200 if (cx == 2 || cy == 2) { 201 /* 202 * if z is also inf, either we have inf-inf or 203 * the result is the same as z depending on signs 204 */ 205 if (cz == 2) { 206 if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) { 207 dummy = inf; 208 dummy -= inf; 209 zz.i[0] = 0x7fffffff; 210 zz.i[1] = zz.i[2] = zz.i[3] = 211 0xffffffff; 212 return (zz.q); 213 } 214 __fenv_setfsr32(&fsr); 215 return (z); 216 } 217 218 /* otherwise the result is inf with appropriate sign */ 219 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) | 220 0x7fff0000; 221 zz.i[1] = zz.i[2] = zz.i[3] = 0; 222 __fenv_setfsr32(&fsr); 223 return (zz.q); 224 } 225 226 /* if z is inf, return it */ 227 if (cz == 2) { 228 __fenv_setfsr32(&fsr); 229 return (z); 230 } 231 232 /* 233 * now x, y, and z are all finite; handle cases where x or y 234 * is zero 235 */ 236 if (cx == 0 || cy == 0) { 237 /* either we have 0-0 or the result is the same as z */ 238 if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 239 0) { 240 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 : 241 0; 242 __fenv_setfsr32(&fsr); 243 return (zz.q); 244 } 245 __fenv_setfsr32(&fsr); 246 return (z); 247 } 248 249 /* if we get here, x and y are nonzero finite, z must be zero */ 250 return (x * y); 251 } 252 253 /* 254 * now x, y, and z are all finite and nonzero; set round-to- 255 * negative-infinity mode 256 */ 257 __fenv_setfsr32(&fsr_rm); 258 259 /* 260 * get the signs and exponents and normalize the significands 261 * of x and y 262 */ 263 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; 264 ex = hx >> 16; 265 hx &= 0xffff; 266 if (!ex) { 267 if (hx | (xx.i[1] & 0xfffe0000)) { 268 ex = 1; 269 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) { 270 hx = xx.i[1]; 271 xx.i[1] = xx.i[2]; 272 xx.i[2] = xx.i[3]; 273 xx.i[3] = 0; 274 ex = -31; 275 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) { 276 hx = xx.i[2]; 277 xx.i[1] = xx.i[3]; 278 xx.i[2] = xx.i[3] = 0; 279 ex = -63; 280 } else { 281 hx = xx.i[3]; 282 xx.i[1] = xx.i[2] = xx.i[3] = 0; 283 ex = -95; 284 } 285 while ((hx & 0x10000) == 0) { 286 hx = (hx << 1) | (xx.i[1] >> 31); 287 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31); 288 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31); 289 xx.i[3] <<= 1; 290 ex--; 291 } 292 } else 293 hx |= 0x10000; 294 ey = hy >> 16; 295 hy &= 0xffff; 296 if (!ey) { 297 if (hy | (yy.i[1] & 0xfffe0000)) { 298 ey = 1; 299 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) { 300 hy = yy.i[1]; 301 yy.i[1] = yy.i[2]; 302 yy.i[2] = yy.i[3]; 303 yy.i[3] = 0; 304 ey = -31; 305 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) { 306 hy = yy.i[2]; 307 yy.i[1] = yy.i[3]; 308 yy.i[2] = yy.i[3] = 0; 309 ey = -63; 310 } else { 311 hy = yy.i[3]; 312 yy.i[1] = yy.i[2] = yy.i[3] = 0; 313 ey = -95; 314 } 315 while ((hy & 0x10000) == 0) { 316 hy = (hy << 1) | (yy.i[1] >> 31); 317 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31); 318 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31); 319 yy.i[3] <<= 1; 320 ey--; 321 } 322 } else 323 hy |= 0x10000; 324 exy = ex + ey - 0x3fff; 325 326 /* convert the significands of x and y to doubles */ 327 c = twom16; 328 dx[0] = (double) ((int) hx) * c; 329 dy[0] = (double) ((int) hy) * c; 330 331 c *= twom24; 332 dx[1] = (double) ((int) (xx.i[1] >> 8)) * c; 333 dy[1] = (double) ((int) (yy.i[1] >> 8)) * c; 334 335 c *= twom24; 336 dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) & 337 0xffffff)) * c; 338 dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) & 339 0xffffff)) * c; 340 341 c *= twom24; 342 dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) & 343 0xffffff)) * c; 344 dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) & 345 0xffffff)) * c; 346 347 c *= twom24; 348 dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c; 349 dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c; 350 351 /* form the "digits" of the product */ 352 dxy[0] = dx[0] * dy[0]; 353 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0]; 354 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0]; 355 dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + 356 dx[3] * dy[0]; 357 dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + 358 dx[3] * dy[1] + dx[4] * dy[0]; 359 dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + 360 dx[4] * dy[1]; 361 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2]; 362 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3]; 363 dxy[8] = dx[4] * dy[4]; 364 365 /* split odd-numbered terms and combine into even-numbered terms */ 366 c = (dxy[1] + two20) - two20; 367 dxy[0] += c; 368 dxy[1] -= c; 369 c = (dxy[3] + twom28) - twom28; 370 dxy[2] += c + dxy[1]; 371 dxy[3] -= c; 372 c = (dxy[5] + twom76) - twom76; 373 dxy[4] += c + dxy[3]; 374 dxy[5] -= c; 375 c = (dxy[7] + twom124) - twom124; 376 dxy[6] += c + dxy[5]; 377 dxy[8] += (dxy[7] - c); 378 379 /* propagate carries, adjusting the exponent if need be */ 380 dxy[7] = dxy[6] + dxy[8]; 381 dxy[5] = dxy[4] + dxy[7]; 382 dxy[3] = dxy[2] + dxy[5]; 383 dxy[1] = dxy[0] + dxy[3]; 384 if (dxy[1] >= two) { 385 dxy[0] *= half; 386 dxy[1] *= half; 387 dxy[2] *= half; 388 dxy[3] *= half; 389 dxy[4] *= half; 390 dxy[5] *= half; 391 dxy[6] *= half; 392 dxy[7] *= half; 393 dxy[8] *= half; 394 exy++; 395 } 396 397 /* extract the significand of x*y */ 398 s = two36; 399 u.d = c = dxy[1] + s; 400 xy0 = u.i[1]; 401 c -= s; 402 dxy[1] -= c; 403 dxy[0] -= c; 404 405 s *= twom32; 406 u.d = c = dxy[1] + s; 407 xy1 = u.i[1]; 408 c -= s; 409 dxy[2] += (dxy[0] - c); 410 dxy[3] = dxy[2] + dxy[5]; 411 412 s *= twom32; 413 u.d = c = dxy[3] + s; 414 xy2 = u.i[1]; 415 c -= s; 416 dxy[4] += (dxy[2] - c); 417 dxy[5] = dxy[4] + dxy[7]; 418 419 s *= twom32; 420 u.d = c = dxy[5] + s; 421 xy3 = u.i[1]; 422 c -= s; 423 dxy[4] -= c; 424 dxy[5] = dxy[4] + dxy[7]; 425 426 s *= twom32; 427 u.d = c = dxy[5] + s; 428 xy4 = u.i[1]; 429 c -= s; 430 dxy[6] += (dxy[4] - c); 431 dxy[7] = dxy[6] + dxy[8]; 432 433 s *= twom32; 434 u.d = c = dxy[7] + s; 435 xy5 = u.i[1]; 436 c -= s; 437 dxy[8] += (dxy[6] - c); 438 439 s *= twom32; 440 u.d = c = dxy[8] + s; 441 xy6 = u.i[1]; 442 c -= s; 443 dxy[8] -= c; 444 445 s *= twom32; 446 u.d = c = dxy[8] + s; 447 xy7 = u.i[1]; 448 449 /* extract the sign, exponent, and significand of z */ 450 sz = zz.i[0] & 0x80000000; 451 ez = hz >> 16; 452 z0 = hz & 0xffff; 453 if (!ez) { 454 if (z0 | (zz.i[1] & 0xfffe0000)) { 455 z1 = zz.i[1]; 456 z2 = zz.i[2]; 457 z3 = zz.i[3]; 458 ez = 1; 459 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) { 460 z0 = zz.i[1]; 461 z1 = zz.i[2]; 462 z2 = zz.i[3]; 463 z3 = 0; 464 ez = -31; 465 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) { 466 z0 = zz.i[2]; 467 z1 = zz.i[3]; 468 z2 = z3 = 0; 469 ez = -63; 470 } else { 471 z0 = zz.i[3]; 472 z1 = z2 = z3 = 0; 473 ez = -95; 474 } 475 while ((z0 & 0x10000) == 0) { 476 z0 = (z0 << 1) | (z1 >> 31); 477 z1 = (z1 << 1) | (z2 >> 31); 478 z2 = (z2 << 1) | (z3 >> 31); 479 z3 <<= 1; 480 ez--; 481 } 482 } else { 483 z0 |= 0x10000; 484 z1 = zz.i[1]; 485 z2 = zz.i[2]; 486 z3 = zz.i[3]; 487 } 488 z4 = z5 = z6 = z7 = 0; 489 490 /* 491 * now x*y is represented by sxy, exy, and xy[0-7], and z is 492 * represented likewise; swap if need be so |xy| <= |z| 493 */ 494 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 || 495 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || 496 (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) { 497 e = sxy; sxy = sz; sz = e; 498 e = exy; exy = ez; ez = e; 499 e = xy0; xy0 = z0; z0 = e; 500 e = xy1; xy1 = z1; z1 = e; 501 e = xy2; xy2 = z2; z2 = e; 502 e = xy3; xy3 = z3; z3 = e; 503 z4 = xy4; xy4 = 0; 504 z5 = xy5; xy5 = 0; 505 z6 = xy6; xy6 = 0; 506 z7 = xy7; xy7 = 0; 507 } 508 509 /* shift the significand of xy keeping a sticky bit */ 510 e = ez - exy; 511 if (e > 236) { 512 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; 513 xy7 = 1; 514 } else if (e >= 224) { 515 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | 516 ((xy0 << 1) << (255 - e)); 517 xy7 = xy0 >> (e - 224); 518 if (sticky) 519 xy7 |= 1; 520 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0; 521 } else if (e >= 192) { 522 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | 523 ((xy1 << 1) << (223 - e)); 524 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e)); 525 if (sticky) 526 xy7 |= 1; 527 xy6 = xy0 >> (e - 192); 528 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0; 529 } else if (e >= 160) { 530 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | 531 ((xy2 << 1) << (191 - e)); 532 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e)); 533 if (sticky) 534 xy7 |= 1; 535 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e)); 536 xy5 = xy0 >> (e - 160); 537 xy0 = xy1 = xy2 = xy3 = xy4 = 0; 538 } else if (e >= 128) { 539 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e)); 540 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e)); 541 if (sticky) 542 xy7 |= 1; 543 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e)); 544 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e)); 545 xy4 = xy0 >> (e - 128); 546 xy0 = xy1 = xy2 = xy3 = 0; 547 } else if (e >= 96) { 548 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e)); 549 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e)); 550 if (sticky) 551 xy7 |= 1; 552 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e)); 553 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e)); 554 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); 555 xy3 = xy0 >> (e - 96); 556 xy0 = xy1 = xy2 = 0; 557 } else if (e >= 64) { 558 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e)); 559 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e)); 560 if (sticky) 561 xy7 |= 1; 562 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e)); 563 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e)); 564 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); 565 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); 566 xy2 = xy0 >> (e - 64); 567 xy0 = xy1 = 0; 568 } else if (e >= 32) { 569 sticky = xy7 | ((xy6 << 1) << (63 - e)); 570 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e)); 571 if (sticky) 572 xy7 |= 1; 573 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e)); 574 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e)); 575 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); 576 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); 577 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); 578 xy1 = xy0 >> (e - 32); 579 xy0 = 0; 580 } else if (e) { 581 sticky = (xy7 << 1) << (31 - e); 582 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e)); 583 if (sticky) 584 xy7 |= 1; 585 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e)); 586 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e)); 587 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e)); 588 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); 589 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); 590 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); 591 xy0 >>= e; 592 } 593 594 /* if this is a magnitude subtract, negate the significand of xy */ 595 if (sxy ^ sz) { 596 xy0 = ~xy0; 597 xy1 = ~xy1; 598 xy2 = ~xy2; 599 xy3 = ~xy3; 600 xy4 = ~xy4; 601 xy5 = ~xy5; 602 xy6 = ~xy6; 603 xy7 = -xy7; 604 if (xy7 == 0) 605 if (++xy6 == 0) 606 if (++xy5 == 0) 607 if (++xy4 == 0) 608 if (++xy3 == 0) 609 if (++xy2 == 0) 610 if (++xy1 == 0) 611 xy0++; 612 } 613 614 /* add, propagating carries */ 615 z7 += xy7; 616 e = (z7 < xy7); 617 z6 += xy6; 618 if (e) { 619 z6++; 620 e = (z6 <= xy6); 621 } else 622 e = (z6 < xy6); 623 z5 += xy5; 624 if (e) { 625 z5++; 626 e = (z5 <= xy5); 627 } else 628 e = (z5 < xy5); 629 z4 += xy4; 630 if (e) { 631 z4++; 632 e = (z4 <= xy4); 633 } else 634 e = (z4 < xy4); 635 z3 += xy3; 636 if (e) { 637 z3++; 638 e = (z3 <= xy3); 639 } else 640 e = (z3 < xy3); 641 z2 += xy2; 642 if (e) { 643 z2++; 644 e = (z2 <= xy2); 645 } else 646 e = (z2 < xy2); 647 z1 += xy1; 648 if (e) { 649 z1++; 650 e = (z1 <= xy1); 651 } else 652 e = (z1 < xy1); 653 z0 += xy0; 654 if (e) 655 z0++; 656 657 /* postnormalize and collect rounding information into z4 */ 658 if (ez < 1) { 659 /* result is tiny; shift right until exponent is within range */ 660 e = 1 - ez; 661 if (e > 116) { 662 z4 = 1; /* result can't be exactly zero */ 663 z0 = z1 = z2 = z3 = 0; 664 } else if (e >= 96) { 665 sticky = z7 | z6 | z5 | z4 | z3 | z2 | 666 ((z1 << 1) << (127 - e)); 667 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e)); 668 if (sticky) 669 z4 |= 1; 670 z3 = z0 >> (e - 96); 671 z0 = z1 = z2 = 0; 672 } else if (e >= 64) { 673 sticky = z7 | z6 | z5 | z4 | z3 | 674 ((z2 << 1) << (95 - e)); 675 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e)); 676 if (sticky) 677 z4 |= 1; 678 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e)); 679 z2 = z0 >> (e - 64); 680 z0 = z1 = 0; 681 } else if (e >= 32) { 682 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e)); 683 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e)); 684 if (sticky) 685 z4 |= 1; 686 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e)); 687 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); 688 z1 = z0 >> (e - 32); 689 z0 = 0; 690 } else { 691 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e); 692 z4 = (z4 >> e) | ((z3 << 1) << (31 - e)); 693 if (sticky) 694 z4 |= 1; 695 z3 = (z3 >> e) | ((z2 << 1) << (31 - e)); 696 z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); 697 z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); 698 z0 >>= e; 699 } 700 ez = 1; 701 } else if (z0 >= 0x20000) { 702 /* carry out; shift right by one */ 703 sticky = (z4 & 1) | z5 | z6 | z7; 704 z4 = (z4 >> 1) | (z3 << 31); 705 if (sticky) 706 z4 |= 1; 707 z3 = (z3 >> 1) | (z2 << 31); 708 z2 = (z2 >> 1) | (z1 << 31); 709 z1 = (z1 >> 1) | (z0 << 31); 710 z0 >>= 1; 711 ez++; 712 } else { 713 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) 714 != 0) { 715 /* 716 * borrow/cancellation; shift left as much as 717 * exponent allows 718 */ 719 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) { 720 z0 = z1; 721 z1 = z2; 722 z2 = z3; 723 z3 = z4; 724 z4 = z5; 725 z5 = z6; 726 z6 = z7; 727 z7 = 0; 728 ez -= 32; 729 } 730 while (z0 < 0x10000 && ez > 1) { 731 z0 = (z0 << 1) | (z1 >> 31); 732 z1 = (z1 << 1) | (z2 >> 31); 733 z2 = (z2 << 1) | (z3 >> 31); 734 z3 = (z3 << 1) | (z4 >> 31); 735 z4 = (z4 << 1) | (z5 >> 31); 736 z5 = (z5 << 1) | (z6 >> 31); 737 z6 = (z6 << 1) | (z7 >> 31); 738 z7 <<= 1; 739 ez--; 740 } 741 } 742 if (z5 | z6 | z7) 743 z4 |= 1; 744 } 745 746 /* get the rounding mode */ 747 rm = fsr >> 30; 748 749 /* strip off the integer bit, if there is one */ 750 ibit = z0 & 0x10000; 751 if (ibit) 752 z0 -= 0x10000; 753 else { 754 ez = 0; 755 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */ 756 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0; 757 zz.i[1] = zz.i[2] = zz.i[3] = 0; 758 __fenv_setfsr32(&fsr); 759 return (zz.q); 760 } 761 } 762 763 /* 764 * flip the sense of directed roundings if the result is negative; 765 * the logic below applies to a positive result 766 */ 767 if (sz) 768 rm ^= rm >> 1; 769 770 /* round and raise exceptions */ 771 if (z4) { 772 fsr |= FSR_NXC; 773 774 /* decide whether to round the fraction up */ 775 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || 776 (z4 == 0x80000000u && (z3 & 1))))) { 777 /* round up and renormalize if necessary */ 778 if (++z3 == 0) 779 if (++z2 == 0) 780 if (++z1 == 0) 781 if (++z0 == 0x10000) { 782 z0 = 0; 783 ez++; 784 } 785 } 786 } 787 788 /* check for under/overflow */ 789 if (ez >= 0x7fff) { 790 if (rm == FSR_RN || rm == FSR_RP) { 791 zz.i[0] = sz | 0x7fff0000; 792 zz.i[1] = zz.i[2] = zz.i[3] = 0; 793 } else { 794 zz.i[0] = sz | 0x7ffeffff; 795 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff; 796 } 797 fsr |= FSR_OFC | FSR_NXC; 798 } else { 799 zz.i[0] = sz | (ez << 16) | z0; 800 zz.i[1] = z1; 801 zz.i[2] = z2; 802 zz.i[3] = z3; 803 804 /* 805 * !ibit => exact result was tiny before rounding, 806 * z4 nonzero => result delivered is inexact 807 */ 808 if (!ibit) { 809 if (z4) 810 fsr |= FSR_UFC | FSR_NXC; 811 else if (fsr & FSR_UFM) 812 fsr |= FSR_UFC; 813 } 814 } 815 816 /* restore the fsr and emulate exceptions as needed */ 817 if ((fsr & FSR_CEXC) & (fsr >> 23)) { 818 __fenv_setfsr32(&fsr); 819 if (fsr & FSR_OFC) { 820 dummy = huge; 821 dummy *= huge; 822 } else if (fsr & FSR_UFC) { 823 dummy = tiny; 824 if (fsr & FSR_NXC) 825 dummy *= tiny; 826 else 827 dummy -= tiny2; 828 } else { 829 dummy = huge; 830 dummy += tiny; 831 } 832 } else { 833 fsr |= (fsr & 0x1f) << 5; 834 __fenv_setfsr32(&fsr); 835 } 836 return (zz.q); 837 } 838 839 #elif defined(__x86) 840 841 static const union { 842 unsigned i[2]; 843 double d; 844 } C[] = { 845 { 0, 0x3fe00000u }, 846 { 0, 0x40000000u }, 847 { 0, 0x3df00000u }, 848 { 0, 0x3bf00000u }, 849 { 0, 0x41f00000u }, 850 { 0, 0x43e00000u }, 851 { 0, 0x7fe00000u }, 852 { 0, 0x00100000u }, 853 { 0, 0x00100001u } 854 }; 855 856 #define half C[0].d 857 #define two C[1].d 858 #define twom32 C[2].d 859 #define twom64 C[3].d 860 #define two32 C[4].d 861 #define two63 C[5].d 862 #define huge C[6].d 863 #define tiny C[7].d 864 #define tiny2 C[8].d 865 866 #if defined(__amd64) 867 #define NI 4 868 #else 869 #define NI 3 870 #endif 871 872 /* 873 * fmal for x86: 80-bit extended double precision, little-endian 874 */ 875 long double 876 __fmal(long double x, long double y, long double z) { 877 union { 878 unsigned i[NI]; 879 long double e; 880 } xx, yy, zz; 881 long double xhi, yhi, xlo, ylo, t; 882 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4; 883 unsigned oldcwsw, cwsw, rm, sticky, carry; 884 int ex, ey, ez, exy, sxy, sz, e, tinyafter; 885 volatile double dummy; 886 887 /* extract the exponents of the arguments */ 888 xx.e = x; 889 yy.e = y; 890 zz.e = z; 891 ex = xx.i[2] & 0x7fff; 892 ey = yy.i[2] & 0x7fff; 893 ez = zz.i[2] & 0x7fff; 894 895 /* dispense with inf, nan, and zero cases */ 896 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 || 897 (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */ 898 return (x * y + z); 899 900 if (ez == 0x7fff) /* z is inf or nan */ 901 return (x + z); /* avoid spurious under/overflow in x * y */ 902 903 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */ 904 /* 905 * x * y isn't zero but could underflow to zero, 906 * so don't add z, lest we perturb the sign 907 */ 908 return (x * y); 909 910 /* 911 * now x, y, and z are all finite and nonzero; extract signs and 912 * normalize the significands (this will raise the denormal operand 913 * exception if need be) 914 */ 915 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000; 916 sz = zz.i[2] & 0x8000; 917 if (!ex) { 918 xx.e = x * two63; 919 ex = (xx.i[2] & 0x7fff) - 63; 920 } 921 if (!ey) { 922 yy.e = y * two63; 923 ey = (yy.i[2] & 0x7fff) - 63; 924 } 925 if (!ez) { 926 zz.e = z * two63; 927 ez = (zz.i[2] & 0x7fff) - 63; 928 } 929 930 /* 931 * save the control and status words, mask all exceptions, and 932 * set rounding to 64-bit precision and toward-zero 933 */ 934 __fenv_getcwsw(&oldcwsw); 935 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000; 936 __fenv_setcwsw(&cwsw); 937 938 /* multiply x*y to 128 bits */ 939 exy = ex + ey - 0x3fff; 940 xx.i[2] = 0x3fff; 941 yy.i[2] = 0x3fff; 942 x = xx.e; 943 y = yy.e; 944 xhi = ((x + twom32) + two32) - two32; 945 yhi = ((y + twom32) + two32) - two32; 946 xlo = x - xhi; 947 ylo = y - yhi; 948 x *= y; 949 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo; 950 if (x >= two) { 951 x *= half; 952 y *= half; 953 exy++; 954 } 955 956 /* extract the significands */ 957 xx.e = x; 958 xy0 = xx.i[1]; 959 xy1 = xx.i[0]; 960 yy.e = t = y + twom32; 961 xy2 = yy.i[0]; 962 yy.e = (y - (t - twom32)) + twom64; 963 xy3 = yy.i[0]; 964 xy4 = 0; 965 z0 = zz.i[1]; 966 z1 = zz.i[0]; 967 z2 = z3 = z4 = 0; 968 969 /* 970 * now x*y is represented by sxy, exy, and xy[0-4], and z is 971 * represented likewise; swap if need be so |xy| <= |z| 972 */ 973 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && 974 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) { 975 e = sxy; sxy = sz; sz = e; 976 e = exy; exy = ez; ez = e; 977 e = xy0; xy0 = z0; z0 = e; 978 e = xy1; xy1 = z1; z1 = e; 979 z2 = xy2; xy2 = 0; 980 z3 = xy3; xy3 = 0; 981 } 982 983 /* shift the significand of xy keeping a sticky bit */ 984 e = ez - exy; 985 if (e > 130) { 986 xy0 = xy1 = xy2 = xy3 = 0; 987 xy4 = 1; 988 } else if (e >= 128) { 989 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e)); 990 xy4 = xy0 >> (e - 128); 991 if (sticky) 992 xy4 |= 1; 993 xy0 = xy1 = xy2 = xy3 = 0; 994 } else if (e >= 96) { 995 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e)); 996 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e)); 997 if (sticky) 998 xy4 |= 1; 999 xy3 = xy0 >> (e - 96); 1000 xy0 = xy1 = xy2 = 0; 1001 } else if (e >= 64) { 1002 sticky = xy3 | ((xy2 << 1) << (95 - e)); 1003 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e)); 1004 if (sticky) 1005 xy4 |= 1; 1006 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e)); 1007 xy2 = xy0 >> (e - 64); 1008 xy0 = xy1 = 0; 1009 } else if (e >= 32) { 1010 sticky = (xy3 << 1) << (63 - e); 1011 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e)); 1012 if (sticky) 1013 xy4 |= 1; 1014 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e)); 1015 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e)); 1016 xy1 = xy0 >> (e - 32); 1017 xy0 = 0; 1018 } else if (e) { 1019 xy4 = (xy3 << 1) << (31 - e); 1020 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e)); 1021 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e)); 1022 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e)); 1023 xy0 >>= e; 1024 } 1025 1026 /* if this is a magnitude subtract, negate the significand of xy */ 1027 if (sxy ^ sz) { 1028 xy0 = ~xy0; 1029 xy1 = ~xy1; 1030 xy2 = ~xy2; 1031 xy3 = ~xy3; 1032 xy4 = -xy4; 1033 if (xy4 == 0) 1034 if (++xy3 == 0) 1035 if (++xy2 == 0) 1036 if (++xy1 == 0) 1037 xy0++; 1038 } 1039 1040 /* add, propagating carries */ 1041 z4 += xy4; 1042 carry = (z4 < xy4); 1043 z3 += xy3; 1044 if (carry) { 1045 z3++; 1046 carry = (z3 <= xy3); 1047 } else 1048 carry = (z3 < xy3); 1049 z2 += xy2; 1050 if (carry) { 1051 z2++; 1052 carry = (z2 <= xy2); 1053 } else 1054 carry = (z2 < xy2); 1055 z1 += xy1; 1056 if (carry) { 1057 z1++; 1058 carry = (z1 <= xy1); 1059 } else 1060 carry = (z1 < xy1); 1061 z0 += xy0; 1062 if (carry) { 1063 z0++; 1064 carry = (z0 <= xy0); 1065 } else 1066 carry = (z0 < xy0); 1067 1068 /* for a magnitude subtract, ignore the last carry out */ 1069 if (sxy ^ sz) 1070 carry = 0; 1071 1072 /* postnormalize and collect rounding information into z2 */ 1073 if (ez < 1) { 1074 /* result is tiny; shift right until exponent is within range */ 1075 e = 1 - ez; 1076 if (e > 67) { 1077 z2 = 1; /* result can't be exactly zero */ 1078 z0 = z1 = 0; 1079 } else if (e >= 64) { 1080 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e)); 1081 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e)); 1082 if (sticky) 1083 z2 |= 1; 1084 z1 = carry >> (e - 64); 1085 z0 = 0; 1086 } else if (e >= 32) { 1087 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e)); 1088 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e)); 1089 if (sticky) 1090 z2 |= 1; 1091 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e)); 1092 z0 = carry >> (e - 32); 1093 } else { 1094 sticky = z4 | z3 | (z2 << 1) << (31 - e); 1095 z2 = (z2 >> e) | ((z1 << 1) << (31 - e)); 1096 if (sticky) 1097 z2 |= 1; 1098 z1 = (z1 >> e) | ((z0 << 1) << (31 - e)); 1099 z0 = (z0 >> e) | ((carry << 1) << (31 - e)); 1100 } 1101 ez = 1; 1102 } else if (carry) { 1103 /* carry out; shift right by one */ 1104 sticky = (z2 & 1) | z3 | z4; 1105 z2 = (z2 >> 1) | (z1 << 31); 1106 if (sticky) 1107 z2 |= 1; 1108 z1 = (z1 >> 1) | (z0 << 31); 1109 z0 = (z0 >> 1) | 0x80000000; 1110 ez++; 1111 } else { 1112 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) { 1113 /* 1114 * borrow/cancellation; shift left as much as 1115 * exponent allows 1116 */ 1117 while (!z0 && ez >= 33) { 1118 z0 = z1; 1119 z1 = z2; 1120 z2 = z3; 1121 z3 = z4; 1122 z4 = 0; 1123 ez -= 32; 1124 } 1125 while (z0 < 0x80000000u && ez > 1) { 1126 z0 = (z0 << 1) | (z1 >> 31); 1127 z1 = (z1 << 1) | (z2 >> 31); 1128 z2 = (z2 << 1) | (z3 >> 31); 1129 z3 = (z3 << 1) | (z4 >> 31); 1130 z4 <<= 1; 1131 ez--; 1132 } 1133 } 1134 if (z3 | z4) 1135 z2 |= 1; 1136 } 1137 1138 /* get the rounding mode */ 1139 rm = oldcwsw & 0x0c000000; 1140 1141 /* adjust exponent if result is subnormal */ 1142 tinyafter = 0; 1143 if (!(z0 & 0x80000000)) { 1144 ez = 0; 1145 tinyafter = 1; 1146 if (!(z0 | z1 | z2)) { /* exact zero */ 1147 zz.i[2] = rm == FCW_RM ? 0x8000 : 0; 1148 zz.i[1] = zz.i[0] = 0; 1149 __fenv_setcwsw(&oldcwsw); 1150 return (zz.e); 1151 } 1152 } 1153 1154 /* 1155 * flip the sense of directed roundings if the result is negative; 1156 * the logic below applies to a positive result 1157 */ 1158 if (sz && (rm == FCW_RM || rm == FCW_RP)) 1159 rm = (FCW_RM + FCW_RP) - rm; 1160 1161 /* round */ 1162 if (z2) { 1163 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || 1164 (z2 == 0x80000000u && (z1 & 1))))) { 1165 /* round up and renormalize if necessary */ 1166 if (++z1 == 0) { 1167 if (++z0 == 0) { 1168 z0 = 0x80000000; 1169 ez++; 1170 } else if (z0 == 0x80000000) { 1171 /* rounded up to smallest normal */ 1172 ez = 1; 1173 if ((rm == FCW_RP && z2 > 1174 0x80000000u) || (rm == FCW_RN && 1175 z2 >= 0xc0000000u)) 1176 /* 1177 * would have rounded up to 1178 * smallest normal even with 1179 * unbounded range 1180 */ 1181 tinyafter = 0; 1182 } 1183 } 1184 } 1185 } 1186 1187 /* restore the control and status words, check for over/underflow */ 1188 __fenv_setcwsw(&oldcwsw); 1189 if (ez >= 0x7fff) { 1190 if (rm == FCW_RN || rm == FCW_RP) { 1191 zz.i[2] = sz | 0x7fff; 1192 zz.i[1] = 0x80000000; 1193 zz.i[0] = 0; 1194 } else { 1195 zz.i[2] = sz | 0x7ffe; 1196 zz.i[1] = 0xffffffff; 1197 zz.i[0] = 0xffffffff; 1198 } 1199 dummy = huge; 1200 dummy *= huge; 1201 } else { 1202 zz.i[2] = sz | ez; 1203 zz.i[1] = z0; 1204 zz.i[0] = z1; 1205 1206 /* 1207 * tinyafter => result rounded w/ unbounded range would be tiny, 1208 * z2 nonzero => result delivered is inexact 1209 */ 1210 if (tinyafter) { 1211 dummy = tiny; 1212 if (z2) 1213 dummy *= tiny; 1214 else 1215 dummy -= tiny2; 1216 } else if (z2) { 1217 dummy = huge; 1218 dummy += tiny; 1219 } 1220 } 1221 1222 return (zz.e); 1223 } 1224 1225 #else 1226 #error Unknown architecture 1227 #endif