/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */

/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/*
 * fmal(x, y, z): long double fused multiply-add.
 *
 * Two implementations are provided, selected by architecture:
 *
 *   __sparc	128-bit quad precision, big-endian
 *   __x86	80-bit extended double precision, little-endian
 *
 * Both follow the same outline: classify the arguments and dispose of
 * the zero/inf/nan cases, form the full-width product x*y as an array
 * of 32-bit words, align it with z keeping a sticky bit, add (or
 * subtract) the significands as integers, postnormalize, round once
 * according to the caller's rounding mode, and finally raise whatever
 * exceptions the single rounded operation calls for.
 */

#pragma weak fmal = __fmal

#include "libm.h"
#include "fma.h"
#include "fenv_inlines.h"

#if defined(__sparc)
/*
 * Double precision constants given as raw bit patterns.  i[0] is the
 * high-order word and i[1] the low-order word (big-endian layout).
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ 0x3fe00000u, 0 },	/* 2^-1 */
	{ 0x40000000u, 0 },	/* 2^1 */
	{ 0x3ef00000u, 0 },	/* 2^-16 */
	{ 0x3e700000u, 0 },	/* 2^-24 */
	{ 0x41300000u, 0 },	/* 2^20 */
	{ 0x3e300000u, 0 },	/* 2^-28 */
	{ 0x3b300000u, 0 },	/* 2^-76 */
	{ 0x38300000u, 0 },	/* 2^-124 */
	{ 0x42300000u, 0 },	/* 2^36 */
	{ 0x3df00000u, 0 },	/* 2^-32 */
	{ 0x7fe00000u, 0 },	/* 2^1023 */
	{ 0x00100000u, 0 },	/* 2^-1022, smallest normal double */
	{ 0x00100001u, 0 },	/* 2^-1022 * (1 + 2^-20) */
	{ 0, 0 },		/* +0 */
	{ 0x7ff00000u, 0 },	/* +infinity */
	{ 0x7ff00001u, 0 }	/* a nan with the quiet bit clear */
};

/* symbolic names for the entries of C[] */
#define	half	C[0].d
#define	two	C[1].d
#define	twom16	C[2].d
#define	twom24	C[3].d
#define	two20	C[4].d
#define	twom28	C[5].d
#define	twom76	C[6].d
#define	twom124	C[7].d
#define	two36	C[8].d
#define	twom32	C[9].d
#define	huge	C[10].d
#define	tiny	C[11].d
#define	tiny2	C[12].d
#define	zero	C[13].d
#define	inf	C[14].d
#define	snan	C[15].d

/*
 * fsr image loaded while the product is being formed: only the top
 * two bits (the field read back below as fsr >> 30) are set, selecting
 * round-to-negative-infinity; every other bit is zero.
 */
static const unsigned int fsr_rm = 0xc0000000u;

/*
 * fmal for SPARC: 128-bit quad precision, big-endian
 *
 * Returns x * y + z rounded once, in the rounding mode found in the
 * fsr on entry.
 *
 * Special cases, in the order they are tested:
 *   - a signaling nan argument raises invalid and is returned with its
 *     quiet bit (0x8000 in the high word) set; x is checked first,
 *     then y, then z
 *   - a quiet nan x, then a quiet nan y, is returned as is
 *   - 0 * inf raises invalid and returns the default nan (this test
 *     precedes the quiet nan z test)
 *   - a quiet nan z is returned as is
 *   - inf * finite + inf of the opposite sign raises invalid and
 *     returns the default nan; otherwise an infinite operand yields
 *     the correspondingly signed infinity
 *   - a zero product returns z, except that (+-0) + (-+0) yields -0
 *     when rounding toward negative infinity and +0 otherwise
 *   - z == 0 with x and y finite nonzero returns x * y
 *
 * In the general case the 113-bit significands of x and y are split
 * into pieces held in doubles, multiplied, and the 226-bit product is
 * extracted into xy0..xy7 (32 bits per word, xy0 most significant and
 * holding the integer bit at 0x10000).  z is unpacked likewise into
 * z0..z7 and the two are added as multiword integers.
 */
long double
__fmal(long double x, long double y, long double z)
{
	union {
		unsigned int i[4];
		long double q;
	} xx, yy, zz;
	union {
		unsigned int i[2];
		double d;
	} u;

	double dx[5], dy[5], dxy[9], c, s;
	unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
	unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
	unsigned int rm, sticky;
	unsigned int fsr;
	int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
	int cx, cy, cz;
	/*
	 * target of the arithmetic used only to raise exceptions;
	 * volatile, presumably so those operations are not optimized away
	 */
	volatile double dummy;

	/* extract the high order words of the arguments */
	xx.q = x;
	yy.q = y;
	zz.q = z;
	hx = xx.i[0] & ~0x80000000;
	hy = yy.i[0] & ~0x80000000;
	hz = zz.i[0] & ~0x80000000;

	/*
	 * distinguish zero, finite nonzero, infinite, and quiet nan
	 * arguments; raise invalid and return for signaling nans
	 *
	 * the class codes stored in cx, cy, and cz are:
	 * 0 = zero, 1 = finite nonzero, 2 = inf, 3 = quiet nan
	 */
	if (hx >= 0x7fff0000) {
		if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
			if (!(hx & 0x8000)) {
				/* signaling nan, raise invalid */
				dummy = snan;
				dummy += snan;
				xx.i[0] |= 0x8000;
				return (xx.q);
			}

			cx = 3;	/* quiet nan */
		} else {
			cx = 2;	/* inf */
		}
	} else if (hx == 0) {
		cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
		/* subnormal or zero */
	} else {
		cx = 1;	/* finite nonzero */
	}

	/* classify y the same way */
	if (hy >= 0x7fff0000) {
		if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
			if (!(hy & 0x8000)) {
				dummy = snan;
				dummy += snan;
				yy.i[0] |= 0x8000;
				return (yy.q);
			}

			cy = 3;
		} else {
			cy = 2;
		}
	} else if (hy == 0) {
		cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
	} else {
		cy = 1;
	}

	/* classify z the same way */
	if (hz >= 0x7fff0000) {
		if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
			if (!(hz & 0x8000)) {
				dummy = snan;
				dummy += snan;
				zz.i[0] |= 0x8000;
				return (zz.q);
			}

			cz = 3;
		} else {
			cz = 2;
		}
	} else if (hz == 0) {
		cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
	} else {
		cz = 1;
	}

	/*
	 * get the fsr and clear current exceptions (in the local copy;
	 * the copy is written back on the paths below that call
	 * __fenv_setfsr32(&fsr))
	 */
	__fenv_getfsr32(&fsr);
	fsr &= ~FSR_CEXC;

	/* handle all other zero, inf, and nan cases */
	if (cx != 1 || cy != 1 || cz != 1) {
		/* if x or y is a quiet nan, return it */
		if (cx == 3) {
			__fenv_setfsr32(&fsr);
			return (x);
		}

		if (cy == 3) {
			__fenv_setfsr32(&fsr);
			return (y);
		}

		/* if x*y is 0*inf, raise invalid and return the default nan */
		if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
			dummy = zero;
			dummy *= inf;
			zz.i[0] = 0x7fffffff;
			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
			return (zz.q);
		}

		/* if z is a quiet nan, return it */
		if (cz == 3) {
			__fenv_setfsr32(&fsr);
			return (z);
		}

		/*
		 * now none of x, y, or z is nan; handle cases where x or y
		 * is inf
		 */
		if (cx == 2 || cy == 2) {
			/*
			 * if z is also inf, either we have inf-inf or
			 * the result is the same as z depending on signs
			 */
			if (cz == 2) {
				/* sign(x) ^ sign(y) ^ sign(z) set => inf-inf */
				if ((int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
					dummy = inf;
					dummy -= inf;
					zz.i[0] = 0x7fffffff;
					zz.i[1] = zz.i[2] = zz.i[3] =
					    0xffffffff;
					return (zz.q);
				}

				__fenv_setfsr32(&fsr);
				return (z);
			}

			/* otherwise the result is inf with appropriate sign */
			zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
			    0x7fff0000;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
			__fenv_setfsr32(&fsr);
			return (zz.q);
		}

		/* if z is inf, return it */
		if (cz == 2) {
			__fenv_setfsr32(&fsr);
			return (z);
		}

		/*
		 * now x, y, and z are all finite; handle cases where x or y
		 * is zero
		 */
		if (cx == 0 || cy == 0) {
			/* either we have 0-0 or the result is the same as z */
			if (cz == 0 && (int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
			    0) {
				/*
				 * zeros of opposite sign: the sum is -0 when
				 * rounding toward negative infinity, else +0;
				 * the low words of zz are already zero
				 */
				zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
				    0;
				__fenv_setfsr32(&fsr);
				return (zz.q);
			}

			__fenv_setfsr32(&fsr);
			return (z);
		}

		/* if we get here, x and y are nonzero finite, z must be zero */
		return (x * y);
	}

	/*
	 * now x, y, and z are all finite and nonzero; set round-to-
	 * negative-infinity mode
	 */
	__fenv_setfsr32(&fsr_rm);

	/*
	 * get the signs and exponents and normalize the significands
	 * of x and y
	 *
	 * for a subnormal operand, first move whole 32-bit words up
	 * (adjusting the exponent by 32 per word), then shift left one
	 * bit at a time until the integer bit (0x10000) is set
	 */
	sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
	ex = hx >> 16;
	hx &= 0xffff;

	if (!ex) {
		if (hx | (xx.i[1] & 0xfffe0000)) {
			ex = 1;
		} else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
			hx = xx.i[1];
			xx.i[1] = xx.i[2];
			xx.i[2] = xx.i[3];
			xx.i[3] = 0;
			ex = -31;
		} else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
			hx = xx.i[2];
			xx.i[1] = xx.i[3];
			xx.i[2] = xx.i[3] = 0;
			ex = -63;
		} else {
			hx = xx.i[3];
			xx.i[1] = xx.i[2] = xx.i[3] = 0;
			ex = -95;
		}

		while ((hx & 0x10000) == 0) {
			hx = (hx << 1) | (xx.i[1] >> 31);
			xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
			xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
			xx.i[3] <<= 1;
			ex--;
		}
	} else {
		/* normal: make the implicit integer bit explicit */
		hx |= 0x10000;
	}

	ey = hy >> 16;
	hy &= 0xffff;

	if (!ey) {
		if (hy | (yy.i[1] & 0xfffe0000)) {
			ey = 1;
		} else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
			hy = yy.i[1];
			yy.i[1] = yy.i[2];
			yy.i[2] = yy.i[3];
			yy.i[3] = 0;
			ey = -31;
		} else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
			hy = yy.i[2];
			yy.i[1] = yy.i[3];
			yy.i[2] = yy.i[3] = 0;
			ey = -63;
		} else {
			hy = yy.i[3];
			yy.i[1] = yy.i[2] = yy.i[3] = 0;
			ey = -95;
		}

		while ((hy & 0x10000) == 0) {
			hy = (hy << 1) | (yy.i[1] >> 31);
			yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
			yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
			yy.i[3] <<= 1;
			ey--;
		}
	} else {
		hy |= 0x10000;
	}

	/* biased exponent of the product */
	exy = ex + ey - 0x3fff;

	/*
	 * convert the significands of x and y to doubles
	 *
	 * piece 0 holds the top 17 bits (integer bit included) and
	 * pieces 1-4 hold 24 bits each; c is the weight of the least
	 * significant bit of the current piece
	 */
	c = twom16;
	dx[0] = (double)((int)hx) * c;
	dy[0] = (double)((int)hy) * c;

	c *= twom24;
	dx[1] = (double)((int)(xx.i[1] >> 8)) * c;
	dy[1] = (double)((int)(yy.i[1] >> 8)) * c;

	c *= twom24;
	dx[2] = (double)((int)(((xx.i[1] << 16) | (xx.i[2] >> 16)) &
	    0xffffff)) * c;
	dy[2] = (double)((int)(((yy.i[1] << 16) | (yy.i[2] >> 16)) &
	    0xffffff)) * c;

	c *= twom24;
	dx[3] = (double)((int)(((xx.i[2] << 8) | (xx.i[3] >> 24)) & 0xffffff)) *
	    c;
	dy[3] = (double)((int)(((yy.i[2] << 8) | (yy.i[3] >> 24)) & 0xffffff)) *
	    c;

	c *= twom24;
	dx[4] = (double)((int)(xx.i[3] & 0xffffff)) * c;
	dy[4] = (double)((int)(yy.i[3] & 0xffffff)) * c;

	/*
	 * form the "digits" of the product
	 *
	 * dxy[k] collects every dx[i] * dy[j] with i + j == k
	 */
	dxy[0] = dx[0] * dy[0];
	dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
	dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
	dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + dx[3] * dy[0];
	dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + dx[3] * dy[1] +
	    dx[4] * dy[0];
	dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + dx[4] * dy[1];
	dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
	dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
	dxy[8] = dx[4] * dy[4];

	/*
	 * split odd-numbered terms and combine into even-numbered terms
	 *
	 * adding and then subtracting a power of two leaves in c the
	 * high part of the odd term; the remainder is carried down into
	 * the next even term
	 */
	c = (dxy[1] + two20) - two20;
	dxy[0] += c;
	dxy[1] -= c;
	c = (dxy[3] + twom28) - twom28;
	dxy[2] += c + dxy[1];
	dxy[3] -= c;
	c = (dxy[5] + twom76) - twom76;
	dxy[4] += c + dxy[3];
	dxy[5] -= c;
	c = (dxy[7] + twom124) - twom124;
	dxy[6] += c + dxy[5];
	dxy[8] += (dxy[7] - c);

	/*
	 * propagate carries, adjusting the exponent if need be
	 *
	 * the odd-numbered slots are reused here for running sums taken
	 * from the least significant term upward
	 */
	dxy[7] = dxy[6] + dxy[8];
	dxy[5] = dxy[4] + dxy[7];
	dxy[3] = dxy[2] + dxy[5];
	dxy[1] = dxy[0] + dxy[3];

	if (dxy[1] >= two) {
		dxy[0] *= half;
		dxy[1] *= half;
		dxy[2] *= half;
		dxy[3] *= half;
		dxy[4] *= half;
		dxy[5] *= half;
		dxy[6] *= half;
		dxy[7] *= half;
		dxy[8] *= half;
		exy++;
	}

	/*
	 * extract the significand of x*y
	 *
	 * each step adds a power of two s to the remaining value and
	 * reads the low word of the resulting double (u.i[1]) as the
	 * next 32-bit word of the product (the first word, taken with
	 * s = 2^36, carries the integer bit at 0x10000); subtracting s
	 * back recovers what was extracted so it can be removed from
	 * the remainder, and s is then scaled down by 2^-32
	 */
	s = two36;
	u.d = c = dxy[1] + s;
	xy0 = u.i[1];
	c -= s;
	dxy[1] -= c;
	dxy[0] -= c;

	s *= twom32;
	u.d = c = dxy[1] + s;
	xy1 = u.i[1];
	c -= s;
	dxy[2] += (dxy[0] - c);
	dxy[3] = dxy[2] + dxy[5];

	s *= twom32;
	u.d = c = dxy[3] + s;
	xy2 = u.i[1];
	c -= s;
	dxy[4] += (dxy[2] - c);
	dxy[5] = dxy[4] + dxy[7];

	s *= twom32;
	u.d = c = dxy[5] + s;
	xy3 = u.i[1];
	c -= s;
	dxy[4] -= c;
	dxy[5] = dxy[4] + dxy[7];

	s *= twom32;
	u.d = c = dxy[5] + s;
	xy4 = u.i[1];
	c -= s;
	dxy[6] += (dxy[4] - c);
	dxy[7] = dxy[6] + dxy[8];

	s *= twom32;
	u.d = c = dxy[7] + s;
	xy5 = u.i[1];
	c -= s;
	dxy[8] += (dxy[6] - c);

	s *= twom32;
	u.d = c = dxy[8] + s;
	xy6 = u.i[1];
	c -= s;
	dxy[8] -= c;

	s *= twom32;
	u.d = c = dxy[8] + s;
	xy7 = u.i[1];

	/* extract the sign, exponent, and significand of z */
	sz = zz.i[0] & 0x80000000;
	ez = hz >> 16;
	z0 = hz & 0xffff;

	if (!ez) {
		/* subnormal z: normalize as was done for x and y */
		if (z0 | (zz.i[1] & 0xfffe0000)) {
			z1 = zz.i[1];
			z2 = zz.i[2];
			z3 = zz.i[3];
			ez = 1;
		} else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
			z0 = zz.i[1];
			z1 = zz.i[2];
			z2 = zz.i[3];
			z3 = 0;
			ez = -31;
		} else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
			z0 = zz.i[2];
			z1 = zz.i[3];
			z2 = z3 = 0;
			ez = -63;
		} else {
			z0 = zz.i[3];
			z1 = z2 = z3 = 0;
			ez = -95;
		}

		while ((z0 & 0x10000) == 0) {
			z0 = (z0 << 1) | (z1 >> 31);
			z1 = (z1 << 1) | (z2 >> 31);
			z2 = (z2 << 1) | (z3 >> 31);
			z3 <<= 1;
			ez--;
		}
	} else {
		z0 |= 0x10000;
		z1 = zz.i[1];
		z2 = zz.i[2];
		z3 = zz.i[3];
	}

	/* z has no bits below the fourth word */
	z4 = z5 = z6 = z7 = 0;

	/*
	 * now x*y is represented by sxy, exy, and xy[0-7], and z is
	 * represented likewise; swap if need be so |xy| <= |z|
	 *
	 * (e serves as the scratch variable for the exchange; since the
	 * low four words of z are zero, they are moved rather than
	 * exchanged)
	 */
	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
	    (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || (xy3 == z3 &&
	    (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
		e = sxy;
		sxy = sz;
		sz = e;
		e = exy;
		exy = ez;
		ez = e;
		e = xy0;
		xy0 = z0;
		z0 = e;
		e = xy1;
		xy1 = z1;
		z1 = e;
		e = xy2;
		xy2 = z2;
		z2 = e;
		e = xy3;
		xy3 = z3;
		z3 = e;
		z4 = xy4;
		xy4 = 0;
		z5 = xy5;
		xy5 = 0;
		z6 = xy6;
		xy6 = 0;
		z7 = xy7;
		xy7 = 0;
	}

	/*
	 * shift the significand of xy keeping a sticky bit
	 *
	 * e is the right shift needed to align xy with z; each branch
	 * handles one range of whole-word shifts plus a residual bit
	 * shift, and ORs a 1 into the lowest bit of xy7 if any nonzero
	 * bit was shifted out.  the form (v << 1) << (k - e) is used so
	 * that neither shift count reaches 32 when the residual is zero.
	 */
	e = ez - exy;

	if (e > 236) {
		/* xy lies entirely below the retained bits */
		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
		xy7 = 1;
	} else if (e >= 224) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | ((xy0 <<
		    1) << (255 - e));
		xy7 = xy0 >> (e - 224);

		if (sticky)
			xy7 |= 1;

		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
	} else if (e >= 192) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | ((xy1 << 1) <<
		    (223 - e));
		xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = xy0 >> (e - 192);
		xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
	} else if (e >= 160) {
		sticky = xy7 | xy6 | xy5 | xy4 | xy3 | ((xy2 << 1) << (191 -
		    e));
		xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
		xy5 = xy0 >> (e - 160);
		xy0 = xy1 = xy2 = xy3 = xy4 = 0;
	} else if (e >= 128) {
		sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
		xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
		xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
		xy4 = xy0 >> (e - 128);
		xy0 = xy1 = xy2 = xy3 = 0;
	} else if (e >= 96) {
		sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
		xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
		xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
		xy3 = xy0 >> (e - 96);
		xy0 = xy1 = xy2 = 0;
	} else if (e >= 64) {
		sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
		xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
		xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
		xy2 = xy0 >> (e - 64);
		xy0 = xy1 = 0;
	} else if (e >= 32) {
		sticky = xy7 | ((xy6 << 1) << (63 - e));
		xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
		xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
		xy1 = xy0 >> (e - 32);
		xy0 = 0;
	} else if (e) {
		sticky = (xy7 << 1) << (31 - e);
		xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));

		if (sticky)
			xy7 |= 1;

		xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
		xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
		xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
		xy0 >>= e;
	}

	/*
	 * if this is a magnitude subtract, negate the significand of xy
	 *
	 * (multiword two's complement: complement the upper words,
	 * negate the lowest, and ripple the carry upward only while
	 * words wrap to zero)
	 */
	if (sxy ^ sz) {
		xy0 = ~xy0;
		xy1 = ~xy1;
		xy2 = ~xy2;
		xy3 = ~xy3;
		xy4 = ~xy4;
		xy5 = ~xy5;
		xy6 = ~xy6;
		xy7 = -xy7;

		if (xy7 == 0)
			if (++xy6 == 0)
				if (++xy5 == 0)
					if (++xy4 == 0)
						if (++xy3 == 0)
							if (++xy2 == 0)
								if (++xy1 == 0)
									xy0++;
	}

	/*
	 * add, propagating carries
	 *
	 * e holds the carry out of each word: after a plain add the sum
	 * wrapped iff it is less than the addend; after add plus
	 * carry-in it wrapped iff it is less than or equal to the addend
	 */
	z7 += xy7;
	e = (z7 < xy7);
	z6 += xy6;

	if (e) {
		z6++;
		e = (z6 <= xy6);
	} else {
		e = (z6 < xy6);
	}

	z5 += xy5;

	if (e) {
		z5++;
		e = (z5 <= xy5);
	} else {
		e = (z5 < xy5);
	}

	z4 += xy4;

	if (e) {
		z4++;
		e = (z4 <= xy4);
	} else {
		e = (z4 < xy4);
	}

	z3 += xy3;

	if (e) {
		z3++;
		e = (z3 <= xy3);
	} else {
		e = (z3 < xy3);
	}

	z2 += xy2;

	if (e) {
		z2++;
		e = (z2 <= xy2);
	} else {
		e = (z2 < xy2);
	}

	z1 += xy1;

	if (e) {
		z1++;
		e = (z1 <= xy1);
	} else {
		e = (z1 < xy1);
	}

	z0 += xy0;

	if (e)
		z0++;

	/*
	 * postnormalize and collect rounding information into z4
	 *
	 * on exit z0..z3 hold the result significand and z4 holds the
	 * round bit in its top bit with everything below folded into
	 * its lower bits as sticky information
	 */
	if (ez < 1) {
		/* result is tiny; shift right until exponent is within range */
		e = 1 - ez;

		if (e > 116) {
			z4 = 1;	/* result can't be exactly zero */
			z0 = z1 = z2 = z3 = 0;
		} else if (e >= 96) {
			sticky = z7 | z6 | z5 | z4 | z3 | z2 | ((z1 << 1) <<
			    (127 - e));
			z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));

			if (sticky)
				z4 |= 1;

			z3 = z0 >> (e - 96);
			z0 = z1 = z2 = 0;
		} else if (e >= 64) {
			sticky = z7 | z6 | z5 | z4 | z3 | ((z2 << 1) << (95 -
			    e));
			z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));

			if (sticky)
				z4 |= 1;

			z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
			z2 = z0 >> (e - 64);
			z0 = z1 = 0;
		} else if (e >= 32) {
			sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
			z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));

			if (sticky)
				z4 |= 1;

			z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
			z1 = z0 >> (e - 32);
			z0 = 0;
		} else {
			sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
			z4 = (z4 >> e) | ((z3 << 1) << (31 - e));

			if (sticky)
				z4 |= 1;

			z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
			z0 >>= e;
		}

		ez = 1;
	} else if (z0 >= 0x20000) {
		/* carry out; shift right by one */
		sticky = (z4 & 1) | z5 | z6 | z7;
		z4 = (z4 >> 1) | (z3 << 31);

		if (sticky)
			z4 |= 1;

		z3 = (z3 >> 1) | (z2 << 31);
		z2 = (z2 >> 1) | (z1 << 31);
		z1 = (z1 >> 1) | (z0 << 31);
		z0 >>= 1;
		ez++;
	} else {
		if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) !=
		    0) {
			/*
			 * borrow/cancellation; shift left as much as
			 * exponent allows
			 */
			/* first by whole words ... */
			while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
				z0 = z1;
				z1 = z2;
				z2 = z3;
				z3 = z4;
				z4 = z5;
				z5 = z6;
				z6 = z7;
				z7 = 0;
				ez -= 32;
			}

			/* ... then bit by bit */
			while (z0 < 0x10000 && ez > 1) {
				z0 = (z0 << 1) | (z1 >> 31);
				z1 = (z1 << 1) | (z2 >> 31);
				z2 = (z2 << 1) | (z3 >> 31);
				z3 = (z3 << 1) | (z4 >> 31);
				z4 = (z4 << 1) | (z5 >> 31);
				z5 = (z5 << 1) | (z6 >> 31);
				z6 = (z6 << 1) | (z7 >> 31);
				z7 <<= 1;
				ez--;
			}
		}

		/* fold the remaining low words into the sticky bit */
		if (z5 | z6 | z7)
			z4 |= 1;
	}

	/* get the rounding mode */
	rm = fsr >> 30;

	/* strip off the integer bit, if there is one */
	ibit = z0 & 0x10000;

	if (ibit) {
		z0 -= 0x10000;
	} else {
		/* no integer bit: the result is subnormal or zero */
		ez = 0;

		if (!(z0 | z1 | z2 | z3 | z4)) {	/* exact zero */
			zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
			__fenv_setfsr32(&fsr);
			return (zz.q);
		}
	}

	/*
	 * flip the sense of directed roundings if the result is negative;
	 * the logic below applies to a positive result
	 *
	 * (rm ^= rm >> 1 exchanges the values 2 and 3 and leaves 0 and 1
	 * unchanged)
	 */
	if (sz)
		rm ^= rm >> 1;

	/* round and raise exceptions */
	if (z4) {
		fsr |= FSR_NXC;

		/*
		 * decide whether to round the fraction up: always when
		 * rounding toward positive infinity; in round-to-nearest
		 * when z4 is above the halfway point, or exactly halfway
		 * and the last retained bit is odd
		 */
		if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || (z4 ==
		    0x80000000u && (z3 & 1))))) {
			/* round up and renormalize if necessary */
			if (++z3 == 0)
				if (++z2 == 0)
					if (++z1 == 0)
						if (++z0 == 0x10000) {
							z0 = 0;
							ez++;
						}
		}
	}

	/* check for under/overflow */
	if (ez >= 0x7fff) {
		if (rm == FSR_RN || rm == FSR_RP) {
			/* overflow to infinity */
			zz.i[0] = sz | 0x7fff0000;
			zz.i[1] = zz.i[2] = zz.i[3] = 0;
		} else {
			/* overflow to the largest finite magnitude */
			zz.i[0] = sz | 0x7ffeffff;
			zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
		}

		fsr |= FSR_OFC | FSR_NXC;
	} else {
		zz.i[0] = sz | (ez << 16) | z0;
		zz.i[1] = z1;
		zz.i[2] = z2;
		zz.i[3] = z3;

		/*
		 * !ibit => exact result was tiny before rounding,
		 * z4 nonzero => result delivered is inexact
		 */
		if (!ibit) {
			if (z4)
				fsr |= FSR_UFC | FSR_NXC;
			else if (fsr & FSR_UFM)
				fsr |= FSR_UFC;
		}
	}

	/*
	 * restore the fsr and emulate exceptions as needed
	 *
	 * if any exception recorded above lines up with a set bit of
	 * fsr >> 23, the fsr is restored and a double precision
	 * operation that raises the same exception is performed;
	 * otherwise the low five bits are copied up by five positions
	 * before the fsr is written back.
	 * NOTE(review): presumably bits 23 and up are the trap enable
	 * mask and bits 5-9 the accrued exception field -- confirm
	 * against the FSR_* definitions.
	 */
	if ((fsr & FSR_CEXC) & (fsr >> 23)) {
		__fenv_setfsr32(&fsr);

		if (fsr & FSR_OFC) {
			/* overflow */
			dummy = huge;
			dummy *= huge;
		} else if (fsr & FSR_UFC) {
			dummy = tiny;

			if (fsr & FSR_NXC)
				dummy *= tiny;	/* inexact underflow */
			else
				dummy -= tiny2;	/* exact underflow */
		} else {
			/* inexact only */
			dummy = huge;
			dummy += tiny;
		}
	} else {
		fsr |= (fsr & 0x1f) << 5;
		__fenv_setfsr32(&fsr);
	}

	return (zz.q);
}
#elif defined(__x86)
/*
 * Double precision constants given as raw bit patterns.  i[0] is the
 * low-order word and i[1] the high-order word (little-endian layout).
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ 0, 0x3fe00000u },	/* 2^-1 */
	{ 0, 0x40000000u },	/* 2^1 */
	{ 0, 0x3df00000u },	/* 2^-32 */
	{ 0, 0x3bf00000u },	/* 2^-64 */
	{ 0, 0x41f00000u },	/* 2^32 */
	{ 0, 0x43e00000u },	/* 2^63 */
	{ 0, 0x7fe00000u },	/* 2^1023 */
	{ 0, 0x00100000u },	/* 2^-1022, smallest normal double */
	{ 0, 0x00100001u }	/* 2^-1022 * (1 + 2^-20) */
};

/* symbolic names for the entries of C[] */
#define	half	C[0].d
#define	two	C[1].d
#define	twom32	C[2].d
#define	twom64	C[3].d
#define	two32	C[4].d
#define	two63	C[5].d
#define	huge	C[6].d
#define	tiny	C[7].d
#define	tiny2	C[8].d

/*
 * NI is the number of 32-bit words overlaid on a long double in the
 * unions below (presumably sizeof (long double) / 4 for each ABI --
 * confirm).
 */
#if defined(__amd64)
#define	NI	4
#else
#define	NI	3
#endif

/*
 * fmal for x86: 80-bit extended double precision, little-endian
 *
 * Returns x * y + z rounded once, in the rounding direction found in
 * the control word on entry.
 *
 * Special cases are delegated to ordinary arithmetic:
 *   - x or y is inf, nan, or zero: returns x * y + z
 *   - otherwise z is inf or nan: returns x + z
 *   - otherwise z is zero: returns x * y
 *
 * In the general case the 64-bit significands of x and y are
 * multiplied to 128 bits held in xy0..xy3 (32 bits per word, xy0 most
 * significant with the explicit integer bit at 0x80000000); xy4 is a
 * guard/sticky word.  z is unpacked likewise into z0..z4 and the two
 * are added as multiword integers, postnormalized, and rounded.
 */
long double
__fmal(long double x, long double y, long double z)
{
	union {
		unsigned i[NI];
		long double e;
	} xx, yy, zz;

	long double xhi, yhi, xlo, ylo, t;
	unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
	unsigned oldcwsw, cwsw, rm, sticky, carry;
	int ex, ey, ez, exy, sxy, sz, e, tinyafter;
	/*
	 * target of the arithmetic used only to raise exceptions;
	 * volatile, presumably so those operations are not optimized away
	 */
	volatile double dummy;

	/* extract the exponents of the arguments */
	xx.e = x;
	yy.e = y;
	zz.e = z;
	ex = xx.i[2] & 0x7fff;
	ey = yy.i[2] & 0x7fff;
	ez = zz.i[2] & 0x7fff;

	/* dispense with inf, nan, and zero cases */
	if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
	    (ey | yy.i[1] | yy.i[0]) == 0)	/* x or y is inf, nan, or 0 */
		return (x * y + z);

	if (ez == 0x7fff)	/* z is inf or nan */
		return (x + z);	/* avoid spurious under/overflow in x * y */

	if ((ez | zz.i[1] | zz.i[0]) == 0)	/* z is zero */
		/*
		 * x * y isn't zero but could underflow to zero,
		 * so don't add z, lest we perturb the sign
		 */
		return (x * y);

	/*
	 * now x, y, and z are all finite and nonzero; extract signs and
	 * normalize the significands (this will raise the denormal operand
	 * exception if need be)
	 *
	 * a subnormal argument is scaled up by 2^63 and the exponent
	 * read back is reduced by 63 to compensate
	 */
	sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
	sz = zz.i[2] & 0x8000;

	if (!ex) {
		xx.e = x * two63;
		ex = (xx.i[2] & 0x7fff) - 63;
	}

	if (!ey) {
		yy.e = y * two63;
		ey = (yy.i[2] & 0x7fff) - 63;
	}

	if (!ez) {
		zz.e = z * two63;
		ez = (zz.i[2] & 0x7fff) - 63;
	}

	/*
	 * save the control and status words, mask all exceptions, and
	 * set rounding to 64-bit precision and toward-zero
	 */
	__fenv_getcwsw(&oldcwsw);
	cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
	__fenv_setcwsw(&cwsw);

	/*
	 * multiply x*y to 128 bits
	 *
	 * the exponent fields of x and y are overwritten with 0x3fff
	 * (sign clear), leaving only the significands; each is split
	 * into a high part (xhi, yhi) and the remainder (xlo, ylo).
	 * after the multiply, x holds the product as rounded by the
	 * hardware and y holds the correction assembled from the
	 * partial products of the pieces
	 */
	exy = ex + ey - 0x3fff;
	xx.i[2] = 0x3fff;
	yy.i[2] = 0x3fff;
	x = xx.e;
	y = yy.e;
	xhi = ((x + twom32) + two32) - two32;
	yhi = ((y + twom32) + two32) - two32;
	xlo = x - xhi;
	ylo = y - yhi;
	x *= y;
	y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;

	if (x >= two) {
		/* product significand is in [2, 4): scale down by two */
		x *= half;
		y *= half;
		exy++;
	}

	/*
	 * extract the significands
	 *
	 * xy0:xy1 come straight from the significand of x; xy2 and xy3
	 * are read from the low word of y after a power of two has been
	 * added to it
	 */
	xx.e = x;
	xy0 = xx.i[1];
	xy1 = xx.i[0];
	yy.e = t = y + twom32;
	xy2 = yy.i[0];
	yy.e = (y - (t - twom32)) + twom64;
	xy3 = yy.i[0];
	xy4 = 0;
	z0 = zz.i[1];
	z1 = zz.i[0];
	z2 = z3 = z4 = 0;

	/*
	 * now x*y is represented by sxy, exy, and xy[0-4], and z is
	 * represented likewise; swap if need be so |xy| <= |z|
	 *
	 * (e serves as the scratch variable for the exchange; since the
	 * low words of z are zero, they are moved rather than exchanged)
	 */
	if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
	    (xy1 == z1 && (xy2 | xy3) != 0)))))) {
		e = sxy;
		sxy = sz;
		sz = e;
		e = exy;
		exy = ez;
		ez = e;
		e = xy0;
		xy0 = z0;
		z0 = e;
		e = xy1;
		xy1 = z1;
		z1 = e;
		z2 = xy2;
		xy2 = 0;
		z3 = xy3;
		xy3 = 0;
	}

	/*
	 * shift the significand of xy keeping a sticky bit
	 *
	 * e is the right shift needed to align xy with z; each branch
	 * handles one range of whole-word shifts plus a residual bit
	 * shift.  bits shifted out of xy3 land in xy4, and anything
	 * shifted out below xy4 sets its lowest bit.  the form
	 * (v << 1) << (k - e) is used so that neither shift count
	 * reaches 32 when the residual is zero.
	 */
	e = ez - exy;

	if (e > 130) {
		/* xy lies entirely below the retained bits */
		xy0 = xy1 = xy2 = xy3 = 0;
		xy4 = 1;
	} else if (e >= 128) {
		sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
		xy4 = xy0 >> (e - 128);

		if (sticky)
			xy4 |= 1;

		xy0 = xy1 = xy2 = xy3 = 0;
	} else if (e >= 96) {
		sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
		xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));

		if (sticky)
			xy4 |= 1;

		xy3 = xy0 >> (e - 96);
		xy0 = xy1 = xy2 = 0;
	} else if (e >= 64) {
		sticky = xy3 | ((xy2 << 1) << (95 - e));
		xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));

		if (sticky)
			xy4 |= 1;

		xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
		xy2 = xy0 >> (e - 64);
		xy0 = xy1 = 0;
	} else if (e >= 32) {
		sticky = (xy3 << 1) << (63 - e);
		xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));

		if (sticky)
			xy4 |= 1;

		xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
		xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
		xy1 = xy0 >> (e - 32);
		xy0 = 0;
	} else if (e) {
		/* fewer than 32 bits: nothing is lost below xy4 */
		xy4 = (xy3 << 1) << (31 - e);
		xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
		xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
		xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
		xy0 >>= e;
	}

	/*
	 * if this is a magnitude subtract, negate the significand of xy
	 *
	 * (multiword two's complement: complement the upper words,
	 * negate the lowest, and ripple the carry upward only while
	 * words wrap to zero)
	 */
	if (sxy ^ sz) {
		xy0 = ~xy0;
		xy1 = ~xy1;
		xy2 = ~xy2;
		xy3 = ~xy3;
		xy4 = -xy4;

		if (xy4 == 0)
			if (++xy3 == 0)
				if (++xy2 == 0)
					if (++xy1 == 0)
						xy0++;
	}

	/*
	 * add, propagating carries
	 *
	 * after a plain add the sum wrapped iff it is less than the
	 * addend; after add plus carry-in it wrapped iff it is less
	 * than or equal to the addend
	 */
	z4 += xy4;
	carry = (z4 < xy4);
	z3 += xy3;

	if (carry) {
		z3++;
		carry = (z3 <= xy3);
	} else {
		carry = (z3 < xy3);
	}

	z2 += xy2;

	if (carry) {
		z2++;
		carry = (z2 <= xy2);
	} else {
		carry = (z2 < xy2);
	}

	z1 += xy1;

	if (carry) {
		z1++;
		carry = (z1 <= xy1);
	} else {
		carry = (z1 < xy1);
	}

	z0 += xy0;

	if (carry) {
		z0++;
		carry = (z0 <= xy0);
	} else {
		carry = (z0 < xy0);
	}

	/* for a magnitude subtract, ignore the last carry out */
	if (sxy ^ sz)
		carry = 0;

	/*
	 * postnormalize and collect rounding information into z2
	 *
	 * on exit z0:z1 hold the result significand and z2 holds the
	 * round bit in its top bit with everything below folded into
	 * its lower bits as sticky information; carry, when set, acts
	 * as one more significand bit above z0
	 */
	if (ez < 1) {
		/* result is tiny; shift right until exponent is within range */
		e = 1 - ez;

		if (e > 67) {
			z2 = 1;	/* result can't be exactly zero */
			z0 = z1 = 0;
		} else if (e >= 64) {
			sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
			z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));

			if (sticky)
				z2 |= 1;

			z1 = carry >> (e - 64);
			z0 = 0;
		} else if (e >= 32) {
			sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
			z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));

			if (sticky)
				z2 |= 1;

			z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
			z0 = carry >> (e - 32);
		} else {
			sticky = z4 | z3 | (z2 << 1) << (31 - e);
			z2 = (z2 >> e) | ((z1 << 1) << (31 - e));

			if (sticky)
				z2 |= 1;

			z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
			z0 = (z0 >> e) | ((carry << 1) << (31 - e));
		}

		ez = 1;
	} else if (carry) {
		/* carry out; shift right by one */
		sticky = (z2 & 1) | z3 | z4;
		z2 = (z2 >> 1) | (z1 << 31);

		if (sticky)
			z2 |= 1;

		z1 = (z1 >> 1) | (z0 << 31);
		z0 = (z0 >> 1) | 0x80000000;
		ez++;
	} else {
		if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
			/*
			 * borrow/cancellation; shift left as much as
			 * exponent allows
			 */
			/* first by whole words ... */
			while (!z0 && ez >= 33) {
				z0 = z1;
				z1 = z2;
				z2 = z3;
				z3 = z4;
				z4 = 0;
				ez -= 32;
			}

			/* ... then bit by bit */
			while (z0 < 0x80000000u && ez > 1) {
				z0 = (z0 << 1) | (z1 >> 31);
				z1 = (z1 << 1) | (z2 >> 31);
				z2 = (z2 << 1) | (z3 >> 31);
				z3 = (z3 << 1) | (z4 >> 31);
				z4 <<= 1;
				ez--;
			}
		}

		/* fold the remaining low words into the sticky bit */
		if (z3 | z4)
			z2 |= 1;
	}

	/* get the rounding mode */
	rm = oldcwsw & 0x0c000000;

	/* adjust exponent if result is subnormal */
	tinyafter = 0;

	if (!(z0 & 0x80000000)) {
		ez = 0;
		tinyafter = 1;

		if (!(z0 | z1 | z2)) {	/* exact zero */
			zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
			zz.i[1] = zz.i[0] = 0;
			__fenv_setcwsw(&oldcwsw);
			return (zz.e);
		}
	}

	/*
	 * flip the sense of directed roundings if the result is negative;
	 * the logic below applies to a positive result
	 */
	if (sz && (rm == FCW_RM || rm == FCW_RP))
		rm = (FCW_RM + FCW_RP) - rm;

	/*
	 * round: up always when rounding toward positive infinity; in
	 * round-to-nearest when z2 is above the halfway point, or
	 * exactly halfway and the last retained bit is odd
	 */
	if (z2) {
		if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || (z2 ==
		    0x80000000u && (z1 & 1))))) {
			/* round up and renormalize if necessary */
			if (++z1 == 0) {
				if (++z0 == 0) {
					/* significand overflowed */
					z0 = 0x80000000;
					ez++;
				} else if (z0 == 0x80000000) {
					/* rounded up to smallest normal */
					ez = 1;

					if ((rm == FCW_RP && z2 >
					    0x80000000u) || (rm == FCW_RN &&
					    z2 >= 0xc0000000u))
						/*
						 * would have rounded up to
						 * smallest normal even with
						 * unbounded range
						 */
						tinyafter = 0;
				}
			}
		}
	}

	/* restore the control and status words, check for over/underflow */
	__fenv_setcwsw(&oldcwsw);

	if (ez >= 0x7fff) {
		if (rm == FCW_RN || rm == FCW_RP) {
			/* overflow to infinity */
			zz.i[2] = sz | 0x7fff;
			zz.i[1] = 0x80000000;
			zz.i[0] = 0;
		} else {
			/* overflow to the largest finite magnitude */
			zz.i[2] = sz | 0x7ffe;
			zz.i[1] = 0xffffffff;
			zz.i[0] = 0xffffffff;
		}

		/* overflow */
		dummy = huge;
		dummy *= huge;
	} else {
		zz.i[2] = sz | ez;
		zz.i[1] = z0;
		zz.i[0] = z1;

		/*
		 * tinyafter => result rounded w/ unbounded range would be tiny,
		 * z2 nonzero => result delivered is inexact
		 */
		if (tinyafter) {
			dummy = tiny;

			if (z2)
				dummy *= tiny;	/* inexact underflow */
			else
				dummy -= tiny2;	/* exact underflow */
		} else if (z2) {
			/* inexact only */
			dummy = huge;
			dummy += tiny;
		}
	}

	return (zz.e);
}
#else
#error Unknown architecture
#endif