59 #define two27 C[3].d
60 #define twom26 C[4].d
61 #define twom32 C[5].d
62 #define twom64 C[6].d
63 #define huge C[7].d
64 #define tiny C[8].d
65 #define tiny2 C[9].d
66
67 static const unsigned int fsr_rm = 0xc0000000u;
68
69 /*
70 * fma for SPARC: 64-bit double precision, big-endian
71 */
72 double
73 __fma(double x, double y, double z) {
74 union {
75 unsigned i[2];
76 double d;
77 } xx, yy, zz;
78 double xhi, yhi, xlo, ylo, t;
79 unsigned xy0, xy1, xy2, xy3, z0, z1, z2, z3, rm, sticky;
80 unsigned int fsr;
81 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
82 volatile double dummy;
83
84 /* extract the high order words of the arguments */
85 xx.d = x;
86 yy.d = y;
87 zz.d = z;
88 hx = xx.i[0] & ~0x80000000;
89 hy = yy.i[0] & ~0x80000000;
90 hz = zz.i[0] & ~0x80000000;
91
92 /* dispense with inf, nan, and zero cases */
93 if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
94 (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */
95 return (x * y + z);
96
97 if (hz >= 0x7ff00000) /* z is inf or nan */
98 return (x + z); /* avoid spurious under/overflow in x * y */
99
100 if ((hz | zz.i[1]) == 0) /* z is zero */
475 xx.i[2] = (xx.i[2] & 0x8000) |
476 ((yy.i[2] & 0x7fff) - 63);
477 xx.i[1] = 0x80000000;
478 xx.i[0] = 0;
479 yy.e += xx.e;
480 }
481 }
482 } else {
483 /* set sign of zero result according to rounding direction */
484 rm = oldcwsw & 0x0c000000;
485 yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
486 }
487
488 /*
489 * restore the control and status words and convert the result
490 * to double
491 */
492 __fenv_setcwsw(&oldcwsw);
493 return ((double) yy.e);
494 }
495
496 #if 0
497 /*
498 * another fma for x86: assumes return value will be left in
499 * long double (80-bit double extended) precision
500 */
501 long double
502 __fma(double x, double y, double z) {
503 union {
504 unsigned i[3];
505 long double e;
506 } xx, yy, zz, tt;
507 long double xe, ye, xhi, xlo, yhi, ylo, zhi, zlo;
508 int ex, ey, ez;
509 unsigned cwsw, oldcwsw, s;
510
511 /* convert the operands to double extended */
512 xx.e = (long double) x;
513 yy.e = (long double) y;
514 zz.e = (long double) z;
515
516 /* extract the exponents of the arguments */
517 ex = xx.i[2] & 0x7fff;
518 ey = yy.i[2] & 0x7fff;
519 ez = zz.i[2] & 0x7fff;
520
521 /* dispense with inf, nan, and zero cases */
522 if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
523 /* x or y is inf, nan, or zero */
524 return (xx.e * yy.e + zz.e);
525
526 if (ez >= 0x7fff) /* z is inf or nan */
527 return (xx.e + zz.e); /* avoid spurious inexact in x * y */
528
529 if (ez == 0) /* z is zero */
530 return (xx.e * yy.e); /* x * y isn't zero; no need to add z */
531
532 /*
533 * save the control and status words, mask all exceptions, and
534 * set rounding to 64-bit precision and to-nearest
535 */
536 __fenv_getcwsw(&oldcwsw);
537 cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
538 __fenv_setcwsw(&cwsw);
539
540 /* multiply x*y to 106 bits */
541 xe = xx.e;
542 xx.i[0] = 0;
543 xhi = xx.e; /* hi 32 bits */
544 xlo = xe - xhi; /* lo 21 bits */
545 ye = yy.e;
546 yy.i[0] = 0;
547 yhi = yy.e;
548 ylo = ye - yhi;
549 xx.e = xe * ye;
550 xx.i[0] &= ~0x7ff; /* 53 bits of x*y */
551 yy.e = ((xhi * yhi - xx.e) + xhi * ylo + xlo * yhi) + xlo * ylo;
552
553 /* reduce to a sum of two terms */
554 if (yy.e != 0.0) {
555 ex = xx.i[2] & 0x7fff;
556 if (ez - ex > 10) {
557 /* collapse y into a single bit and add to x */
558 yy.i[0] = 0;
559 yy.i[1] = 0x80000000;
560 yy.i[2] = (yy.i[2] & 0x8000) | (ex - 60);
561 xx.e += yy.e;
562 } else if (ex - ez <= 10) {
563 xx.e += zz.e; /* exact */
564 zz.e = yy.e;
565 } else if (ex - ez <= 42) {
566 /* split z into two pieces */
567 tt.i[0] = 0;
568 tt.i[1] = 0x80000000;
569 tt.i[2] = ex + 11;
570 zhi = (zz.e + tt.e) - tt.e;
571 zlo = zz.e - zhi;
572 xx.e += zhi;
573 zz.e = yy.e + zlo;
574 } else if (ex - ez <= 63) {
575 zz.e += yy.e; /* exact */
576 } else if (ex - ez <= 106) {
577 /*
578 * collapse the tail of z into a sticky bit and add z
579 * to y without error
580 */
581 if (ex - ez <= 81) {
582 s = 1 << (ex - ez - 50);
583 if (zz.i[0] & (s - 1))
584 zz.i[0] |= s;
585 zz.i[0] &= ~(s - 1);
586 } else {
587 s = 1 << (ex - ez - 82);
588 if ((zz.i[1] & (s - 1)) | zz.i[0])
589 zz.i[1] |= s;
590 zz.i[1] &= ~(s - 1);
591 zz.i[0] = 0;
592 }
593 zz.e += yy.e;
594 } else {
595 /* collapse z into a single bit and add to y */
596 zz.i[0] = 0;
597 zz.i[1] = 0x80000000;
598 zz.i[2] = (zz.i[2] & 0x8000) | (ex - 113);
599 zz.e += yy.e;
600 }
601 }
602
603 /* restore the control and status words, and sum */
604 __fenv_setcwsw(&oldcwsw);
605 return (xx.e + zz.e);
606 }
607 #endif
608
609 #else
610 #error Unknown architecture
611 #endif
|
59 #define two27 C[3].d
60 #define twom26 C[4].d
61 #define twom32 C[5].d
62 #define twom64 C[6].d
63 #define huge C[7].d
64 #define tiny C[8].d
65 #define tiny2 C[9].d
66
67 static const unsigned int fsr_rm = 0xc0000000u;
68
69 /*
70 * fma for SPARC: 64-bit double precision, big-endian
71 */
72 double
73 __fma(double x, double y, double z) {
74 union {
75 unsigned i[2];
76 double d;
77 } xx, yy, zz;
78 double xhi, yhi, xlo, ylo, t;
79 unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
80 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
81 volatile double dummy;
82
83 /* extract the high order words of the arguments */
84 xx.d = x;
85 yy.d = y;
86 zz.d = z;
87 hx = xx.i[0] & ~0x80000000;
88 hy = yy.i[0] & ~0x80000000;
89 hz = zz.i[0] & ~0x80000000;
90
91 /* dispense with inf, nan, and zero cases */
92 if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
93 (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */
94 return (x * y + z);
95
96 if (hz >= 0x7ff00000) /* z is inf or nan */
97 return (x + z); /* avoid spurious under/overflow in x * y */
98
99 if ((hz | zz.i[1]) == 0) /* z is zero */
474 xx.i[2] = (xx.i[2] & 0x8000) |
475 ((yy.i[2] & 0x7fff) - 63);
476 xx.i[1] = 0x80000000;
477 xx.i[0] = 0;
478 yy.e += xx.e;
479 }
480 }
481 } else {
482 /* set sign of zero result according to rounding direction */
483 rm = oldcwsw & 0x0c000000;
484 yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
485 }
486
487 /*
488 * restore the control and status words and convert the result
489 * to double
490 */
491 __fenv_setcwsw(&oldcwsw);
492 return ((double) yy.e);
493 }
494
495 #else
496 #error Unknown architecture
497 #endif
|