529 { \
530 y0 = *px; \
531 if ( (hy | ly) == 0 ) /* pow(X,0) */ \
532 RETURN (I, DONE) \
533 if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) ) /* |X| = Nan */ \
534 *pz = y0 + y0; \
535 else if ( (hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0) ) /* X = 0 or Inf */ \
536 { \
537 HI(pz) = hx; \
538 LO(pz) = lx; \
539 if ( sy ) \
540 *pz = DONE / *pz; \
541 } \
542 else \
543 *pz = (sx) ? DZERO / DZERO : DONE; \
544 RET_SC(I) \
545 } \
546 yisint##I = 0; /* Y - non-integer */ \
547 exp = hy >> 20; /* Y exponent */ \
548 ull_y0 &= LMMANT; \
549 ull_x##I = ull_y0 | LDONE; \
550 x##I = *(double*)&ull_x##I; \
551 ull_ax##I = (ull_x##I + LMROUND) & LMHI20; \
552 ax##I = *(double*)&ull_ax##I; \
553 if ( hx >= 0x7ff00000 || exp >= 0x43e ) /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ \
554 { \
555 y0 = *px; \
556 if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || \
557 hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0) ) /* |X| or |Y| = Nan */ \
558 RETURN (I, y0 + *py) \
559 if ( hy == 0x7ff00000 && (ly == 0) ) /* |Y| = Inf */ \
560 { \
561 if ( hx == 0x3ff00000 && (lx == 0) ) /* +-1 ** +-Inf */ \
562 *pz = *py - *py; \
563 else if ( (hx < 0x3ff00000) != sy ) \
564 *pz = DZERO; \
565 else \
566 { \
567 HI(pz) = hy; \
568 LO(pz) = ly; \
569 } \
570 RET_SC(I) \
571 } \
602 RET_SC(I) \
603 } \
604 else /* |Y| >= 2^63 */ \
605 { \
606 /* |X| = 0, 1, Inf */ \
607 if ( lx == 0 && (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000) ) \
608 { \
609 HI(pz) = hx; \
610 LO(pz) = lx; \
611 if ( sy ) \
612 *pz = DONE / *pz; \
613 } \
614 else \
615 { \
616 y0 = ( (hx < 0x3ff00000) != sy ) ? _TINY : _HUGE; \
617 *pz = y0 * y0; \
618 } \
619 RET_SC(I) \
620 } \
621 } \
622 if ( sx || (hx | lx) == 0 ) /* X <= 0 */ \
623 { \
624 if ( exp >= 0x434 ) /* |Y| >= 2^53 */ \
625 yisint##I = 2; /* Y - even */ \
626 else \
627 { \
628 if ( exp >= 0x3ff ) /* |Y| >= 1 */ \
629 { \
630 if ( exp > (20 + 0x3ff) ) \
631 { \
632 i0 = ly >> (52 - (exp - 0x3ff)); \
633 if ( (i0 << (52 - (exp - 0x3ff))) == ly ) \
634 yisint##I = 2 - (i0 & 1); \
635 } \
636 else if ( ly == 0 ) \
637 { \
638 i0 = hy >> (20 - (exp - 0x3ff)); \
639 if ( (i0 << (20 - (exp - 0x3ff))) == hy ) \
640 yisint##I = 2 - (i0 & 1); \
641 } \
642 } \
645 { \
646 y0 = DZERO; \
647 if ( sy ) \
648 y0 = DONE / y0; \
649 if ( sx & yisint##I ) \
650 y0 = -y0; \
651 RETURN (I, y0) \
652 } \
653 if ( yisint##I == 0 ) /* pow(neg,non-integer) */ \
654 RETURN (I, DZERO / DZERO) /* NaN */ \
655 } \
656 exp = (hx >> 20); \
657 exp##I = exp - 2046; \
658 py##I = py; \
659 pz##I = pz; \
660 ux##I = x##I + ax##I; \
661 if ( !exp ) \
662 { \
663 ax##I = (double) ull_y0; \
664 ull_ax##I = *(unsigned long long*)&ax##I; \
665 ull_x##I = ull_ax##I & LMMANT | LDONE; \
666 x##I = *(double*)&ull_x##I; \
667 exp##I = ((unsigned int*) & ull_ax##I)[0]; \
668 exp##I = (exp##I >> 20) - (2046 + 1023 + 51); \
669 ull_ax##I = ull_x##I + LMROUND & LMHI20; \
670 ax##I = *(double*)&ull_ax##I; \
671 ux##I = x##I + ax##I; \
672 } \
673 ull_x##I = *(unsigned long long *)&ux##I; \
674 hx##I = HI(&ull_ax##I); \
675 yd##I = DONE / ux##I;
676
677 void
678 __vpow( int n, double * restrict px, int stridex, double * restrict py,
679 int stridey, double * restrict pz, int stridez )
680 {
681 double *py0, *py1, *py2;
682 double *pz0, *pz1, *pz2;
683 double y0, yd0, u0, s0, s_l0, m_h0;
684 double y1, yd1, u1, s1, s_l1, m_h1;
685 double y2, yd2, u2, s2, s_l2, m_h2;
686 double ax0, x0, s_h0, ux0;
687 double ax1, x1, s_h1, ux1;
688 double ax2, x2, s_h2, ux2;
689 int eflag0, gflag0, ind0, i0;
690 int eflag1, gflag1, ind1, i1;
691 int eflag2, gflag2, ind2, i2;
692 int hx0, yisint0, exp0;
693 int hx1, yisint1, exp1;
694 int hx2, yisint2, exp2;
695 int exp, i = 0;
696 unsigned hx, lx, sx, hy, ly, sy;
697 unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0, ull_ax1, ull_ax2;
698 unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */
699 unsigned long long LMMANT = ((unsigned long long*)LCONST)[4]; /* 0x000fffffffffffff */
700 unsigned long long LMROUND = ((unsigned long long*)LCONST)[5]; /* 0x0000080000000000 */
701 unsigned long long LMHI20 = ((unsigned long long*)LCONST)[6]; /* 0xfffff00000000000 */
702 double DONE = ((double*)LCONST)[1]; /* 1.0 */
703 double DZERO = ((double*)LCONST)[7]; /* 0.0 */
704 double KA5 = ((double*)LCONST)[8]; /* 5.77078604860893737986e-01*256 */
705 double KA3 = ((double*)LCONST)[9]; /* 9.61796693925765549423e-01*256 */
706 double KA1_LO = ((double*)LCONST)[10]; /* 1.41052154268147309568e-05*256 */
707 double KA1_HI = ((double*)LCONST)[11]; /* 2.8853759765625e+00*256 */
708 double KA1 = ((double*)LCONST)[12]; /* 2.885390081777926774e+00*256 */
709 double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */
710 double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */
711 double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */
712 double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */
713 double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */
1112 RET_SC(I) \
1113 } \
1114
1115 #define LMMANT ((unsigned long long*)LCONST)[4] /* 0x000fffffffffffff */
1116 #define LMROUND ((unsigned long long*)LCONST)[5] /* 0x0000080000000000 */
1117 #define LMHI20 ((unsigned long long*)LCONST)[6] /* 0xfffff00000000000 */
1118 #define MMANT ((double*)LCONST)[4] /* 0x000fffffffffffff */
1119 #define MROUND ((double*)LCONST)[5] /* 0x0000080000000000 */
1120 #define MHI20 ((double*)LCONST)[6] /* 0xfffff00000000000 */
1121 #define KA5 ((double*)LCONST)[8] /* 5.77078604860893737986e-01*256 */
1122 #define KA3 ((double*)LCONST)[9] /* 9.61796693925765549423e-01*256 */
1123 #define KA1_LO ((double*)LCONST)[10] /* 1.41052154268147309568e-05*256 */
1124 #define KA1_HI ((double*)LCONST)[11] /* 2.8853759765625e+00*256 */
1125 #define KA1 ((double*)LCONST)[12] /* 2.885390081777926774e+00*256 */
1126
1127
1128 static void
1129 __vpowx( int n, double * restrict px, double * restrict py,
1130 int stridey, double * restrict pz, int stridez )
1131 {
1132 double *py0, *py1, *py2;
1133 double *pz0, *pz1, *pz2;
1134 double ux0, y0, yd0, u0, s0;
1135 double y1, yd1, u1, s1;
1136 double y2, yd2, u2, s2;
1137 double yr, s_h0, s_l0, m_h0, x0, ax0;
1138 unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0;
1139 int eflag0, gflag0, ind0, i0, exp0;
1140 int eflag1, gflag1, ind1, i1;
1141 int eflag2, gflag2, ind2, i2;
1142 int i = 0;
1143 unsigned hx, hx0, hy, ly, sy;
1144 double DONE = ((double*)LCONST)[1]; /* 1.0 */
1145 unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */
1146 double DZERO = ((double*)LCONST)[7]; /* 0.0 */
1147 double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */
1148 double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */
1149 double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */
1150 double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */
1151 double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */
1152 double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */
1153 double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */
1154
1155 /* perform s_h + yr = 256*log2(x) */
1156 ull_y0 = *(unsigned long long*)px;
1157 hx = HI(px);
1158 ull_x0 = ull_y0 & LMMANT | LDONE;
1159 x0 = *(double*)&ull_x0;
1160 exp0 = (hx >> 20) - 2046;
1161 ull_ax0 = ull_x0 + LMROUND & LMHI20;
1162 ax0 = *(double*)&ull_ax0;
1163 hx0 = HI(&ax0);
1164 ux0 = x0 + ax0;
1165 yd0 = DONE / ux0;
1166 u0 = x0 - ax0;
1167 s0 = u0 * yd0;
1168 LO(&ux0) = 0;
1169 y0 = s0 * s0;
1170 s_h0 = s0;
1171 LO(&s_h0) = 0;
1172 s0 = (KA5 * y0 + KA3) * y0 * s0;
1173 s_l0 = (x0 - (ux0 - ax0));
1174 s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0;
1175 s_l0 = KA1 * yd0 * s_l0;
1176 i0 = (hx0 >> 8) & 0xff0;
1177 exp0 += (hx0 >> 20);
1178 yd0 = KA1_HI * s_h0;
1179 y0 = *(double *)((char*)__TBL_log2 + i0);
1180 y0 += (double)(exp0 << 8);
1181 m_h0 = y0 + yd0;
|
529 { \
530 y0 = *px; \
531 if ( (hy | ly) == 0 ) /* pow(X,0) */ \
532 RETURN (I, DONE) \
533 if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) ) /* |X| = Nan */ \
534 *pz = y0 + y0; \
535 else if ( (hx | lx) == 0 || (hx == 0x7ff00000 && lx == 0) ) /* X = 0 or Inf */ \
536 { \
537 HI(pz) = hx; \
538 LO(pz) = lx; \
539 if ( sy ) \
540 *pz = DONE / *pz; \
541 } \
542 else \
543 *pz = (sx) ? DZERO / DZERO : DONE; \
544 RET_SC(I) \
545 } \
546 yisint##I = 0; /* Y - non-integer */ \
547 exp = hy >> 20; /* Y exponent */ \
548 ull_y0 &= LMMANT; \
549 ull_x##I = (ull_y0 | LDONE); \
550 x##I = *(double*)&ull_x##I; \
551 ull_ax##I = ((ull_x##I + LMROUND) & LMHI20); \
552 ax##I = *(double*)&ull_ax##I; \
553 if ( hx >= 0x7ff00000 || exp >= 0x43e ) /* X=Inf,Nan or |Y|>2^63,Inf,Nan */ \
554 { \
555 y0 = *px; \
556 if ( hx > 0x7ff00000 || (hx == 0x7ff00000 && lx != 0) || \
557 hy > 0x7ff00000 || (hy == 0x7ff00000 && ly != 0) ) /* |X| or |Y| = Nan */ \
558 RETURN (I, y0 + *py) \
559 if ( hy == 0x7ff00000 && (ly == 0) ) /* |Y| = Inf */ \
560 { \
561 if ( hx == 0x3ff00000 && (lx == 0) ) /* +-1 ** +-Inf */ \
562 *pz = *py - *py; \
563 else if ( (hx < 0x3ff00000) != sy ) \
564 *pz = DZERO; \
565 else \
566 { \
567 HI(pz) = hy; \
568 LO(pz) = ly; \
569 } \
570 RET_SC(I) \
571 } \
602 RET_SC(I) \
603 } \
604 else /* |Y| >= 2^63 */ \
605 { \
606 /* |X| = 0, 1, Inf */ \
607 if ( lx == 0 && (hx == 0 || hx == 0x3ff00000 || hx == 0x7ff00000) ) \
608 { \
609 HI(pz) = hx; \
610 LO(pz) = lx; \
611 if ( sy ) \
612 *pz = DONE / *pz; \
613 } \
614 else \
615 { \
616 y0 = ( (hx < 0x3ff00000) != sy ) ? _TINY : _HUGE; \
617 *pz = y0 * y0; \
618 } \
619 RET_SC(I) \
620 } \
621 } \
622 if ( (sx || (hx | lx)) == 0 ) /* X <= 0 */ \
623 { \
624 if ( exp >= 0x434 ) /* |Y| >= 2^53 */ \
625 yisint##I = 2; /* Y - even */ \
626 else \
627 { \
628 if ( exp >= 0x3ff ) /* |Y| >= 1 */ \
629 { \
630 if ( exp > (20 + 0x3ff) ) \
631 { \
632 i0 = ly >> (52 - (exp - 0x3ff)); \
633 if ( (i0 << (52 - (exp - 0x3ff))) == ly ) \
634 yisint##I = 2 - (i0 & 1); \
635 } \
636 else if ( ly == 0 ) \
637 { \
638 i0 = hy >> (20 - (exp - 0x3ff)); \
639 if ( (i0 << (20 - (exp - 0x3ff))) == hy ) \
640 yisint##I = 2 - (i0 & 1); \
641 } \
642 } \
645 { \
646 y0 = DZERO; \
647 if ( sy ) \
648 y0 = DONE / y0; \
649 if ( sx & yisint##I ) \
650 y0 = -y0; \
651 RETURN (I, y0) \
652 } \
653 if ( yisint##I == 0 ) /* pow(neg,non-integer) */ \
654 RETURN (I, DZERO / DZERO) /* NaN */ \
655 } \
656 exp = (hx >> 20); \
657 exp##I = exp - 2046; \
658 py##I = py; \
659 pz##I = pz; \
660 ux##I = x##I + ax##I; \
661 if ( !exp ) \
662 { \
663 ax##I = (double) ull_y0; \
664 ull_ax##I = *(unsigned long long*)&ax##I; \
665 ull_x##I = ((ull_ax##I & LMMANT) | LDONE); \
666 x##I = *(double*)&ull_x##I; \
667 exp##I = ((unsigned int*) & ull_ax##I)[0]; \
668 exp##I = (exp##I >> 20) - (2046 + 1023 + 51); \
669 ull_ax##I = (ull_x##I + (LMROUND & LMHI20)); \
670 ax##I = *(double*)&ull_ax##I; \
671 ux##I = x##I + ax##I; \
672 } \
673 ull_x##I = *(unsigned long long *)&ux##I; \
674 hx##I = HI(&ull_ax##I); \
675 yd##I = DONE / ux##I;
676
677 void
678 __vpow( int n, double * restrict px, int stridex, double * restrict py,
679 int stridey, double * restrict pz, int stridez )
680 {
681 double *py0 = 0, *py1 = 0, *py2;
682 double *pz0 = 0, *pz1 = 0, *pz2;
683 double y0, yd0 = 0.0L, u0, s0, s_l0, m_h0;
684 double y1, yd1 = 0.0L, u1, s1, s_l1, m_h1;
685 double y2, yd2, u2, s2, s_l2, m_h2;
686 double ax0 = 0.0L, x0 = 0.0L, s_h0, ux0;
687 double ax1 = 0.0L, x1 = 0.0L, s_h1, ux1;
688 double ax2, x2, s_h2, ux2;
689 int eflag0, gflag0, ind0, i0;
690 int eflag1, gflag1, ind1, i1;
691 int eflag2, gflag2, ind2, i2;
692 int hx0 = 0, yisint0 = 0, exp0 = 0;
693 int hx1 = 0, yisint1 = 0, exp1 = 0;
694 int hx2, yisint2, exp2;
695 int exp, i = 0;
696 unsigned hx, lx, sx, hy, ly, sy;
697 unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0, ull_ax1, ull_ax2;
698 unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */
699 unsigned long long LMMANT = ((unsigned long long*)LCONST)[4]; /* 0x000fffffffffffff */
700 unsigned long long LMROUND = ((unsigned long long*)LCONST)[5]; /* 0x0000080000000000 */
701 unsigned long long LMHI20 = ((unsigned long long*)LCONST)[6]; /* 0xfffff00000000000 */
702 double DONE = ((double*)LCONST)[1]; /* 1.0 */
703 double DZERO = ((double*)LCONST)[7]; /* 0.0 */
704 double KA5 = ((double*)LCONST)[8]; /* 5.77078604860893737986e-01*256 */
705 double KA3 = ((double*)LCONST)[9]; /* 9.61796693925765549423e-01*256 */
706 double KA1_LO = ((double*)LCONST)[10]; /* 1.41052154268147309568e-05*256 */
707 double KA1_HI = ((double*)LCONST)[11]; /* 2.8853759765625e+00*256 */
708 double KA1 = ((double*)LCONST)[12]; /* 2.885390081777926774e+00*256 */
709 double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */
710 double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */
711 double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */
712 double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */
713 double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */
1112 RET_SC(I) \
1113 } \
1114
1115 #define LMMANT ((unsigned long long*)LCONST)[4] /* 0x000fffffffffffff */
1116 #define LMROUND ((unsigned long long*)LCONST)[5] /* 0x0000080000000000 */
1117 #define LMHI20 ((unsigned long long*)LCONST)[6] /* 0xfffff00000000000 */
1118 #define MMANT ((double*)LCONST)[4] /* 0x000fffffffffffff */
1119 #define MROUND ((double*)LCONST)[5] /* 0x0000080000000000 */
1120 #define MHI20 ((double*)LCONST)[6] /* 0xfffff00000000000 */
1121 #define KA5 ((double*)LCONST)[8] /* 5.77078604860893737986e-01*256 */
1122 #define KA3 ((double*)LCONST)[9] /* 9.61796693925765549423e-01*256 */
1123 #define KA1_LO ((double*)LCONST)[10] /* 1.41052154268147309568e-05*256 */
1124 #define KA1_HI ((double*)LCONST)[11] /* 2.8853759765625e+00*256 */
1125 #define KA1 ((double*)LCONST)[12] /* 2.885390081777926774e+00*256 */
1126
1127
1128 static void
1129 __vpowx( int n, double * restrict px, double * restrict py,
1130 int stridey, double * restrict pz, int stridez )
1131 {
1132 double *py0, *py1 = 0, *py2;
1133 double *pz0, *pz1 = 0, *pz2;
1134 double ux0, y0, yd0, u0, s0;
1135 double y1, yd1, u1, s1;
1136 double y2, yd2, u2, s2;
1137 double yr, s_h0, s_l0, m_h0, x0, ax0;
1138 unsigned long long ull_y0, ull_x0, ull_x1, ull_x2, ull_ax0;
1139 int eflag0, gflag0, ind0, i0, exp0;
1140 int eflag1, gflag1, ind1, i1;
1141 int eflag2, gflag2, ind2, i2;
1142 int i = 0;
1143 unsigned hx, hx0, hy, ly, sy;
1144 double DONE = ((double*)LCONST)[1]; /* 1.0 */
1145 unsigned long long LDONE = ((unsigned long long*)LCONST)[1]; /* 1.0 */
1146 double DZERO = ((double*)LCONST)[7]; /* 0.0 */
1147 double HTHRESH = ((double*)LCONST)[13]; /* 262144.0 */
1148 double LTHRESH = ((double*)LCONST)[14]; /* -275200.0 */
1149 double KB5 = ((double*)LCONST)[15]; /* 1.21195555854068860923e-15 */
1150 double KB4 = ((double*)LCONST)[16]; /* 2.23939573811855104311e-12 */
1151 double KB3 = ((double*)LCONST)[17]; /* 3.30830268126604677436e-09 */
1152 double KB2 = ((double*)LCONST)[18]; /* 3.66556559691003767877e-06 */
1153 double KB1 = ((double*)LCONST)[19]; /* 2.70760617406228636578e-03 */
1154
1155 /* perform s_h + yr = 256*log2(x) */
1156 ull_y0 = *(unsigned long long*)px;
1157 hx = HI(px);
1158 ull_x0 = (ull_y0 & LMMANT) | LDONE;
1159 x0 = *(double*)&ull_x0;
1160 exp0 = (hx >> 20) - 2046;
1161 ull_ax0 = ull_x0 + (LMROUND & LMHI20);
1162 ax0 = *(double*)&ull_ax0;
1163 hx0 = HI(&ax0);
1164 ux0 = x0 + ax0;
1165 yd0 = DONE / ux0;
1166 u0 = x0 - ax0;
1167 s0 = u0 * yd0;
1168 LO(&ux0) = 0;
1169 y0 = s0 * s0;
1170 s_h0 = s0;
1171 LO(&s_h0) = 0;
1172 s0 = (KA5 * y0 + KA3) * y0 * s0;
1173 s_l0 = (x0 - (ux0 - ax0));
1174 s_l0 = u0 - s_h0 * ux0 - s_h0 * s_l0;
1175 s_l0 = KA1 * yd0 * s_l0;
1176 i0 = (hx0 >> 8) & 0xff0;
1177 exp0 += (hx0 >> 20);
1178 yd0 = KA1_HI * s_h0;
1179 y0 = *(double *)((char*)__TBL_log2 + i0);
1180 y0 += (double)(exp0 << 8);
1181 m_h0 = y0 + yd0;
|