Print this page
11210 libm should be cstyle(1ONBLD) clean


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak __tgamma = tgamma
  31 
  32 /* INDENT OFF */
  33 /*
  34  * True gamma function
  35  * double tgamma(double x)
  36  *
  37  * Error:
  38  * ------
  39  *      Less that one ulp for both positive and negative arguments.
  40  *
  41  * Algorithm:
  42  * ---------
  43  *      A: For negative argument
  44  *              (1) gamma(-n or -inf) is NaN
  45  *              (2) Underflow Threshold
  46  *              (3) Reduction to gamma(1+x)
  47  *      B: For x between 1 and 2
  48  *      C: For x between 0 and 1
  49  *      D: For x between 2 and 8
  50  *      E: Overflow thresold {see over.c}
  51  *      F: For overflow_threshold >= x >= 8
  52  *


 730  *        GP2 =   7.77830853479775281781085278324621033523037489883e-0004
 731  *
 732  *
 733  *      Implementation note:
 734  *      z = (1/x), z2 = z*z, z4 = z2*z2;
 735  *      p = z*(GP0+z2*(GP1+....+z2*GP7))
 736  *        = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
 737  *
 738  *   Adding everything up:
 739  *      t = rr.h*ww.h+hln2pi_h                  ... exact
 740  *      w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
 741  *
 742  *   Computing exp(t+w):
 743  *      s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
 744  *      exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
 745  *      expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
 746  *      2**(j/32) is obtained by table look-up S[j]+S_trail[j].
 747  *      Remez error bound:
 748  *      |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
 749  */

 750 
 751 #include "libm.h"
 752 
 753 #define __HI(x) ((int *) &x)[HIWORD]
 754 #define __LO(x) ((unsigned *) &x)[LOWORD]
 755 
 756 struct Double {
 757         double h;
 758         double l;
 759 };
 760 
 761 /* Hex value of GP0 shoule be 3FB55555 55555555 */
 762 static const double c[] = {
 763         +1.0,
 764         +2.0,
 765         +0.5,
 766         +1.0e-300,
 767         +6.66666666666666740682e-01,                            /* A1=T3[0] */
 768         +3.99999999955626478023093908674902212920e-01,          /* A2=T3[1] */
 769         +2.85720221533145659809237398709372330980e-01,          /* A3=T3[2] */
 770         +0.0833333333333333287074040640618477,                  /* GP[0] */
 771         -2.77777777776649355200565611114627670089130772843e-03,
 772         +7.93650787486083724805476194170211775784158551509e-04,
 773         -5.95236628558314928757811419580281294593903582971e-04,
 774         +8.41566473999853451983137162780427812781178932540e-04,


1116 #define P23   cr[14]
1117 #define Q20   cr[15]
1118 #define Q21   cr[16]
1119 #define Q22   cr[17]
1120 #define Q23   cr[18]
1121 #define Q24   cr[19]
1122 #define Q25   cr[20]
1123 #define Q26   cr[21]
1124 #define P30   cr[22]
1125 #define P31   cr[23]
1126 #define P32   cr[24]
1127 #define P33   cr[25]
1128 #define P34   cr[26]
1129 #define Q30   cr[27]
1130 #define Q31   cr[28]
1131 #define Q32   cr[29]
1132 #define Q33   cr[30]
1133 #define Q34   cr[31]
1134 #define Q35   cr[32]
1135 
1136 static const double
1137         GZ1_h = +0.938204627909682398190,
1138         GZ1_l = +5.121952600248205157935e-17,
1139         GZ2_h = +0.885603194410888749921,
1140         GZ2_l = -4.964236872556339810692e-17,
1141         GZ3_h = +0.936781411463652347038,
1142         GZ3_l = -2.541923110834479415023e-17,
1143         TZ1 = -0.3517214357852935791015625,
1144         TZ3 = +0.280530631542205810546875;
1145 /* INDENT ON */
1146 
1147 /* compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] */
1148 /* assume yh got 20 significant bits */


1149 static struct Double
1150 GT1(double yh, double yl) {

1151         double t3, t4, y, z;
1152         struct Double r;
1153 
1154         y = yh + yl;
1155         z = y * y;
1156         t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) /
1157                 (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1158         t3 += (TZ1 * yl + GZ1_l);
1159         t4 = TZ1 * yh;
1160         r.h = (double) ((float) (t4 + GZ1_h + t3));
1161         t3 += (t4 - (r.h - GZ1_h));
1162         r.l = t3;
1163         return (r);
1164 }
1165 
1166 /* compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] */
1167 /* assume yh got 20 significant bits */


1168 static struct Double
1169 GT2(double yh, double yl) {

1170         double t3, y, z;
1171         struct Double r;
1172 
1173         y = yh + yl;
1174         z = y * y;
1175         t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) /
1176                 (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) +
1177                 (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l;
1178         r.h = (double) ((float) (GZ2_h + t3));
1179         r.l = t3 - (r.h - GZ2_h);
1180         return (r);
1181 }
1182 
1183 /* compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] */
1184 /* assume yh got 20 significant bits */


1185 static struct Double
1186 GT3(double yh, double yl) {

1187         double t3, t4, y, z;
1188         struct Double r;
1189 
1190         y = yh + yl;
1191         z = y * y;
1192         t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) /
1193                 (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1194         t3 += (TZ3 * yl + GZ3_l);
1195         t4 = TZ3 * yh;
1196         r.h = (double) ((float) (t4 + GZ3_h + t3));
1197         t3 += (t4 - (r.h - GZ3_h));
1198         r.l = t3;
1199         return (r);
1200 }
1201 
1202 /* INDENT OFF */
1203 /*
1204  * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
1205  *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
1206  *                = L1 + L2 + L3,
1207  */
1208 /* INDENT ON */
1209 static struct Double
1210 large_gam(double x, int *m) {
1211         double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0,
1212                 p24 = 1.0 / 16777216.0;

1213         int n2, j2, k, ix, j;
1214         unsigned lx;
1215         struct Double zz;
1216         double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
1217 
1218 /* INDENT OFF */
1219 /*
1220  * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
1221  *
1222  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
1223  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
1224  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
1225  *       T2(j) = T2[2j,2j+1] = log(z[j]),
1226  *       T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7
1227  *  Note
1228  *  (1) the leading entries are truncated to 24 binary point.
1229  *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
1230  *                                   2**(-24)
1231  *                           _________V___________________
1232  *               T1(n):     |_________|___________________|
1233  *                             _______ ______________________
1234  *               T2(j):       |_______|______________________|
1235  *                                ____ _______________________
1236  *               2s:             |____|_______________________|
1237  *                                    __________________________
1238  *          +    T3(s)-2s:           |__________________________|
1239  *                       -------------------------------------------
1240  *                          [leading] + [Trailing]
1241  */
1242 /* INDENT ON */
1243         ix = __HI(x);
1244         lx = __LO(x);
1245         n2 = (ix >> 20) - 0x3ff;  /* exponent of x, range:3-7 */
1246         n2 += n2;                       /* 2n */
1247         ix = (ix & 0x000fffff) | 0x3ff00000;        /* y = scale x to [1,2] */
1248         __HI(y) = ix;
1249         __LO(y) = lx;
1250         __HI(z) = (ix & 0xffffc000) | 0x2000;       /* z[j]=1+j/64+1/128 */
1251         __LO(z) = 0;
1252         j2 = (ix >> 13) & 0x7e;       /* 2j */
1253         t1 = y + z;
1254         t2 = y - z;
1255         r = one / t1;
1256         t1 = (double) ((float) t1);
1257         u = r * t2;             /* u = (y-z)/(y+z) */
1258         t4 = T2[j2 + 1] + T1[n2 + 1];
1259         z2 = u * u;
1260         k = __HI(u) & 0x7fffffff;
1261         t3 = T2[j2] + T1[n2];

1262         if ((k >> 20) < 0x3ec) {       /* |u|<2**-19 */
1263                 t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
1264         } else {
1265                 t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
1266                 u2 = u + u;
1267                 v = (double) ((int) (u2 * t24)) * p24;
1268                 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
1269                 t3 += v;
1270         }
1271         ss_h = (double) ((float) (t2 + t3));

1272         ss_l = t2 - (ss_h - t3);
1273 
1274         /*
1275          * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
1276          * where ss = log(x) - 1 in already in extra precision
1277          */
1278         z = one / x;
1279         r = x - half;
1280         r_h = (double) ((float) r);
1281         w_h = r_h * ss_h + hln2pi_h;
1282         z2 = z * z;
1283         w = (r - r_h) * ss_h + r * ss_l;
1284         z4 = z2 * z2;
1285         t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
1286         t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
1287         t1 += t2;
1288         w += hln2pi_l;
1289         w_l = z * (GP0 + t1) + w;
1290         k = (int) ((w_h + w_l) * invln2_32 + half);
1291 
1292         /* compute the exponential of w_h+w_l */
1293         j = k & 0x1f;
1294         *m = (k >> 5);
1295         t3 = (double) k;
1296 
1297         /* perform w - k*ln2_32 (represent as w_h - w_l) */
1298         t1 = w_h - t3 * ln2_32hi;
1299         t2 = t3 * ln2_32lo;
1300         w = w_l - t2;
1301         w_h = t1 + w_l;
1302         w_l = t2 - (w_l - (w_h - t1));
1303 
1304         /* compute exp(w_h+w_l) */
1305         z = w_h - w_l;
1306         z2 = z * z;
1307         t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
1308         t2 = z2 * (Et2 + z2 * Et4);
1309         t3 = w_h - (w_l - (t1 + z * t2));
1310         zz.l = S_trail[j] * (one + t3) + S[j] * t3;
1311         zz.h = S[j];
1312         return (zz);
1313 }
1314 
1315 /* INDENT OFF */
1316 /*
1317  * kpsin(x)= sin(pi*x)/pi
1318  *                 3        5        7        9        11        13        15
1319  *      = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  +ks[5]*x  +ks[6]*x
1320  */
1321 static const double ks[] = {
1322         -1.64493406684822640606569,
1323         +8.11742425283341655883668741874008920850698590621e-0001,
1324         -1.90751824120862873825597279118304943994042258291e-0001,
1325         +2.61478477632554278317289628332654539353521911570e-0002,
1326         -2.34607978510202710377617190278735525354347705866e-0003,
1327         +1.48413292290051695897242899977121846763824221705e-0004,
1328         -6.87730769637543488108688726777687262485357072242e-0006,
1329 };
1330 /* INDENT ON */
1331 
1332 /* assume x is not tiny and positive */
1333 static struct Double
1334 kpsin(double x) {

1335         double z, t1, t2, t3, t4;
1336         struct Double xx;
1337 
1338         z = x * x;
1339         xx.h = x;
1340         t1 = z * x;
1341         t2 = z * z;
1342         t4 = t1 * ks[0];
1343         t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) *
1344                 (ks[4] + z * ks[5] + t2 * ks[6]));
1345         xx.l = t4 + t3;
1346         return (xx);
1347 }
1348 
1349 /* INDENT OFF */
1350 /*
1351  * kpcos(x)= cos(pi*x)/pi
1352  *                     2        4        6        8        10        12
1353  *      = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
1354  */
1355 
1356 static const double one_pi_h = 0.318309886183790635705292970,
1357                 one_pi_l = 3.583247455607534006714276420e-17;
1358 static const double npi_2_h = -1.5625,
1359                 npi_2_l = -0.00829632679489661923132169163975055099555883223;
1360 static const double kc[] = {
1361         -1.57079632679489661923132169163975055099555883223e+0000,
1362         +1.29192819501230224953283586722575766189551966008e+0000,
1363         -4.25027339940149518500158850753393173519732149213e-0001,
1364         +7.49080625187015312373925142219429422375556727752e-0002,
1365         -8.21442040906099210866977352284054849051348692715e-0003,
1366         +6.10411356829515414575566564733632532333904115968e-0004,
1367 };
1368 /* INDENT ON */
1369 
1370 /* assume x is not tiny and positive */
1371 static struct Double
1372 kpcos(double x) {

1373         double z, t1, t2, t3, t4, x4, x8;
1374         struct Double xx;
1375 
1376         z = x * x;
1377         xx.h = one_pi_h;
1378         t1 = (double) ((float) x);
1379         x4 = z * z;
1380         t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
1381         t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z *
1382                 kc[4] + x4 * kc[5]));
1383         t4 = t1 * t1;   /* 48 bits mantissa */
1384         x8 = t2 + t3;
1385         t4 *= npi_2_h;  /* npi_2_h is 5 bits const. The product is exact */
1386         xx.l = x8 + t4; /* that will minimized the rounding error in xx.l */
1387         return (xx);
1388 }
1389 
1390 /* INDENT OFF */
1391 static const double
1392         /* 0.134861805732790769689793935774652917006 */
1393         t0z1   =  0.1348618057327907737708,
1394         t0z1_l = -4.0810077708578299022531e-18,
1395         /* 0.461632144968362341262659542325721328468 */
1396         t0z2   =  0.4616321449683623567850,
1397         t0z2_l = -1.5522348162858676890521e-17,
1398         /* 0.819773101100500601787868704921606996312 */
1399         t0z3   =  0.8197731011005006118708,
1400         t0z3_l = -1.0082945122487103498325e-17;
1401         /* 1.134861805732790769689793935774652917006 */
1402 /* INDENT ON */


1403 
1404 /* gamma(x+i) for 0 <= x < 1  */
1405 static struct Double
1406 gam_n(int i, double x) {
1407         struct Double rr = {0.0L, 0.0L}, yy;

1408         double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
1409 
1410         /* compute yy = gamma(x+1) */
1411         if (x > 0.2845) {
1412                 if (x > 0.6374) {
1413                         r1 = x - t0z3;
1414                         r2 = (double) ((float) (r1 - t0z3_l));
1415                         t2 = r1 - r2;
1416                         yy = GT3(r2, t2 - t0z3_l);
1417                 } else {
1418                         r1 = x - t0z2;
1419                         r2 = (double) ((float) (r1 - t0z2_l));
1420                         t2 = r1 - r2;
1421                         yy = GT2(r2, t2 - t0z2_l);
1422                 }
1423         } else {
1424                 r1 = x - t0z1;
1425                 r2 = (double) ((float) (r1 - t0z1_l));
1426                 t2 = r1 - r2;
1427                 yy = GT1(r2, t2 - t0z1_l);
1428         }
1429 
1430         /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
1431         switch (i) {
1432         case 0:         /* yy/x */
1433                 r1 = one / x;
1434                 xh = (double) ((float) x);      /* x is not tiny */
1435                 rr.h = (double) ((float) ((yy.h + yy.l) * r1));
1436                 rr.l = r1 * (yy.h - rr.h * xh) -
1437                         ((r1 * rr.h) * (x - xh) - r1 * yy.l);
1438                 break;
1439         case 1:         /* yy */
1440                 rr.h = yy.h;
1441                 rr.l = yy.l;
1442                 break;
1443         case 2:         /* (x+1)*yy */
1444                 z = x + one;    /* may not be exact */
1445                 zh = (double) ((float) z);
1446                 rr.h = zh * yy.h;
1447                 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
1448                 break;
1449         case 3:         /* (x+2)*(x+1)*yy */
1450                 z1 = x + one;
1451                 z2 = x + 2.0;
1452                 z = z1 * z2;
1453                 xh = (double) ((float) z);
1454                 zh = (double) ((float) z1);
1455                 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
1456                 rr.h = xh * yy.h;
1457                 rr.l = z * yy.l + xl * yy.h;
1458                 break;
1459 
1460         case 4:         /* (x+1)*(x+3)*(x+2)*yy */
1461                 z1 = x + 2.0;
1462                 z2 = (x + one) * (x + 3.0);
1463                 zh = z1;
1464                 __LO(zh) = 0;
1465                 __HI(zh) &= 0xfffffff8;     /* zh 18 bits mantissa */
1466                 zl = x - (zh - 2.0);
1467                 z = z1 * z2;
1468                 xh = (double) ((float) z);
1469                 xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
1470                 rr.h = xh * yy.h;
1471                 rr.l = z * yy.l + xl * yy.h;
1472                 break;
1473         case 5:         /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1474                 z1 = x + 2.0;
1475                 z2 = x + 3.0;
1476                 z = z1 * z2;
1477                 zh = (double) ((float) z1);
1478                 yh = (double) ((float) z);
1479                 yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
1480                 z2 = z - 2.0;
1481                 z *= z2;
1482                 xh = (double) ((float) z);
1483                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1484                 rr.h = xh * yy.h;
1485                 rr.l = z * yy.l + xl * yy.h;
1486                 break;
1487         case 6:         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1488                 z1 = x + 2.0;
1489                 z2 = x + 3.0;
1490                 z = z1 * z2;
1491                 zh = (double) ((float) z1);
1492                 yh = (double) ((float) z);
1493                 z1 = x - (zh - 2.0);
1494                 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
1495                 z2 = z - 2.0;
1496                 x5 = x + 5.0;
1497                 z *= z2;
1498                 xh = (double) ((float) z);
1499                 zh += 3.0;
1500                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1501                                                 /* xh+xl=(x+1)*...*(x+4) */
1502                 /* wh+wl=(x+5)*yy */
1503                 wh = (double) ((float) (x5 * (yy.h + yy.l)));



1504                 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1505                 rr.h = wh * xh;
1506                 rr.l = z * wl + xl * wh;
1507                 break;
1508         case 7:         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1509                 z1 = x + 3.0;
1510                 z2 = x + 4.0;
1511                 z = z2 * z1;
1512                 zh = (double) ((float) z1);
1513                 yh = (double) ((float) z);      /* yh+yl = (x+3)(x+4) */
1514                 yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
1515                 z1 = x + 6.0;
1516                 z2 = z - 2.0;   /* z2 = (x+2)*(x+5) */
1517                 z *= z2;
1518                 xh = (double) ((float) z);
1519                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1520                                                 /* xh+xl=(x+2)*...*(x+5) */
1521                 /* wh+wl=(x+1)(x+6)*yy */



1522                 z2 -= 4.0;      /* z2 = (x+1)(x+6) */
1523                 wh = (double) ((float) (z2 * (yy.h + yy.l)));
1524                 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
1525                 rr.h = wh * xh;
1526                 rr.l = z * wl + xl * wh;
1527         }

1528         return (rr);
1529 }
1530 
1531 double
1532 tgamma(double x) {

1533         struct Double ss, ww;
1534         double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1535         int i, j, k, m, ix, hx, xk;
1536         unsigned lx;
1537 
1538         hx = __HI(x);
1539         lx = __LO(x);
1540         ix = hx & 0x7fffffff;
1541         y = x;
1542 
1543         if (ix < 0x3ca00000)
1544                 return (one / x);       /* |x| < 2**-53 */

1545         if (ix >= 0x7ff00000)
1546                         /* +Inf -> +Inf, -Inf or NaN -> NaN */
1547                 return (x * ((hx < 0)? 0.0 : x));

1548         if (hx > 0x406573fa ||       /* x > 171.62... overflow to +inf */
1549             (hx == 0x406573fa && lx > 0xE561F647)) {
1550                 z = x / tiny;
1551                 return (z * z);
1552         }

1553         if (hx >= 0x40200000) {      /* x >= 8 */
1554                 ww = large_gam(x, &m);
1555                 w = ww.h + ww.l;
1556                 __HI(w) += m << 20;
1557                 return (w);
1558         }

1559         if (hx > 0) {                /* 0 < x < 8 */
1560                 i = (int) x;
1561                 ww = gam_n(i, x - (double) i);
1562                 return (ww.h + ww.l);
1563         }
1564 
1565         /* negative x */
1566         /* INDENT OFF */


1567         /*
1568          * compute: xk =
1569          *      -2 ... x is an even int (-inf is even)
1570          *      -1 ... x is an odd int
1571          *      +0 ... x is not an int but chopped to an even int
1572          *      +1 ... x is not an int but chopped to an odd int
1573          */
1574         /* INDENT ON */
1575         xk = 0;

1576         if (ix >= 0x43300000) {
1577                 if (ix >= 0x43400000)
1578                         xk = -2;
1579                 else
1580                         xk = -2 + (lx & 1);
1581         } else if (ix >= 0x3ff00000) {
1582                 k = (ix >> 20) - 0x3ff;

1583                 if (k > 20) {
1584                         j = lx >> (52 - k);

1585                         if ((j << (52 - k)) == lx)
1586                                 xk = -2 + (j & 1);
1587                         else
1588                                 xk = j & 1;
1589                 } else {
1590                         j = ix >> (20 - k);

1591                         if ((j << (20 - k)) == ix && lx == 0)
1592                                 xk = -2 + (j & 1);
1593                         else
1594                                 xk = j & 1;
1595                 }
1596         }

1597         if (xk < 0)
1598                 /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */
1599                 return ((x - x) / (x - x));             /* 0/0 = NaN */
1600 
1601 
1602         /* negative underflow thresold */
1603         if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
1604                 /* x < -183.0 - 11ulp */
1605                 z = tiny / x;

1606                 if (xk == 1)
1607                         z = -z;

1608                 return (z * tiny);
1609         }
1610 
1611         /* now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
1612 
1613         /*
1614          * First compute ss = -sin(pi*y)/pi , so that
1615          * gamma(x) = 1/(ss*gamma(1+y))
1616          */
1617         y = -x;
1618         j = (int) y;
1619         z = y - (double) j;
1620         if (z > 0.3183098861837906715377675)

1621                 if (z > 0.6816901138162093284622325)
1622                         ss = kpsin(one - z);
1623                 else
1624                         ss = kpcos(0.5 - z);
1625         else
1626                 ss = kpsin(z);


1627         if (xk == 0) {
1628                 ss.h = -ss.h;
1629                 ss.l = -ss.l;
1630         }
1631 
1632         /* Then compute ww = gamma(1+y), note that result scale to 2**m */
1633         m = 0;

1634         if (j < 7) {
1635                 ww = gam_n(j + 1, z);
1636         } else {
1637                 w = y + one;

1638                 if ((lx & 1) == 0) {        /* y+1 exact (note that y<184) */
1639                         ww = large_gam(w, &m);
1640                 } else {
1641                         t = w - one;

1642                         if (t == y) {   /* y+one exact */
1643                                 ww = large_gam(w, &m);
1644                         } else {        /* use y*gamma(y) */
1645                                 if (j == 7)
1646                                         ww = gam_n(j, z);
1647                                 else
1648                                         ww = large_gam(y, &m);

1649                                 t4 = ww.h + ww.l;
1650                                 t1 = (double) ((float) y);
1651                                 t2 = (double) ((float) t4);
1652                                                 /* t4 will not be too large */
1653                                 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1654                                 ww.h = t1 * t2;
1655                         }
1656                 }
1657         }
1658 
1659         /* compute 1/(ss*ww) */
1660         t3 = ss.h + ss.l;
1661         t4 = ww.h + ww.l;
1662         t1 = (double) ((float) t3);
1663         t2 = (double) ((float) t4);
1664         z1 = ss.l - (t1 - ss.h);        /* (t1,z1) = ss */
1665         z2 = ww.l - (t2 - ww.h);        /* (t2,z2) = ww */
1666         t3 = t3 * t4;                   /* t3 = ss*ww */
1667         z3 = one / t3;                  /* z3 = 1/(ss*ww) */
1668         t5 = t1 * t2;
1669         z5 = z1 * t4 + t1 * z2;         /* (t5,z5) = ss*ww */
1670         t1 = (double) ((float) t3);     /* (t1,z1) = ss*ww */
1671         z1 = z5 - (t1 - t5);
1672         t2 = (double) ((float) z3);     /* leading 1/(ss*ww) */
1673         z2 = z3 * (t2 * z1 - (one - t2 * t1));
1674         z = t2 - z2;
1675 
1676         /* check whether z*2**-m underflow */
1677         if (m != 0) {
1678                 hx = __HI(z);
1679                 i = hx & 0x80000000;
1680                 ix = hx ^ i;
1681                 j = ix >> 20;

1682                 if (j > m) {
1683                         ix -= m << 20;
1684                         __HI(z) = ix ^ i;
1685                 } else if ((m - j) > 52) {
1686                         /* underflow */
1687                         if (xk == 0)
1688                                 z = -tiny * tiny;
1689                         else
1690                                 z = tiny * tiny;
1691                 } else {
1692                         /* subnormal */
1693                         m -= 60;
1694                         t = one;
1695                         __HI(t) -= 60 << 20;
1696                         ix -= m << 20;
1697                         __HI(z) = ix ^ i;
1698                         z *= t;
1699                 }
1700         }

1701         return (z);
1702 }


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __tgamma = tgamma
  32 
  33 /* BEGIN CSTYLED */
  34 /*
  35  * True gamma function
  36  * double tgamma(double x)
  37  *
  38  * Error:
  39  * ------
  40  *      Less that one ulp for both positive and negative arguments.
  41  *
  42  * Algorithm:
  43  * ---------
  44  *      A: For negative argument
  45  *              (1) gamma(-n or -inf) is NaN
  46  *              (2) Underflow Threshold
  47  *              (3) Reduction to gamma(1+x)
  48  *      B: For x between 1 and 2
  49  *      C: For x between 0 and 1
  50  *      D: For x between 2 and 8
  51  *      E: Overflow thresold {see over.c}
  52  *      F: For overflow_threshold >= x >= 8
  53  *


 731  *        GP2 =   7.77830853479775281781085278324621033523037489883e-0004
 732  *
 733  *
 734  *      Implementation note:
 735  *      z = (1/x), z2 = z*z, z4 = z2*z2;
 736  *      p = z*(GP0+z2*(GP1+....+z2*GP7))
 737  *        = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
 738  *
 739  *   Adding everything up:
 740  *      t = rr.h*ww.h+hln2pi_h                  ... exact
 741  *      w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
 742  *
 743  *   Computing exp(t+w):
 744  *      s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
 745  *      exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
 746  *      expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
 747  *      2**(j/32) is obtained by table look-up S[j]+S_trail[j].
 748  *      Remez error bound:
 749  *      |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
 750  */
 751 /* END CSTYLED */
 752 
 753 #include "libm.h"
 754 
 755 #define __HI(x)         ((int *)&x)[HIWORD]
 756 #define __LO(x)         ((unsigned *)&x)[LOWORD]
 757 
 758 struct Double {
 759         double h;
 760         double l;
 761 };
 762 
 763 /* Hex value of GP0 shoule be 3FB55555 55555555 */
 764 static const double c[] = {
 765         +1.0,
 766         +2.0,
 767         +0.5,
 768         +1.0e-300,
 769         +6.66666666666666740682e-01,    /* A1=T3[0] */
 770         +3.99999999955626478023093908674902212920e-01,  /* A2=T3[1] */
 771         +2.85720221533145659809237398709372330980e-01,  /* A3=T3[2] */
 772         +0.0833333333333333287074040640618477,          /* GP[0] */
 773         -2.77777777776649355200565611114627670089130772843e-03,
 774         +7.93650787486083724805476194170211775784158551509e-04,
 775         -5.95236628558314928757811419580281294593903582971e-04,
 776         +8.41566473999853451983137162780427812781178932540e-04,


1118 #define P23             cr[14]
1119 #define Q20             cr[15]
1120 #define Q21             cr[16]
1121 #define Q22             cr[17]
1122 #define Q23             cr[18]
1123 #define Q24             cr[19]
1124 #define Q25             cr[20]
1125 #define Q26             cr[21]
1126 #define P30             cr[22]
1127 #define P31             cr[23]
1128 #define P32             cr[24]
1129 #define P33             cr[25]
1130 #define P34             cr[26]
1131 #define Q30             cr[27]
1132 #define Q31             cr[28]
1133 #define Q32             cr[29]
1134 #define Q33             cr[30]
1135 #define Q34             cr[31]
1136 #define Q35             cr[32]
1137 
1138 static const double GZ1_h = +0.938204627909682398190,

1139         GZ1_l = +5.121952600248205157935e-17,
1140         GZ2_h = +0.885603194410888749921,
1141         GZ2_l = -4.964236872556339810692e-17,
1142         GZ3_h = +0.936781411463652347038,
1143         GZ3_l = -2.541923110834479415023e-17,
1144         TZ1 = -0.3517214357852935791015625,
1145         TZ3 = +0.280530631542205810546875;

1146 
1147 /*
1148  * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
1149  * assume yh got 20 significant bits
1150  */
1151 static struct Double
1152 GT1(double yh, double yl)
1153 {
1154         double t3, t4, y, z;
1155         struct Double r;
1156 
1157         y = yh + yl;
1158         z = y * y;
1159         t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / (Q10 +
1160             y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1161         t3 += (TZ1 * yl + GZ1_l);
1162         t4 = TZ1 * yh;
1163         r.h = (double)((float)(t4 + GZ1_h + t3));
1164         t3 += (t4 - (r.h - GZ1_h));
1165         r.l = t3;
1166         return (r);
1167 }
1168 
1169 /*
1170  * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
1171  * assume yh got 20 significant bits
1172  */
1173 static struct Double
1174 GT2(double yh, double yl)
1175 {
1176         double t3, y, z;
1177         struct Double r;
1178 
1179         y = yh + yl;
1180         z = y * y;
1181         t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / (Q20 + (y * ((Q21 +
1182             Q22 * y) + z * Q23) + (z * z) * ((Q24 + Q25 * y) + z * Q26))) +
1183             GZ2_l;
1184         r.h = (double)((float)(GZ2_h + t3));
1185         r.l = t3 - (r.h - GZ2_h);
1186         return (r);
1187 }
1188 
1189 /*
1190  * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
1191  * assume yh got 20 significant bits
1192  */
1193 static struct Double
1194 GT3(double yh, double yl)
1195 {
1196         double t3, t4, y, z;
1197         struct Double r;
1198 
1199         y = yh + yl;
1200         z = y * y;
1201         t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / (Q30 +
1202             y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1203         t3 += (TZ3 * yl + GZ3_l);
1204         t4 = TZ3 * yh;
1205         r.h = (double)((float)(t4 + GZ3_h + t3));
1206         t3 += (t4 - (r.h - GZ3_h));
1207         r.l = t3;
1208         return (r);
1209 }
1210 
1211 
1212 /*
1213  * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
1214  *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
1215  *                = L1 + L2 + L3,
1216  */

1217 static struct Double
1218 large_gam(double x, int *m)
1219 {
1220         double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0, p24 =
1221             1.0 / 16777216.0;
1222         int n2, j2, k, ix, j;
1223         unsigned lx;
1224         struct Double zz;
1225         double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
1226 
1227 
1228 /*
1229  * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
1230  *
1231  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
1232  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
1233  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
1234  *       T2(j) = T2[2j,2j+1] = log(z[j]),
1235  *       T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7
1236  *  Note
1237  *  (1) the leading entries are truncated to 24 binary point.
1238  *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
1239  *                                   2**(-24)
1240  *                           _________V___________________
1241  *               T1(n):     |_________|___________________|
1242  *                             _______ ______________________
1243  *               T2(j):       |_______|______________________|
1244  *                                ____ _______________________
1245  *               2s:             |____|_______________________|
1246  *                                    __________________________
1247  *          +    T3(s)-2s:           |__________________________|
1248  *                       -------------------------------------------
1249  *                          [leading] + [Trailing]
1250  */

1251         ix = __HI(x);
1252         lx = __LO(x);
1253         n2 = (ix >> 20) - 0x3ff;          /* exponent of x, range:3-7 */
1254         n2 += n2;                               /* 2n */
1255         ix = (ix & 0x000fffff) | 0x3ff00000;        /* y = scale x to [1,2] */
1256         __HI(y) = ix;
1257         __LO(y) = lx;
1258         __HI(z) = (ix & 0xffffc000) | 0x2000;       /* z[j]=1+j/64+1/128 */
1259         __LO(z) = 0;
1260         j2 = (ix >> 13) & 0x7e;                       /* 2j */
1261         t1 = y + z;
1262         t2 = y - z;
1263         r = one / t1;
1264         t1 = (double)((float)t1);
1265         u = r * t2;                     /* u = (y-z)/(y+z) */
1266         t4 = T2[j2 + 1] + T1[n2 + 1];
1267         z2 = u * u;
1268         k = __HI(u) & 0x7fffffff;
1269         t3 = T2[j2] + T1[n2];
1270 
1271         if ((k >> 20) < 0x3ec) {       /* |u|<2**-19 */
1272                 t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
1273         } else {
1274                 t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
1275                 u2 = u + u;
1276                 v = (double)((int)(u2 * t24)) * p24;
1277                 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
1278                 t3 += v;
1279         }
1280 
1281         ss_h = (double)((float)(t2 + t3));
1282         ss_l = t2 - (ss_h - t3);
1283 
1284         /*
1285          * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
1286          * where ss = log(x) - 1 in already in extra precision
1287          */
1288         z = one / x;
1289         r = x - half;
1290         r_h = (double)((float)r);
1291         w_h = r_h * ss_h + hln2pi_h;
1292         z2 = z * z;
1293         w = (r - r_h) * ss_h + r * ss_l;
1294         z4 = z2 * z2;
1295         t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
1296         t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
1297         t1 += t2;
1298         w += hln2pi_l;
1299         w_l = z * (GP0 + t1) + w;
1300         k = (int)((w_h + w_l) * invln2_32 + half);
1301 
1302         /* compute the exponential of w_h+w_l */
1303         j = k & 0x1f;
1304         *m = (k >> 5);
1305         t3 = (double)k;
1306 
1307         /* perform w - k*ln2_32 (represent as w_h - w_l) */
1308         t1 = w_h - t3 * ln2_32hi;
1309         t2 = t3 * ln2_32lo;
1310         w = w_l - t2;
1311         w_h = t1 + w_l;
1312         w_l = t2 - (w_l - (w_h - t1));
1313 
1314         /* compute exp(w_h+w_l) */
1315         z = w_h - w_l;
1316         z2 = z * z;
1317         t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
1318         t2 = z2 * (Et2 + z2 * Et4);
1319         t3 = w_h - (w_l - (t1 + z * t2));
1320         zz.l = S_trail[j] * (one + t3) + S[j] * t3;
1321         zz.h = S[j];
1322         return (zz);
1323 }
1324 
1325 
1326 /*
1327  * kpsin(x)= sin(pi*x)/pi
1328  *                 3        5        7        9        11        13        15
1329  *      = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  +ks[5]*x  +ks[6]*x
1330  */
1331 static const double ks[] = {
1332         -1.64493406684822640606569,
1333         +8.11742425283341655883668741874008920850698590621e-0001,
1334         -1.90751824120862873825597279118304943994042258291e-0001,
1335         +2.61478477632554278317289628332654539353521911570e-0002,
1336         -2.34607978510202710377617190278735525354347705866e-0003,
1337         +1.48413292290051695897242899977121846763824221705e-0004,
1338         -6.87730769637543488108688726777687262485357072242e-0006,
1339 };

1340 
1341 /* assume x is not tiny and positive */
1342 static struct Double
1343 kpsin(double x)
1344 {
1345         double z, t1, t2, t3, t4;
1346         struct Double xx;
1347 
1348         z = x * x;
1349         xx.h = x;
1350         t1 = z * x;
1351         t2 = z * z;
1352         t4 = t1 * ks[0];
1353         t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) * (ks[4] +
1354             z * ks[5] + t2 * ks[6]));
1355         xx.l = t4 + t3;
1356         return (xx);
1357 }
1358 
1359 
1360 /*
1361  * kpcos(x)= cos(pi*x)/pi
1362  *                     2        4        6        8        10        12
1363  *      = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
1364  */
1365 
1366 static const double one_pi_h = 0.318309886183790635705292970,
1367         one_pi_l = 3.583247455607534006714276420e-17;
1368 static const double npi_2_h = -1.5625,
1369         npi_2_l = -0.00829632679489661923132169163975055099555883223;
1370 static const double kc[] = {
1371         -1.57079632679489661923132169163975055099555883223e+0000,
1372         +1.29192819501230224953283586722575766189551966008e+0000,
1373         -4.25027339940149518500158850753393173519732149213e-0001,
1374         +7.49080625187015312373925142219429422375556727752e-0002,
1375         -8.21442040906099210866977352284054849051348692715e-0003,
1376         +6.10411356829515414575566564733632532333904115968e-0004,
1377 };

1378 
1379 /* assume x is not tiny and positive */
1380 static struct Double
1381 kpcos(double x)
1382 {
1383         double z, t1, t2, t3, t4, x4, x8;
1384         struct Double xx;
1385 
1386         z = x * x;
1387         xx.h = one_pi_h;
1388         t1 = (double)((float)x);
1389         x4 = z * z;
1390         t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
1391         t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z * kc[4] +
1392             x4 * kc[5]));
1393         t4 = t1 * t1;           /* 48 bits mantissa */
1394         x8 = t2 + t3;
1395         t4 *= npi_2_h;    /* npi_2_h is 5 bits const. The product is exact */
1396         xx.l = x8 + t4;   /* that will minimized the rounding error in xx.l */
1397         return (xx);
1398 }
1399 

1400 static const double
1401 /* 0.134861805732790769689793935774652917006 */
1402         t0z1 = 0.1348618057327907737708,
1403         t0z1_l = -4.0810077708578299022531e-18,
1404 /* 0.461632144968362341262659542325721328468 */
1405         t0z2 = 0.4616321449683623567850,
1406         t0z2_l = -1.5522348162858676890521e-17,
1407 /* 0.819773101100500601787868704921606996312 */
1408         t0z3 = 0.8197731011005006118708,
1409         t0z3_l = -1.0082945122487103498325e-17;
1410 
1411 /*
1412  * 1.134861805732790769689793935774652917006
1413  */
1414 
1415 /* gamma(x+i) for 0 <= x < 1  */
1416 static struct Double
1417 gam_n(int i, double x)
1418 {
1419         struct Double rr = { 0.0L, 0.0L }, yy;
1420         double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
1421 
1422         /* compute yy = gamma(x+1) */
1423         if (x > 0.2845) {
1424                 if (x > 0.6374) {
1425                         r1 = x - t0z3;
1426                         r2 = (double)((float)(r1 - t0z3_l));
1427                         t2 = r1 - r2;
1428                         yy = GT3(r2, t2 - t0z3_l);
1429                 } else {
1430                         r1 = x - t0z2;
1431                         r2 = (double)((float)(r1 - t0z2_l));
1432                         t2 = r1 - r2;
1433                         yy = GT2(r2, t2 - t0z2_l);
1434                 }
1435         } else {
1436                 r1 = x - t0z1;
1437                 r2 = (double)((float)(r1 - t0z1_l));
1438                 t2 = r1 - r2;
1439                 yy = GT1(r2, t2 - t0z1_l);
1440         }
1441 
1442         /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
1443         switch (i) {
1444         case 0:                                 /* yy/x */
1445                 r1 = one / x;
1446                 xh = (double)((float)x);        /* x is not tiny */
1447                 rr.h = (double)((float)((yy.h + yy.l) * r1));
1448                 rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - r1 *
1449                     yy.l);
1450                 break;
1451         case 1:                         /* yy */
1452                 rr.h = yy.h;
1453                 rr.l = yy.l;
1454                 break;
1455         case 2:                         /* (x+1)*yy */
1456                 z = x + one;            /* may not be exact */
1457                 zh = (double)((float)z);
1458                 rr.h = zh * yy.h;
1459                 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
1460                 break;
1461         case 3:                         /* (x+2)*(x+1)*yy */
1462                 z1 = x + one;
1463                 z2 = x + 2.0;
1464                 z = z1 * z2;
1465                 xh = (double)((float)z);
1466                 zh = (double)((float)z1);
1467                 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
1468                 rr.h = xh * yy.h;
1469                 rr.l = z * yy.l + xl * yy.h;
1470                 break;
1471 
1472         case 4:                         /* (x+1)*(x+3)*(x+2)*yy */
1473                 z1 = x + 2.0;
1474                 z2 = (x + one) * (x + 3.0);
1475                 zh = z1;
1476                 __LO(zh) = 0;
1477                 __HI(zh) &= 0xfffffff8;     /* zh 18 bits mantissa */
1478                 zl = x - (zh - 2.0);
1479                 z = z1 * z2;
1480                 xh = (double)((float)z);
1481                 xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
1482                 rr.h = xh * yy.h;
1483                 rr.l = z * yy.l + xl * yy.h;
1484                 break;
1485         case 5:                         /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1486                 z1 = x + 2.0;
1487                 z2 = x + 3.0;
1488                 z = z1 * z2;
1489                 zh = (double)((float)z1);
1490                 yh = (double)((float)z);
1491                 yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
1492                 z2 = z - 2.0;
1493                 z *= z2;
1494                 xh = (double)((float)z);
1495                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1496                 rr.h = xh * yy.h;
1497                 rr.l = z * yy.l + xl * yy.h;
1498                 break;
1499         case 6:                         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1500                 z1 = x + 2.0;
1501                 z2 = x + 3.0;
1502                 z = z1 * z2;
1503                 zh = (double)((float)z1);
1504                 yh = (double)((float)z);
1505                 z1 = x - (zh - 2.0);
1506                 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
1507                 z2 = z - 2.0;
1508                 x5 = x + 5.0;
1509                 z *= z2;
1510                 xh = (double)((float)z);
1511                 zh += 3.0;
1512                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1513 
1514                 /*
1515                  * xh+xl=(x+1)*...*(x+4)
1516                  * wh+wl=(x+5)*yy
1517                  */
1518                 wh = (double)((float)(x5 * (yy.h + yy.l)));
1519                 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1520                 rr.h = wh * xh;
1521                 rr.l = z * wl + xl * wh;
1522                 break;
1523         case 7:                 /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1524                 z1 = x + 3.0;
1525                 z2 = x + 4.0;
1526                 z = z2 * z1;
1527                 zh = (double)((float)z1);
1528                 yh = (double)((float)z);        /* yh+yl = (x+3)(x+4) */
1529                 yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
1530                 z1 = x + 6.0;
1531                 z2 = z - 2.0;                   /* z2 = (x+2)*(x+5) */
1532                 z *= z2;
1533                 xh = (double)((float)z);
1534                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1535 
1536                 /*
1537                  * xh+xl=(x+2)*...*(x+5)
1538                  * wh+wl=(x+1)(x+6)*yy
1539                  */
1540                 z2 -= 4.0;              /* z2 = (x+1)(x+6) */
1541                 wh = (double)((float)(z2 * (yy.h + yy.l)));
1542                 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
1543                 rr.h = wh * xh;
1544                 rr.l = z * wl + xl * wh;
1545         }
1546 
1547         return (rr);
1548 }
1549 
1550 double
1551 tgamma(double x)
1552 {
1553         struct Double ss, ww;
1554         double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1555         int i, j, k, m, ix, hx, xk;
1556         unsigned lx;
1557 
1558         hx = __HI(x);
1559         lx = __LO(x);
1560         ix = hx & 0x7fffffff;
1561         y = x;
1562 
1563         if (ix < 0x3ca00000)
1564                 return (one / x);       /* |x| < 2**-53 */
1565 
1566         if (ix >= 0x7ff00000)
1567                 /* +Inf -> +Inf, -Inf or NaN -> NaN */
1568                 return (x * ((hx < 0) ? 0.0 : x));
1569 
1570         if (hx > 0x406573fa ||               /* x > 171.62... overflow to +inf */
1571             (hx == 0x406573fa && lx > 0xE561F647)) {
1572                 z = x / tiny;
1573                 return (z * z);
1574         }
1575 
1576         if (hx >= 0x40200000) {              /* x >= 8 */
1577                 ww = large_gam(x, &m);
1578                 w = ww.h + ww.l;
1579                 __HI(w) += m << 20;
1580                 return (w);
1581         }
1582 
1583         if (hx > 0) {                        /* 0 < x < 8 */
1584                 i = (int)x;
1585                 ww = gam_n(i, x - (double)i);
1586                 return (ww.h + ww.l);
1587         }
1588 
1589         /*
1590          * negative x
1591          */
1592 
1593         /*
1594          * compute: xk =
1595          *      -2 ... x is an even int (-inf is even)
1596          *      -1 ... x is an odd int
1597          *      +0 ... x is not an int but chopped to an even int
1598          *      +1 ... x is not an int but chopped to an odd int
1599          */

1600         xk = 0;
1601 
1602         if (ix >= 0x43300000) {
1603                 if (ix >= 0x43400000)
1604                         xk = -2;
1605                 else
1606                         xk = -2 + (lx & 1);
1607         } else if (ix >= 0x3ff00000) {
1608                 k = (ix >> 20) - 0x3ff;
1609 
1610                 if (k > 20) {
1611                         j = lx >> (52 - k);
1612 
1613                         if ((j << (52 - k)) == lx)
1614                                 xk = -2 + (j & 1);
1615                         else
1616                                 xk = j & 1;
1617                 } else {
1618                         j = ix >> (20 - k);
1619 
1620                         if ((j << (20 - k)) == ix && lx == 0)
1621                                 xk = -2 + (j & 1);
1622                         else
1623                                 xk = j & 1;
1624                 }
1625         }
1626 
1627         if (xk < 0)
1628                 /* ideally gamma(-n)= (-1)**(n+1) * inf, but c99 expect NaN */
1629                 return ((x - x) / (x - x));     /* 0/0 = NaN */
1630 

1631         /* negative underflow thresold */
1632         if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
1633                 /* x < -183.0 - 11ulp */
1634                 z = tiny / x;
1635 
1636                 if (xk == 1)
1637                         z = -z;
1638 
1639                 return (z * tiny);
1640         }
1641 
1642         /* now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
1643 
1644         /*
1645          * First compute ss = -sin(pi*y)/pi , so that
1646          * gamma(x) = 1/(ss*gamma(1+y))
1647          */
1648         y = -x;
1649         j = (int)y;
1650         z = y - (double)j;
1651 
1652         if (z > 0.3183098861837906715377675) {
1653                 if (z > 0.6816901138162093284622325)
1654                         ss = kpsin(one - z);
1655                 else
1656                         ss = kpcos(0.5 - z);
1657         } else {
1658                 ss = kpsin(z);
1659         }
1660 
1661         if (xk == 0) {
1662                 ss.h = -ss.h;
1663                 ss.l = -ss.l;
1664         }
1665 
1666         /* Then compute ww = gamma(1+y), note that result scale to 2**m */
1667         m = 0;
1668 
1669         if (j < 7) {
1670                 ww = gam_n(j + 1, z);
1671         } else {
1672                 w = y + one;
1673 
1674                 if ((lx & 1) == 0) {        /* y+1 exact (note that y<184) */
1675                         ww = large_gam(w, &m);
1676                 } else {
1677                         t = w - one;
1678 
1679                         if (t == y) {   /* y+one exact */
1680                                 ww = large_gam(w, &m);
1681                         } else {        /* use y*gamma(y) */
1682                                 if (j == 7)
1683                                         ww = gam_n(j, z);
1684                                 else
1685                                         ww = large_gam(y, &m);
1686 
1687                                 t4 = ww.h + ww.l;
1688                                 t1 = (double)((float)y);
1689                                 t2 = (double)((float)t4);
1690                                 /* t4 will not be too large */
1691                                 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1692                                 ww.h = t1 * t2;
1693                         }
1694                 }
1695         }
1696 
1697         /* compute 1/(ss*ww) */
1698         t3 = ss.h + ss.l;
1699         t4 = ww.h + ww.l;
1700         t1 = (double)((float)t3);
1701         t2 = (double)((float)t4);
1702         z1 = ss.l - (t1 - ss.h);        /* (t1,z1) = ss */
1703         z2 = ww.l - (t2 - ww.h);        /* (t2,z2) = ww */
1704         t3 = t3 * t4;                   /* t3 = ss*ww */
1705         z3 = one / t3;                  /* z3 = 1/(ss*ww) */
1706         t5 = t1 * t2;
1707         z5 = z1 * t4 + t1 * z2;         /* (t5,z5) = ss*ww */
1708         t1 = (double)((float)t3);       /* (t1,z1) = ss*ww */
1709         z1 = z5 - (t1 - t5);
1710         t2 = (double)((float)z3);       /* leading 1/(ss*ww) */
1711         z2 = z3 * (t2 * z1 - (one - t2 * t1));
1712         z = t2 - z2;
1713 
1714         /* check whether z*2**-m underflow */
1715         if (m != 0) {
1716                 hx = __HI(z);
1717                 i = hx & 0x80000000;
1718                 ix = hx ^ i;
1719                 j = ix >> 20;
1720 
1721                 if (j > m) {
1722                         ix -= m << 20;
1723                         __HI(z) = ix ^ i;
1724                 } else if ((m - j) > 52) {
1725                         /* underflow */
1726                         if (xk == 0)
1727                                 z = -tiny * tiny;
1728                         else
1729                                 z = tiny * tiny;
1730                 } else {
1731                         /* subnormal */
1732                         m -= 60;
1733                         t = one;
1734                         __HI(t) -= 60 << 20;
1735                         ix -= m << 20;
1736                         __HI(z) = ix ^ i;
1737                         z *= t;
1738                 }
1739         }
1740 
1741         return (z);
1742 }