5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #pragma weak __tgamma = tgamma
31
32 /* INDENT OFF */
33 /*
34 * True gamma function
35 * double tgamma(double x)
36 *
37 * Error:
38 * ------
39 * Less than one ulp for both positive and negative arguments.
40 *
41 * Algorithm:
42 * ---------
43 * A: For negative argument
44 * (1) gamma(-n or -inf) is NaN
45 * (2) Underflow Threshold
46 * (3) Reduction to gamma(1+x)
47 * B: For x between 1 and 2
48 * C: For x between 0 and 1
49 * D: For x between 2 and 8
50 * E: Overflow threshold {see over.c}
51 * F: For overflow_threshold >= x >= 8
52 *
730 * GP2 = 7.77830853479775281781085278324621033523037489883e-0004
731 *
732 *
733 * Implementation note:
734 * z = (1/x), z2 = z*z, z4 = z2*z2;
735 * p = z*(GP0+z2*(GP1+....+z2*GP7))
736 * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
737 *
738 * Adding everything up:
739 * t = rr.h*ww.h+hln2pi_h ... exact
740 * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
741 *
742 * Computing exp(t+w):
743 * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
744 * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
745 * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
746 * 2**(j/32) is obtained by table look-up S[j]+S_trail[j].
747 * Remez error bound:
748 * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
749 */
750
751 #include "libm.h"
752
753 #define __HI(x) ((int *) &x)[HIWORD]
754 #define __LO(x) ((unsigned *) &x)[LOWORD]
755
756 struct Double {
757 double h;
758 double l;
759 };
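
/*
 * Note: struct Double carries a value as an unevaluated sum h + l
 * ("double-double"): h is a short head (e.g. rounded to float precision,
 * as GT1()..GT3() below produce) and l is a small correction, so h + l
 * holds more precision than a single double.
 */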
760
761 /* Hex value of GP0 should be 3FB55555 55555555 */
762 static const double c[] = {
763 +1.0,
764 +2.0,
765 +0.5,
766 +1.0e-300,
767 +6.66666666666666740682e-01, /* A1=T3[0] */
768 +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */
769 +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */
770 +0.0833333333333333287074040640618477, /* GP[0] */
771 -2.77777777776649355200565611114627670089130772843e-03,
772 +7.93650787486083724805476194170211775784158551509e-04,
773 -5.95236628558314928757811419580281294593903582971e-04,
774 +8.41566473999853451983137162780427812781178932540e-04,
1116 #define P23 cr[14]
1117 #define Q20 cr[15]
1118 #define Q21 cr[16]
1119 #define Q22 cr[17]
1120 #define Q23 cr[18]
1121 #define Q24 cr[19]
1122 #define Q25 cr[20]
1123 #define Q26 cr[21]
1124 #define P30 cr[22]
1125 #define P31 cr[23]
1126 #define P32 cr[24]
1127 #define P33 cr[25]
1128 #define P34 cr[26]
1129 #define Q30 cr[27]
1130 #define Q31 cr[28]
1131 #define Q32 cr[29]
1132 #define Q33 cr[30]
1133 #define Q34 cr[31]
1134 #define Q35 cr[32]
1135
1136 static const double
1137 GZ1_h = +0.938204627909682398190,
1138 GZ1_l = +5.121952600248205157935e-17,
1139 GZ2_h = +0.885603194410888749921,
1140 GZ2_l = -4.964236872556339810692e-17,
1141 GZ3_h = +0.936781411463652347038,
1142 GZ3_l = -2.541923110834479415023e-17,
1143 TZ1 = -0.3517214357852935791015625,
1144 TZ3 = +0.280530631542205810546875;
1145 /* INDENT ON */
1146
1147 /* compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845] */
1148 /* assume yh has 20 significant bits */
1149 static struct Double
1150 GT1(double yh, double yl) {
1151 double t3, t4, y, z;
1152 struct Double r;
1153
1154 y = yh + yl;
1155 z = y * y;
1156 t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) /
1157 (Q10 + y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1158 t3 += (TZ1 * yl + GZ1_l);
1159 t4 = TZ1 * yh;
1160 r.h = (double) ((float) (t4 + GZ1_h + t3));
1161 t3 += (t4 - (r.h - GZ1_h));
1162 r.l = t3;
1163 return (r);
1164 }
1165
1166 /* compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374] */
1167 /* assume yh has 20 significant bits */
1168 static struct Double
1169 GT2(double yh, double yl) {
1170 double t3, y, z;
1171 struct Double r;
1172
1173 y = yh + yl;
1174 z = y * y;
1175 t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) /
1176 (Q20 + (y * ((Q21 + Q22 * y) + z * Q23) +
1177 (z * z) * ((Q24 + Q25 * y) + z * Q26))) + GZ2_l;
1178 r.h = (double) ((float) (GZ2_h + t3));
1179 r.l = t3 - (r.h - GZ2_h);
1180 return (r);
1181 }
1182
1183 /* compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000] */
1184 /* assume yh has 20 significant bits */
1185 static struct Double
1186 GT3(double yh, double yl) {
1187 double t3, t4, y, z;
1188 struct Double r;
1189
1190 y = yh + yl;
1191 z = y * y;
1192 t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) /
1193 (Q30 + y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1194 t3 += (TZ3 * yl + GZ3_l);
1195 t4 = TZ3 * yh;
1196 r.h = (double) ((float) (t4 + GZ3_h + t3));
1197 t3 += (t4 - (r.h - GZ3_h));
1198 r.l = t3;
1199 return (r);
1200 }
1201
1202 /* INDENT OFF */
1203 /*
1204 * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
1205 * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
1206 * = L1 + L2 + L3,
1207 */
1208 /* INDENT ON */
1209 static struct Double
1210 large_gam(double x, int *m) {
1211 double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0,
1212 p24 = 1.0 / 16777216.0;
1213 int n2, j2, k, ix, j;
1214 unsigned lx;
1215 struct Double zz;
1216 double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
1217
1218 /* INDENT OFF */
1219 /*
1220 * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
1221 *
1222 * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2,
1223 * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
1224 * T1(n) = T1[2n,2n+1] = n*log(2)-1,
1225 * T2(j) = T2[2j,2j+1] = log(z[j]),
1226 * T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7
1227 * Note
1228 * (1) the leading entries are truncated to 24 bits after the binary point.
1229 * (2) Remez error for T3(s) is bounded by 2**(-72.4)
1230 * 2**(-24)
1231 * _________V___________________
1232 * T1(n): |_________|___________________|
1233 * _______ ______________________
1234 * T2(j): |_______|______________________|
1235 * ____ _______________________
1236 * 2s: |____|_______________________|
1237 * __________________________
1238 * + T3(s)-2s: |__________________________|
1239 * -------------------------------------------
1240 * [leading] + [Trailing]
1241 */
1242 /* INDENT ON */
1243 ix = __HI(x);
1244 lx = __LO(x);
1245 n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */
1246 n2 += n2; /* 2n */
1247 ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */
1248 __HI(y) = ix;
1249 __LO(y) = lx;
1250 __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */
1251 __LO(z) = 0;
1252 j2 = (ix >> 13) & 0x7e; /* 2j */
1253 t1 = y + z;
1254 t2 = y - z;
1255 r = one / t1;
1256 t1 = (double) ((float) t1);
1257 u = r * t2; /* u = (y-z)/(y+z) */
1258 t4 = T2[j2 + 1] + T1[n2 + 1];
1259 z2 = u * u;
1260 k = __HI(u) & 0x7fffffff;
1261 t3 = T2[j2] + T1[n2];
1262 if ((k >> 20) < 0x3ec) { /* |u|<2**-19 */
1263 t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
1264 } else {
1265 t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
1266 u2 = u + u;
1267 v = (double) ((int) (u2 * t24)) * p24;
1268 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
1269 t3 += v;
1270 }
1271 ss_h = (double) ((float) (t2 + t3));
1272 ss_l = t2 - (ss_h - t3);
1273
1274 /*
1275 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + (1/x)*P(1/x^2)
1276 * where ss = log(x) - 1 is already in extra precision
1277 */
1278 z = one / x;
1279 r = x - half;
1280 r_h = (double) ((float) r);
1281 w_h = r_h * ss_h + hln2pi_h;
1282 z2 = z * z;
1283 w = (r - r_h) * ss_h + r * ss_l;
1284 z4 = z2 * z2;
1285 t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
1286 t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
1287 t1 += t2;
1288 w += hln2pi_l;
1289 w_l = z * (GP0 + t1) + w;
1290 k = (int) ((w_h + w_l) * invln2_32 + half);
1291
1292 /* compute the exponential of w_h+w_l */
1293 j = k & 0x1f;
1294 *m = (k >> 5);
1295 t3 = (double) k;
1296
1297 /* perform w - k*ln2_32 (represent as w_h - w_l) */
1298 t1 = w_h - t3 * ln2_32hi;
1299 t2 = t3 * ln2_32lo;
1300 w = w_l - t2;
1301 w_h = t1 + w_l;
1302 w_l = t2 - (w_l - (w_h - t1));
1303
1304 /* compute exp(w_h+w_l) */
1305 z = w_h - w_l;
1306 z2 = z * z;
1307 t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
1308 t2 = z2 * (Et2 + z2 * Et4);
1309 t3 = w_h - (w_l - (t1 + z * t2));
1310 zz.l = S_trail[j] * (one + t3) + S[j] * t3;
1311 zz.h = S[j];
1312 return (zz);
1313 }
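
/*
 * Illustrative sketch (not part of the build): the "compute exp(w_h+w_l)"
 * step above, reconstructed with standard <math.h> calls.  Standard
 * exp2()/expm1() stand in for the S[]/S_trail[] table and the Et1..Et5
 * polynomial; exp_by_table() and its constants are hypothetical names.
 */
#if 0
#include <math.h>
#include <stdio.h>

static double
exp_by_table(double s)
{
	static const double ln2 = 0.69314718055994530942;
	int k = (int)(s * (32.0 / ln2) + (s >= 0.0 ? 0.5 : -0.5));
	int n = k >> 5, j = k & 0x1f;		/* s = (n + j/32)*ln2 + r */
	double r = s - (double)k * (ln2 / 32.0);	/* |r| <= ln2/64, roughly */

	/* exp(s) = 2**n * 2**(j/32) * (1 + expm1(r)) */
	return (ldexp(exp2(j / 32.0) * (1.0 + expm1(r)), n));
}

int
main(void)
{
	printf("%.17g %.17g\n", exp_by_table(3.7), exp(3.7));
	return (0);
}
#endif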
1314
1315 /* INDENT OFF */
1316 /*
1317 * kpsin(x)= sin(pi*x)/pi
1318 * 3 5 7 9 11 13 15
1319 * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x +ks[5]*x +ks[6]*x
1320 */
1321 static const double ks[] = {
1322 -1.64493406684822640606569,
1323 +8.11742425283341655883668741874008920850698590621e-0001,
1324 -1.90751824120862873825597279118304943994042258291e-0001,
1325 +2.61478477632554278317289628332654539353521911570e-0002,
1326 -2.34607978510202710377617190278735525354347705866e-0003,
1327 +1.48413292290051695897242899977121846763824221705e-0004,
1328 -6.87730769637543488108688726777687262485357072242e-0006,
1329 };
1330 /* INDENT ON */
1331
1332 /* assume x is not tiny and positive */
1333 static struct Double
1334 kpsin(double x) {
1335 double z, t1, t2, t3, t4;
1336 struct Double xx;
1337
1338 z = x * x;
1339 xx.h = x;
1340 t1 = z * x;
1341 t2 = z * z;
1342 t4 = t1 * ks[0];
1343 t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) *
1344 (ks[4] + z * ks[5] + t2 * ks[6]));
1345 xx.l = t4 + t3;
1346 return (xx);
1347 }
1348
1349 /* INDENT OFF */
1350 /*
1351 * kpcos(x)= cos(pi*x)/pi
1352 * 2 4 6 8 10 12
1353 * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x
1354 */
1355
1356 static const double one_pi_h = 0.318309886183790635705292970,
1357 one_pi_l = 3.583247455607534006714276420e-17;
1358 static const double npi_2_h = -1.5625,
1359 npi_2_l = -0.00829632679489661923132169163975055099555883223;
1360 static const double kc[] = {
1361 -1.57079632679489661923132169163975055099555883223e+0000,
1362 +1.29192819501230224953283586722575766189551966008e+0000,
1363 -4.25027339940149518500158850753393173519732149213e-0001,
1364 +7.49080625187015312373925142219429422375556727752e-0002,
1365 -8.21442040906099210866977352284054849051348692715e-0003,
1366 +6.10411356829515414575566564733632532333904115968e-0004,
1367 };
1368 /* INDENT ON */
1369
1370 /* assume x is not tiny and positive */
1371 static struct Double
1372 kpcos(double x) {
1373 double z, t1, t2, t3, t4, x4, x8;
1374 struct Double xx;
1375
1376 z = x * x;
1377 xx.h = one_pi_h;
1378 t1 = (double) ((float) x);
1379 x4 = z * z;
1380 t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
1381 t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z *
1382 kc[4] + x4 * kc[5]));
1383 t4 = t1 * t1; /* 48-bit mantissa */
1384 x8 = t2 + t3;
1385 t4 *= npi_2_h; /* npi_2_h is a 5-bit constant; the product is exact */
1386 xx.l = x8 + t4; /* this minimizes the rounding error in xx.l */
1387 return (xx);
1388 }
1389
1390 /* INDENT OFF */
1391 static const double
1392 /* 0.134861805732790769689793935774652917006 */
1393 t0z1 = 0.1348618057327907737708,
1394 t0z1_l = -4.0810077708578299022531e-18,
1395 /* 0.461632144968362341262659542325721328468 */
1396 t0z2 = 0.4616321449683623567850,
1397 t0z2_l = -1.5522348162858676890521e-17,
1398 /* 0.819773101100500601787868704921606996312 */
1399 t0z3 = 0.8197731011005006118708,
1400 t0z3_l = -1.0082945122487103498325e-17;
1401 /* 1.134861805732790769689793935774652917006 */
1402 /* INDENT ON */
1403
1404 /* gamma(x+i) for 0 <= x < 1 */
1405 static struct Double
1406 gam_n(int i, double x) {
1407 struct Double rr = {0.0L, 0.0L}, yy;
1408 double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
1409
1410 /* compute yy = gamma(x+1) */
1411 if (x > 0.2845) {
1412 if (x > 0.6374) {
1413 r1 = x - t0z3;
1414 r2 = (double) ((float) (r1 - t0z3_l));
1415 t2 = r1 - r2;
1416 yy = GT3(r2, t2 - t0z3_l);
1417 } else {
1418 r1 = x - t0z2;
1419 r2 = (double) ((float) (r1 - t0z2_l));
1420 t2 = r1 - r2;
1421 yy = GT2(r2, t2 - t0z2_l);
1422 }
1423 } else {
1424 r1 = x - t0z1;
1425 r2 = (double) ((float) (r1 - t0z1_l));
1426 t2 = r1 - r2;
1427 yy = GT1(r2, t2 - t0z1_l);
1428 }
1429
1430 /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
1431 switch (i) {
1432 case 0: /* yy/x */
1433 r1 = one / x;
1434 xh = (double) ((float) x); /* x is not tiny */
1435 rr.h = (double) ((float) ((yy.h + yy.l) * r1));
1436 rr.l = r1 * (yy.h - rr.h * xh) -
1437 ((r1 * rr.h) * (x - xh) - r1 * yy.l);
1438 break;
1439 case 1: /* yy */
1440 rr.h = yy.h;
1441 rr.l = yy.l;
1442 break;
1443 case 2: /* (x+1)*yy */
1444 z = x + one; /* may not be exact */
1445 zh = (double) ((float) z);
1446 rr.h = zh * yy.h;
1447 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
1448 break;
1449 case 3: /* (x+2)*(x+1)*yy */
1450 z1 = x + one;
1451 z2 = x + 2.0;
1452 z = z1 * z2;
1453 xh = (double) ((float) z);
1454 zh = (double) ((float) z1);
1455 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
1456 rr.h = xh * yy.h;
1457 rr.l = z * yy.l + xl * yy.h;
1458 break;
1459
1460 case 4: /* (x+1)*(x+3)*(x+2)*yy */
1461 z1 = x + 2.0;
1462 z2 = (x + one) * (x + 3.0);
1463 zh = z1;
1464 __LO(zh) = 0;
1465 __HI(zh) &= 0xfffffff8; /* zh 18 bits mantissa */
1466 zl = x - (zh - 2.0);
1467 z = z1 * z2;
1468 xh = (double) ((float) z);
1469 xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
1470 rr.h = xh * yy.h;
1471 rr.l = z * yy.l + xl * yy.h;
1472 break;
1473 case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1474 z1 = x + 2.0;
1475 z2 = x + 3.0;
1476 z = z1 * z2;
1477 zh = (double) ((float) z1);
1478 yh = (double) ((float) z);
1479 yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
1480 z2 = z - 2.0;
1481 z *= z2;
1482 xh = (double) ((float) z);
1483 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1484 rr.h = xh * yy.h;
1485 rr.l = z * yy.l + xl * yy.h;
1486 break;
1487 case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1488 z1 = x + 2.0;
1489 z2 = x + 3.0;
1490 z = z1 * z2;
1491 zh = (double) ((float) z1);
1492 yh = (double) ((float) z);
1493 z1 = x - (zh - 2.0);
1494 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
1495 z2 = z - 2.0;
1496 x5 = x + 5.0;
1497 z *= z2;
1498 xh = (double) ((float) z);
1499 zh += 3.0;
1500 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1501 /* xh+xl=(x+1)*...*(x+4) */
1502 /* wh+wl=(x+5)*yy */
1503 wh = (double) ((float) (x5 * (yy.h + yy.l)));
1504 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1505 rr.h = wh * xh;
1506 rr.l = z * wl + xl * wh;
1507 break;
1508 case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1509 z1 = x + 3.0;
1510 z2 = x + 4.0;
1511 z = z2 * z1;
1512 zh = (double) ((float) z1);
1513 yh = (double) ((float) z); /* yh+yl = (x+3)(x+4) */
1514 yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
1515 z1 = x + 6.0;
1516 z2 = z - 2.0; /* z2 = (x+2)*(x+5) */
1517 z *= z2;
1518 xh = (double) ((float) z);
1519 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1520 /* xh+xl=(x+2)*...*(x+5) */
1521 /* wh+wl=(x+1)(x+6)*yy */
1522 z2 -= 4.0; /* z2 = (x+1)(x+6) */
1523 wh = (double) ((float) (z2 * (yy.h + yy.l)));
1524 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
1525 rr.h = wh * xh;
1526 rr.l = z * wl + xl * wh;
1527 }
1528 return (rr);
1529 }
1530
1531 double
1532 tgamma(double x) {
1533 struct Double ss, ww;
1534 double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1535 int i, j, k, m, ix, hx, xk;
1536 unsigned lx;
1537
1538 hx = __HI(x);
1539 lx = __LO(x);
1540 ix = hx & 0x7fffffff;
1541 y = x;
1542
1543 if (ix < 0x3ca00000)
1544 return (one / x); /* |x| < 2**-53 */
1545 if (ix >= 0x7ff00000)
1546 /* +Inf -> +Inf, -Inf or NaN -> NaN */
1547 return (x * ((hx < 0)? 0.0 : x));
1548 if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */
1549 (hx == 0x406573fa && lx > 0xE561F647)) {
1550 z = x / tiny;
1551 return (z * z);
1552 }
1553 if (hx >= 0x40200000) { /* x >= 8 */
1554 ww = large_gam(x, &m);
1555 w = ww.h + ww.l;
1556 __HI(w) += m << 20;
1557 return (w);
1558 }
1559 if (hx > 0) { /* 0 < x < 8 */
1560 i = (int) x;
1561 ww = gam_n(i, x - (double) i);
1562 return (ww.h + ww.l);
1563 }
1564
1565 /* negative x */
1566 /* INDENT OFF */
1567 /*
1568 * compute: xk =
1569 * -2 ... x is an even int (-inf is even)
1570 * -1 ... x is an odd int
1571 * +0 ... x is not an int but chopped to an even int
1572 * +1 ... x is not an int but chopped to an odd int
1573 */
1574 /* INDENT ON */
1575 xk = 0;
1576 if (ix >= 0x43300000) {
1577 if (ix >= 0x43400000)
1578 xk = -2;
1579 else
1580 xk = -2 + (lx & 1);
1581 } else if (ix >= 0x3ff00000) {
1582 k = (ix >> 20) - 0x3ff;
1583 if (k > 20) {
1584 j = lx >> (52 - k);
1585 if ((j << (52 - k)) == lx)
1586 xk = -2 + (j & 1);
1587 else
1588 xk = j & 1;
1589 } else {
1590 j = ix >> (20 - k);
1591 if ((j << (20 - k)) == ix && lx == 0)
1592 xk = -2 + (j & 1);
1593 else
1594 xk = j & 1;
1595 }
1596 }
1597 if (xk < 0)
1598 /* ideally gamma(-n)= (-1)**(n+1) * inf, but C99 expects NaN */
1599 return ((x - x) / (x - x)); /* 0/0 = NaN */
1600
1601
1602 /* negative underflow threshold */
1603 if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
1604 /* x < -183.0 - 11ulp */
1605 z = tiny / x;
1606 if (xk == 1)
1607 z = -z;
1608 return (z * tiny);
1609 }
1610
1611 /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
1612
1613 /*
1614 * First compute ss = -sin(pi*y)/pi , so that
1615 * gamma(x) = 1/(ss*gamma(1+y))
1616 */
1617 y = -x;
1618 j = (int) y;
1619 z = y - (double) j;
1620 if (z > 0.3183098861837906715377675)
1621 if (z > 0.6816901138162093284622325)
1622 ss = kpsin(one - z);
1623 else
1624 ss = kpcos(0.5 - z);
1625 else
1626 ss = kpsin(z);
1627 if (xk == 0) {
1628 ss.h = -ss.h;
1629 ss.l = -ss.l;
1630 }
1631
1632 /* Then compute ww = gamma(1+y); note that the result is scaled by 2**-m */
1633 m = 0;
1634 if (j < 7) {
1635 ww = gam_n(j + 1, z);
1636 } else {
1637 w = y + one;
1638 if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */
1639 ww = large_gam(w, &m);
1640 } else {
1641 t = w - one;
1642 if (t == y) { /* y+one exact */
1643 ww = large_gam(w, &m);
1644 } else { /* use y*gamma(y) */
1645 if (j == 7)
1646 ww = gam_n(j, z);
1647 else
1648 ww = large_gam(y, &m);
1649 t4 = ww.h + ww.l;
1650 t1 = (double) ((float) y);
1651 t2 = (double) ((float) t4);
1652 /* t4 will not be too large */
1653 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1654 ww.h = t1 * t2;
1655 }
1656 }
1657 }
1658
1659 /* compute 1/(ss*ww) */
1660 t3 = ss.h + ss.l;
1661 t4 = ww.h + ww.l;
1662 t1 = (double) ((float) t3);
1663 t2 = (double) ((float) t4);
1664 z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */
1665 z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */
1666 t3 = t3 * t4; /* t3 = ss*ww */
1667 z3 = one / t3; /* z3 = 1/(ss*ww) */
1668 t5 = t1 * t2;
1669 z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */
1670 t1 = (double) ((float) t3); /* (t1,z1) = ss*ww */
1671 z1 = z5 - (t1 - t5);
1672 t2 = (double) ((float) z3); /* leading 1/(ss*ww) */
1673 z2 = z3 * (t2 * z1 - (one - t2 * t1));
1674 z = t2 - z2;
1675
1676 /* check whether z*2**-m underflow */
1677 if (m != 0) {
1678 hx = __HI(z);
1679 i = hx & 0x80000000;
1680 ix = hx ^ i;
1681 j = ix >> 20;
1682 if (j > m) {
1683 ix -= m << 20;
1684 __HI(z) = ix ^ i;
1685 } else if ((m - j) > 52) {
1686 /* underflow */
1687 if (xk == 0)
1688 z = -tiny * tiny;
1689 else
1690 z = tiny * tiny;
1691 } else {
1692 /* subnormal */
1693 m -= 60;
1694 t = one;
1695 __HI(t) -= 60 << 20;
1696 ix -= m << 20;
1697 __HI(z) = ix ^ i;
1698 z *= t;
1699 }
1700 }
1701 return (z);
1702 }
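
/*
 * Illustrative usage sketch (not part of the build): a few calls that
 * exercise the special cases handled above -- tiny |x| behaves like 1/x,
 * negative integers give NaN (C99), and x beyond ~171.62 overflows.
 */
#if 0
#include <math.h>
#include <stdio.h>

int
main(void)
{
	printf("tgamma(0.5)   = %g\t(expect sqrt(pi) ~ 1.77245)\n", tgamma(0.5));
	printf("tgamma(1e-60) = %g\t(expect ~1e+60, i.e. ~1/x)\n", tgamma(1e-60));
	printf("tgamma(-2.0)  = %g\t(expect NaN)\n", tgamma(-2.0));
	printf("tgamma(172.0) = %g\t(expect inf)\n", tgamma(172.0));
	return (0);
}
#endif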
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #pragma weak __tgamma = tgamma
32
33 /* BEGIN CSTYLED */
34 /*
35 * True gamma function
36 * double tgamma(double x)
37 *
38 * Error:
39 * ------
40 * Less than one ulp for both positive and negative arguments.
41 *
42 * Algorithm:
43 * ---------
44 * A: For negative argument
45 * (1) gamma(-n or -inf) is NaN
46 * (2) Underflow Threshold
47 * (3) Reduction to gamma(1+x)
48 * B: For x between 1 and 2
49 * C: For x between 0 and 1
50 * D: For x between 2 and 8
51 * E: Overflow threshold {see over.c}
52 * F: For overflow_threshold >= x >= 8
53 *
731 * GP2 = 7.77830853479775281781085278324621033523037489883e-0004
732 *
733 *
734 * Implementation note:
735 * z = (1/x), z2 = z*z, z4 = z2*z2;
736 * p = z*(GP0+z2*(GP1+....+z2*GP7))
737 * = z*(GP0+(z4*(GP2+z4*(GP4+z4*GP6))+z2*(GP1+z4*(GP3+z4*(GP5+z4*GP7)))))
738 *
739 * Adding everything up:
740 * t = rr.h*ww.h+hln2pi_h ... exact
741 * w = (hln2pi_l + ((x-0.5)*ww.l+rr.l*ww.h)) + p
742 *
743 * Computing exp(t+w):
744 * s = t+w; write s = (n+j/32)*ln2+r, |r|<=(1/64)*ln2, then
745 * exp(s) = 2**n * (2**(j/32) + 2**(j/32)*expm1(r)), where
746 * expm1(r) = r + Et1*r^2 + Et2*r^3 + ... + Et5*r^6, and
747 * 2**(j/32) is obtained by table look-up S[j]+S_trail[j].
748 * Remez error bound:
749 * |exp(r) - (1+r+Et1*r^2+...+Et5*r^6)| <= 2^(-63).
750 */
751 /* END CSTYLED */
752
753 #include "libm.h"
754
755 #define __HI(x) ((int *)&x)[HIWORD]
756 #define __LO(x) ((unsigned *)&x)[LOWORD]
757
758 struct Double {
759 double h;
760 double l;
761 };
762
763 /* Hex value of GP0 should be 3FB55555 55555555 */
764 static const double c[] = {
765 +1.0,
766 +2.0,
767 +0.5,
768 +1.0e-300,
769 +6.66666666666666740682e-01, /* A1=T3[0] */
770 +3.99999999955626478023093908674902212920e-01, /* A2=T3[1] */
771 +2.85720221533145659809237398709372330980e-01, /* A3=T3[2] */
772 +0.0833333333333333287074040640618477, /* GP[0] */
773 -2.77777777776649355200565611114627670089130772843e-03,
774 +7.93650787486083724805476194170211775784158551509e-04,
775 -5.95236628558314928757811419580281294593903582971e-04,
776 +8.41566473999853451983137162780427812781178932540e-04,
1118 #define P23 cr[14]
1119 #define Q20 cr[15]
1120 #define Q21 cr[16]
1121 #define Q22 cr[17]
1122 #define Q23 cr[18]
1123 #define Q24 cr[19]
1124 #define Q25 cr[20]
1125 #define Q26 cr[21]
1126 #define P30 cr[22]
1127 #define P31 cr[23]
1128 #define P32 cr[24]
1129 #define P33 cr[25]
1130 #define P34 cr[26]
1131 #define Q30 cr[27]
1132 #define Q31 cr[28]
1133 #define Q32 cr[29]
1134 #define Q33 cr[30]
1135 #define Q34 cr[31]
1136 #define Q35 cr[32]
1137
1138 static const double GZ1_h = +0.938204627909682398190,
1139 GZ1_l = +5.121952600248205157935e-17,
1140 GZ2_h = +0.885603194410888749921,
1141 GZ2_l = -4.964236872556339810692e-17,
1142 GZ3_h = +0.936781411463652347038,
1143 GZ3_l = -2.541923110834479415023e-17,
1144 TZ1 = -0.3517214357852935791015625,
1145 TZ3 = +0.280530631542205810546875;
1146
1147 /*
1148 * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
1149 * assume yh has 20 significant bits
1150 */
1151 static struct Double
1152 GT1(double yh, double yl)
1153 {
1154 double t3, t4, y, z;
1155 struct Double r;
1156
1157 y = yh + yl;
1158 z = y * y;
1159 t3 = (z * (P10 + y * ((P11 + y * P12) + z * (P13 + y * P14)))) / (Q10 +
1160 y * ((Q11 + y * Q12) + z * ((Q13 + Q14 * y) + z * Q15)));
1161 t3 += (TZ1 * yl + GZ1_l);
1162 t4 = TZ1 * yh;
1163 r.h = (double)((float)(t4 + GZ1_h + t3));
1164 t3 += (t4 - (r.h - GZ1_h));
1165 r.l = t3;
1166 return (r);
1167 }
1168
1169 /*
1170 * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
1171 * assume yh has 20 significant bits
1172 */
1173 static struct Double
1174 GT2(double yh, double yl)
1175 {
1176 double t3, y, z;
1177 struct Double r;
1178
1179 y = yh + yl;
1180 z = y * y;
1181 t3 = (z * (P20 + y * P21 + z * (P22 + y * P23))) / (Q20 + (y * ((Q21 +
1182 Q22 * y) + z * Q23) + (z * z) * ((Q24 + Q25 * y) + z * Q26))) +
1183 GZ2_l;
1184 r.h = (double)((float)(GZ2_h + t3));
1185 r.l = t3 - (r.h - GZ2_h);
1186 return (r);
1187 }
1188
1189 /*
1190 * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
1191 * assume yh has 20 significant bits
1192 */
1193 static struct Double
1194 GT3(double yh, double yl)
1195 {
1196 double t3, t4, y, z;
1197 struct Double r;
1198
1199 y = yh + yl;
1200 z = y * y;
1201 t3 = (z * (P30 + y * ((P31 + y * P32) + z * (P33 + y * P34)))) / (Q30 +
1202 y * ((Q31 + y * Q32) + z * ((Q33 + Q34 * y) + z * Q35)));
1203 t3 += (TZ3 * yl + GZ3_l);
1204 t4 = TZ3 * yh;
1205 r.h = (double)((float)(t4 + GZ3_h + t3));
1206 t3 += (t4 - (r.h - GZ3_h));
1207 r.l = t3;
1208 return (r);
1209 }
1210
1211
1212 /*
1213 * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
1214 * log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
1215 * = L1 + L2 + L3,
1216 */
1217 static struct Double
1218 large_gam(double x, int *m)
1219 {
1220 double z, t1, t2, t3, z2, t5, w, y, u, r, z4, v, t24 = 16777216.0, p24 =
1221 1.0 / 16777216.0;
1222 int n2, j2, k, ix, j;
1223 unsigned lx;
1224 struct Double zz;
1225 double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
1226
1227
1228 /*
1229 * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
1230 *
1231 * log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y, 1<=y<2,
1232 * j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
1233 * T1(n) = T1[2n,2n+1] = n*log(2)-1,
1234 * T2(j) = T2[2j,2j+1] = log(z[j]),
1235 * T3(s) = 2s + A1[0]s^3 + A2[1]s^5 + A3[2]s^7
1236 * Note
1237 * (1) the leading entries are truncated to 24 bits after the binary point.
1238 * (2) Remez error for T3(s) is bounded by 2**(-72.4)
1239 * 2**(-24)
1240 * _________V___________________
1241 * T1(n): |_________|___________________|
1242 * _______ ______________________
1243 * T2(j): |_______|______________________|
1244 * ____ _______________________
1245 * 2s: |____|_______________________|
1246 * __________________________
1247 * + T3(s)-2s: |__________________________|
1248 * -------------------------------------------
1249 * [leading] + [Trailing]
1250 */
1251 ix = __HI(x);
1252 lx = __LO(x);
1253 n2 = (ix >> 20) - 0x3ff; /* exponent of x, range:3-7 */
1254 n2 += n2; /* 2n */
1255 ix = (ix & 0x000fffff) | 0x3ff00000; /* y = scale x to [1,2] */
1256 __HI(y) = ix;
1257 __LO(y) = lx;
1258 __HI(z) = (ix & 0xffffc000) | 0x2000; /* z[j]=1+j/64+1/128 */
1259 __LO(z) = 0;
1260 j2 = (ix >> 13) & 0x7e; /* 2j */
1261 t1 = y + z;
1262 t2 = y - z;
1263 r = one / t1;
1264 t1 = (double)((float)t1);
1265 u = r * t2; /* u = (y-z)/(y+z) */
1266 t4 = T2[j2 + 1] + T1[n2 + 1];
1267 z2 = u * u;
1268 k = __HI(u) & 0x7fffffff;
1269 t3 = T2[j2] + T1[n2];
1270
1271 if ((k >> 20) < 0x3ec) { /* |u|<2**-19 */
1272 t2 = t4 + u * ((two + z2 * A1) + (z2 * z2) * (A2 + z2 * A3));
1273 } else {
1274 t5 = t4 + u * (z2 * A1 + (z2 * z2) * (A2 + z2 * A3));
1275 u2 = u + u;
1276 v = (double)((int)(u2 * t24)) * p24;
1277 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
1278 t3 += v;
1279 }
1280
1281 ss_h = (double)((float)(t2 + t3));
1282 ss_l = t2 - (ss_h - t3);
1283
1284 /*
1285 * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + (1/x)*P(1/x^2)
1286 * where ss = log(x) - 1 is already in extra precision
1287 */
1288 z = one / x;
1289 r = x - half;
1290 r_h = (double)((float)r);
1291 w_h = r_h * ss_h + hln2pi_h;
1292 z2 = z * z;
1293 w = (r - r_h) * ss_h + r * ss_l;
1294 z4 = z2 * z2;
1295 t1 = z2 * (GP1 + z4 * (GP3 + z4 * (GP5 + z4 * GP7)));
1296 t2 = z4 * (GP2 + z4 * (GP4 + z4 * GP6));
1297 t1 += t2;
1298 w += hln2pi_l;
1299 w_l = z * (GP0 + t1) + w;
1300 k = (int)((w_h + w_l) * invln2_32 + half);
1301
1302 /* compute the exponential of w_h+w_l */
1303 j = k & 0x1f;
1304 *m = (k >> 5);
1305 t3 = (double)k;
1306
1307 /* perform w - k*ln2_32 (represent as w_h - w_l) */
1308 t1 = w_h - t3 * ln2_32hi;
1309 t2 = t3 * ln2_32lo;
1310 w = w_l - t2;
1311 w_h = t1 + w_l;
1312 w_l = t2 - (w_l - (w_h - t1));
1313
1314 /* compute exp(w_h+w_l) */
1315 z = w_h - w_l;
1316 z2 = z * z;
1317 t1 = z2 * (Et1 + z2 * (Et3 + z2 * Et5));
1318 t2 = z2 * (Et2 + z2 * Et4);
1319 t3 = w_h - (w_l - (t1 + z * t2));
1320 zz.l = S_trail[j] * (one + t3) + S[j] * t3;
1321 zz.h = S[j];
1322 return (zz);
1323 }
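
/*
 * Illustrative cross-check (not part of the build): the Stirling form
 * quoted above, log(G(x)) ~= (x-.5)*(log(x)-1) + .5*(log(2*pi)-1)
 * + (1/x)*P(1/x^2), with P truncated to its leading term 1/12 (GP0).
 * stirling_lgamma() is a hypothetical name.
 */
#if 0
#include <math.h>
#include <stdio.h>

static double
stirling_lgamma(double x)
{
	static const double twopi = 6.28318530717958647693;

	return ((x - 0.5) * (log(x) - 1.0) +
	    0.5 * (log(twopi) - 1.0) + 1.0 / (12.0 * x));
}

int
main(void)
{
	/* compare against lgamma() for a moderate argument */
	printf("%.15g %.15g\n", stirling_lgamma(10.0), lgamma(10.0));
	return (0);
}
#endif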
1324
1325
1326 /*
1327 * kpsin(x)= sin(pi*x)/pi
1328 * 3 5 7 9 11 13 15
1329 * = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x +ks[5]*x +ks[6]*x
1330 */
1331 static const double ks[] = {
1332 -1.64493406684822640606569,
1333 +8.11742425283341655883668741874008920850698590621e-0001,
1334 -1.90751824120862873825597279118304943994042258291e-0001,
1335 +2.61478477632554278317289628332654539353521911570e-0002,
1336 -2.34607978510202710377617190278735525354347705866e-0003,
1337 +1.48413292290051695897242899977121846763824221705e-0004,
1338 -6.87730769637543488108688726777687262485357072242e-0006,
1339 };
1340
1341 /* assume x is not tiny and positive */
1342 static struct Double
1343 kpsin(double x)
1344 {
1345 double z, t1, t2, t3, t4;
1346 struct Double xx;
1347
1348 z = x * x;
1349 xx.h = x;
1350 t1 = z * x;
1351 t2 = z * z;
1352 t4 = t1 * ks[0];
1353 t3 = (t1 * z) * ((ks[1] + z * ks[2] + t2 * ks[3]) + (z * t2) * (ks[4] +
1354 z * ks[5] + t2 * ks[6]));
1355 xx.l = t4 + t3;
1356 return (xx);
1357 }
1358
1359
1360 /*
1361 * kpcos(x)= cos(pi*x)/pi
1362 * 2 4 6 8 10 12
1363 * = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x +kc[5]*x
1364 */
1365
1366 static const double one_pi_h = 0.318309886183790635705292970,
1367 one_pi_l = 3.583247455607534006714276420e-17;
1368 static const double npi_2_h = -1.5625,
1369 npi_2_l = -0.00829632679489661923132169163975055099555883223;
1370 static const double kc[] = {
1371 -1.57079632679489661923132169163975055099555883223e+0000,
1372 +1.29192819501230224953283586722575766189551966008e+0000,
1373 -4.25027339940149518500158850753393173519732149213e-0001,
1374 +7.49080625187015312373925142219429422375556727752e-0002,
1375 -8.21442040906099210866977352284054849051348692715e-0003,
1376 +6.10411356829515414575566564733632532333904115968e-0004,
1377 };
1378
1379 /* assume x is not tiny and positive */
1380 static struct Double
1381 kpcos(double x)
1382 {
1383 double z, t1, t2, t3, t4, x4, x8;
1384 struct Double xx;
1385
1386 z = x * x;
1387 xx.h = one_pi_h;
1388 t1 = (double)((float)x);
1389 x4 = z * z;
1390 t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
1391 t3 = one_pi_l + x4 * ((kc[1] + z * kc[2]) + x4 * (kc[3] + z * kc[4] +
1392 x4 * kc[5]));
1393 t4 = t1 * t1; /* 48-bit mantissa */
1394 x8 = t2 + t3;
1395 t4 *= npi_2_h; /* npi_2_h is a 5-bit constant; the product is exact */
1396 xx.l = x8 + t4; /* this minimizes the rounding error in xx.l */
1397 return (xx);
1398 }
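
/*
 * Illustrative cross-check (not part of the build): for the small reduced
 * arguments used by tgamma() below, kpsin() and kpcos() return
 * sin(pi*x)/pi and cos(pi*x)/pi as a head+tail pair.  Summing h + l and
 * comparing against the plain <math.h> expressions shows the intent;
 * kp_check() is a hypothetical helper inside this translation unit.
 */
#if 0
#include <stdio.h>

static void
kp_check(double x)
{
	static const double pi = 3.14159265358979323846;
	struct Double s = kpsin(x), c = kpcos(x);

	printf("kpsin(%g) = %.17g vs %.17g\n", x, s.h + s.l, sin(pi * x) / pi);
	printf("kpcos(%g) = %.17g vs %.17g\n", x, c.h + c.l, cos(pi * x) / pi);
}
#endif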
1399
1400 static const double
1401 /* 0.134861805732790769689793935774652917006 */
1402 t0z1 = 0.1348618057327907737708,
1403 t0z1_l = -4.0810077708578299022531e-18,
1404 /* 0.461632144968362341262659542325721328468 */
1405 t0z2 = 0.4616321449683623567850,
1406 t0z2_l = -1.5522348162858676890521e-17,
1407 /* 0.819773101100500601787868704921606996312 */
1408 t0z3 = 0.8197731011005006118708,
1409 t0z3_l = -1.0082945122487103498325e-17;
1410
1411 /*
1412 * 1.134861805732790769689793935774652917006
1413 */
1414
1415 /* gamma(x+i) for 0 <= x < 1 */
1416 static struct Double
1417 gam_n(int i, double x)
1418 {
1419 struct Double rr = { 0.0L, 0.0L }, yy;
1420 double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
1421
1422 /* compute yy = gamma(x+1) */
1423 if (x > 0.2845) {
1424 if (x > 0.6374) {
1425 r1 = x - t0z3;
1426 r2 = (double)((float)(r1 - t0z3_l));
1427 t2 = r1 - r2;
1428 yy = GT3(r2, t2 - t0z3_l);
1429 } else {
1430 r1 = x - t0z2;
1431 r2 = (double)((float)(r1 - t0z2_l));
1432 t2 = r1 - r2;
1433 yy = GT2(r2, t2 - t0z2_l);
1434 }
1435 } else {
1436 r1 = x - t0z1;
1437 r2 = (double)((float)(r1 - t0z1_l));
1438 t2 = r1 - r2;
1439 yy = GT1(r2, t2 - t0z1_l);
1440 }
1441
1442 /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
1443 switch (i) {
1444 case 0: /* yy/x */
1445 r1 = one / x;
1446 xh = (double)((float)x); /* x is not tiny */
1447 rr.h = (double)((float)((yy.h + yy.l) * r1));
1448 rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - r1 *
1449 yy.l);
1450 break;
1451 case 1: /* yy */
1452 rr.h = yy.h;
1453 rr.l = yy.l;
1454 break;
1455 case 2: /* (x+1)*yy */
1456 z = x + one; /* may not be exact */
1457 zh = (double)((float)z);
1458 rr.h = zh * yy.h;
1459 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
1460 break;
1461 case 3: /* (x+2)*(x+1)*yy */
1462 z1 = x + one;
1463 z2 = x + 2.0;
1464 z = z1 * z2;
1465 xh = (double)((float)z);
1466 zh = (double)((float)z1);
1467 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
1468 rr.h = xh * yy.h;
1469 rr.l = z * yy.l + xl * yy.h;
1470 break;
1471
1472 case 4: /* (x+1)*(x+3)*(x+2)*yy */
1473 z1 = x + 2.0;
1474 z2 = (x + one) * (x + 3.0);
1475 zh = z1;
1476 __LO(zh) = 0;
1477 __HI(zh) &= 0xfffffff8; /* zh 18 bits mantissa */
1478 zl = x - (zh - 2.0);
1479 z = z1 * z2;
1480 xh = (double)((float)z);
1481 xl = zl * (z2 + zh * (z1 + zh)) - (xh - zh * (zh * zh - one));
1482 rr.h = xh * yy.h;
1483 rr.l = z * yy.l + xl * yy.h;
1484 break;
1485 case 5: /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
1486 z1 = x + 2.0;
1487 z2 = x + 3.0;
1488 z = z1 * z2;
1489 zh = (double)((float)z1);
1490 yh = (double)((float)z);
1491 yl = (x - (zh - 2.0)) * (z2 + zh) - (yh - zh * (zh + one));
1492 z2 = z - 2.0;
1493 z *= z2;
1494 xh = (double)((float)z);
1495 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1496 rr.h = xh * yy.h;
1497 rr.l = z * yy.l + xl * yy.h;
1498 break;
1499 case 6: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
1500 z1 = x + 2.0;
1501 z2 = x + 3.0;
1502 z = z1 * z2;
1503 zh = (double)((float)z1);
1504 yh = (double)((float)z);
1505 z1 = x - (zh - 2.0);
1506 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
1507 z2 = z - 2.0;
1508 x5 = x + 5.0;
1509 z *= z2;
1510 xh = (double)((float)z);
1511 zh += 3.0;
1512 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1513
1514 /*
1515 * xh+xl=(x+1)*...*(x+4)
1516 * wh+wl=(x+5)*yy
1517 */
1518 wh = (double)((float)(x5 * (yy.h + yy.l)));
1519 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1520 rr.h = wh * xh;
1521 rr.l = z * wl + xl * wh;
1522 break;
1523 case 7: /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1524 z1 = x + 3.0;
1525 z2 = x + 4.0;
1526 z = z2 * z1;
1527 zh = (double)((float)z1);
1528 yh = (double)((float)z); /* yh+yl = (x+3)(x+4) */
1529 yl = (x - (zh - 3.0)) * (z2 + zh) - (yh - (zh * (zh + one)));
1530 z1 = x + 6.0;
1531 z2 = z - 2.0; /* z2 = (x+2)*(x+5) */
1532 z *= z2;
1533 xh = (double)((float)z);
1534 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0));
1535
1536 /*
1537 * xh+xl=(x+2)*...*(x+5)
1538 * wh+wl=(x+1)(x+6)*yy
1539 */
1540 z2 -= 4.0; /* z2 = (x+1)(x+6) */
1541 wh = (double)((float)(z2 * (yy.h + yy.l)));
1542 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0) * yy.h);
1543 rr.h = wh * xh;
1544 rr.l = z * wl + xl * wh;
1545 }
1546
1547 return (rr);
1548 }
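
/*
 * Illustrative sketch (not part of the build): the head/tail product
 * pattern used throughout gam_n() above.  The factor is rounded to float
 * so its head has at most 24 significant bits; since yy.h is also a
 * float-rounded head, zh*yy.h fits in a double exactly, and the dropped
 * parts go into the low word.  (gam_n() refines this by recovering the
 * tail of the exact factor, e.g. x - (zh - one) instead of z - zh.)
 * mul_dd_by() is a hypothetical stand-alone version of the pattern.
 */
#if 0
static struct Double
mul_dd_by(double z, struct Double yy)
{
	double zh = (double)((float)z);	/* short head of the factor */
	double zl = z - zh;		/* exact tail of the factor */
	struct Double rr;

	rr.h = zh * yy.h;		/* exact: 24 bits x 24 bits */
	rr.l = z * yy.l + zl * yy.h;	/* remaining cross terms */
	return (rr);
}
#endif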
1549
1550 double
1551 tgamma(double x)
1552 {
1553 struct Double ss, ww;
1554 double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1555 int i, j, k, m, ix, hx, xk;
1556 unsigned lx;
1557
1558 hx = __HI(x);
1559 lx = __LO(x);
1560 ix = hx & 0x7fffffff;
1561 y = x;
1562
1563 if (ix < 0x3ca00000)
1564 return (one / x); /* |x| < 2**-53 */
1565
1566 if (ix >= 0x7ff00000)
1567 /* +Inf -> +Inf, -Inf or NaN -> NaN */
1568 return (x * ((hx < 0) ? 0.0 : x));
1569
1570 if (hx > 0x406573fa || /* x > 171.62... overflow to +inf */
1571 (hx == 0x406573fa && lx > 0xE561F647)) {
1572 z = x / tiny;
1573 return (z * z);
1574 }
1575
1576 if (hx >= 0x40200000) { /* x >= 8 */
1577 ww = large_gam(x, &m);
1578 w = ww.h + ww.l;
1579 __HI(w) += m << 20;
1580 return (w);
1581 }
1582
1583 if (hx > 0) { /* 0 < x < 8 */
1584 i = (int)x;
1585 ww = gam_n(i, x - (double)i);
1586 return (ww.h + ww.l);
1587 }
1588
1589 /*
1590 * negative x
1591 */
1592
1593 /*
1594 * compute: xk =
1595 * -2 ... x is an even int (-inf is even)
1596 * -1 ... x is an odd int
1597 * +0 ... x is not an int but chopped to an even int
1598 * +1 ... x is not an int but chopped to an odd int
1599 */
1600 xk = 0;
1601
1602 if (ix >= 0x43300000) {
1603 if (ix >= 0x43400000)
1604 xk = -2;
1605 else
1606 xk = -2 + (lx & 1);
1607 } else if (ix >= 0x3ff00000) {
1608 k = (ix >> 20) - 0x3ff;
1609
1610 if (k > 20) {
1611 j = lx >> (52 - k);
1612
1613 if ((j << (52 - k)) == lx)
1614 xk = -2 + (j & 1);
1615 else
1616 xk = j & 1;
1617 } else {
1618 j = ix >> (20 - k);
1619
1620 if ((j << (20 - k)) == ix && lx == 0)
1621 xk = -2 + (j & 1);
1622 else
1623 xk = j & 1;
1624 }
1625 }
1626
1627 if (xk < 0)
1628 /* ideally gamma(-n)= (-1)**(n+1) * inf, but C99 expects NaN */
1629 return ((x - x) / (x - x)); /* 0/0 = NaN */
1630
1631 /* negative underflow threshold */
1632 if (ix > 0x4066e000 || (ix == 0x4066e000 && lx > 11)) {
1633 /* x < -183.0 - 11ulp */
1634 z = tiny / x;
1635
1636 if (xk == 1)
1637 z = -z;
1638
1639 return (z * tiny);
1640 }
1641
1642 /* now compute gamma(x) by -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x */
1643
1644 /*
1645 * First compute ss = -sin(pi*y)/pi , so that
1646 * gamma(x) = 1/(ss*gamma(1+y))
1647 */
1648 y = -x;
1649 j = (int)y;
1650 z = y - (double)j;
1651
1652 if (z > 0.3183098861837906715377675) {
1653 if (z > 0.6816901138162093284622325)
1654 ss = kpsin(one - z);
1655 else
1656 ss = kpcos(0.5 - z);
1657 } else {
1658 ss = kpsin(z);
1659 }
1660
1661 if (xk == 0) {
1662 ss.h = -ss.h;
1663 ss.l = -ss.l;
1664 }
1665
1666 /* Then compute ww = gamma(1+y); note that the result is scaled by 2**-m */
1667 m = 0;
1668
1669 if (j < 7) {
1670 ww = gam_n(j + 1, z);
1671 } else {
1672 w = y + one;
1673
1674 if ((lx & 1) == 0) { /* y+1 exact (note that y<184) */
1675 ww = large_gam(w, &m);
1676 } else {
1677 t = w - one;
1678
1679 if (t == y) { /* y+one exact */
1680 ww = large_gam(w, &m);
1681 } else { /* use y*gamma(y) */
1682 if (j == 7)
1683 ww = gam_n(j, z);
1684 else
1685 ww = large_gam(y, &m);
1686
1687 t4 = ww.h + ww.l;
1688 t1 = (double)((float)y);
1689 t2 = (double)((float)t4);
1690 /* t4 will not be too large */
1691 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1692 ww.h = t1 * t2;
1693 }
1694 }
1695 }
1696
1697 /* compute 1/(ss*ww) */
1698 t3 = ss.h + ss.l;
1699 t4 = ww.h + ww.l;
1700 t1 = (double)((float)t3);
1701 t2 = (double)((float)t4);
1702 z1 = ss.l - (t1 - ss.h); /* (t1,z1) = ss */
1703 z2 = ww.l - (t2 - ww.h); /* (t2,z2) = ww */
1704 t3 = t3 * t4; /* t3 = ss*ww */
1705 z3 = one / t3; /* z3 = 1/(ss*ww) */
1706 t5 = t1 * t2;
1707 z5 = z1 * t4 + t1 * z2; /* (t5,z5) = ss*ww */
1708 t1 = (double)((float)t3); /* (t1,z1) = ss*ww */
1709 z1 = z5 - (t1 - t5);
1710 t2 = (double)((float)z3); /* leading 1/(ss*ww) */
1711 z2 = z3 * (t2 * z1 - (one - t2 * t1));
1712 z = t2 - z2;
1713
1714 /* check whether z*2**-m underflow */
1715 if (m != 0) {
1716 hx = __HI(z);
1717 i = hx & 0x80000000;
1718 ix = hx ^ i;
1719 j = ix >> 20;
1720
1721 if (j > m) {
1722 ix -= m << 20;
1723 __HI(z) = ix ^ i;
1724 } else if ((m - j) > 52) {
1725 /* underflow */
1726 if (xk == 0)
1727 z = -tiny * tiny;
1728 else
1729 z = tiny * tiny;
1730 } else {
1731 /* subnormal */
1732 m -= 60;
1733 t = one;
1734 __HI(t) -= 60 << 20;
1735 ix -= m << 20;
1736 __HI(z) = ix ^ i;
1737 z *= t;
1738 }
1739 }
1740
1741 return (z);
1742 }
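
/*
 * Illustrative sketch (not part of the build): the xk classification
 * performed above for negative arguments, written as a stand-alone
 * helper with the same __HI/__LO word access.  classify_int() is a
 * hypothetical name; return values follow the table documented above
 * (-2 even integer, -1 odd integer, 0/1 non-integer chopped to even/odd).
 */
#if 0
static int
classify_int(double x)
{
	int hx = __HI(x), ix = hx & 0x7fffffff, k, j;
	unsigned lx = __LO(x);

	if (ix >= 0x43300000)		/* |x| >= 2**52: always an integer */
		return (ix >= 0x43400000 ? -2 : -2 + (int)(lx & 1));
	if (ix < 0x3ff00000)		/* |x| < 1: not an integer */
		return (0);
	k = (ix >> 20) - 0x3ff;		/* unbiased exponent, 0..51 */
	if (k > 20) {
		j = lx >> (52 - k);
		return ((j << (52 - k)) == lx ? -2 + (j & 1) : (j & 1));
	}
	j = ix >> (20 - k);
	return ((j << (20 - k)) == ix && lx == 0 ? -2 + (j & 1) : (j & 1));
}
#endif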