il_11210 Sdiff usr/src/lib/libm/common/m9x/tgammal.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak __tgammal = tgammal
  31 
  32 #include "libm.h"
  33 #include <sys/isa_defs.h>
  34 
  35 #if defined(_BIG_ENDIAN)
  36 #define H0_WORD(x)      ((unsigned *) &x)[0]
  37 #define H3_WORD(x)      ((unsigned *) &x)[3]
  38 #define CHOPPED(x)      (long double) ((double) (x))
  39 #else
  40 #define H0_WORD(x)      ((((int *) &x)[2] << 16) | \
  41                         (0x0000ffff & (((unsigned *) &x)[1] >> 15)))
  42 #define H3_WORD(x)      ((unsigned *) &x)[0]
  43 #define CHOPPED(x)      (long double) ((float) (x))
  44 #endif
  45 
  46 struct LDouble {
  47         long double h, l;
  48 };
  49 
  50 /* INDENT OFF */
  51 /* Primary interval GTi() */

  52 static const long double P1[] = {
  53         +0.709086836199777919037185741507610124611513720557L,
  54         +4.45754781206489035827915969367354835667391606951e-0001L,
  55         +3.21049298735832382311662273882632210062918153852e-0002L,
  56         -5.71296796342106617651765245858289197369688864350e-0003L,
  57         +6.04666892891998977081619174969855831606965352773e-0003L,
  58         +8.99106186996888711939627812174765258822658645168e-0004L,
  59         -6.96496846144407741431207008527018441810175568949e-0005L,
  60         +1.52597046118984020814225409300131445070213882429e-0005L,
  61         +5.68521076168495673844711465407432189190681541547e-0007L,
  62         +3.30749673519634895220582062520286565610418952979e-0008L,
  63 };

  64 static const long double Q1[] = {
  65         +1.0+0000L,
  66         +1.35806511721671070408570853537257079579490650668e+0000L,
  67         +2.97567810153429553405327140096063086994072952961e-0001L,
  68         -1.52956835982588571502954372821681851681118097870e-0001L,
  69         -2.88248519561420109768781615289082053597954521218e-0002L,
  70         +1.03475311719937405219789948456313936302378395955e-0002L,
  71         +4.12310203243891222368965360124391297374822742313e-0004L,
  72         -3.12653708152290867248931925120380729518332507388e-0004L,
  73         +2.36672170850409745237358105667757760527014332458e-0005L,
  74 };

  75 static const long double P2[] = {
  76         +0.428486815855585429730209907810650135255270600668084114L,
  77         +2.62768479103809762805691743305424077975230551176e-0001L,
  78         +3.81187532685392297608310837995193946591425896150e-0002L,
  79         +3.00063075891811043820666846129131255948527925381e-0003L,
  80         +2.47315407812279164228398470797498649142513408654e-0003L,
  81         +3.62838199917848372586173483147214880464782938664e-0004L,
  82         +3.43991105975492623982725644046473030098172692423e-0006L,
  83         +4.56902151569603272237014240794257659159045432895e-0006L,
  84         +2.13734755837595695602045100675540011352948958453e-0007L,
  85         +9.74123440547918230781670266967882492234877125358e-0009L,
  86 };

  87 static const long double Q2[] = {
  88         +1.0L,
  89         +9.18284118632506842664645516830761489700556179701e-0001L,
  90         -6.41430858837830766045202076965923776189154874947e-0003L,
  91         -1.24400885809771073213345747437964149775410921376e-0001L,
  92         +4.69803798146251757538856567522481979624746875964e-0003L,
  93         +7.18309447069495315914284705109868696262662082731e-0003L,
  94         -8.75812626987894695112722600697653425786166399105e-0004L,
  95         -1.23539972377769277995959339188431498626674835169e-0004L,
  96         +3.10019017590151598732360097849672925448587547746e-0005L,
  97         -1.77260223349332617658921874288026777465782364070e-0006L,
  98 };

  99 static const long double P3[] = {
 100         +0.3824094797345675048502747661075355640070439388902L,
 101         +3.42198093076618495415854906335908427159833377774e-0001L,
 102         +9.63828189500585568303961406863153237440702754858e-0002L,
 103         +8.76069421042696384852462044188520252156846768667e-0003L,
 104         +1.86477890389161491224872014149309015261897537488e-0003L,
 105         +8.16871354540309895879974742853701311541286944191e-0004L,
 106         +6.83783483674600322518695090864659381650125625216e-0005L,
 107         -1.10168269719261574708565935172719209272190828456e-0006L,
 108         +9.66243228508380420159234853278906717065629721016e-0007L,
 109         +2.31858885579177250541163820671121664974334728142e-0008L,
 110 };

 111 static const long double Q3[] = {
 112         +1.0L,
 113         +8.25479821168813634632437430090376252512793067339e-0001L,
 114         -1.62251363073937769739639623669295110346015576320e-0002L,
 115         -1.10621286905916732758745130629426559691187579852e-0001L,
 116         +3.48309693970985612644446415789230015515365291459e-0003L,
 117         +6.73553737487488333032431261131289672347043401328e-0003L,
 118         -7.63222008393372630162743587811004613050245128051e-0004L,
 119         -1.35792670669190631476784768961953711773073251336e-0004L,
 120         +3.19610150954223587006220730065608156460205690618e-0005L,
 121         -1.82096553862822346610109522015129585693354348322e-0006L,
 122 };
 123 
 124 static const long double
 125 #if defined(__x86)
 126 GZ1_h   =  0.938204627909682449364570100414084663498215377L,
 127 GZ1_l   =  4.518346116624229420055327632718530617227944106e-20L,
 128 GZ2_h   =  0.885603194410888700264725126309883762587560340L,
 129 GZ2_l   =  1.409077427270497062039119290776508217077297169e-20L,
 130 GZ3_h   =  0.936781411463652321613537060640553022494714241L,
 131 GZ3_l   =  5.309836440284827247897772963887219035221996813e-21L,
 132 #else
 133 GZ1_h   =  0.938204627909682449409753561580326910854647031L,
 134 GZ1_l   =  4.684412162199460089642452580902345976446297037e-35L,
 135 GZ2_h   =  0.885603194410888700278815900582588658192658794L,
 136 GZ2_l   =  7.501529273890253789219935569758713534641074860e-35L,
 137 GZ3_h   =  0.936781411463652321618846897080837818855399840L,
 138 GZ3_l   =  3.088721217404784363585591914529361687403776917e-35L,
 139 #endif
 140 TZ1     = -0.3517214357852935791015625L,
 141 TZ3     =  0.280530631542205810546875L;
 142 /* INDENT ON */
 143 
 144 /* INDENT OFF */
 145 /*
 146  * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
 147  * ...assume yh got 53 or 24(i386) significant bits
 148  */
 149 /* INDENT ON */
 150 static struct LDouble
 151 GT1(long double yh, long double yl) {

 152         long double t3, t4, y;
 153         int i;
 154         struct LDouble r;
 155 
 156         y = yh + yl;

 157         for (t4 = Q1[8], t3 = P1[8] + y * P1[9], i = 7; i >= 0; i--) {
 158                 t4 = t4 * y + Q1[i];
 159                 t3 = t3 * y + P1[i];
 160         }

 161         t3 = (y * y) * t3 / t4;
 162         t3 += (TZ1 * yl + GZ1_l);
 163         t4 = TZ1 * yh;
 164         r.h = CHOPPED((t4 + GZ1_h + t3));
 165         t3 += (t4 - (r.h - GZ1_h));
 166         r.l = t3;
 167         return (r);
 168 }
 169 
 170 /* INDENT OFF */
 171 /*
 172  * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
 173  * ...assume yh got 53 significant bits
 174  */
 175 /* INDENT ON */
 176 static struct LDouble
 177 GT2(long double yh, long double yl) {

 178         long double t3, t4, y;
 179         int i;
 180         struct LDouble r;
 181 
 182         y = yh + yl;

 183         for (t4 = Q2[9], t3 = P2[9], i = 8; i >= 0; i--) {
 184                 t4 = t4 * y + Q2[i];
 185                 t3 = t3 * y + P2[i];
 186         }

 187         t3 = GZ2_l + (y * y) * t3 / t4;
 188         r.h = CHOPPED((GZ2_h + t3));
 189         r.l = t3 - (r.h - GZ2_h);
 190         return (r);
 191 }
 192 
 193 /* INDENT OFF */
 194 /*
 195  * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
 196  * ...assume yh got 53 significant bits
 197  */
 198 /* INDENT ON */
 199 static struct LDouble
 200 GT3(long double yh, long double yl) {

 201         long double t3, t4, y;
 202         int i;
 203         struct LDouble r;
 204 
 205         y = yh + yl;

 206         for (t4 = Q3[9], t3 = P3[9], i = 8; i >= 0; i--) {
 207                 t4 = t4 * y + Q3[i];
 208                 t3 = t3 * y + P3[i];
 209         }

 210         t3 = (y * y) * t3 / t4;
 211         t3 += (TZ3 * yl + GZ3_l);
 212         t4 = TZ3 * yh;
 213         r.h = CHOPPED((t4 + GZ3_h + t3));
 214         t3 += (t4 - (r.h - GZ3_h));
 215         r.l = t3;
 216         return (r);
 217 }
 218 
 219 /* INDENT OFF */
 220 /* Hex value of GP[0] shoule be 3FB55555 55555555 */

 221 static const long double GP[] = {
 222         +0.083333333333333333333333333333333172839171301L,
 223         -2.77777777777777777777777777492501211999399424104e-0003L,
 224         +7.93650793650793650793635650541638236350020883243e-0004L,
 225         -5.95238095238095238057299772679324503339241961704e-0004L,
 226         +8.41750841750841696138422987977683524926142600321e-0004L,
 227         -1.91752691752686682825032547823699662178842123308e-0003L,
 228         +6.41025641022403480921891559356473451161279359322e-0003L,
 229         -2.95506535798414019189819587455577003732808185071e-0002L,
 230         +1.79644367229970031486079180060923073476568732136e-0001L,
 231         -1.39243086487274662174562872567057200255649290646e+0000L,
 232         +1.34025874044417962188677816477842265259608269775e+0001L,
 233         -1.56803713480127469414495545399982508700748274318e+0002L,
 234         +2.18739841656201561694927630335099313968924493891e+0003L,
 235         -3.55249848644100338419187038090925410976237921269e+0004L,
 236         +6.43464880437835286216768959439484376449179576452e+0005L,
 237         -1.20459154385577014992600342782821389605893904624e+0007L,
 238         +2.09263249637351298563934942349749718491071093210e+0008L,
 239         -2.96247483183169219343745316433899599834685703457e+0009L,
 240         +2.88984933605896033154727626086506756972327292981e+0010L,
 241         -1.40960434146030007732838382416230610302678063984e+0011L,      /* 19 */
 242 };
 243 
 244 static const long double T3[] = {
 245         +0.666666666666666666666666666666666634567834260213L,   /* T3[0] */
 246         +0.400000000000000000000000000040853636176634934140L,   /* T3[1] */
 247         +0.285714285714285714285696975252753987869020263448L,   /* T3[2] */
 248         +0.222222222222222225593221101192317258554772129875L,   /* T3[3] */
 249         +0.181818181817850192105847183461778186703779262916L,   /* T3[4] */
 250         +0.153846169861348633757101285952333369222567014596L,   /* T3[5] */
 251         +0.133033462889260193922261296772841229985047571265L,   /* T3[6] */
 252 };
 253 
 254 static const long double c[] = {
 255 0.0L,
 256 1.0L,
 257 2.0L,
 258 0.5L,
 259 1.0e-4930L,                                                     /* tiny */
 260 4.18937683105468750000e-01L,                                    /* hln2pim1_h */
 261 8.50099203991780329736405617639861397473637783412817152e-07L,   /* hln2pim1_l */
 262 0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */
 263 2.16608493865351192653179168701171875e-02L,                     /* ln2_32hi */
 264 5.96317165397058692545083025235937919875797669127130e-12L,      /* ln2_32lo */
 265 46.16624130844682903551758979206054839765267053289554989233L,   /* invln2_32 */
 266 #if defined(__x86)
 267 1.7555483429044629170023839037639845628291e+03L,                /* overflow */
 268 #else
 269 1.7555483429044629170038892160702032034177e+03L,                /* overflow */
 270 #endif
 271 };

 272 
 273 #define zero            c[0]
 274 #define one             c[1]
 275 #define two             c[2]
 276 #define half            c[3]
 277 #define tiny            c[4]
 278 #define hln2pim1_h      c[5]
 279 #define hln2pim1_l      c[6]
 280 #define hln2pim1        c[7]
 281 #define ln2_32hi        c[8]
 282 #define ln2_32lo        c[9]
 283 #define invln2_32       c[10]
 284 #define overflow        c[11]
 285 
 286 /*
 287  * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64
 288  */
 289 static const long double Et[] = {
 290         +5.0000000000000000000e-1L,
 291         +1.66666666666666666666666666666828835166292152466e-0001L,

 297         +2.75573192239028921114572986441972140933432317798e-0006L,
 298         +2.75573192239448470555548102895526369739856219317e-0007L,
 299         +2.50521677867683935940853997995937600214167232477e-0008L,
 300         +2.08767928899010367374984448513685566514152147362e-0009L,
 301 };
 302 
 303 /*
 304  * long double precision coefficients for computing log(x)-1 in tgamma.
 305  *  See "algorithm" for details
 306  *
 307  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 308  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 309  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
 310  *       T2(j) = T2[2j,2j+1] = log(z[j]),
 311  *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15
 312  *  Note
 313  *  (1) the leading entries are truncated to 24 binary point.
 314  *  (2) Remez error for T3(s) is bounded by 2**(-136.54)
 315  */
 316 static const long double T1[] = {
 317 -1.000000000000000000000000000000000000000000e+00L,
 318         +0.000000000000000000000000000000000000000000e+00L,
 319 -3.068528175354003906250000000000000000000000e-01L,
 320 -1.904654299957767878541823431924500011926579e-09L,
 321         +3.862943053245544433593750000000000000000000e-01L,
 322         +5.579533617547508924291635313615100141107647e-08L,
 323         +1.079441487789154052734375000000000000000000e+00L,
 324         +5.389068187551732136437452970422650211661470e-08L,
 325         +1.772588670253753662109375000000000000000000e+00L,
 326         +5.198602757555955348583270627230200282215294e-08L,
 327         +2.465735852718353271484375000000000000000000e+00L,
 328         +5.008137327560178560729088284037750352769117e-08L,
 329         +3.158883035182952880859375000000000000000000e+00L,
 330         +4.817671897564401772874905940845299849351090e-08L,
 331         +3.852030217647552490234375000000000000000000e+00L,
 332         +4.627206467568624985020723597652849919904913e-08L,
 333         +4.545177400112152099609375000000000000000000e+00L,
 334         +4.436741037572848197166541254460399990458737e-08L,
 335         +5.238324582576751708984375000000000000000000e+00L,
 336         +4.246275607577071409312358911267950061012560e-08L,
 337         +5.931471765041351318359375000000000000000000e+00L,
 338         +4.055810177581294621458176568075500131566384e-08L,
 339 };
 340

 527         +1.35425554693689272829801474014070273e+00L,
 528         +1.38390988196383195487265952726519287e+00L,
 529         +1.41421356237309504880168872420969798e+00L,
 530         +1.44518080697704662003700624147167095e+00L,
 531         +1.47682614593949931138690748037404985e+00L,
 532         +1.50916442759342273976601955103319352e+00L,
 533         +1.54221082540794082361229186209073479e+00L,
 534         +1.57598084510788648645527016018190504e+00L,
 535         +1.61049033194925430817952066735740067e+00L,
 536         +1.64575547815396484451875672472582254e+00L,
 537         +1.68179283050742908606225095246642969e+00L,
 538         +1.71861929812247791562934437645631244e+00L,
 539         +1.75625216037329948311216061937531314e+00L,
 540         +1.79470907500310718642770324212778174e+00L,
 541         +1.83400808640934246348708318958828892e+00L,
 542         +1.87416763411029990132999894995444645e+00L,
 543         +1.91520656139714729387261127029583086e+00L,
 544         +1.95714412417540026901832225162687149e+00L,
 545 #endif
 546 };

 547 static const long double S_trail[] = {
 548 #if defined(__x86)
 549         +0.0000000000000000000000000e+00L,
 550         +2.6327965667180882569382524e-20L,
 551         +8.3765863521895191129661899e-20L,
 552         +3.9798705777454504249209575e-20L,
 553         +1.0668046596651558640993042e-19L,
 554         +1.9376009847285360448117114e-20L,
 555         +6.7081819456112953751277576e-21L,
 556         +1.9711680502629186462729727e-20L,
 557         +2.9932584438449523689104569e-20L,
 558         +6.8887754153039109411061914e-20L,
 559         +6.8002718741225378942847820e-20L,
 560         +6.5846917376975403439742349e-20L,
 561         +1.2171958727511372194876001e-20L,
 562         +3.5625253228704087115438260e-20L,
 563         +3.1129551559077560956309179e-20L,
 564         +5.7519192396164779846216492e-20L,
 565         +3.7900651177865141593101239e-20L,
 566         +1.1659262405698741798080115e-20L,
 567         +7.1364385105284695967172478e-20L,
 568         +5.2631003710812203588788949e-20L,
 569         +2.6328853788732632868460580e-20L,
 570         +5.4583950085438242788190141e-20L,
 571         +9.5803254376938269960718656e-20L,
 572         +7.6837733983874245823512279e-21L,
 573         +2.4415965910835093824202087e-20L,
 574         +2.6052966871016580981769728e-20L,
 575         +2.6876456344632553875309579e-21L,
 576         +1.2861930155613700201703279e-20L,
 577         +8.8166633394037485606572294e-20L,
 578         +2.9788615389580190940837037e-20L,
 579         +5.2352341619805098677422139e-20L,
 580         +5.2578463064010463732242363e-20L,
 581 #else
 582         +0.00000000000000000000000000000000000e+00L,
 583         +1.80506787420330954745573333054573786e-35L,
 584 -9.37452029228042742195756741973083214e-35L,
 585 -1.59696844729275877071290963023149997e-35L,
 586         +9.11249341012502297851168610167248666e-35L,
 587 -6.50422820697854828723037477525938871e-35L,
 588 -8.14846884452585113732569176748815532e-35L,
 589 -5.06621457672180031337233074514290335e-35L,
 590 -1.35983097468881697374987563824591912e-35L,
 591         +9.49742763556319647030771056643324660e-35L,
 592 -3.28317052317699860161506596533391526e-36L,
 593 -5.01723570938719041029018653045842895e-35L,
 594 -2.39147479768910917162283430160264014e-35L,
 595 -8.35057135763390881529889073794408385e-36L,
 596         +7.03675688907326504242173719067187644e-35L,
 597 -5.18248485306464645753689301856695619e-35L,
 598         +9.42224254862183206569211673639406488e-35L,
 599 -3.96750082539886230916730613021641828e-35L,
 600         +7.14352899156330061452327361509276724e-35L,
 601         +1.15987125286798512424651783410044433e-35L,
 602         +4.69693347835811549530973921320187447e-35L,
 603 -3.38651317599500471079924198499981917e-35L,
 604 -8.58731877429824706886865593510387445e-35L,
 605 -9.60595154874935050318549936224606909e-35L,
 606         +9.60973393212801278450755869714178581e-35L,
 607         +6.37839792144002843924476144978084855e-35L,
 608         +7.79243078569586424945646112516927770e-35L,
 609         +7.36133776758845652413193083663393220e-35L,
 610 -6.47299514791334723003521457561217053e-35L,
 611         +8.58747441795369869427879806229522962e-35L,
 612         +2.37181542282517483569165122830269098e-35L,
 613 -3.02689168209611877300459737342190031e-37L,
 614 #endif
 615 };
 616 /* INDENT ON */
 617 
 618 /* INDENT OFF */
 619 /*
 620  * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
 621  *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
 622  *                = L1 + L2 + L3,
 623  */
 624 /* INDENT ON */
 625 static struct LDouble
 626 large_gam(long double x, int *m) {

 627         long double z, t1, t2, t3, z2, t5, w, y, u, r, v;
 628         long double t24 = 16777216.0L, p24 = 1.0L / 16777216.0L;
 629         int n2, j2, k, ix, j, i;
 630         struct LDouble zz;
 631         long double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
 632 
 633 /* INDENT OFF */
 634 /*
 635  * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
 636  *
 637  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 638  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 639  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
 640  *       T2(j) = T2[2j,2j+1] = log(z[j]),
 641  *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + ... + T3[6]s^15
 642  *  Note
 643  *  (1) the leading entries are truncated to 24 binary point.
 644  *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
 645  *                                   2**(-24)
 646  *                           _________V___________________
 647  *               T1(n):     |_________|___________________|
 648  *                             _______ ______________________
 649  *               T2(j):       |_______|______________________|
 650  *                                ____ _______________________
 651  *               2s:             |____|_______________________|
 652  *                                    __________________________
 653  *          +    T3(s)-2s:           |__________________________|
 654  *                       -------------------------------------------
 655  *                          [leading] + [Trailing]
 656  */
 657         /* INDENT ON */
 658         ix = H0_WORD(x);
 659         n2 = (ix >> 16) - 0x3fff; /* exponent of x, range:3-10 */
 660         y = scalbnl(x, -n2);    /* y = scale x to [1,2] */
 661         n2 += n2;               /* 2n */
 662         j = (ix >> 10) & 0x3f;        /* j */
 663         z = 1.0078125L + (long double) j * 0.015625L;   /* z[j]=1+j/64+1/128 */
 664         j2 = j + j;
 665         t1 = y + z;
 666         t2 = y - z;
 667         r = one / t1;
 668         u = r * t2;             /* u = (y-z)/(y+z) */
 669         t1 = CHOPPED(t1);
 670         t4 = T2[j2 + 1] + T1[n2 + 1];
 671         z2 = u * u;
 672         k = H0_WORD(u) & 0x7fffffff;
 673         t3 = T2[j2] + T1[n2];

 674         for (t5 = T3[6], i = 5; i >= 0; i--)
 675                 t5 = z2 * t5 + T3[i];

 676         if ((k >> 16) < 0x3fec) {      /* |u|<2**-19 */
 677                 t2 = t4 + u * (two + z2 * t5);
 678         } else {
 679                 t5 = t4 + (u * z2) * t5;
 680                 u2 = u + u;
 681                 v = (long double) ((int) (u2 * t24)) * p24;
 682                 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
 683                 t3 += v;
 684         }

 685         ss_h = CHOPPED((t2 + t3));
 686         ss_l = t2 - (ss_h - t3);
 687 /* INDENT OFF */
 688 /*
 689  * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
 690  * where ss = log(x) - 1 in already in extra precision
 691  */
 692         /* INDENT ON */
 693         z = one / x;
 694         r = x - half;
 695         r_h = CHOPPED((r));
 696         w_h = r_h * ss_h + hln2pim1_h;
 697         z2 = z * z;
 698         w = (r - r_h) * ss_h + r * ss_l;
 699         t1 = GP[19];

 700         for (i = 18; i > 0; i--)
 701                 t1 = z2 * t1 + GP[i];

 702         w += hln2pim1_l;
 703         w_l = z * (GP[0] + z2 * t1) + w;
 704         k = (int) ((w_h + w_l) * invln2_32 + half);
 705 
 706         /* compute the exponential of w_h+w_l */
 707 
 708         j = k & 0x1f;
 709         *m = k >> 5;
 710         t3 = (long double) k;
 711 
 712         /* perform w - k*ln2_32 (represent as w_h - w_l) */
 713         t1 = w_h - t3 * ln2_32hi;
 714         t2 = t3 * ln2_32lo;
 715         w = t2 - w_l;
 716         w_h = t1 - w;
 717         w_l = w - (t1 - w_h);
 718 
 719         /* compute exp(w_h-w_l) */
 720         z = w_h - w_l;

 721         for (t1 = Et[10], i = 9; i >= 0; i--)
 722                 t1 = z * t1 + Et[i];

 723         t3 = w_h - (w_l - (z * z) * t1);        /* t3 = expm1(z) */
 724         zz.l = S_trail[j] * (one + t3) + S[j] * t3;
 725         zz.h = S[j];
 726         return (zz);
 727 }
 728 
 729 /* INDENT OFF */
 730 /*
 731  * kpsin(x)= sin(pi*x)/pi
 732  *                 3        5        7        9        11                27
 733  *      = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  + ... + ks[12]*x
 734  */
 735 static const long double ks[] = {
 736         -1.64493406684822643647241516664602518705158902870e+0000L,
 737         +8.11742425283353643637002772405874238094995726160e-0001L,
 738         -1.90751824122084213696472111835337366232282723933e-0001L,
 739         +2.61478478176548005046532613563241288115395517084e-0002L,
 740         -2.34608103545582363750893072647117829448016479971e-0003L,
 741         +1.48428793031071003684606647212534027556262040158e-0004L,
 742         -6.97587366165638046518462722252768122615952898698e-0006L,
 743         +2.53121740413702536928659271747187500934840057929e-0007L,
 744         -7.30471182221385990397683641695766121301933621956e-0009L,
 745         +1.71653847451163495739958249695549313987973589884e-0010L,
 746         -3.34813314714560776122245796929054813458341420565e-0012L,
 747         +5.50724992262622033449487808306969135431411753047e-0014L,
 748         -7.67678132753577998601234393215802221104236979928e-0016L,
 749 };
 750 /* INDENT ON */
 751 
 752 /*
 753  * assume x is not tiny and positive
 754  */
 755 static struct LDouble
 756 kpsin(long double x) {

 757         long double z, t1, t2;
 758         struct LDouble xx;
 759         int i;
 760 
 761         z = x * x;
 762         xx.h = x;

 763         for (t2 = ks[12], i = 11; i > 0; i--)
 764                 t2 = z * t2 + ks[i];

 765         t1 = z * x;
 766         t2 *= z * t1;
 767         xx.l = t1 * ks[0] + t2;
 768         return (xx);
 769 }
 770 
 771 /* INDENT OFF */
 772 /*
 773  * kpcos(x)= cos(pi*x)/pi
 774  *                     2        4        6        8        10        12
 775  *      = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
 776  *
 777  *                     2        4        6        8        10            22
 778  *      = 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x  +...+kc[9]*x
 779  *
 780  * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l)
 781  *         =  npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x
 782  *         =  npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x
 783  *         =  npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x
 784  * Here x_f = (long double) (float)x
 785  * Note that pi/2(in hex) =
 786  *  1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29
 787  * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 =
 788  *  -1.570796310901641845703125000000000 and
 789  * npi_2_l =
 790  *  -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 =
 791  *  -.0000000158932547735281966916397514420985846996875529104874722961539 =
 792  *  -1.5893254773528196691639751442098584699687552910487472296153e-8
 793  * 1/pi(in hex) =
 794  *  .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
 795  * will be splitted into:
 796  *  one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000...  and
 797  *  one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
 798  */
 799 
 800 static const long double
 801 #if defined(__x86)
 802 one_pi_h = 0.3183098861481994390487670898437500L,       /* 31 bits */
 803 one_pi_l = 3.559123248900043690127872406891929148e-11L,
 804 #else
 805 one_pi_h = 0.31830988618379052468299050815403461456298828125L,
 806 one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L,
 807 #endif
 808 npi_2_h = -1.570796310901641845703125000000000L,
 809 npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L;
 810 
 811 static const long double kc[] = {
 812         +1.29192819501249250731151312779548918765320728489e+0000L,
 813         -4.25027339979557573976029596929319207009444090366e-0001L,
 814         +7.49080661650990096109672954618317623888421628613e-0002L,
 815         -8.21458866111282287985539464173976555436050215120e-0003L,
 816         +6.14202578809529228503205255165761204750211603402e-0004L,
 817         -3.33073432691149607007217330302595267179545908740e-0005L,
 818         +1.36970959047832085796809745461530865597993680204e-0006L,
 819         -4.41780774262583514450246512727201806217271097336e-0008L,
 820         +1.14741409212381858820016567664488123478660705759e-0009L,
 821         -2.44261236114707374558437500654381006300502749632e-0011L,
 822 };
 823 /* INDENT ON */
 824 
 825 /*
 826  * assume x is not tiny and positive
 827  */
 828 static struct LDouble
 829 kpcos(long double x) {

 830         long double z, t1, t2, t3, t4, x4, x8;
 831         int i;
 832         struct LDouble xx;
 833 
 834         z = x * x;
 835         xx.h = one_pi_h;
 836         t1 = (long double) ((float) x);
 837         x4 = z * z;
 838         t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);

 839         for (i = 8, t3 = kc[9]; i >= 0; i--)
 840                 t3 = z * t3 + kc[i];

 841         t3 = one_pi_l + x4 * t3;
 842         t4 = t1 * t1 * npi_2_h;
 843         x8 = t2 + t3;
 844         xx.l = x8 + t4;
 845         return (xx);
 846 }
 847 
 848 /* INDENT OFF */
 849 static const long double
 850         /* 0.13486180573279076968979393577465291700642511139552429398233 */
 851 #if defined(__x86)
 852 t0z1   =  0.1348618057327907696779385054997035808810L,
 853 t0z1_l =  1.1855430274949336125392717150257379614654e-20L,
 854 #else
 855 t0z1   =  0.1348618057327907696897939357746529168654L,
 856 t0z1_l =  1.4102088588676879418739164486159514674310e-37L,
 857 #endif
 858         /* 0.46163214496836234126265954232572132846819620400644635129599 */
 859 #if defined(__x86)
 860 t0z2   =  0.4616321449683623412538115843295472018326L,
 861 t0z2_l =  8.84795799617412663558532305039261747030640e-21L,
 862 #else
 863 t0z2   =  0.46163214496836234126265954232572132343318L,
 864 t0z2_l =  5.03501162329616380465302666480916271611101e-36L,
 865 #endif
 866         /* 0.81977310110050060178786870492160699631174407846245179119586 */
 867 #if defined(__x86)
 868 t0z3   =  0.81977310110050060178773362329351925836817L,
 869 t0z3_l =  1.350816280877379435658077052534574556256230e-22L
 870 #else
 871 t0z3   =  0.8197731011005006017878687049216069516957449L,
 872 t0z3_l =  4.461599916947014419045492615933551648857380e-35L
 873 #endif
 874 ;
 875 /* INDENT ON */
 876 
 877 /*
 878  * gamma(x+i) for 0 <= x < 1
 879  */
 880 static struct LDouble
 881 gam_n(int i, long double x) {
 882         struct LDouble rr = {0.0L, 0.0L}, yy;

 883         long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
 884 
 885         /* compute yy = gamma(x+1) */
 886         if (x > 0.2845L) {
 887                 if (x > 0.6374L) {
 888                         r1 = x - t0z3;
 889                         r2 = CHOPPED((r1 - t0z3_l));
 890                         t2 = r1 - r2;
 891                         yy = GT3(r2, t2 - t0z3_l);
 892                 } else {
 893                         r1 = x - t0z2;
 894                         r2 = CHOPPED((r1 - t0z2_l));
 895                         t2 = r1 - r2;
 896                         yy = GT2(r2, t2 - t0z2_l);
 897                 }
 898         } else {
 899                 r1 = x - t0z1;
 900                 r2 = CHOPPED((r1 - t0z1_l));
 901                 t2 = r1 - r2;
 902                 yy = GT1(r2, t2 - t0z1_l);
 903         }

 904         /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
 905         switch (i) {
 906         case 0:         /* yy/x */
 907                 r1 = one / x;
 908                 xh = CHOPPED((x));      /* x is not tiny */
 909                 rr.h = CHOPPED(((yy.h + yy.l) * r1));
 910                 rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) -
 911                         r1 * yy.l);
 912                 break;
 913         case 1:         /* yy */
 914                 rr.h = yy.h;
 915                 rr.l = yy.l;
 916                 break;
 917         case 2:         /* (x+1)*yy */
 918                 z = x + one;    /* may not be exact */
 919                 zh = CHOPPED((z));
 920                 rr.h = zh * yy.h;
 921                 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
 922                 break;
 923         case 3:         /* (x+2)*(x+1)*yy */
 924                 z1 = x + one;
 925                 z2 = x + 2.0L;
 926                 z = z1 * z2;
 927                 xh = CHOPPED((z));
 928                 zh = CHOPPED((z1));
 929                 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
 930 
 931                 rr.h = xh * yy.h;

 959                 z *= z2;
 960                 xh = CHOPPED((z));
 961                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
 962                 rr.h = xh * yy.h;
 963                 rr.l = z * yy.l + xl * yy.h;
 964                 break;
 965         case 6:         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
 966                 z1 = x + 2.0L;
 967                 z2 = x + 3.0L;
 968                 z = z1 * z2;
 969                 zh = CHOPPED((z1));
 970                 yh = CHOPPED((z));
 971                 z1 = x - (zh - 2.0L);
 972                 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
 973                 z2 = z - 2.0L;
 974                 x5 = x + 5.0L;
 975                 z *= z2;
 976                 xh = CHOPPED(z);
 977                 zh += 3.0;
 978                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
 979                                                 /* xh+xl=(x+1)*...*(x+4) */
 980                 /* wh+wl=(x+5)*yy */



 981                 wh = CHOPPED((x5 * (yy.h + yy.l)));
 982                 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
 983                 rr.h = wh * xh;
 984                 rr.l = z * wl + xl * wh;
 985                 break;
 986         case 7:         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
 987                 z1 = x + 3.0L;
 988                 z2 = x + 4.0L;
 989                 z = z2 * z1;
 990                 zh = CHOPPED((z1));
 991                 yh = CHOPPED((z));      /* yh+yl = (x+3)(x+4) */
 992                 yl = (x - (zh - 3.0L)) * (z2 + zh) - (yh - (zh * (zh + one)));
 993                 z1 = x + 6.0L;
 994                 z2 = z - 2.0L;  /* z2 = (x+2)*(x+5) */
 995                 z *= z2;
 996                 xh = CHOPPED((z));
 997                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
 998                                                 /* xh+xl=(x+2)*...*(x+5) */
 999                 /* wh+wl=(x+1)(x+6)*yy */



1000                 z2 -= 4.0L;     /* z2 = (x+1)(x+6) */
1001                 wh = CHOPPED((z2 * (yy.h + yy.l)));
1002                 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0L) * yy.h);
1003                 rr.h = wh * xh;
1004                 rr.l = z * wl + xl * wh;
1005         }

1006         return (rr);
1007 }
1008 
1009 long double
1010 tgammal(long double x) {

1011         struct LDouble ss, ww;
1012         long double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1013         int i, j, m, ix, hx, xk;
1014         unsigned lx;
1015 
1016         hx = H0_WORD(x);
1017         lx = H3_WORD(x);
1018         ix = hx & 0x7fffffff;
1019         y = x;
1020         if (ix < 0x3f8e0000) {       /* x < 2**-113 */

1021                 return (one / x);
1022         }
1023         if (ix >= 0x7fff0000)
1024                 return (x * ((hx < 0)? zero : x));   /* Inf or NaN */

1025         if (x > overflow)    /* overflow threshold */
1026                 return (x * 1.0e4932L);

1027         if (hx >= 0x40020000) {      /* x >= 8 */
1028                 ww = large_gam(x, &m);
1029                 w = ww.h + ww.l;
1030                 return (scalbnl(w, m));
1031         }
1032 
1033         if (hx > 0) {                /* 0 < x < 8 */
1034                 i = (int) x;
1035                 ww = gam_n(i, x - (long double) i);
1036                 return (ww.h + ww.l);
1037         }
1038         /* INDENT OFF */
1039         /* negative x */



1040         /*
1041          * compute xk =
1042          *      -2 ... x is an even int (-inf is considered an even #)
1043          *      -1 ... x is an odd int
1044          *      +0 ... x is not an int but chopped to an even int
1045          *      +1 ... x is not an int but chopped to an odd int
1046          */
1047         /* INDENT ON */
1048         xk = 0;
1049 #if defined(__x86)
1050         if (ix >= 0x403e0000) {      /* x >= 2**63 } */
1051                 if (ix >= 0x403f0000)
1052                         xk = -2;
1053                 else
1054                         xk = -2 + (lx & 1);
1055 #else
1056         if (ix >= 0x406f0000) {      /* x >= 2**112 */
1057                 if (ix >= 0x40700000)
1058                         xk = -2;
1059                 else
1060                         xk = -2 + (lx & 1);
1061 #endif
1062         } else if (ix >= 0x3fff0000) {
1063                 w = -x;
1064                 t1 = floorl(w);
1065                 t2 = t1 * half;
1066                 t3 = floorl(t2);

1067                 if (t1 == w) {
1068                         if (t2 == t3)
1069                                 xk = -2;
1070                         else
1071                                 xk = -1;
1072                 } else {
1073                         if (t2 == t3)
1074                                 xk = 0;
1075                         else
1076                                 xk = 1;
1077                 }
1078         }
1079 
1080         if (xk < 0) {
1081                 /* return NaN. Ideally gamma(-n)= (-1)**(n+1) * inf */
1082                 return (x - x) / (x - x);
1083         }
1084 
1085         /*
1086          * negative underflow thresold -(1774+9ulp)
1087          */
1088         if (x < -1774.0000000000000000000000000000017749370L) {
1089                 z = tiny / x;

1090                 if (xk == 1)
1091                         z = -z;

1092                 return (z * tiny);
1093         }
1094 
1095         /* INDENT OFF */
1096         /*
1097          * now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x
1098          */

1099         /*
1100          * First compute ss = -sin(pi*y)/pi so that
1101          * gamma(x) = 1/(ss*gamma(1+y))
1102          */
1103         /* INDENT ON */
1104         y = -x;
1105         j = (int) y;
1106         z = y - (long double) j;
1107         if (z > 0.3183098861837906715377675L)

1108                 if (z > 0.6816901138162093284622325L)
1109                         ss = kpsin(one - z);
1110                 else
1111                         ss = kpcos(0.5L - z);
1112         else
1113                 ss = kpsin(z);


1114         if (xk == 0) {
1115                 ss.h = -ss.h;
1116                 ss.l = -ss.l;
1117         }
1118 
1119         /* Then compute ww = gamma(1+y), note that result scale to 2**m */
1120         m = 0;

1121         if (j < 7) {
1122                 ww = gam_n(j + 1, z);
1123         } else {
1124                 w = y + one;

1125                 if ((lx & 1) == 0) {        /* y+1 exact (note that y<184) */
1126                         ww = large_gam(w, &m);
1127                 } else {
1128                         t = w - one;

1129                         if (t == y) {   /* y+one exact */
1130                                 ww = large_gam(w, &m);
1131                         } else {        /* use y*gamma(y) */
1132                                 if (j == 7)
1133                                         ww = gam_n(j, z);
1134                                 else
1135                                         ww = large_gam(y, &m);

1136                                 t4 = ww.h + ww.l;
1137                                 t1 = CHOPPED((y));
1138                                 t2 = CHOPPED((t4));
1139                                                 /* t4 will not be too large */
1140                                 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1141                                 ww.h = t1 * t2;
1142                         }
1143                 }
1144         }
1145 
1146         /* compute 1/(ss*ww) */
1147         t3 = ss.h + ss.l;
1148         t4 = ww.h + ww.l;
1149         t1 = CHOPPED((t3));
1150         t2 = CHOPPED((t4));
1151         z1 = ss.l - (t1 - ss.h);        /* (t1,z1) = ss */
1152         z2 = ww.l - (t2 - ww.h);        /* (t2,z2) = ww */
1153         t3 = t3 * t4;                   /* t3 = ss*ww */
1154         z3 = one / t3;                  /* z3 = 1/(ss*ww) */
1155         t5 = t1 * t2;

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __tgammal = tgammal
  32 
  33 #include "libm.h"
  34 #include <sys/isa_defs.h>
  35 
  36 #if defined(_BIG_ENDIAN)
  37 #define H0_WORD(x)              ((unsigned *)&x)[0]
  38 #define H3_WORD(x)              ((unsigned *)&x)[3]
  39 #define CHOPPED(x)              (long double)((double)(x))
  40 #else
  41 #define H0_WORD(x)              ((((int *)&x)[2] << 16) | (0x0000ffff & \
  42         (((unsigned *)&x)[1] >> 15)))
  43 #define H3_WORD(x)              ((unsigned *)&x)[0]
  44 #define CHOPPED(x)              (long double)((float)(x))
  45 #endif
  46 
  47 struct LDouble {
  48         long double h, l;
  49 };
  50 
  51 /*
  52  * Primary interval GTi()
  53  */
  54 static const long double P1[] = {
  55         +0.709086836199777919037185741507610124611513720557L,
  56         +4.45754781206489035827915969367354835667391606951e-0001L,
  57         +3.21049298735832382311662273882632210062918153852e-0002L,
  58         -5.71296796342106617651765245858289197369688864350e-0003L,
  59         +6.04666892891998977081619174969855831606965352773e-0003L,
  60         +8.99106186996888711939627812174765258822658645168e-0004L,
  61         -6.96496846144407741431207008527018441810175568949e-0005L,
  62         +1.52597046118984020814225409300131445070213882429e-0005L,
  63         +5.68521076168495673844711465407432189190681541547e-0007L,
  64         +3.30749673519634895220582062520286565610418952979e-0008L,
  65 };
  66 
  67 static const long double Q1[] = {
  68         +1.0 + 0000L,
  69         +1.35806511721671070408570853537257079579490650668e+0000L,
  70         +2.97567810153429553405327140096063086994072952961e-0001L,
  71         -1.52956835982588571502954372821681851681118097870e-0001L,
  72         -2.88248519561420109768781615289082053597954521218e-0002L,
  73         +1.03475311719937405219789948456313936302378395955e-0002L,
  74         +4.12310203243891222368965360124391297374822742313e-0004L,
  75         -3.12653708152290867248931925120380729518332507388e-0004L,
  76         +2.36672170850409745237358105667757760527014332458e-0005L,
  77 };
  78 
  79 static const long double P2[] = {
  80         +0.428486815855585429730209907810650135255270600668084114L,
  81         +2.62768479103809762805691743305424077975230551176e-0001L,
  82         +3.81187532685392297608310837995193946591425896150e-0002L,
  83         +3.00063075891811043820666846129131255948527925381e-0003L,
  84         +2.47315407812279164228398470797498649142513408654e-0003L,
  85         +3.62838199917848372586173483147214880464782938664e-0004L,
  86         +3.43991105975492623982725644046473030098172692423e-0006L,
  87         +4.56902151569603272237014240794257659159045432895e-0006L,
  88         +2.13734755837595695602045100675540011352948958453e-0007L,
  89         +9.74123440547918230781670266967882492234877125358e-0009L,
  90 };
  91 
  92 static const long double Q2[] = {
  93         +1.0L,
  94         +9.18284118632506842664645516830761489700556179701e-0001L,
  95         -6.41430858837830766045202076965923776189154874947e-0003L,
  96         -1.24400885809771073213345747437964149775410921376e-0001L,
  97         +4.69803798146251757538856567522481979624746875964e-0003L,
  98         +7.18309447069495315914284705109868696262662082731e-0003L,
  99         -8.75812626987894695112722600697653425786166399105e-0004L,
 100         -1.23539972377769277995959339188431498626674835169e-0004L,
 101         +3.10019017590151598732360097849672925448587547746e-0005L,
 102         -1.77260223349332617658921874288026777465782364070e-0006L,
 103 };
 104 
 105 static const long double P3[] = {
 106         +0.3824094797345675048502747661075355640070439388902L,
 107         +3.42198093076618495415854906335908427159833377774e-0001L,
 108         +9.63828189500585568303961406863153237440702754858e-0002L,
 109         +8.76069421042696384852462044188520252156846768667e-0003L,
 110         +1.86477890389161491224872014149309015261897537488e-0003L,
 111         +8.16871354540309895879974742853701311541286944191e-0004L,
 112         +6.83783483674600322518695090864659381650125625216e-0005L,
 113         -1.10168269719261574708565935172719209272190828456e-0006L,
 114         +9.66243228508380420159234853278906717065629721016e-0007L,
 115         +2.31858885579177250541163820671121664974334728142e-0008L,
 116 };
 117 
 118 static const long double Q3[] = {
 119         +1.0L, +8.25479821168813634632437430090376252512793067339e-0001L,

 120         -1.62251363073937769739639623669295110346015576320e-0002L,
 121         -1.10621286905916732758745130629426559691187579852e-0001L,
 122         +3.48309693970985612644446415789230015515365291459e-0003L,
 123         +6.73553737487488333032431261131289672347043401328e-0003L,
 124         -7.63222008393372630162743587811004613050245128051e-0004L,
 125         -1.35792670669190631476784768961953711773073251336e-0004L,
 126         +3.19610150954223587006220730065608156460205690618e-0005L,
 127         -1.82096553862822346610109522015129585693354348322e-0006L,
 128 };
 129 
 130 static const long double
 131 #if defined(__x86)
 132         GZ1_h = 0.938204627909682449364570100414084663498215377L,
 133         GZ1_l = 4.518346116624229420055327632718530617227944106e-20L,
 134         GZ2_h = 0.885603194410888700264725126309883762587560340L,
 135         GZ2_l = 1.409077427270497062039119290776508217077297169e-20L,
 136         GZ3_h = 0.936781411463652321613537060640553022494714241L,
 137         GZ3_l = 5.309836440284827247897772963887219035221996813e-21L,
 138 #else
 139         GZ1_h = 0.938204627909682449409753561580326910854647031L,
 140         GZ1_l = 4.684412162199460089642452580902345976446297037e-35L,
 141         GZ2_h = 0.885603194410888700278815900582588658192658794L,
 142         GZ2_l = 7.501529273890253789219935569758713534641074860e-35L,
 143         GZ3_h = 0.936781411463652321618846897080837818855399840L,
 144         GZ3_l = 3.088721217404784363585591914529361687403776917e-35L,
 145 #endif
 146         TZ1 = -0.3517214357852935791015625L,
 147         TZ3 = 0.280530631542205810546875L;
 148 
 149 

 150 /*
 151  * compute gamma(y=yh+yl) for y in GT1 = [1.0000, 1.2845]
 152  * ...assume yh got 53 or 24(i386) significant bits
 153  */

 154 static struct LDouble
 155 GT1(long double yh, long double yl)
 156 {
 157         long double t3, t4, y;
 158         int i;
 159         struct LDouble r;
 160 
 161         y = yh + yl;
 162 
 163         for (t4 = Q1[8], t3 = P1[8] + y * P1[9], i = 7; i >= 0; i--) {
 164                 t4 = t4 * y + Q1[i];
 165                 t3 = t3 * y + P1[i];
 166         }
 167 
 168         t3 = (y * y) * t3 / t4;
 169         t3 += (TZ1 * yl + GZ1_l);
 170         t4 = TZ1 * yh;
 171         r.h = CHOPPED((t4 + GZ1_h + t3));
 172         t3 += (t4 - (r.h - GZ1_h));
 173         r.l = t3;
 174         return (r);
 175 }
 176 
 177 
 178 /*
 179  * compute gamma(y=yh+yl) for y in GT2 = [1.2844, 1.6374]
 180  * ...assume yh got 53 significant bits
 181  */

 182 static struct LDouble
 183 GT2(long double yh, long double yl)
 184 {
 185         long double t3, t4, y;
 186         int i;
 187         struct LDouble r;
 188 
 189         y = yh + yl;
 190 
 191         for (t4 = Q2[9], t3 = P2[9], i = 8; i >= 0; i--) {
 192                 t4 = t4 * y + Q2[i];
 193                 t3 = t3 * y + P2[i];
 194         }
 195 
 196         t3 = GZ2_l + (y * y) * t3 / t4;
 197         r.h = CHOPPED((GZ2_h + t3));
 198         r.l = t3 - (r.h - GZ2_h);
 199         return (r);
 200 }
 201 
 202 
 203 /*
 204  * compute gamma(y=yh+yl) for y in GT3 = [1.6373, 2.0000]
 205  * ...assume yh got 53 significant bits
 206  */

 207 static struct LDouble
 208 GT3(long double yh, long double yl)
 209 {
 210         long double t3, t4, y;
 211         int i;
 212         struct LDouble r;
 213 
 214         y = yh + yl;
 215 
 216         for (t4 = Q3[9], t3 = P3[9], i = 8; i >= 0; i--) {
 217                 t4 = t4 * y + Q3[i];
 218                 t3 = t3 * y + P3[i];
 219         }
 220 
 221         t3 = (y * y) * t3 / t4;
 222         t3 += (TZ3 * yl + GZ3_l);
 223         t4 = TZ3 * yh;
 224         r.h = CHOPPED((t4 + GZ3_h + t3));
 225         t3 += (t4 - (r.h - GZ3_h));
 226         r.l = t3;
 227         return (r);
 228 }
 229 
 230 /*
 231  * Hex value of GP[0] shoule be 3FB55555 55555555
 232  */
 233 static const long double GP[] = {
 234         +0.083333333333333333333333333333333172839171301L,
 235         -2.77777777777777777777777777492501211999399424104e-0003L,
 236         +7.93650793650793650793635650541638236350020883243e-0004L,
 237         -5.95238095238095238057299772679324503339241961704e-0004L,
 238         +8.41750841750841696138422987977683524926142600321e-0004L,
 239         -1.91752691752686682825032547823699662178842123308e-0003L,
 240         +6.41025641022403480921891559356473451161279359322e-0003L,
 241         -2.95506535798414019189819587455577003732808185071e-0002L,
 242         +1.79644367229970031486079180060923073476568732136e-0001L,
 243         -1.39243086487274662174562872567057200255649290646e+0000L,
 244         +1.34025874044417962188677816477842265259608269775e+0001L,
 245         -1.56803713480127469414495545399982508700748274318e+0002L,
 246         +2.18739841656201561694927630335099313968924493891e+0003L,
 247         -3.55249848644100338419187038090925410976237921269e+0004L,
 248         +6.43464880437835286216768959439484376449179576452e+0005L,
 249         -1.20459154385577014992600342782821389605893904624e+0007L,
 250         +2.09263249637351298563934942349749718491071093210e+0008L,
 251         -2.96247483183169219343745316433899599834685703457e+0009L,
 252         +2.88984933605896033154727626086506756972327292981e+0010L,
 253         -1.40960434146030007732838382416230610302678063984e+0011L, /* 19 */
 254 };
 255 
 256 static const long double T3[] = {
 257         +0.666666666666666666666666666666666634567834260213L, /* T3[0] */
 258         +0.400000000000000000000000000040853636176634934140L, /* T3[1] */
 259         +0.285714285714285714285696975252753987869020263448L, /* T3[2] */
 260         +0.222222222222222225593221101192317258554772129875L, /* T3[3] */
 261         +0.181818181817850192105847183461778186703779262916L, /* T3[4] */
 262         +0.153846169861348633757101285952333369222567014596L, /* T3[5] */
 263         +0.133033462889260193922261296772841229985047571265L, /* T3[6] */
 264 };
 265 /* BEGIN CSTYLED */
 266 static const long double c[] = {
 267         0.0L,
 268         1.0L,
 269         2.0L,
 270         0.5L,
 271         1.0e-4930L,                  /* tiny */
 272         4.18937683105468750000e-01L, /* hln2pim1_h */
 273         8.50099203991780329736405617639861397473637783412817152e-07L, /* hln2pim1_l */
 274         0.418938533204672741780329736405617639861397473637783412817152L, /* hln2pim1 */
 275         2.16608493865351192653179168701171875e-02L, /* ln2_32hi */
 276         5.96317165397058692545083025235937919875797669127130e-12L, /* ln2_32lo */
 277         46.16624130844682903551758979206054839765267053289554989233L, /* invln2_32 */
 278 #if defined(__x86)
 279         1.7555483429044629170023839037639845628291e+03L, /* overflow */
 280 #else
 281         1.7555483429044629170038892160702032034177e+03L, /* overflow */
 282 #endif
 283 };
 284 /* END CSTYLED */
 285 
 286 #define zero                    c[0]
 287 #define one                     c[1]
 288 #define two                     c[2]
 289 #define half                    c[3]
 290 #define tiny                    c[4]
 291 #define hln2pim1_h              c[5]
 292 #define hln2pim1_l              c[6]
 293 #define hln2pim1                c[7]
 294 #define ln2_32hi                c[8]
 295 #define ln2_32lo                c[9]
 296 #define invln2_32               c[10]
 297 #define overflow                c[11]
 298 
 299 /*
 300  * |exp(r) - (1+r+Et0*r^2+...+Et10*r^12)| <= 2^(-128.88) for |r|<=ln2/64
 301  */
 302 static const long double Et[] = {
 303         +5.0000000000000000000e-1L,
 304         +1.66666666666666666666666666666828835166292152466e-0001L,

 310         +2.75573192239028921114572986441972140933432317798e-0006L,
 311         +2.75573192239448470555548102895526369739856219317e-0007L,
 312         +2.50521677867683935940853997995937600214167232477e-0008L,
 313         +2.08767928899010367374984448513685566514152147362e-0009L,
 314 };
 315 
 316 /*
 317  * long double precision coefficients for computing log(x)-1 in tgamma.
 318  *  See "algorithm" for details
 319  *
 320  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 321  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 322  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
 323  *       T2(j) = T2[2j,2j+1] = log(z[j]),
 324  *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + T3[2]s^7 + ... + T3[6]s^15
 325  *  Note
 326  *  (1) the leading entries are truncated to 24 binary point.
 327  *  (2) Remez error for T3(s) is bounded by 2**(-136.54)
 328  */
 329 static const long double T1[] = {
 330         -1.000000000000000000000000000000000000000000e+00L,
 331         +0.000000000000000000000000000000000000000000e+00L,
 332         -3.068528175354003906250000000000000000000000e-01L,
 333         -1.904654299957767878541823431924500011926579e-09L,
 334         +3.862943053245544433593750000000000000000000e-01L,
 335         +5.579533617547508924291635313615100141107647e-08L,
 336         +1.079441487789154052734375000000000000000000e+00L,
 337         +5.389068187551732136437452970422650211661470e-08L,
 338         +1.772588670253753662109375000000000000000000e+00L,
 339         +5.198602757555955348583270627230200282215294e-08L,
 340         +2.465735852718353271484375000000000000000000e+00L,
 341         +5.008137327560178560729088284037750352769117e-08L,
 342         +3.158883035182952880859375000000000000000000e+00L,
 343         +4.817671897564401772874905940845299849351090e-08L,
 344         +3.852030217647552490234375000000000000000000e+00L,
 345         +4.627206467568624985020723597652849919904913e-08L,
 346         +4.545177400112152099609375000000000000000000e+00L,
 347         +4.436741037572848197166541254460399990458737e-08L,
 348         +5.238324582576751708984375000000000000000000e+00L,
 349         +4.246275607577071409312358911267950061012560e-08L,
 350         +5.931471765041351318359375000000000000000000e+00L,
 351         +4.055810177581294621458176568075500131566384e-08L,
 352 };
 353

 540         +1.35425554693689272829801474014070273e+00L,
 541         +1.38390988196383195487265952726519287e+00L,
 542         +1.41421356237309504880168872420969798e+00L,
 543         +1.44518080697704662003700624147167095e+00L,
 544         +1.47682614593949931138690748037404985e+00L,
 545         +1.50916442759342273976601955103319352e+00L,
 546         +1.54221082540794082361229186209073479e+00L,
 547         +1.57598084510788648645527016018190504e+00L,
 548         +1.61049033194925430817952066735740067e+00L,
 549         +1.64575547815396484451875672472582254e+00L,
 550         +1.68179283050742908606225095246642969e+00L,
 551         +1.71861929812247791562934437645631244e+00L,
 552         +1.75625216037329948311216061937531314e+00L,
 553         +1.79470907500310718642770324212778174e+00L,
 554         +1.83400808640934246348708318958828892e+00L,
 555         +1.87416763411029990132999894995444645e+00L,
 556         +1.91520656139714729387261127029583086e+00L,
 557         +1.95714412417540026901832225162687149e+00L,
 558 #endif
 559 };
 560 
 561 static const long double S_trail[] = {
 562 #if defined(__x86)
 563         +0.0000000000000000000000000e+00L,
 564         +2.6327965667180882569382524e-20L,
 565         +8.3765863521895191129661899e-20L,
 566         +3.9798705777454504249209575e-20L,
 567         +1.0668046596651558640993042e-19L,
 568         +1.9376009847285360448117114e-20L,
 569         +6.7081819456112953751277576e-21L,
 570         +1.9711680502629186462729727e-20L,
 571         +2.9932584438449523689104569e-20L,
 572         +6.8887754153039109411061914e-20L,
 573         +6.8002718741225378942847820e-20L,
 574         +6.5846917376975403439742349e-20L,
 575         +1.2171958727511372194876001e-20L,
 576         +3.5625253228704087115438260e-20L,
 577         +3.1129551559077560956309179e-20L,
 578         +5.7519192396164779846216492e-20L,
 579         +3.7900651177865141593101239e-20L,
 580         +1.1659262405698741798080115e-20L,
 581         +7.1364385105284695967172478e-20L,
 582         +5.2631003710812203588788949e-20L,
 583         +2.6328853788732632868460580e-20L,
 584         +5.4583950085438242788190141e-20L,
 585         +9.5803254376938269960718656e-20L,
 586         +7.6837733983874245823512279e-21L,
 587         +2.4415965910835093824202087e-20L,
 588         +2.6052966871016580981769728e-20L,
 589         +2.6876456344632553875309579e-21L,
 590         +1.2861930155613700201703279e-20L,
 591         +8.8166633394037485606572294e-20L,
 592         +2.9788615389580190940837037e-20L,
 593         +5.2352341619805098677422139e-20L,
 594         +5.2578463064010463732242363e-20L,
 595 #else
 596         +0.00000000000000000000000000000000000e+00L,
 597         +1.80506787420330954745573333054573786e-35L,
 598         -9.37452029228042742195756741973083214e-35L,
 599         -1.59696844729275877071290963023149997e-35L,
 600         +9.11249341012502297851168610167248666e-35L,
 601         -6.50422820697854828723037477525938871e-35L,
 602         -8.14846884452585113732569176748815532e-35L,
 603         -5.06621457672180031337233074514290335e-35L,
 604         -1.35983097468881697374987563824591912e-35L,
 605         +9.49742763556319647030771056643324660e-35L,
 606         -3.28317052317699860161506596533391526e-36L,
 607         -5.01723570938719041029018653045842895e-35L,
 608         -2.39147479768910917162283430160264014e-35L,
 609         -8.35057135763390881529889073794408385e-36L,
 610         +7.03675688907326504242173719067187644e-35L,
 611         -5.18248485306464645753689301856695619e-35L,
 612         +9.42224254862183206569211673639406488e-35L,
 613         -3.96750082539886230916730613021641828e-35L,
 614         +7.14352899156330061452327361509276724e-35L,
 615         +1.15987125286798512424651783410044433e-35L,
 616         +4.69693347835811549530973921320187447e-35L,
 617         -3.38651317599500471079924198499981917e-35L,
 618         -8.58731877429824706886865593510387445e-35L,
 619         -9.60595154874935050318549936224606909e-35L,
 620         +9.60973393212801278450755869714178581e-35L,
 621         +6.37839792144002843924476144978084855e-35L,
 622         +7.79243078569586424945646112516927770e-35L,
 623         +7.36133776758845652413193083663393220e-35L,
 624         -6.47299514791334723003521457561217053e-35L,
 625         +8.58747441795369869427879806229522962e-35L,
 626         +2.37181542282517483569165122830269098e-35L,
 627         -3.02689168209611877300459737342190031e-37L,
 628 #endif
 629 };

 630 
 631 
 632 /*
 633  * return tgamma(x) scaled by 2**-m for 8<x<=171.62... using Stirling's formula
 634  *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
 635  *                = L1 + L2 + L3,
 636  */

 637 static struct LDouble
 638 large_gam(long double x, int *m)
 639 {
 640         long double z, t1, t2, t3, z2, t5, w, y, u, r, v;
 641         long double t24 = 16777216.0L, p24 = 1.0L / 16777216.0L;
 642         int n2, j2, k, ix, j, i;
 643         struct LDouble zz;
 644         long double u2, ss_h, ss_l, r_h, w_h, w_l, t4;
 645 
 646 /* BEGIN CSTYLED */
 647 /*
 648  * compute ss = ss.h+ss.l = log(x)-1 (see tgamma_log.h for details)
 649  *
 650  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 651  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 652  *       T1(n) = T1[2n,2n+1] = n*log(2)-1,
 653  *       T2(j) = T2[2j,2j+1] = log(z[j]),
 654  *       T3(s) = 2s + T3[0]s^3 + T3[1]s^5 + ... + T3[6]s^15
 655  *  Note
 656  *  (1) the leading entries are truncated to 24 binary point.
 657  *  (2) Remez error for T3(s) is bounded by 2**(-72.4)
 658  *                                   2**(-24)
 659  *                           _________V___________________
 660  *               T1(n):     |_________|___________________|
 661  *                             _______ ______________________
 662  *               T2(j):       |_______|______________________|
 663  *                                ____ _______________________
 664  *               2s:             |____|_______________________|
 665  *                                    __________________________
 666  *          +    T3(s)-2s:           |__________________________|
 667  *                       -------------------------------------------
 668  *                          [leading] + [Trailing]
 669  */
 670         /* END CSTYLED */
 671         ix = H0_WORD(x);
 672         n2 = (ix >> 16) - 0x3fff; /* exponent of x, range:3-10 */
 673         y = scalbnl(x, -n2);      /* y = scale x to [1,2] */
 674         n2 += n2;                 /* 2n */
 675         j = (ix >> 10) & 0x3f;          /* j */
 676         z = 1.0078125L + (long double)j * 0.015625L;    /* z[j]=1+j/64+1/128 */
 677         j2 = j + j;
 678         t1 = y + z;
 679         t2 = y - z;
 680         r = one / t1;
 681         u = r * t2;                     /* u = (y-z)/(y+z) */
 682         t1 = CHOPPED(t1);
 683         t4 = T2[j2 + 1] + T1[n2 + 1];
 684         z2 = u * u;
 685         k = H0_WORD(u) & 0x7fffffff;
 686         t3 = T2[j2] + T1[n2];
 687 
 688         for (t5 = T3[6], i = 5; i >= 0; i--)
 689                 t5 = z2 * t5 + T3[i];
 690 
 691         if ((k >> 16) < 0x3fec) {      /* |u|<2**-19 */
 692                 t2 = t4 + u * (two + z2 * t5);
 693         } else {
 694                 t5 = t4 + (u * z2) * t5;
 695                 u2 = u + u;
 696                 v = (long double)((int)(u2 * t24)) * p24;
 697                 t2 = t5 + r * ((two * t2 - v * t1) - v * (y - (t1 - z)));
 698                 t3 += v;
 699         }
 700 
 701         ss_h = CHOPPED((t2 + t3));
 702         ss_l = t2 - (ss_h - t3);
 703 
 704 /*
 705  * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
 706  * where ss = log(x) - 1 in already in extra precision
 707  */

 708         z = one / x;
 709         r = x - half;
 710         r_h = CHOPPED((r));
 711         w_h = r_h * ss_h + hln2pim1_h;
 712         z2 = z * z;
 713         w = (r - r_h) * ss_h + r * ss_l;
 714         t1 = GP[19];
 715 
 716         for (i = 18; i > 0; i--)
 717                 t1 = z2 * t1 + GP[i];
 718 
 719         w += hln2pim1_l;
 720         w_l = z * (GP[0] + z2 * t1) + w;
 721         k = (int)((w_h + w_l) * invln2_32 + half);
 722 
 723         /* compute the exponential of w_h+w_l */
 724 
 725         j = k & 0x1f;
 726         *m = k >> 5;
 727         t3 = (long double)k;
 728 
 729         /* perform w - k*ln2_32 (represent as w_h - w_l) */
 730         t1 = w_h - t3 * ln2_32hi;
 731         t2 = t3 * ln2_32lo;
 732         w = t2 - w_l;
 733         w_h = t1 - w;
 734         w_l = w - (t1 - w_h);
 735 
 736         /* compute exp(w_h-w_l) */
 737         z = w_h - w_l;
 738 
 739         for (t1 = Et[10], i = 9; i >= 0; i--)
 740                 t1 = z * t1 + Et[i];
 741 
 742         t3 = w_h - (w_l - (z * z) * t1);        /* t3 = expm1(z) */
 743         zz.l = S_trail[j] * (one + t3) + S[j] * t3;
 744         zz.h = S[j];
 745         return (zz);
 746 }
 747 
 748 
 749 /*
 750  * kpsin(x)= sin(pi*x)/pi
 751  *                 3        5        7        9        11                27
 752  *      = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x +ks[4]*x  + ... + ks[12]*x
 753  */
 754 static const long double ks[] = {
 755         -1.64493406684822643647241516664602518705158902870e+0000L,
 756         +8.11742425283353643637002772405874238094995726160e-0001L,
 757         -1.90751824122084213696472111835337366232282723933e-0001L,
 758         +2.61478478176548005046532613563241288115395517084e-0002L,
 759         -2.34608103545582363750893072647117829448016479971e-0003L,
 760         +1.48428793031071003684606647212534027556262040158e-0004L,
 761         -6.97587366165638046518462722252768122615952898698e-0006L,
 762         +2.53121740413702536928659271747187500934840057929e-0007L,
 763         -7.30471182221385990397683641695766121301933621956e-0009L,
 764         +1.71653847451163495739958249695549313987973589884e-0010L,
 765         -3.34813314714560776122245796929054813458341420565e-0012L,
 766         +5.50724992262622033449487808306969135431411753047e-0014L,
 767         -7.67678132753577998601234393215802221104236979928e-0016L,
 768 };

 769 
 770 /*
 771  * assume x is not tiny and positive
 772  */
 773 static struct LDouble
 774 kpsin(long double x)
 775 {
 776         long double z, t1, t2;
 777         struct LDouble xx;
 778         int i;
 779 
 780         z = x * x;
 781         xx.h = x;
 782 
 783         for (t2 = ks[12], i = 11; i > 0; i--)
 784                 t2 = z * t2 + ks[i];
 785 
 786         t1 = z * x;
 787         t2 *= z * t1;
 788         xx.l = t1 * ks[0] + t2;
 789         return (xx);
 790 }
 791 
 792 
 793 /*
 794  * kpcos(x)= cos(pi*x)/pi
 795  *                     2        4        6        8        10        12
 796  *      = 1/pi +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x +kc[4]*x  +kc[5]*x
 797  *
 798  *                     2        4        6        8        10            22
 799  *      = 1/pi - pi/2*x +kc[0]*x +kc[1]*x +kc[2]*x +kc[3]*x  +...+kc[9]*x
 800  *
 801  * -pi/2*x*x = (npi_2_h + npi_2_l) * (x_f+x_l)*(x_f+x_l)
 802  *         =  npi_2_h*(x_f+x_l)*(x_f+x_l) + npi_2_l*x*x
 803  *         =  npi_2_h*x_f*x_f + npi_2_h*(x*x-x_f*x_f) + npi_2_l*x*x
 804  *         =  npi_2_h*x_f*x_f + npi_2_h*(x+x_f)*(x-x_f) + npi_2_l*x*x
 805  * Here x_f = (long double) (float)x
 806  * Note that pi/2(in hex) =
 807  *  1.921FB54442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29
 808  * npi_2_h = -pi/2 chopped to 25 bits = -1.921FB50000000000000000000000000 =
 809  *  -1.570796310901641845703125000000000 and
 810  * npi_2_l =
 811  *  -0.0000004442D18469898CC51701B839A252049C1114CF98E804177D4C76273644A29 =
 812  *  -.0000000158932547735281966916397514420985846996875529104874722961539 =
 813  *  -1.5893254773528196691639751442098584699687552910487472296153e-8
 814  * 1/pi(in hex) =
 815  *  .517CC1B727220A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
 816  * will be splitted into:
 817  *  one_pi_h = 1/pi chopped to 48 bits = .517CC1B727220000000000...  and
 818  *  one_pi_l = .0000000000000A94FE13ABE8FA9A6EE06DB14ACC9E21C820FF28B1D5EF5DE2B
 819  */
 820 
 821 static const long double
 822 #if defined(__x86)
 823         one_pi_h = 0.3183098861481994390487670898437500L,       /* 31 bits */
 824         one_pi_l = 3.559123248900043690127872406891929148e-11L,
 825 #else
 826         one_pi_h = 0.31830988618379052468299050815403461456298828125L,
 827         one_pi_l = 1.46854777018590994109505931010230912897495334688117e-16L,
 828 #endif
 829         npi_2_h = -1.570796310901641845703125000000000L,
 830         npi_2_l = -1.5893254773528196691639751442098584699687552910e-8L;

 831 static const long double kc[] = {
 832         +1.29192819501249250731151312779548918765320728489e+0000L,
 833         -4.25027339979557573976029596929319207009444090366e-0001L,
 834         +7.49080661650990096109672954618317623888421628613e-0002L,
 835         -8.21458866111282287985539464173976555436050215120e-0003L,
 836         +6.14202578809529228503205255165761204750211603402e-0004L,
 837         -3.33073432691149607007217330302595267179545908740e-0005L,
 838         +1.36970959047832085796809745461530865597993680204e-0006L,
 839         -4.41780774262583514450246512727201806217271097336e-0008L,
 840         +1.14741409212381858820016567664488123478660705759e-0009L,
 841         -2.44261236114707374558437500654381006300502749632e-0011L,
 842 };

 843 
 844 /*
 845  * assume x is not tiny and positive
 846  */
 847 static struct LDouble
 848 kpcos(long double x)
 849 {
 850         long double z, t1, t2, t3, t4, x4, x8;
 851         int i;
 852         struct LDouble xx;
 853 
 854         z = x * x;
 855         xx.h = one_pi_h;
 856         t1 = (long double)((float)x);
 857         x4 = z * z;
 858         t2 = npi_2_l * z + npi_2_h * (x + t1) * (x - t1);
 859 
 860         for (i = 8, t3 = kc[9]; i >= 0; i--)
 861                 t3 = z * t3 + kc[i];
 862 
 863         t3 = one_pi_l + x4 * t3;
 864         t4 = t1 * t1 * npi_2_h;
 865         x8 = t2 + t3;
 866         xx.l = x8 + t4;
 867         return (xx);
 868 }
 869 

 870 static const long double
 871 /* 0.13486180573279076968979393577465291700642511139552429398233 */
 872 #if defined(__x86)
 873         t0z1 = 0.1348618057327907696779385054997035808810L,
 874         t0z1_l = 1.1855430274949336125392717150257379614654e-20L,
 875 #else
 876         t0z1 = 0.1348618057327907696897939357746529168654L,
 877         t0z1_l = 1.4102088588676879418739164486159514674310e-37L,
 878 #endif
 879 /* 0.46163214496836234126265954232572132846819620400644635129599 */
 880 #if defined(__x86)
 881         t0z2 = 0.4616321449683623412538115843295472018326L,
 882         t0z2_l = 8.84795799617412663558532305039261747030640e-21L,
 883 #else
 884         t0z2 = 0.46163214496836234126265954232572132343318L,
 885         t0z2_l = 5.03501162329616380465302666480916271611101e-36L,
 886 #endif
 887 /* 0.81977310110050060178786870492160699631174407846245179119586 */
 888 #if defined(__x86)
 889         t0z3 = 0.81977310110050060178773362329351925836817L,
 890         t0z3_l = 1.350816280877379435658077052534574556256230e-22L
 891 #else
 892         t0z3 = 0.8197731011005006017878687049216069516957449L,
 893         t0z3_l = 4.461599916947014419045492615933551648857380e-35L
 894 #endif
 895 ;

 896 
 897 /*
 898  * gamma(x+i) for 0 <= x < 1
 899  */
 900 static struct LDouble
 901 gam_n(int i, long double x)
 902 {
 903         struct LDouble rr = { 0.0L, 0.0L }, yy;
 904         long double r1, r2, t2, z, xh, xl, yh, yl, zh, z1, z2, zl, x5, wh, wl;
 905 
 906         /* compute yy = gamma(x+1) */
 907         if (x > 0.2845L) {
 908                 if (x > 0.6374L) {
 909                         r1 = x - t0z3;
 910                         r2 = CHOPPED((r1 - t0z3_l));
 911                         t2 = r1 - r2;
 912                         yy = GT3(r2, t2 - t0z3_l);
 913                 } else {
 914                         r1 = x - t0z2;
 915                         r2 = CHOPPED((r1 - t0z2_l));
 916                         t2 = r1 - r2;
 917                         yy = GT2(r2, t2 - t0z2_l);
 918                 }
 919         } else {
 920                 r1 = x - t0z1;
 921                 r2 = CHOPPED((r1 - t0z1_l));
 922                 t2 = r1 - r2;
 923                 yy = GT1(r2, t2 - t0z1_l);
 924         }
 925 
 926         /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
 927         switch (i) {
 928         case 0:                         /* yy/x */
 929                 r1 = one / x;
 930                 xh = CHOPPED((x));      /* x is not tiny */
 931                 rr.h = CHOPPED(((yy.h + yy.l) * r1));
 932                 rr.l = r1 * (yy.h - rr.h * xh) - ((r1 * rr.h) * (x - xh) - r1 *
 933                     yy.l);
 934                 break;
 935         case 1:                         /* yy */
 936                 rr.h = yy.h;
 937                 rr.l = yy.l;
 938                 break;
 939         case 2:                         /* (x+1)*yy */
 940                 z = x + one;            /* may not be exact */
 941                 zh = CHOPPED((z));
 942                 rr.h = zh * yy.h;
 943                 rr.l = z * yy.l + (x - (zh - one)) * yy.h;
 944                 break;
 945         case 3:                         /* (x+2)*(x+1)*yy */
 946                 z1 = x + one;
 947                 z2 = x + 2.0L;
 948                 z = z1 * z2;
 949                 xh = CHOPPED((z));
 950                 zh = CHOPPED((z1));
 951                 xl = (x - (zh - one)) * (z2 + zh) - (xh - zh * (zh + one));
 952 
 953                 rr.h = xh * yy.h;

 981                 z *= z2;
 982                 xh = CHOPPED((z));
 983                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
 984                 rr.h = xh * yy.h;
 985                 rr.l = z * yy.l + xl * yy.h;
 986                 break;
 987         case 6:                         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
 988                 z1 = x + 2.0L;
 989                 z2 = x + 3.0L;
 990                 z = z1 * z2;
 991                 zh = CHOPPED((z1));
 992                 yh = CHOPPED((z));
 993                 z1 = x - (zh - 2.0L);
 994                 yl = z1 * (z2 + zh) - (yh - zh * (zh + one));
 995                 z2 = z - 2.0L;
 996                 x5 = x + 5.0L;
 997                 z *= z2;
 998                 xh = CHOPPED(z);
 999                 zh += 3.0;
1000                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
1001 
1002                 /*
1003                  * xh+xl=(x+1)*...*(x+4)
1004                  * wh+wl=(x+5)*yy
1005                  */
1006                 wh = CHOPPED((x5 * (yy.h + yy.l)));
1007                 wl = (z1 * yy.h + x5 * yy.l) - (wh - zh * yy.h);
1008                 rr.h = wh * xh;
1009                 rr.l = z * wl + xl * wh;
1010                 break;
1011         case 7:                 /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
1012                 z1 = x + 3.0L;
1013                 z2 = x + 4.0L;
1014                 z = z2 * z1;
1015                 zh = CHOPPED((z1));
1016                 yh = CHOPPED((z));      /* yh+yl = (x+3)(x+4) */
1017                 yl = (x - (zh - 3.0L)) * (z2 + zh) - (yh - (zh * (zh + one)));
1018                 z1 = x + 6.0L;
1019                 z2 = z - 2.0L;          /* z2 = (x+2)*(x+5) */
1020                 z *= z2;
1021                 xh = CHOPPED((z));
1022                 xl = yl * (z2 + yh) - (xh - yh * (yh - 2.0L));
1023 
1024                 /*
1025                  * xh+xl=(x+2)*...*(x+5)
1026                  * wh+wl=(x+1)(x+6)*yy
1027                  */
1028                 z2 -= 4.0L;             /* z2 = (x+1)(x+6) */
1029                 wh = CHOPPED((z2 * (yy.h + yy.l)));
1030                 wl = (z2 * yy.l + yl * yy.h) - (wh - (yh - 6.0L) * yy.h);
1031                 rr.h = wh * xh;
1032                 rr.l = z * wl + xl * wh;
1033         }
1034 
1035         return (rr);
1036 }
1037 
1038 long double
1039 tgammal(long double x)
1040 {
1041         struct LDouble ss, ww;
1042         long double t, t1, t2, t3, t4, t5, w, y, z, z1, z2, z3, z5;
1043         int i, j, m, ix, hx, xk;
1044         unsigned lx;
1045 
1046         hx = H0_WORD(x);
1047         lx = H3_WORD(x);
1048         ix = hx & 0x7fffffff;
1049         y = x;
1050 
1051         if (ix < 0x3f8e0000)         /* x < 2**-113 */
1052                 return (one / x);
1053 
1054         if (ix >= 0x7fff0000)
1055                 return (x * ((hx < 0) ? zero : x));  /* Inf or NaN */
1056 
1057         if (x > overflow)                            /* overflow threshold */
1058                 return (x * 1.0e4932L);
1059 
1060         if (hx >= 0x40020000) {                              /* x >= 8 */
1061                 ww = large_gam(x, &m);
1062                 w = ww.h + ww.l;
1063                 return (scalbnl(w, m));
1064         }
1065 
1066         if (hx > 0) {                        /* 0 < x < 8 */
1067                 i = (int)x;
1068                 ww = gam_n(i, x - (long double)i);
1069                 return (ww.h + ww.l);
1070         }
1071 
1072         /*
1073          * negative x
1074          */
1075 
1076         /*
1077          * compute xk =
1078          *      -2 ... x is an even int (-inf is considered an even #)
1079          *      -1 ... x is an odd int
1080          *      +0 ... x is not an int but chopped to an even int
1081          *      +1 ... x is not an int but chopped to an odd int
1082          */

1083         xk = 0;
1084 #if defined(__x86)
1085         if (ix >= 0x403e0000) {              /* x >= 2**63 } */
1086                 if (ix >= 0x403f0000)
1087                         xk = -2;
1088                 else
1089                         xk = -2 + (lx & 1);
1090 #else
1091         if (ix >= 0x406f0000) {              /* x >= 2**112 */
1092                 if (ix >= 0x40700000)
1093                         xk = -2;
1094                 else
1095                         xk = -2 + (lx & 1);
1096 #endif
1097         } else if (ix >= 0x3fff0000) {
1098                 w = -x;
1099                 t1 = floorl(w);
1100                 t2 = t1 * half;
1101                 t3 = floorl(t2);
1102 
1103                 if (t1 == w) {
1104                         if (t2 == t3)
1105                                 xk = -2;
1106                         else
1107                                 xk = -1;
1108                 } else {
1109                         if (t2 == t3)
1110                                 xk = 0;
1111                         else
1112                                 xk = 1;
1113                 }
1114         }
1115 
1116         if (xk < 0) {
1117                 /* return NaN. Ideally gamma(-n)= (-1)**(n+1) * inf */
1118                 return ((x - x) / (x - x));
1119         }
1120 
1121         /*
1122          * negative underflow thresold -(1774+9ulp)
1123          */
1124         if (x < -1774.0000000000000000000000000000017749370L) {
1125                 z = tiny / x;
1126 
1127                 if (xk == 1)
1128                         z = -z;
1129 
1130                 return (z * tiny);
1131         }
1132 
1133 
1134         /*
1135          * now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x
1136          */
1137 
1138         /*
1139          * First compute ss = -sin(pi*y)/pi so that
1140          * gamma(x) = 1/(ss*gamma(1+y))
1141          */

1142         y = -x;
1143         j = (int)y;
1144         z = y - (long double)j;
1145 
1146         if (z > 0.3183098861837906715377675L) {
1147                 if (z > 0.6816901138162093284622325L)
1148                         ss = kpsin(one - z);
1149                 else
1150                         ss = kpcos(0.5L - z);
1151         } else {
1152                 ss = kpsin(z);
1153         }
1154 
1155         if (xk == 0) {
1156                 ss.h = -ss.h;
1157                 ss.l = -ss.l;
1158         }
1159 
1160         /* Then compute ww = gamma(1+y), note that result scale to 2**m */
1161         m = 0;
1162 
1163         if (j < 7) {
1164                 ww = gam_n(j + 1, z);
1165         } else {
1166                 w = y + one;
1167 
1168                 if ((lx & 1) == 0) {        /* y+1 exact (note that y<184) */
1169                         ww = large_gam(w, &m);
1170                 } else {
1171                         t = w - one;
1172 
1173                         if (t == y) {   /* y+one exact */
1174                                 ww = large_gam(w, &m);
1175                         } else {        /* use y*gamma(y) */
1176                                 if (j == 7)
1177                                         ww = gam_n(j, z);
1178                                 else
1179                                         ww = large_gam(y, &m);
1180 
1181                                 t4 = ww.h + ww.l;
1182                                 t1 = CHOPPED((y));
1183                                 t2 = CHOPPED((t4));
1184                                 /* t4 will not be too large */
1185                                 ww.l = y * (ww.l - (t2 - ww.h)) + (y - t1) * t2;
1186                                 ww.h = t1 * t2;
1187                         }
1188                 }
1189         }
1190 
1191         /* compute 1/(ss*ww) */
1192         t3 = ss.h + ss.l;
1193         t4 = ww.h + ww.l;
1194         t1 = CHOPPED((t3));
1195         t2 = CHOPPED((t4));
1196         z1 = ss.l - (t1 - ss.h);        /* (t1,z1) = ss */
1197         z2 = ww.l - (t2 - ww.h);        /* (t2,z2) = ww */
1198         t3 = t3 * t4;                   /* t3 = ss*ww */
1199         z3 = one / t3;                  /* z3 = 1/(ss*ww) */
1200         t5 = t1 * t2;