il_11210 New usr/src/lib/libm/common/m9x/tgammaf.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __tgammaf = tgammaf
  32 
  33 /*
  34  * True gamma function
  35  *
  36  * float tgammaf(float x)
  37  *
  38  * Algorithm: see tgamma.c
  39  *
  40  * Maximum error observed: 0.87ulp (both positive and negative arguments)
  41  */
  42 
  43 #include "libm.h"
  44 #include <math.h>
  45 #if defined(__SUNPRO_C)
  46 #include <sunmath.h>
  47 #endif
  48 #include <sys/isa_defs.h>
  49 
  50 #if defined(_BIG_ENDIAN)
  51 #define HIWORD          0
  52 #define LOWORD          1
  53 #else
  54 #define HIWORD          1
  55 #define LOWORD          0
  56 #endif
  57 #define __HI(x)         ((int *)&x)[HIWORD]
  58 #define __LO(x)         ((unsigned *)&x)[LOWORD]
  59 
  60 /* Coefficients for primary intervals GTi() */
  61 static const double cr[] = {
  62         /* p1 */
  63         +7.09087253435088360271451613398019280077561279443e-0001,
  64         -5.17229560788652108545141978238701790105241761089e-0001,
  65         +5.23403394528150789405825222323770647162337764327e-0001,
  66         -4.54586308717075010784041566069480411732634814899e-0001,
  67         +4.20596490915239085459964590559256913498190955233e-0001,
  68         -3.57307589712377520978332185838241458642142185789e-0001,
  69         /* p2 */
  70         +4.28486983980295198166056119223984284434264344578e-0001,
  71         -1.30704539487709138528680121627899735386650103914e-0001,
  72         +1.60856285038051955072861219352655851542955430871e-0001,
  73         -9.22285161346010583774458802067371182158937943507e-0002,
  74         +7.19240511767225260740890292605070595560626179357e-0002,
  75         -4.88158265593355093703112238534484636193260459574e-0002,
  76         /* p3 */
  77         +3.82409531118807759081121479786092134814808872880e-0001,
  78         +2.65309888180188647956400403013495759365167853426e-0002,
  79         +8.06815109775079171923561169415370309376296739835e-0002,
  80         -1.54821591666137613928840890835174351674007764799e-0002,
  81         +1.76308239242717268530498313416899188157165183405e-0002,
  82         /* GZi and TZi */
  83         +0.9382046279096824494097535615803269576988,    /* GZ1 */
  84         +0.8856031944108887002788159005825887332080,    /* GZ2 */
  85         +0.9367814114636523216188468970808378497426,    /* GZ3 */
  86         -0.3517214357852935791015625,                   /* TZ1 */
  87         +0.280530631542205810546875,                    /* TZ3 */
  88 };
  89 
  90 #define P10             cr[0]
  91 #define P11             cr[1]
  92 #define P12             cr[2]
  93 #define P13             cr[3]
  94 #define P14             cr[4]
  95 #define P15             cr[5]
  96 #define P20             cr[6]
  97 #define P21             cr[7]
  98 #define P22             cr[8]
  99 #define P23             cr[9]
 100 #define P24             cr[10]
 101 #define P25             cr[11]
 102 #define P30             cr[12]
 103 #define P31             cr[13]
 104 #define P32             cr[14]
 105 #define P33             cr[15]
 106 #define P34             cr[16]
 107 #define GZ1             cr[17]
 108 #define GZ2             cr[18]
 109 #define GZ3             cr[19]
 110 #define TZ1             cr[20]
 111 #define TZ3             cr[21]
 112 
 113 /* compute gamma(y) for y in GT1 = [1.0000, 1.2845] */
 114 static double
 115 GT1(double y)
 116 {
 117         double z, r;
 118 
 119         z = y * y;
 120         r = TZ1 * y + z * ((P10 + y * P11 + z * P12) + (z * y) * (P13 + y *
 121             P14 + z * P15));
 122         return (GZ1 + r);
 123 }
 124 
 125 /* compute gamma(y) for y in GT2 = [1.2844, 1.6374] */
 126 static double
 127 GT2(double y)
 128 {
 129         double z;
 130 
 131         z = y * y;
 132         return (GZ2 + z * ((P20 + y * P21 + z * P22) + (z * y) * (P23 + y *
 133             P24 + z * P25)));
 134 }
 135 
 136 /* compute gamma(y) for y in GT3 = [1.6373, 2.0000] */
 137 static double
 138 GT3(double y)
 139 {
 140         double z, r;
 141 
 142         z = y * y;
 143         r = TZ3 * y + z * ((P30 + y * P31 + z * P32) + (z * y) * (P33 + y *
 144             P34));
 145         return (GZ3 + r);
 146 }
 147 
 148 static const double c[] = {
 149         +1.0,
 150         +2.0,
 151         +0.5,
 152         +1.0e-300,
 153         +6.666717231848518054693623697539230e-0001,     /* A1=T3[0] */
 154         +8.33333330959694065245736888749042811909994573178e-0002, /* GP[0] */
 155         -2.77765545601667179767706600890361535225507762168e-0003, /* GP[1] */
 156         +7.77830853479775281781085278324621033523037489883e-0004, /* GP[2] */
 157         +4.18938533204672741744150788368695779923320328369e-0001, /* hln2pi   */
 158         +2.16608493924982901946e-02, /* ln2_32 */
 159         +4.61662413084468283841e+01, /* invln2_32 */
 160         +5.00004103388988968841156421415669985414073453720e-0001, /* Et1 */
 161         +1.66667656752800761782778277828110208108687545908e-0001, /* Et2 */
 162 };
 163 
 164 #define one             c[0]
 165 #define two             c[1]
 166 #define half            c[2]
 167 #define tiny            c[3]
 168 #define A1              c[4]
 169 #define GP0             c[5]
 170 #define GP1             c[6]
 171 #define GP2             c[7]
 172 #define hln2pi          c[8]
 173 #define ln2_32          c[9]
 174 #define invln2_32       c[10]
 175 #define Et1             c[11]
 176 #define Et2             c[12]
 177 
 178 /* S[j] = 2**(j/32.) for the final computation of exp(w) */
 179 static const double S[] = {
 180         +1.00000000000000000000e+00,    /* 3FF0000000000000 */
 181         +1.02189714865411662714e+00,    /* 3FF059B0D3158574 */
 182         +1.04427378242741375480e+00,    /* 3FF0B5586CF9890F */
 183         +1.06714040067682369717e+00,    /* 3FF11301D0125B51 */
 184         +1.09050773266525768967e+00,    /* 3FF172B83C7D517B */
 185         +1.11438674259589243221e+00,    /* 3FF1D4873168B9AA */
 186         +1.13878863475669156458e+00,    /* 3FF2387A6E756238 */
 187         +1.16372485877757747552e+00,    /* 3FF29E9DF51FDEE1 */
 188         +1.18920711500272102690e+00,    /* 3FF306FE0A31B715 */
 189         +1.21524735998046895524e+00,    /* 3FF371A7373AA9CB */
 190         +1.24185781207348400201e+00,    /* 3FF3DEA64C123422 */
 191         +1.26905095719173321989e+00,    /* 3FF44E086061892D */
 192         +1.29683955465100964055e+00,    /* 3FF4BFDAD5362A27 */
 193         +1.32523664315974132322e+00,    /* 3FF5342B569D4F82 */
 194         +1.35425554693689265129e+00,    /* 3FF5AB07DD485429 */
 195         +1.38390988196383202258e+00,    /* 3FF6247EB03A5585 */
 196         +1.41421356237309514547e+00,    /* 3FF6A09E667F3BCD */
 197         +1.44518080697704665027e+00,    /* 3FF71F75E8EC5F74 */
 198         +1.47682614593949934623e+00,    /* 3FF7A11473EB0187 */
 199         +1.50916442759342284141e+00,    /* 3FF82589994CCE13 */
 200         +1.54221082540794074411e+00,    /* 3FF8ACE5422AA0DB */
 201         +1.57598084510788649659e+00,    /* 3FF93737B0CDC5E5 */
 202         +1.61049033194925428347e+00,    /* 3FF9C49182A3F090 */
 203         +1.64575547815396494578e+00,    /* 3FFA5503B23E255D */
 204         +1.68179283050742900407e+00,    /* 3FFAE89F995AD3AD */
 205         +1.71861929812247793414e+00,    /* 3FFB7F76F2FB5E47 */
 206         +1.75625216037329945351e+00,    /* 3FFC199BDD85529C */
 207         +1.79470907500310716820e+00,    /* 3FFCB720DCEF9069 */
 208         +1.83400808640934243066e+00,    /* 3FFD5818DCFBA487 */
 209         +1.87416763411029996256e+00,    /* 3FFDFC97337B9B5F */
 210         +1.91520656139714740007e+00,    /* 3FFEA4AFA2A490DA */
 211         +1.95714412417540017941e+00,    /* 3FFF50765B6E4540 */
 212 };
 213 
 214 
 215 /*
 216  * return tgammaf(x) in double for 8<x<=35.040096283... using Stirling's formula
 217  *     log(G(x)) ~= (x-.5)*(log(x)-1) + .5(log(2*pi)-1) + (1/x)*P(1/(x*x))
 218  */
 219 
 220 /*
 221  * compute ss = log(x)-1
 222  *
 223  *  log(x) - 1 = T1(n) + T2(j) + T3(s), where x = 2**n * y,  1<=y<2,
 224  *  j=[64*y], z[j]=1+j/64+1/128, s = (y-z[j])/(y+z[j]), and
 225  *       T1(n-3) = n*log(2)-1,  n=3,4,5
 226  *       T2(j) = log(z[j]),
 227  *       T3(s) = 2s + A1*s^3
 228  *  Note
 229  *  (1) Remez error for T3(s) is bounded by 2**(-35.8)
 230  *      (see mpremez/work/Log/tgamma_log_2_outr1)
 231  */
 232 static const double T1[] = {            /* T1[j]=(j+3)*log(2)-1 */
 233         +1.079441541679835928251696364375e+00,
 234         +1.772588722239781237668928485833e+00,
 235         +2.465735902799726547086160607291e+00,
 236 };
 237 
 238 static const double T2[] = {            /* T2[j]=log(1+j/64+1/128) */
 239         +7.782140442054948947462900061137e-03,
 240         +2.316705928153437822879916096229e-02,
 241         +3.831886430213659919375532512380e-02,
 242         +5.324451451881228286587019378653e-02,
 243         +6.795066190850774939456527777263e-02,
 244         +8.244366921107459126816006866831e-02,
 245         +9.672962645855111229557105648746e-02,
 246         +1.108143663402901141948061693232e-01,
 247         +1.247034785009572358634065153809e-01,
 248         +1.384023228591191356853258736016e-01,
 249         +1.519160420258419750718034248969e-01,
 250         +1.652495728953071628756114492772e-01,
 251         +1.784076574728182971194002415109e-01,
 252         +1.913948529996294546092988075613e-01,
 253         +2.042155414286908915038203861962e-01,
 254         +2.168739383006143596190895257443e-01,
 255         +2.293741010648458299914807250461e-01,
 256         +2.417199368871451681443075159135e-01,
 257         +2.539152099809634441373232979066e-01,
 258         +2.659635484971379413391259265375e-01,
 259         +2.778684510034563061863500329234e-01,
 260         +2.896332925830426768788930555257e-01,
 261         +3.012613305781617810128755382338e-01,
 262         +3.127557100038968883862465596883e-01,
 263         +3.241194686542119760906707604350e-01,
 264         +3.353555419211378302571795798142e-01,
 265         +3.464667673462085809184621884258e-01,
 266         +3.574558889218037742260094901409e-01,
 267         +3.683255611587076530482301540504e-01,
 268         +3.790783529349694583908533456310e-01,
 269         +3.897167511400252133704636040035e-01,
 270         +4.002431641270127069293251019951e-01,
 271         +4.106599249852683859343062031758e-01,
 272         +4.209692946441296361288671615068e-01,
 273         +4.311734648183713408591724789556e-01,
 274         +4.412745608048752294894964416613e-01,
 275         +4.512746441394585851446923830790e-01,
 276         +4.611757151221701663679999255979e-01,
 277         +4.709797152187910125468978560564e-01,
 278         +4.806885293457519076766184554480e-01,
 279         +4.903039880451938381503461596457e-01,
 280         +4.998278695564493298213314152470e-01,
 281         +5.092619017898079468040749192283e-01,
 282         +5.186077642080456321529769963648e-01,
 283         +5.278670896208423851138922177783e-01,
 284         +5.370414658968836545667292441538e-01,
 285         +5.461324375981356503823972092312e-01,
 286         +5.551415075405015927154803595159e-01,
 287         +5.640701382848029660713842900902e-01,
 288         +5.729197535617855090927567266263e-01,
 289         +5.816917396346224825206107537254e-01,
 290         +5.903874466021763746419167081236e-01,
 291         +5.990081896460833993816000244617e-01,
 292         +6.075552502245417955010851527911e-01,
 293         +6.160298772155140196475659281967e-01,
 294         +6.244332880118935010425387440547e-01,
 295         +6.327666695710378295457864685036e-01,
 296         +6.410311794209312910556013344054e-01,
 297         +6.492279466251098188908399699053e-01,
 298         +6.573580727083600301418900232459e-01,
 299         +6.654226325450904489500926100067e-01,
 300         +6.734226752121667202979603888010e-01,
 301         +6.813592248079030689480715595681e-01,
 302         +6.892332812388089803249143378146e-01,
 303 };
 304 
 305 static double
 306 large_gam(double x)
 307 {
 308         double ss, zz, z, t1, t2, w, y, u;
 309         unsigned lx;
 310         int k, ix, j, m;
 311 
 312         ix = __HI(x);
 313         lx = __LO(x);
 314         m = (ix >> 20) - 0x3ff;                   /* exponent of x, range:3-5 */
 315         ix = (ix & 0x000fffff) | 0x3ff00000;        /* y = scale x to [1,2] */
 316         __HI(y) = ix;
 317         __LO(y) = lx;
 318         __HI(z) = (ix & 0xffffc000) | 0x2000;       /* z[j]=1+j/64+1/128 */
 319         __LO(z) = 0;
 320         j = (ix >> 14) & 0x3f;
 321         t1 = y + z;
 322         t2 = y - z;
 323         u = t2 / t1;
 324         ss = T1[m - 3] + T2[j] + u * (two + A1 * (u * u));
 325         /* ss = log(x)-1 */
 326 
 327         /*
 328          * compute ww = (x-.5)*(log(x)-1) + .5*(log(2pi)-1) + 1/x*(P(1/x^2)))
 329          * where ss = log(x) - 1
 330          */
 331         z = one / x;
 332         zz = z * z;
 333         w = ((x - half) * ss + hln2pi) + z * (GP0 + zz * GP1 + (zz * zz) * GP2);
 334         k = (int)(w * invln2_32 + half);
 335 
 336         /* compute the exponential of w */
 337         j = k & 0x1f;
 338         m = k >> 5;
 339         z = w - (double)k * ln2_32;
 340         zz = S[j] * (one + z + (z * z) * (Et1 + z * Et2));
 341         __HI(zz) += m << 20;
 342         return (zz);
 343 }
 344 
 345 
 346 /*
 347  * kpsin(x)= sin(pi*x)/pi
 348  *                 3        5        7        9
 349  *      = x+ks[0]*x +ks[1]*x +ks[2]*x +ks[3]*x
 350  */
 351 static const double ks[] = {
 352         -1.64493404985645811354476665052005342839447790544e+0000,
 353         +8.11740794458351064092797249069438269367389272270e-0001,
 354         -1.90703144603551216933075809162889536878854055202e-0001,
 355         +2.55742333994264563281155312271481108635575331201e-0002,
 356 };
 357 
 358 static double
 359 kpsin(double x)
 360 {
 361         double z;
 362 
 363         z = x * x;
 364         return (x + (x * z) * ((ks[0] + z * ks[1]) + (z * z) * (ks[2] + z *
 365             ks[3])));
 366 }
 367 
 368 
 369 /*
 370  * kpcos(x)= cos(pi*x)/pi
 371  *                     2        4        6
 372  *      = kc[0]+kc[1]*x +kc[2]*x +kc[3]*x
 373  */
 374 static const double kc[] = {
 375         +3.18309886183790671537767526745028724068919291480e-0001,
 376         -1.57079581447762568199467875065854538626594937791e+0000,
 377         +1.29183528092558692844073004029568674027807393862e+0000,
 378         -4.20232949771307685981015914425195471602739075537e-0001,
 379 };
 380 
 381 static double
 382 kpcos(double x)
 383 {
 384         double z;
 385 
 386         z = x * x;
 387         return (kc[0] + z * (kc[1] + z * kc[2] + (z * z) * kc[3]));
 388 }
 389 
 390 static const double t0z1 = 0.134861805732790769689793935774652917006,
 391         t0z2 = 0.461632144968362341262659542325721328468,
 392         t0z3 = 0.819773101100500601787868704921606996312;
 393 
 394 /*
 395  * 1.134861805732790769689793935774652917006
 396  */
 397 
 398 /*
 399  * gamma(x+i) for 0 <= x < 1
 400  */
 401 static double
 402 gam_n(int i, double x)
 403 {
 404         double rr = 0.0L, yy;
 405         double z1, z2;
 406 
 407         /* compute yy = gamma(x+1) */
 408         if (x > 0.2845) {
 409                 if (x > 0.6374)
 410                         yy = GT3(x - t0z3);
 411                 else
 412                         yy = GT2(x - t0z2);
 413         } else {
 414                 yy = GT1(x - t0z1);
 415         }
 416 
 417         /* compute gamma(x+i) = (x+i-1)*...*(x+1)*yy, 0<i<8 */
 418         switch (i) {
 419         case 0:                         /* yy/x */
 420                 rr = yy / x;
 421                 break;
 422         case 1:                         /* yy */
 423                 rr = yy;
 424                 break;
 425         case 2:                         /* (x+1)*yy */
 426                 rr = (x + one) * yy;
 427                 break;
 428         case 3:                         /* (x+2)*(x+1)*yy */
 429                 rr = (x + one) * (x + two) * yy;
 430                 break;
 431 
 432         case 4:                         /* (x+1)*(x+3)*(x+2)*yy */
 433                 rr = (x + one) * (x + two) * ((x + 3.0) * yy);
 434                 break;
 435         case 5:                         /* ((x+1)*(x+4)*(x+2)*(x+3))*yy */
 436                 z1 = (x + two) * (x + 3.0) * yy;
 437                 z2 = (x + one) * (x + 4.0);
 438                 rr = z1 * z2;
 439                 break;
 440         case 6:                         /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5))*yy */
 441                 z1 = (x + two) * (x + 3.0);
 442                 z2 = (x + 5.0) * yy;
 443                 rr = z1 * (z1 - two) * z2;
 444                 break;
 445         case 7:                 /* ((x+1)*(x+2)*(x+3)*(x+4)*(x+5)*(x+6))*yy */
 446                 z1 = (x + two) * (x + 3.0);
 447                 z2 = (x + 5.0) * (x + 6.0) * yy;
 448                 rr = z1 * (z1 - two) * z2;
 449                 break;
 450         }
 451 
 452         return (rr);
 453 }
 454 
 455 float
 456 tgammaf(float xf)
 457 {
 458         float zf;
 459         double ss, ww;
 460         double x, y, z;
 461         int i, j, k, ix, hx, xk;
 462 
 463         hx = *(int *)&xf;
 464         ix = hx & 0x7fffffff;
 465 
 466         x = (double)xf;
 467 
 468         if (ix < 0x33800000)
 469                 return (1.0F / xf);                     /* |x| < 2**-24 */
 470 
 471         if (ix >= 0x7f800000)
 472                 return (xf * ((hx < 0) ? 0.0F : xf));        /* +-Inf or NaN */
 473 
 474         if (hx > 0x420C290F) /* x > 35.040096283... overflow */
 475                 return ((float)(x / tiny));
 476 
 477         if (hx >= 0x41000000)                                /* x >= 8 */
 478                 return ((float)large_gam(x));
 479 
 480         if (hx > 0) {                                        /* 0 < x < 8 */
 481                 i = (int)xf;
 482                 return ((float)gam_n(i, x - (double)i));
 483         }
 484 
 485         /*
 486          * negative x
 487          */
 488 
 489         /*
 490          * compute xk =
 491          *      -2 ... x is an even int (-inf is considered even)
 492          *      -1 ... x is an odd int
 493          *      +0 ... x is not an int but chopped to an even int
 494          *      +1 ... x is not an int but chopped to an odd int
 495          */
 496         xk = 0;
 497 
 498         if (ix >= 0x4b000000) {
 499                 if (ix > 0x4b000000)
 500                         xk = -2;
 501                 else
 502                         xk = -2 + (ix & 1);
 503         } else if (ix >= 0x3f800000) {
 504                 k = (ix >> 23) - 0x7f;
 505                 j = ix >> (23 - k);
 506 
 507                 if ((j << (23 - k)) == ix)
 508                         xk = -2 + (j & 1);
 509                 else
 510                         xk = j & 1;
 511         }
 512 
 513         if (xk < 0) {
 514                 /* 0/0 invalid NaN, ideally gamma(-n)= (-1)**(n+1) * inf */
 515                 zf = xf - xf;
 516                 return (zf / zf);
 517         }
 518 
 519         /* negative underflow thresold */
 520         if (ix > 0x4224000B) {               /* x < -(41+11ulp) */
 521                 if (xk == 0)
 522                         z = -tiny;
 523                 else
 524                         z = tiny;
 525 
 526                 return ((float)z);
 527         }
 528 
 529         /*
 530          * now compute gamma(x) by  -1/((sin(pi*y)/pi)*gamma(1+y)), y = -x
 531          */
 532 
 533         /*
 534          * First compute ss = -sin(pi*y)/pi , so that
 535          * gamma(x) = 1/(ss*gamma(1+y))
 536          */
 537         y = -x;
 538         j = (int)y;
 539         z = y - (double)j;
 540 
 541         if (z > 0.3183098861837906715377675) {
 542                 if (z > 0.6816901138162093284622325)
 543                         ss = kpsin(one - z);
 544                 else
 545                         ss = kpcos(0.5 - z);
 546         } else {
 547                 ss = kpsin(z);
 548         }
 549 
 550         if (xk == 0)
 551                 ss = -ss;
 552 
 553         /* Then compute ww = gamma(1+y)  */
 554         if (j < 7)
 555                 ww = gam_n(j + 1, z);
 556         else
 557                 ww = large_gam(y + one);
 558 
 559         /* return 1/(ss*ww) */
 560         return ((float)(one / (ww * ss)));
 561 }