il_11210 New usr/src/lib/libm/common/Q/erfl.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 /*
  32  * long double functions erfl, erfcl (long double x)
  33  * K.C. Ng, September, 1989.
  34  *                           x
  35  *                    2      |\
  36  *     erf(x)  =  ---------  | exp(-t*t)dt
  37  *                 sqrt(pi) \|
  38  *                           0
  39  *
  40  *     erfc(x) =  1-erf(x)
  41  *
  42  * method:
  43  *      Since erf(-x) = -erf(x), we assume x>=0.
  44  *      For x near 0, we have the expansion
  45  *
  46  *          erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....).
  47  *
  48  *      Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688,
  49  *      we use x + x*P(x^2) to approximate erf(x). This formula will
  50  *      guarantee the error less than one ulp where x is not too far
  51  *      away from 0. We note that erf(x)=x at x = 0.6174...... After
  52  *      some experiment, we choose the following approximation on
  53  *      interval [0,0.84375].
  54  *
  55  *      For x in [0,0.84375]
  56  *                 2                2        4               40
  57  *         P =  P(x ) = (p0 + p1 * x + p2 * x + ... + p20 * x  )
  58  *
  59  *         erf(x)  = x + x*P
  60  *         erfc(x) = 1 - erf(x)           if x<=0.25
  61  *                 = 0.5 + ((0.5-x)-x*P)  if x in [0.25,0.84375]
  62  *      precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50
  63  *
  64  *      For x in [0.84375,1.25], let s = x - 1, and
  65  *      c = 0.84506291151 rounded to single (24 bits)
  66  *         erf(x)  = c  + P1(s)/Q1(s)
  67  *         erfc(x) = (1-c)  - P1(s)/Q1(s)
  68  *      precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41
  69  *
  70  *
  71  *      For x in [1.25,1.75], let s = x - 1.5, and
  72  *      c = 0.95478588343 rounded to single (24 bits)
  73  *         erf(x)  = c  + P2(s)/Q2(s)
  74  *         erfc(x) = (1-c)  - P2(s)/Q2(s)
  75  *      precision: |P2/Q2 - (erf(x)-c)| <= 2**-123.83
  76  *
  77  *
  78  *      For x in [1.75,16/3]
  79  *         erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x)
  80  *         erf(x)  = 1 - erfc(x)
  81  *      precision: absolute error of R1/S1 is bounded by 2**-124.03
  82  *
  83  *      For x in [16/3,107]
  84  *         erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x)
  85  *         erf(x)  = 1 - erfc(x) (if x>=9 simply return erf(x)=1 with inexact)
  86  *      precision: absolute error of R2/S2 is bounded by 2**-120.07
  87  *
  88  *      Else if inf > x >= 107
  89  *         erf(x)  = 1 with inexact
  90  *         erfc(x) = 0 with underflow
  91  *
  92  *      Special case:
  93  *         erf(inf)  = 1
  94  *         erfc(inf) = 0
  95  */
  96 
  97 #pragma weak __erfl = erfl
  98 #pragma weak __erfcl = erfcl
  99 
 100 #include "libm.h"
 101 #include "longdouble.h"
 102 
 103 static const long double tiny = 1e-40L,
             /*
              * tiny     - erfl(): cutoff below which only P[0] is used, and
              *            the value subtracted from one when |x| > 9
              * nearunfl - erfcl(): squared to return an underflowed result
              *            for x >= 107
              * onehalf  - subtracted from the argument on (1.25, 1.75]
              * L16_3    - erfcl(): R1/S1 is used below 16/3, R2/S2 from
              *            16/3 upward
              */
 104         nearunfl = 1e-4000L,
 105         half = 0.5L,
 106         one = 1.0L,
 107         onehalf = 1.5L,
 108         L16_3 = 16.0L / 3.0L;
 109 
 110 /*
 111  * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375]
 112  */
 113 static const long double P[] = {        /* 21 coeffs */
             /*
              * P[0] is the constant term p0 (approximately 2/sqrt(pi) - 1);
              * erfl() uses it alone when |x| <= tiny.
              */
 114         1.283791670955125738961589031215451715556e-0001L,
 115         -3.761263890318375246320529677071815594603e-0001L,
 116         1.128379167095512573896158903121205899135e-0001L,
 117         -2.686617064513125175943235483344625046092e-0002L,
 118         5.223977625442187842111846652980454568389e-0003L,
 119         -8.548327023450852832546626271083862724358e-0004L,
 120         1.205533298178966425102164715902231976672e-0004L,
 121         -1.492565035840625097674944905027897838996e-0005L,
 122         1.646211436588924733604648849172936692024e-0006L,
 123         -1.636584469123491976815834704799733514987e-0007L,
 124         1.480719281587897445302529007144770739305e-0008L,
 125         -1.229055530170782843046467986464722047175e-0009L,
 126         9.422759064320307357553954945760654341633e-0011L,
 127         -6.711366846653439036162105104991433380926e-0012L,
 128         4.463224090341893165100275380693843116240e-0013L,
 129         -2.783513452582658245422635662559779162312e-0014L,
 130         1.634227412586960195251346878863754661546e-0015L,
 131         -9.060782672889577722765711455623117802795e-0017L,
 132         4.741341801266246873412159213893613602354e-0018L,
 133         -2.272417596497826188374846636534317381203e-0019L,
 134         8.069088733716068462496835658928566920933e-0021L,
 135 };
 136 
 137 /*
 138  * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25]
 139  */
 140 static const long double C1 = (long double)((float)0.84506291151);
     /*
      * The (float) cast rounds the literal to single precision before it is
      * widened, so C1 carries at most 24 significant bits (see file header).
      * P1 is the numerator in s = x - 1; P1[0] is approximately erf(1) - C1,
      * the value of the correction term at s = 0.
      */
 141 static const long double P1[] = {       /*  12 top coeffs */
 142         -2.362118560752659955654364917390741930316e-0003L,
 143         4.129623379624420034078926610650759979146e-0001L,
 144         -3.973857505403547283109417923182669976904e-0002L,
 145         4.357503184084022439763567513078036755183e-0002L,
 146         8.015593623388421371247676683754171456950e-0002L,
 147         -1.034459310403352486685467221776778474602e-0002L,
 148         5.671850295381046679675355719017720821383e-0003L,
 149         1.219262563232763998351452194968781174318e-0003L,
 150         5.390833481581033423020320734201065475098e-0004L,
 151         -1.978853912815115495053119023517805528300e-0004L,
 152         6.184234513953600118335017885706420552487e-0005L,
 153         -5.331802711697810861017518515816271808286e-0006L,
 154 };
 155 
 156 static const long double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */
             /*
              * The hidden 1.0 is supplied by the callers, which form the
              * denominator as one + s * __poly_libmq(s, 12, Q1).
              */
 157         9.081506296064882195280178373107623196655e-0001L,
 158         6.821049531968204097604392183650687642520e-0001L,
 159         4.067869178233539502315055970743271822838e-0001L,
 160         1.702332233546316765818144723063881095577e-0001L,
 161         7.498098377690553934266423088708614219356e-0002L,
 162         2.050154396918178697056927234366372760310e-0002L,
 163         7.012988534031999899054782333851905939379e-0003L,
 164         1.149904787014400354649843451234570731076e-0003L,
 165         3.185620255011299476196039491205159718620e-0004L,
 166         1.273405072153008775426376193374105840517e-0005L,
 167         4.753866999959432971956781228148402971454e-0006L,
 168         -1.002287602111660026053981728549540200683e-0006L,
 169 };
 170 
 171 /*
 172  * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5)
 173  * on [1.25,1.75]
 174  */
 175 static const long double C2 = (long double)((float)0.95478588343);
     /*
      * The (float) cast rounds the literal to single precision before it is
      * widened, so C2 carries at most 24 significant bits (see file header).
      * P2 is the numerator in s = x - 1.5; P2[0] is approximately
      * erf(1.5) - C2, the value of the correction term at s = 0.
      */
 176 static const long double P2[] = {       /*  12 top coeffs */
 177         1.131926304864446730135126164594785863512e-0002L,
 178         1.273617996967754151544330055186210322832e-0001L,
 179         -8.169980734667512519897816907190281143423e-0002L,
 180         9.512267486090321197833634271787944271746e-0002L,
 181         -2.394251569804872160005274999735914368170e-0002L,
 182         1.108768660227528667525252333184520222905e-0002L,
 183         3.527435492933902414662043314373277494221e-0004L,
 184         4.946116273341953463584319006669474625971e-0004L,
 185         -4.289851942513144714600285769022420962418e-0005L,
 186         8.304719841341952705874781636002085119978e-0005L,
 187         -1.040460226177309338781902252282849903189e-0005L,
 188         2.122913331584921470381327583672044434087e-0006L,
 189 };
 190 
 191 static const long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */
             /*
              * The hidden 1.0 is supplied by the callers, which form the
              * denominator as one + s * __poly_libmq(s, 13, Q2).
              */
 192         7.448815737306992749168727691042003832150e-0001L,
 193         7.161813850236008294484744312430122188043e-0001L,
 194         3.603134756584225766144922727405641236121e-0001L,
 195         1.955811609133766478080550795194535852653e-0001L,
 196         7.253059963716225972479693813787810711233e-0002L,
 197         2.752391253757421424212770221541238324978e-0002L,
 198         7.677654852085240257439050673446546828005e-0003L,
 199         2.141102244555509687346497060326630061069e-0003L,
 200         4.342123013830957093949563339130674364271e-0004L,
 201         8.664587895570043348530991997272212150316e-0005L,
 202         1.109201582511752087060167429397033701988e-0005L,
 203         1.357834375781831062713347000030984364311e-0006L,
 204         4.957746280594384997273090385060680016451e-0008L,
 205 };
 206 
 207 /*
 208  * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3]
 209  */
 210 static const long double R1[] = {       /*  14 top coeffs */
             /*
              * erfcl() evaluates this table at x itself, not at 1/x.
              * NOTE(review): presumably stored in reverse order like R2;
              * the last entry is approximately 1/sqrt(pi) -- confirm.
              */
 211         4.630195122654315016370705767621550602948e+0006L,
 212         1.257949521746494830700654204488675713628e+0007L,
 213         1.704153822720260272814743497376181625707e+0007L,
 214         1.502600568706061872381577539537315739943e+0007L,
 215         9.543710793431995284827024445387333922861e+0006L,
 216         4.589344808584091011652238164935949522427e+0006L,
 217         1.714660662941745791190907071920671844289e+0006L,
 218         5.034802147768798894307672256192466283867e+0005L,
 219         1.162286400443554670553152110447126850725e+0005L,
 220         2.086643834548901681362757308058660399137e+0004L,
 221         2.839793161868140305907004392890348777338e+0003L,
 222         2.786687241658423601778258694498655680778e+0002L,
 223         1.779177837102695602425897452623985786464e+0001L,
 224         5.641895835477470769043614623819144434731e-0001L,
 225 };
 226 
 227 static const long double S1[] = { /* 15 bottom coeffs, 1.0 stored last */
             /*
              * Unlike Q1/Q2 the 1.0 is not hidden: it is the explicit final
              * entry, and erfcl() passes all 15 entries to __poly_libmq()
              * with x (not 1/x) as the argument.
              */
 228         4.630195122654331529595606896287596843110e+0006L,
 229         1.780411093345512024324781084220509055058e+0007L,
 230         3.250113097051800703707108623715776848283e+0007L,
 231         3.737857099176755050912193712123489115755e+0007L,
 232         3.029787497516578821459174055870781168593e+0007L,
 233         1.833850619965384765005769632103205777227e+0007L,
 234         8.562719999736915722210391222639186586498e+0006L,
 235         3.139684562074658971315545539760008136973e+0006L,
 236         9.106421313731384880027703627454366930945e+0005L,
 237         2.085108342384266508613267136003194920001e+0005L,
 238         3.723126272693120340730491416449539290600e+0004L,
 239         5.049169878567344046145695360784436929802e+0003L,
 240         4.944274532748010767670150730035392093899e+0002L,
 241         3.153510608818213929982940249162268971412e+0001L,
 242         1.0e00L,
 243 };
 244 
 245 /*
 246  * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107]
 247  */
 248 static const long double R2[] = { /*  15 top coeffs in reverse order!! */
             /*
              * erfcl() evaluates this table at x itself, not at 1/x; the
              * last entry is approximately 1/sqrt(pi).
              */
 249         2.447288012254302966796326587537136931669e+0005L,
 250         8.768592567189861896653369912716538739016e+0005L,
 251         1.552293152581780065761497908005779524953e+0006L,
 252         1.792075924835942935864231657504259926729e+0006L,
 253         1.504001463155897344947500222052694835875e+0006L,
 254         9.699485556326891411801230186016013019935e+0005L,
 255         4.961449933661807969863435013364796037700e+0005L,
 256         2.048726544693474028061176764716228273791e+0005L,
 257         6.891532964330949722479061090551896886635e+0004L,
 258         1.888014709010307507771964047905823237985e+0004L,
 259         4.189692064988957745054734809642495644502e+0003L,
 260         7.362346487427048068212968889642741734621e+0002L,
 261         9.980359714211411423007641056580813116207e+0001L,
 262         9.426910895135379181107191962193485174159e+0000L,
 263         5.641895835477562869480794515623601280429e-0001L,
 264 };
 265 
 266 static const long double S2[] = {       /* 16 coefficients */
             /*
              * The 1.0 is stored explicitly as the final entry; erfcl()
              * passes all 16 entries to __poly_libmq() with x (not 1/x) as
              * the argument.
              */
 267         2.447282203601902971246004716790604686880e+0005L,
 268         1.153009852759385309367759460934808489833e+0006L,
 269         2.608580649612639131548966265078663384849e+0006L,
 270         3.766673917346623308850202792390569025740e+0006L,
 271         3.890566255138383910789924920541335370691e+0006L,
 272         3.052882073900746207613166259994150527732e+0006L,
 273         1.885574519970380988460241047248519418407e+0006L,
 274         9.369722034759943185851450846811445012922e+0005L,
 275         3.792278350536686111444869752624492443659e+0005L,
 276         1.257750606950115799965366001773094058720e+0005L,
 277         3.410830600242369370645608634643620355058e+0004L,
 278         7.513984469742343134851326863175067271240e+0003L,
 279         1.313296320593190002554779998138695507840e+0003L,
 280         1.773972700887629157006326333696896516769e+0002L,
 281         1.670876451822586800422009013880457094162e+0001L,
 282         1.000L,
 283 };
 284 
 285 long double
 286 erfl(long double x)
 287 {
             /*
              * erfl(x): error function of x.
              *
              * NaN is propagated as x + x and +-Inf returns +-1.  Otherwise
              * the result is built from y = |x| with one approximation per
              * interval, and the sign of x is applied to the result:
              *   y <= 0.84375         x + x * P(y^2)
              *   0.84375 < y <= 1.25  C1 + P1(s)/(1 + s*Q1(s)), s = y - 1
              *   1.25 < y <= 1.75     C2 + P2(s)/(1 + s*Q2(s)), s = y - 1.5
              *   1.75 < y <= 9        1 - erfcl(y)
              *   y > 9                1 - tiny
              */
 288         long double s, y, t;
 289 
 290         if (!finitel(x)) {
 291                 if (x != x)
 292                         return (x + x);         /* NaN */
 293 
 294                 return (copysignl(one, x));     /* return +-1.0 if x=Inf */
 295         }
 296 
 297         y = fabsl(x);
 298 
 299         if (y <= 0.84375L) {
                     /* for |x| <= tiny only the constant term p0 is used */
 300                 if (y <= tiny)
 301                         return (x + P[0] * x);
 302 
                     /* x (not y) is used below, so the sign comes for free */
 303                 s = y * y;
 304                 t = __poly_libmq(s, 21, P);
 305                 return (x + x * t);
 306         }
 307 
 308         if (y <= 1.25L) {
 309                 s = y - one;
 310                 t = C1 + __poly_libmq(s, 12, P1) / (one + s * __poly_libmq(s,
 311                     12, Q1));
                     /* t was computed for |x|; negate it for negative x */
 312                 return ((signbitl(x)) ? -t : t);
 313         } else if (y <= 1.75L) {
 314                 s = y - onehalf;
 315                 t = C2 + __poly_libmq(s, 12, P2) / (one + s * __poly_libmq(s,
 316                     13, Q2));
 317                 return ((signbitl(x)) ? -t : t);
 318         }
 319 
             /*
              * Here t plays the role of erfc(|x|).  Beyond 9 the constant
              * tiny is used instead; per the file header the intent is to
              * return 1 with the inexact flag raised.
              */
 320         if (y <= 9.0L)
 321                 t = erfcl(y);
 322         else
 323                 t = tiny;
 324 
 325         return ((signbitl(x)) ? t - one : one - t);
 326 }
 327 
 328 long double
 329 erfcl(long double x)
 330 {
             /*
              * erfcl(x): complementary error function, 1 - erf(x).
              *
              * NaN is propagated as x + x; -Inf returns 2 and +Inf returns 0.
              * x is used with its sign (no fabsl), so every negative finite
              * x takes the first row below:
              *   x <= 0.25            1 - erfl(x)
              *   0.25 < x <= 0.84375  0.5 + ((0.5 - x) - x * P(x^2))
              *   0.84375 < x <= 1.25  (1 - C1) - P1(s)/(1 + s*Q1(s)), s = x-1
              *   1.25 < x <= 1.75     (1 - C2) - P2(s)/(1 + s*Q2(s)), s = x-1.5
              *   1.75 < x < 16/3      R1/S1 times exp(-x*x)
              *   16/3 <= x < 107      R2/S2 times exp(-x*x)
              *   x >= 107             nearunfl * nearunfl (underflow)
              */
 331         long double s, y, t;
 332 
 333         if (!finitel(x)) {
 334                 if (x != x)
 335                         return (x + x);         /* NaN */
 336 
 337                 /* return 2.0 if x= -inf; 0.0 if x= +inf */
 338                 if (x < 0.0L)
 339                         return (2.0L);
 340                 else
 341                         return (0.0L);
 342         }
 343 
 344         if (x <= 0.84375L) {
                     /* also taken for every negative finite x */
 345                 if (x <= 0.25)
 346                         return (one - erfl(x));
 347 
                     /* 0.5 + ((0.5 - x) - x*P), as given in the file header */
 348                 s = x * x;
 349                 t = half - x;
 350                 t = t - x * __poly_libmq(s, 21, P);
 351                 return (half + t);
 352         }
 353 
 354         if (x <= 1.25L) {
 355                 s = x - one;
 356                 t = one - C1;
 357                 return (t - __poly_libmq(s, 12, P1) / (one + s * __poly_libmq(s,
 358                     12, Q1)));
 359         } else if (x <= 1.75L) {
 360                 s = x - onehalf;
 361                 t = one - C2;
 362                 return (t - __poly_libmq(s, 12, P2) / (one + s * __poly_libmq(s,
 363                     13, Q2)));
 364         }
 365 
             /*
              * Both rational approximations below are evaluated at x, not
              * at 1/x.  NOTE(review): presumably the reversed coefficient
              * order noted on R2 is what folds the 1/x argument and the 1/x
              * factor of the header formula into the quotient -- confirm
              * against __poly_libmq.
              */
 366         if (x >= 107.0L) {
 367                 return (nearunfl * nearunfl);   /* underflow */
 368         } else if (x >= L16_3) {
 369                 y = __poly_libmq(x, 15, R2);
 370                 t = y / __poly_libmq(x, 16, S2);
 371         } else {
 372                 y = __poly_libmq(x, 14, R1);
 373                 t = y / __poly_libmq(x, 15, S1);
 374         }
 375 
 376         /*
 377          * Note that exp(-x*x+d) = exp(-x*x)*exp(d), so to compute
 378          * exp(-x*x) with a small relative error, we need to compute
 379          * -x*x with a small absolute error.  To this end, we set y
 380          * equal to the leading part of x but with enough trailing
 381          * zeros that y*y can be computed exactly and we rewrite x*x
 382          * as y*y + (x-y)*(x+y), distributing the latter expression
 383          * across the exponential.
 384          *
 385          * We could construct y in a portable way by setting
 386          *
 387          *   int i = (int)(x * ptwo);
 388          *   y = (long double)i * 1/ptwo;
 389          *
 390          * where ptwo is some power of two large enough to make x-y
 391          * small but not so large that the conversion to int overflows.
 392          * When long double arithmetic is slow, however, the following
 393          * non-portable code is preferable.
 394          */
 395         y = x;
             /*
              * Zero the int-sized words at index 2 and 3 of y's storage.
              * NOTE(review): this presumably targets a big-endian 128-bit
              * long double with 32-bit int, where those words are the low
              * half of the significand; it also type-puns through int *.
              * Confirm both before building for any other layout.
              */
 396         *(2 + (int *)&y) = *(3 + (int *)&y) = 0;
             /* scale the rational part by exp(-y*y) * exp(-(x-y)*(x+y)) */
 397         t *= expl(-y * y) * expl(-(x - y) * (x + y));
 398         return (t);
 399 }