1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 /* 32 * long double function erf,erfc (long double x) 33 * K.C. Ng, September, 1989. 34 * x 35 * 2 |\ 36 * erf(x) = --------- | exp(-t*t)dt 37 * sqrt(pi) \| 38 * 0 39 * 40 * erfc(x) = 1-erf(x) 41 * 42 * method: 43 * Since erf(-x) = -erf(x), we assume x>=0. 44 * For x near 0, we have the expansion 45 * 46 * erf(x) = (2/sqrt(pi))*(x - x^3/3 + x^5/10 - x^7/42 + ....). 47 * 48 * Since 2/sqrt(pi) = 1.128379167095512573896158903121545171688, 49 * we use x + x*P(x^2) to approximate erf(x). This formula will 50 * guarantee the error less than one ulp where x is not too far 51 * away from 0. We note that erf(x)=x at x = 0.6174...... After 52 * some experiment, we choose the following approximation on 53 * interval [0,0.84375]. 54 * 55 * For x in [0,0.84375] 56 * 2 2 4 40 57 * P = P(x ) = (p0 + p1 * x + p2 * x + ... + p20 * x ) 58 * 59 * erf(x) = x + x*P 60 * erfc(x) = 1 - erf(x) if x<=0.25 61 * = 0.5 + ((0.5-x)-x*P) if x in [0.25,0.84375] 62 * precision: |P(x^2)-(erf(x)-x)/x| <= 2**-122.50 63 * 64 * For x in [0.84375,1.25], let s = x - 1, and 65 * c = 0.84506291151 rounded to single (24 bits) 66 * erf(x) = c + P1(s)/Q1(s) 67 * erfc(x) = (1-c) - P1(s)/Q1(s) 68 * precision: |P1/Q1 - (erf(x)-c)| <= 2**-118.41 69 * 70 * 71 * For x in [1.25,1.75], let s = x - 1.5, and 72 * c = 0.95478588343 rounded to single (24 bits) 73 * erf(x) = c + P2(s)/Q2(s) 74 * erfc(x) = (1-c) - P2(s)/Q2(s) 75 * precision: |P1/Q1 - (erf(x)-c)| <= 2**-123.83 76 * 77 * 78 * For x in [1.75,16/3] 79 * erfc(x) = exp(-x*x)*(1/x)*R1(1/x)/S1(1/x) 80 * erf(x) = 1 - erfc(x) 81 * precision: absolute error of R1/S1 is bounded by 2**-124.03 82 * 83 * For x in [16/3,107] 84 * erfc(x) = exp(-x*x)*(1/x)*R2(1/x)/S2(1/x) 85 * erf(x) = 1 - erfc(x) (if x>=9 simple return erf(x)=1 with inexact) 86 * precision: absolute error of R2/S2 is bounded by 2**-120.07 87 * 88 * Else if inf > x >= 107 89 * erf(x) = 1 with inexact 90 * erfc(x) = 0 with underflow 91 * 92 * Special case: 93 * erf(inf) = 1 94 * erfc(inf) = 0 95 */ 96 97 #pragma weak __erfl = erfl 98 #pragma weak __erfcl = erfcl 99 100 #include "libm.h" 101 #include "longdouble.h" 102 103 static const long double tiny = 1e-40L, 104 nearunfl = 1e-4000L, 105 half = 0.5L, 106 one = 1.0L, 107 onehalf = 1.5L, 108 L16_3 = 16.0L / 3.0L; 109 110 /* 111 * Coefficients for even polynomial P for erf(x)=x+x*P(x^2) on [0,0.84375] 112 */ 113 static const long double P[] = { /* 21 coeffs */ 114 1.283791670955125738961589031215451715556e-0001L, 115 -3.761263890318375246320529677071815594603e-0001L, 116 1.128379167095512573896158903121205899135e-0001L, 117 -2.686617064513125175943235483344625046092e-0002L, 118 5.223977625442187842111846652980454568389e-0003L, 119 -8.548327023450852832546626271083862724358e-0004L, 120 1.205533298178966425102164715902231976672e-0004L, 121 -1.492565035840625097674944905027897838996e-0005L, 122 1.646211436588924733604648849172936692024e-0006L, 123 -1.636584469123491976815834704799733514987e-0007L, 124 1.480719281587897445302529007144770739305e-0008L, 125 -1.229055530170782843046467986464722047175e-0009L, 126 9.422759064320307357553954945760654341633e-0011L, 127 -6.711366846653439036162105104991433380926e-0012L, 128 4.463224090341893165100275380693843116240e-0013L, 129 -2.783513452582658245422635662559779162312e-0014L, 130 1.634227412586960195251346878863754661546e-0015L, 131 -9.060782672889577722765711455623117802795e-0017L, 132 4.741341801266246873412159213893613602354e-0018L, 133 -2.272417596497826188374846636534317381203e-0019L, 134 8.069088733716068462496835658928566920933e-0021L, 135 }; 136 137 /* 138 * Rational erf(x) = ((float)0.84506291151) + P1(x-1)/Q1(x-1) on [0.84375,1.25] 139 */ 140 static const long double C1 = (long double)((float)0.84506291151); 141 static const long double P1[] = { /* 12 top coeffs */ 142 -2.362118560752659955654364917390741930316e-0003L, 143 4.129623379624420034078926610650759979146e-0001L, 144 -3.973857505403547283109417923182669976904e-0002L, 145 4.357503184084022439763567513078036755183e-0002L, 146 8.015593623388421371247676683754171456950e-0002L, 147 -1.034459310403352486685467221776778474602e-0002L, 148 5.671850295381046679675355719017720821383e-0003L, 149 1.219262563232763998351452194968781174318e-0003L, 150 5.390833481581033423020320734201065475098e-0004L, 151 -1.978853912815115495053119023517805528300e-0004L, 152 6.184234513953600118335017885706420552487e-0005L, 153 -5.331802711697810861017518515816271808286e-0006L, 154 }; 155 156 static const long double Q1[] = { /* 12 bottom coeffs with leading 1.0 hidden */ 157 9.081506296064882195280178373107623196655e-0001L, 158 6.821049531968204097604392183650687642520e-0001L, 159 4.067869178233539502315055970743271822838e-0001L, 160 1.702332233546316765818144723063881095577e-0001L, 161 7.498098377690553934266423088708614219356e-0002L, 162 2.050154396918178697056927234366372760310e-0002L, 163 7.012988534031999899054782333851905939379e-0003L, 164 1.149904787014400354649843451234570731076e-0003L, 165 3.185620255011299476196039491205159718620e-0004L, 166 1.273405072153008775426376193374105840517e-0005L, 167 4.753866999959432971956781228148402971454e-0006L, 168 -1.002287602111660026053981728549540200683e-0006L, 169 }; 170 171 /* 172 * Rational erf(x) = ((float)0.95478588343) + P2(x-1.5)/Q2(x-1.5) 173 * on [1.25,1.75] 174 */ 175 static const long double C2 = (long double)((float)0.95478588343); 176 static const long double P2[] = { /* 12 top coeffs */ 177 1.131926304864446730135126164594785863512e-0002L, 178 1.273617996967754151544330055186210322832e-0001L, 179 -8.169980734667512519897816907190281143423e-0002L, 180 9.512267486090321197833634271787944271746e-0002L, 181 -2.394251569804872160005274999735914368170e-0002L, 182 1.108768660227528667525252333184520222905e-0002L, 183 3.527435492933902414662043314373277494221e-0004L, 184 4.946116273341953463584319006669474625971e-0004L, 185 -4.289851942513144714600285769022420962418e-0005L, 186 8.304719841341952705874781636002085119978e-0005L, 187 -1.040460226177309338781902252282849903189e-0005L, 188 2.122913331584921470381327583672044434087e-0006L, 189 }; 190 191 static const long double Q2[] = { /* 13 bottom coeffs with leading 1.0 hidden */ 192 7.448815737306992749168727691042003832150e-0001L, 193 7.161813850236008294484744312430122188043e-0001L, 194 3.603134756584225766144922727405641236121e-0001L, 195 1.955811609133766478080550795194535852653e-0001L, 196 7.253059963716225972479693813787810711233e-0002L, 197 2.752391253757421424212770221541238324978e-0002L, 198 7.677654852085240257439050673446546828005e-0003L, 199 2.141102244555509687346497060326630061069e-0003L, 200 4.342123013830957093949563339130674364271e-0004L, 201 8.664587895570043348530991997272212150316e-0005L, 202 1.109201582511752087060167429397033701988e-0005L, 203 1.357834375781831062713347000030984364311e-0006L, 204 4.957746280594384997273090385060680016451e-0008L, 205 }; 206 207 /* 208 * erfc(x) = exp(-x*x)/x * R1(1/x)/S1(1/x) on [1.75, 16/3] 209 */ 210 static const long double R1[] = { /* 14 top coeffs */ 211 4.630195122654315016370705767621550602948e+0006L, 212 1.257949521746494830700654204488675713628e+0007L, 213 1.704153822720260272814743497376181625707e+0007L, 214 1.502600568706061872381577539537315739943e+0007L, 215 9.543710793431995284827024445387333922861e+0006L, 216 4.589344808584091011652238164935949522427e+0006L, 217 1.714660662941745791190907071920671844289e+0006L, 218 5.034802147768798894307672256192466283867e+0005L, 219 1.162286400443554670553152110447126850725e+0005L, 220 2.086643834548901681362757308058660399137e+0004L, 221 2.839793161868140305907004392890348777338e+0003L, 222 2.786687241658423601778258694498655680778e+0002L, 223 1.779177837102695602425897452623985786464e+0001L, 224 5.641895835477470769043614623819144434731e-0001L, 225 }; 226 227 static const long double S1[] = { /* 15 bottom coeffs with leading 1.0 hidden */ 228 4.630195122654331529595606896287596843110e+0006L, 229 1.780411093345512024324781084220509055058e+0007L, 230 3.250113097051800703707108623715776848283e+0007L, 231 3.737857099176755050912193712123489115755e+0007L, 232 3.029787497516578821459174055870781168593e+0007L, 233 1.833850619965384765005769632103205777227e+0007L, 234 8.562719999736915722210391222639186586498e+0006L, 235 3.139684562074658971315545539760008136973e+0006L, 236 9.106421313731384880027703627454366930945e+0005L, 237 2.085108342384266508613267136003194920001e+0005L, 238 3.723126272693120340730491416449539290600e+0004L, 239 5.049169878567344046145695360784436929802e+0003L, 240 4.944274532748010767670150730035392093899e+0002L, 241 3.153510608818213929982940249162268971412e+0001L, 242 1.0e00L, 243 }; 244 245 /* 246 * erfc(x) = exp(-x*x)/x * R2(1/x)/S2(1/x) on [16/3, 107] 247 */ 248 static const long double R2[] = { /* 15 top coeffs in reverse order!! */ 249 2.447288012254302966796326587537136931669e+0005L, 250 8.768592567189861896653369912716538739016e+0005L, 251 1.552293152581780065761497908005779524953e+0006L, 252 1.792075924835942935864231657504259926729e+0006L, 253 1.504001463155897344947500222052694835875e+0006L, 254 9.699485556326891411801230186016013019935e+0005L, 255 4.961449933661807969863435013364796037700e+0005L, 256 2.048726544693474028061176764716228273791e+0005L, 257 6.891532964330949722479061090551896886635e+0004L, 258 1.888014709010307507771964047905823237985e+0004L, 259 4.189692064988957745054734809642495644502e+0003L, 260 7.362346487427048068212968889642741734621e+0002L, 261 9.980359714211411423007641056580813116207e+0001L, 262 9.426910895135379181107191962193485174159e+0000L, 263 5.641895835477562869480794515623601280429e-0001L, 264 }; 265 266 static const long double S2[] = { /* 16 coefficients */ 267 2.447282203601902971246004716790604686880e+0005L, 268 1.153009852759385309367759460934808489833e+0006L, 269 2.608580649612639131548966265078663384849e+0006L, 270 3.766673917346623308850202792390569025740e+0006L, 271 3.890566255138383910789924920541335370691e+0006L, 272 3.052882073900746207613166259994150527732e+0006L, 273 1.885574519970380988460241047248519418407e+0006L, 274 9.369722034759943185851450846811445012922e+0005L, 275 3.792278350536686111444869752624492443659e+0005L, 276 1.257750606950115799965366001773094058720e+0005L, 277 3.410830600242369370645608634643620355058e+0004L, 278 7.513984469742343134851326863175067271240e+0003L, 279 1.313296320593190002554779998138695507840e+0003L, 280 1.773972700887629157006326333696896516769e+0002L, 281 1.670876451822586800422009013880457094162e+0001L, 282 1.000L, 283 }; 284 285 long double 286 erfl(long double x) 287 { 288 long double s, y, t; 289 290 if (!finitel(x)) { 291 if (x != x) 292 return (x + x); /* NaN */ 293 294 return (copysignl(one, x)); /* return +-1.0 is x=Inf */ 295 } 296 297 y = fabsl(x); 298 299 if (y <= 0.84375L) { 300 if (y <= tiny) 301 return (x + P[0] * x); 302 303 s = y * y; 304 t = __poly_libmq(s, 21, P); 305 return (x + x * t); 306 } 307 308 if (y <= 1.25L) { 309 s = y - one; 310 t = C1 + __poly_libmq(s, 12, P1) / (one + s * __poly_libmq(s, 311 12, Q1)); 312 return ((signbitl(x)) ? -t : t); 313 } else if (y <= 1.75L) { 314 s = y - onehalf; 315 t = C2 + __poly_libmq(s, 12, P2) / (one + s * __poly_libmq(s, 316 13, Q2)); 317 return ((signbitl(x)) ? -t : t); 318 } 319 320 if (y <= 9.0L) 321 t = erfcl(y); 322 else 323 t = tiny; 324 325 return ((signbitl(x)) ? t - one : one - t); 326 } 327 328 long double 329 erfcl(long double x) 330 { 331 long double s, y, t; 332 333 if (!finitel(x)) { 334 if (x != x) 335 return (x + x); /* NaN */ 336 337 /* return 2.0 if x= -inf; 0.0 if x= +inf */ 338 if (x < 0.0L) 339 return (2.0L); 340 else 341 return (0.0L); 342 } 343 344 if (x <= 0.84375L) { 345 if (x <= 0.25) 346 return (one - erfl(x)); 347 348 s = x * x; 349 t = half - x; 350 t = t - x * __poly_libmq(s, 21, P); 351 return (half + t); 352 } 353 354 if (x <= 1.25L) { 355 s = x - one; 356 t = one - C1; 357 return (t - __poly_libmq(s, 12, P1) / (one + s * __poly_libmq(s, 358 12, Q1))); 359 } else if (x <= 1.75L) { 360 s = x - onehalf; 361 t = one - C2; 362 return (t - __poly_libmq(s, 12, P2) / (one + s * __poly_libmq(s, 363 13, Q2))); 364 } 365 366 if (x >= 107.0L) { 367 return (nearunfl * nearunfl); /* underflow */ 368 } else if (x >= L16_3) { 369 y = __poly_libmq(x, 15, R2); 370 t = y / __poly_libmq(x, 16, S2); 371 } else { 372 y = __poly_libmq(x, 14, R1); 373 t = y / __poly_libmq(x, 15, S1); 374 } 375 376 /* 377 * Note that exp(-x*x+d) = exp(-x*x)*exp(d), so to compute 378 * exp(-x*x) with a small relative error, we need to compute 379 * -x*x with a small absolute error. To this end, we set y 380 * equal to the leading part of x but with enough trailing 381 * zeros that y*y can be computed exactly and we rewrite x*x 382 * as y*y + (x-y)*(x+y), distributing the latter expression 383 * across the exponential. 384 * 385 * We could construct y in a portable way by setting 386 * 387 * int i = (int)(x * ptwo); 388 * y = (long double)i * 1/ptwo; 389 * 390 * where ptwo is some power of two large enough to make x-y 391 * small but not so large that the conversion to int overflows. 392 * When long double arithmetic is slow, however, the following 393 * non-portable code is preferable. 394 */ 395 y = x; 396 *(2 + (int *)&y) = *(3 + (int *)&y) = 0; 397 t *= expl(-y * y) * expl(-(x - y) * (x + y)); 398 return (t); 399 }