1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #include "libm.h" /* __k_clog_r */ 32 #include "complex_wrapper.h" 33 34 35 /* 36 * double __k_clog_r(double x, double y, double *e); 37 * 38 * Compute real part of complex natural logarithm of x+iy in extra precision 39 * 40 * __k_clog_r returns log(hypot(x, y)) with a correction term e. 41 * 42 * Accuracy: 70 bits 43 * 44 * Method. 45 * Let Z = x*x + y*y. Z can be normalized as Z = 2^N * z, 1 <= z < 2. 46 * We further break down z into 1 + zk + zh + zt, where 47 * zk = K*(2^-7) matches z to 7.5 significant bits, 0 <= K <= 2^(-7)-1 48 * zh = (z-zk) rounded to 24 bits 49 * zt = (z-zk-zh) rounded. 50 * 51 * z - (1+zk) (zh+zt) 52 * Let s = ------------ = ---------------, then 53 * z + (1+zk) 2(1+zk)+zh+zt 54 * z 55 * log(Z) = N*log2 + log(z) = N*log2 + log(1+zk) + log(------) 56 * 1+zk 57 * 1+s 58 * = N*log2 + log(1+zk) + log(---) 59 * 1-s 60 * 61 * 1 3 1 5 62 * = N*log2 + log(1+zk) + 2s + -- (2s) + -- (2s) + ... 63 * 12 80 64 * 65 * Note 1. For IEEE double precision, a seven degree odd polynomial 66 * 2s + P1*(2s)^3 + P2*(2s)^5 + P3*(2s)^7 67 * is generated by a special remez algorithm to 68 * approx log((1+s)/(1-s)) accurte to 72 bits. 69 * Note 2. 2s can be computed accurately as s2h+s2t by 70 * r = 2/((zh+zt)+2(1+zk)) 71 * s2 = r*(zh+zt) 72 * s2h = s2 rounded to float; v = 0.5*s2h; 73 * s2t = r*((((zh-s2h*(1+zk))-v*zh)+zt)-v*zt) 74 */ 75 76 static const double zero = 0.0, 77 half = 0.5, 78 two = 2.0, 79 two120 = 1.32922799578491587290e+36, /* 2^120 */ 80 ln2_h = 6.93147180369123816490e-01, /* 3fe62e42 fee00000 */ 81 ln2_t = 1.90821492927058770002e-10, /* 3dea39ef 35793c76 */ 82 P1 = .083333333333333351554108717377986202224765262191125, 83 P2 = .01249999999819227552330700574633767185896464873834375, 84 P3 = .0022321938458645656605471559987512516234702284287265625; 85 86 /* 87 * T[2k, 2k+1] = log(1+k*2^-7) for k = 0, ..., 2^7 - 1, 88 * with T[2k] * 2^40 is an int 89 */ 90 static const double TBL_log1k[] = { 91 0.00000000000000000000e+00, 0.00000000000000000000e+00, 92 7.78214044203195953742e-03, 2.29894100462035112076e-14, 93 1.55041865355087793432e-02, 4.56474807636434698847e-13, 94 2.31670592811497044750e-02, 3.84673753843363762372e-13, 95 3.07716586667083902285e-02, 4.52981425779092882775e-14, 96 3.83188643018002039753e-02, 3.36395218465265063278e-13, 97 4.58095360309016541578e-02, 3.92549008891706208826e-13, 98 5.32445145181554835290e-02, 6.56799336898521766515e-13, 99 6.06246218158048577607e-02, 6.29984819938331143924e-13, 100 6.79506619080711971037e-02, 4.36552290856295281946e-13, 101 7.52234212368421140127e-02, 7.45411685916941618656e-13, 102 8.24436692109884461388e-02, 8.61451293608781447223e-14, 103 8.96121586893059429713e-02, 3.81189648692113819551e-13, 104 9.67296264579999842681e-02, 5.51128027471986918274e-13, 105 1.03796793680885457434e-01, 7.58107392301637643358e-13, 106 1.10814366339582193177e-01, 7.07921017612766061755e-13, 107 1.17783035655520507134e-01, 8.62947404296943765415e-13, 108 1.24703478500123310369e-01, 8.33925494898414856118e-13, 109 1.31576357788617315236e-01, 1.01957352237084734958e-13, 110 1.38402322858382831328e-01, 7.36304357708705134617e-13, 111 1.45182009843665582594e-01, 8.32314688404647202319e-13, 112 1.51916042025732167531e-01, 1.09807540998552379211e-13, 113 1.58605030175749561749e-01, 8.89022343972466269900e-13, 114 1.65249572894936136436e-01, 3.71026439894104998399e-13, 115 1.71850256926518341061e-01, 1.40881279371111350341e-13, 116 1.78407657472234859597e-01, 5.83437522462346671423e-13, 117 1.84922338493379356805e-01, 6.32635858668445232946e-13, 118 1.91394852999110298697e-01, 5.19155912393432989209e-13, 119 1.97825743329303804785e-01, 6.16075577558872326221e-13, 120 2.04215541428311553318e-01, 3.79338185766902218086e-13, 121 2.10564769106895255391e-01, 4.54382278998146218219e-13, 122 2.16873938300523150247e-01, 9.12093724991498410553e-14, 123 2.23143551314024080057e-01, 1.85675709597960106615e-13, 124 2.29374101064422575291e-01, 4.23254700234549300166e-13, 125 2.35566071311950508971e-01, 8.16400106820959292914e-13, 126 2.41719936886511277407e-01, 6.33890736899755317832e-13, 127 2.47836163904139539227e-01, 4.41717553713155466566e-13, 128 2.53915209980732470285e-01, 2.30973852175869394892e-13, 129 2.59957524436686071567e-01, 2.39995404842117353465e-13, 130 2.65963548496984003577e-01, 1.53937761744554075681e-13, 131 2.71933715483100968413e-01, 5.40790418614551497411e-13, 132 2.77868451003087102436e-01, 3.69203750820800887027e-13, 133 2.83768173129828937817e-01, 8.15660529536291275782e-13, 134 2.89633292582948342897e-01, 9.43339818951269030846e-14, 135 2.95464212893421063200e-01, 4.14813187042585679830e-13, 136 3.01261330577290209476e-01, 8.71571536970835103739e-13, 137 3.07025035294827830512e-01, 8.40315630479242455758e-14, 138 3.12755710003330023028e-01, 5.66865358290073900922e-13, 139 3.18453731118097493891e-01, 4.37121919574291444278e-13, 140 3.24119468653407238889e-01, 8.04737201185162774515e-13, 141 3.29753286371669673827e-01, 7.98307987877335024112e-13, 142 3.35355541920762334485e-01, 3.75495772572598557174e-13, 143 3.40926586970454081893e-01, 1.39128412121975659358e-13, 144 3.46466767346100823488e-01, 1.07757430375726404546e-13, 145 3.51976423156884266064e-01, 2.93918591876480007730e-13, 146 3.57455888921322184615e-01, 4.81589611172320539489e-13, 147 3.62905493689140712377e-01, 2.27740761140395561986e-13, 148 3.68325561158599157352e-01, 1.08495696229679121506e-13, 149 3.73716409792905324139e-01, 6.78756682315870616582e-13, 150 3.79078352934811846353e-01, 1.57612037739694350287e-13, 151 3.84411698910298582632e-01, 3.34571026954408237380e-14, 152 3.89716751139530970249e-01, 4.94243121138567024911e-13, 153 3.94993808240542421117e-01, 3.26556988969071456956e-13, 154 4.00243164126550254878e-01, 4.62452051668403792833e-13, 155 4.05465108107819105498e-01, 3.45276479520397708744e-13, 156 4.10659924984429380856e-01, 8.39005077851830734139e-13, 157 4.15827895143593195826e-01, 1.17769787513692141889e-13, 158 4.20969294643327884842e-01, 8.01751287156832458079e-13, 159 4.26084395310681429692e-01, 2.18633432932159103190e-13, 160 4.31173464818130014464e-01, 2.41326394913331314894e-13, 161 4.36236766774527495727e-01, 3.90574622098307022265e-13, 162 4.41274560804231441580e-01, 6.43787909737320689684e-13, 163 4.46287102628048160113e-01, 3.71351419195920213229e-13, 164 4.51274644138720759656e-01, 7.37825488412103968058e-13, 165 4.56237433480964682531e-01, 6.22911850193784704748e-13, 166 4.61175715121498797089e-01, 6.71369279138460114513e-13, 167 4.66089729924533457961e-01, 6.57665976858006147528e-14, 168 4.70979715218163619284e-01, 6.27393263311115598424e-13, 169 4.75845904869856894948e-01, 1.07019317621142549209e-13, 170 4.80688529345570714213e-01, 1.81193463664411114729e-13, 171 4.85507815781602403149e-01, 9.84046527823262695501e-14, 172 4.90303988044615834951e-01, 5.78003198945402769376e-13, 173 4.95077266797125048470e-01, 7.26466128212511528295e-13, 174 4.99827869555701909121e-01, 7.47420700205478712293e-13, 175 5.04556010751912253909e-01, 4.83033149495532022300e-13, 176 5.09261901789614057634e-01, 1.93889170049107088943e-13, 177 5.13945751101346104406e-01, 8.88212395185718544720e-13, 178 5.18607764207445143256e-01, 6.00488896640545761201e-13, 179 5.23248143764249107335e-01, 2.98729182044413286731e-13, 180 5.27867089620485785417e-01, 3.56599696633478298092e-13, 181 5.32464798869114019908e-01, 3.57823965912763837621e-13, 182 5.37041465896436420735e-01, 4.47233831757482468946e-13, 183 5.41597282432121573947e-01, 6.22797629172251525649e-13, 184 5.46132437597407260910e-01, 7.28389472720657362987e-13, 185 5.50647117952394182794e-01, 2.68096466152116723636e-13, 186 5.55141507539701706264e-01, 7.99886451312335479470e-13, 187 5.59615787935399566777e-01, 2.31194938380053776320e-14, 188 5.64070138284478161950e-01, 3.24804121719935740729e-13, 189 5.68504735351780254859e-01, 8.88457219261483317716e-13, 190 5.72919753561109246220e-01, 6.76262872317054154667e-13, 191 5.77315365034337446559e-01, 4.86157758891509033842e-13, 192 5.81691739634152327199e-01, 4.70155322075549811780e-13, 193 5.86049045003164792433e-01, 4.13416470738355643357e-13, 194 5.90387446602107957006e-01, 6.84176364159146659095e-14, 195 5.94707107746216934174e-01, 4.75855340044306376333e-13, 196 5.99008189645246602595e-01, 8.36796786747576938145e-13, 197 6.03290851438032404985e-01, 5.18573553063418286042e-14, 198 6.07555250224322662689e-01, 2.19132812293400917731e-13, 199 6.11801541105705837253e-01, 2.87066276408616768331e-13, 200 6.16029877214714360889e-01, 7.99658758518543977451e-13, 201 6.20240409751204424538e-01, 6.53104313776336534177e-13, 202 6.24433288011459808331e-01, 4.33692711555820529733e-13, 203 6.28608659421843185555e-01, 5.30952189118357790115e-13, 204 6.32766669570628437214e-01, 4.09392332186786656392e-13, 205 6.36907462236194987781e-01, 8.74243839148582888557e-13, 206 6.41031179420679109171e-01, 2.52181884568428814231e-13, 207 6.45137961372711288277e-01, 8.73413388168702670246e-13, 208 6.49227946624705509748e-01, 4.04309142530119209805e-13, 209 6.53301272011958644725e-01, 7.86994033233553225797e-13, 210 6.57358072708120744210e-01, 2.39285932153437645135e-13, 211 6.61398482245203922503e-01, 1.61085757539324585156e-13, 212 6.65422632544505177066e-01, 5.85271884362515112697e-13, 213 6.69430653942072240170e-01, 5.57027128793880294600e-13, 214 6.73422675211440946441e-01, 7.25773856816637653180e-13, 215 6.77398823590920073912e-01, 8.86066898134949155668e-13, 216 6.81359224807238206267e-01, 6.64862680714687006264e-13, 217 6.85304003098281100392e-01, 6.38316151706465171657e-13, 218 6.89233281238557538018e-01, 2.51442307283760746611e-13, 219 }; 220 221 /* 222 * Compute N*log2 + log(1+zk+zh+zt) in extra precision 223 */ 224 static double 225 k_log_NKz(int N, int K, double zh, double *zt) 226 { 227 double y, r, w, s2, s2h, s2t, t, zk, v, P; 228 229 ((int *)&zk)[HIWORD] = 0x3ff00000 + (K << 13); 230 ((int *)&zk)[LOWORD] = 0; 231 t = zh + (*zt); 232 r = two / (t + two * zk); 233 s2h = s2 = r * t; 234 ((int *)&s2h)[LOWORD] &= 0xe0000000; 235 v = half * s2h; 236 w = s2 * s2; 237 s2t = r * ((((zh - s2h * zk) - v * zh) + (*zt)) - v * (*zt)); 238 P = s2t + (w * s2) * ((P1 + w * P2) + (w * w) * P3); 239 P += N * ln2_t + TBL_log1k[K + K + 1]; 240 t = N * ln2_h + TBL_log1k[K + K]; 241 y = t + (P + s2h); 242 P -= ((y - t) - s2h); 243 *zt = P; 244 return (y); 245 } 246 247 double 248 __k_clog_r(double x, double y, double *er) 249 { 250 double t1, t2, t3, t4, tk, z, wh, w, zh, zk; 251 int n, k, ix, iy, iz, nx, ny, nz, i, j; 252 unsigned lx, ly; 253 254 ix = (((int *)&x)[HIWORD]) & ~0x80000000; 255 lx = ((unsigned *)&x)[LOWORD]; 256 iy = (((int *)&y)[HIWORD]) & ~0x80000000; 257 ly = ((unsigned *)&y)[LOWORD]; 258 y = fabs(y); 259 x = fabs(x); 260 261 if (ix < iy || (ix == iy && lx < ly)) { /* force x >= y */ 262 tk = x; 263 x = y; 264 y = tk; 265 n = ix, ix = iy; 266 iy = n; 267 n = lx, lx = ly; 268 ly = n; 269 } 270 271 *er = zero; 272 nx = ix >> 20; 273 ny = iy >> 20; 274 275 if (nx >= 0x7ff) { /* x or y is Inf or NaN */ 276 if (ISINF(ix, lx)) 277 return (x); 278 else if (ISINF(iy, ly)) 279 return (y); 280 else 281 return (x + y); 282 } 283 284 /* 285 * for tiny y (double y < 2^-35, extended y < 2^-46, quad y < 2^-70): 286 * log(sqrt(1+y^2)) = (y^2)/2 - (y^4)/8 + ... ~= (y^2)/2 287 */ 288 if ((((ix - 0x3ff00000) | lx) == 0) && ny < (0x3ff - 35)) { 289 t2 = y * y; 290 291 if (ny >= 565) { /* compute er = tail of t2 */ 292 ((int *)&wh)[HIWORD] = iy; 293 ((unsigned *)&wh)[LOWORD] = ly & 0xf8000000; 294 *er = half * ((y - wh) * (y + wh) - (t2 - wh * wh)); 295 } 296 297 return (half * t2); 298 } 299 300 /* 301 * x or y is subnormal or zero 302 */ 303 if (nx == 0) { 304 if ((ix | lx) == 0) { 305 return (-1.0 / x); 306 } else { 307 x *= two120; 308 y *= two120; 309 ix = ((int *)&x)[HIWORD]; 310 lx = ((unsigned *)&x)[LOWORD]; 311 iy = ((int *)&y)[HIWORD]; 312 ly = ((unsigned *)&y)[LOWORD]; 313 nx = (ix >> 20) - 120; 314 ny = (iy >> 20) - 120; 315 316 /* guard subnormal flush to 0 */ 317 if ((ix | lx) == 0) 318 return (-1.0 / x); 319 } 320 } else if (ny == 0) { /* y subnormal, scale it */ 321 y *= two120; 322 iy = ((int *)&y)[HIWORD]; 323 ly = ((unsigned *)&y)[LOWORD]; 324 ny = (iy >> 20) - 120; 325 } 326 327 n = nx - ny; 328 329 /* 330 * return log(x) when y is zero or x >> y so that 331 * log(x) ~ log(sqrt(x*x+y*y)) to 27 extra bits 332 * (n > 62 for double, 78 for i386 extended, 122 for quad) 333 */ 334 if (n > 62 || (iy | ly) == 0) { 335 i = (0x000fffff & ix) | 0x3ff00000; /* normalize x */ 336 ((int *)&x)[HIWORD] = i; 337 i += 0x1000; 338 ((int *)&zk)[HIWORD] = i & 0xffffe000; 339 ((int *)&zk)[LOWORD] = 0; /* zk matches 7.5 bits of x */ 340 z = x - zk; 341 zh = (double)((float)z); 342 i >>= 13; 343 k = i & 0x7f; /* index of zk */ 344 n = nx - 0x3ff; 345 *er = z - zh; 346 347 if (i >> 17) { /* if zk = 2.0, adjust scaling */ 348 n += 1; 349 zh *= 0.5; 350 *er *= 0.5; 351 } 352 353 w = k_log_NKz(n, k, zh, er); 354 } else { 355 /* 356 * compute z = x*x + y*y 357 */ 358 ix = (ix & 0xfffff) | 0x3ff00000; 359 iy = (iy & 0xfffff) | (0x3ff00000 - (n << 20)); 360 ((int *)&x)[HIWORD] = ix; 361 ((int *)&y)[HIWORD] = iy; 362 t1 = x * x; 363 t2 = y * y; 364 j = ((lx >> 26) + 1) >> 1; 365 ((int *)&wh)[HIWORD] = ix + (j >> 5); 366 ((unsigned *)&wh)[LOWORD] = (j << 27); 367 z = t1 + t2; 368 369 /* 370 * higher precision simulation x*x = t1 + t3, y*y = t2 + t4 371 */ 372 tk = wh - x; 373 t3 = tk * tk - (two * wh * tk - (wh * wh - t1)); 374 j = ((ly >> 26) + 1) >> 1; 375 ((int *)&wh)[HIWORD] = iy + (j >> 5); 376 ((unsigned *)&wh)[LOWORD] = (j << 27); 377 tk = wh - y; 378 t4 = tk * tk - (two * wh * tk - (wh * wh - t2)); 379 380 /* 381 * find zk matches z to 7.5 bits 382 */ 383 nx -= 0x3ff; 384 iz = ((int *)&z)[HIWORD] + 0x1000; 385 k = (iz >> 13) & 0x7f; 386 nz = (iz >> 20) - 0x3ff; 387 ((int *)&zk)[HIWORD] = iz & 0xffffe000; 388 ((int *)&zk)[LOWORD] = 0; 389 390 /* 391 * order t1,t2,t3,t4 according to their size 392 */ 393 if (t2 >= fabs(t3)) { 394 if (fabs(t3) < fabs(t4)) { 395 wh = t3; 396 t3 = t4; 397 t4 = wh; 398 } 399 } else { 400 wh = t2; 401 t2 = t3; 402 t3 = wh; 403 } 404 405 /* 406 * higher precision simulation: x * x + y * y = t1 + t2 + t3 + t4 407 * = zk (7 bits) + zh (24 bits) + *er (tail) and call k_log_NKz 408 */ 409 tk = t1 - zk; 410 zh = ((tk + t2) + t3) + t4; 411 ((int *)&zh)[LOWORD] &= 0xe0000000; 412 w = fabs(zh); 413 414 if (w >= fabs(t2)) { 415 *er = (((tk - zh) + t2) + t3) + t4; 416 } else { 417 if (n == 0) { 418 wh = half * zk; 419 wh = (t1 - wh) - (wh - t2); 420 } else { 421 wh = tk + t2; 422 } 423 424 if (w >= fabs(t3)) { 425 *er = ((wh - zh) + t3) + t4; 426 } else { 427 z = t3; 428 t3 += t4; 429 t4 -= t3 - z; 430 431 if (w >= fabs(t3)) 432 *er = ((wh - zh) + t3) + t4; 433 else 434 *er = ((wh + t3) - zh) + t4; 435 } 436 } 437 438 if (nz == 3) { 439 zh *= 0.125; 440 *er *= 0.125; 441 } 442 443 if (nz == 2) { 444 zh *= 0.25; 445 *er *= 0.25; 446 } 447 448 if (nz == 1) { 449 zh *= half; 450 *er *= half; 451 } 452 453 nz += nx + nx; 454 w = half * k_log_NKz(nz, k, zh, er); 455 *er *= half; 456 } 457 458 return (w); 459 }