1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #pragma weak __sqrtl = sqrtl 32 33 #include "libm.h" 34 #include "longdouble.h" 35 36 extern int __swapTE(int); 37 extern int __swapEX(int); 38 extern enum fp_direction_type __swapRD(enum fp_direction_type); 39 40 /* 41 * in struct longdouble, msw consists of 42 * unsigned short sgn:1; 43 * unsigned short exp:15; 44 * unsigned short frac1:16; 45 */ 46 47 #ifdef __LITTLE_ENDIAN 48 /* array indices used to access words within a double */ 49 #define HIWORD 1 50 #define LOWORD 0 51 52 /* structure used to access words within a quad */ 53 union longdouble { 54 struct { 55 unsigned int frac4; 56 unsigned int frac3; 57 unsigned int frac2; 58 unsigned int msw; 59 } l; 60 long double d; 61 }; 62 63 /* default NaN returned for sqrt(neg) */ 64 static const union longdouble qnan = { 65 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff 66 }; 67 68 /* signalling NaN used to raise invalid */ 69 static const union { 70 unsigned u[2]; 71 double d; 72 } snan = { 0, 0x7ff00001 }; 73 #else 74 /* array indices used to access words within a double */ 75 #define HIWORD 0 76 #define LOWORD 1 77 78 /* structure used to access words within a quad */ 79 union longdouble { 80 struct { 81 unsigned int msw; 82 unsigned int frac2; 83 unsigned int frac3; 84 unsigned int frac4; 85 } l; 86 long double d; 87 }; 88 89 /* default NaN returned for sqrt(neg) */ 90 static const union longdouble qnan = { 91 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff 92 }; 93 94 /* signalling NaN used to raise invalid */ 95 static const union { 96 unsigned u[2]; 97 double d; 98 } snan = { 0x7ff00001, 0 }; 99 #endif /* __LITTLE_ENDIAN */ 100 101 static const double zero = 0.0, 102 half = 0.5, 103 one = 1.0, 104 huge = 1.0e300, 105 tiny = 1.0e-300, 106 two36 = 6.87194767360000000000e+10, 107 two30 = 1.07374182400000000000e+09, 108 two6 = 6.40000000000000000000e+01, 109 two4 = 1.60000000000000000000e+01, 110 twom18 = 3.81469726562500000000e-06, 111 twom28 = 3.72529029846191406250e-09, 112 twom42 = 2.27373675443232059479e-13, 113 twom60 = 8.67361737988403547206e-19, 114 twom62 = 2.16840434497100886801e-19, 115 twom66 = 1.35525271560688054251e-20, 116 twom90 = 8.07793566946316088742e-28, 117 twom113 = 9.62964972193617926528e-35, 118 twom124 = 4.70197740328915003187e-38; 119 120 /* 121 * Extract the exponent and normalized significand (represented as 122 * an array of five doubles) from a finite, nonzero quad. 123 */ 124 static int 125 __q_unpack(const union longdouble *x, double *s) 126 { 127 union { 128 double d; 129 unsigned int l[2]; 130 } u; 131 132 double b; 133 unsigned int lx, w[3]; 134 int ex; 135 136 /* get the normalized significand and exponent */ 137 ex = (int)((x->l.msw & 0x7fffffff) >> 16); 138 lx = x->l.msw & 0xffff; 139 140 if (ex) { 141 lx |= 0x10000; 142 w[0] = x->l.frac2; 143 w[1] = x->l.frac3; 144 w[2] = x->l.frac4; 145 } else { 146 if (lx | (x->l.frac2 & 0xfffe0000)) { 147 w[0] = x->l.frac2; 148 w[1] = x->l.frac3; 149 w[2] = x->l.frac4; 150 ex = 1; 151 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) { 152 lx = x->l.frac2; 153 w[0] = x->l.frac3; 154 w[1] = x->l.frac4; 155 w[2] = 0; 156 ex = -31; 157 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) { 158 lx = x->l.frac3; 159 w[0] = x->l.frac4; 160 w[1] = w[2] = 0; 161 ex = -63; 162 } else { 163 lx = x->l.frac4; 164 w[0] = w[1] = w[2] = 0; 165 ex = -95; 166 } 167 168 while ((lx & 0x10000) == 0) { 169 lx = (lx << 1) | (w[0] >> 31); 170 w[0] = (w[0] << 1) | (w[1] >> 31); 171 w[1] = (w[1] << 1) | (w[2] >> 31); 172 w[2] <<= 1; 173 ex--; 174 } 175 } 176 177 /* extract the significand into five doubles */ 178 u.l[HIWORD] = 0x42300000; 179 u.l[LOWORD] = 0; 180 b = u.d; 181 u.l[LOWORD] = lx; 182 s[0] = u.d - b; 183 184 u.l[HIWORD] = 0x40300000; 185 u.l[LOWORD] = 0; 186 b = u.d; 187 u.l[LOWORD] = w[0] & 0xffffff00; 188 s[1] = u.d - b; 189 190 u.l[HIWORD] = 0x3e300000; 191 u.l[LOWORD] = 0; 192 b = u.d; 193 u.l[HIWORD] |= w[0] & 0xff; 194 u.l[LOWORD] = w[1] & 0xffff0000; 195 s[2] = u.d - b; 196 197 u.l[HIWORD] = 0x3c300000; 198 u.l[LOWORD] = 0; 199 b = u.d; 200 u.l[HIWORD] |= w[1] & 0xffff; 201 u.l[LOWORD] = w[2] & 0xff000000; 202 s[3] = u.d - b; 203 204 u.l[HIWORD] = 0x3c300000; 205 u.l[LOWORD] = 0; 206 b = u.d; 207 u.l[LOWORD] = w[2] & 0xffffff; 208 s[4] = u.d - b; 209 210 return (ex - 0x3fff); 211 } 212 213 /* 214 * Pack an exponent and array of three doubles representing a finite, 215 * nonzero number into a quad. Assume the sign is already there and 216 * the rounding mode has been fudged accordingly. 217 */ 218 static void 219 __q_pack(const double *z, int exp, enum fp_direction_type rm, 220 union longdouble *x, int *inexact) 221 { 222 union { 223 double d; 224 unsigned int l[2]; 225 } u; 226 227 double s[3], t, t2; 228 unsigned int msw, frac2, frac3, frac4; 229 230 /* bias exponent and strip off integer bit */ 231 exp += 0x3fff; 232 s[0] = z[0] - one; 233 s[1] = z[1]; 234 s[2] = z[2]; 235 236 /* 237 * chop the significand to obtain the fraction; 238 * use round-to-minus-infinity to ensure chopping 239 */ 240 (void) __swapRD(fp_negative); 241 242 /* extract the first eighty bits of fraction */ 243 t = s[1] + s[2]; 244 u.d = two36 + (s[0] + t); 245 msw = u.l[LOWORD]; 246 s[0] -= (u.d - two36); 247 248 u.d = two4 + (s[0] + t); 249 frac2 = u.l[LOWORD]; 250 s[0] -= (u.d - two4); 251 252 u.d = twom28 + (s[0] + t); 253 frac3 = u.l[LOWORD]; 254 s[0] -= (u.d - twom28); 255 256 /* condense the remaining fraction; errors here won't matter */ 257 t = s[0] + s[1]; 258 s[1] = ((s[0] - t) + s[1]) + s[2]; 259 s[0] = t; 260 261 /* get the last word of fraction */ 262 u.d = twom60 + (s[0] + s[1]); 263 frac4 = u.l[LOWORD]; 264 s[0] -= (u.d - twom60); 265 266 /* 267 * keep track of what's left for rounding; note that 268 * t2 will be non-negative due to rounding mode 269 */ 270 t = s[0] + s[1]; 271 t2 = (s[0] - t) + s[1]; 272 273 if (t != zero) { 274 *inexact = 1; 275 276 /* decide whether to round the fraction up */ 277 if (rm == fp_positive || (rm == fp_nearest && (t > twom113 || 278 (t == twom113 && (t2 != zero || frac4 & 1))))) { 279 /* round up and renormalize if necessary */ 280 if (++frac4 == 0) { 281 if (++frac3 == 0) { 282 if (++frac2 == 0) { 283 if (++msw == 0x10000) { 284 msw = 0; 285 exp++; 286 } 287 } 288 } 289 } 290 } 291 } 292 293 /* assemble the result */ 294 x->l.msw |= msw | (exp << 16); 295 x->l.frac2 = frac2; 296 x->l.frac3 = frac3; 297 x->l.frac4 = frac4; 298 } 299 300 /* 301 * Compute the square root of x and place the TP result in s. 302 */ 303 static void 304 __q_tp_sqrt(const double *x, double *s) 305 { 306 double c, rr, r[3], tt[3], t[5]; 307 308 /* approximate the divisor for the Newton iteration */ 309 c = sqrt((x[0] + x[1]) + x[2]); 310 rr = half / c; 311 312 /* compute the first five "digits" of the square root */ 313 t[0] = (c + two30) - two30; 314 tt[0] = t[0] + t[0]; 315 r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2]; 316 317 t[1] = (rr * (r[0] + x[3]) + two6) - two6; 318 tt[1] = t[1] + t[1]; 319 r[0] -= tt[0] * t[1]; 320 r[1] = x[3] - t[1] * t[1]; 321 c = (r[1] + twom18) - twom18; 322 r[0] += c; 323 r[1] = (r[1] - c) + x[4]; 324 325 t[2] = (rr * (r[0] + r[1]) + twom18) - twom18; 326 tt[2] = t[2] + t[2]; 327 r[0] -= tt[0] * t[2]; 328 r[1] -= tt[1] * t[2]; 329 c = (r[1] + twom42) - twom42; 330 r[0] += c; 331 r[1] = (r[1] - c) - t[2] * t[2]; 332 333 t[3] = (rr * (r[0] + r[1]) + twom42) - twom42; 334 r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3]; 335 r[1] = -tt[2] * t[3]; 336 c = (r[1] + twom90) - twom90; 337 r[0] += c; 338 r[1] = (r[1] - c) - t[3] * t[3]; 339 340 t[4] = (rr * (r[0] + r[1]) + twom66) - twom66; 341 342 /* here we just need to get the sign of the remainder */ 343 c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1]) - tt[2] * t[4]) - 344 (t[3] + t[3]) * t[4]) - t[4] * t[4]; 345 346 /* reduce to three doubles */ 347 t[0] += t[1]; 348 t[1] = t[2] + t[3]; 349 t[2] = t[4]; 350 351 /* if the third term might lie on a rounding boundary, perturb it */ 352 if (c != zero && t[2] == (twom62 + t[2]) - twom62) { 353 if (c < zero) 354 t[2] -= twom124; 355 else 356 t[2] += twom124; 357 } 358 359 /* condense the square root */ 360 c = t[1] + t[2]; 361 t[2] += (t[1] - c); 362 t[1] = c; 363 c = t[0] + t[1]; 364 s[1] = t[1] + (t[0] - c); 365 s[0] = c; 366 367 if (s[1] == zero) { 368 c = s[0] + t[2]; 369 s[1] = t[2] + (s[0] - c); 370 s[0] = c; 371 s[2] = zero; 372 } else { 373 c = s[1] + t[2]; 374 s[2] = t[2] + (s[1] - c); 375 s[1] = c; 376 } 377 } 378 379 long double 380 sqrtl(long double ldx) 381 { 382 union longdouble x; 383 volatile double t; 384 double xx[5], zz[3]; 385 enum fp_direction_type rm; 386 int ex, inexact, exc, traps; 387 388 /* clear cexc */ 389 t = zero; 390 t -= zero; 391 392 /* check for zero operand */ 393 x.d = ldx; 394 395 if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4)) 396 return (ldx); 397 398 /* handle nan and inf cases */ 399 if ((x.l.msw & 0x7fffffff) >= 0x7fff0000) { 400 if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4) { 401 if (!(x.l.msw & 0x8000)) { 402 /* snan, signal invalid */ 403 t += snan.d; 404 } 405 406 x.l.msw |= 0x8000; 407 return (x.d); 408 } 409 410 if (x.l.msw & 0x80000000) { 411 /* sqrt(-inf), signal invalid */ 412 t = -one; 413 t = sqrt(t); 414 return (qnan.d); 415 } 416 417 /* sqrt(inf), return inf */ 418 return (x.d); 419 } 420 421 /* handle negative numbers */ 422 if (x.l.msw & 0x80000000) { 423 t = -one; 424 t = sqrt(t); 425 return (qnan.d); 426 } 427 428 /* now x is finite, positive */ 429 430 traps = __swapTE(0); 431 exc = __swapEX(0); 432 rm = __swapRD(fp_nearest); 433 434 ex = __q_unpack(&x, xx); 435 436 if (ex & 1) { 437 /* make exponent even */ 438 xx[0] += xx[0]; 439 xx[1] += xx[1]; 440 xx[2] += xx[2]; 441 xx[3] += xx[3]; 442 xx[4] += xx[4]; 443 ex--; 444 } 445 446 __q_tp_sqrt(xx, zz); 447 448 /* put everything together */ 449 x.l.msw = 0; 450 inexact = 0; 451 __q_pack(zz, ex >> 1, rm, &x, &inexact); 452 453 (void) __swapRD(rm); 454 (void) __swapEX(exc); 455 (void) __swapTE(traps); 456 457 if (inexact) { 458 t = huge; 459 t += tiny; 460 } 461 462 return (x.d); 463 }