1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #pragma weak sqrtl = __sqrtl 31 32 #include "libm.h" 33 #include "longdouble.h" 34 35 extern int __swapTE(int); 36 extern int __swapEX(int); 37 extern enum fp_direction_type __swapRD(enum fp_direction_type); 38 39 /* 40 * in struct longdouble, msw consists of 41 * unsigned short sgn:1; 42 * unsigned short exp:15; 43 * unsigned short frac1:16; 44 */ 45 46 #ifdef __LITTLE_ENDIAN 47 48 /* array indices used to access words within a double */ 49 #define HIWORD 1 50 #define LOWORD 0 51 52 /* structure used to access words within a quad */ 53 union longdouble { 54 struct { 55 unsigned int frac4; 56 unsigned int frac3; 57 unsigned int frac2; 58 unsigned int msw; 59 } l; 60 long double d; 61 }; 62 63 /* default NaN returned for sqrt(neg) */ 64 static const union longdouble 65 qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff }; 66 67 /* signalling NaN used to raise invalid */ 68 static const union { 69 unsigned u[2]; 70 double d; 71 } snan = { 0, 0x7ff00001 }; 72 73 #else 74 75 /* array indices used to access words within a double */ 76 #define HIWORD 0 77 #define LOWORD 1 78 79 /* structure used to access words within a quad */ 80 union longdouble { 81 struct { 82 unsigned int msw; 83 unsigned int frac2; 84 unsigned int frac3; 85 unsigned int frac4; 86 } l; 87 long double d; 88 }; 89 90 /* default NaN returned for sqrt(neg) */ 91 static const union longdouble 92 qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff }; 93 94 /* signalling NaN used to raise invalid */ 95 static const union { 96 unsigned u[2]; 97 double d; 98 } snan = { 0x7ff00001, 0 }; 99 100 #endif /* __LITTLE_ENDIAN */ 101 102 103 static const double 104 zero = 0.0, 105 half = 0.5, 106 one = 1.0, 107 huge = 1.0e300, 108 tiny = 1.0e-300, 109 two36 = 6.87194767360000000000e+10, 110 two30 = 1.07374182400000000000e+09, 111 two6 = 6.40000000000000000000e+01, 112 two4 = 1.60000000000000000000e+01, 113 twom18 = 3.81469726562500000000e-06, 114 twom28 = 3.72529029846191406250e-09, 115 twom42 = 2.27373675443232059479e-13, 116 twom60 = 8.67361737988403547206e-19, 117 twom62 = 2.16840434497100886801e-19, 118 twom66 = 1.35525271560688054251e-20, 119 twom90 = 8.07793566946316088742e-28, 120 twom113 = 9.62964972193617926528e-35, 121 twom124 = 4.70197740328915003187e-38; 122 123 124 /* 125 * Extract the exponent and normalized significand (represented as 126 * an array of five doubles) from a finite, nonzero quad. 127 */ 128 static int 129 __q_unpack( const union longdouble *x, double *s ) 130 { 131 union { 132 double d; 133 unsigned int l[2]; 134 } u; 135 double b; 136 unsigned int lx, w[3]; 137 int ex; 138 139 /* get the normalized significand and exponent */ 140 ex = (int) ( ( x->l.msw & 0x7fffffff ) >> 16 ); 141 lx = x->l.msw & 0xffff; 142 if ( ex ) 143 { 144 lx |= 0x10000; 145 w[0] = x->l.frac2; 146 w[1] = x->l.frac3; 147 w[2] = x->l.frac4; 148 } 149 else 150 { 151 if ( lx | ( x->l.frac2 & 0xfffe0000 ) ) 152 { 153 w[0] = x->l.frac2; 154 w[1] = x->l.frac3; 155 w[2] = x->l.frac4; 156 ex = 1; 157 } 158 else if ( x->l.frac2 | ( x->l.frac3 & 0xfffe0000 ) ) 159 { 160 lx = x->l.frac2; 161 w[0] = x->l.frac3; 162 w[1] = x->l.frac4; 163 w[2] = 0; 164 ex = -31; 165 } 166 else if ( x->l.frac3 | ( x->l.frac4 & 0xfffe0000 ) ) 167 { 168 lx = x->l.frac3; 169 w[0] = x->l.frac4; 170 w[1] = w[2] = 0; 171 ex = -63; 172 } 173 else 174 { 175 lx = x->l.frac4; 176 w[0] = w[1] = w[2] = 0; 177 ex = -95; 178 } 179 while ( ( lx & 0x10000 ) == 0 ) 180 { 181 lx = ( lx << 1 ) | ( w[0] >> 31 ); 182 w[0] = ( w[0] << 1 ) | ( w[1] >> 31 ); 183 w[1] = ( w[1] << 1 ) | ( w[2] >> 31 ); 184 w[2] <<= 1; 185 ex--; 186 } 187 } 188 189 /* extract the significand into five doubles */ 190 u.l[HIWORD] = 0x42300000; 191 u.l[LOWORD] = 0; 192 b = u.d; 193 u.l[LOWORD] = lx; 194 s[0] = u.d - b; 195 196 u.l[HIWORD] = 0x40300000; 197 u.l[LOWORD] = 0; 198 b = u.d; 199 u.l[LOWORD] = w[0] & 0xffffff00; 200 s[1] = u.d - b; 201 202 u.l[HIWORD] = 0x3e300000; 203 u.l[LOWORD] = 0; 204 b = u.d; 205 u.l[HIWORD] |= w[0] & 0xff; 206 u.l[LOWORD] = w[1] & 0xffff0000; 207 s[2] = u.d - b; 208 209 u.l[HIWORD] = 0x3c300000; 210 u.l[LOWORD] = 0; 211 b = u.d; 212 u.l[HIWORD] |= w[1] & 0xffff; 213 u.l[LOWORD] = w[2] & 0xff000000; 214 s[3] = u.d - b; 215 216 u.l[HIWORD] = 0x3c300000; 217 u.l[LOWORD] = 0; 218 b = u.d; 219 u.l[LOWORD] = w[2] & 0xffffff; 220 s[4] = u.d - b; 221 222 return ex - 0x3fff; 223 } 224 225 226 /* 227 * Pack an exponent and array of three doubles representing a finite, 228 * nonzero number into a quad. Assume the sign is already there and 229 * the rounding mode has been fudged accordingly. 230 */ 231 static void 232 __q_pack( const double *z, int exp, enum fp_direction_type rm, 233 union longdouble *x, int *inexact ) 234 { 235 union { 236 double d; 237 unsigned int l[2]; 238 } u; 239 double s[3], t, t2; 240 unsigned int msw, frac2, frac3, frac4; 241 242 /* bias exponent and strip off integer bit */ 243 exp += 0x3fff; 244 s[0] = z[0] - one; 245 s[1] = z[1]; 246 s[2] = z[2]; 247 248 /* 249 * chop the significand to obtain the fraction; 250 * use round-to-minus-infinity to ensure chopping 251 */ 252 (void) __swapRD( fp_negative ); 253 254 /* extract the first eighty bits of fraction */ 255 t = s[1] + s[2]; 256 u.d = two36 + ( s[0] + t ); 257 msw = u.l[LOWORD]; 258 s[0] -= ( u.d - two36 ); 259 260 u.d = two4 + ( s[0] + t ); 261 frac2 = u.l[LOWORD]; 262 s[0] -= ( u.d - two4 ); 263 264 u.d = twom28 + ( s[0] + t ); 265 frac3 = u.l[LOWORD]; 266 s[0] -= ( u.d - twom28 ); 267 268 /* condense the remaining fraction; errors here won't matter */ 269 t = s[0] + s[1]; 270 s[1] = ( ( s[0] - t ) + s[1] ) + s[2]; 271 s[0] = t; 272 273 /* get the last word of fraction */ 274 u.d = twom60 + ( s[0] + s[1] ); 275 frac4 = u.l[LOWORD]; 276 s[0] -= ( u.d - twom60 ); 277 278 /* 279 * keep track of what's left for rounding; note that 280 * t2 will be non-negative due to rounding mode 281 */ 282 t = s[0] + s[1]; 283 t2 = ( s[0] - t ) + s[1]; 284 285 if ( t != zero ) 286 { 287 *inexact = 1; 288 289 /* decide whether to round the fraction up */ 290 if ( rm == fp_positive || ( rm == fp_nearest && ( t > twom113 || 291 ( t == twom113 && ( t2 != zero || frac4 & 1 ) ) ) ) ) 292 { 293 /* round up and renormalize if necessary */ 294 if ( ++frac4 == 0 ) 295 if ( ++frac3 == 0 ) 296 if ( ++frac2 == 0 ) 297 if ( ++msw == 0x10000 ) 298 { 299 msw = 0; 300 exp++; 301 } 302 } 303 } 304 305 /* assemble the result */ 306 x->l.msw |= msw | ( exp << 16 ); 307 x->l.frac2 = frac2; 308 x->l.frac3 = frac3; 309 x->l.frac4 = frac4; 310 } 311 312 313 /* 314 * Compute the square root of x and place the TP result in s. 315 */ 316 static void 317 __q_tp_sqrt( const double *x, double *s ) 318 { 319 double c, rr, r[3], tt[3], t[5]; 320 321 /* approximate the divisor for the Newton iteration */ 322 c = sqrt( ( x[0] + x[1] ) + x[2] ); 323 rr = half / c; 324 325 /* compute the first five "digits" of the square root */ 326 t[0] = ( c + two30 ) - two30; 327 tt[0] = t[0] + t[0]; 328 r[0] = ( ( x[0] - t[0] * t[0] ) + x[1] ) + x[2]; 329 330 t[1] = ( rr * ( r[0] + x[3] ) + two6 ) - two6; 331 tt[1] = t[1] + t[1]; 332 r[0] -= tt[0] * t[1]; 333 r[1] = x[3] - t[1] * t[1]; 334 c = ( r[1] + twom18 ) - twom18; 335 r[0] += c; 336 r[1] = ( r[1] - c ) + x[4]; 337 338 t[2] = ( rr * ( r[0] + r[1] ) + twom18 ) - twom18; 339 tt[2] = t[2] + t[2]; 340 r[0] -= tt[0] * t[2]; 341 r[1] -= tt[1] * t[2]; 342 c = ( r[1] + twom42 ) - twom42; 343 r[0] += c; 344 r[1] = ( r[1] - c ) - t[2] * t[2]; 345 346 t[3] = ( rr * ( r[0] + r[1] ) + twom42 ) - twom42; 347 r[0] = ( ( r[0] - tt[0] * t[3] ) + r[1] ) - tt[1] * t[3]; 348 r[1] = -tt[2] * t[3]; 349 c = ( r[1] + twom90 ) - twom90; 350 r[0] += c; 351 r[1] = ( r[1] - c ) - t[3] * t[3]; 352 353 t[4] = ( rr * ( r[0] + r[1] ) + twom66 ) - twom66; 354 355 /* here we just need to get the sign of the remainder */ 356 c = ( ( ( ( ( r[0] - tt[0] * t[4] ) - tt[1] * t[4] ) + r[1] ) 357 - tt[2] * t[4] ) - ( t[3] + t[3] ) * t[4] ) - t[4] * t[4]; 358 359 /* reduce to three doubles */ 360 t[0] += t[1]; 361 t[1] = t[2] + t[3]; 362 t[2] = t[4]; 363 364 /* if the third term might lie on a rounding boundary, perturb it */ 365 if ( c != zero && t[2] == ( twom62 + t[2] ) - twom62 ) 366 { 367 if ( c < zero ) 368 t[2] -= twom124; 369 else 370 t[2] += twom124; 371 } 372 373 /* condense the square root */ 374 c = t[1] + t[2]; 375 t[2] += ( t[1] - c ); 376 t[1] = c; 377 c = t[0] + t[1]; 378 s[1] = t[1] + ( t[0] - c ); 379 s[0] = c; 380 if ( s[1] == zero ) 381 { 382 c = s[0] + t[2]; 383 s[1] = t[2] + ( s[0] - c ); 384 s[0] = c; 385 s[2] = zero; 386 } 387 else 388 { 389 c = s[1] + t[2]; 390 s[2] = t[2] + ( s[1] - c ); 391 s[1] = c; 392 } 393 } 394 395 396 long double 397 sqrtl( long double ldx ) 398 { 399 union longdouble x; 400 volatile double t; 401 double xx[5], zz[3]; 402 enum fp_direction_type rm; 403 int ex, inexact, exc, traps; 404 405 /* clear cexc */ 406 t = zero; 407 t -= zero; 408 409 /* check for zero operand */ 410 x.d = ldx; 411 if ( !( ( x.l.msw & 0x7fffffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 ) ) 412 return ldx; 413 414 /* handle nan and inf cases */ 415 if ( ( x.l.msw & 0x7fffffff ) >= 0x7fff0000 ) 416 { 417 if ( ( x.l.msw & 0xffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 ) 418 { 419 if ( !( x.l.msw & 0x8000 ) ) 420 { 421 /* snan, signal invalid */ 422 t += snan.d; 423 } 424 x.l.msw |= 0x8000; 425 return x.d; 426 } 427 if ( x.l.msw & 0x80000000 ) 428 { 429 /* sqrt(-inf), signal invalid */ 430 t = -one; 431 t = sqrt( t ); 432 return qnan.d; 433 } 434 /* sqrt(inf), return inf */ 435 return x.d; 436 } 437 438 /* handle negative numbers */ 439 if ( x.l.msw & 0x80000000 ) 440 { 441 t = -one; 442 t = sqrt( t ); 443 return qnan.d; 444 } 445 446 /* now x is finite, positive */ 447 448 traps = __swapTE( 0 ); 449 exc = __swapEX( 0 ); 450 rm = __swapRD( fp_nearest ); 451 452 ex = __q_unpack( &x, xx ); 453 if ( ex & 1 ) 454 { 455 /* make exponent even */ 456 xx[0] += xx[0]; 457 xx[1] += xx[1]; 458 xx[2] += xx[2]; 459 xx[3] += xx[3]; 460 xx[4] += xx[4]; 461 ex--; 462 } 463 __q_tp_sqrt( xx, zz ); 464 465 /* put everything together */ 466 x.l.msw = 0; 467 inexact = 0; 468 __q_pack( zz, ex >> 1, rm, &x, &inexact ); 469 470 (void) __swapRD( rm ); 471 (void) __swapEX( exc ); 472 (void) __swapTE( traps ); 473 if ( inexact ) 474 { 475 t = huge; 476 t += tiny; 477 } 478 return x.d; 479 }