illumos-gate New usr/src/lib/libm/common/Q/sqrtl.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak sqrtl = __sqrtl
  31 
  32 #include "libm.h"
  33 #include "longdouble.h"
  34 
  35 extern int __swapTE(int);
  36 extern int __swapEX(int);
  37 extern enum fp_direction_type __swapRD(enum fp_direction_type);
  38 
  39 /*
  40  * in struct longdouble, msw consists of
  41  *      unsigned short  sgn:1;
  42  *      unsigned short  exp:15;
  43  *      unsigned short  frac1:16;
  44  */
  45 
  46 #ifdef __LITTLE_ENDIAN
  47 
  48 /* array indices used to access words within a double */
  49 #define HIWORD  1
  50 #define LOWORD  0
  51 
  52 /* structure used to access words within a quad */
  53 union longdouble {
  54         struct {
  55                 unsigned int    frac4;
  56                 unsigned int    frac3;
  57                 unsigned int    frac2;
  58                 unsigned int    msw;
  59         } l;
  60         long double     d;
  61 };
  62 
  63 /* default NaN returned for sqrt(neg) */
  64 static const union longdouble
  65         qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff };
  66 
  67 /* signalling NaN used to raise invalid */
  68 static const union {
  69         unsigned u[2];
  70         double d;
  71 } snan = { 0, 0x7ff00001 };
  72 
  73 #else
  74 
  75 /* array indices used to access words within a double */
  76 #define HIWORD  0
  77 #define LOWORD  1
  78 
  79 /* structure used to access words within a quad */
  80 union longdouble {
  81         struct {
  82                 unsigned int    msw;
  83                 unsigned int    frac2;
  84                 unsigned int    frac3;
  85                 unsigned int    frac4;
  86         } l;
  87         long double     d;
  88 };
  89 
  90 /* default NaN returned for sqrt(neg) */
  91 static const union longdouble
  92         qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };
  93 
  94 /* signalling NaN used to raise invalid */
  95 static const union {
  96         unsigned u[2];
  97         double d;
  98 } snan = { 0x7ff00001, 0 };
  99 
 100 #endif /* __LITTLE_ENDIAN */
 101 
 102 
 103 static const double
 104         zero = 0.0,
 105         half = 0.5,
 106         one = 1.0,
 107         huge = 1.0e300,
 108         tiny = 1.0e-300,
 109         two36 = 6.87194767360000000000e+10,
 110         two30 = 1.07374182400000000000e+09,
 111         two6 = 6.40000000000000000000e+01,
 112         two4 = 1.60000000000000000000e+01,
 113         twom18 = 3.81469726562500000000e-06,
 114         twom28 = 3.72529029846191406250e-09,
 115         twom42 = 2.27373675443232059479e-13,
 116         twom60 = 8.67361737988403547206e-19,
 117         twom62 = 2.16840434497100886801e-19,
 118         twom66 = 1.35525271560688054251e-20,
 119         twom90 = 8.07793566946316088742e-28,
 120         twom113 = 9.62964972193617926528e-35,
 121         twom124 = 4.70197740328915003187e-38;
 122 
 123 
 124 /*
 125 *       Extract the exponent and normalized significand (represented as
 126 *       an array of five doubles) from a finite, nonzero quad.
 127 */
 128 static int
 129 __q_unpack( const union longdouble *x, double *s )
 130 {
 131         union {
 132                 double                  d;
 133                 unsigned int    l[2];
 134         } u;
 135         double                  b;
 136         unsigned int    lx, w[3];
 137         int                             ex;
 138 
 139         /* get the normalized significand and exponent */
 140         ex = (int) ( ( x->l.msw & 0x7fffffff ) >> 16 );
 141         lx = x->l.msw & 0xffff;
 142         if ( ex )
 143         {
 144                 lx |= 0x10000;
 145                 w[0] = x->l.frac2;
 146                 w[1] = x->l.frac3;
 147                 w[2] = x->l.frac4;
 148         }
 149         else
 150         {
 151                 if ( lx | ( x->l.frac2 & 0xfffe0000 ) )
 152                 {
 153                         w[0] = x->l.frac2;
 154                         w[1] = x->l.frac3;
 155                         w[2] = x->l.frac4;
 156                         ex = 1;
 157                 }
 158                 else if ( x->l.frac2 | ( x->l.frac3 & 0xfffe0000 ) )
 159                 {
 160                         lx = x->l.frac2;
 161                         w[0] = x->l.frac3;
 162                         w[1] = x->l.frac4;
 163                         w[2] = 0;
 164                         ex = -31;
 165                 }
 166                 else if ( x->l.frac3 | ( x->l.frac4 & 0xfffe0000 ) )
 167                 {
 168                         lx = x->l.frac3;
 169                         w[0] = x->l.frac4;
 170                         w[1] = w[2] = 0;
 171                         ex = -63;
 172                 }
 173                 else
 174                 {
 175                         lx = x->l.frac4;
 176                         w[0] = w[1] = w[2] = 0;
 177                         ex = -95;
 178                 }
 179                 while ( ( lx & 0x10000 ) == 0 )
 180                 {
 181                         lx = ( lx << 1 ) | ( w[0] >> 31 );
 182                         w[0] = ( w[0] << 1 ) | ( w[1] >> 31 );
 183                         w[1] = ( w[1] << 1 ) | ( w[2] >> 31 );
 184                         w[2] <<= 1;
 185                         ex--;
 186                 }
 187         }
 188 
 189         /* extract the significand into five doubles */
 190         u.l[HIWORD] = 0x42300000;
 191         u.l[LOWORD] = 0;
 192         b = u.d;
 193         u.l[LOWORD] = lx;
 194         s[0] = u.d - b;
 195 
 196         u.l[HIWORD] = 0x40300000;
 197         u.l[LOWORD] = 0;
 198         b = u.d;
 199         u.l[LOWORD] = w[0] & 0xffffff00;
 200         s[1] = u.d - b;
 201 
 202         u.l[HIWORD] = 0x3e300000;
 203         u.l[LOWORD] = 0;
 204         b = u.d;
 205         u.l[HIWORD] |= w[0] & 0xff;
 206         u.l[LOWORD] = w[1] & 0xffff0000;
 207         s[2] = u.d - b;
 208 
 209         u.l[HIWORD] = 0x3c300000;
 210         u.l[LOWORD] = 0;
 211         b = u.d;
 212         u.l[HIWORD] |= w[1] & 0xffff;
 213         u.l[LOWORD] = w[2] & 0xff000000;
 214         s[3] = u.d - b;
 215 
 216         u.l[HIWORD] = 0x3c300000;
 217         u.l[LOWORD] = 0;
 218         b = u.d;
 219         u.l[LOWORD] = w[2] & 0xffffff;
 220         s[4] = u.d - b;
 221 
 222         return ex - 0x3fff;
 223 }
 224 
 225 
 226 /*
 227 *       Pack an exponent and array of three doubles representing a finite,
 228 *       nonzero number into a quad.  Assume the sign is already there and
 229 *       the rounding mode has been fudged accordingly.
 230 */
 231 static void
 232 __q_pack( const double *z, int exp, enum fp_direction_type rm,
 233         union longdouble *x, int *inexact )
 234 {
 235         union {
 236                 double                  d;
 237                 unsigned int    l[2];
 238         } u;
 239         double                  s[3], t, t2;
 240         unsigned int    msw, frac2, frac3, frac4;
 241 
 242         /* bias exponent and strip off integer bit */
 243         exp += 0x3fff;
 244         s[0] = z[0] - one;
 245         s[1] = z[1];
 246         s[2] = z[2];
 247 
 248         /*
 249          * chop the significand to obtain the fraction;
 250          * use round-to-minus-infinity to ensure chopping
 251          */
 252         (void) __swapRD( fp_negative );
 253 
 254         /* extract the first eighty bits of fraction */
 255         t = s[1] + s[2];
 256         u.d = two36 + ( s[0] + t );
 257         msw = u.l[LOWORD];
 258         s[0] -= ( u.d - two36 );
 259 
 260         u.d = two4 + ( s[0] + t );
 261         frac2 = u.l[LOWORD];
 262         s[0] -= ( u.d - two4 );
 263 
 264         u.d = twom28 + ( s[0] + t );
 265         frac3 = u.l[LOWORD];
 266         s[0] -= ( u.d - twom28 );
 267 
 268         /* condense the remaining fraction; errors here won't matter */
 269         t = s[0] + s[1];
 270         s[1] = ( ( s[0] - t ) + s[1] ) + s[2];
 271         s[0] = t;
 272 
 273         /* get the last word of fraction */
 274         u.d = twom60 + ( s[0] + s[1] );
 275         frac4 = u.l[LOWORD];
 276         s[0] -= ( u.d - twom60 );
 277 
 278         /*
 279          * keep track of what's left for rounding; note that
 280          * t2 will be non-negative due to rounding mode
 281          */
 282         t = s[0] + s[1];
 283         t2 = ( s[0] - t ) + s[1];
 284 
 285         if ( t != zero )
 286         {
 287                 *inexact = 1;
 288 
 289                 /* decide whether to round the fraction up */
 290                 if ( rm == fp_positive || ( rm == fp_nearest && ( t > twom113 ||
 291                         ( t == twom113 && ( t2 != zero || frac4 & 1 ) ) ) ) )
 292                 {
 293                         /* round up and renormalize if necessary */
 294                         if ( ++frac4 == 0 )
 295                                 if ( ++frac3 == 0 )
 296                                         if ( ++frac2 == 0 )
 297                                                 if ( ++msw == 0x10000 )
 298                                                 {
 299                                                         msw = 0;
 300                                                         exp++;
 301                                                 }
 302                 }
 303         }
 304 
 305         /* assemble the result */
 306         x->l.msw |= msw | ( exp << 16 );
 307         x->l.frac2 = frac2;
 308         x->l.frac3 = frac3;
 309         x->l.frac4 = frac4;
 310 }
 311 
 312 
 313 /*
 314 *       Compute the square root of x and place the TP result in s.
 315 */
 316 static void
 317 __q_tp_sqrt( const double *x, double *s )
 318 {
 319         double  c, rr, r[3], tt[3], t[5];
 320 
 321         /* approximate the divisor for the Newton iteration */
 322         c = sqrt( ( x[0] + x[1] ) + x[2] );
 323         rr = half / c;
 324 
 325         /* compute the first five "digits" of the square root */
 326         t[0] = ( c + two30 ) - two30;
 327         tt[0] = t[0] + t[0];
 328         r[0] = ( ( x[0] - t[0] * t[0] ) + x[1] ) + x[2];
 329 
 330         t[1] = ( rr * ( r[0] + x[3] ) + two6 ) - two6;
 331         tt[1] = t[1] + t[1];
 332         r[0] -= tt[0] * t[1];
 333         r[1] = x[3] - t[1] * t[1];
 334         c = ( r[1] + twom18 ) - twom18;
 335         r[0] += c;
 336         r[1] = ( r[1] - c ) + x[4];
 337 
 338         t[2] = ( rr * ( r[0] + r[1] ) + twom18 ) - twom18;
 339         tt[2] = t[2] + t[2];
 340         r[0] -= tt[0] * t[2];
 341         r[1] -= tt[1] * t[2];
 342         c = ( r[1] + twom42 ) - twom42;
 343         r[0] += c;
 344         r[1] = ( r[1] - c ) - t[2] * t[2];
 345 
 346         t[3] = ( rr * ( r[0] + r[1] ) + twom42 ) - twom42;
 347         r[0] = ( ( r[0] - tt[0] * t[3] ) + r[1] ) - tt[1] * t[3];
 348         r[1] = -tt[2] * t[3];
 349         c = ( r[1] + twom90 ) - twom90;
 350         r[0] += c;
 351         r[1] = ( r[1] - c ) - t[3] * t[3];
 352 
 353         t[4] = ( rr * ( r[0] + r[1] ) + twom66 ) - twom66;
 354 
 355         /* here we just need to get the sign of the remainder */
 356         c = ( ( ( ( ( r[0] - tt[0] * t[4] ) - tt[1] * t[4] ) + r[1] )
 357                 - tt[2] * t[4] ) - ( t[3] + t[3] ) * t[4] ) - t[4] * t[4];
 358 
 359         /* reduce to three doubles */
 360         t[0] += t[1];
 361         t[1] = t[2] + t[3];
 362         t[2] = t[4];
 363 
 364         /* if the third term might lie on a rounding boundary, perturb it */
 365         if ( c != zero && t[2] == ( twom62 + t[2] ) - twom62 )
 366         {
 367                 if ( c < zero )
 368                         t[2] -= twom124;
 369                 else
 370                         t[2] += twom124;
 371         }
 372 
 373         /* condense the square root */
 374         c = t[1] + t[2];
 375         t[2] += ( t[1] - c );
 376         t[1] = c;
 377         c = t[0] + t[1];
 378         s[1] = t[1] + ( t[0] - c );
 379         s[0] = c;
 380         if ( s[1] == zero )
 381         {
 382                 c = s[0] + t[2];
 383                 s[1] = t[2] + ( s[0] - c );
 384                 s[0] = c;
 385                 s[2] = zero;
 386         }
 387         else
 388         {
 389                 c = s[1] + t[2];
 390                 s[2] = t[2] + ( s[1] - c );
 391                 s[1] = c;
 392         }
 393 }
 394 
 395 
 396 long double
 397 sqrtl( long double ldx )
 398 {
 399         union   longdouble              x;
 400         volatile double                 t;
 401         double                                  xx[5], zz[3];
 402         enum fp_direction_type  rm;
 403         int                             ex, inexact, exc, traps;
 404 
 405         /* clear cexc */
 406         t = zero;
 407         t -= zero;
 408 
 409         /* check for zero operand */
 410         x.d = ldx;
 411         if ( !( ( x.l.msw & 0x7fffffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 ) )
 412                 return ldx;
 413 
 414         /* handle nan and inf cases */
 415         if ( ( x.l.msw & 0x7fffffff ) >= 0x7fff0000 )
 416         {
 417                 if ( ( x.l.msw & 0xffff ) | x.l.frac2 | x.l.frac3 | x.l.frac4 )
 418                 {
 419                         if ( !( x.l.msw & 0x8000 ) )
 420                         {
 421                                 /* snan, signal invalid */
 422                                 t += snan.d;
 423                         }
 424                         x.l.msw |= 0x8000;
 425                         return x.d;
 426                 }
 427                 if ( x.l.msw & 0x80000000 )
 428                 {
 429                         /* sqrt(-inf), signal invalid */
 430                         t = -one;
 431                         t = sqrt( t );
 432                         return qnan.d;
 433                 }
 434                 /* sqrt(inf), return inf */
 435                 return x.d;
 436         }
 437 
 438         /* handle negative numbers */
 439         if ( x.l.msw & 0x80000000 )
 440         {
 441                 t = -one;
 442                 t = sqrt( t );
 443                 return qnan.d;
 444         }
 445 
 446         /* now x is finite, positive */
 447 
 448         traps = __swapTE( 0 );
 449         exc = __swapEX( 0 );
 450         rm = __swapRD( fp_nearest );
 451 
 452         ex = __q_unpack( &x, xx );
 453         if ( ex & 1 )
 454         {
 455                 /* make exponent even */
 456                 xx[0] += xx[0];
 457                 xx[1] += xx[1];
 458                 xx[2] += xx[2];
 459                 xx[3] += xx[3];
 460                 xx[4] += xx[4];
 461                 ex--;
 462         }
 463         __q_tp_sqrt( xx, zz );
 464 
 465         /* put everything together */
 466         x.l.msw = 0;
 467         inexact = 0;
 468         __q_pack( zz, ex >> 1, rm, &x, &inexact );
 469 
 470         (void) __swapRD( rm );
 471         (void) __swapEX( exc );
 472         (void) __swapTE( traps );
 473         if ( inexact )
 474         {
 475                 t = huge;
 476                 t += tiny;
 477         }
 478         return x.d;
 479 }