il_11210 Sdiff usr/src/lib/libm/common/Q/sqrtl.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak __sqrtl = sqrtl
  31 
  32 #include "libm.h"
  33 #include "longdouble.h"
  34 
  35 extern int __swapTE(int);
  36 extern int __swapEX(int);
  37 extern enum fp_direction_type __swapRD(enum fp_direction_type);
  38 
  39 /*
  40  * in struct longdouble, msw consists of
  41  *      unsigned short  sgn:1;
  42  *      unsigned short  exp:15;
  43  *      unsigned short  frac1:16;
  44  */
  45 
  46 #ifdef __LITTLE_ENDIAN
  47 
  48 /* array indices used to access words within a double */
  49 #define HIWORD  1
  50 #define LOWORD  0
  51 
  52 /* structure used to access words within a quad */
  53 union longdouble {
  54         struct {
  55                 unsigned int    frac4;
  56                 unsigned int    frac3;
  57                 unsigned int    frac2;
  58                 unsigned int    msw;
  59         } l;
  60         long double     d;
  61 };
  62 
  63 /* default NaN returned for sqrt(neg) */
  64 static const union longdouble
  65         qnan = { 0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff };

  66 
  67 /* signalling NaN used to raise invalid */
  68 static const union {
  69         unsigned u[2];
  70         double d;
  71 } snan = { 0, 0x7ff00001 };
  72 
  73 #else
  74 
  75 /* array indices used to access words within a double */
  76 #define HIWORD  0
  77 #define LOWORD  1
  78 
  79 /* structure used to access words within a quad */
  80 union longdouble {
  81         struct {
  82                 unsigned int    msw;
  83                 unsigned int    frac2;
  84                 unsigned int    frac3;
  85                 unsigned int    frac4;
  86         } l;
  87         long double     d;
  88 };
  89 
  90 /* default NaN returned for sqrt(neg) */
  91 static const union longdouble
  92         qnan = { 0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff };

  93 
  94 /* signalling NaN used to raise invalid */
  95 static const union {
  96         unsigned u[2];
  97         double d;
  98 } snan = { 0x7ff00001, 0 };
  99 
 100 #endif /* __LITTLE_ENDIAN */
 101 
 102 
 103 static const double
 104         zero = 0.0,
 105         half = 0.5,
 106         one = 1.0,
 107         huge = 1.0e300,
 108         tiny = 1.0e-300,
 109         two36 = 6.87194767360000000000e+10,
 110         two30 = 1.07374182400000000000e+09,
 111         two6 = 6.40000000000000000000e+01,
 112         two4 = 1.60000000000000000000e+01,
 113         twom18 = 3.81469726562500000000e-06,
 114         twom28 = 3.72529029846191406250e-09,
 115         twom42 = 2.27373675443232059479e-13,
 116         twom60 = 8.67361737988403547206e-19,
 117         twom62 = 2.16840434497100886801e-19,
 118         twom66 = 1.35525271560688054251e-20,
 119         twom90 = 8.07793566946316088742e-28,
 120         twom113 = 9.62964972193617926528e-35,
 121         twom124 = 4.70197740328915003187e-38;
 122 
 123 
 124 /*
 125 *       Extract the exponent and normalized significand (represented as
 126 *       an array of five doubles) from a finite, nonzero quad.
 127 */
 128 static int
 129 __q_unpack(const union longdouble *x, double *s)
 130 {
 131         union {
 132                 double                  d;
 133                 unsigned int    l[2];
 134         } u;

 135         double                  b;
 136         unsigned int    lx, w[3];
 137         int                             ex;
 138 
 139         /* get the normalized significand and exponent */
 140         ex = (int) ((x->l.msw & 0x7fffffff) >> 16);
 141         lx = x->l.msw & 0xffff;
 142         if (ex)
 143         {
 144                 lx |= 0x10000;
 145                 w[0] = x->l.frac2;
 146                 w[1] = x->l.frac3;
 147                 w[2] = x->l.frac4;
 148         }
 149         else
 150         {
 151                 if (lx | (x->l.frac2 & 0xfffe0000))
 152                 {
 153                         w[0] = x->l.frac2;
 154                         w[1] = x->l.frac3;
 155                         w[2] = x->l.frac4;
 156                         ex = 1;
 157                 }
 158                 else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000))
 159                 {
 160                         lx = x->l.frac2;
 161                         w[0] = x->l.frac3;
 162                         w[1] = x->l.frac4;
 163                         w[2] = 0;
 164                         ex = -31;
 165                 }
 166                 else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000))
 167                 {
 168                         lx = x->l.frac3;
 169                         w[0] = x->l.frac4;
 170                         w[1] = w[2] = 0;
 171                         ex = -63;
 172                 }
 173                 else
 174                 {
 175                         lx = x->l.frac4;
 176                         w[0] = w[1] = w[2] = 0;
 177                         ex = -95;
 178                 }
 179                 while ((lx & 0x10000) == 0)
 180                 {
 181                         lx = (lx << 1) | (w[0] >> 31);
 182                         w[0] = (w[0] << 1) | (w[1] >> 31);
 183                         w[1] = (w[1] << 1) | (w[2] >> 31);
 184                         w[2] <<= 1;
 185                         ex--;
 186                 }
 187         }
 188 
 189         /* extract the significand into five doubles */
 190         u.l[HIWORD] = 0x42300000;
 191         u.l[LOWORD] = 0;
 192         b = u.d;
 193         u.l[LOWORD] = lx;
 194         s[0] = u.d - b;
 195 
 196         u.l[HIWORD] = 0x40300000;
 197         u.l[LOWORD] = 0;
 198         b = u.d;
 199         u.l[LOWORD] = w[0] & 0xffffff00;
 200         s[1] = u.d - b;

 202         u.l[HIWORD] = 0x3e300000;
 203         u.l[LOWORD] = 0;
 204         b = u.d;
 205         u.l[HIWORD] |= w[0] & 0xff;
 206         u.l[LOWORD] = w[1] & 0xffff0000;
 207         s[2] = u.d - b;
 208 
 209         u.l[HIWORD] = 0x3c300000;
 210         u.l[LOWORD] = 0;
 211         b = u.d;
 212         u.l[HIWORD] |= w[1] & 0xffff;
 213         u.l[LOWORD] = w[2] & 0xff000000;
 214         s[3] = u.d - b;
 215 
 216         u.l[HIWORD] = 0x3c300000;
 217         u.l[LOWORD] = 0;
 218         b = u.d;
 219         u.l[LOWORD] = w[2] & 0xffffff;
 220         s[4] = u.d - b;
 221 
 222         return ex - 0x3fff;
 223 }
 224 
 225 
 226 /*
 227 *       Pack an exponent and array of three doubles representing a finite,
 228 *       nonzero number into a quad.  Assume the sign is already there and
 229 *       the rounding mode has been fudged accordingly.
 230 */
 231 static void
 232 __q_pack(const double *z, int exp, enum fp_direction_type rm,
 233         union longdouble *x, int *inexact)
 234 {
 235         union {
 236                 double                  d;
 237                 unsigned int    l[2];
 238         } u;

 239         double                  s[3], t, t2;
 240         unsigned int    msw, frac2, frac3, frac4;
 241 
 242         /* bias exponent and strip off integer bit */
 243         exp += 0x3fff;
 244         s[0] = z[0] - one;
 245         s[1] = z[1];
 246         s[2] = z[2];
 247 
 248         /*
 249          * chop the significand to obtain the fraction;
 250          * use round-to-minus-infinity to ensure chopping
 251          */
 252         (void) __swapRD(fp_negative);
 253 
 254         /* extract the first eighty bits of fraction */
 255         t = s[1] + s[2];
 256         u.d = two36 + (s[0] + t);
 257         msw = u.l[LOWORD];
 258         s[0] -= (u.d - two36);

 265         frac3 = u.l[LOWORD];
 266         s[0] -= (u.d - twom28);
 267 
 268         /* condense the remaining fraction; errors here won't matter */
 269         t = s[0] + s[1];
 270         s[1] = ((s[0] - t) + s[1]) + s[2];
 271         s[0] = t;
 272 
 273         /* get the last word of fraction */
 274         u.d = twom60 + (s[0] + s[1]);
 275         frac4 = u.l[LOWORD];
 276         s[0] -= (u.d - twom60);
 277 
 278         /*
 279          * keep track of what's left for rounding; note that
 280          * t2 will be non-negative due to rounding mode
 281          */
 282         t = s[0] + s[1];
 283         t2 = (s[0] - t) + s[1];
 284 
 285         if (t != zero)
 286         {
 287                 *inexact = 1;
 288 
 289                 /* decide whether to round the fraction up */
 290                 if (rm == fp_positive || (rm == fp_nearest && (t > twom113 ||
 291                         (t == twom113 && (t2 != zero || frac4 & 1)))))
 292                 {
 293                         /* round up and renormalize if necessary */
 294                         if (++frac4 == 0)
 295                                 if (++frac3 == 0)
 296                                         if (++frac2 == 0)
 297                                                 if (++msw == 0x10000)
 298                                                 {
 299                                                         msw = 0;
 300                                                         exp++;
 301                                                 }
 302                 }
 303         }



 304 
 305         /* assemble the result */
 306         x->l.msw |= msw | (exp << 16);
 307         x->l.frac2 = frac2;
 308         x->l.frac3 = frac3;
 309         x->l.frac4 = frac4;
 310 }
 311 
 312 
 313 /*
 314 *       Compute the square root of x and place the TP result in s.
 315 */
 316 static void
 317 __q_tp_sqrt(const double *x, double *s)
 318 {
 319         double  c, rr, r[3], tt[3], t[5];
 320 
 321         /* approximate the divisor for the Newton iteration */
 322         c = sqrt((x[0] + x[1]) + x[2]);
 323         rr = half / c;
 324 
 325         /* compute the first five "digits" of the square root */
 326         t[0] = (c + two30) - two30;
 327         tt[0] = t[0] + t[0];
 328         r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2];
 329 
 330         t[1] = (rr * (r[0] + x[3]) + two6) - two6;
 331         tt[1] = t[1] + t[1];
 332         r[0] -= tt[0] * t[1];
 333         r[1] = x[3] - t[1] * t[1];
 334         c = (r[1] + twom18) - twom18;
 335         r[0] += c;
 336         r[1] = (r[1] - c) + x[4];
 337 
 338         t[2] = (rr * (r[0] + r[1]) + twom18) - twom18;
 339         tt[2] = t[2] + t[2];
 340         r[0] -= tt[0] * t[2];
 341         r[1] -= tt[1] * t[2];
 342         c = (r[1] + twom42) - twom42;
 343         r[0] += c;
 344         r[1] = (r[1] - c) - t[2] * t[2];
 345 
 346         t[3] = (rr * (r[0] + r[1]) + twom42) - twom42;
 347         r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3];
 348         r[1] = -tt[2] * t[3];
 349         c = (r[1] + twom90) - twom90;
 350         r[0] += c;
 351         r[1] = (r[1] - c) - t[3] * t[3];
 352 
 353         t[4] = (rr * (r[0] + r[1]) + twom66) - twom66;
 354 
 355         /* here we just need to get the sign of the remainder */
 356         c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1])
 357                 - tt[2] * t[4]) - (t[3] + t[3]) * t[4]) - t[4] * t[4];
 358 
 359         /* reduce to three doubles */
 360         t[0] += t[1];
 361         t[1] = t[2] + t[3];
 362         t[2] = t[4];
 363 
 364         /* if the third term might lie on a rounding boundary, perturb it */
 365         if (c != zero && t[2] == (twom62 + t[2]) - twom62)
 366         {
 367                 if (c < zero)
 368                         t[2] -= twom124;
 369                 else
 370                         t[2] += twom124;
 371         }
 372 
 373         /* condense the square root */
 374         c = t[1] + t[2];
 375         t[2] += (t[1] - c);
 376         t[1] = c;
 377         c = t[0] + t[1];
 378         s[1] = t[1] + (t[0] - c);
 379         s[0] = c;
 380         if (s[1] == zero)
 381         {
 382                 c = s[0] + t[2];
 383                 s[1] = t[2] + (s[0] - c);
 384                 s[0] = c;
 385                 s[2] = zero;
 386         }
 387         else
 388         {
 389                 c = s[1] + t[2];
 390                 s[2] = t[2] + (s[1] - c);
 391                 s[1] = c;
 392         }
 393 }
 394 
 395 
 396 long double
 397 sqrtl(long double ldx)
 398 {
 399         union   longdouble              x;
 400         volatile double                 t;
 401         double                                  xx[5], zz[3];
 402         enum fp_direction_type  rm;
 403         int                             ex, inexact, exc, traps;
 404 
 405         /* clear cexc */
 406         t = zero;
 407         t -= zero;
 408 
 409         /* check for zero operand */
 410         x.d = ldx;

 411         if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4))
 412                 return ldx;
 413 
 414         /* handle nan and inf cases */
 415         if ((x.l.msw & 0x7fffffff) >= 0x7fff0000)
 416         {
 417                 if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4)
 418                 {
 419                         if (!(x.l.msw & 0x8000))
 420                         {
 421                                 /* snan, signal invalid */
 422                                 t += snan.d;
 423                         }

 424                         x.l.msw |= 0x8000;
 425                         return x.d;
 426                 }
 427                 if (x.l.msw & 0x80000000)
 428                 {
 429                         /* sqrt(-inf), signal invalid */
 430                         t = -one;
 431                         t = sqrt(t);
 432                         return qnan.d;
 433                 }

 434                 /* sqrt(inf), return inf */
 435                 return x.d;
 436         }
 437 
 438         /* handle negative numbers */
 439         if (x.l.msw & 0x80000000)
 440         {
 441                 t = -one;
 442                 t = sqrt(t);
 443                 return qnan.d;
 444         }
 445 
 446         /* now x is finite, positive */
 447 
 448         traps = __swapTE(0);
 449         exc = __swapEX(0);
 450         rm = __swapRD(fp_nearest);
 451 
 452         ex = __q_unpack(&x, xx);
 453         if (ex & 1)
 454         {
 455                 /* make exponent even */
 456                 xx[0] += xx[0];
 457                 xx[1] += xx[1];
 458                 xx[2] += xx[2];
 459                 xx[3] += xx[3];
 460                 xx[4] += xx[4];
 461                 ex--;
 462         }

 463         __q_tp_sqrt(xx, zz);
 464 
 465         /* put everything together */
 466         x.l.msw = 0;
 467         inexact = 0;
 468         __q_pack(zz, ex >> 1, rm, &x, &inexact);
 469 
 470         (void) __swapRD(rm);
 471         (void) __swapEX(exc);
 472         (void) __swapTE(traps);
 473         if (inexact)
 474         {
 475                 t = huge;
 476                 t += tiny;
 477         }
 478         return x.d;

 479 }

   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __sqrtl = sqrtl
  32 
  33 #include "libm.h"
  34 #include "longdouble.h"
  35 
  36 extern int __swapTE(int);
  37 extern int __swapEX(int);
  38 extern enum fp_direction_type __swapRD(enum fp_direction_type);
  39 
  40 /*
  41  * in struct longdouble, msw consists of
  42  *      unsigned short  sgn:1;
  43  *      unsigned short  exp:15;
  44  *      unsigned short  frac1:16;
  45  */
  46 
  47 #ifdef __LITTLE_ENDIAN

  48 /* array indices used to access words within a double */
  49 #define HIWORD          1
  50 #define LOWORD          0
  51 
  52 /* structure used to access words within a quad */
  53 union longdouble {
  54         struct {
  55                 unsigned int frac4;
  56                 unsigned int frac3;
  57                 unsigned int frac2;
  58                 unsigned int msw;
  59         } l;
  60         long double d;
  61 };
  62 
  63 /* default NaN returned for sqrt(neg) */
  64 static const union longdouble qnan = {
  65         0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff
  66 };
  67 
  68 /* signalling NaN used to raise invalid */
  69 static const union {
  70         unsigned u[2];
  71         double d;
  72 } snan = { 0, 0x7ff00001 };

  73 #else

  74 /* array indices used to access words within a double */
  75 #define HIWORD          0
  76 #define LOWORD          1
  77 
  78 /* structure used to access words within a quad */
  79 union longdouble {
  80         struct {
  81                 unsigned int msw;
  82                 unsigned int frac2;
  83                 unsigned int frac3;
  84                 unsigned int frac4;
  85         } l;
  86         long double d;
  87 };
  88 
  89 /* default NaN returned for sqrt(neg) */
  90 static const union longdouble qnan = {
  91         0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff
  92 };
  93 
  94 /* signalling NaN used to raise invalid */
  95 static const union {
  96         unsigned u[2];
  97         double d;
  98 } snan = { 0x7ff00001, 0 };

  99 #endif /* __LITTLE_ENDIAN */
 100 
 101 static const double zero = 0.0,


 102         half = 0.5,
 103         one = 1.0,
 104         huge = 1.0e300,
 105         tiny = 1.0e-300,
 106         two36 = 6.87194767360000000000e+10,
 107         two30 = 1.07374182400000000000e+09,
 108         two6 = 6.40000000000000000000e+01,
 109         two4 = 1.60000000000000000000e+01,
 110         twom18 = 3.81469726562500000000e-06,
 111         twom28 = 3.72529029846191406250e-09,
 112         twom42 = 2.27373675443232059479e-13,
 113         twom60 = 8.67361737988403547206e-19,
 114         twom62 = 2.16840434497100886801e-19,
 115         twom66 = 1.35525271560688054251e-20,
 116         twom90 = 8.07793566946316088742e-28,
 117         twom113 = 9.62964972193617926528e-35,
 118         twom124 = 4.70197740328915003187e-38;
 119 

 120 /*
 121  *      Extract the exponent and normalized significand (represented as
 122  *      an array of five doubles) from a finite, nonzero quad.
 123  */
 124 static int
 125 __q_unpack(const union longdouble *x, double *s)
 126 {
 127         union {
 128                 double d;
 129                 unsigned int l[2];
 130         } u;
 131 
 132         double b;
 133         unsigned int lx, w[3];
 134         int ex;
 135 
 136         /* get the normalized significand and exponent */
 137         ex = (int)((x->l.msw & 0x7fffffff) >> 16);
 138         lx = x->l.msw & 0xffff;
 139 
 140         if (ex) {
 141                 lx |= 0x10000;
 142                 w[0] = x->l.frac2;
 143                 w[1] = x->l.frac3;
 144                 w[2] = x->l.frac4;
 145         } else {
 146                 if (lx | (x->l.frac2 & 0xfffe0000)) {



 147                         w[0] = x->l.frac2;
 148                         w[1] = x->l.frac3;
 149                         w[2] = x->l.frac4;
 150                         ex = 1;
 151                 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {


 152                         lx = x->l.frac2;
 153                         w[0] = x->l.frac3;
 154                         w[1] = x->l.frac4;
 155                         w[2] = 0;
 156                         ex = -31;
 157                 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {


 158                         lx = x->l.frac3;
 159                         w[0] = x->l.frac4;
 160                         w[1] = w[2] = 0;
 161                         ex = -63;
 162                 } else {


 163                         lx = x->l.frac4;
 164                         w[0] = w[1] = w[2] = 0;
 165                         ex = -95;
 166                 }
 167 
 168                 while ((lx & 0x10000) == 0) {
 169                         lx = (lx << 1) | (w[0] >> 31);
 170                         w[0] = (w[0] << 1) | (w[1] >> 31);
 171                         w[1] = (w[1] << 1) | (w[2] >> 31);
 172                         w[2] <<= 1;
 173                         ex--;
 174                 }
 175         }
 176 
 177         /* extract the significand into five doubles */
 178         u.l[HIWORD] = 0x42300000;
 179         u.l[LOWORD] = 0;
 180         b = u.d;
 181         u.l[LOWORD] = lx;
 182         s[0] = u.d - b;
 183 
 184         u.l[HIWORD] = 0x40300000;
 185         u.l[LOWORD] = 0;
 186         b = u.d;
 187         u.l[LOWORD] = w[0] & 0xffffff00;
 188         s[1] = u.d - b;

 190         u.l[HIWORD] = 0x3e300000;
 191         u.l[LOWORD] = 0;
 192         b = u.d;
 193         u.l[HIWORD] |= w[0] & 0xff;
 194         u.l[LOWORD] = w[1] & 0xffff0000;
 195         s[2] = u.d - b;
 196 
 197         u.l[HIWORD] = 0x3c300000;
 198         u.l[LOWORD] = 0;
 199         b = u.d;
 200         u.l[HIWORD] |= w[1] & 0xffff;
 201         u.l[LOWORD] = w[2] & 0xff000000;
 202         s[3] = u.d - b;
 203 
 204         u.l[HIWORD] = 0x3c300000;
 205         u.l[LOWORD] = 0;
 206         b = u.d;
 207         u.l[LOWORD] = w[2] & 0xffffff;
 208         s[4] = u.d - b;
 209 
 210         return (ex - 0x3fff);
 211 }
 212 

 213 /*
 214  *      Pack an exponent and array of three doubles representing a finite,
 215  *      nonzero number into a quad.  Assume the sign is already there and
 216  *      the rounding mode has been fudged accordingly.
 217  */
 218 static void
 219 __q_pack(const double *z, int exp, enum fp_direction_type rm,
 220     union longdouble *x, int *inexact)
 221 {
 222         union {
 223                 double d;
 224                 unsigned int l[2];
 225         } u;
 226 
 227         double s[3], t, t2;
 228         unsigned int msw, frac2, frac3, frac4;
 229 
 230         /* bias exponent and strip off integer bit */
 231         exp += 0x3fff;
 232         s[0] = z[0] - one;
 233         s[1] = z[1];
 234         s[2] = z[2];
 235 
 236         /*
 237          * chop the significand to obtain the fraction;
 238          * use round-to-minus-infinity to ensure chopping
 239          */
 240         (void) __swapRD(fp_negative);
 241 
 242         /* extract the first eighty bits of fraction */
 243         t = s[1] + s[2];
 244         u.d = two36 + (s[0] + t);
 245         msw = u.l[LOWORD];
 246         s[0] -= (u.d - two36);

 253         frac3 = u.l[LOWORD];
 254         s[0] -= (u.d - twom28);
 255 
 256         /* condense the remaining fraction; errors here won't matter */
 257         t = s[0] + s[1];
 258         s[1] = ((s[0] - t) + s[1]) + s[2];
 259         s[0] = t;
 260 
 261         /* get the last word of fraction */
 262         u.d = twom60 + (s[0] + s[1]);
 263         frac4 = u.l[LOWORD];
 264         s[0] -= (u.d - twom60);
 265 
 266         /*
 267          * keep track of what's left for rounding; note that
 268          * t2 will be non-negative due to rounding mode
 269          */
 270         t = s[0] + s[1];
 271         t2 = (s[0] - t) + s[1];
 272 
 273         if (t != zero) {

 274                 *inexact = 1;
 275 
 276                 /* decide whether to round the fraction up */
 277                 if (rm == fp_positive || (rm == fp_nearest && (t > twom113 ||
 278                     (t == twom113 && (t2 != zero || frac4 & 1))))) {

 279                         /* round up and renormalize if necessary */
 280                         if (++frac4 == 0) {
 281                                 if (++frac3 == 0) {
 282                                         if (++frac2 == 0) {
 283                                                 if (++msw == 0x10000) {

 284                                                         msw = 0;
 285                                                         exp++;
 286                                                 }
 287                                         }
 288                                 }
 289                         }
 290                 }
 291         }
 292 
 293         /* assemble the result */
 294         x->l.msw |= msw | (exp << 16);
 295         x->l.frac2 = frac2;
 296         x->l.frac3 = frac3;
 297         x->l.frac4 = frac4;
 298 }
 299 

 300 /*
 301  *      Compute the square root of x and place the TP result in s.
 302  */
 303 static void
 304 __q_tp_sqrt(const double *x, double *s)
 305 {
 306         double c, rr, r[3], tt[3], t[5];
 307 
 308         /* approximate the divisor for the Newton iteration */
 309         c = sqrt((x[0] + x[1]) + x[2]);
 310         rr = half / c;
 311 
 312         /* compute the first five "digits" of the square root */
 313         t[0] = (c + two30) - two30;
 314         tt[0] = t[0] + t[0];
 315         r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2];
 316 
 317         t[1] = (rr * (r[0] + x[3]) + two6) - two6;
 318         tt[1] = t[1] + t[1];
 319         r[0] -= tt[0] * t[1];
 320         r[1] = x[3] - t[1] * t[1];
 321         c = (r[1] + twom18) - twom18;
 322         r[0] += c;
 323         r[1] = (r[1] - c) + x[4];
 324 
 325         t[2] = (rr * (r[0] + r[1]) + twom18) - twom18;
 326         tt[2] = t[2] + t[2];
 327         r[0] -= tt[0] * t[2];
 328         r[1] -= tt[1] * t[2];
 329         c = (r[1] + twom42) - twom42;
 330         r[0] += c;
 331         r[1] = (r[1] - c) - t[2] * t[2];
 332 
 333         t[3] = (rr * (r[0] + r[1]) + twom42) - twom42;
 334         r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3];
 335         r[1] = -tt[2] * t[3];
 336         c = (r[1] + twom90) - twom90;
 337         r[0] += c;
 338         r[1] = (r[1] - c) - t[3] * t[3];
 339 
 340         t[4] = (rr * (r[0] + r[1]) + twom66) - twom66;
 341 
 342         /* here we just need to get the sign of the remainder */
 343         c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1]) - tt[2] * t[4]) -
 344             (t[3] + t[3]) * t[4]) - t[4] * t[4];
 345 
 346         /* reduce to three doubles */
 347         t[0] += t[1];
 348         t[1] = t[2] + t[3];
 349         t[2] = t[4];
 350 
 351         /* if the third term might lie on a rounding boundary, perturb it */
 352         if (c != zero && t[2] == (twom62 + t[2]) - twom62) {

 353                 if (c < zero)
 354                         t[2] -= twom124;
 355                 else
 356                         t[2] += twom124;
 357         }
 358 
 359         /* condense the square root */
 360         c = t[1] + t[2];
 361         t[2] += (t[1] - c);
 362         t[1] = c;
 363         c = t[0] + t[1];
 364         s[1] = t[1] + (t[0] - c);
 365         s[0] = c;
 366 
 367         if (s[1] == zero) {
 368                 c = s[0] + t[2];
 369                 s[1] = t[2] + (s[0] - c);
 370                 s[0] = c;
 371                 s[2] = zero;
 372         } else {


 373                 c = s[1] + t[2];
 374                 s[2] = t[2] + (s[1] - c);
 375                 s[1] = c;
 376         }
 377 }
 378 

 379 long double
 380 sqrtl(long double ldx)
 381 {
 382         union   longdouble x;
 383         volatile double t;
 384         double xx[5], zz[3];
 385         enum fp_direction_type rm;
 386         int ex, inexact, exc, traps;
 387 
 388         /* clear cexc */
 389         t = zero;
 390         t -= zero;
 391 
 392         /* check for zero operand */
 393         x.d = ldx;
 394 
 395         if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4))
 396                 return (ldx);
 397 
 398         /* handle nan and inf cases */
 399         if ((x.l.msw & 0x7fffffff) >= 0x7fff0000) {
 400                 if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4) {
 401                         if (!(x.l.msw & 0x8000)) {



 402                                 /* snan, signal invalid */
 403                                 t += snan.d;
 404                         }
 405 
 406                         x.l.msw |= 0x8000;
 407                         return (x.d);
 408                 }
 409 
 410                 if (x.l.msw & 0x80000000) {
 411                         /* sqrt(-inf), signal invalid */
 412                         t = -one;
 413                         t = sqrt(t);
 414                         return (qnan.d);
 415                 }
 416 
 417                 /* sqrt(inf), return inf */
 418                 return (x.d);
 419         }
 420 
 421         /* handle negative numbers */
 422         if (x.l.msw & 0x80000000) {

 423                 t = -one;
 424                 t = sqrt(t);
 425                 return (qnan.d);
 426         }
 427 
 428         /* now x is finite, positive */
 429 
 430         traps = __swapTE(0);
 431         exc = __swapEX(0);
 432         rm = __swapRD(fp_nearest);
 433 
 434         ex = __q_unpack(&x, xx);
 435 
 436         if (ex & 1) {
 437                 /* make exponent even */
 438                 xx[0] += xx[0];
 439                 xx[1] += xx[1];
 440                 xx[2] += xx[2];
 441                 xx[3] += xx[3];
 442                 xx[4] += xx[4];
 443                 ex--;
 444         }
 445 
 446         __q_tp_sqrt(xx, zz);
 447 
 448         /* put everything together */
 449         x.l.msw = 0;
 450         inexact = 0;
 451         __q_pack(zz, ex >> 1, rm, &x, &inexact);
 452 
 453         (void) __swapRD(rm);
 454         (void) __swapEX(exc);
 455         (void) __swapTE(traps);
 456 
 457         if (inexact) {
 458                 t = huge;
 459                 t += tiny;
 460         }
 461 
 462         return (x.d);
 463 }