1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __sqrtl = sqrtl
  32 
  33 #include "libm.h"
  34 #include "longdouble.h"
  35 
  36 extern int __swapTE(int);
  37 extern int __swapEX(int);
  38 extern enum fp_direction_type __swapRD(enum fp_direction_type);
  39 
  40 /*
  41  * in struct longdouble, msw consists of
  42  *      unsigned short  sgn:1;
  43  *      unsigned short  exp:15;
  44  *      unsigned short  frac1:16;
  45  */
  46 
  47 #ifdef __LITTLE_ENDIAN
  48 /* array indices used to access words within a double */
  49 #define HIWORD          1
  50 #define LOWORD          0
  51 
  52 /* structure used to access words within a quad */
  53 union longdouble {
  54         struct {
  55                 unsigned int frac4;
  56                 unsigned int frac3;
  57                 unsigned int frac2;
  58                 unsigned int msw;
  59         } l;
  60         long double d;
  61 };
  62 
  63 /* default NaN returned for sqrt(neg) */
  64 static const union longdouble qnan = {
  65         0xffffffff, 0xffffffff, 0xffffffff, 0x7fffffff
  66 };
  67 
  68 /* signalling NaN used to raise invalid */
  69 static const union {
  70         unsigned u[2];
  71         double d;
  72 } snan = { 0, 0x7ff00001 };
  73 #else
  74 /* array indices used to access words within a double */
  75 #define HIWORD          0
  76 #define LOWORD          1
  77 
  78 /* structure used to access words within a quad */
  79 union longdouble {
  80         struct {
  81                 unsigned int msw;
  82                 unsigned int frac2;
  83                 unsigned int frac3;
  84                 unsigned int frac4;
  85         } l;
  86         long double d;
  87 };
  88 
  89 /* default NaN returned for sqrt(neg) */
  90 static const union longdouble qnan = {
  91         0x7fffffff, 0xffffffff, 0xffffffff, 0xffffffff
  92 };
  93 
  94 /* signalling NaN used to raise invalid */
  95 static const union {
  96         unsigned u[2];
  97         double d;
  98 } snan = { 0x7ff00001, 0 };
  99 #endif /* __LITTLE_ENDIAN */
 100 
 101 static const double zero = 0.0,
 102         half = 0.5,
 103         one = 1.0,
 104         huge = 1.0e300,
 105         tiny = 1.0e-300,
 106         two36 = 6.87194767360000000000e+10,
 107         two30 = 1.07374182400000000000e+09,
 108         two6 = 6.40000000000000000000e+01,
 109         two4 = 1.60000000000000000000e+01,
 110         twom18 = 3.81469726562500000000e-06,
 111         twom28 = 3.72529029846191406250e-09,
 112         twom42 = 2.27373675443232059479e-13,
 113         twom60 = 8.67361737988403547206e-19,
 114         twom62 = 2.16840434497100886801e-19,
 115         twom66 = 1.35525271560688054251e-20,
 116         twom90 = 8.07793566946316088742e-28,
 117         twom113 = 9.62964972193617926528e-35,
 118         twom124 = 4.70197740328915003187e-38;
 119 
 120 /*
 121  *      Extract the exponent and normalized significand (represented as
 122  *      an array of five doubles) from a finite, nonzero quad.
 123  */
 124 static int
 125 __q_unpack(const union longdouble *x, double *s)
 126 {
 127         union {
 128                 double d;
 129                 unsigned int l[2];
 130         } u;
 131 
 132         double b;
 133         unsigned int lx, w[3];
 134         int ex;
 135 
 136         /* get the normalized significand and exponent */
 137         ex = (int)((x->l.msw & 0x7fffffff) >> 16);
 138         lx = x->l.msw & 0xffff;
 139 
 140         if (ex) {
 141                 lx |= 0x10000;
 142                 w[0] = x->l.frac2;
 143                 w[1] = x->l.frac3;
 144                 w[2] = x->l.frac4;
 145         } else {
 146                 if (lx | (x->l.frac2 & 0xfffe0000)) {
 147                         w[0] = x->l.frac2;
 148                         w[1] = x->l.frac3;
 149                         w[2] = x->l.frac4;
 150                         ex = 1;
 151                 } else if (x->l.frac2 | (x->l.frac3 & 0xfffe0000)) {
 152                         lx = x->l.frac2;
 153                         w[0] = x->l.frac3;
 154                         w[1] = x->l.frac4;
 155                         w[2] = 0;
 156                         ex = -31;
 157                 } else if (x->l.frac3 | (x->l.frac4 & 0xfffe0000)) {
 158                         lx = x->l.frac3;
 159                         w[0] = x->l.frac4;
 160                         w[1] = w[2] = 0;
 161                         ex = -63;
 162                 } else {
 163                         lx = x->l.frac4;
 164                         w[0] = w[1] = w[2] = 0;
 165                         ex = -95;
 166                 }
 167 
 168                 while ((lx & 0x10000) == 0) {
 169                         lx = (lx << 1) | (w[0] >> 31);
 170                         w[0] = (w[0] << 1) | (w[1] >> 31);
 171                         w[1] = (w[1] << 1) | (w[2] >> 31);
 172                         w[2] <<= 1;
 173                         ex--;
 174                 }
 175         }
 176 
 177         /* extract the significand into five doubles */
 178         u.l[HIWORD] = 0x42300000;
 179         u.l[LOWORD] = 0;
 180         b = u.d;
 181         u.l[LOWORD] = lx;
 182         s[0] = u.d - b;
 183 
 184         u.l[HIWORD] = 0x40300000;
 185         u.l[LOWORD] = 0;
 186         b = u.d;
 187         u.l[LOWORD] = w[0] & 0xffffff00;
 188         s[1] = u.d - b;
 189 
 190         u.l[HIWORD] = 0x3e300000;
 191         u.l[LOWORD] = 0;
 192         b = u.d;
 193         u.l[HIWORD] |= w[0] & 0xff;
 194         u.l[LOWORD] = w[1] & 0xffff0000;
 195         s[2] = u.d - b;
 196 
 197         u.l[HIWORD] = 0x3c300000;
 198         u.l[LOWORD] = 0;
 199         b = u.d;
 200         u.l[HIWORD] |= w[1] & 0xffff;
 201         u.l[LOWORD] = w[2] & 0xff000000;
 202         s[3] = u.d - b;
 203 
 204         u.l[HIWORD] = 0x3c300000;
 205         u.l[LOWORD] = 0;
 206         b = u.d;
 207         u.l[LOWORD] = w[2] & 0xffffff;
 208         s[4] = u.d - b;
 209 
 210         return (ex - 0x3fff);
 211 }
 212 
 213 /*
 214  *      Pack an exponent and array of three doubles representing a finite,
 215  *      nonzero number into a quad.  Assume the sign is already there and
 216  *      the rounding mode has been fudged accordingly.
 217  */
 218 static void
 219 __q_pack(const double *z, int exp, enum fp_direction_type rm,
 220     union longdouble *x, int *inexact)
 221 {
 222         union {
 223                 double d;
 224                 unsigned int l[2];
 225         } u;
 226 
 227         double s[3], t, t2;
 228         unsigned int msw, frac2, frac3, frac4;
 229 
 230         /* bias exponent and strip off integer bit */
 231         exp += 0x3fff;
 232         s[0] = z[0] - one;
 233         s[1] = z[1];
 234         s[2] = z[2];
 235 
 236         /*
 237          * chop the significand to obtain the fraction;
 238          * use round-to-minus-infinity to ensure chopping
 239          */
 240         (void) __swapRD(fp_negative);
 241 
 242         /* extract the first eighty bits of fraction */
 243         t = s[1] + s[2];
 244         u.d = two36 + (s[0] + t);
 245         msw = u.l[LOWORD];
 246         s[0] -= (u.d - two36);
 247 
 248         u.d = two4 + (s[0] + t);
 249         frac2 = u.l[LOWORD];
 250         s[0] -= (u.d - two4);
 251 
 252         u.d = twom28 + (s[0] + t);
 253         frac3 = u.l[LOWORD];
 254         s[0] -= (u.d - twom28);
 255 
 256         /* condense the remaining fraction; errors here won't matter */
 257         t = s[0] + s[1];
 258         s[1] = ((s[0] - t) + s[1]) + s[2];
 259         s[0] = t;
 260 
 261         /* get the last word of fraction */
 262         u.d = twom60 + (s[0] + s[1]);
 263         frac4 = u.l[LOWORD];
 264         s[0] -= (u.d - twom60);
 265 
 266         /*
 267          * keep track of what's left for rounding; note that
 268          * t2 will be non-negative due to rounding mode
 269          */
 270         t = s[0] + s[1];
 271         t2 = (s[0] - t) + s[1];
 272 
 273         if (t != zero) {
 274                 *inexact = 1;
 275 
 276                 /* decide whether to round the fraction up */
 277                 if (rm == fp_positive || (rm == fp_nearest && (t > twom113 ||
 278                     (t == twom113 && (t2 != zero || frac4 & 1))))) {
 279                         /* round up and renormalize if necessary */
 280                         if (++frac4 == 0) {
 281                                 if (++frac3 == 0) {
 282                                         if (++frac2 == 0) {
 283                                                 if (++msw == 0x10000) {
 284                                                         msw = 0;
 285                                                         exp++;
 286                                                 }
 287                                         }
 288                                 }
 289                         }
 290                 }
 291         }
 292 
 293         /* assemble the result */
 294         x->l.msw |= msw | (exp << 16);
 295         x->l.frac2 = frac2;
 296         x->l.frac3 = frac3;
 297         x->l.frac4 = frac4;
 298 }
 299 
 300 /*
 301  *      Compute the square root of x and place the TP result in s.
 302  */
 303 static void
 304 __q_tp_sqrt(const double *x, double *s)
 305 {
 306         double c, rr, r[3], tt[3], t[5];
 307 
 308         /* approximate the divisor for the Newton iteration */
 309         c = sqrt((x[0] + x[1]) + x[2]);
 310         rr = half / c;
 311 
 312         /* compute the first five "digits" of the square root */
 313         t[0] = (c + two30) - two30;
 314         tt[0] = t[0] + t[0];
 315         r[0] = ((x[0] - t[0] * t[0]) + x[1]) + x[2];
 316 
 317         t[1] = (rr * (r[0] + x[3]) + two6) - two6;
 318         tt[1] = t[1] + t[1];
 319         r[0] -= tt[0] * t[1];
 320         r[1] = x[3] - t[1] * t[1];
 321         c = (r[1] + twom18) - twom18;
 322         r[0] += c;
 323         r[1] = (r[1] - c) + x[4];
 324 
 325         t[2] = (rr * (r[0] + r[1]) + twom18) - twom18;
 326         tt[2] = t[2] + t[2];
 327         r[0] -= tt[0] * t[2];
 328         r[1] -= tt[1] * t[2];
 329         c = (r[1] + twom42) - twom42;
 330         r[0] += c;
 331         r[1] = (r[1] - c) - t[2] * t[2];
 332 
 333         t[3] = (rr * (r[0] + r[1]) + twom42) - twom42;
 334         r[0] = ((r[0] - tt[0] * t[3]) + r[1]) - tt[1] * t[3];
 335         r[1] = -tt[2] * t[3];
 336         c = (r[1] + twom90) - twom90;
 337         r[0] += c;
 338         r[1] = (r[1] - c) - t[3] * t[3];
 339 
 340         t[4] = (rr * (r[0] + r[1]) + twom66) - twom66;
 341 
 342         /* here we just need to get the sign of the remainder */
 343         c = (((((r[0] - tt[0] * t[4]) - tt[1] * t[4]) + r[1]) - tt[2] * t[4]) -
 344             (t[3] + t[3]) * t[4]) - t[4] * t[4];
 345 
 346         /* reduce to three doubles */
 347         t[0] += t[1];
 348         t[1] = t[2] + t[3];
 349         t[2] = t[4];
 350 
 351         /* if the third term might lie on a rounding boundary, perturb it */
 352         if (c != zero && t[2] == (twom62 + t[2]) - twom62) {
 353                 if (c < zero)
 354                         t[2] -= twom124;
 355                 else
 356                         t[2] += twom124;
 357         }
 358 
 359         /* condense the square root */
 360         c = t[1] + t[2];
 361         t[2] += (t[1] - c);
 362         t[1] = c;
 363         c = t[0] + t[1];
 364         s[1] = t[1] + (t[0] - c);
 365         s[0] = c;
 366 
 367         if (s[1] == zero) {
 368                 c = s[0] + t[2];
 369                 s[1] = t[2] + (s[0] - c);
 370                 s[0] = c;
 371                 s[2] = zero;
 372         } else {
 373                 c = s[1] + t[2];
 374                 s[2] = t[2] + (s[1] - c);
 375                 s[1] = c;
 376         }
 377 }
 378 
 379 long double
 380 sqrtl(long double ldx)
 381 {
 382         union   longdouble x;
 383         volatile double t;
 384         double xx[5], zz[3];
 385         enum fp_direction_type rm;
 386         int ex, inexact, exc, traps;
 387 
 388         /* clear cexc */
 389         t = zero;
 390         t -= zero;
 391 
 392         /* check for zero operand */
 393         x.d = ldx;
 394 
 395         if (!((x.l.msw & 0x7fffffff) | x.l.frac2 | x.l.frac3 | x.l.frac4))
 396                 return (ldx);
 397 
 398         /* handle nan and inf cases */
 399         if ((x.l.msw & 0x7fffffff) >= 0x7fff0000) {
 400                 if ((x.l.msw & 0xffff) | x.l.frac2 | x.l.frac3 | x.l.frac4) {
 401                         if (!(x.l.msw & 0x8000)) {
 402                                 /* snan, signal invalid */
 403                                 t += snan.d;
 404                         }
 405 
 406                         x.l.msw |= 0x8000;
 407                         return (x.d);
 408                 }
 409 
 410                 if (x.l.msw & 0x80000000) {
 411                         /* sqrt(-inf), signal invalid */
 412                         t = -one;
 413                         t = sqrt(t);
 414                         return (qnan.d);
 415                 }
 416 
 417                 /* sqrt(inf), return inf */
 418                 return (x.d);
 419         }
 420 
 421         /* handle negative numbers */
 422         if (x.l.msw & 0x80000000) {
 423                 t = -one;
 424                 t = sqrt(t);
 425                 return (qnan.d);
 426         }
 427 
 428         /* now x is finite, positive */
 429 
 430         traps = __swapTE(0);
 431         exc = __swapEX(0);
 432         rm = __swapRD(fp_nearest);
 433 
 434         ex = __q_unpack(&x, xx);
 435 
 436         if (ex & 1) {
 437                 /* make exponent even */
 438                 xx[0] += xx[0];
 439                 xx[1] += xx[1];
 440                 xx[2] += xx[2];
 441                 xx[3] += xx[3];
 442                 xx[4] += xx[4];
 443                 ex--;
 444         }
 445 
 446         __q_tp_sqrt(xx, zz);
 447 
 448         /* put everything together */
 449         x.l.msw = 0;
 450         inexact = 0;
 451         __q_pack(zz, ex >> 1, rm, &x, &inexact);
 452 
 453         (void) __swapRD(rm);
 454         (void) __swapEX(exc);
 455         (void) __swapTE(traps);
 456 
 457         if (inexact) {
 458                 t = huge;
 459                 t += tiny;
 460         }
 461 
 462         return (x.d);
 463 }