Print this page
11210 libm should be cstyle(1ONBLD) clean


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #pragma weak fmal = __fmal
  31 
  32 #include "libm.h"
  33 #include "fma.h"
  34 #include "fenv_inlines.h"
  35 
  36 #if defined(__sparc)
  37 
  /*
   * Table of double precision constants given as raw 32-bit words.  The
   * values noted below read i[0] as the high (sign/exponent) word, which
   * is how this SPARC-only branch lays out a double.
   */
  38 static const union {
  39         unsigned i[2];
  40         double d;
  41 } C[] = {
  42         { 0x3fe00000u, 0 },        /* C[0]  = 0.5 */
  43         { 0x40000000u, 0 },        /* C[1]  = 2.0 */
  44         { 0x3ef00000u, 0 },        /* C[2]  = 2^-16 */
  45         { 0x3e700000u, 0 },        /* C[3]  = 2^-24 */
  46         { 0x41300000u, 0 },        /* C[4]  = 2^20 */
  47         { 0x3e300000u, 0 },        /* C[5]  = 2^-28 */
  48         { 0x3b300000u, 0 },        /* C[6]  = 2^-76 */
  49         { 0x38300000u, 0 },        /* C[7]  = 2^-124 */
  50         { 0x42300000u, 0 },        /* C[8]  = 2^36 */
  51         { 0x3df00000u, 0 },        /* C[9]  = 2^-32 */
  52         { 0x7fe00000u, 0 },        /* C[10] = 2^1023 */
  53         { 0x00100000u, 0 },        /* C[11] = 2^-1022, smallest normal */
  54         { 0x00100001u, 0 },        /* C[12] = 2^-1022 * (1 + 2^-20) */
  55         { 0, 0 },                  /* C[13] = +0 */
  56         { 0x7ff00000u, 0 },        /* C[14] = +infinity */
  57         { 0x7ff00001u, 0 }         /* C[15] = nan, top fraction bit clear */


  /*
   * Names for entries of the constant table C[].  "twoN" is 2^N and
   * "twomN" is 2^-N, as decoded from the table words; huge and tiny are
   * the operands used to provoke overflow and underflow, and snan is the
   * nan pattern used to raise invalid.
   */
  63 #define twom24  C[3].d
  64 #define two20   C[4].d
  65 #define twom28  C[5].d
  66 #define twom76  C[6].d
  67 #define twom124 C[7].d
  68 #define two36   C[8].d
  69 #define twom32  C[9].d
  70 #define huge    C[10].d
  71 #define tiny    C[11].d
  72 #define tiny2   C[12].d
  73 #define zero    C[13].d
  74 #define inf     C[14].d
  75 #define snan    C[15].d
  76 
  /*
   * fsr image with only the two top bits set.  __fmal() loads it to run
   * its internal arithmetic in round-to-negative-infinity mode; the
   * rounding mode is read elsewhere in this file as (fsr >> 30).
   */
  77 static const unsigned int fsr_rm = 0xc0000000u;
  78 
  79 /*
  80  * fmal for SPARC: 128-bit quad precision, big-endian
      *
      * Returns x * y + z.  The significand of x * y is built from double
      * precision partial products of 24-bit pieces of the operands, z is
      * then added to (or subtracted from) it using eight unsigned words,
      * and the sum is rounded once using the rounding mode found in the
      * fsr on entry.
      *
      * Signaling nans raise invalid and are returned with bit 0x8000 of
      * the high word set; 0*inf and inf-inf raise invalid and return the
      * default nan.  Overflow, underflow and inexact are either recorded
      * in the fsr or raised by dummy floating point operations.
  81  */
  82 long double
  83 __fmal(long double x, long double y, long double z) {

             /* word views of the arguments; i[0] holds sign and exponent */
  84         union {
  85                 unsigned int i[4];
  86                 long double q;
  87         } xx, yy, zz;
             /* used to read back the low word (i[1]) of a double */
  88         union {
  89                 unsigned int i[2];
  90                 double d;
  91         } u;

  92         double dx[5], dy[5], dxy[9], c, s;
  93         unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
  94         unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
  95         unsigned int rm, sticky;
  96         unsigned int fsr;
  97         int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
             /* operand class: 0 zero, 1 finite nonzero, 2 inf, 3 quiet nan */
  98         int cx, cy, cz;
             /* volatile so the exception-raising operations are performed */
  99         volatile double dummy;
 100 
 101         /* extract the high order words of the arguments */
 102         xx.q = x;
 103         yy.q = y;
 104         zz.q = z;
             /* drop the sign bit; the exponent field is then hx >> 16 */
 105         hx = xx.i[0] & ~0x80000000;
 106         hy = yy.i[0] & ~0x80000000;
 107         hz = zz.i[0] & ~0x80000000;
 108 
 109         /*
 110          * distinguish zero, finite nonzero, infinite, and quiet nan
 111          * arguments; raise invalid and return for signaling nans
 112          */
 113         if (hx >= 0x7fff0000) {
                     /* exponent all ones: nan if any fraction bit is set */
 114                 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
 115                         if (!(hx & 0x8000)) {
 116                                 /* signaling nan, raise invalid */
 117                                 dummy = snan;
 118                                 dummy += snan;
                                     /* return x with the quiet bit set */
 119                                 xx.i[0] |= 0x8000;
 120                                 return (xx.q);
 121                         }

 122                         cx = 3; /* quiet nan */
 123                 } else
 124                         cx = 2; /* inf */

 125         } else if (hx == 0) {
 126                 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
 127                                 /* subnormal or zero */
 128         } else
 129                 cx = 1;         /* finite nonzero */

 130 
             /* classify y exactly as x was classified above */
 131         if (hy >= 0x7fff0000) {
 132                 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
 133                         if (!(hy & 0x8000)) {
 134                                 dummy = snan;
 135                                 dummy += snan;
 136                                 yy.i[0] |= 0x8000;
 137                                 return (yy.q);
 138                         }

 139                         cy = 3;
 140                 } else
 141                         cy = 2;

 142         } else if (hy == 0) {
 143                 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
 144         } else
 145                 cy = 1;

 146 
             /* classify z exactly as x was classified above */
 147         if (hz >= 0x7fff0000) {
 148                 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
 149                         if (!(hz & 0x8000)) {
 150                                 dummy = snan;
 151                                 dummy += snan;
 152                                 zz.i[0] |= 0x8000;
 153                                 return (zz.q);
 154                         }

 155                         cz = 3;
 156                 } else
 157                         cz = 2;

 158         } else if (hz == 0) {
 159                 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
 160         } else
 161                 cz = 1;

 162 
 163         /* get the fsr and clear current exceptions */
 164         __fenv_getfsr32(&fsr);
 165         fsr &= ~FSR_CEXC;
 166 
 167         /* handle all other zero, inf, and nan cases */
 168         if (cx != 1 || cy != 1 || cz != 1) {
 169                 /* if x or y is a quiet nan, return it */
 170                 if (cx == 3) {
 171                         __fenv_setfsr32(&fsr);
 172                         return (x);
 173                 }

 174                 if (cy == 3) {
 175                         __fenv_setfsr32(&fsr);
 176                         return (y);
 177                 }
 178 
 179                 /* if x*y is 0*inf, raise invalid and return the default nan */
 180                 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
 181                         dummy = zero;
 182                         dummy *= inf;
                             /* default nan: sign clear, all other bits set */
 183                         zz.i[0] = 0x7fffffff;
 184                         zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
 185                         return (zz.q);
 186                 }
 187 
 188                 /* if z is a quiet nan, return it */
 189                 if (cz == 3) {
 190                         __fenv_setfsr32(&fsr);
 191                         return (z);
 192                 }
 193 
 194                 /*
 195                  * now none of x, y, or z is nan; handle cases where x or y
 196                  * is inf
 197                  */
 198                 if (cx == 2 || cy == 2) {
 199                         /*
 200                          * if z is also inf, either we have inf-inf or
 201                          * the result is the same as z depending on signs
 202                          */
 203                         if (cz == 2) {
                                     /* negative when sign(x*y) != sign(z) */
 204                                 if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
 205                                         dummy = inf;
 206                                         dummy -= inf;
 207                                         zz.i[0] = 0x7fffffff;
 208                                         zz.i[1] = zz.i[2] = zz.i[3] =
 209                                                 0xffffffff;
 210                                         return (zz.q);
 211                                 }

 212                                 __fenv_setfsr32(&fsr);
 213                                 return (z);
 214                         }
 215 
 216                         /* otherwise the result is inf with appropriate sign */
 217                         zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
 218                                 0x7fff0000;
 219                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 220                         __fenv_setfsr32(&fsr);
 221                         return (zz.q);
 222                 }
 223 
 224                 /* if z is inf, return it */
 225                 if (cz == 2) {
 226                         __fenv_setfsr32(&fsr);
 227                         return (z);
 228                 }
 229 
 230                 /*
 231                  * now x, y, and z are all finite; handle cases where x or y
 232                  * is zero
 233                  */
 234                 if (cx == 0 || cy == 0) {
 235                         /* either we have 0-0 or the result is the same as z */
 236                         if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
 237                                 0) {
                                     /*
                                      * zeros of opposite sign: the sum is -0
                                      * only when the rounding mode equals
                                      * FSR_RM, otherwise +0; the low words
                                      * of zz are already zero since cz == 0
                                      */
 238                                 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
 239                                         0;
 240                                 __fenv_setfsr32(&fsr);
 241                                 return (zz.q);
 242                         }

 243                         __fenv_setfsr32(&fsr);
 244                         return (z);
 245                 }
 246 
 247                 /* if we get here, x and y are nonzero finite, z must be zero */
 248                 return (x * y);
 249         }
 250 
 251         /*
 252          * now x, y, and z are all finite and nonzero; set round-to-
 253          * negative-infinity mode
 254          */
 255         __fenv_setfsr32(&fsr_rm);
 256 
 257         /*
 258          * get the signs and exponents and normalize the significands
 259          * of x and y
 260          */
 261         sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
 262         ex = hx >> 16;
 263         hx &= 0xffff;

 264         if (!ex) {
                     /*
                      * x is subnormal: move the leading one up to bit
                      * 0x10000 of hx.  Whole words are shifted first (32
                      * bits of exponent each) as long as the word moved
                      * into hx has nothing above bit 0x10000, so the bit
                      * loop below only ever needs to shift left.
                      */
 265                 if (hx | (xx.i[1] & 0xfffe0000)) {
 266                         ex = 1;
 267                 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
 268                         hx = xx.i[1];
 269                         xx.i[1] = xx.i[2];
 270                         xx.i[2] = xx.i[3];
 271                         xx.i[3] = 0;
 272                         ex = -31;
 273                 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
 274                         hx = xx.i[2];
 275                         xx.i[1] = xx.i[3];
 276                         xx.i[2] = xx.i[3] = 0;
 277                         ex = -63;
 278                 } else {
 279                         hx = xx.i[3];
 280                         xx.i[1] = xx.i[2] = xx.i[3] = 0;
 281                         ex = -95;
 282                 }

 283                 while ((hx & 0x10000) == 0) {
 284                         hx = (hx << 1) | (xx.i[1] >> 31);
 285                         xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
 286                         xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
 287                         xx.i[3] <<= 1;
 288                         ex--;
 289                 }
 290         } else
 291                 hx |= 0x10000;


 292         ey = hy >> 16;
 293         hy &= 0xffff;

 294         if (!ey) {
                     /* y is subnormal: normalize it the same way as x */
 295                 if (hy | (yy.i[1] & 0xfffe0000)) {
 296                         ey = 1;
 297                 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
 298                         hy = yy.i[1];
 299                         yy.i[1] = yy.i[2];
 300                         yy.i[2] = yy.i[3];
 301                         yy.i[3] = 0;
 302                         ey = -31;
 303                 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
 304                         hy = yy.i[2];
 305                         yy.i[1] = yy.i[3];
 306                         yy.i[2] = yy.i[3] = 0;
 307                         ey = -63;
 308                 } else {
 309                         hy = yy.i[3];
 310                         yy.i[1] = yy.i[2] = yy.i[3] = 0;
 311                         ey = -95;
 312                 }

 313                 while ((hy & 0x10000) == 0) {
 314                         hy = (hy << 1) | (yy.i[1] >> 31);
 315                         yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
 316                         yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
 317                         yy.i[3] <<= 1;
 318                         ey--;
 319                 }
 320         } else
 321                 hy |= 0x10000;


             /* both exponents are biased, so remove one bias from the sum */
 322         exy = ex + ey - 0x3fff;
 323 
 324         /* convert the significands of x and y to doubles */
             /*
              * each significand is cut into a 17-bit leading piece (with
              * the integer bit) followed by four 24-bit pieces; c is the
              * weight of the piece being converted (twom16 is presumably
              * 2^-16, its definition is not shown here)
              */
 325         c = twom16;
 326         dx[0] = (double) ((int) hx) * c;
 327         dy[0] = (double) ((int) hy) * c;
 328 
 329         c *= twom24;
 330         dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
 331         dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
 332 
 333         c *= twom24;
 334         dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
 335             0xffffff)) * c;
 336         dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
 337             0xffffff)) * c;
 338 
 339         c *= twom24;
 340         dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
 341             0xffffff)) * c;
 342         dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
 343             0xffffff)) * c;
 344 
 345         c *= twom24;
 346         dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
 347         dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
 348 
 349         /* form the "digits" of the product */
             /* dxy[k] collects every dx[i] * dy[j] with i + j == k */
 350         dxy[0] = dx[0] * dy[0];
 351         dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
 352         dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
 353         dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
 354             dx[3] * dy[0];
 355         dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
 356             dx[3] * dy[1] + dx[4] * dy[0];
 357         dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
 358             dx[4] * dy[1];
 359         dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
 360         dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
 361         dxy[8] = dx[4] * dy[4];
 362 
 363         /* split odd-numbered terms and combine into even-numbered terms */
             /*
              * (t + k) - k with k a power of two is used to peel off the
              * high part of t; the high part goes to the even term above
              * and the remainder to the even term below
              */
 364         c = (dxy[1] + two20) - two20;
 365         dxy[0] += c;
 366         dxy[1] -= c;
 367         c = (dxy[3] + twom28) - twom28;
 368         dxy[2] += c + dxy[1];
 369         dxy[3] -= c;
 370         c = (dxy[5] + twom76) - twom76;
 371         dxy[4] += c + dxy[3];
 372         dxy[5] -= c;
 373         c = (dxy[7] + twom124) - twom124;
 374         dxy[6] += c + dxy[5];
 375         dxy[8] += (dxy[7] - c);
 376 
 377         /* propagate carries, adjusting the exponent if need be */
             /* the odd slots are reused for running sums from the low end up */
 378         dxy[7] = dxy[6] + dxy[8];
 379         dxy[5] = dxy[4] + dxy[7];
 380         dxy[3] = dxy[2] + dxy[5];
 381         dxy[1] = dxy[0] + dxy[3];

             /* keep the leading sum below two, counting the halving in exy */
 382         if (dxy[1] >= two) {
 383                 dxy[0] *= half;
 384                 dxy[1] *= half;
 385                 dxy[2] *= half;
 386                 dxy[3] *= half;
 387                 dxy[4] *= half;
 388                 dxy[5] *= half;
 389                 dxy[6] *= half;
 390                 dxy[7] *= half;
 391                 dxy[8] *= half;
 392                 exy++;
 393         }
 394 
 395         /* extract the significand of x*y */
             /*
              * adding the large power of two s places the wanted 32 bits of
              * the sum in the low word of the double, read back as u.i[1];
              * c - s is then the part just extracted, which is removed
              * before the next word is taken with a smaller s
              */
 396         s = two36;
 397         u.d = c = dxy[1] + s;
 398         xy0 = u.i[1];
 399         c -= s;
 400         dxy[1] -= c;
 401         dxy[0] -= c;

             /*
              * NOTE(review): original lines 402-430 are missing from this
              * listing; presumably they extract xy1 through xy4 in the same
              * manner -- confirm against the complete source.
              */

 431         s *= twom32;
 432         u.d = c = dxy[7] + s;
 433         xy5 = u.i[1];
 434         c -= s;
 435         dxy[8] += (dxy[6] - c);
 436 
 437         s *= twom32;
 438         u.d = c = dxy[8] + s;
 439         xy6 = u.i[1];
 440         c -= s;
 441         dxy[8] -= c;
 442 
 443         s *= twom32;
 444         u.d = c = dxy[8] + s;
 445         xy7 = u.i[1];
 446 
 447         /* extract the sign, exponent, and significand of z */
 448         sz = zz.i[0] & 0x80000000;
 449         ez = hz >> 16;
 450         z0 = hz & 0xffff;

 451         if (!ez) {
                     /* z is subnormal: normalize as for x, into z0..z3 */
 452                 if (z0 | (zz.i[1] & 0xfffe0000)) {
 453                         z1 = zz.i[1];
 454                         z2 = zz.i[2];
 455                         z3 = zz.i[3];
 456                         ez = 1;
 457                 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
 458                         z0 = zz.i[1];
 459                         z1 = zz.i[2];
 460                         z2 = zz.i[3];
 461                         z3 = 0;
 462                         ez = -31;
 463                 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
 464                         z0 = zz.i[2];
 465                         z1 = zz.i[3];
 466                         z2 = z3 = 0;
 467                         ez = -63;
 468                 } else {
 469                         z0 = zz.i[3];
 470                         z1 = z2 = z3 = 0;
 471                         ez = -95;
 472                 }

 473                 while ((z0 & 0x10000) == 0) {
 474                         z0 = (z0 << 1) | (z1 >> 31);
 475                         z1 = (z1 << 1) | (z2 >> 31);
 476                         z2 = (z2 << 1) | (z3 >> 31);
 477                         z3 <<= 1;
 478                         ez--;
 479                 }
 480         } else {
 481                 z0 |= 0x10000;
 482                 z1 = zz.i[1];
 483                 z2 = zz.i[2];
 484                 z3 = zz.i[3];
 485         }

             /* z has no bits below its 128-bit encoding */
 486         z4 = z5 = z6 = z7 = 0;
 487 
 488         /*
 489          * now x*y is represented by sxy, exy, and xy[0-7], and z is
 490          * represented likewise; swap if need be so |xy| <= |z|
 491          */
 492         if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
 493                 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
 494                 (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
                     /* e is only a scratch variable for the exchanges here */
 495                 e = sxy; sxy = sz; sz = e;
 496                 e = exy; exy = ez; ez = e;
 497                 e = xy0; xy0 = z0; z0 = e;
 498                 e = xy1; xy1 = z1; z1 = e;
 499                 e = xy2; xy2 = z2; z2 = e;
 500                 e = xy3; xy3 = z3; z3 = e;
 501                 z4 = xy4; xy4 = 0;
 502                 z5 = xy5; xy5 = 0;
 503                 z6 = xy6; xy6 = 0;
 504                 z7 = xy7; xy7 = 0;
















 505         }
 506 
 507         /* shift the significand of xy keeping a sticky bit */
 508         e = ez - exy;

 509         if (e > 236) {
                     /* everything is shifted out; only the sticky bit remains */
 510                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
 511                 xy7 = 1;
 512         } else if (e >= 224) {
                     /*
                      * (v << 1) << (255 - e) is v << (256 - e) done in two
                      * steps so that neither shift count reaches 32, even
                      * at the low end of this range of e; the same idiom
                      * is used in every case below
                      */
 513                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
 514                         ((xy0 << 1) << (255 - e));
 515                 xy7 = xy0 >> (e - 224);

 516                 if (sticky)
 517                         xy7 |= 1;

 518                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
 519         } else if (e >= 192) {
 520                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
 521                         ((xy1 << 1) << (223 - e));
 522                 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));

 523                 if (sticky)
 524                         xy7 |= 1;

 525                 xy6 = xy0 >> (e - 192);
 526                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
 527         } else if (e >= 160) {
 528                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
 529                         ((xy2 << 1) << (191 - e));
 530                 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));

 531                 if (sticky)
 532                         xy7 |= 1;

 533                 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
 534                 xy5 = xy0 >> (e - 160);
 535                 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
 536         } else if (e >= 128) {
 537                 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
 538                 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));

 539                 if (sticky)
 540                         xy7 |= 1;

 541                 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
 542                 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
 543                 xy4 = xy0 >> (e - 128);
 544                 xy0 = xy1 = xy2 = xy3 = 0;
 545         } else if (e >= 96) {
 546                 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
 547                 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));

 548                 if (sticky)
 549                         xy7 |= 1;

 550                 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
 551                 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
 552                 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
 553                 xy3 = xy0 >> (e - 96);
 554                 xy0 = xy1 = xy2 = 0;
 555         } else if (e >= 64) {
 556                 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
 557                 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));

 558                 if (sticky)
 559                         xy7 |= 1;

 560                 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
 561                 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
 562                 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
 563                 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
 564                 xy2 = xy0 >> (e - 64);
 565                 xy0 = xy1 = 0;
 566         } else if (e >= 32) {
 567                 sticky = xy7 | ((xy6 << 1) << (63 - e));
 568                 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));

 569                 if (sticky)
 570                         xy7 |= 1;

 571                 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
 572                 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
 573                 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
 574                 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
 575                 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
 576                 xy1 = xy0 >> (e - 32);
 577                 xy0 = 0;
 578         } else if (e) {
 579                 sticky = (xy7 << 1) << (31 - e);
 580                 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));

 581                 if (sticky)
 582                         xy7 |= 1;

 583                 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
 584                 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
 585                 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
 586                 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
 587                 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
 588                 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
 589                 xy0 >>= e;
 590         }
 591 
 592         /* if this is a magnitude subtract, negate the significand of xy */
 593         if (sxy ^ sz) {
                     /*
                      * two's complement over eight words: complement the
                      * upper seven, negate the lowest, and carry the +1
                      * upward for as long as the words wrap to zero
                      */
 594                 xy0 = ~xy0;
 595                 xy1 = ~xy1;
 596                 xy2 = ~xy2;
 597                 xy3 = ~xy3;
 598                 xy4 = ~xy4;
 599                 xy5 = ~xy5;
 600                 xy6 = ~xy6;
 601                 xy7 = -xy7;

 602                 if (xy7 == 0)
 603                         if (++xy6 == 0)
 604                                 if (++xy5 == 0)
 605                                         if (++xy4 == 0)
 606                                                 if (++xy3 == 0)
 607                                                         if (++xy2 == 0)
 608                                                                 if (++xy1 == 0)
 609                                                                         xy0++;
 610         }
 611 
 612         /* add, propagating carries */
             /*
              * e is the carry into the next word: with no carry in, the sum
              * wrapped iff it is less than the addend; with a carry in,
              * iff it is less than or equal to the addend
              */
 613         z7 += xy7;
 614         e = (z7 < xy7);
 615         z6 += xy6;

 616         if (e) {
 617                 z6++;
 618                 e = (z6 <= xy6);
 619         } else
 620                 e = (z6 < xy6);


 621         z5 += xy5;

 622         if (e) {
 623                 z5++;
 624                 e = (z5 <= xy5);
 625         } else
 626                 e = (z5 < xy5);


 627         z4 += xy4;

 628         if (e) {
 629                 z4++;
 630                 e = (z4 <= xy4);
 631         } else
 632                 e = (z4 < xy4);


 633         z3 += xy3;

 634         if (e) {
 635                 z3++;
 636                 e = (z3 <= xy3);
 637         } else
 638                 e = (z3 < xy3);


 639         z2 += xy2;

 640         if (e) {
 641                 z2++;
 642                 e = (z2 <= xy2);
 643         } else
 644                 e = (z2 < xy2);


 645         z1 += xy1;

 646         if (e) {
 647                 z1++;
 648                 e = (z1 <= xy1);
 649         } else
 650                 e = (z1 < xy1);


 651         z0 += xy0;

 652         if (e)
 653                 z0++;
 654 
 655         /* postnormalize and collect rounding information into z4 */
 656         if (ez < 1) {
 657                 /* result is tiny; shift right until exponent is within range */
 658                 e = 1 - ez;

 659                 if (e > 116) {
 660                         z4 = 1; /* result can't be exactly zero */
 661                         z0 = z1 = z2 = z3 = 0;
 662                 } else if (e >= 96) {
 663                         sticky = z7 | z6 | z5 | z4 | z3 | z2 |
 664                                 ((z1 << 1) << (127 - e));
 665                         z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));

 666                         if (sticky)
 667                                 z4 |= 1;

 668                         z3 = z0 >> (e - 96);
 669                         z0 = z1 = z2 = 0;
 670                 } else if (e >= 64) {
 671                         sticky = z7 | z6 | z5 | z4 | z3 |
 672                                 ((z2 << 1) << (95 - e));
 673                         z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));

 674                         if (sticky)
 675                                 z4 |= 1;

 676                         z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
 677                         z2 = z0 >> (e - 64);
 678                         z0 = z1 = 0;
 679                 } else if (e >= 32) {
 680                         sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
 681                         z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));

 682                         if (sticky)
 683                                 z4 |= 1;

 684                         z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
 685                         z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
 686                         z1 = z0 >> (e - 32);
 687                         z0 = 0;
 688                 } else {
 689                         sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
 690                         z4 = (z4 >> e) | ((z3 << 1) << (31 - e));

 691                         if (sticky)
 692                                 z4 |= 1;

 693                         z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
 694                         z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
 695                         z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
 696                         z0 >>= e;
 697                 }

 698                 ez = 1;
 699         } else if (z0 >= 0x20000) {
 700                 /* carry out; shift right by one */
 701                 sticky = (z4 & 1) | z5 | z6 | z7;
 702                 z4 = (z4 >> 1) | (z3 << 31);

 703                 if (sticky)
 704                         z4 |= 1;

 705                 z3 = (z3 >> 1) | (z2 << 31);
 706                 z2 = (z2 >> 1) | (z1 << 31);
 707                 z1 = (z1 >> 1) | (z0 << 31);
 708                 z0 >>= 1;
 709                 ez++;
 710         } else {
 711                 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
 712                         != 0) {
 713                         /*
 714                          * borrow/cancellation; shift left as much as
 715                          * exponent allows
 716                          */
                             /* by whole words first, then one bit at a time */
 717                         while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
 718                                 z0 = z1;
 719                                 z1 = z2;
 720                                 z2 = z3;
 721                                 z3 = z4;
 722                                 z4 = z5;
 723                                 z5 = z6;
 724                                 z6 = z7;
 725                                 z7 = 0;
 726                                 ez -= 32;
 727                         }

 728                         while (z0 < 0x10000 && ez > 1) {
 729                                 z0 = (z0 << 1) | (z1 >> 31);
 730                                 z1 = (z1 << 1) | (z2 >> 31);
 731                                 z2 = (z2 << 1) | (z3 >> 31);
 732                                 z3 = (z3 << 1) | (z4 >> 31);
 733                                 z4 = (z4 << 1) | (z5 >> 31);
 734                                 z5 = (z5 << 1) | (z6 >> 31);
 735                                 z6 = (z6 << 1) | (z7 >> 31);
 736                                 z7 <<= 1;
 737                                 ez--;
 738                         }
 739                 }

                     /* fold whatever is left below z4 into its sticky bit */
 740                 if (z5 | z6 | z7)
 741                         z4 |= 1;
 742         }
 743 
 744         /* get the rounding mode */
             /* taken from the fsr saved on entry, not the mode set above */
 745         rm = fsr >> 30;
 746 
 747         /* strip off the integer bit, if there is one */
 748         ibit = z0 & 0x10000;
 749         if (ibit)

 750                 z0 -= 0x10000;
 751         else {
                     /* no integer bit: the result is subnormal or zero */
 752                 ez = 0;

 753                 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
 754                         zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
 755                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 756                         __fenv_setfsr32(&fsr);
 757                         return (zz.q);
 758                 }
 759         }
 760 
 761         /*
 762          * flip the sense of directed roundings if the result is negative;
 763          * the logic below applies to a positive result
 764          */
             /* rm ^= rm >> 1 exchanges modes 2 and 3 and leaves 0 and 1 alone */
 765         if (sz)
 766                 rm ^= rm >> 1;
 767 
 768         /* round and raise exceptions */
             /* z4 holds the bits below z3: its top bit is the half-way bit */
 769         if (z4) {
 770                 fsr |= FSR_NXC;
 771 
 772                 /* decide whether to round the fraction up */
                     /*
                      * for FSR_RN: round up when above the half-way point, or
                      * exactly half-way with an odd low bit in z3
                      */
 773                 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
 774                         (z4 == 0x80000000u && (z3 & 1))))) {
 775                         /* round up and renormalize if necessary */
 776                         if (++z3 == 0)
 777                                 if (++z2 == 0)
 778                                         if (++z1 == 0)
 779                                                 if (++z0 == 0x10000) {
 780                                                         z0 = 0;
 781                                                         ez++;
 782                                                 }
 783                 }
 784         }
 785 
 786         /* check for under/overflow */
 787         if (ez >= 0x7fff) {
                     /*
                      * overflow: deliver infinity for FSR_RN and FSR_RP,
                      * otherwise the largest finite magnitude
                      */
 788                 if (rm == FSR_RN || rm == FSR_RP) {
 789                         zz.i[0] = sz | 0x7fff0000;
 790                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 791                 } else {
 792                         zz.i[0] = sz | 0x7ffeffff;
 793                         zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
 794                 }

 795                 fsr |= FSR_OFC | FSR_NXC;
 796         } else {
                     /* assemble sign, exponent field and fraction words */
 797                 zz.i[0] = sz | (ez << 16) | z0;
 798                 zz.i[1] = z1;
 799                 zz.i[2] = z2;
 800                 zz.i[3] = z3;
 801 
 802                 /*
 803                  * !ibit => exact result was tiny before rounding,
 804                  * z4 nonzero => result delivered is inexact
 805                  */
 806                 if (!ibit) {
 807                         if (z4)
 808                                 fsr |= FSR_UFC | FSR_NXC;
 809                         else if (fsr & FSR_UFM)
 810                                 fsr |= FSR_UFC;
 811                 }
 812         }
 813 
 814         /* restore the fsr and emulate exceptions as needed */
             /*
              * NOTE(review): fsr >> 23 presumably lines the trap enable
              * bits up with the current exception bits, so this tests for
              * a raised exception whose trap is enabled -- confirm against
              * the fsr layout.
              */
 815         if ((fsr & FSR_CEXC) & (fsr >> 23)) {
 816                 __fenv_setfsr32(&fsr);

                     /* perform an operation that raises the same exception */
 817                 if (fsr & FSR_OFC) {
 818                         dummy = huge;
 819                         dummy *= huge;
 820                 } else if (fsr & FSR_UFC) {
 821                         dummy = tiny;

 822                         if (fsr & FSR_NXC)
 823                                 dummy *= tiny;
 824                         else
 825                                 dummy -= tiny2;
 826                 } else {
 827                         dummy = huge;
 828                         dummy += tiny;
 829                 }
 830         } else {
                     /*
                      * copy the low five bits of the fsr into the five bits
                      * above them (presumably current exceptions into the
                      * accrued exception field -- confirm)
                      */
 831                 fsr |= (fsr & 0x1f) << 5;
 832                 __fenv_setfsr32(&fsr);
 833         }

 834         return (zz.q);
 835 }
 836 
 837 #elif defined(__x86)
 838 
     /*
      * Double-precision constants spelled out as raw 32-bit words.  In this
      * x86 (little-endian) table i[1] carries the sign, exponent, and top of
      * the fraction, and i[0] carries the low 32 fraction bits.  The macros
      * that follow give each entry its name.
      */
 839 static const union {
 840         unsigned i[2];
 841         double d;
 842 } C[] = {
 843         { 0, 0x3fe00000u },        /* 0.5 */
 844         { 0, 0x40000000u },        /* 2.0 */
 845         { 0, 0x3df00000u },        /* 2^-32 */
 846         { 0, 0x3bf00000u },        /* 2^-64 */
 847         { 0, 0x41f00000u },        /* 2^32 */
 848         { 0, 0x43e00000u },        /* 2^63 */
 849         { 0, 0x7fe00000u },        /* 2^1023 */
 850         { 0, 0x00100000u },        /* 2^-1022, smallest normal double */
 851         { 0, 0x00100001u }         /* 2^-1022 * (1 + 2^-20) */
 852 };
 853 
 854 #define half    C[0].d
 855 #define two     C[1].d
 856 #define twom32  C[2].d
 857 #define twom64  C[3].d
 858 #define two32   C[4].d
 859 #define two63   C[5].d
 860 #define huge    C[6].d
 861 #define tiny    C[7].d
 862 #define tiny2   C[8].d
 863 
     /*
      * NI is the length of the unsigned array that overlays a long double
      * in the unions used by __fmal below: 4 words on amd64, 3 otherwise.
      */
 864 #if defined(__amd64)
 865 #define NI      4
 866 #else
 867 #define NI      3
 868 #endif
 869 
 870 /*
 871  * fmal for x86: 80-bit extended double precision, little-endian
      *
      * Returns x * y + z.  When x, y, and z are all finite and nonzero the
      * product is formed to 128 bits, z is added (or subtracted) in
      * multiword integer arithmetic, and the sum is rounded once according
      * to the rounding mode found in the caller's control word.  Inf, NaN,
      * and zero arguments are handed to ordinary floating point expressions
      * up front.
      *
      * The control and status words are saved on entry to the main path and
      * restored before returning; overflow, underflow, and inexact are then
      * signaled by performing small double-precision operations on a
      * volatile temporary.
 872  */
 873 long double
 874 __fmal(long double x, long double y, long double z) {

             /*
              * word-wise view of a long double as used below: i[0] and i[1]
              * are the low and high halves of the 64-bit significand, and the
              * low 16 bits of i[2] are the sign (0x8000) and biased exponent
              * (0x7fff)
              */
 875         union {
 876                 unsigned i[NI];
 877                 long double e;
 878         } xx, yy, zz;

 879         long double xhi, yhi, xlo, ylo, t;
 880         unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
 881         unsigned oldcwsw, cwsw, rm, sticky, carry;
 882         int ex, ey, ez, exy, sxy, sz, e, tinyafter;
             /*
              * NOTE(review): presumably volatile so that the flag-raising
              * arithmetic at the end of the function is not optimized away
              */
 883         volatile double dummy;
 884 
 885         /* extract the exponents of the arguments */
 886         xx.e = x;
 887         yy.e = y;
 888         zz.e = z;
 889         ex = xx.i[2] & 0x7fff;
 890         ey = yy.i[2] & 0x7fff;
 891         ez = zz.i[2] & 0x7fff;
 892 
 893         /* dispense with inf, nan, and zero cases */
 894         if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
 895                 (ey | yy.i[1] | yy.i[0]) == 0)  /* x or y is inf, nan, or 0 */
 896                 return (x * y + z);
 897 
 898         if (ez == 0x7fff)                       /* z is inf or nan */
 899                 return (x + z); /* avoid spurious under/overflow in x * y */
 900 
 901         if ((ez | zz.i[1] | zz.i[0]) == 0)      /* z is zero */
 902                 /*
 903                  * x * y isn't zero but could underflow to zero,
 904                  * so don't add z, lest we perturb the sign
 905                  */
 906                 return (x * y);
 907 
 908         /*
 909          * now x, y, and z are all finite and nonzero; extract signs and
 910          * normalize the significands (this will raise the denormal operand
 911          * exception if need be)
 912          */
 913         sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
 914         sz = zz.i[2] & 0x8000;

             /*
              * a zero exponent field means the argument is subnormal: scale
              * it by 2^63 and take 63 back off the extracted exponent
              */
 915         if (!ex) {
 916                 xx.e = x * two63;
 917                 ex = (xx.i[2] & 0x7fff) - 63;
 918         }

 919         if (!ey) {
 920                 yy.e = y * two63;
 921                 ey = (yy.i[2] & 0x7fff) - 63;
 922         }

 923         if (!ez) {
 924                 zz.e = z * two63;
 925                 ez = (zz.i[2] & 0x7fff) - 63;
 926         }
 927 
 928         /*
 929          * save the control and status words, mask all exceptions, and
 930          * set rounding to 64-bit precision and toward-zero
 931          */
 932         __fenv_getcwsw(&oldcwsw);
 933         cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
 934         __fenv_setcwsw(&cwsw);
 935 
 936         /* multiply x*y to 128 bits */
 937         exy = ex + ey - 0x3fff;
             /* force both exponents to 0x3fff so x and y lie in [1, 2) */
 938         xx.i[2] = 0x3fff;
 939         yy.i[2] = 0x3fff;
 940         x = xx.e;
 941         y = yy.e;
             /*
              * xhi and yhi are x and y with their low-order bits dropped by
              * the add/subtract of two32 (rounding is toward zero here); xlo
              * and ylo are the remainders.  After the multiply, x holds the
              * leading part of the product and y is assembled from the
              * partial products as the part that did not fit in x.
              */
 942         xhi = ((x + twom32) + two32) - two32;
 943         yhi = ((y + twom32) + two32) - two32;
 944         xlo = x - xhi;
 945         ylo = y - yhi;
 946         x *= y;
 947         y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;

             /* the product lies in [1, 4); bring it back into [1, 2) */
 948         if (x >= two) {
 949                 x *= half;
 950                 y *= half;
 951                 exy++;
 952         }
 953 
 954         /* extract the significands */
             /*
              * xy0:xy1 are the 64 significand bits of x; xy2 and xy3 are read
              * from the low significand word after adding twom32 and twom64
              * to successive pieces of y.  z starts with only its own 64 bits
              * (z0:z1) nonzero.
              */
 955         xx.e = x;
 956         xy0 = xx.i[1];
 957         xy1 = xx.i[0];
 958         yy.e = t = y + twom32;
 959         xy2 = yy.i[0];
 960         yy.e = (y - (t - twom32)) + twom64;
 961         xy3 = yy.i[0];
 962         xy4 = 0;
 963         z0 = zz.i[1];
 964         z1 = zz.i[0];
 965         z2 = z3 = z4 = 0;
 966 
 967         /*
 968          * now x*y is represented by sxy, exy, and xy[0-4], and z is
 969          * represented likewise; swap if need be so |xy| <= |z|
 970          */
 971         if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
 972                 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
 973                 e = sxy; sxy = sz; sz = e;
 974                 e = exy; exy = ez; ez = e;
 975                 e = xy0; xy0 = z0; z0 = e;
 976                 e = xy1; xy1 = z1; z1 = e;
                     /* only xy had third and fourth words; move them to z */
 977                 z2 = xy2; xy2 = 0;
 978                 z3 = xy3; xy3 = 0;










 979         }
 980 
 981         /* shift the significand of xy keeping a sticky bit */
             /*
              * the (w << 1) << (k - e) form splits each left shift in two so
              * that no single shift count reaches 32 when e sits at the
              * bottom of its range
              */
 982         e = ez - exy;

 983         if (e > 130) {
                     /* everything shifts out; only the sticky bit remains */
 984                 xy0 = xy1 = xy2 = xy3 = 0;
 985                 xy4 = 1;
 986         } else if (e >= 128) {
 987                 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
 988                 xy4 = xy0 >> (e - 128);

 989                 if (sticky)
 990                         xy4 |= 1;

 991                 xy0 = xy1 = xy2 = xy3 = 0;
 992         } else if (e >= 96) {
 993                 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
 994                 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));

 995                 if (sticky)
 996                         xy4 |= 1;

 997                 xy3 = xy0 >> (e - 96);
 998                 xy0 = xy1 = xy2 = 0;
 999         } else if (e >= 64) {
1000                 sticky = xy3 | ((xy2 << 1) << (95 - e));
1001                 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));

1002                 if (sticky)
1003                         xy4 |= 1;

1004                 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1005                 xy2 = xy0 >> (e - 64);
1006                 xy0 = xy1 = 0;
1007         } else if (e >= 32) {
1008                 sticky = (xy3 << 1) << (63 - e);
1009                 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));

1010                 if (sticky)
1011                         xy4 |= 1;

1012                 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1013                 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1014                 xy1 = xy0 >> (e - 32);
1015                 xy0 = 0;
1016         } else if (e) {
1017                 xy4 = (xy3 << 1) << (31 - e);
1018                 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1019                 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1020                 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1021                 xy0 >>= e;
1022         }
1023 
1024         /* if this is a magnitude subtract, negate the significand of xy */
             /*
              * two's complement over five words: complement the upper four,
              * negate the lowest, and ripple the +1 upward only when the
              * lowest word is zero
              */
1025         if (sxy ^ sz) {
1026                 xy0 = ~xy0;
1027                 xy1 = ~xy1;
1028                 xy2 = ~xy2;
1029                 xy3 = ~xy3;
1030                 xy4 = -xy4;

1031                 if (xy4 == 0)
1032                         if (++xy3 == 0)
1033                                 if (++xy2 == 0)
1034                                         if (++xy1 == 0)
1035                                                 xy0++;
1036         }
1037 
1038         /* add, propagating carries */
             /*
              * unsigned sums wrap, so a result below the addend (or not above
              * it once a carry-in has been added) signals a carry out
              */
1039         z4 += xy4;
1040         carry = (z4 < xy4);
1041         z3 += xy3;

1042         if (carry) {
1043                 z3++;
1044                 carry = (z3 <= xy3);
1045         } else
1046                 carry = (z3 < xy3);


1047         z2 += xy2;

1048         if (carry) {
1049                 z2++;
1050                 carry = (z2 <= xy2);
1051         } else
1052                 carry = (z2 < xy2);


1053         z1 += xy1;

1054         if (carry) {
1055                 z1++;
1056                 carry = (z1 <= xy1);
1057         } else
1058                 carry = (z1 < xy1);


1059         z0 += xy0;

1060         if (carry) {
1061                 z0++;
1062                 carry = (z0 <= xy0);
1063         } else
1064                 carry = (z0 < xy0);

1065 
1066         /* for a magnitude subtract, ignore the last carry out */
1067         if (sxy ^ sz)
1068                 carry = 0;
1069 
1070         /* postnormalize and collect rounding information into z2 */
1071         if (ez < 1) {
1072                 /* result is tiny; shift right until exponent is within range */
1073                 e = 1 - ez;

1074                 if (e > 67) {
1075                         z2 = 1; /* result can't be exactly zero */
1076                         z0 = z1 = 0;
1077                 } else if (e >= 64) {
1078                         sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1079                         z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));

1080                         if (sticky)
1081                                 z2 |= 1;

1082                         z1 = carry >> (e - 64);
1083                         z0 = 0;
1084                 } else if (e >= 32) {
1085                         sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1086                         z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));

1087                         if (sticky)
1088                                 z2 |= 1;

1089                         z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1090                         z0 = carry >> (e - 32);
1091                 } else {
1092                         sticky = z4 | z3 | (z2 << 1) << (31 - e);
1093                         z2 = (z2 >> e) | ((z1 << 1) << (31 - e));

1094                         if (sticky)
1095                                 z2 |= 1;

1096                         z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1097                         z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1098                 }

1099                 ez = 1;
1100         } else if (carry) {
1101                 /* carry out; shift right by one */
1102                 sticky = (z2 & 1) | z3 | z4;
1103                 z2 = (z2 >> 1) | (z1 << 31);

1104                 if (sticky)
1105                         z2 |= 1;

1106                 z1 = (z1 >> 1) | (z0 << 31);
                     /* the carry becomes the new leading significand bit */
1107                 z0 = (z0 >> 1) | 0x80000000;
1108                 ez++;
1109         } else {
1110                 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1111                         /*
1112                          * borrow/cancellation; shift left as much as
1113                          * exponent allows
1114                          */
                             /* whole words first, then one bit at a time */
1115                         while (!z0 && ez >= 33) {
1116                                 z0 = z1;
1117                                 z1 = z2;
1118                                 z2 = z3;
1119                                 z3 = z4;
1120                                 z4 = 0;
1121                                 ez -= 32;
1122                         }

1123                         while (z0 < 0x80000000u && ez > 1) {
1124                                 z0 = (z0 << 1) | (z1 >> 31);
1125                                 z1 = (z1 << 1) | (z2 >> 31);
1126                                 z2 = (z2 << 1) | (z3 >> 31);
1127                                 z3 = (z3 << 1) | (z4 >> 31);
1128                                 z4 <<= 1;
1129                                 ez--;
1130                         }
1131                 }

                     /* fold anything left below z2 into its sticky bit */
1132                 if (z3 | z4)
1133                         z2 |= 1;
1134         }
1135 
1136         /* get the rounding mode */
1137         rm = oldcwsw & 0x0c000000;
1138 
1139         /* adjust exponent if result is subnormal */
1140         tinyafter = 0;

             /* leading significand bit clear => not normalized */
1141         if (!(z0 & 0x80000000)) {
1142                 ez = 0;
1143                 tinyafter = 1;

1144                 if (!(z0 | z1 | z2)) { /* exact zero */
                             /* the zero is negative only when rm is FCW_RM */
1145                         zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1146                         zz.i[1] = zz.i[0] = 0;
1147                         __fenv_setcwsw(&oldcwsw);
1148                         return (zz.e);
1149                 }
1150         }
1151 
1152         /*
1153          * flip the sense of directed roundings if the result is negative;
1154          * the logic below applies to a positive result
1155          */
1156         if (sz && (rm == FCW_RM || rm == FCW_RP))
1157                 rm = (FCW_RM + FCW_RP) - rm;
1158 
1159         /* round */
             /*
              * z2 holds the bits below the 64-bit result z0:z1; its top bit
              * is the half-way bit, and round-to-nearest breaks an exact tie
              * toward an even z1
              */
1160         if (z2) {
1161                 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
1162                         (z2 == 0x80000000u && (z1 & 1))))) {
1163                         /* round up and renormalize if necessary */
1164                         if (++z1 == 0) {
1165                                 if (++z0 == 0) {
1166                                         z0 = 0x80000000;
1167                                         ez++;
1168                                 } else if (z0 == 0x80000000) {
1169                                         /* rounded up to smallest normal */
1170                                         ez = 1;

1171                                         if ((rm == FCW_RP && z2 >
1172                                                 0x80000000u) || (rm == FCW_RN &&
1173                                                 z2 >= 0xc0000000u))
1174                                                 /*
1175                                                  * would have rounded up to
1176                                                  * smallest normal even with
1177                                                  * unbounded range
1178                                                  */
1179                                                 tinyafter = 0;
1180                                 }
1181                         }
1182                 }
1183         }
1184 
1185         /* restore the control and status words, check for over/underflow */
1186         __fenv_setcwsw(&oldcwsw);

1187         if (ez >= 0x7fff) {
                     /*
                      * overflow: deliver infinity when rounding to nearest or
                      * upward (in the positive sense), otherwise the
                      * largest finite value
                      */
1188                 if (rm == FCW_RN || rm == FCW_RP) {
1189                         zz.i[2] = sz | 0x7fff;
1190                         zz.i[1] = 0x80000000;
1191                         zz.i[0] = 0;
1192                 } else {
1193                         zz.i[2] = sz | 0x7ffe;
1194                         zz.i[1] = 0xffffffff;
1195                         zz.i[0] = 0xffffffff;
1196                 }

                     /*
                      * huge * huge exceeds the double range; NOTE(review):
                      * intended to raise overflow and inexact
                      */
1197                 dummy = huge;
1198                 dummy *= huge;
1199         } else {
1200                 zz.i[2] = sz | ez;
1201                 zz.i[1] = z0;
1202                 zz.i[0] = z1;
1203 
1204                 /*
1205                  * tinyafter => result rounded w/ unbounded range would be tiny,
1206                  * z2 nonzero => result delivered is inexact
1207                  */
                     /*
                      * NOTE(review): tiny * tiny is far below the double range
                      * (presumably underflow plus inexact); tiny - tiny2 is a
                      * small exactly representable difference (presumably
                      * underflow without inexact); huge + tiny cannot be
                      * represented exactly (inexact only)
                      */
1208                 if (tinyafter) {
1209                         dummy = tiny;

1210                         if (z2)
1211                                 dummy *= tiny;
1212                         else
1213                                 dummy -= tiny2;
1214                 } else if (z2) {
1215                         dummy = huge;
1216                         dummy += tiny;
1217                 }
1218         }
1219 
1220         return (zz.e);
1221 }
1222 
1223 #else
1224 #error Unknown architecture
1225 #endif


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak fmal = __fmal
  32 
  33 #include "libm.h"
  34 #include "fma.h"
  35 #include "fenv_inlines.h"
  36 
  37 #if defined(__sparc)

  38 static const union {
  39         unsigned i[2];
  40         double d;
  41 } C[] = {
  42         { 0x3fe00000u, 0 },
  43         { 0x40000000u, 0 },
  44         { 0x3ef00000u, 0 },
  45         { 0x3e700000u, 0 },
  46         { 0x41300000u, 0 },
  47         { 0x3e300000u, 0 },
  48         { 0x3b300000u, 0 },
  49         { 0x38300000u, 0 },
  50         { 0x42300000u, 0 },
  51         { 0x3df00000u, 0 },
  52         { 0x7fe00000u, 0 },
  53         { 0x00100000u, 0 },
  54         { 0x00100001u, 0 },
  55         { 0, 0 },
  56         { 0x7ff00000u, 0 },
  57         { 0x7ff00001u, 0 }


  63 #define twom24          C[3].d
  64 #define two20           C[4].d
  65 #define twom28          C[5].d
  66 #define twom76          C[6].d
  67 #define twom124         C[7].d
  68 #define two36           C[8].d
  69 #define twom32          C[9].d
  70 #define huge            C[10].d
  71 #define tiny            C[11].d
  72 #define tiny2           C[12].d
  73 #define zero            C[13].d
  74 #define inf             C[14].d
  75 #define snan            C[15].d
  76 
     /*
      * FSR image handed to __fenv_setfsr32() in __fmal below to select
      * round-to-negative-infinity mode for the main computation
      */
  77 static const unsigned int fsr_rm = 0xc0000000u;
  78 
  79 /*
  80  * fmal for SPARC: 128-bit quad precision, big-endian
  81  */
  82 long double
  83 __fmal(long double x, long double y, long double z)
  84 {
  85         union {
  86                 unsigned int i[4];
  87                 long double q;
  88         } xx, yy, zz;
  89         union {
  90                 unsigned int i[2];
  91                 double d;
  92         } u;
  93 
  94         double dx[5], dy[5], dxy[9], c, s;
  95         unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
  96         unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
  97         unsigned int rm, sticky;
  98         unsigned int fsr;
  99         int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
 100         int cx, cy, cz;
 101         volatile double dummy;
 102 
 103         /* extract the high order words of the arguments */
 104         xx.q = x;
 105         yy.q = y;
 106         zz.q = z;
 107         hx = xx.i[0] & ~0x80000000;
 108         hy = yy.i[0] & ~0x80000000;
 109         hz = zz.i[0] & ~0x80000000;
 110 
 111         /*
 112          * distinguish zero, finite nonzero, infinite, and quiet nan
 113          * arguments; raise invalid and return for signaling nans
 114          */
 115         if (hx >= 0x7fff0000) {
 116                 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
 117                         if (!(hx & 0x8000)) {
 118                                 /* signaling nan, raise invalid */
 119                                 dummy = snan;
 120                                 dummy += snan;
 121                                 xx.i[0] |= 0x8000;
 122                                 return (xx.q);
 123                         }
 124 
 125                         cx = 3;         /* quiet nan */
 126                 } else {
 127                         cx = 2;         /* inf */
 128                 }
 129         } else if (hx == 0) {
 130                 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
 131                 /* subnormal or zero */
 132         } else {
 133                 cx = 1;                 /* finite nonzero */
 134         }
 135 
 136         if (hy >= 0x7fff0000) {
 137                 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
 138                         if (!(hy & 0x8000)) {
 139                                 dummy = snan;
 140                                 dummy += snan;
 141                                 yy.i[0] |= 0x8000;
 142                                 return (yy.q);
 143                         }
 144 
 145                         cy = 3;
 146                 } else {
 147                         cy = 2;
 148                 }
 149         } else if (hy == 0) {
 150                 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
 151         } else {
 152                 cy = 1;
 153         }
 154 
 155         if (hz >= 0x7fff0000) {
 156                 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
 157                         if (!(hz & 0x8000)) {
 158                                 dummy = snan;
 159                                 dummy += snan;
 160                                 zz.i[0] |= 0x8000;
 161                                 return (zz.q);
 162                         }
 163 
 164                         cz = 3;
 165                 } else {
 166                         cz = 2;
 167                 }
 168         } else if (hz == 0) {
 169                 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
 170         } else {
 171                 cz = 1;
 172         }
 173 
 174         /* get the fsr and clear current exceptions */
 175         __fenv_getfsr32(&fsr);
 176         fsr &= ~FSR_CEXC;
 177 
 178         /* handle all other zero, inf, and nan cases */
 179         if (cx != 1 || cy != 1 || cz != 1) {
 180                 /* if x or y is a quiet nan, return it */
 181                 if (cx == 3) {
 182                         __fenv_setfsr32(&fsr);
 183                         return (x);
 184                 }
 185 
 186                 if (cy == 3) {
 187                         __fenv_setfsr32(&fsr);
 188                         return (y);
 189                 }
 190 
 191                 /* if x*y is 0*inf, raise invalid and return the default nan */
 192                 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
 193                         dummy = zero;
 194                         dummy *= inf;
 195                         zz.i[0] = 0x7fffffff;
 196                         zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
 197                         return (zz.q);
 198                 }
 199 
 200                 /* if z is a quiet nan, return it */
 201                 if (cz == 3) {
 202                         __fenv_setfsr32(&fsr);
 203                         return (z);
 204                 }
 205 
 206                 /*
 207                  * now none of x, y, or z is nan; handle cases where x or y
 208                  * is inf
 209                  */
 210                 if (cx == 2 || cy == 2) {
 211                         /*
 212                          * if z is also inf, either we have inf-inf or
 213                          * the result is the same as z depending on signs
 214                          */
 215                         if (cz == 2) {
 216                                 if ((int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
 217                                         dummy = inf;
 218                                         dummy -= inf;
 219                                         zz.i[0] = 0x7fffffff;
 220                                         zz.i[1] = zz.i[2] = zz.i[3] =
 221                                             0xffffffff;
 222                                         return (zz.q);
 223                                 }
 224 
 225                                 __fenv_setfsr32(&fsr);
 226                                 return (z);
 227                         }
 228 
 229                         /* otherwise the result is inf with appropriate sign */
 230                         zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
 231                             0x7fff0000;
 232                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 233                         __fenv_setfsr32(&fsr);
 234                         return (zz.q);
 235                 }
 236 
 237                 /* if z is inf, return it */
 238                 if (cz == 2) {
 239                         __fenv_setfsr32(&fsr);
 240                         return (z);
 241                 }
 242 
 243                 /*
 244                  * now x, y, and z are all finite; handle cases where x or y
 245                  * is zero
 246                  */
 247                 if (cx == 0 || cy == 0) {
 248                         /* either we have 0-0 or the result is the same as z */
 249                         if (cz == 0 && (int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
 250                             0) {
 251                                 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
 252                                     0;
 253                                 __fenv_setfsr32(&fsr);
 254                                 return (zz.q);
 255                         }
 256 
 257                         __fenv_setfsr32(&fsr);
 258                         return (z);
 259                 }
 260 
 261                 /* if we get here, x and y are nonzero finite, z must be zero */
 262                 return (x * y);
 263         }
 264 
 265         /*
 266          * now x, y, and z are all finite and nonzero; set round-to-
 267          * negative-infinity mode
 268          */
 269         __fenv_setfsr32(&fsr_rm);
 270 
 271         /*
 272          * get the signs and exponents and normalize the significands
 273          * of x and y
 274          */
 275         sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
 276         ex = hx >> 16;
 277         hx &= 0xffff;
 278 
 279         if (!ex) {
 280                 if (hx | (xx.i[1] & 0xfffe0000)) {
 281                         ex = 1;
 282                 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
 283                         hx = xx.i[1];
 284                         xx.i[1] = xx.i[2];
 285                         xx.i[2] = xx.i[3];
 286                         xx.i[3] = 0;
 287                         ex = -31;
 288                 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
 289                         hx = xx.i[2];
 290                         xx.i[1] = xx.i[3];
 291                         xx.i[2] = xx.i[3] = 0;
 292                         ex = -63;
 293                 } else {
 294                         hx = xx.i[3];
 295                         xx.i[1] = xx.i[2] = xx.i[3] = 0;
 296                         ex = -95;
 297                 }
 298 
 299                 while ((hx & 0x10000) == 0) {
 300                         hx = (hx << 1) | (xx.i[1] >> 31);
 301                         xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
 302                         xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
 303                         xx.i[3] <<= 1;
 304                         ex--;
 305                 }
 306         } else {
 307                 hx |= 0x10000;
 308         }
 309 
 310         ey = hy >> 16;
 311         hy &= 0xffff;
 312 
 313         if (!ey) {
 314                 if (hy | (yy.i[1] & 0xfffe0000)) {
 315                         ey = 1;
 316                 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
 317                         hy = yy.i[1];
 318                         yy.i[1] = yy.i[2];
 319                         yy.i[2] = yy.i[3];
 320                         yy.i[3] = 0;
 321                         ey = -31;
 322                 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
 323                         hy = yy.i[2];
 324                         yy.i[1] = yy.i[3];
 325                         yy.i[2] = yy.i[3] = 0;
 326                         ey = -63;
 327                 } else {
 328                         hy = yy.i[3];
 329                         yy.i[1] = yy.i[2] = yy.i[3] = 0;
 330                         ey = -95;
 331                 }
 332 
 333                 while ((hy & 0x10000) == 0) {
 334                         hy = (hy << 1) | (yy.i[1] >> 31);
 335                         yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
 336                         yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
 337                         yy.i[3] <<= 1;
 338                         ey--;
 339                 }
 340         } else {
 341                 hy |= 0x10000;
 342         }
 343 
 344         exy = ex + ey - 0x3fff;
 345 
 346         /* convert the significands of x and y to doubles */
 347         c = twom16;
 348         dx[0] = (double)((int)hx) * c;
 349         dy[0] = (double)((int)hy) * c;
 350 
 351         c *= twom24;
 352         dx[1] = (double)((int)(xx.i[1] >> 8)) * c;
 353         dy[1] = (double)((int)(yy.i[1] >> 8)) * c;
 354 
 355         c *= twom24;
 356         dx[2] = (double)((int)(((xx.i[1] << 16) | (xx.i[2] >> 16)) &
 357             0xffffff)) * c;
 358         dy[2] = (double)((int)(((yy.i[1] << 16) | (yy.i[2] >> 16)) &
 359             0xffffff)) * c;
 360 
 361         c *= twom24;
 362         dx[3] = (double)((int)(((xx.i[2] << 8) | (xx.i[3] >> 24)) & 0xffffff)) *
 363             c;
 364         dy[3] = (double)((int)(((yy.i[2] << 8) | (yy.i[3] >> 24)) & 0xffffff)) *
 365             c;
 366 
 367         c *= twom24;
 368         dx[4] = (double)((int)(xx.i[3] & 0xffffff)) * c;
 369         dy[4] = (double)((int)(yy.i[3] & 0xffffff)) * c;
 370 
 371         /* form the "digits" of the product */
 372         dxy[0] = dx[0] * dy[0];
 373         dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
 374         dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
 375         dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + dx[3] * dy[0];
 376         dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + dx[3] * dy[1] +
 377             dx[4] * dy[0];
 378         dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + dx[4] * dy[1];


 379         dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
 380         dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
 381         dxy[8] = dx[4] * dy[4];
 382 
 383         /* split odd-numbered terms and combine into even-numbered terms */
 384         c = (dxy[1] + two20) - two20;
 385         dxy[0] += c;
 386         dxy[1] -= c;
 387         c = (dxy[3] + twom28) - twom28;
 388         dxy[2] += c + dxy[1];
 389         dxy[3] -= c;
 390         c = (dxy[5] + twom76) - twom76;
 391         dxy[4] += c + dxy[3];
 392         dxy[5] -= c;
 393         c = (dxy[7] + twom124) - twom124;
 394         dxy[6] += c + dxy[5];
 395         dxy[8] += (dxy[7] - c);
 396 
 397         /* propagate carries, adjusting the exponent if need be */
 398         dxy[7] = dxy[6] + dxy[8];
 399         dxy[5] = dxy[4] + dxy[7];
 400         dxy[3] = dxy[2] + dxy[5];
 401         dxy[1] = dxy[0] + dxy[3];
 402 
 403         if (dxy[1] >= two) {
 404                 dxy[0] *= half;
 405                 dxy[1] *= half;
 406                 dxy[2] *= half;
 407                 dxy[3] *= half;
 408                 dxy[4] *= half;
 409                 dxy[5] *= half;
 410                 dxy[6] *= half;
 411                 dxy[7] *= half;
 412                 dxy[8] *= half;
 413                 exy++;
 414         }
 415 
 416         /* extract the significand of x*y */
 417         s = two36;
 418         u.d = c = dxy[1] + s;
 419         xy0 = u.i[1];
 420         c -= s;
 421         dxy[1] -= c;
 422         dxy[0] -= c;


 452         s *= twom32;
 453         u.d = c = dxy[7] + s;
 454         xy5 = u.i[1];
 455         c -= s;
 456         dxy[8] += (dxy[6] - c);
 457 
 458         s *= twom32;
 459         u.d = c = dxy[8] + s;
 460         xy6 = u.i[1];
 461         c -= s;
 462         dxy[8] -= c;
 463 
 464         s *= twom32;
 465         u.d = c = dxy[8] + s;
 466         xy7 = u.i[1];
 467 
 468         /* extract the sign, exponent, and significand of z */
 469         sz = zz.i[0] & 0x80000000;
 470         ez = hz >> 16;
 471         z0 = hz & 0xffff;
 472 
 473         if (!ez) {
 474                 if (z0 | (zz.i[1] & 0xfffe0000)) {
 475                         z1 = zz.i[1];
 476                         z2 = zz.i[2];
 477                         z3 = zz.i[3];
 478                         ez = 1;
 479                 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
 480                         z0 = zz.i[1];
 481                         z1 = zz.i[2];
 482                         z2 = zz.i[3];
 483                         z3 = 0;
 484                         ez = -31;
 485                 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
 486                         z0 = zz.i[2];
 487                         z1 = zz.i[3];
 488                         z2 = z3 = 0;
 489                         ez = -63;
 490                 } else {
 491                         z0 = zz.i[3];
 492                         z1 = z2 = z3 = 0;
 493                         ez = -95;
 494                 }
 495 
 496                 while ((z0 & 0x10000) == 0) {
 497                         z0 = (z0 << 1) | (z1 >> 31);
 498                         z1 = (z1 << 1) | (z2 >> 31);
 499                         z2 = (z2 << 1) | (z3 >> 31);
 500                         z3 <<= 1;
 501                         ez--;
 502                 }
 503         } else {
 504                 z0 |= 0x10000;
 505                 z1 = zz.i[1];
 506                 z2 = zz.i[2];
 507                 z3 = zz.i[3];
 508         }
 509 
 510         z4 = z5 = z6 = z7 = 0;
 511 
 512         /*
 513          * now x*y is represented by sxy, exy, and xy[0-7], and z is
 514          * represented likewise; swap if need be so |xy| <= |z|
 515          */
 516         if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
 517             (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || (xy3 == z3 &&
 518             (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
 519                 e = sxy;
 520                 sxy = sz;
 521                 sz = e;
 522                 e = exy;
 523                 exy = ez;
 524                 ez = e;
 525                 e = xy0;
 526                 xy0 = z0;
 527                 z0 = e;
 528                 e = xy1;
 529                 xy1 = z1;
 530                 z1 = e;
 531                 e = xy2;
 532                 xy2 = z2;
 533                 z2 = e;
 534                 e = xy3;
 535                 xy3 = z3;
 536                 z3 = e;
 537                 z4 = xy4;
 538                 xy4 = 0;
 539                 z5 = xy5;
 540                 xy5 = 0;
 541                 z6 = xy6;
 542                 xy6 = 0;
 543                 z7 = xy7;
 544                 xy7 = 0;
 545         }
 546 
 547         /* shift the significand of xy keeping a sticky bit */
 548         e = ez - exy;
 549 
 550         if (e > 236) {
 551                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
 552                 xy7 = 1;
 553         } else if (e >= 224) {
 554                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | ((xy0 <<
 555                     1) << (255 - e));
 556                 xy7 = xy0 >> (e - 224);
 557 
 558                 if (sticky)
 559                         xy7 |= 1;
 560 
 561                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
 562         } else if (e >= 192) {
 563                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | ((xy1 << 1) <<
 564                     (223 - e));
 565                 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
 566 
 567                 if (sticky)
 568                         xy7 |= 1;
 569 
 570                 xy6 = xy0 >> (e - 192);
 571                 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
 572         } else if (e >= 160) {
 573                 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | ((xy2 << 1) << (191 -
 574                     e));
 575                 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
 576 
 577                 if (sticky)
 578                         xy7 |= 1;
 579 
 580                 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
 581                 xy5 = xy0 >> (e - 160);
 582                 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
 583         } else if (e >= 128) {
 584                 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
 585                 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
 586 
 587                 if (sticky)
 588                         xy7 |= 1;
 589 
 590                 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
 591                 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
 592                 xy4 = xy0 >> (e - 128);
 593                 xy0 = xy1 = xy2 = xy3 = 0;
 594         } else if (e >= 96) {
 595                 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
 596                 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
 597 
 598                 if (sticky)
 599                         xy7 |= 1;
 600 
 601                 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
 602                 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
 603                 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
 604                 xy3 = xy0 >> (e - 96);
 605                 xy0 = xy1 = xy2 = 0;
 606         } else if (e >= 64) {
 607                 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
 608                 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
 609 
 610                 if (sticky)
 611                         xy7 |= 1;
 612 
 613                 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
 614                 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
 615                 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
 616                 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
 617                 xy2 = xy0 >> (e - 64);
 618                 xy0 = xy1 = 0;
 619         } else if (e >= 32) {
 620                 sticky = xy7 | ((xy6 << 1) << (63 - e));
 621                 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
 622 
 623                 if (sticky)
 624                         xy7 |= 1;
 625 
 626                 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
 627                 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
 628                 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
 629                 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
 630                 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
 631                 xy1 = xy0 >> (e - 32);
 632                 xy0 = 0;
 633         } else if (e) {
 634                 sticky = (xy7 << 1) << (31 - e);
 635                 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
 636 
 637                 if (sticky)
 638                         xy7 |= 1;
 639 
 640                 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
 641                 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
 642                 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
 643                 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
 644                 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
 645                 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
 646                 xy0 >>= e;
 647         }
 648 
 649         /* if this is a magnitude subtract, negate the significand of xy */
 650         if (sxy ^ sz) {
 651                 xy0 = ~xy0;
 652                 xy1 = ~xy1;
 653                 xy2 = ~xy2;
 654                 xy3 = ~xy3;
 655                 xy4 = ~xy4;
 656                 xy5 = ~xy5;
 657                 xy6 = ~xy6;
 658                 xy7 = -xy7;
 659 
 660                 if (xy7 == 0)
 661                         if (++xy6 == 0)
 662                                 if (++xy5 == 0)
 663                                         if (++xy4 == 0)
 664                                                 if (++xy3 == 0)
 665                                                         if (++xy2 == 0)
 666                                                                 if (++xy1 == 0)
 667                                                                         xy0++;
 668         }
 669 
 670         /* add, propagating carries */
 671         z7 += xy7;
 672         e = (z7 < xy7);
 673         z6 += xy6;
 674 
 675         if (e) {
 676                 z6++;
 677                 e = (z6 <= xy6);
 678         } else {
 679                 e = (z6 < xy6);
 680         }
 681 
 682         z5 += xy5;
 683 
 684         if (e) {
 685                 z5++;
 686                 e = (z5 <= xy5);
 687         } else {
 688                 e = (z5 < xy5);
 689         }
 690 
 691         z4 += xy4;
 692 
 693         if (e) {
 694                 z4++;
 695                 e = (z4 <= xy4);
 696         } else {
 697                 e = (z4 < xy4);
 698         }
 699 
 700         z3 += xy3;
 701 
 702         if (e) {
 703                 z3++;
 704                 e = (z3 <= xy3);
 705         } else {
 706                 e = (z3 < xy3);
 707         }
 708 
 709         z2 += xy2;
 710 
 711         if (e) {
 712                 z2++;
 713                 e = (z2 <= xy2);
 714         } else {
 715                 e = (z2 < xy2);
 716         }
 717 
 718         z1 += xy1;
 719 
 720         if (e) {
 721                 z1++;
 722                 e = (z1 <= xy1);
 723         } else {
 724                 e = (z1 < xy1);
 725         }
 726 
 727         z0 += xy0;
 728 
 729         if (e)
 730                 z0++;
 731 
 732         /* postnormalize and collect rounding information into z4 */
 733         if (ez < 1) {
 734                 /* result is tiny; shift right until exponent is within range */
 735                 e = 1 - ez;
 736 
 737                 if (e > 116) {
 738                         z4 = 1;         /* result can't be exactly zero */
 739                         z0 = z1 = z2 = z3 = 0;
 740                 } else if (e >= 96) {
 741                         sticky = z7 | z6 | z5 | z4 | z3 | z2 | ((z1 << 1) <<
 742                             (127 - e));
 743                         z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
 744 
 745                         if (sticky)
 746                                 z4 |= 1;
 747 
 748                         z3 = z0 >> (e - 96);
 749                         z0 = z1 = z2 = 0;
 750                 } else if (e >= 64) {
 751                         sticky = z7 | z6 | z5 | z4 | z3 | ((z2 << 1) << (95 -
 752                             e));
 753                         z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
 754 
 755                         if (sticky)
 756                                 z4 |= 1;
 757 
 758                         z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
 759                         z2 = z0 >> (e - 64);
 760                         z0 = z1 = 0;
 761                 } else if (e >= 32) {
 762                         sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
 763                         z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
 764 
 765                         if (sticky)
 766                                 z4 |= 1;
 767 
 768                         z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
 769                         z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
 770                         z1 = z0 >> (e - 32);
 771                         z0 = 0;
 772                 } else {
 773                         sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
 774                         z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
 775 
 776                         if (sticky)
 777                                 z4 |= 1;
 778 
 779                         z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
 780                         z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
 781                         z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
 782                         z0 >>= e;
 783                 }
 784 
 785                 ez = 1;
 786         } else if (z0 >= 0x20000) {
 787                 /* carry out; shift right by one */
 788                 sticky = (z4 & 1) | z5 | z6 | z7;
 789                 z4 = (z4 >> 1) | (z3 << 31);
 790 
 791                 if (sticky)
 792                         z4 |= 1;
 793 
 794                 z3 = (z3 >> 1) | (z2 << 31);
 795                 z2 = (z2 >> 1) | (z1 << 31);
 796                 z1 = (z1 >> 1) | (z0 << 31);
 797                 z0 >>= 1;
 798                 ez++;
 799         } else {
 800                 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) !=
 801                     0) {
 802                         /*
 803                          * borrow/cancellation; shift left as much as
 804                          * exponent allows
 805                          */
 806                         while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
 807                                 z0 = z1;
 808                                 z1 = z2;
 809                                 z2 = z3;
 810                                 z3 = z4;
 811                                 z4 = z5;
 812                                 z5 = z6;
 813                                 z6 = z7;
 814                                 z7 = 0;
 815                                 ez -= 32;
 816                         }
 817 
 818                         while (z0 < 0x10000 && ez > 1) {
 819                                 z0 = (z0 << 1) | (z1 >> 31);
 820                                 z1 = (z1 << 1) | (z2 >> 31);
 821                                 z2 = (z2 << 1) | (z3 >> 31);
 822                                 z3 = (z3 << 1) | (z4 >> 31);
 823                                 z4 = (z4 << 1) | (z5 >> 31);
 824                                 z5 = (z5 << 1) | (z6 >> 31);
 825                                 z6 = (z6 << 1) | (z7 >> 31);
 826                                 z7 <<= 1;
 827                                 ez--;
 828                         }
 829                 }
 830 
 831                 if (z5 | z6 | z7)
 832                         z4 |= 1;
 833         }
 834 
 835         /* get the rounding mode */
 836         rm = fsr >> 30;
 837 
 838         /* strip off the integer bit, if there is one */
 839         ibit = z0 & 0x10000;
 840 
 841         if (ibit) {
 842                 z0 -= 0x10000;
 843         } else {
 844                 ez = 0;
 845 
 846                 if (!(z0 | z1 | z2 | z3 | z4)) {        /* exact zero */
 847                         zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
 848                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 849                         __fenv_setfsr32(&fsr);
 850                         return (zz.q);
 851                 }
 852         }
 853 
 854         /*
 855          * flip the sense of directed roundings if the result is negative;
 856          * the logic below applies to a positive result
 857          */
 858         if (sz)
 859                 rm ^= rm >> 1;
 860 
 861         /* round and raise exceptions */
 862         if (z4) {
 863                 fsr |= FSR_NXC;
 864 
 865                 /* decide whether to round the fraction up */
 866                 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || (z4 ==
 867                     0x80000000u && (z3 & 1))))) {
 868                         /* round up and renormalize if necessary */
 869                         if (++z3 == 0)
 870                                 if (++z2 == 0)
 871                                         if (++z1 == 0)
 872                                                 if (++z0 == 0x10000) {
 873                                                         z0 = 0;
 874                                                         ez++;
 875                                                 }
 876                 }
 877         }
 878 
 879         /* check for under/overflow */
 880         if (ez >= 0x7fff) {
 881                 if (rm == FSR_RN || rm == FSR_RP) {
 882                         zz.i[0] = sz | 0x7fff0000;
 883                         zz.i[1] = zz.i[2] = zz.i[3] = 0;
 884                 } else {
 885                         zz.i[0] = sz | 0x7ffeffff;
 886                         zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
 887                 }
 888 
 889                 fsr |= FSR_OFC | FSR_NXC;
 890         } else {
 891                 zz.i[0] = sz | (ez << 16) | z0;
 892                 zz.i[1] = z1;
 893                 zz.i[2] = z2;
 894                 zz.i[3] = z3;
 895 
 896                 /*
 897                  * !ibit => exact result was tiny before rounding,
 898                  * z4 nonzero => result delivered is inexact
 899                  */
 900                 if (!ibit) {
 901                         if (z4)
 902                                 fsr |= FSR_UFC | FSR_NXC;
 903                         else if (fsr & FSR_UFM)
 904                                 fsr |= FSR_UFC;
 905                 }
 906         }
 907 
 908         /* restore the fsr and emulate exceptions as needed */
 909         if ((fsr & FSR_CEXC) & (fsr >> 23)) {
 910                 __fenv_setfsr32(&fsr);
 911 
 912                 if (fsr & FSR_OFC) {
 913                         dummy = huge;
 914                         dummy *= huge;
 915                 } else if (fsr & FSR_UFC) {
 916                         dummy = tiny;
 917 
 918                         if (fsr & FSR_NXC)
 919                                 dummy *= tiny;
 920                         else
 921                                 dummy -= tiny2;
 922                 } else {
 923                         dummy = huge;
 924                         dummy += tiny;
 925                 }
 926         } else {
 927                 fsr |= (fsr & 0x1f) << 5;
 928                 __fenv_setfsr32(&fsr);
 929         }
 930 
 931         return (zz.q);
 932 }

 933 #elif defined(__x86)

 934 static const union {       /* x86 double constants built from raw 32-bit words */
 935         unsigned i[2];     /* i[0] = low word, i[1] = high word (little-endian) */
 936         double d;
 937 } C[] = {
 938         { 0, 0x3fe00000u },        /* C[0]: 0.5 */
 939         { 0, 0x40000000u },        /* C[1]: 2.0 */
 940         { 0, 0x3df00000u },        /* C[2]: 2^-32 */
 941         { 0, 0x3bf00000u },        /* C[3]: 2^-64 */
 942         { 0, 0x41f00000u },        /* C[4]: 2^32 */
 943         { 0, 0x43e00000u },        /* C[5]: 2^63 */
 944         { 0, 0x7fe00000u },        /* C[6]: 2^1023 */
 945         { 0, 0x00100000u },        /* C[7]: 2^-1022, smallest normal double */
 946         { 0, 0x00100001u }         /* C[8]: 2^-1022 * (1 + 2^-20) */
 947 };
 948 
 949 #define half            C[0].d     /* halves x*y when the product is >= two */
 950 #define two             C[1].d     /* threshold for renormalizing x*y */
 951 #define twom32          C[2].d     /* used in hi/lo splitting and word extraction */
 952 #define twom64          C[3].d     /* used to extract the lowest product word */
 953 #define two32           C[4].d     /* used in hi/lo splitting of x and y */
 954 #define two63           C[5].d     /* scales subnormal arguments to normalize them */
 955 #define huge            C[6].d
 956 #define tiny            C[7].d
 957 #define tiny2           C[8].d
 958 
 959 #if defined(__amd64)
 960 #define NI              4  /* unsigned words in the union overlaying a long double */
 961 #else
 962 #define NI              3  /* NOTE(review): presumably sizeof (long double) / 4 per ABI - confirm */
 963 #endif
 964 
 965 /*
 966  * fmal for x86: 80-bit extended double precision, little-endian
 967  */
 968 long double
 969 __fmal(long double x, long double y, long double z)
 970 {
 971         union {
 972                 unsigned i[NI];
 973                 long double e;
 974         } xx, yy, zz;
 975 
 976         long double xhi, yhi, xlo, ylo, t;
 977         unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
 978         unsigned oldcwsw, cwsw, rm, sticky, carry;
 979         int ex, ey, ez, exy, sxy, sz, e, tinyafter;
 980         volatile double dummy;
 981 
 982         /* extract the exponents of the arguments */
 983         xx.e = x;
 984         yy.e = y;
 985         zz.e = z;
 986         ex = xx.i[2] & 0x7fff;
 987         ey = yy.i[2] & 0x7fff;
 988         ez = zz.i[2] & 0x7fff;
 989 
 990         /* dispense with inf, nan, and zero cases */
 991         if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
 992             (ey | yy.i[1] | yy.i[0]) == 0)      /* x or y is inf, nan, or 0 */
 993                 return (x * y + z);
 994 
 995         if (ez == 0x7fff)       /* z is inf or nan */
 996                 return (x + z); /* avoid spurious under/overflow in x * y */
 997 
 998         if ((ez | zz.i[1] | zz.i[0]) == 0)      /* z is zero */
 999                 /*
1000                  * x * y isn't zero but could underflow to zero,
1001                  * so don't add z, lest we perturb the sign
1002                  */
1003                 return (x * y);
1004 
1005         /*
1006          * now x, y, and z are all finite and nonzero; extract signs and
1007          * normalize the significands (this will raise the denormal operand
1008          * exception if need be)
1009          */
1010         sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
1011         sz = zz.i[2] & 0x8000;
1012 
1013         if (!ex) {
1014                 xx.e = x * two63;
1015                 ex = (xx.i[2] & 0x7fff) - 63;
1016         }
1017 
1018         if (!ey) {
1019                 yy.e = y * two63;
1020                 ey = (yy.i[2] & 0x7fff) - 63;
1021         }
1022 
1023         if (!ez) {
1024                 zz.e = z * two63;
1025                 ez = (zz.i[2] & 0x7fff) - 63;
1026         }
1027 
1028         /*
1029          * save the control and status words, mask all exceptions, and
1030          * set rounding to 64-bit precision and toward-zero
1031          */
1032         __fenv_getcwsw(&oldcwsw);
1033         cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
1034         __fenv_setcwsw(&cwsw);
1035 
1036         /* multiply x*y to 128 bits */
1037         exy = ex + ey - 0x3fff;
1038         xx.i[2] = 0x3fff;
1039         yy.i[2] = 0x3fff;
1040         x = xx.e;
1041         y = yy.e;
1042         xhi = ((x + twom32) + two32) - two32;
1043         yhi = ((y + twom32) + two32) - two32;
1044         xlo = x - xhi;
1045         ylo = y - yhi;
1046         x *= y;
1047         y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
1048 
1049         if (x >= two) {
1050                 x *= half;
1051                 y *= half;
1052                 exy++;
1053         }
1054 
1055         /* extract the significands */
1056         xx.e = x;
1057         xy0 = xx.i[1];
1058         xy1 = xx.i[0];
1059         yy.e = t = y + twom32;
1060         xy2 = yy.i[0];
1061         yy.e = (y - (t - twom32)) + twom64;
1062         xy3 = yy.i[0];
1063         xy4 = 0;
1064         z0 = zz.i[1];
1065         z1 = zz.i[0];
1066         z2 = z3 = z4 = 0;
1067 
1068         /*
1069          * now x*y is represented by sxy, exy, and xy[0-4], and z is
1070          * represented likewise; swap if need be so |xy| <= |z|
1071          */
1072         if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
1073             (xy1 == z1 && (xy2 | xy3) != 0)))))) {
1074                 e = sxy;
1075                 sxy = sz;
1076                 sz = e;
1077                 e = exy;
1078                 exy = ez;
1079                 ez = e;
1080                 e = xy0;
1081                 xy0 = z0;
1082                 z0 = e;
1083                 e = xy1;
1084                 xy1 = z1;
1085                 z1 = e;
1086                 z2 = xy2;
1087                 xy2 = 0;
1088                 z3 = xy3;
1089                 xy3 = 0;
1090         }
1091 
1092         /* shift the significand of xy keeping a sticky bit */
1093         e = ez - exy;
1094 
1095         if (e > 130) {
1096                 xy0 = xy1 = xy2 = xy3 = 0;
1097                 xy4 = 1;
1098         } else if (e >= 128) {
1099                 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
1100                 xy4 = xy0 >> (e - 128);
1101 
1102                 if (sticky)
1103                         xy4 |= 1;
1104 
1105                 xy0 = xy1 = xy2 = xy3 = 0;
1106         } else if (e >= 96) {
1107                 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
1108                 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
1109 
1110                 if (sticky)
1111                         xy4 |= 1;
1112 
1113                 xy3 = xy0 >> (e - 96);
1114                 xy0 = xy1 = xy2 = 0;
1115         } else if (e >= 64) {
1116                 sticky = xy3 | ((xy2 << 1) << (95 - e));
1117                 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1118 
1119                 if (sticky)
1120                         xy4 |= 1;
1121 
1122                 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1123                 xy2 = xy0 >> (e - 64);
1124                 xy0 = xy1 = 0;
1125         } else if (e >= 32) {
1126                 sticky = (xy3 << 1) << (63 - e);
1127                 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1128 
1129                 if (sticky)
1130                         xy4 |= 1;
1131 
1132                 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1133                 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1134                 xy1 = xy0 >> (e - 32);
1135                 xy0 = 0;
1136         } else if (e) {
1137                 xy4 = (xy3 << 1) << (31 - e);
1138                 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1139                 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1140                 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1141                 xy0 >>= e;
1142         }
1143 
1144         /* if this is a magnitude subtract, negate the significand of xy */
1145         if (sxy ^ sz) {
1146                 xy0 = ~xy0;
1147                 xy1 = ~xy1;
1148                 xy2 = ~xy2;
1149                 xy3 = ~xy3;
1150                 xy4 = -xy4;
1151 
1152                 if (xy4 == 0)
1153                         if (++xy3 == 0)
1154                                 if (++xy2 == 0)
1155                                         if (++xy1 == 0)
1156                                                 xy0++;
1157         }
1158 
1159         /* add, propagating carries */
1160         z4 += xy4;
1161         carry = (z4 < xy4);
1162         z3 += xy3;
1163 
1164         if (carry) {
1165                 z3++;
1166                 carry = (z3 <= xy3);
1167         } else {
1168                 carry = (z3 < xy3);
1169         }
1170 
1171         z2 += xy2;
1172 
1173         if (carry) {
1174                 z2++;
1175                 carry = (z2 <= xy2);
1176         } else {
1177                 carry = (z2 < xy2);
1178         }
1179 
1180         z1 += xy1;
1181 
1182         if (carry) {
1183                 z1++;
1184                 carry = (z1 <= xy1);
1185         } else {
1186                 carry = (z1 < xy1);
1187         }
1188 
1189         z0 += xy0;
1190 
1191         if (carry) {
1192                 z0++;
1193                 carry = (z0 <= xy0);
1194         } else {
1195                 carry = (z0 < xy0);
1196         }
1197 
1198         /* for a magnitude subtract, ignore the last carry out */
1199         if (sxy ^ sz)
1200                 carry = 0;
1201 
1202         /* postnormalize and collect rounding information into z2 */
1203         if (ez < 1) {
1204                 /* result is tiny; shift right until exponent is within range */
1205                 e = 1 - ez;
1206 
1207                 if (e > 67) {
1208                         z2 = 1;         /* result can't be exactly zero */
1209                         z0 = z1 = 0;
1210                 } else if (e >= 64) {
1211                         sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1212                         z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1213 
1214                         if (sticky)
1215                                 z2 |= 1;
1216 
1217                         z1 = carry >> (e - 64);
1218                         z0 = 0;
1219                 } else if (e >= 32) {
1220                         sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1221                         z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1222 
1223                         if (sticky)
1224                                 z2 |= 1;
1225 
1226                         z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1227                         z0 = carry >> (e - 32);
1228                 } else {
1229                         sticky = z4 | z3 | (z2 << 1) << (31 - e);
1230                         z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1231 
1232                         if (sticky)
1233                                 z2 |= 1;
1234 
1235                         z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1236                         z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1237                 }
1238 
1239                 ez = 1;
1240         } else if (carry) {
1241                 /* carry out; shift right by one */
1242                 sticky = (z2 & 1) | z3 | z4;
1243                 z2 = (z2 >> 1) | (z1 << 31);
1244 
1245                 if (sticky)
1246                         z2 |= 1;
1247 
1248                 z1 = (z1 >> 1) | (z0 << 31);
1249                 z0 = (z0 >> 1) | 0x80000000;
1250                 ez++;
1251         } else {
1252                 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1253                         /*
1254                          * borrow/cancellation; shift left as much as
1255                          * exponent allows
1256                          */
1257                         while (!z0 && ez >= 33) {
1258                                 z0 = z1;
1259                                 z1 = z2;
1260                                 z2 = z3;
1261                                 z3 = z4;
1262                                 z4 = 0;
1263                                 ez -= 32;
1264                         }
1265 
1266                         while (z0 < 0x80000000u && ez > 1) {
1267                                 z0 = (z0 << 1) | (z1 >> 31);
1268                                 z1 = (z1 << 1) | (z2 >> 31);
1269                                 z2 = (z2 << 1) | (z3 >> 31);
1270                                 z3 = (z3 << 1) | (z4 >> 31);
1271                                 z4 <<= 1;
1272                                 ez--;
1273                         }
1274                 }
1275 
1276                 if (z3 | z4)
1277                         z2 |= 1;
1278         }
1279 
1280         /* get the rounding mode */
1281         rm = oldcwsw & 0x0c000000;
1282 
1283         /* adjust exponent if result is subnormal */
1284         tinyafter = 0;
1285 
1286         if (!(z0 & 0x80000000)) {
1287                 ez = 0;
1288                 tinyafter = 1;
1289 
1290                 if (!(z0 | z1 | z2)) {  /* exact zero */
1291                         zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1292                         zz.i[1] = zz.i[0] = 0;
1293                         __fenv_setcwsw(&oldcwsw);
1294                         return (zz.e);
1295                 }
1296         }
1297 
1298         /*
1299          * flip the sense of directed roundings if the result is negative;
1300          * the logic below applies to a positive result
1301          */
1302         if (sz && (rm == FCW_RM || rm == FCW_RP))
1303                 rm = (FCW_RM + FCW_RP) - rm;
1304 
1305         /* round */
1306         if (z2) {
1307                 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || (z2 ==
1308                     0x80000000u && (z1 & 1))))) {
1309                         /* round up and renormalize if necessary */
1310                         if (++z1 == 0) {
1311                                 if (++z0 == 0) {
1312                                         z0 = 0x80000000;
1313                                         ez++;
1314                                 } else if (z0 == 0x80000000) {
1315                                         /* rounded up to smallest normal */
1316                                         ez = 1;
1317 
1318                                         if ((rm == FCW_RP && z2 >
1319                                             0x80000000u) || (rm == FCW_RN &&
1320                                             z2 >= 0xc0000000u))
1321                                                 /*
1322                                                  * would have rounded up to
1323                                                  * smallest normal even with
1324                                                  * unbounded range
1325                                                  */
1326                                                 tinyafter = 0;
1327                                 }
1328                         }
1329                 }
1330         }
1331 
1332         /* restore the control and status words, check for over/underflow */
1333         __fenv_setcwsw(&oldcwsw);
1334 
1335         if (ez >= 0x7fff) {
1336                 if (rm == FCW_RN || rm == FCW_RP) {
1337                         zz.i[2] = sz | 0x7fff;
1338                         zz.i[1] = 0x80000000;
1339                         zz.i[0] = 0;
1340                 } else {
1341                         zz.i[2] = sz | 0x7ffe;
1342                         zz.i[1] = 0xffffffff;
1343                         zz.i[0] = 0xffffffff;
1344                 }
1345 
1346                 dummy = huge;
1347                 dummy *= huge;
1348         } else {
1349                 zz.i[2] = sz | ez;
1350                 zz.i[1] = z0;
1351                 zz.i[0] = z1;
1352 
1353                 /*
1354                  * tinyafter => result rounded w/ unbounded range would be tiny,
1355                  * z2 nonzero => result delivered is inexact
1356                  */
1357                 if (tinyafter) {
1358                         dummy = tiny;
1359 
1360                         if (z2)
1361                                 dummy *= tiny;
1362                         else
1363                                 dummy -= tiny2;
1364                 } else if (z2) {
1365                         dummy = huge;
1366                         dummy += tiny;
1367                 }
1368         }
1369 
1370         return (zz.e);
1371 }

1372 #else
1373 #error Unknown architecture
1374 #endif