Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/fmal.c
+++ new/usr/src/lib/libm/common/m9x/fmal.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #if defined(ELFOBJ)
31 31 #pragma weak fmal = __fmal
32 32 #endif
33 33
34 34 #include "libm.h"
35 35 #include "fma.h"
36 36 #include "fenv_inlines.h"
37 37
38 38 #if defined(__sparc)
39 39
40 40 static const union {
41 41 unsigned i[2];
42 42 double d;
43 43 } C[] = {
44 44 { 0x3fe00000u, 0 },
45 45 { 0x40000000u, 0 },
46 46 { 0x3ef00000u, 0 },
47 47 { 0x3e700000u, 0 },
48 48 { 0x41300000u, 0 },
49 49 { 0x3e300000u, 0 },
50 50 { 0x3b300000u, 0 },
51 51 { 0x38300000u, 0 },
52 52 { 0x42300000u, 0 },
53 53 { 0x3df00000u, 0 },
54 54 { 0x7fe00000u, 0 },
55 55 { 0x00100000u, 0 },
56 56 { 0x00100001u, 0 },
57 57 { 0, 0 },
58 58 { 0x7ff00000u, 0 },
59 59 { 0x7ff00001u, 0 }
60 60 };
61 61
62 62 #define half C[0].d
63 63 #define two C[1].d
64 64 #define twom16 C[2].d
65 65 #define twom24 C[3].d
66 66 #define two20 C[4].d
67 67 #define twom28 C[5].d
68 68 #define twom76 C[6].d
69 69 #define twom124 C[7].d
70 70 #define two36 C[8].d
71 71 #define twom32 C[9].d
72 72 #define huge C[10].d
73 73 #define tiny C[11].d
74 74 #define tiny2 C[12].d
75 75 #define zero C[13].d
76 76 #define inf C[14].d
↓ open down ↓ |
76 lines elided |
↑ open up ↑ |
77 77 #define snan C[15].d
78 78
79 79 static const unsigned int fsr_rm = 0xc0000000u;
80 80
81 81 /*
82 82 * fmal for SPARC: 128-bit quad precision, big-endian
83 83 */
84 84 long double
85 85 __fmal(long double x, long double y, long double z) {
86 86 union {
87 - unsigned i[4];
87 + unsigned int i[4];
88 88 long double q;
89 89 } xx, yy, zz;
90 90 union {
91 - unsigned i[2];
91 + unsigned int i[2];
92 92 double d;
93 93 } u;
94 94 double dx[5], dy[5], dxy[9], c, s;
95 - unsigned xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
96 - unsigned z0, z1, z2, z3, z4, z5, z6, z7;
97 - unsigned rm, sticky;
95 + unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
96 + unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
97 + unsigned int rm, sticky;
98 98 unsigned int fsr;
99 99 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
100 100 int cx, cy, cz;
101 101 volatile double dummy;
102 102
103 103 /* extract the high order words of the arguments */
104 104 xx.q = x;
105 105 yy.q = y;
106 106 zz.q = z;
107 107 hx = xx.i[0] & ~0x80000000;
108 108 hy = yy.i[0] & ~0x80000000;
109 109 hz = zz.i[0] & ~0x80000000;
110 110
111 111 /*
112 112 * distinguish zero, finite nonzero, infinite, and quiet nan
113 113 * arguments; raise invalid and return for signaling nans
114 114 */
115 115 if (hx >= 0x7fff0000) {
116 116 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
117 117 if (!(hx & 0x8000)) {
118 118 /* signaling nan, raise invalid */
119 119 dummy = snan;
120 120 dummy += snan;
121 121 xx.i[0] |= 0x8000;
122 122 return (xx.q);
123 123 }
124 124 cx = 3; /* quiet nan */
125 125 } else
126 126 cx = 2; /* inf */
127 127 } else if (hx == 0) {
128 128 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
129 129 /* subnormal or zero */
130 130 } else
131 131 cx = 1; /* finite nonzero */
132 132
133 133 if (hy >= 0x7fff0000) {
134 134 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
135 135 if (!(hy & 0x8000)) {
136 136 dummy = snan;
137 137 dummy += snan;
138 138 yy.i[0] |= 0x8000;
139 139 return (yy.q);
140 140 }
141 141 cy = 3;
142 142 } else
143 143 cy = 2;
144 144 } else if (hy == 0) {
145 145 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
146 146 } else
147 147 cy = 1;
148 148
149 149 if (hz >= 0x7fff0000) {
150 150 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
151 151 if (!(hz & 0x8000)) {
152 152 dummy = snan;
153 153 dummy += snan;
154 154 zz.i[0] |= 0x8000;
155 155 return (zz.q);
156 156 }
157 157 cz = 3;
158 158 } else
159 159 cz = 2;
160 160 } else if (hz == 0) {
161 161 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
162 162 } else
163 163 cz = 1;
164 164
165 165 /* get the fsr and clear current exceptions */
166 166 __fenv_getfsr32(&fsr);
167 167 fsr &= ~FSR_CEXC;
168 168
169 169 /* handle all other zero, inf, and nan cases */
170 170 if (cx != 1 || cy != 1 || cz != 1) {
171 171 /* if x or y is a quiet nan, return it */
172 172 if (cx == 3) {
173 173 __fenv_setfsr32(&fsr);
174 174 return (x);
175 175 }
176 176 if (cy == 3) {
177 177 __fenv_setfsr32(&fsr);
178 178 return (y);
179 179 }
180 180
181 181 /* if x*y is 0*inf, raise invalid and return the default nan */
182 182 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
183 183 dummy = zero;
184 184 dummy *= inf;
185 185 zz.i[0] = 0x7fffffff;
186 186 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
187 187 return (zz.q);
188 188 }
189 189
190 190 /* if z is a quiet nan, return it */
191 191 if (cz == 3) {
192 192 __fenv_setfsr32(&fsr);
193 193 return (z);
194 194 }
195 195
196 196 /*
197 197 * now none of x, y, or z is nan; handle cases where x or y
198 198 * is inf
199 199 */
200 200 if (cx == 2 || cy == 2) {
201 201 /*
202 202 * if z is also inf, either we have inf-inf or
203 203 * the result is the same as z depending on signs
204 204 */
205 205 if (cz == 2) {
206 206 if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
207 207 dummy = inf;
208 208 dummy -= inf;
209 209 zz.i[0] = 0x7fffffff;
210 210 zz.i[1] = zz.i[2] = zz.i[3] =
211 211 0xffffffff;
212 212 return (zz.q);
213 213 }
214 214 __fenv_setfsr32(&fsr);
215 215 return (z);
216 216 }
217 217
218 218 /* otherwise the result is inf with appropriate sign */
219 219 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
220 220 0x7fff0000;
221 221 zz.i[1] = zz.i[2] = zz.i[3] = 0;
222 222 __fenv_setfsr32(&fsr);
223 223 return (zz.q);
224 224 }
225 225
226 226 /* if z is inf, return it */
227 227 if (cz == 2) {
228 228 __fenv_setfsr32(&fsr);
229 229 return (z);
230 230 }
231 231
232 232 /*
233 233 * now x, y, and z are all finite; handle cases where x or y
234 234 * is zero
235 235 */
236 236 if (cx == 0 || cy == 0) {
237 237 /* either we have 0-0 or the result is the same as z */
238 238 if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
239 239 0) {
240 240 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
241 241 0;
242 242 __fenv_setfsr32(&fsr);
243 243 return (zz.q);
244 244 }
245 245 __fenv_setfsr32(&fsr);
246 246 return (z);
247 247 }
248 248
249 249 /* if we get here, x and y are nonzero finite, z must be zero */
250 250 return (x * y);
251 251 }
252 252
253 253 /*
254 254 * now x, y, and z are all finite and nonzero; set round-to-
255 255 * negative-infinity mode
256 256 */
257 257 __fenv_setfsr32(&fsr_rm);
258 258
259 259 /*
260 260 * get the signs and exponents and normalize the significands
261 261 * of x and y
262 262 */
263 263 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
264 264 ex = hx >> 16;
265 265 hx &= 0xffff;
266 266 if (!ex) {
267 267 if (hx | (xx.i[1] & 0xfffe0000)) {
268 268 ex = 1;
269 269 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
270 270 hx = xx.i[1];
271 271 xx.i[1] = xx.i[2];
272 272 xx.i[2] = xx.i[3];
273 273 xx.i[3] = 0;
274 274 ex = -31;
275 275 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
276 276 hx = xx.i[2];
277 277 xx.i[1] = xx.i[3];
278 278 xx.i[2] = xx.i[3] = 0;
279 279 ex = -63;
280 280 } else {
281 281 hx = xx.i[3];
282 282 xx.i[1] = xx.i[2] = xx.i[3] = 0;
283 283 ex = -95;
284 284 }
285 285 while ((hx & 0x10000) == 0) {
286 286 hx = (hx << 1) | (xx.i[1] >> 31);
287 287 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
288 288 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
289 289 xx.i[3] <<= 1;
290 290 ex--;
291 291 }
292 292 } else
293 293 hx |= 0x10000;
294 294 ey = hy >> 16;
295 295 hy &= 0xffff;
296 296 if (!ey) {
297 297 if (hy | (yy.i[1] & 0xfffe0000)) {
298 298 ey = 1;
299 299 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
300 300 hy = yy.i[1];
301 301 yy.i[1] = yy.i[2];
302 302 yy.i[2] = yy.i[3];
303 303 yy.i[3] = 0;
304 304 ey = -31;
305 305 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
306 306 hy = yy.i[2];
307 307 yy.i[1] = yy.i[3];
308 308 yy.i[2] = yy.i[3] = 0;
309 309 ey = -63;
310 310 } else {
311 311 hy = yy.i[3];
312 312 yy.i[1] = yy.i[2] = yy.i[3] = 0;
313 313 ey = -95;
314 314 }
315 315 while ((hy & 0x10000) == 0) {
316 316 hy = (hy << 1) | (yy.i[1] >> 31);
317 317 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
318 318 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
319 319 yy.i[3] <<= 1;
320 320 ey--;
321 321 }
322 322 } else
323 323 hy |= 0x10000;
324 324 exy = ex + ey - 0x3fff;
325 325
326 326 /* convert the significands of x and y to doubles */
327 327 c = twom16;
328 328 dx[0] = (double) ((int) hx) * c;
329 329 dy[0] = (double) ((int) hy) * c;
330 330
331 331 c *= twom24;
332 332 dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
333 333 dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
334 334
335 335 c *= twom24;
336 336 dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
337 337 0xffffff)) * c;
338 338 dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
339 339 0xffffff)) * c;
340 340
341 341 c *= twom24;
342 342 dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
343 343 0xffffff)) * c;
344 344 dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
345 345 0xffffff)) * c;
346 346
347 347 c *= twom24;
348 348 dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
349 349 dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
350 350
351 351 /* form the "digits" of the product */
352 352 dxy[0] = dx[0] * dy[0];
353 353 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
354 354 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
355 355 dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
356 356 dx[3] * dy[0];
357 357 dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
358 358 dx[3] * dy[1] + dx[4] * dy[0];
359 359 dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
360 360 dx[4] * dy[1];
361 361 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
362 362 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
363 363 dxy[8] = dx[4] * dy[4];
364 364
365 365 /* split odd-numbered terms and combine into even-numbered terms */
366 366 c = (dxy[1] + two20) - two20;
367 367 dxy[0] += c;
368 368 dxy[1] -= c;
369 369 c = (dxy[3] + twom28) - twom28;
370 370 dxy[2] += c + dxy[1];
371 371 dxy[3] -= c;
372 372 c = (dxy[5] + twom76) - twom76;
373 373 dxy[4] += c + dxy[3];
374 374 dxy[5] -= c;
375 375 c = (dxy[7] + twom124) - twom124;
376 376 dxy[6] += c + dxy[5];
377 377 dxy[8] += (dxy[7] - c);
378 378
379 379 /* propagate carries, adjusting the exponent if need be */
380 380 dxy[7] = dxy[6] + dxy[8];
381 381 dxy[5] = dxy[4] + dxy[7];
382 382 dxy[3] = dxy[2] + dxy[5];
383 383 dxy[1] = dxy[0] + dxy[3];
384 384 if (dxy[1] >= two) {
385 385 dxy[0] *= half;
386 386 dxy[1] *= half;
387 387 dxy[2] *= half;
388 388 dxy[3] *= half;
389 389 dxy[4] *= half;
390 390 dxy[5] *= half;
391 391 dxy[6] *= half;
392 392 dxy[7] *= half;
393 393 dxy[8] *= half;
394 394 exy++;
395 395 }
396 396
397 397 /* extract the significand of x*y */
398 398 s = two36;
399 399 u.d = c = dxy[1] + s;
400 400 xy0 = u.i[1];
401 401 c -= s;
402 402 dxy[1] -= c;
403 403 dxy[0] -= c;
404 404
405 405 s *= twom32;
406 406 u.d = c = dxy[1] + s;
407 407 xy1 = u.i[1];
408 408 c -= s;
409 409 dxy[2] += (dxy[0] - c);
410 410 dxy[3] = dxy[2] + dxy[5];
411 411
412 412 s *= twom32;
413 413 u.d = c = dxy[3] + s;
414 414 xy2 = u.i[1];
415 415 c -= s;
416 416 dxy[4] += (dxy[2] - c);
417 417 dxy[5] = dxy[4] + dxy[7];
418 418
419 419 s *= twom32;
420 420 u.d = c = dxy[5] + s;
421 421 xy3 = u.i[1];
422 422 c -= s;
423 423 dxy[4] -= c;
424 424 dxy[5] = dxy[4] + dxy[7];
425 425
426 426 s *= twom32;
427 427 u.d = c = dxy[5] + s;
428 428 xy4 = u.i[1];
429 429 c -= s;
430 430 dxy[6] += (dxy[4] - c);
431 431 dxy[7] = dxy[6] + dxy[8];
432 432
433 433 s *= twom32;
434 434 u.d = c = dxy[7] + s;
435 435 xy5 = u.i[1];
436 436 c -= s;
437 437 dxy[8] += (dxy[6] - c);
438 438
439 439 s *= twom32;
440 440 u.d = c = dxy[8] + s;
441 441 xy6 = u.i[1];
442 442 c -= s;
443 443 dxy[8] -= c;
444 444
445 445 s *= twom32;
446 446 u.d = c = dxy[8] + s;
447 447 xy7 = u.i[1];
448 448
449 449 /* extract the sign, exponent, and significand of z */
450 450 sz = zz.i[0] & 0x80000000;
451 451 ez = hz >> 16;
452 452 z0 = hz & 0xffff;
453 453 if (!ez) {
454 454 if (z0 | (zz.i[1] & 0xfffe0000)) {
455 455 z1 = zz.i[1];
456 456 z2 = zz.i[2];
457 457 z3 = zz.i[3];
458 458 ez = 1;
459 459 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
460 460 z0 = zz.i[1];
461 461 z1 = zz.i[2];
462 462 z2 = zz.i[3];
463 463 z3 = 0;
464 464 ez = -31;
465 465 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
466 466 z0 = zz.i[2];
467 467 z1 = zz.i[3];
468 468 z2 = z3 = 0;
469 469 ez = -63;
470 470 } else {
471 471 z0 = zz.i[3];
472 472 z1 = z2 = z3 = 0;
473 473 ez = -95;
474 474 }
475 475 while ((z0 & 0x10000) == 0) {
476 476 z0 = (z0 << 1) | (z1 >> 31);
477 477 z1 = (z1 << 1) | (z2 >> 31);
478 478 z2 = (z2 << 1) | (z3 >> 31);
479 479 z3 <<= 1;
480 480 ez--;
481 481 }
482 482 } else {
483 483 z0 |= 0x10000;
484 484 z1 = zz.i[1];
485 485 z2 = zz.i[2];
486 486 z3 = zz.i[3];
487 487 }
488 488 z4 = z5 = z6 = z7 = 0;
489 489
490 490 /*
491 491 * now x*y is represented by sxy, exy, and xy[0-7], and z is
492 492 * represented likewise; swap if need be so |xy| <= |z|
493 493 */
494 494 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
495 495 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
496 496 (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
497 497 e = sxy; sxy = sz; sz = e;
498 498 e = exy; exy = ez; ez = e;
499 499 e = xy0; xy0 = z0; z0 = e;
500 500 e = xy1; xy1 = z1; z1 = e;
501 501 e = xy2; xy2 = z2; z2 = e;
502 502 e = xy3; xy3 = z3; z3 = e;
503 503 z4 = xy4; xy4 = 0;
504 504 z5 = xy5; xy5 = 0;
505 505 z6 = xy6; xy6 = 0;
506 506 z7 = xy7; xy7 = 0;
507 507 }
508 508
509 509 /* shift the significand of xy keeping a sticky bit */
510 510 e = ez - exy;
511 511 if (e > 236) {
512 512 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
513 513 xy7 = 1;
514 514 } else if (e >= 224) {
515 515 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
516 516 ((xy0 << 1) << (255 - e));
517 517 xy7 = xy0 >> (e - 224);
518 518 if (sticky)
519 519 xy7 |= 1;
520 520 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
521 521 } else if (e >= 192) {
522 522 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
523 523 ((xy1 << 1) << (223 - e));
524 524 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
525 525 if (sticky)
526 526 xy7 |= 1;
527 527 xy6 = xy0 >> (e - 192);
528 528 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
529 529 } else if (e >= 160) {
530 530 sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
531 531 ((xy2 << 1) << (191 - e));
532 532 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
533 533 if (sticky)
534 534 xy7 |= 1;
535 535 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
536 536 xy5 = xy0 >> (e - 160);
537 537 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
538 538 } else if (e >= 128) {
539 539 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
540 540 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
541 541 if (sticky)
542 542 xy7 |= 1;
543 543 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
544 544 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
545 545 xy4 = xy0 >> (e - 128);
546 546 xy0 = xy1 = xy2 = xy3 = 0;
547 547 } else if (e >= 96) {
548 548 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
549 549 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
550 550 if (sticky)
551 551 xy7 |= 1;
552 552 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
553 553 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
554 554 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
555 555 xy3 = xy0 >> (e - 96);
556 556 xy0 = xy1 = xy2 = 0;
557 557 } else if (e >= 64) {
558 558 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
559 559 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
560 560 if (sticky)
561 561 xy7 |= 1;
562 562 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
563 563 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
564 564 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
565 565 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
566 566 xy2 = xy0 >> (e - 64);
567 567 xy0 = xy1 = 0;
568 568 } else if (e >= 32) {
569 569 sticky = xy7 | ((xy6 << 1) << (63 - e));
570 570 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
571 571 if (sticky)
572 572 xy7 |= 1;
573 573 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
574 574 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
575 575 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
576 576 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
577 577 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
578 578 xy1 = xy0 >> (e - 32);
579 579 xy0 = 0;
580 580 } else if (e) {
581 581 sticky = (xy7 << 1) << (31 - e);
582 582 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
583 583 if (sticky)
584 584 xy7 |= 1;
585 585 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
586 586 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
587 587 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
588 588 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
589 589 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
590 590 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
591 591 xy0 >>= e;
592 592 }
593 593
594 594 /* if this is a magnitude subtract, negate the significand of xy */
595 595 if (sxy ^ sz) {
596 596 xy0 = ~xy0;
597 597 xy1 = ~xy1;
598 598 xy2 = ~xy2;
599 599 xy3 = ~xy3;
600 600 xy4 = ~xy4;
601 601 xy5 = ~xy5;
602 602 xy6 = ~xy6;
603 603 xy7 = -xy7;
604 604 if (xy7 == 0)
605 605 if (++xy6 == 0)
606 606 if (++xy5 == 0)
607 607 if (++xy4 == 0)
608 608 if (++xy3 == 0)
609 609 if (++xy2 == 0)
610 610 if (++xy1 == 0)
611 611 xy0++;
612 612 }
613 613
614 614 /* add, propagating carries */
615 615 z7 += xy7;
616 616 e = (z7 < xy7);
617 617 z6 += xy6;
618 618 if (e) {
619 619 z6++;
620 620 e = (z6 <= xy6);
621 621 } else
622 622 e = (z6 < xy6);
623 623 z5 += xy5;
624 624 if (e) {
625 625 z5++;
626 626 e = (z5 <= xy5);
627 627 } else
628 628 e = (z5 < xy5);
629 629 z4 += xy4;
630 630 if (e) {
631 631 z4++;
632 632 e = (z4 <= xy4);
633 633 } else
634 634 e = (z4 < xy4);
635 635 z3 += xy3;
636 636 if (e) {
637 637 z3++;
638 638 e = (z3 <= xy3);
639 639 } else
640 640 e = (z3 < xy3);
641 641 z2 += xy2;
642 642 if (e) {
643 643 z2++;
644 644 e = (z2 <= xy2);
645 645 } else
646 646 e = (z2 < xy2);
647 647 z1 += xy1;
648 648 if (e) {
649 649 z1++;
650 650 e = (z1 <= xy1);
651 651 } else
652 652 e = (z1 < xy1);
653 653 z0 += xy0;
654 654 if (e)
655 655 z0++;
656 656
657 657 /* postnormalize and collect rounding information into z4 */
658 658 if (ez < 1) {
659 659 /* result is tiny; shift right until exponent is within range */
660 660 e = 1 - ez;
661 661 if (e > 116) {
662 662 z4 = 1; /* result can't be exactly zero */
663 663 z0 = z1 = z2 = z3 = 0;
664 664 } else if (e >= 96) {
665 665 sticky = z7 | z6 | z5 | z4 | z3 | z2 |
666 666 ((z1 << 1) << (127 - e));
667 667 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
668 668 if (sticky)
669 669 z4 |= 1;
670 670 z3 = z0 >> (e - 96);
671 671 z0 = z1 = z2 = 0;
672 672 } else if (e >= 64) {
673 673 sticky = z7 | z6 | z5 | z4 | z3 |
674 674 ((z2 << 1) << (95 - e));
675 675 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
676 676 if (sticky)
677 677 z4 |= 1;
678 678 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
679 679 z2 = z0 >> (e - 64);
680 680 z0 = z1 = 0;
681 681 } else if (e >= 32) {
682 682 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
683 683 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
684 684 if (sticky)
685 685 z4 |= 1;
686 686 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
687 687 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
688 688 z1 = z0 >> (e - 32);
689 689 z0 = 0;
690 690 } else {
691 691 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
692 692 z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
693 693 if (sticky)
694 694 z4 |= 1;
695 695 z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
696 696 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
697 697 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
698 698 z0 >>= e;
699 699 }
700 700 ez = 1;
701 701 } else if (z0 >= 0x20000) {
702 702 /* carry out; shift right by one */
703 703 sticky = (z4 & 1) | z5 | z6 | z7;
704 704 z4 = (z4 >> 1) | (z3 << 31);
705 705 if (sticky)
706 706 z4 |= 1;
707 707 z3 = (z3 >> 1) | (z2 << 31);
708 708 z2 = (z2 >> 1) | (z1 << 31);
709 709 z1 = (z1 >> 1) | (z0 << 31);
710 710 z0 >>= 1;
711 711 ez++;
712 712 } else {
713 713 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
714 714 != 0) {
715 715 /*
716 716 * borrow/cancellation; shift left as much as
717 717 * exponent allows
718 718 */
719 719 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
720 720 z0 = z1;
721 721 z1 = z2;
722 722 z2 = z3;
723 723 z3 = z4;
724 724 z4 = z5;
725 725 z5 = z6;
726 726 z6 = z7;
727 727 z7 = 0;
728 728 ez -= 32;
729 729 }
730 730 while (z0 < 0x10000 && ez > 1) {
731 731 z0 = (z0 << 1) | (z1 >> 31);
732 732 z1 = (z1 << 1) | (z2 >> 31);
733 733 z2 = (z2 << 1) | (z3 >> 31);
734 734 z3 = (z3 << 1) | (z4 >> 31);
735 735 z4 = (z4 << 1) | (z5 >> 31);
736 736 z5 = (z5 << 1) | (z6 >> 31);
737 737 z6 = (z6 << 1) | (z7 >> 31);
738 738 z7 <<= 1;
739 739 ez--;
740 740 }
741 741 }
742 742 if (z5 | z6 | z7)
743 743 z4 |= 1;
744 744 }
745 745
746 746 /* get the rounding mode */
747 747 rm = fsr >> 30;
748 748
749 749 /* strip off the integer bit, if there is one */
750 750 ibit = z0 & 0x10000;
751 751 if (ibit)
752 752 z0 -= 0x10000;
753 753 else {
754 754 ez = 0;
755 755 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
756 756 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
757 757 zz.i[1] = zz.i[2] = zz.i[3] = 0;
758 758 __fenv_setfsr32(&fsr);
759 759 return (zz.q);
760 760 }
761 761 }
762 762
763 763 /*
764 764 * flip the sense of directed roundings if the result is negative;
765 765 * the logic below applies to a positive result
766 766 */
767 767 if (sz)
768 768 rm ^= rm >> 1;
769 769
770 770 /* round and raise exceptions */
771 771 if (z4) {
772 772 fsr |= FSR_NXC;
773 773
774 774 /* decide whether to round the fraction up */
775 775 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
776 776 (z4 == 0x80000000u && (z3 & 1))))) {
777 777 /* round up and renormalize if necessary */
778 778 if (++z3 == 0)
779 779 if (++z2 == 0)
780 780 if (++z1 == 0)
781 781 if (++z0 == 0x10000) {
782 782 z0 = 0;
783 783 ez++;
784 784 }
785 785 }
786 786 }
787 787
788 788 /* check for under/overflow */
789 789 if (ez >= 0x7fff) {
790 790 if (rm == FSR_RN || rm == FSR_RP) {
791 791 zz.i[0] = sz | 0x7fff0000;
792 792 zz.i[1] = zz.i[2] = zz.i[3] = 0;
793 793 } else {
794 794 zz.i[0] = sz | 0x7ffeffff;
795 795 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
796 796 }
797 797 fsr |= FSR_OFC | FSR_NXC;
798 798 } else {
799 799 zz.i[0] = sz | (ez << 16) | z0;
800 800 zz.i[1] = z1;
801 801 zz.i[2] = z2;
802 802 zz.i[3] = z3;
803 803
804 804 /*
805 805 * !ibit => exact result was tiny before rounding,
806 806 * z4 nonzero => result delivered is inexact
807 807 */
808 808 if (!ibit) {
809 809 if (z4)
810 810 fsr |= FSR_UFC | FSR_NXC;
811 811 else if (fsr & FSR_UFM)
812 812 fsr |= FSR_UFC;
813 813 }
814 814 }
815 815
816 816 /* restore the fsr and emulate exceptions as needed */
817 817 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
818 818 __fenv_setfsr32(&fsr);
819 819 if (fsr & FSR_OFC) {
820 820 dummy = huge;
821 821 dummy *= huge;
822 822 } else if (fsr & FSR_UFC) {
823 823 dummy = tiny;
824 824 if (fsr & FSR_NXC)
825 825 dummy *= tiny;
826 826 else
827 827 dummy -= tiny2;
828 828 } else {
829 829 dummy = huge;
830 830 dummy += tiny;
831 831 }
832 832 } else {
833 833 fsr |= (fsr & 0x1f) << 5;
834 834 __fenv_setfsr32(&fsr);
835 835 }
836 836 return (zz.q);
837 837 }
838 838
839 839 #elif defined(__x86)
840 840
841 841 static const union {
842 842 unsigned i[2];
843 843 double d;
844 844 } C[] = {
845 845 { 0, 0x3fe00000u },
846 846 { 0, 0x40000000u },
847 847 { 0, 0x3df00000u },
848 848 { 0, 0x3bf00000u },
849 849 { 0, 0x41f00000u },
850 850 { 0, 0x43e00000u },
851 851 { 0, 0x7fe00000u },
852 852 { 0, 0x00100000u },
853 853 { 0, 0x00100001u }
854 854 };
855 855
856 856 #define half C[0].d
857 857 #define two C[1].d
858 858 #define twom32 C[2].d
859 859 #define twom64 C[3].d
860 860 #define two32 C[4].d
861 861 #define two63 C[5].d
862 862 #define huge C[6].d
863 863 #define tiny C[7].d
864 864 #define tiny2 C[8].d
865 865
866 866 #if defined(__amd64)
867 867 #define NI 4
868 868 #else
869 869 #define NI 3
870 870 #endif
871 871
872 872 /*
873 873 * fmal for x86: 80-bit extended double precision, little-endian
874 874 */
875 875 long double
876 876 __fmal(long double x, long double y, long double z) {
877 877 union {
878 878 unsigned i[NI];
879 879 long double e;
880 880 } xx, yy, zz;
881 881 long double xhi, yhi, xlo, ylo, t;
882 882 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
883 883 unsigned oldcwsw, cwsw, rm, sticky, carry;
884 884 int ex, ey, ez, exy, sxy, sz, e, tinyafter;
885 885 volatile double dummy;
886 886
887 887 /* extract the exponents of the arguments */
888 888 xx.e = x;
889 889 yy.e = y;
890 890 zz.e = z;
891 891 ex = xx.i[2] & 0x7fff;
892 892 ey = yy.i[2] & 0x7fff;
893 893 ez = zz.i[2] & 0x7fff;
894 894
895 895 /* dispense with inf, nan, and zero cases */
896 896 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
897 897 (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
898 898 return (x * y + z);
899 899
900 900 if (ez == 0x7fff) /* z is inf or nan */
901 901 return (x + z); /* avoid spurious under/overflow in x * y */
902 902
903 903 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */
904 904 /*
905 905 * x * y isn't zero but could underflow to zero,
906 906 * so don't add z, lest we perturb the sign
907 907 */
908 908 return (x * y);
909 909
910 910 /*
911 911 * now x, y, and z are all finite and nonzero; extract signs and
912 912 * normalize the significands (this will raise the denormal operand
913 913 * exception if need be)
914 914 */
915 915 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
916 916 sz = zz.i[2] & 0x8000;
917 917 if (!ex) {
918 918 xx.e = x * two63;
919 919 ex = (xx.i[2] & 0x7fff) - 63;
920 920 }
921 921 if (!ey) {
922 922 yy.e = y * two63;
923 923 ey = (yy.i[2] & 0x7fff) - 63;
924 924 }
925 925 if (!ez) {
926 926 zz.e = z * two63;
927 927 ez = (zz.i[2] & 0x7fff) - 63;
928 928 }
929 929
930 930 /*
931 931 * save the control and status words, mask all exceptions, and
932 932 * set rounding to 64-bit precision and toward-zero
933 933 */
934 934 __fenv_getcwsw(&oldcwsw);
935 935 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
936 936 __fenv_setcwsw(&cwsw);
937 937
938 938 /* multiply x*y to 128 bits */
939 939 exy = ex + ey - 0x3fff;
940 940 xx.i[2] = 0x3fff;
941 941 yy.i[2] = 0x3fff;
942 942 x = xx.e;
943 943 y = yy.e;
944 944 xhi = ((x + twom32) + two32) - two32;
945 945 yhi = ((y + twom32) + two32) - two32;
946 946 xlo = x - xhi;
947 947 ylo = y - yhi;
948 948 x *= y;
949 949 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
950 950 if (x >= two) {
951 951 x *= half;
952 952 y *= half;
953 953 exy++;
954 954 }
955 955
956 956 /* extract the significands */
957 957 xx.e = x;
958 958 xy0 = xx.i[1];
959 959 xy1 = xx.i[0];
960 960 yy.e = t = y + twom32;
961 961 xy2 = yy.i[0];
962 962 yy.e = (y - (t - twom32)) + twom64;
963 963 xy3 = yy.i[0];
964 964 xy4 = 0;
965 965 z0 = zz.i[1];
966 966 z1 = zz.i[0];
967 967 z2 = z3 = z4 = 0;
968 968
969 969 /*
970 970 * now x*y is represented by sxy, exy, and xy[0-4], and z is
971 971 * represented likewise; swap if need be so |xy| <= |z|
972 972 */
973 973 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
974 974 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
975 975 e = sxy; sxy = sz; sz = e;
976 976 e = exy; exy = ez; ez = e;
977 977 e = xy0; xy0 = z0; z0 = e;
978 978 e = xy1; xy1 = z1; z1 = e;
979 979 z2 = xy2; xy2 = 0;
980 980 z3 = xy3; xy3 = 0;
981 981 }
982 982
983 983 /* shift the significand of xy keeping a sticky bit */
984 984 e = ez - exy;
985 985 if (e > 130) {
986 986 xy0 = xy1 = xy2 = xy3 = 0;
987 987 xy4 = 1;
988 988 } else if (e >= 128) {
989 989 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
990 990 xy4 = xy0 >> (e - 128);
991 991 if (sticky)
992 992 xy4 |= 1;
993 993 xy0 = xy1 = xy2 = xy3 = 0;
994 994 } else if (e >= 96) {
995 995 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
996 996 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
997 997 if (sticky)
998 998 xy4 |= 1;
999 999 xy3 = xy0 >> (e - 96);
1000 1000 xy0 = xy1 = xy2 = 0;
1001 1001 } else if (e >= 64) {
1002 1002 sticky = xy3 | ((xy2 << 1) << (95 - e));
1003 1003 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1004 1004 if (sticky)
1005 1005 xy4 |= 1;
1006 1006 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1007 1007 xy2 = xy0 >> (e - 64);
1008 1008 xy0 = xy1 = 0;
1009 1009 } else if (e >= 32) {
1010 1010 sticky = (xy3 << 1) << (63 - e);
1011 1011 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1012 1012 if (sticky)
1013 1013 xy4 |= 1;
1014 1014 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1015 1015 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1016 1016 xy1 = xy0 >> (e - 32);
1017 1017 xy0 = 0;
1018 1018 } else if (e) {
1019 1019 xy4 = (xy3 << 1) << (31 - e);
1020 1020 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1021 1021 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1022 1022 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1023 1023 xy0 >>= e;
1024 1024 }
1025 1025
1026 1026 /* if this is a magnitude subtract, negate the significand of xy */
1027 1027 if (sxy ^ sz) {
1028 1028 xy0 = ~xy0;
1029 1029 xy1 = ~xy1;
1030 1030 xy2 = ~xy2;
1031 1031 xy3 = ~xy3;
1032 1032 xy4 = -xy4;
1033 1033 if (xy4 == 0)
1034 1034 if (++xy3 == 0)
1035 1035 if (++xy2 == 0)
1036 1036 if (++xy1 == 0)
1037 1037 xy0++;
1038 1038 }
1039 1039
1040 1040 /* add, propagating carries */
1041 1041 z4 += xy4;
1042 1042 carry = (z4 < xy4);
1043 1043 z3 += xy3;
1044 1044 if (carry) {
1045 1045 z3++;
1046 1046 carry = (z3 <= xy3);
1047 1047 } else
1048 1048 carry = (z3 < xy3);
1049 1049 z2 += xy2;
1050 1050 if (carry) {
1051 1051 z2++;
1052 1052 carry = (z2 <= xy2);
1053 1053 } else
1054 1054 carry = (z2 < xy2);
1055 1055 z1 += xy1;
1056 1056 if (carry) {
1057 1057 z1++;
1058 1058 carry = (z1 <= xy1);
1059 1059 } else
1060 1060 carry = (z1 < xy1);
1061 1061 z0 += xy0;
1062 1062 if (carry) {
1063 1063 z0++;
1064 1064 carry = (z0 <= xy0);
1065 1065 } else
1066 1066 carry = (z0 < xy0);
1067 1067
1068 1068 /* for a magnitude subtract, ignore the last carry out */
1069 1069 if (sxy ^ sz)
1070 1070 carry = 0;
1071 1071
1072 1072 /* postnormalize and collect rounding information into z2 */
1073 1073 if (ez < 1) {
1074 1074 /* result is tiny; shift right until exponent is within range */
1075 1075 e = 1 - ez;
1076 1076 if (e > 67) {
1077 1077 z2 = 1; /* result can't be exactly zero */
1078 1078 z0 = z1 = 0;
1079 1079 } else if (e >= 64) {
1080 1080 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1081 1081 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1082 1082 if (sticky)
1083 1083 z2 |= 1;
1084 1084 z1 = carry >> (e - 64);
1085 1085 z0 = 0;
1086 1086 } else if (e >= 32) {
1087 1087 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1088 1088 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1089 1089 if (sticky)
1090 1090 z2 |= 1;
1091 1091 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1092 1092 z0 = carry >> (e - 32);
1093 1093 } else {
1094 1094 sticky = z4 | z3 | (z2 << 1) << (31 - e);
1095 1095 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1096 1096 if (sticky)
1097 1097 z2 |= 1;
1098 1098 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1099 1099 z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1100 1100 }
1101 1101 ez = 1;
1102 1102 } else if (carry) {
1103 1103 /* carry out; shift right by one */
1104 1104 sticky = (z2 & 1) | z3 | z4;
1105 1105 z2 = (z2 >> 1) | (z1 << 31);
1106 1106 if (sticky)
1107 1107 z2 |= 1;
1108 1108 z1 = (z1 >> 1) | (z0 << 31);
1109 1109 z0 = (z0 >> 1) | 0x80000000;
1110 1110 ez++;
1111 1111 } else {
1112 1112 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1113 1113 /*
1114 1114 * borrow/cancellation; shift left as much as
1115 1115 * exponent allows
1116 1116 */
1117 1117 while (!z0 && ez >= 33) {
1118 1118 z0 = z1;
1119 1119 z1 = z2;
1120 1120 z2 = z3;
1121 1121 z3 = z4;
1122 1122 z4 = 0;
1123 1123 ez -= 32;
1124 1124 }
1125 1125 while (z0 < 0x80000000u && ez > 1) {
1126 1126 z0 = (z0 << 1) | (z1 >> 31);
1127 1127 z1 = (z1 << 1) | (z2 >> 31);
1128 1128 z2 = (z2 << 1) | (z3 >> 31);
1129 1129 z3 = (z3 << 1) | (z4 >> 31);
1130 1130 z4 <<= 1;
1131 1131 ez--;
1132 1132 }
1133 1133 }
1134 1134 if (z3 | z4)
1135 1135 z2 |= 1;
1136 1136 }
1137 1137
1138 1138 /* get the rounding mode */
1139 1139 rm = oldcwsw & 0x0c000000;
1140 1140
1141 1141 /* adjust exponent if result is subnormal */
1142 1142 tinyafter = 0;
1143 1143 if (!(z0 & 0x80000000)) {
1144 1144 ez = 0;
1145 1145 tinyafter = 1;
1146 1146 if (!(z0 | z1 | z2)) { /* exact zero */
1147 1147 zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1148 1148 zz.i[1] = zz.i[0] = 0;
1149 1149 __fenv_setcwsw(&oldcwsw);
1150 1150 return (zz.e);
1151 1151 }
1152 1152 }
1153 1153
1154 1154 /*
1155 1155 * flip the sense of directed roundings if the result is negative;
1156 1156 * the logic below applies to a positive result
1157 1157 */
1158 1158 if (sz && (rm == FCW_RM || rm == FCW_RP))
1159 1159 rm = (FCW_RM + FCW_RP) - rm;
1160 1160
1161 1161 /* round */
1162 1162 if (z2) {
1163 1163 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
1164 1164 (z2 == 0x80000000u && (z1 & 1))))) {
1165 1165 /* round up and renormalize if necessary */
1166 1166 if (++z1 == 0) {
1167 1167 if (++z0 == 0) {
1168 1168 z0 = 0x80000000;
1169 1169 ez++;
1170 1170 } else if (z0 == 0x80000000) {
1171 1171 /* rounded up to smallest normal */
1172 1172 ez = 1;
1173 1173 if ((rm == FCW_RP && z2 >
1174 1174 0x80000000u) || (rm == FCW_RN &&
1175 1175 z2 >= 0xc0000000u))
1176 1176 /*
1177 1177 * would have rounded up to
1178 1178 * smallest normal even with
1179 1179 * unbounded range
1180 1180 */
1181 1181 tinyafter = 0;
1182 1182 }
1183 1183 }
1184 1184 }
1185 1185 }
1186 1186
1187 1187 /* restore the control and status words, check for over/underflow */
1188 1188 __fenv_setcwsw(&oldcwsw);
1189 1189 if (ez >= 0x7fff) {
1190 1190 if (rm == FCW_RN || rm == FCW_RP) {
1191 1191 zz.i[2] = sz | 0x7fff;
1192 1192 zz.i[1] = 0x80000000;
1193 1193 zz.i[0] = 0;
1194 1194 } else {
1195 1195 zz.i[2] = sz | 0x7ffe;
1196 1196 zz.i[1] = 0xffffffff;
1197 1197 zz.i[0] = 0xffffffff;
1198 1198 }
1199 1199 dummy = huge;
1200 1200 dummy *= huge;
1201 1201 } else {
1202 1202 zz.i[2] = sz | ez;
1203 1203 zz.i[1] = z0;
1204 1204 zz.i[0] = z1;
1205 1205
1206 1206 /*
1207 1207 * tinyafter => result rounded w/ unbounded range would be tiny,
1208 1208 * z2 nonzero => result delivered is inexact
1209 1209 */
1210 1210 if (tinyafter) {
1211 1211 dummy = tiny;
1212 1212 if (z2)
1213 1213 dummy *= tiny;
1214 1214 else
1215 1215 dummy -= tiny2;
1216 1216 } else if (z2) {
1217 1217 dummy = huge;
1218 1218 dummy += tiny;
1219 1219 }
1220 1220 }
1221 1221
1222 1222 return (zz.e);
1223 1223 }
1224 1224
1225 1225 #else
1226 1226 #error Unknown architecture
1227 1227 #endif
↓ open down ↓ |
1120 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX