Print this page
5262 libm needs to be carefully unifdef'd
5268 libm doesn't need to hide symbols which are already local
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/fmal.c
+++ new/usr/src/lib/libm/common/m9x/fmal.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 -#if defined(ELFOBJ)
31 30 #pragma weak fmal = __fmal
32 -#endif
33 31
34 32 #include "libm.h"
35 33 #include "fma.h"
36 34 #include "fenv_inlines.h"
37 35
38 36 #if defined(__sparc)
39 37
/*
 * Table of double constants given as two 32-bit words each.  The
 * comments assume i[0] is the high-order word of d, as on big-endian
 * SPARC.  Entries are referenced by index through the macros that
 * follow the table, so their order must not change.
 */
40 38 static const union {
41 39 unsigned i[2];
42 40 double d;
43 41 } C[] = {
44 42 { 0x3fe00000u, 0 }, /* C[0]: 0.5 */
45 43 { 0x40000000u, 0 }, /* C[1]: 2 */
46 44 { 0x3ef00000u, 0 }, /* C[2]: 2^-16 */
47 45 { 0x3e700000u, 0 }, /* C[3]: 2^-24 */
48 46 { 0x41300000u, 0 }, /* C[4]: 2^20 */
49 47 { 0x3e300000u, 0 }, /* C[5]: 2^-28 */
50 48 { 0x3b300000u, 0 }, /* C[6]: 2^-76 */
51 49 { 0x38300000u, 0 }, /* C[7]: 2^-124 */
52 50 { 0x42300000u, 0 }, /* C[8]: 2^36 */
53 51 { 0x3df00000u, 0 }, /* C[9]: 2^-32 */
54 52 { 0x7fe00000u, 0 }, /* C[10]: 2^1023 */
55 53 { 0x00100000u, 0 }, /* C[11]: 2^-1022, smallest normal double */
56 54 { 0x00100001u, 0 }, /* C[12]: 2^-1022 * (1 + 2^-20) */
57 55 { 0, 0 }, /* C[13]: +0 */
58 56 { 0x7ff00000u, 0 }, /* C[14]: +infinity */
59 57 { 0x7ff00001u, 0 } /* C[15]: nan with the top fraction bit clear */
60 58 };
61 59
/* symbolic names for the entries of C[], selected by table index */
62 60 #define half C[0].d
63 61 #define two C[1].d
64 62 #define twom16 C[2].d
65 63 #define twom24 C[3].d
66 64 #define two20 C[4].d
67 65 #define twom28 C[5].d
68 66 #define twom76 C[6].d
69 67 #define twom124 C[7].d
70 68 #define two36 C[8].d
71 69 #define twom32 C[9].d
72 70 #define huge C[10].d
73 71 #define tiny C[11].d
74 72 #define tiny2 C[12].d
75 73 #define zero C[13].d
76 74 #define inf C[14].d
77 75 #define snan C[15].d
78 76
/*
 * fsr image with only bits 31:30 set; __fmal installs it with
 * __fenv_setfsr32 where it sets round-to-negative-infinity mode
 */
79 77 static const unsigned int fsr_rm = 0xc0000000u;
80 78
81 79 /*
82 80 * fmal for SPARC: 128-bit quad precision, big-endian
*
* Returns x * y + z.  For finite nonzero arguments the sum is rounded
* in one place, at the end, using the rounding mode taken from the fsr
* value read on entry.
*
* Each argument is classified as zero, finite nonzero (subnormals
* included), infinite, or quiet nan.  A signaling nan argument raises
* invalid and is returned with bit 0x8000 of its high-order word set.
* The remaining zero, inf, and nan combinations are resolved before
* any multiword arithmetic is done.
*
* In the finite nonzero case the significand of x * y is held in the
* 32-bit words xy0..xy7 and that of z in z0..z7 (most significant word
* first), with exponents exy and ez and signs sxy and sz.  The smaller
* operand is shifted right with a sticky bit, the two are added or
* subtracted, and the result is normalized, rounded, and stored in zz.
* Exception flags for this case are accumulated in the local copy of
* the fsr and written back with __fenv_setfsr32 before returning.
83 81 */
84 82 long double
85 83 __fmal(long double x, long double y, long double z) {
86 84 union {
87 85 unsigned int i[4];
88 86 long double q;
89 87 } xx, yy, zz;
90 88 union {
91 89 unsigned int i[2];
92 90 double d;
93 91 } u;
94 92 double dx[5], dy[5], dxy[9], c, s;
95 93 unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
96 94 unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
97 95 unsigned int rm, sticky;
98 96 unsigned int fsr;
99 97 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
100 98 int cx, cy, cz;
/* dummy is written only by operations performed to raise exceptions */
101 99 volatile double dummy;
102 100
103 101 /* extract the high order words of the arguments */
104 102 xx.q = x;
105 103 yy.q = y;
106 104 zz.q = z;
107 105 hx = xx.i[0] & ~0x80000000;
108 106 hy = yy.i[0] & ~0x80000000;
109 107 hz = zz.i[0] & ~0x80000000;
110 108
111 109 /*
112 110 * distinguish zero, finite nonzero, infinite, and quiet nan
113 111 * arguments; raise invalid and return for signaling nans
114 112 */
/* cx, cy, cz: 0 = zero, 1 = finite nonzero, 2 = inf, 3 = quiet nan */
115 113 if (hx >= 0x7fff0000) {
116 114 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
117 115 if (!(hx & 0x8000)) {
118 116 /* signaling nan, raise invalid */
119 117 dummy = snan;
120 118 dummy += snan;
121 119 xx.i[0] |= 0x8000;
122 120 return (xx.q);
123 121 }
124 122 cx = 3; /* quiet nan */
125 123 } else
126 124 cx = 2; /* inf */
127 125 } else if (hx == 0) {
128 126 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
129 127 /* subnormal or zero */
130 128 } else
131 129 cx = 1; /* finite nonzero */
132 130
133 131 if (hy >= 0x7fff0000) {
134 132 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
135 133 if (!(hy & 0x8000)) {
136 134 dummy = snan;
137 135 dummy += snan;
138 136 yy.i[0] |= 0x8000;
139 137 return (yy.q);
140 138 }
141 139 cy = 3;
142 140 } else
143 141 cy = 2;
144 142 } else if (hy == 0) {
145 143 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
146 144 } else
147 145 cy = 1;
148 146
149 147 if (hz >= 0x7fff0000) {
150 148 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
151 149 if (!(hz & 0x8000)) {
152 150 dummy = snan;
153 151 dummy += snan;
154 152 zz.i[0] |= 0x8000;
155 153 return (zz.q);
156 154 }
157 155 cz = 3;
158 156 } else
159 157 cz = 2;
160 158 } else if (hz == 0) {
161 159 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
162 160 } else
163 161 cz = 1;
164 162
165 163 /* get the fsr and clear current exceptions */
166 164 __fenv_getfsr32(&fsr);
167 165 fsr &= ~FSR_CEXC;
168 166
169 167 /* handle all other zero, inf, and nan cases */
170 168 if (cx != 1 || cy != 1 || cz != 1) {
171 169 /* if x or y is a quiet nan, return it */
172 170 if (cx == 3) {
173 171 __fenv_setfsr32(&fsr);
174 172 return (x);
175 173 }
176 174 if (cy == 3) {
177 175 __fenv_setfsr32(&fsr);
178 176 return (y);
179 177 }
180 178
181 179 /* if x*y is 0*inf, raise invalid and return the default nan */
182 180 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
183 181 dummy = zero;
184 182 dummy *= inf;
185 183 zz.i[0] = 0x7fffffff;
186 184 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
187 185 return (zz.q);
188 186 }
189 187
190 188 /* if z is a quiet nan, return it */
191 189 if (cz == 3) {
192 190 __fenv_setfsr32(&fsr);
193 191 return (z);
194 192 }
195 193
196 194 /*
197 195 * now none of x, y, or z is nan; handle cases where x or y
198 196 * is inf
199 197 */
200 198 if (cx == 2 || cy == 2) {
201 199 /*
202 200 * if z is also inf, either we have inf-inf or
203 201 * the result is the same as z depending on signs
204 202 */
205 203 if (cz == 2) {
206 204 if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
207 205 dummy = inf;
208 206 dummy -= inf;
209 207 zz.i[0] = 0x7fffffff;
210 208 zz.i[1] = zz.i[2] = zz.i[3] =
211 209 0xffffffff;
212 210 return (zz.q);
213 211 }
214 212 __fenv_setfsr32(&fsr);
215 213 return (z);
216 214 }
217 215
218 216 /* otherwise the result is inf with appropriate sign */
219 217 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
220 218 0x7fff0000;
221 219 zz.i[1] = zz.i[2] = zz.i[3] = 0;
222 220 __fenv_setfsr32(&fsr);
223 221 return (zz.q);
224 222 }
225 223
226 224 /* if z is inf, return it */
227 225 if (cz == 2) {
228 226 __fenv_setfsr32(&fsr);
229 227 return (z);
230 228 }
231 229
232 230 /*
233 231 * now x, y, and z are all finite; handle cases where x or y
234 232 * is zero
235 233 */
236 234 if (cx == 0 || cy == 0) {
237 235 /* either we have 0-0 or the result is the same as z */
238 236 if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
239 237 0) {
/*
 * z is zero here, so zz.i[1..3] are already 0 and only the
 * sign word is rewritten; the sign bit is set only when the
 * rounding field of the fsr equals FSR_RM
 */
240 238 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
241 239 0;
242 240 __fenv_setfsr32(&fsr);
243 241 return (zz.q);
244 242 }
245 243 __fenv_setfsr32(&fsr);
246 244 return (z);
247 245 }
248 246
249 247 /* if we get here, x and y are nonzero finite, z must be zero */
250 248 return (x * y);
251 249 }
252 250
253 251 /*
254 252 * now x, y, and z are all finite and nonzero; set round-to-
255 253 * negative-infinity mode
256 254 */
257 255 __fenv_setfsr32(&fsr_rm);
258 256
259 257 /*
260 258 * get the signs and exponents and normalize the significands
261 259 * of x and y
262 260 */
263 261 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
264 262 ex = hx >> 16;
265 263 hx &= 0xffff;
/*
 * a zero exponent field means x is subnormal: move whole words up
 * first, then shift left one bit at a time until the integer bit
 * (0x10000) is set, lowering ex by one per bit
 */
266 264 if (!ex) {
267 265 if (hx | (xx.i[1] & 0xfffe0000)) {
268 266 ex = 1;
269 267 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
270 268 hx = xx.i[1];
271 269 xx.i[1] = xx.i[2];
272 270 xx.i[2] = xx.i[3];
273 271 xx.i[3] = 0;
274 272 ex = -31;
275 273 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
276 274 hx = xx.i[2];
277 275 xx.i[1] = xx.i[3];
278 276 xx.i[2] = xx.i[3] = 0;
279 277 ex = -63;
280 278 } else {
281 279 hx = xx.i[3];
282 280 xx.i[1] = xx.i[2] = xx.i[3] = 0;
283 281 ex = -95;
284 282 }
285 283 while ((hx & 0x10000) == 0) {
286 284 hx = (hx << 1) | (xx.i[1] >> 31);
287 285 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
288 286 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
289 287 xx.i[3] <<= 1;
290 288 ex--;
291 289 }
292 290 } else
293 291 hx |= 0x10000;
294 292 ey = hy >> 16;
295 293 hy &= 0xffff;
296 294 if (!ey) {
297 295 if (hy | (yy.i[1] & 0xfffe0000)) {
298 296 ey = 1;
299 297 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
300 298 hy = yy.i[1];
301 299 yy.i[1] = yy.i[2];
302 300 yy.i[2] = yy.i[3];
303 301 yy.i[3] = 0;
304 302 ey = -31;
305 303 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
306 304 hy = yy.i[2];
307 305 yy.i[1] = yy.i[3];
308 306 yy.i[2] = yy.i[3] = 0;
309 307 ey = -63;
310 308 } else {
311 309 hy = yy.i[3];
312 310 yy.i[1] = yy.i[2] = yy.i[3] = 0;
313 311 ey = -95;
314 312 }
315 313 while ((hy & 0x10000) == 0) {
316 314 hy = (hy << 1) | (yy.i[1] >> 31);
317 315 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
318 316 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
319 317 yy.i[3] <<= 1;
320 318 ey--;
321 319 }
322 320 } else
323 321 hy |= 0x10000;
/* ex and ey are both biased, so one bias (0x3fff) is removed */
324 322 exy = ex + ey - 0x3fff;
325 323
326 324 /* convert the significands of x and y to doubles */
/*
 * dx[0]/dy[0] take the leading 17 bits (integer bit plus 16 fraction
 * bits); dx[1..4]/dy[1..4] take successive 24-bit pieces, each scaled
 * by a further 2^-24
 */
327 325 c = twom16;
328 326 dx[0] = (double) ((int) hx) * c;
329 327 dy[0] = (double) ((int) hy) * c;
330 328
331 329 c *= twom24;
332 330 dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
333 331 dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
334 332
335 333 c *= twom24;
336 334 dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
337 335 0xffffff)) * c;
338 336 dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
339 337 0xffffff)) * c;
340 338
341 339 c *= twom24;
342 340 dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
343 341 0xffffff)) * c;
344 342 dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
345 343 0xffffff)) * c;
346 344
347 345 c *= twom24;
348 346 dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
349 347 dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
350 348
351 349 /* form the "digits" of the product */
352 350 dxy[0] = dx[0] * dy[0];
353 351 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
354 352 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
355 353 dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
356 354 dx[3] * dy[0];
357 355 dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
358 356 dx[3] * dy[1] + dx[4] * dy[0];
359 357 dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
360 358 dx[4] * dy[1];
361 359 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
362 360 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
363 361 dxy[8] = dx[4] * dy[4];
364 362
365 363 /* split odd-numbered terms and combine into even-numbered terms */
366 364 c = (dxy[1] + two20) - two20;
367 365 dxy[0] += c;
368 366 dxy[1] -= c;
369 367 c = (dxy[3] + twom28) - twom28;
370 368 dxy[2] += c + dxy[1];
371 369 dxy[3] -= c;
372 370 c = (dxy[5] + twom76) - twom76;
373 371 dxy[4] += c + dxy[3];
374 372 dxy[5] -= c;
375 373 c = (dxy[7] + twom124) - twom124;
376 374 dxy[6] += c + dxy[5];
377 375 dxy[8] += (dxy[7] - c);
378 376
379 377 /* propagate carries, adjusting the exponent if need be */
380 378 dxy[7] = dxy[6] + dxy[8];
381 379 dxy[5] = dxy[4] + dxy[7];
382 380 dxy[3] = dxy[2] + dxy[5];
383 381 dxy[1] = dxy[0] + dxy[3];
384 382 if (dxy[1] >= two) {
385 383 dxy[0] *= half;
386 384 dxy[1] *= half;
387 385 dxy[2] *= half;
388 386 dxy[3] *= half;
389 387 dxy[4] *= half;
390 388 dxy[5] *= half;
391 389 dxy[6] *= half;
392 390 dxy[7] *= half;
393 391 dxy[8] *= half;
394 392 exy++;
395 393 }
396 394
397 395 /* extract the significand of x*y */
/*
 * each step adds the scale s to a partial sum and reads u.i[1] (the
 * low-order word of the double on this big-endian target) as the next
 * 32-bit word of the significand; s is multiplied by 2^-32 between
 * words and the part just extracted is subtracted back out
 */
398 396 s = two36;
399 397 u.d = c = dxy[1] + s;
400 398 xy0 = u.i[1];
401 399 c -= s;
402 400 dxy[1] -= c;
403 401 dxy[0] -= c;
404 402
405 403 s *= twom32;
406 404 u.d = c = dxy[1] + s;
407 405 xy1 = u.i[1];
408 406 c -= s;
409 407 dxy[2] += (dxy[0] - c);
410 408 dxy[3] = dxy[2] + dxy[5];
411 409
412 410 s *= twom32;
413 411 u.d = c = dxy[3] + s;
414 412 xy2 = u.i[1];
415 413 c -= s;
416 414 dxy[4] += (dxy[2] - c);
417 415 dxy[5] = dxy[4] + dxy[7];
418 416
419 417 s *= twom32;
420 418 u.d = c = dxy[5] + s;
421 419 xy3 = u.i[1];
422 420 c -= s;
423 421 dxy[4] -= c;
424 422 dxy[5] = dxy[4] + dxy[7];
425 423
426 424 s *= twom32;
427 425 u.d = c = dxy[5] + s;
428 426 xy4 = u.i[1];
429 427 c -= s;
430 428 dxy[6] += (dxy[4] - c);
431 429 dxy[7] = dxy[6] + dxy[8];
432 430
433 431 s *= twom32;
434 432 u.d = c = dxy[7] + s;
435 433 xy5 = u.i[1];
436 434 c -= s;
437 435 dxy[8] += (dxy[6] - c);
438 436
439 437 s *= twom32;
440 438 u.d = c = dxy[8] + s;
441 439 xy6 = u.i[1];
442 440 c -= s;
443 441 dxy[8] -= c;
444 442
445 443 s *= twom32;
446 444 u.d = c = dxy[8] + s;
447 445 xy7 = u.i[1];
448 446
449 447 /* extract the sign, exponent, and significand of z */
450 448 sz = zz.i[0] & 0x80000000;
451 449 ez = hz >> 16;
452 450 z0 = hz & 0xffff;
453 451 if (!ez) {
454 452 if (z0 | (zz.i[1] & 0xfffe0000)) {
455 453 z1 = zz.i[1];
456 454 z2 = zz.i[2];
457 455 z3 = zz.i[3];
458 456 ez = 1;
459 457 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
460 458 z0 = zz.i[1];
461 459 z1 = zz.i[2];
462 460 z2 = zz.i[3];
463 461 z3 = 0;
464 462 ez = -31;
465 463 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
466 464 z0 = zz.i[2];
467 465 z1 = zz.i[3];
468 466 z2 = z3 = 0;
469 467 ez = -63;
470 468 } else {
471 469 z0 = zz.i[3];
472 470 z1 = z2 = z3 = 0;
473 471 ez = -95;
474 472 }
475 473 while ((z0 & 0x10000) == 0) {
476 474 z0 = (z0 << 1) | (z1 >> 31);
477 475 z1 = (z1 << 1) | (z2 >> 31);
478 476 z2 = (z2 << 1) | (z3 >> 31);
479 477 z3 <<= 1;
480 478 ez--;
481 479 }
482 480 } else {
483 481 z0 |= 0x10000;
484 482 z1 = zz.i[1];
485 483 z2 = zz.i[2];
486 484 z3 = zz.i[3];
487 485 }
488 486 z4 = z5 = z6 = z7 = 0;
489 487
490 488 /*
491 489 * now x*y is represented by sxy, exy, and xy[0-7], and z is
492 490 * represented likewise; swap if need be so |xy| <= |z|
493 491 */
494 492 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
495 493 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
496 494 (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
497 495 e = sxy; sxy = sz; sz = e;
498 496 e = exy; exy = ez; ez = e;
499 497 e = xy0; xy0 = z0; z0 = e;
500 498 e = xy1; xy1 = z1; z1 = e;
501 499 e = xy2; xy2 = z2; z2 = e;
502 500 e = xy3; xy3 = z3; z3 = e;
503 501 z4 = xy4; xy4 = 0;
504 502 z5 = xy5; xy5 = 0;
505 503 z6 = xy6; xy6 = 0;
506 504 z7 = xy7; xy7 = 0;
507 505 }
508 506
509 507 /* shift the significand of xy keeping a sticky bit */
510 508 e = ez - exy;
511 509 if (e > 236) {
/* xy is shifted out entirely; only a sticky bit is kept */
512 510 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
513 511 xy7 = 1;
514 512 } else if (e >= 224) {
515 513 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
516 514 ((xy0 << 1) << (255 - e));
517 515 xy7 = xy0 >> (e - 224);
518 516 if (sticky)
519 517 xy7 |= 1;
520 518 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
521 519 } else if (e >= 192) {
522 520 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
523 521 ((xy1 << 1) << (223 - e));
524 522 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
525 523 if (sticky)
526 524 xy7 |= 1;
527 525 xy6 = xy0 >> (e - 192);
528 526 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
529 527 } else if (e >= 160) {
530 528 sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
531 529 ((xy2 << 1) << (191 - e));
532 530 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
533 531 if (sticky)
534 532 xy7 |= 1;
535 533 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
536 534 xy5 = xy0 >> (e - 160);
537 535 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
538 536 } else if (e >= 128) {
539 537 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
540 538 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
541 539 if (sticky)
542 540 xy7 |= 1;
543 541 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
544 542 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
545 543 xy4 = xy0 >> (e - 128);
546 544 xy0 = xy1 = xy2 = xy3 = 0;
547 545 } else if (e >= 96) {
548 546 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
549 547 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
550 548 if (sticky)
551 549 xy7 |= 1;
552 550 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
553 551 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
554 552 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
555 553 xy3 = xy0 >> (e - 96);
556 554 xy0 = xy1 = xy2 = 0;
557 555 } else if (e >= 64) {
558 556 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
559 557 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
560 558 if (sticky)
561 559 xy7 |= 1;
562 560 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
563 561 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
564 562 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
565 563 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
566 564 xy2 = xy0 >> (e - 64);
567 565 xy0 = xy1 = 0;
568 566 } else if (e >= 32) {
569 567 sticky = xy7 | ((xy6 << 1) << (63 - e));
570 568 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
571 569 if (sticky)
572 570 xy7 |= 1;
573 571 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
574 572 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
575 573 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
576 574 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
577 575 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
578 576 xy1 = xy0 >> (e - 32);
579 577 xy0 = 0;
580 578 } else if (e) {
581 579 sticky = (xy7 << 1) << (31 - e);
582 580 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
583 581 if (sticky)
584 582 xy7 |= 1;
585 583 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
586 584 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
587 585 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
588 586 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
589 587 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
590 588 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
591 589 xy0 >>= e;
592 590 }
593 591
594 592 /* if this is a magnitude subtract, negate the significand of xy */
595 593 if (sxy ^ sz) {
596 594 xy0 = ~xy0;
597 595 xy1 = ~xy1;
598 596 xy2 = ~xy2;
599 597 xy3 = ~xy3;
600 598 xy4 = ~xy4;
601 599 xy5 = ~xy5;
602 600 xy6 = ~xy6;
/*
 * two's-complement negate: -xy7 is ~xy7 + 1, and the +1 carries
 * upward only through words that wrap to 0
 */
603 601 xy7 = -xy7;
604 602 if (xy7 == 0)
605 603 if (++xy6 == 0)
606 604 if (++xy5 == 0)
607 605 if (++xy4 == 0)
608 606 if (++xy3 == 0)
609 607 if (++xy2 == 0)
610 608 if (++xy1 == 0)
611 609 xy0++;
612 610 }
613 611
614 612 /* add, propagating carries */
615 613 z7 += xy7;
616 614 e = (z7 < xy7);
617 615 z6 += xy6;
618 616 if (e) {
619 617 z6++;
620 618 e = (z6 <= xy6);
621 619 } else
622 620 e = (z6 < xy6);
623 621 z5 += xy5;
624 622 if (e) {
625 623 z5++;
626 624 e = (z5 <= xy5);
627 625 } else
628 626 e = (z5 < xy5);
629 627 z4 += xy4;
630 628 if (e) {
631 629 z4++;
632 630 e = (z4 <= xy4);
633 631 } else
634 632 e = (z4 < xy4);
635 633 z3 += xy3;
636 634 if (e) {
637 635 z3++;
638 636 e = (z3 <= xy3);
639 637 } else
640 638 e = (z3 < xy3);
641 639 z2 += xy2;
642 640 if (e) {
643 641 z2++;
644 642 e = (z2 <= xy2);
645 643 } else
646 644 e = (z2 < xy2);
647 645 z1 += xy1;
648 646 if (e) {
649 647 z1++;
650 648 e = (z1 <= xy1);
651 649 } else
652 650 e = (z1 < xy1);
653 651 z0 += xy0;
654 652 if (e)
655 653 z0++;
656 654
657 655 /* postnormalize and collect rounding information into z4 */
658 656 if (ez < 1) {
659 657 /* result is tiny; shift right until exponent is within range */
660 658 e = 1 - ez;
661 659 if (e > 116) {
662 660 z4 = 1; /* result can't be exactly zero */
663 661 z0 = z1 = z2 = z3 = 0;
664 662 } else if (e >= 96) {
665 663 sticky = z7 | z6 | z5 | z4 | z3 | z2 |
666 664 ((z1 << 1) << (127 - e));
667 665 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
668 666 if (sticky)
669 667 z4 |= 1;
670 668 z3 = z0 >> (e - 96);
671 669 z0 = z1 = z2 = 0;
672 670 } else if (e >= 64) {
673 671 sticky = z7 | z6 | z5 | z4 | z3 |
674 672 ((z2 << 1) << (95 - e));
675 673 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
676 674 if (sticky)
677 675 z4 |= 1;
678 676 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
679 677 z2 = z0 >> (e - 64);
680 678 z0 = z1 = 0;
681 679 } else if (e >= 32) {
682 680 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
683 681 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
684 682 if (sticky)
685 683 z4 |= 1;
686 684 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
687 685 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
688 686 z1 = z0 >> (e - 32);
689 687 z0 = 0;
690 688 } else {
691 689 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
692 690 z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
693 691 if (sticky)
694 692 z4 |= 1;
695 693 z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
696 694 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
697 695 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
698 696 z0 >>= e;
699 697 }
700 698 ez = 1;
701 699 } else if (z0 >= 0x20000) {
702 700 /* carry out; shift right by one */
703 701 sticky = (z4 & 1) | z5 | z6 | z7;
704 702 z4 = (z4 >> 1) | (z3 << 31);
705 703 if (sticky)
706 704 z4 |= 1;
707 705 z3 = (z3 >> 1) | (z2 << 31);
708 706 z2 = (z2 >> 1) | (z1 << 31);
709 707 z1 = (z1 >> 1) | (z0 << 31);
710 708 z0 >>= 1;
711 709 ez++;
712 710 } else {
713 711 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
714 712 != 0) {
715 713 /*
716 714 * borrow/cancellation; shift left as much as
717 715 * exponent allows
718 716 */
719 717 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
720 718 z0 = z1;
721 719 z1 = z2;
722 720 z2 = z3;
723 721 z3 = z4;
724 722 z4 = z5;
725 723 z5 = z6;
726 724 z6 = z7;
727 725 z7 = 0;
728 726 ez -= 32;
729 727 }
730 728 while (z0 < 0x10000 && ez > 1) {
731 729 z0 = (z0 << 1) | (z1 >> 31);
732 730 z1 = (z1 << 1) | (z2 >> 31);
733 731 z2 = (z2 << 1) | (z3 >> 31);
734 732 z3 = (z3 << 1) | (z4 >> 31);
735 733 z4 = (z4 << 1) | (z5 >> 31);
736 734 z5 = (z5 << 1) | (z6 >> 31);
737 735 z6 = (z6 << 1) | (z7 >> 31);
738 736 z7 <<= 1;
739 737 ez--;
740 738 }
741 739 }
742 740 if (z5 | z6 | z7)
743 741 z4 |= 1;
744 742 }
745 743
746 744 /* get the rounding mode */
747 745 rm = fsr >> 30;
748 746
749 747 /* strip off the integer bit, if there is one */
750 748 ibit = z0 & 0x10000;
751 749 if (ibit)
752 750 z0 -= 0x10000;
753 751 else {
754 752 ez = 0;
755 753 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
756 754 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
757 755 zz.i[1] = zz.i[2] = zz.i[3] = 0;
758 756 __fenv_setfsr32(&fsr);
759 757 return (zz.q);
760 758 }
761 759 }
762 760
763 761 /*
764 762 * flip the sense of directed roundings if the result is negative;
765 763 * the logic below applies to a positive result
766 764 */
/* rm ^= rm >> 1 maps 2 <-> 3 and leaves 0 and 1 unchanged */
767 765 if (sz)
768 766 rm ^= rm >> 1;
769 767
770 768 /* round and raise exceptions */
771 769 if (z4) {
772 770 fsr |= FSR_NXC;
773 771
774 772 /* decide whether to round the fraction up */
775 773 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
776 774 (z4 == 0x80000000u && (z3 & 1))))) {
777 775 /* round up and renormalize if necessary */
778 776 if (++z3 == 0)
779 777 if (++z2 == 0)
780 778 if (++z1 == 0)
781 779 if (++z0 == 0x10000) {
782 780 z0 = 0;
783 781 ez++;
784 782 }
785 783 }
786 784 }
787 785
788 786 /* check for under/overflow */
789 787 if (ez >= 0x7fff) {
790 788 if (rm == FSR_RN || rm == FSR_RP) {
791 789 zz.i[0] = sz | 0x7fff0000;
792 790 zz.i[1] = zz.i[2] = zz.i[3] = 0;
793 791 } else {
794 792 zz.i[0] = sz | 0x7ffeffff;
795 793 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
796 794 }
797 795 fsr |= FSR_OFC | FSR_NXC;
798 796 } else {
799 797 zz.i[0] = sz | (ez << 16) | z0;
800 798 zz.i[1] = z1;
801 799 zz.i[2] = z2;
802 800 zz.i[3] = z3;
803 801
804 802 /*
805 803 * !ibit => exact result was tiny before rounding,
806 804 * z4 nonzero => result delivered is inexact
807 805 */
808 806 if (!ibit) {
809 807 if (z4)
810 808 fsr |= FSR_UFC | FSR_NXC;
811 809 else if (fsr & FSR_UFM)
812 810 fsr |= FSR_UFC;
813 811 }
814 812 }
815 813
816 814 /* restore the fsr and emulate exceptions as needed */
/*
 * NOTE(review): the test below presumably matches the exceptions just
 * recorded against the trap-enable bits, which would sit 23 bits above
 * the current-exception field -- confirm against the FSR layout.  When
 * it is true, a double operation that raises the same exception is
 * executed after the fsr has been restored.
 */
817 815 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
818 816 __fenv_setfsr32(&fsr);
819 817 if (fsr & FSR_OFC) {
820 818 dummy = huge;
821 819 dummy *= huge;
822 820 } else if (fsr & FSR_UFC) {
823 821 dummy = tiny;
824 822 if (fsr & FSR_NXC)
825 823 dummy *= tiny;
826 824 else
827 825 dummy -= tiny2;
828 826 } else {
829 827 dummy = huge;
830 828 dummy += tiny;
831 829 }
832 830 } else {
/*
 * copy the low five bits of fsr into bits 5-9 (presumably current
 * exceptions into the accrued-exception field -- confirm)
 */
833 831 fsr |= (fsr & 0x1f) << 5;
834 832 __fenv_setfsr32(&fsr);
835 833 }
836 834 return (zz.q);
837 835 }
838 836
839 837 #elif defined(__x86)
840 838
/*
 * Table of double constants given as two 32-bit words each.  The
 * comments assume i[1] is the high-order word of d, as on little-endian
 * x86.  Entries are referenced by index through the macros that follow
 * the table, so their order must not change.
 */
841 839 static const union {
842 840 unsigned i[2];
843 841 double d;
844 842 } C[] = {
845 843 { 0, 0x3fe00000u }, /* C[0]: 0.5 */
846 844 { 0, 0x40000000u }, /* C[1]: 2 */
847 845 { 0, 0x3df00000u }, /* C[2]: 2^-32 */
848 846 { 0, 0x3bf00000u }, /* C[3]: 2^-64 */
849 847 { 0, 0x41f00000u }, /* C[4]: 2^32 */
850 848 { 0, 0x43e00000u }, /* C[5]: 2^63 */
851 849 { 0, 0x7fe00000u }, /* C[6]: 2^1023 */
852 850 { 0, 0x00100000u }, /* C[7]: 2^-1022, smallest normal double */
853 851 { 0, 0x00100001u } /* C[8]: 2^-1022 * (1 + 2^-20) */
854 852 };
855 853
/* symbolic names for the entries of C[], selected by table index */
856 854 #define half C[0].d
857 855 #define two C[1].d
858 856 #define twom32 C[2].d
859 857 #define twom64 C[3].d
860 858 #define two32 C[4].d
861 859 #define two63 C[5].d
862 860 #define huge C[6].d
863 861 #define tiny C[7].d
864 862 #define tiny2 C[8].d
865 863
/*
 * NI is the number of unsigned words in the union that __fmal overlays
 * on a long double: 4 when __amd64 is defined, 3 otherwise
 */
866 864 #if defined(__amd64)
867 865 #define NI 4
868 866 #else
869 867 #define NI 3
870 868 #endif
871 869
872 870 /*
873 871 * fmal for x86: 80-bit extended double precision, little-endian
874 872 */
875 873 long double
876 874 __fmal(long double x, long double y, long double z) {
877 875 union {
878 876 unsigned i[NI];
879 877 long double e;
880 878 } xx, yy, zz;
881 879 long double xhi, yhi, xlo, ylo, t;
882 880 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
883 881 unsigned oldcwsw, cwsw, rm, sticky, carry;
884 882 int ex, ey, ez, exy, sxy, sz, e, tinyafter;
885 883 volatile double dummy;
886 884
887 885 /* extract the exponents of the arguments */
888 886 xx.e = x;
889 887 yy.e = y;
890 888 zz.e = z;
891 889 ex = xx.i[2] & 0x7fff;
892 890 ey = yy.i[2] & 0x7fff;
893 891 ez = zz.i[2] & 0x7fff;
894 892
895 893 /* dispense with inf, nan, and zero cases */
896 894 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
897 895 (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
898 896 return (x * y + z);
899 897
900 898 if (ez == 0x7fff) /* z is inf or nan */
901 899 return (x + z); /* avoid spurious under/overflow in x * y */
902 900
903 901 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */
904 902 /*
905 903 * x * y isn't zero but could underflow to zero,
906 904 * so don't add z, lest we perturb the sign
907 905 */
908 906 return (x * y);
909 907
910 908 /*
911 909 * now x, y, and z are all finite and nonzero; extract signs and
912 910 * normalize the significands (this will raise the denormal operand
913 911 * exception if need be)
914 912 */
915 913 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
916 914 sz = zz.i[2] & 0x8000;
917 915 if (!ex) {
918 916 xx.e = x * two63;
919 917 ex = (xx.i[2] & 0x7fff) - 63;
920 918 }
921 919 if (!ey) {
922 920 yy.e = y * two63;
923 921 ey = (yy.i[2] & 0x7fff) - 63;
924 922 }
925 923 if (!ez) {
926 924 zz.e = z * two63;
927 925 ez = (zz.i[2] & 0x7fff) - 63;
928 926 }
929 927
930 928 /*
931 929 * save the control and status words, mask all exceptions, and
932 930 * set rounding to 64-bit precision and toward-zero
933 931 */
934 932 __fenv_getcwsw(&oldcwsw);
935 933 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
936 934 __fenv_setcwsw(&cwsw);
937 935
938 936 /* multiply x*y to 128 bits */
939 937 exy = ex + ey - 0x3fff;
940 938 xx.i[2] = 0x3fff;
941 939 yy.i[2] = 0x3fff;
942 940 x = xx.e;
943 941 y = yy.e;
944 942 xhi = ((x + twom32) + two32) - two32;
945 943 yhi = ((y + twom32) + two32) - two32;
946 944 xlo = x - xhi;
947 945 ylo = y - yhi;
948 946 x *= y;
949 947 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
950 948 if (x >= two) {
951 949 x *= half;
952 950 y *= half;
953 951 exy++;
954 952 }
955 953
956 954 /* extract the significands */
957 955 xx.e = x;
958 956 xy0 = xx.i[1];
959 957 xy1 = xx.i[0];
960 958 yy.e = t = y + twom32;
961 959 xy2 = yy.i[0];
962 960 yy.e = (y - (t - twom32)) + twom64;
963 961 xy3 = yy.i[0];
964 962 xy4 = 0;
965 963 z0 = zz.i[1];
966 964 z1 = zz.i[0];
967 965 z2 = z3 = z4 = 0;
968 966
969 967 /*
970 968 * now x*y is represented by sxy, exy, and xy[0-4], and z is
971 969 * represented likewise; swap if need be so |xy| <= |z|
972 970 */
973 971 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
974 972 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
975 973 e = sxy; sxy = sz; sz = e;
976 974 e = exy; exy = ez; ez = e;
977 975 e = xy0; xy0 = z0; z0 = e;
978 976 e = xy1; xy1 = z1; z1 = e;
979 977 z2 = xy2; xy2 = 0;
980 978 z3 = xy3; xy3 = 0;
981 979 }
982 980
983 981 /* shift the significand of xy keeping a sticky bit */
984 982 e = ez - exy;
985 983 if (e > 130) {
986 984 xy0 = xy1 = xy2 = xy3 = 0;
987 985 xy4 = 1;
988 986 } else if (e >= 128) {
989 987 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
990 988 xy4 = xy0 >> (e - 128);
991 989 if (sticky)
992 990 xy4 |= 1;
993 991 xy0 = xy1 = xy2 = xy3 = 0;
994 992 } else if (e >= 96) {
995 993 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
996 994 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
997 995 if (sticky)
998 996 xy4 |= 1;
999 997 xy3 = xy0 >> (e - 96);
1000 998 xy0 = xy1 = xy2 = 0;
1001 999 } else if (e >= 64) {
1002 1000 sticky = xy3 | ((xy2 << 1) << (95 - e));
1003 1001 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1004 1002 if (sticky)
1005 1003 xy4 |= 1;
1006 1004 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1007 1005 xy2 = xy0 >> (e - 64);
1008 1006 xy0 = xy1 = 0;
1009 1007 } else if (e >= 32) {
1010 1008 sticky = (xy3 << 1) << (63 - e);
1011 1009 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1012 1010 if (sticky)
1013 1011 xy4 |= 1;
1014 1012 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1015 1013 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1016 1014 xy1 = xy0 >> (e - 32);
1017 1015 xy0 = 0;
1018 1016 } else if (e) {
1019 1017 xy4 = (xy3 << 1) << (31 - e);
1020 1018 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1021 1019 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1022 1020 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1023 1021 xy0 >>= e;
1024 1022 }
1025 1023
1026 1024 /* if this is a magnitude subtract, negate the significand of xy */
1027 1025 if (sxy ^ sz) {
1028 1026 xy0 = ~xy0;
1029 1027 xy1 = ~xy1;
1030 1028 xy2 = ~xy2;
1031 1029 xy3 = ~xy3;
1032 1030 xy4 = -xy4;
1033 1031 if (xy4 == 0)
1034 1032 if (++xy3 == 0)
1035 1033 if (++xy2 == 0)
1036 1034 if (++xy1 == 0)
1037 1035 xy0++;
1038 1036 }
1039 1037
1040 1038 /* add, propagating carries */
1041 1039 z4 += xy4;
1042 1040 carry = (z4 < xy4);
1043 1041 z3 += xy3;
1044 1042 if (carry) {
1045 1043 z3++;
1046 1044 carry = (z3 <= xy3);
1047 1045 } else
1048 1046 carry = (z3 < xy3);
1049 1047 z2 += xy2;
1050 1048 if (carry) {
1051 1049 z2++;
1052 1050 carry = (z2 <= xy2);
1053 1051 } else
1054 1052 carry = (z2 < xy2);
1055 1053 z1 += xy1;
1056 1054 if (carry) {
1057 1055 z1++;
1058 1056 carry = (z1 <= xy1);
1059 1057 } else
1060 1058 carry = (z1 < xy1);
1061 1059 z0 += xy0;
1062 1060 if (carry) {
1063 1061 z0++;
1064 1062 carry = (z0 <= xy0);
1065 1063 } else
1066 1064 carry = (z0 < xy0);
1067 1065
1068 1066 /* for a magnitude subtract, ignore the last carry out */
1069 1067 if (sxy ^ sz)
1070 1068 carry = 0;
1071 1069
1072 1070 /* postnormalize and collect rounding information into z2 */
1073 1071 if (ez < 1) {
1074 1072 /* result is tiny; shift right until exponent is within range */
1075 1073 e = 1 - ez;
1076 1074 if (e > 67) {
1077 1075 z2 = 1; /* result can't be exactly zero */
1078 1076 z0 = z1 = 0;
1079 1077 } else if (e >= 64) {
1080 1078 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1081 1079 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1082 1080 if (sticky)
1083 1081 z2 |= 1;
1084 1082 z1 = carry >> (e - 64);
1085 1083 z0 = 0;
1086 1084 } else if (e >= 32) {
1087 1085 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1088 1086 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1089 1087 if (sticky)
1090 1088 z2 |= 1;
1091 1089 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1092 1090 z0 = carry >> (e - 32);
1093 1091 } else {
1094 1092 sticky = z4 | z3 | (z2 << 1) << (31 - e);
1095 1093 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1096 1094 if (sticky)
1097 1095 z2 |= 1;
1098 1096 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1099 1097 z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1100 1098 }
1101 1099 ez = 1;
1102 1100 } else if (carry) {
1103 1101 /* carry out; shift right by one */
1104 1102 sticky = (z2 & 1) | z3 | z4;
1105 1103 z2 = (z2 >> 1) | (z1 << 31);
1106 1104 if (sticky)
1107 1105 z2 |= 1;
1108 1106 z1 = (z1 >> 1) | (z0 << 31);
1109 1107 z0 = (z0 >> 1) | 0x80000000;
1110 1108 ez++;
1111 1109 } else {
1112 1110 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1113 1111 /*
1114 1112 * borrow/cancellation; shift left as much as
1115 1113 * exponent allows
1116 1114 */
1117 1115 while (!z0 && ez >= 33) {
1118 1116 z0 = z1;
1119 1117 z1 = z2;
1120 1118 z2 = z3;
1121 1119 z3 = z4;
1122 1120 z4 = 0;
1123 1121 ez -= 32;
1124 1122 }
1125 1123 while (z0 < 0x80000000u && ez > 1) {
1126 1124 z0 = (z0 << 1) | (z1 >> 31);
1127 1125 z1 = (z1 << 1) | (z2 >> 31);
1128 1126 z2 = (z2 << 1) | (z3 >> 31);
1129 1127 z3 = (z3 << 1) | (z4 >> 31);
1130 1128 z4 <<= 1;
1131 1129 ez--;
1132 1130 }
1133 1131 }
1134 1132 if (z3 | z4)
1135 1133 z2 |= 1;
1136 1134 }
1137 1135
1138 1136 /* get the rounding mode */
1139 1137 rm = oldcwsw & 0x0c000000;
1140 1138
1141 1139 /* adjust exponent if result is subnormal */
1142 1140 tinyafter = 0;
1143 1141 if (!(z0 & 0x80000000)) {
1144 1142 ez = 0;
1145 1143 tinyafter = 1;
1146 1144 if (!(z0 | z1 | z2)) { /* exact zero */
1147 1145 zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1148 1146 zz.i[1] = zz.i[0] = 0;
1149 1147 __fenv_setcwsw(&oldcwsw);
1150 1148 return (zz.e);
1151 1149 }
1152 1150 }
1153 1151
1154 1152 /*
1155 1153 * flip the sense of directed roundings if the result is negative;
1156 1154 * the logic below applies to a positive result
1157 1155 */
1158 1156 if (sz && (rm == FCW_RM || rm == FCW_RP))
1159 1157 rm = (FCW_RM + FCW_RP) - rm;
1160 1158
1161 1159 /* round */
1162 1160 if (z2) {
1163 1161 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
1164 1162 (z2 == 0x80000000u && (z1 & 1))))) {
1165 1163 /* round up and renormalize if necessary */
1166 1164 if (++z1 == 0) {
1167 1165 if (++z0 == 0) {
1168 1166 z0 = 0x80000000;
1169 1167 ez++;
1170 1168 } else if (z0 == 0x80000000) {
1171 1169 /* rounded up to smallest normal */
1172 1170 ez = 1;
1173 1171 if ((rm == FCW_RP && z2 >
1174 1172 0x80000000u) || (rm == FCW_RN &&
1175 1173 z2 >= 0xc0000000u))
1176 1174 /*
1177 1175 * would have rounded up to
1178 1176 * smallest normal even with
1179 1177 * unbounded range
1180 1178 */
1181 1179 tinyafter = 0;
1182 1180 }
1183 1181 }
1184 1182 }
1185 1183 }
1186 1184
1187 1185 /* restore the control and status words, check for over/underflow */
1188 1186 __fenv_setcwsw(&oldcwsw);
1189 1187 if (ez >= 0x7fff) {
1190 1188 if (rm == FCW_RN || rm == FCW_RP) {
1191 1189 zz.i[2] = sz | 0x7fff;
1192 1190 zz.i[1] = 0x80000000;
1193 1191 zz.i[0] = 0;
1194 1192 } else {
1195 1193 zz.i[2] = sz | 0x7ffe;
1196 1194 zz.i[1] = 0xffffffff;
1197 1195 zz.i[0] = 0xffffffff;
1198 1196 }
1199 1197 dummy = huge;
1200 1198 dummy *= huge;
1201 1199 } else {
1202 1200 zz.i[2] = sz | ez;
1203 1201 zz.i[1] = z0;
1204 1202 zz.i[0] = z1;
1205 1203
1206 1204 /*
1207 1205 * tinyafter => result rounded w/ unbounded range would be tiny,
1208 1206 * z2 nonzero => result delivered is inexact
1209 1207 */
1210 1208 if (tinyafter) {
1211 1209 dummy = tiny;
1212 1210 if (z2)
1213 1211 dummy *= tiny;
1214 1212 else
1215 1213 dummy -= tiny2;
1216 1214 } else if (z2) {
1217 1215 dummy = huge;
1218 1216 dummy += tiny;
1219 1217 }
1220 1218 }
1221 1219
1222 1220 return (zz.e);
1223 1221 }
1224 1222
1225 1223 #else
1226 1224 #error Unknown architecture
1227 1225 #endif
↓ open down ↓ |
1185 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX