5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #pragma weak fmal = __fmal
31
32 #include "libm.h"
33 #include "fma.h"
34 #include "fenv_inlines.h"
35
36 #if defined(__sparc)
37
/*
 * Table of double constants spelled as pairs of 32-bit words, with the
 * names used by the SPARC __fmal attached by the macros that follow.
 * i[0] carries the high-order word (sign, exponent, top fraction bits)
 * and i[1] the low-order word.
 */
38 static const union {
39 unsigned i[2];
40 double d;
41 } C[] = {
42 { 0x3fe00000u, 0 }, /* C[0]: 0.5 */
43 { 0x40000000u, 0 }, /* C[1]: 2.0 */
44 { 0x3ef00000u, 0 }, /* C[2]: 2^-16 */
45 { 0x3e700000u, 0 }, /* C[3]: 2^-24 */
46 { 0x41300000u, 0 }, /* C[4]: 2^20 */
47 { 0x3e300000u, 0 }, /* C[5]: 2^-28 */
48 { 0x3b300000u, 0 }, /* C[6]: 2^-76 */
49 { 0x38300000u, 0 }, /* C[7]: 2^-124 */
50 { 0x42300000u, 0 }, /* C[8]: 2^36 */
51 { 0x3df00000u, 0 }, /* C[9]: 2^-32 */
52 { 0x7fe00000u, 0 }, /* C[10]: 2^1023 */
53 { 0x00100000u, 0 }, /* C[11]: 2^-1022, the smallest normal double */
54 { 0x00100001u, 0 }, /* C[12]: 2^-1022 * (1 + 2^-20) */
55 { 0, 0 }, /* C[13]: +0.0 */
56 { 0x7ff00000u, 0 }, /* C[14]: +infinity */
57 { 0x7ff00001u, 0 } /* C[15]: nan with the top fraction bit clear */
/*
 * NOTE(review): the embedded listing numbers skip from 57 to 63 here, so
 * the closing of the table and the names given to C[0], C[1] and C[2]
 * are not visible in this view (the code below uses half, two and
 * twom16, which are not defined anywhere that is visible) - confirm
 * against the complete file.
 */
63 #define twom24 C[3].d
64 #define two20 C[4].d
65 #define twom28 C[5].d
66 #define twom76 C[6].d
67 #define twom124 C[7].d
68 #define two36 C[8].d
69 #define twom32 C[9].d
70 #define huge C[10].d
71 #define tiny C[11].d
72 #define tiny2 C[12].d
73 #define zero C[13].d
74 #define inf C[14].d
75 #define snan C[15].d
76
/*
 * fsr image whose top two bits (the field __fmal reads as fsr >> 30) are
 * both set; __fmal installs it to select round-to-negative-infinity for
 * its internal arithmetic.
 */
77 static const unsigned int fsr_rm = 0xc0000000u;
78
79 /*
80 * fmal for SPARC: 128-bit quad precision, big-endian
 *
 * Returns x * y + z for quad precision operands.
 *
 * The operands are classified first (zero, finite nonzero, inf, quiet
 * nan); a signaling nan raises invalid and is returned with its top
 * fraction bit set.  Every case involving a zero, inf or nan is then
 * resolved without the multi-word arithmetic.
 *
 * For finite nonzero operands the significands of x and y are cut into
 * five pieces each (held in doubles), multiplied piecewise, and the
 * product is converted into 32-bit words xy0..xy7.  z is unpacked into
 * z0..z7, the operand of smaller magnitude is shifted right with a
 * sticky bit, the two are added or subtracted word by word, and the sum
 * is normalized and rounded according to the rounding field of the fsr
 * value read on entry.  Exceptions are recorded in the local copy of the
 * fsr and applied when it is written back at the end.
 *
 * NOTE(review): in this listing the embedded numbering skips about
 * thirty lines inside the "extract the significand of x*y" step, so the
 * statements that assign xy1..xy4 are not visible here.
81 */
82 long double
83 __fmal(long double x, long double y, long double z) {
84 union {
85 unsigned int i[4]; /* i[0] holds sign, exponent and top 16 fraction bits */
86 long double q;
87 } xx, yy, zz;
88 union {
89 unsigned int i[2]; /* i[1] is read below to pull integer bits out of d */
90 double d;
91 } u;
92 double dx[5], dy[5], dxy[9], c, s;
93 unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
94 unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
95 unsigned int rm, sticky;
96 unsigned int fsr;
97 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
98 int cx, cy, cz; /* operand class: 0 zero, 1 finite nonzero, 2 inf, 3 quiet nan */
99 volatile double dummy; /* target of arithmetic done only to raise exceptions */
100
101 /* extract the high order words of the arguments */
102 xx.q = x;
103 yy.q = y;
104 zz.q = z;
105 hx = xx.i[0] & ~0x80000000; /* high word with the sign bit cleared */
106 hy = yy.i[0] & ~0x80000000;
107 hz = zz.i[0] & ~0x80000000;
108
109 /*
110 * distinguish zero, finite nonzero, infinite, and quiet nan
111 * arguments; raise invalid and return for signaling nans
112 */
113 if (hx >= 0x7fff0000) { /* exponent field is all ones */
114 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) { /* nonzero fraction: nan */
115 if (!(hx & 0x8000)) { /* top fraction bit clear */
116 /* signaling nan, raise invalid */
117 dummy = snan;
118 dummy += snan;
119 xx.i[0] |= 0x8000; /* return the nan with its top fraction bit set */
120 return (xx.q);
121 }
122 cx = 3; /* quiet nan */
123 } else
124 cx = 2; /* inf */
125 } else if (hx == 0) {
126 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
127 /* subnormal or zero */
128 } else
129 cx = 1; /* finite nonzero */
130
/* same classification for y */
131 if (hy >= 0x7fff0000) {
132 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
133 if (!(hy & 0x8000)) {
134 dummy = snan;
135 dummy += snan;
136 yy.i[0] |= 0x8000;
137 return (yy.q);
138 }
139 cy = 3;
140 } else
141 cy = 2;
142 } else if (hy == 0) {
143 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
144 } else
145 cy = 1;
146
/* same classification for z */
147 if (hz >= 0x7fff0000) {
148 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
149 if (!(hz & 0x8000)) {
150 dummy = snan;
151 dummy += snan;
152 zz.i[0] |= 0x8000;
153 return (zz.q);
154 }
155 cz = 3;
156 } else
157 cz = 2;
158 } else if (hz == 0) {
159 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
160 } else
161 cz = 1;
162
163 /* get the fsr and clear current exceptions */
164 __fenv_getfsr32(&fsr);
165 fsr &= ~FSR_CEXC; /* cleared in the local copy only, until it is written back */
166
167 /* handle all other zero, inf, and nan cases */
168 if (cx != 1 || cy != 1 || cz != 1) {
169 /* if x or y is a quiet nan, return it */
170 if (cx == 3) {
171 __fenv_setfsr32(&fsr);
172 return (x);
173 }
174 if (cy == 3) {
175 __fenv_setfsr32(&fsr);
176 return (y);
177 }
178
179 /* if x*y is 0*inf, raise invalid and return the default nan */
180 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
181 dummy = zero;
182 dummy *= inf;
183 zz.i[0] = 0x7fffffff; /* sign clear, every other bit set */
184 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
185 return (zz.q);
186 }
187
188 /* if z is a quiet nan, return it */
189 if (cz == 3) {
190 __fenv_setfsr32(&fsr);
191 return (z);
192 }
193
194 /*
195 * now none of x, y, or z is nan; handle cases where x or y
196 * is inf
197 */
198 if (cx == 2 || cy == 2) {
199 /*
200 * if z is also inf, either we have inf-inf or
201 * the result is the same as z depending on signs
202 */
203 if (cz == 2) {
/* negative here means sign(x*y) differs from sign(z) */
204 if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
205 dummy = inf;
206 dummy -= inf;
207 zz.i[0] = 0x7fffffff;
208 zz.i[1] = zz.i[2] = zz.i[3] =
209 0xffffffff;
210 return (zz.q);
211 }
212 __fenv_setfsr32(&fsr);
213 return (z);
214 }
215
216 /* otherwise the result is inf with appropriate sign */
217 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
218 0x7fff0000;
219 zz.i[1] = zz.i[2] = zz.i[3] = 0;
220 __fenv_setfsr32(&fsr);
221 return (zz.q);
222 }
223
224 /* if z is inf, return it */
225 if (cz == 2) {
226 __fenv_setfsr32(&fsr);
227 return (z);
228 }
229
230 /*
231 * now x, y, and z are all finite; handle cases where x or y
232 * is zero
233 */
234 if (cx == 0 || cy == 0) {
235 /* either we have 0-0 or the result is the same as z */
236 if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
237 0) {
/*
 * zeros of opposite sign: the sum is -0 only when the
 * rounding field equals FSR_RM, otherwise +0; the other
 * words of zz are already zero because z is zero
 */
238 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
239 0;
240 __fenv_setfsr32(&fsr);
241 return (zz.q);
242 }
243 __fenv_setfsr32(&fsr);
244 return (z);
245 }
246
247 /* if we get here, x and y are nonzero finite, z must be zero */
248 return (x * y);
249 }
250
251 /*
252 * now x, y, and z are all finite and nonzero; set round-to-
253 * negative-infinity mode
254 */
255 __fenv_setfsr32(&fsr_rm);
256
257 /*
258 * get the signs and exponents and normalize the significands
259 * of x and y
260 */
261 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000; /* sign of the product */
262 ex = hx >> 16; /* biased exponent field */
263 hx &= 0xffff; /* top 16 fraction bits */
264 if (!ex) {
/*
 * x is subnormal: move the significand up by whole 32-bit words
 * first (ex drops by 32 per word), then one bit at a time until
 * the integer bit 0x10000 appears in hx
 */
265 if (hx | (xx.i[1] & 0xfffe0000)) {
266 ex = 1;
267 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
268 hx = xx.i[1];
269 xx.i[1] = xx.i[2];
270 xx.i[2] = xx.i[3];
271 xx.i[3] = 0;
272 ex = -31;
273 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
274 hx = xx.i[2];
275 xx.i[1] = xx.i[3];
276 xx.i[2] = xx.i[3] = 0;
277 ex = -63;
278 } else {
279 hx = xx.i[3];
280 xx.i[1] = xx.i[2] = xx.i[3] = 0;
281 ex = -95;
282 }
283 while ((hx & 0x10000) == 0) {
284 hx = (hx << 1) | (xx.i[1] >> 31);
285 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
286 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
287 xx.i[3] <<= 1;
288 ex--;
289 }
290 } else
291 hx |= 0x10000; /* normal: make the integer bit explicit */
292 ey = hy >> 16;
293 hy &= 0xffff;
294 if (!ey) {
/* y is subnormal: normalize exactly as for x above */
295 if (hy | (yy.i[1] & 0xfffe0000)) {
296 ey = 1;
297 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
298 hy = yy.i[1];
299 yy.i[1] = yy.i[2];
300 yy.i[2] = yy.i[3];
301 yy.i[3] = 0;
302 ey = -31;
303 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
304 hy = yy.i[2];
305 yy.i[1] = yy.i[3];
306 yy.i[2] = yy.i[3] = 0;
307 ey = -63;
308 } else {
309 hy = yy.i[3];
310 yy.i[1] = yy.i[2] = yy.i[3] = 0;
311 ey = -95;
312 }
313 while ((hy & 0x10000) == 0) {
314 hy = (hy << 1) | (yy.i[1] >> 31);
315 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
316 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
317 yy.i[3] <<= 1;
318 ey--;
319 }
320 } else
321 hy |= 0x10000;
322 exy = ex + ey - 0x3fff; /* exponent of the product, one bias removed */
323
324 /* convert the significands of x and y to doubles */
/*
 * piece 0 is the 17-bit leading part (integer bit plus 16 fraction
 * bits); pieces 1..4 are successive 24-bit fields of i[1]..i[3];
 * c is the running scale factor applied to each piece
 */
325 c = twom16;
326 dx[0] = (double) ((int) hx) * c;
327 dy[0] = (double) ((int) hy) * c;
328
329 c *= twom24;
330 dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
331 dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
332
333 c *= twom24;
334 dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
335 0xffffff)) * c;
336 dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
337 0xffffff)) * c;
338
339 c *= twom24;
340 dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
341 0xffffff)) * c;
342 dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
343 0xffffff)) * c;
344
345 c *= twom24;
346 dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
347 dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
348
349 /* form the "digits" of the product */
/* dxy[k] collects every partial product dx[i] * dy[j] with i + j == k */
350 dxy[0] = dx[0] * dy[0];
351 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
352 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
353 dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
354 dx[3] * dy[0];
355 dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
356 dx[3] * dy[1] + dx[4] * dy[0];
357 dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
358 dx[4] * dy[1];
359 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
360 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
361 dxy[8] = dx[4] * dy[4];
362
363 /* split odd-numbered terms and combine into even-numbered terms */
/*
 * adding and then subtracting a power of two leaves in c the part of
 * the odd term at or above that constant's rounding granularity; c is
 * moved up to the even term above and the remainder to the one below
 */
364 c = (dxy[1] + two20) - two20;
365 dxy[0] += c;
366 dxy[1] -= c;
367 c = (dxy[3] + twom28) - twom28;
368 dxy[2] += c + dxy[1];
369 dxy[3] -= c;
370 c = (dxy[5] + twom76) - twom76;
371 dxy[4] += c + dxy[3];
372 dxy[5] -= c;
373 c = (dxy[7] + twom124) - twom124;
374 dxy[6] += c + dxy[5];
375 dxy[8] += (dxy[7] - c);
376
377 /* propagate carries, adjusting the exponent if need be */
/* odd slots are reused for running sums taken from the low end upward */
378 dxy[7] = dxy[6] + dxy[8];
379 dxy[5] = dxy[4] + dxy[7];
380 dxy[3] = dxy[2] + dxy[5];
381 dxy[1] = dxy[0] + dxy[3];
382 if (dxy[1] >= two) {
/* leading sum is at least 2: halve every term and bump the exponent */
383 dxy[0] *= half;
384 dxy[1] *= half;
385 dxy[2] *= half;
386 dxy[3] *= half;
387 dxy[4] *= half;
388 dxy[5] *= half;
389 dxy[6] *= half;
390 dxy[7] *= half;
391 dxy[8] *= half;
392 exy++;
393 }
394
395 /* extract the significand of x*y */
/*
 * each step adds a power-of-two offset s to a running sum and reads
 * u.i[1] as the next word of the significand; subtracting s again
 * gives, in c, the part just extracted so that it can be removed
 * from the remaining terms
 */
396 s = two36;
397 u.d = c = dxy[1] + s;
398 xy0 = u.i[1];
399 c -= s;
400 dxy[1] -= c;
401 dxy[0] -= c;
/*
 * NOTE(review): the embedded numbering skips about thirty lines at this
 * point; the steps that assign xy1..xy4 are not visible in this view.
 */
431 s *= twom32;
432 u.d = c = dxy[7] + s;
433 xy5 = u.i[1];
434 c -= s;
435 dxy[8] += (dxy[6] - c);
436
437 s *= twom32;
438 u.d = c = dxy[8] + s;
439 xy6 = u.i[1];
440 c -= s;
441 dxy[8] -= c;
442
443 s *= twom32;
444 u.d = c = dxy[8] + s;
445 xy7 = u.i[1];
446
447 /* extract the sign, exponent, and significand of z */
448 sz = zz.i[0] & 0x80000000;
449 ez = hz >> 16;
450 z0 = hz & 0xffff;
451 if (!ez) {
/* z is subnormal: normalize into z0..z3 the same way as x and y */
452 if (z0 | (zz.i[1] & 0xfffe0000)) {
453 z1 = zz.i[1];
454 z2 = zz.i[2];
455 z3 = zz.i[3];
456 ez = 1;
457 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
458 z0 = zz.i[1];
459 z1 = zz.i[2];
460 z2 = zz.i[3];
461 z3 = 0;
462 ez = -31;
463 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
464 z0 = zz.i[2];
465 z1 = zz.i[3];
466 z2 = z3 = 0;
467 ez = -63;
468 } else {
469 z0 = zz.i[3];
470 z1 = z2 = z3 = 0;
471 ez = -95;
472 }
473 while ((z0 & 0x10000) == 0) {
474 z0 = (z0 << 1) | (z1 >> 31);
475 z1 = (z1 << 1) | (z2 >> 31);
476 z2 = (z2 << 1) | (z3 >> 31);
477 z3 <<= 1;
478 ez--;
479 }
480 } else {
481 z0 |= 0x10000;
482 z1 = zz.i[1];
483 z2 = zz.i[2];
484 z3 = zz.i[3];
485 }
486 z4 = z5 = z6 = z7 = 0; /* z has no bits below its fourth word */
487
488 /*
489 * now x*y is represented by sxy, exy, and xy[0-7], and z is
490 * represented likewise; swap if need be so |xy| <= |z|
491 */
492 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
493 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
494 (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
/* e serves as the temporary for each exchange */
495 e = sxy; sxy = sz; sz = e;
496 e = exy; exy = ez; ez = e;
497 e = xy0; xy0 = z0; z0 = e;
498 e = xy1; xy1 = z1; z1 = e;
499 e = xy2; xy2 = z2; z2 = e;
500 e = xy3; xy3 = z3; z3 = e;
/* the four low words were zero on the z side, so they just move over */
501 z4 = xy4; xy4 = 0;
502 z5 = xy5; xy5 = 0;
503 z6 = xy6; xy6 = 0;
504 z7 = xy7; xy7 = 0;
505 }
506
507 /* shift the significand of xy keeping a sticky bit */
/*
 * e is the right shift needed to align xy with z.  Each branch handles
 * one range of 32 shift positions: whole words move down first, the
 * remaining bits are shifted across word boundaries, and anything
 * shifted out is ORed into the lowest bit of xy7.  The two-step form
 * (w << 1) << (k - e) keeps every individual shift count below 32.
 */
508 e = ez - exy;
509 if (e > 236) {
/* nothing of xy is kept except a sticky bit */
510 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
511 xy7 = 1;
512 } else if (e >= 224) {
513 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
514 ((xy0 << 1) << (255 - e));
515 xy7 = xy0 >> (e - 224);
516 if (sticky)
517 xy7 |= 1;
518 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
519 } else if (e >= 192) {
520 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
521 ((xy1 << 1) << (223 - e));
522 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
523 if (sticky)
524 xy7 |= 1;
525 xy6 = xy0 >> (e - 192);
526 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
527 } else if (e >= 160) {
528 sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
529 ((xy2 << 1) << (191 - e));
530 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
531 if (sticky)
532 xy7 |= 1;
533 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
534 xy5 = xy0 >> (e - 160);
535 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
536 } else if (e >= 128) {
537 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
538 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
539 if (sticky)
540 xy7 |= 1;
541 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
542 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
543 xy4 = xy0 >> (e - 128);
544 xy0 = xy1 = xy2 = xy3 = 0;
545 } else if (e >= 96) {
546 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
547 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
548 if (sticky)
549 xy7 |= 1;
550 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
551 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
552 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
553 xy3 = xy0 >> (e - 96);
554 xy0 = xy1 = xy2 = 0;
555 } else if (e >= 64) {
556 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
557 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
558 if (sticky)
559 xy7 |= 1;
560 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
561 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
562 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
563 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
564 xy2 = xy0 >> (e - 64);
565 xy0 = xy1 = 0;
566 } else if (e >= 32) {
567 sticky = xy7 | ((xy6 << 1) << (63 - e));
568 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
569 if (sticky)
570 xy7 |= 1;
571 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
572 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
573 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
574 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
575 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
576 xy1 = xy0 >> (e - 32);
577 xy0 = 0;
578 } else if (e) {
579 sticky = (xy7 << 1) << (31 - e);
580 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
581 if (sticky)
582 xy7 |= 1;
583 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
584 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
585 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
586 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
587 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
588 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
589 xy0 >>= e;
590 }
591
592 /* if this is a magnitude subtract, negate the significand of xy */
593 if (sxy ^ sz) {
/*
 * two's complement over the eight words: invert the upper seven,
 * negate the lowest, and carry the +1 upward through every word
 * that wraps to zero
 */
594 xy0 = ~xy0;
595 xy1 = ~xy1;
596 xy2 = ~xy2;
597 xy3 = ~xy3;
598 xy4 = ~xy4;
599 xy5 = ~xy5;
600 xy6 = ~xy6;
601 xy7 = -xy7;
602 if (xy7 == 0)
603 if (++xy6 == 0)
604 if (++xy5 == 0)
605 if (++xy4 == 0)
606 if (++xy3 == 0)
607 if (++xy2 == 0)
608 if (++xy1 == 0)
609 xy0++;
610 }
611
612 /* add, propagating carries */
/*
 * e is reused as the carry flag: after adding a word, unsigned
 * wraparound is detected by comparing the sum with the addend
 * (<= rather than < when an incoming carry was also added)
 */
613 z7 += xy7;
614 e = (z7 < xy7);
615 z6 += xy6;
616 if (e) {
617 z6++;
618 e = (z6 <= xy6);
619 } else
620 e = (z6 < xy6);
621 z5 += xy5;
622 if (e) {
623 z5++;
624 e = (z5 <= xy5);
625 } else
626 e = (z5 < xy5);
627 z4 += xy4;
628 if (e) {
629 z4++;
630 e = (z4 <= xy4);
631 } else
632 e = (z4 < xy4);
633 z3 += xy3;
634 if (e) {
635 z3++;
636 e = (z3 <= xy3);
637 } else
638 e = (z3 < xy3);
639 z2 += xy2;
640 if (e) {
641 z2++;
642 e = (z2 <= xy2);
643 } else
644 e = (z2 < xy2);
645 z1 += xy1;
646 if (e) {
647 z1++;
648 e = (z1 <= xy1);
649 } else
650 e = (z1 < xy1);
651 z0 += xy0;
652 if (e)
653 z0++;
654
655 /* postnormalize and collect rounding information into z4 */
656 if (ez < 1) {
657 /* result is tiny; shift right until exponent is within range */
658 e = 1 - ez;
659 if (e > 116) {
660 z4 = 1; /* result can't be exactly zero */
661 z0 = z1 = z2 = z3 = 0;
662 } else if (e >= 96) {
663 sticky = z7 | z6 | z5 | z4 | z3 | z2 |
664 ((z1 << 1) << (127 - e));
665 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
666 if (sticky)
667 z4 |= 1;
668 z3 = z0 >> (e - 96);
669 z0 = z1 = z2 = 0;
670 } else if (e >= 64) {
671 sticky = z7 | z6 | z5 | z4 | z3 |
672 ((z2 << 1) << (95 - e));
673 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
674 if (sticky)
675 z4 |= 1;
676 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
677 z2 = z0 >> (e - 64);
678 z0 = z1 = 0;
679 } else if (e >= 32) {
680 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
681 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
682 if (sticky)
683 z4 |= 1;
684 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
685 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
686 z1 = z0 >> (e - 32);
687 z0 = 0;
688 } else {
689 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
690 z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
691 if (sticky)
692 z4 |= 1;
693 z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
694 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
695 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
696 z0 >>= e;
697 }
698 ez = 1;
699 } else if (z0 >= 0x20000) {
700 /* carry out; shift right by one */
701 sticky = (z4 & 1) | z5 | z6 | z7;
702 z4 = (z4 >> 1) | (z3 << 31);
703 if (sticky)
704 z4 |= 1;
705 z3 = (z3 >> 1) | (z2 << 31);
706 z2 = (z2 >> 1) | (z1 << 31);
707 z1 = (z1 >> 1) | (z0 << 31);
708 z0 >>= 1;
709 ez++;
710 } else {
711 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
712 != 0) {
713 /*
714 * borrow/cancellation; shift left as much as
715 * exponent allows
716 */
/* whole words first, 32 exponent steps at a time */
717 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
718 z0 = z1;
719 z1 = z2;
720 z2 = z3;
721 z3 = z4;
722 z4 = z5;
723 z5 = z6;
724 z6 = z7;
725 z7 = 0;
726 ez -= 32;
727 }
/* then single bits, stopping at exponent 1 if the integer bit never appears */
728 while (z0 < 0x10000 && ez > 1) {
729 z0 = (z0 << 1) | (z1 >> 31);
730 z1 = (z1 << 1) | (z2 >> 31);
731 z2 = (z2 << 1) | (z3 >> 31);
732 z3 = (z3 << 1) | (z4 >> 31);
733 z4 = (z4 << 1) | (z5 >> 31);
734 z5 = (z5 << 1) | (z6 >> 31);
735 z6 = (z6 << 1) | (z7 >> 31);
736 z7 <<= 1;
737 ez--;
738 }
739 }
/* anything left below z4 only matters as a sticky bit */
740 if (z5 | z6 | z7)
741 z4 |= 1;
742 }
743
744 /* get the rounding mode */
745 rm = fsr >> 30; /* taken from the fsr value read on entry */
746
747 /* strip off the integer bit, if there is one */
748 ibit = z0 & 0x10000;
749 if (ibit)
750 z0 -= 0x10000;
751 else {
752 ez = 0; /* no integer bit: exponent field of a subnormal or zero */
753 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
754 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
755 zz.i[1] = zz.i[2] = zz.i[3] = 0;
756 __fenv_setfsr32(&fsr);
757 return (zz.q);
758 }
759 }
760
761 /*
762 * flip the sense of directed roundings if the result is negative;
763 * the logic below applies to a positive result
764 */
765 if (sz)
766 rm ^= rm >> 1; /* exchanges the values 2 and 3; 0 and 1 are unchanged */
767
768 /* round and raise exceptions */
769 if (z4) {
770 fsr |= FSR_NXC;
771
772 /* decide whether to round the fraction up */
/*
 * round up for FSR_RP always; for FSR_RN when z4 is above the
 * halfway value 0x80000000, or exactly halfway with z3 odd
 */
773 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
774 (z4 == 0x80000000u && (z3 & 1))))) {
775 /* round up and renormalize if necessary */
776 if (++z3 == 0)
777 if (++z2 == 0)
778 if (++z1 == 0)
779 if (++z0 == 0x10000) {
780 z0 = 0;
781 ez++;
782 }
783 }
784 }
785
786 /* check for under/overflow */
787 if (ez >= 0x7fff) {
/*
 * overflow: infinity for FSR_RN and FSR_RP (senses already
 * flipped for a negative result), otherwise the largest finite
 * magnitude
 */
788 if (rm == FSR_RN || rm == FSR_RP) {
789 zz.i[0] = sz | 0x7fff0000;
790 zz.i[1] = zz.i[2] = zz.i[3] = 0;
791 } else {
792 zz.i[0] = sz | 0x7ffeffff;
793 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
794 }
795 fsr |= FSR_OFC | FSR_NXC;
796 } else {
797 zz.i[0] = sz | (ez << 16) | z0;
798 zz.i[1] = z1;
799 zz.i[2] = z2;
800 zz.i[3] = z3;
801
802 /*
803 * !ibit => exact result was tiny before rounding,
804 * z4 nonzero => result delivered is inexact
805 */
806 if (!ibit) {
807 if (z4)
808 fsr |= FSR_UFC | FSR_NXC;
809 else if (fsr & FSR_UFM)
810 fsr |= FSR_UFC;
811 }
812 }
813
814 /* restore the fsr and emulate exceptions as needed */
/*
 * NOTE(review): the test below ANDs the current-exception bits with
 * fsr >> 23, presumably the trap-enable field; when a match is found
 * the fsr is restored and a double operation that produces the same
 * exception is executed.  Otherwise the low five bits of fsr are
 * copied into bits 5..9 (presumably the accrued-exception field)
 * before the fsr is written back.  Confirm against the FSR layout.
 */
815 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
816 __fenv_setfsr32(&fsr);
817 if (fsr & FSR_OFC) {
818 dummy = huge;
819 dummy *= huge;
820 } else if (fsr & FSR_UFC) {
821 dummy = tiny;
822 if (fsr & FSR_NXC)
823 dummy *= tiny;
824 else
825 dummy -= tiny2;
826 } else {
827 dummy = huge;
828 dummy += tiny;
829 }
830 } else {
831 fsr |= (fsr & 0x1f) << 5;
832 __fenv_setfsr32(&fsr);
833 }
834 return (zz.q);
835 }
836
837 #elif defined(__x86)
838
/*
 * Table of double constants spelled as pairs of 32-bit words, with the
 * names used by the x86 __fmal attached by the macros that follow.
 * Here i[1] carries the high-order word (sign, exponent, top fraction
 * bits) and i[0] the low-order word.
 */
839 static const union {
840 unsigned i[2];
841 double d;
842 } C[] = {
843 { 0, 0x3fe00000u }, /* C[0]: 0.5 */
844 { 0, 0x40000000u }, /* C[1]: 2.0 */
845 { 0, 0x3df00000u }, /* C[2]: 2^-32 */
846 { 0, 0x3bf00000u }, /* C[3]: 2^-64 */
847 { 0, 0x41f00000u }, /* C[4]: 2^32 */
848 { 0, 0x43e00000u }, /* C[5]: 2^63 */
849 { 0, 0x7fe00000u }, /* C[6]: 2^1023 */
850 { 0, 0x00100000u }, /* C[7]: 2^-1022, the smallest normal double */
851 { 0, 0x00100001u } /* C[8]: 2^-1022 * (1 + 2^-20) */
852 };
853
854 #define half C[0].d
855 #define two C[1].d
856 #define twom32 C[2].d
857 #define twom64 C[3].d
858 #define two32 C[4].d
859 #define two63 C[5].d
860 #define huge C[6].d
861 #define tiny C[7].d
862 #define tiny2 C[8].d
863
/*
 * NI is the number of 32-bit words in the array that __fmal overlays on
 * a long double: 4 when __amd64 is defined, 3 otherwise.
 */
864 #if defined(__amd64)
865 #define NI 4
866 #else
867 #define NI 3
868 #endif
869
870 /*
871 * fmal for x86: 80-bit extended double precision, little-endian
 *
 * Returns x * y + z for extended precision operands.
 *
 * Operands that are inf, nan or zero are handed to ordinary long double
 * arithmetic.  Otherwise the exponents are separated, the control and
 * status words are saved and replaced (exceptions masked, 64-bit
 * precision, round toward zero), and the product of the significands is
 * formed to 128 bits as a high part and a low correction.  Product and
 * z are then held as 32-bit words xy0..xy4 and z0..z4; the operand of
 * smaller magnitude is shifted right with a sticky bit, the two are
 * added or subtracted, and the sum is normalized and rounded according
 * to the rounding field of the saved control word.  After the saved
 * words are restored, overflow, underflow and inexact are signalled by
 * small double computations on 'dummy'.
872 */
873 long double
874 __fmal(long double x, long double y, long double z) {
875 union {
/*
 * i[0] and i[1] are the low and high halves of the significand;
 * the low 16 bits of i[2] hold the sign and exponent
 */
876 unsigned i[NI];
877 long double e;
878 } xx, yy, zz;
879 long double xhi, yhi, xlo, ylo, t;
880 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
881 unsigned oldcwsw, cwsw, rm, sticky, carry;
882 int ex, ey, ez, exy, sxy, sz, e, tinyafter;
883 volatile double dummy; /* target of arithmetic done only to raise exceptions */
884
885 /* extract the exponents of the arguments */
886 xx.e = x;
887 yy.e = y;
888 zz.e = z;
889 ex = xx.i[2] & 0x7fff; /* biased exponent without the sign bit */
890 ey = yy.i[2] & 0x7fff;
891 ez = zz.i[2] & 0x7fff;
892
893 /* dispense with inf, nan, and zero cases */
894 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
895 (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
896 return (x * y + z);
897
898 if (ez == 0x7fff) /* z is inf or nan */
899 return (x + z); /* avoid spurious under/overflow in x * y */
900
901 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */
902 /*
903 * x * y isn't zero but could underflow to zero,
904 * so don't add z, lest we perturb the sign
905 */
906 return (x * y);
907
908 /*
909 * now x, y, and z are all finite and nonzero; extract signs and
910 * normalize the significands (this will raise the denormal operand
911 * exception if need be)
912 */
913 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000; /* sign of the product */
914 sz = zz.i[2] & 0x8000;
/*
 * an operand whose exponent field is zero is scaled up by 2^63 and
 * its exponent is corrected by the same amount
 */
915 if (!ex) {
916 xx.e = x * two63;
917 ex = (xx.i[2] & 0x7fff) - 63;
918 }
919 if (!ey) {
920 yy.e = y * two63;
921 ey = (yy.i[2] & 0x7fff) - 63;
922 }
923 if (!ez) {
924 zz.e = z * two63;
925 ez = (zz.i[2] & 0x7fff) - 63;
926 }
927
928 /*
929 * save the control and status words, mask all exceptions, and
930 * set rounding to 64-bit precision and toward-zero
931 */
932 __fenv_getcwsw(&oldcwsw);
933 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
934 __fenv_setcwsw(&cwsw);
935
936 /* multiply x*y to 128 bits */
937 exy = ex + ey - 0x3fff; /* exponent of the product, one bias removed */
/* force sign 0 and exponent 0x3fff so only the significands remain */
938 xx.i[2] = 0x3fff;
939 yy.i[2] = 0x3fff;
940 x = xx.e;
941 y = yy.e;
/*
 * split each significand into a high part (xhi, yhi) obtained by
 * adding and subtracting powers of two, and the remainder (xlo, ylo)
 */
942 xhi = ((x + twom32) + two32) - two32;
943 yhi = ((y + twom32) + two32) - two32;
944 xlo = x - xhi;
945 ylo = y - yhi;
/*
 * x becomes the rounded product; y becomes what the partial products
 * add up to beyond it, i.e. the low part of the 128-bit product
 */
946 x *= y;
947 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
948 if (x >= two) {
/* product is at least 2: halve both parts and bump the exponent */
949 x *= half;
950 y *= half;
951 exy++;
952 }
953
954 /* extract the significands */
955 xx.e = x;
956 xy0 = xx.i[1]; /* most significant word of the product */
957 xy1 = xx.i[0];
/*
 * the low part y is offset by 2^-32 and then by 2^-64 so that the low
 * significand word of each sum can be read off as the third and
 * fourth words of the product
 */
958 yy.e = t = y + twom32;
959 xy2 = yy.i[0];
960 yy.e = (y - (t - twom32)) + twom64;
961 xy3 = yy.i[0];
962 xy4 = 0;
963 z0 = zz.i[1];
964 z1 = zz.i[0];
965 z2 = z3 = z4 = 0; /* z has no bits below its second word */
966
967 /*
968 * now x*y is represented by sxy, exy, and xy[0-4], and z is
969 * represented likewise; swap if need be so |xy| <= |z|
970 */
971 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
972 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
/* e serves as the temporary for each exchange */
973 e = sxy; sxy = sz; sz = e;
974 e = exy; exy = ez; ez = e;
975 e = xy0; xy0 = z0; z0 = e;
976 e = xy1; xy1 = z1; z1 = e;
/* the two low words were zero on the z side, so they just move over */
977 z2 = xy2; xy2 = 0;
978 z3 = xy3; xy3 = 0;
979 }
980
981 /* shift the significand of xy keeping a sticky bit */
/*
 * e is the right shift needed to align xy with z.  Each branch handles
 * one range of 32 shift positions; bits shifted out are ORed into the
 * lowest bit of xy4.  The two-step form (w << 1) << (k - e) keeps
 * every individual shift count below 32.
 */
982 e = ez - exy;
983 if (e > 130) {
/* nothing of xy is kept except a sticky bit */
984 xy0 = xy1 = xy2 = xy3 = 0;
985 xy4 = 1;
986 } else if (e >= 128) {
987 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
988 xy4 = xy0 >> (e - 128);
989 if (sticky)
990 xy4 |= 1;
991 xy0 = xy1 = xy2 = xy3 = 0;
992 } else if (e >= 96) {
993 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
994 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
995 if (sticky)
996 xy4 |= 1;
997 xy3 = xy0 >> (e - 96);
998 xy0 = xy1 = xy2 = 0;
999 } else if (e >= 64) {
1000 sticky = xy3 | ((xy2 << 1) << (95 - e));
1001 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1002 if (sticky)
1003 xy4 |= 1;
1004 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1005 xy2 = xy0 >> (e - 64);
1006 xy0 = xy1 = 0;
1007 } else if (e >= 32) {
1008 sticky = (xy3 << 1) << (63 - e);
1009 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1010 if (sticky)
1011 xy4 |= 1;
1012 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1013 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1014 xy1 = xy0 >> (e - 32);
1015 xy0 = 0;
1016 } else if (e) {
/* fewer than 32 positions: xy4 was zero, so nothing is lost */
1017 xy4 = (xy3 << 1) << (31 - e);
1018 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1019 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1020 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1021 xy0 >>= e;
1022 }
1023
1024 /* if this is a magnitude subtract, negate the significand of xy */
1025 if (sxy ^ sz) {
/*
 * two's complement over the five words: invert the upper four,
 * negate the lowest, and carry the +1 upward through every word
 * that wraps to zero
 */
1026 xy0 = ~xy0;
1027 xy1 = ~xy1;
1028 xy2 = ~xy2;
1029 xy3 = ~xy3;
1030 xy4 = -xy4;
1031 if (xy4 == 0)
1032 if (++xy3 == 0)
1033 if (++xy2 == 0)
1034 if (++xy1 == 0)
1035 xy0++;
1036 }
1037
1038 /* add, propagating carries */
/*
 * unsigned wraparound is detected by comparing each sum with the
 * addend (<= rather than < when an incoming carry was also added)
 */
1039 z4 += xy4;
1040 carry = (z4 < xy4);
1041 z3 += xy3;
1042 if (carry) {
1043 z3++;
1044 carry = (z3 <= xy3);
1045 } else
1046 carry = (z3 < xy3);
1047 z2 += xy2;
1048 if (carry) {
1049 z2++;
1050 carry = (z2 <= xy2);
1051 } else
1052 carry = (z2 < xy2);
1053 z1 += xy1;
1054 if (carry) {
1055 z1++;
1056 carry = (z1 <= xy1);
1057 } else
1058 carry = (z1 < xy1);
1059 z0 += xy0;
1060 if (carry) {
1061 z0++;
1062 carry = (z0 <= xy0);
1063 } else
1064 carry = (z0 < xy0);
1065
1066 /* for a magnitude subtract, ignore the last carry out */
1067 if (sxy ^ sz)
1068 carry = 0;
1069
1070 /* postnormalize and collect rounding information into z2 */
1071 if (ez < 1) {
1072 /* result is tiny; shift right until exponent is within range */
/* carry is treated as one more bit above z0 while shifting */
1073 e = 1 - ez;
1074 if (e > 67) {
1075 z2 = 1; /* result can't be exactly zero */
1076 z0 = z1 = 0;
1077 } else if (e >= 64) {
1078 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1079 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1080 if (sticky)
1081 z2 |= 1;
1082 z1 = carry >> (e - 64);
1083 z0 = 0;
1084 } else if (e >= 32) {
1085 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1086 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1087 if (sticky)
1088 z2 |= 1;
1089 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1090 z0 = carry >> (e - 32);
1091 } else {
1092 sticky = z4 | z3 | (z2 << 1) << (31 - e);
1093 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1094 if (sticky)
1095 z2 |= 1;
1096 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1097 z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1098 }
1099 ez = 1;
1100 } else if (carry) {
1101 /* carry out; shift right by one */
1102 sticky = (z2 & 1) | z3 | z4;
1103 z2 = (z2 >> 1) | (z1 << 31);
1104 if (sticky)
1105 z2 |= 1;
1106 z1 = (z1 >> 1) | (z0 << 31);
1107 z0 = (z0 >> 1) | 0x80000000; /* the carry becomes the new top bit */
1108 ez++;
1109 } else {
1110 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1111 /*
1112 * borrow/cancellation; shift left as much as
1113 * exponent allows
1114 */
/* whole words first, 32 exponent steps at a time */
1115 while (!z0 && ez >= 33) {
1116 z0 = z1;
1117 z1 = z2;
1118 z2 = z3;
1119 z3 = z4;
1120 z4 = 0;
1121 ez -= 32;
1122 }
/* then single bits, stopping at exponent 1 if the top bit never appears */
1123 while (z0 < 0x80000000u && ez > 1) {
1124 z0 = (z0 << 1) | (z1 >> 31);
1125 z1 = (z1 << 1) | (z2 >> 31);
1126 z2 = (z2 << 1) | (z3 >> 31);
1127 z3 = (z3 << 1) | (z4 >> 31);
1128 z4 <<= 1;
1129 ez--;
1130 }
1131 }
/* anything left below z2 only matters as a sticky bit */
1132 if (z3 | z4)
1133 z2 |= 1;
1134 }
1135
1136 /* get the rounding mode */
1137 rm = oldcwsw & 0x0c000000; /* from the control word saved above */
1138
1139 /* adjust exponent if result is subnormal */
1140 tinyafter = 0;
1141 if (!(z0 & 0x80000000)) {
1142 ez = 0;
1143 tinyafter = 1;
1144 if (!(z0 | z1 | z2)) { /* exact zero */
1145 zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1146 zz.i[1] = zz.i[0] = 0;
1147 __fenv_setcwsw(&oldcwsw);
1148 return (zz.e);
1149 }
1150 }
1151
1152 /*
1153 * flip the sense of directed roundings if the result is negative;
1154 * the logic below applies to a positive result
1155 */
1156 if (sz && (rm == FCW_RM || rm == FCW_RP))
1157 rm = (FCW_RM + FCW_RP) - rm; /* exchanges FCW_RM and FCW_RP */
1158
1159 /* round */
1160 if (z2) {
/*
 * round up for FCW_RP always; for FCW_RN when z2 is above the
 * halfway value 0x80000000, or exactly halfway with z1 odd
 */
1161 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
1162 (z2 == 0x80000000u && (z1 & 1))))) {
1163 /* round up and renormalize if necessary */
1164 if (++z1 == 0) {
1165 if (++z0 == 0) {
1166 z0 = 0x80000000;
1167 ez++;
1168 } else if (z0 == 0x80000000) {
1169 /* rounded up to smallest normal */
1170 ez = 1;
1171 if ((rm == FCW_RP && z2 >
1172 0x80000000u) || (rm == FCW_RN &&
1173 z2 >= 0xc0000000u))
1174 /*
1175 * would have rounded up to
1176 * smallest normal even with
1177 * unbounded range
1178 */
1179 tinyafter = 0;
1180 }
1181 }
1182 }
1183 }
1184
1185 /* restore the control and status words, check for over/underflow */
1186 __fenv_setcwsw(&oldcwsw);
1187 if (ez >= 0x7fff) {
/*
 * overflow: infinity for FCW_RN and FCW_RP (senses already
 * flipped for a negative result), otherwise the largest finite
 * magnitude
 */
1188 if (rm == FCW_RN || rm == FCW_RP) {
1189 zz.i[2] = sz | 0x7fff;
1190 zz.i[1] = 0x80000000;
1191 zz.i[0] = 0;
1192 } else {
1193 zz.i[2] = sz | 0x7ffe;
1194 zz.i[1] = 0xffffffff;
1195 zz.i[0] = 0xffffffff;
1196 }
/* huge * huge overflows in double arithmetic */
1197 dummy = huge;
1198 dummy *= huge;
1199 } else {
1200 zz.i[2] = sz | ez;
1201 zz.i[1] = z0;
1202 zz.i[0] = z1;
1203
1204 /*
1205 * tinyafter => result rounded w/ unbounded range would be tiny,
1206 * z2 nonzero => result delivered is inexact
1207 */
1208 if (tinyafter) {
/*
 * tiny * tiny is an inexact underflow; tiny - tiny2 is an
 * exact subnormal result
 */
1209 dummy = tiny;
1210 if (z2)
1211 dummy *= tiny;
1212 else
1213 dummy -= tiny2;
1214 } else if (z2) {
/* huge + tiny cannot be represented exactly: inexact only */
1215 dummy = huge;
1216 dummy += tiny;
1217 }
1218 }
1219
1220 return (zz.e);
1221 }
1222
1223 #else
1224 #error Unknown architecture
1225 #endif
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #pragma weak fmal = __fmal
32
33 #include "libm.h"
34 #include "fma.h"
35 #include "fenv_inlines.h"
36
37 #if defined(__sparc)
/*
 * Table of double constants spelled as pairs of 32-bit words, named by
 * the macros that follow.  i[0] carries the high-order word (sign,
 * exponent, top fraction bits) and i[1] the low-order word.
 */
38 static const union {
39 unsigned i[2];
40 double d;
41 } C[] = {
42 { 0x3fe00000u, 0 }, /* C[0]: 0.5 */
43 { 0x40000000u, 0 }, /* C[1]: 2.0 */
44 { 0x3ef00000u, 0 }, /* C[2]: 2^-16 */
45 { 0x3e700000u, 0 }, /* C[3]: 2^-24 */
46 { 0x41300000u, 0 }, /* C[4]: 2^20 */
47 { 0x3e300000u, 0 }, /* C[5]: 2^-28 */
48 { 0x3b300000u, 0 }, /* C[6]: 2^-76 */
49 { 0x38300000u, 0 }, /* C[7]: 2^-124 */
50 { 0x42300000u, 0 }, /* C[8]: 2^36 */
51 { 0x3df00000u, 0 }, /* C[9]: 2^-32 */
52 { 0x7fe00000u, 0 }, /* C[10]: 2^1023 */
53 { 0x00100000u, 0 }, /* C[11]: 2^-1022, the smallest normal double */
54 { 0x00100001u, 0 }, /* C[12]: 2^-1022 * (1 + 2^-20) */
55 { 0, 0 }, /* C[13]: +0.0 */
56 { 0x7ff00000u, 0 }, /* C[14]: +infinity */
57 { 0x7ff00001u, 0 } /* C[15]: nan with the top fraction bit clear */
/*
 * NOTE(review): the embedded listing numbers skip from 57 to 63 here, so
 * the closing of the table and the names given to C[0], C[1] and C[2]
 * are not visible in this view - confirm against the complete file.
 */
63 #define twom24 C[3].d
64 #define two20 C[4].d
65 #define twom28 C[5].d
66 #define twom76 C[6].d
67 #define twom124 C[7].d
68 #define two36 C[8].d
69 #define twom32 C[9].d
70 #define huge C[10].d
71 #define tiny C[11].d
72 #define tiny2 C[12].d
73 #define zero C[13].d
74 #define inf C[14].d
75 #define snan C[15].d
76
/*
 * fsr image with only its top two bits set.  NOTE(review): presumably
 * installed by __fmal to select round-to-negative-infinity for its
 * internal arithmetic; that use lies beyond the visible part of this
 * copy of the function - confirm.
 */
77 static const unsigned int fsr_rm = 0xc0000000u;
78
79 /*
80 * fmal for SPARC: 128-bit quad precision, big-endian
 *
 * Returns x * y + z.  The arguments are classified from their raw
 * 32-bit words; zero, infinite and nan operands are resolved up front.
 * Otherwise the significand of x * y is held in eight 32-bit words
 * (xy0-xy7), aligned against the significand of z, added or subtracted
 * with integer arithmetic, and rounded using the rounding direction
 * taken from the fsr value read on entry.  Exception flags are
 * collected in the local copy of the fsr, which is written back before
 * returning; see the end of the function for how exceptions are
 * emulated.
81 */
82 long double
83 __fmal(long double x, long double y, long double z)
84 {
85 union {
86 unsigned int i[4];
87 long double q;
88 } xx, yy, zz;
89 union {
90 unsigned int i[2];
91 double d;
92 } u;
93
94 double dx[5], dy[5], dxy[9], c, s;
95 unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
96 unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
97 unsigned int rm, sticky;
98 unsigned int fsr;
99 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
100 int cx, cy, cz;
/* volatile, presumably so the flag-raising operations are not elided */
101 volatile double dummy;
102
103 /* extract the high order words of the arguments */
104 xx.q = x;
105 yy.q = y;
106 zz.q = z;
/* drop the sign bit so hx, hy and hz compare as magnitudes */
107 hx = xx.i[0] & ~0x80000000;
108 hy = yy.i[0] & ~0x80000000;
109 hz = zz.i[0] & ~0x80000000;
110
111 /*
112 * distinguish zero, finite nonzero, infinite, and quiet nan
113 * arguments; raise invalid and return for signaling nans
 *
 * the class codes stored in cx, cy and cz are:
 * 0 = zero, 1 = finite nonzero (subnormals included),
 * 2 = infinity, 3 = quiet nan
114 */
115 if (hx >= 0x7fff0000) {
116 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
/* bit 0x8000 of the high word clear marks a signaling nan */
117 if (!(hx & 0x8000)) {
118 /* signaling nan, raise invalid */
119 dummy = snan;
120 dummy += snan;
/* return the argument with bit 0x8000 set, i.e. quieted */
121 xx.i[0] |= 0x8000;
122 return (xx.q);
123 }
124
125 cx = 3; /* quiet nan */
126 } else {
127 cx = 2; /* inf */
128 }
129 } else if (hx == 0) {
130 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
131 /* subnormal or zero */
132 } else {
133 cx = 1; /* finite nonzero */
134 }
135
/* the same classification, applied to y */
136 if (hy >= 0x7fff0000) {
137 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
138 if (!(hy & 0x8000)) {
139 dummy = snan;
140 dummy += snan;
141 yy.i[0] |= 0x8000;
142 return (yy.q);
143 }
144
145 cy = 3;
146 } else {
147 cy = 2;
148 }
149 } else if (hy == 0) {
150 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
151 } else {
152 cy = 1;
153 }
154
/* the same classification, applied to z */
155 if (hz >= 0x7fff0000) {
156 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
157 if (!(hz & 0x8000)) {
158 dummy = snan;
159 dummy += snan;
160 zz.i[0] |= 0x8000;
161 return (zz.q);
162 }
163
164 cz = 3;
165 } else {
166 cz = 2;
167 }
168 } else if (hz == 0) {
169 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
170 } else {
171 cz = 1;
172 }
173
174 /* get the fsr and clear current exceptions */
175 __fenv_getfsr32(&fsr);
176 fsr &= ~FSR_CEXC;
177
178 /* handle all other zero, inf, and nan cases */
179 if (cx != 1 || cy != 1 || cz != 1) {
180 /* if x or y is a quiet nan, return it */
181 if (cx == 3) {
182 __fenv_setfsr32(&fsr);
183 return (x);
184 }
185
186 if (cy == 3) {
187 __fenv_setfsr32(&fsr);
188 return (y);
189 }
190
191 /* if x*y is 0*inf, raise invalid and return the default nan */
192 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
193 dummy = zero;
194 dummy *= inf;
/* default nan: sign clear, every other bit set */
195 zz.i[0] = 0x7fffffff;
196 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
197 return (zz.q);
198 }
199
200 /* if z is a quiet nan, return it */
201 if (cz == 3) {
202 __fenv_setfsr32(&fsr);
203 return (z);
204 }
205
206 /*
207 * now none of x, y, or z is nan; handle cases where x or y
208 * is inf
209 */
210 if (cx == 2 || cy == 2) {
211 /*
212 * if z is also inf, either we have inf-inf or
213 * the result is the same as z depending on signs
214 */
215 if (cz == 2) {
/* negative here means sign(x*y) differs from sign(z) */
216 if ((int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
217 dummy = inf;
218 dummy -= inf;
219 zz.i[0] = 0x7fffffff;
220 zz.i[1] = zz.i[2] = zz.i[3] =
221 0xffffffff;
222 return (zz.q);
223 }
224
225 __fenv_setfsr32(&fsr);
226 return (z);
227 }
228
229 /* otherwise the result is inf with appropriate sign */
230 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
231 0x7fff0000;
232 zz.i[1] = zz.i[2] = zz.i[3] = 0;
233 __fenv_setfsr32(&fsr);
234 return (zz.q);
235 }
236
237 /* if z is inf, return it */
238 if (cz == 2) {
239 __fenv_setfsr32(&fsr);
240 return (z);
241 }
242
243 /*
244 * now x, y, and z are all finite; handle cases where x or y
245 * is zero
246 */
247 if (cx == 0 || cy == 0) {
248 /* either we have 0-0 or the result is the same as z */
249 if (cz == 0 && (int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
250 0) {
/*
 * zeros of opposite sign: the sum is -0 only when the
 * saved rounding direction equals FSR_RM, else +0
 */
251 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
252 0;
253 __fenv_setfsr32(&fsr);
254 return (zz.q);
255 }
256
257 __fenv_setfsr32(&fsr);
258 return (z);
259 }
260
261 /* if we get here, x and y are nonzero finite, z must be zero */
262 return (x * y);
263 }
264
265 /*
266 * now x, y, and z are all finite and nonzero; set round-to-
267 * negative-infinity mode
268 */
269 __fenv_setfsr32(&fsr_rm);
270
271 /*
272 * get the signs and exponents and normalize the significands
273 * of x and y
274 */
275 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
276 ex = hx >> 16;
277 hx &= 0xffff;
278
/*
 * zero exponent field: x is subnormal.  Move whole words up first,
 * then shift bit by bit until bit 0x10000 of hx is set, lowering ex
 * to match.
 */
279 if (!ex) {
280 if (hx | (xx.i[1] & 0xfffe0000)) {
281 ex = 1;
282 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
283 hx = xx.i[1];
284 xx.i[1] = xx.i[2];
285 xx.i[2] = xx.i[3];
286 xx.i[3] = 0;
287 ex = -31;
288 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
289 hx = xx.i[2];
290 xx.i[1] = xx.i[3];
291 xx.i[2] = xx.i[3] = 0;
292 ex = -63;
293 } else {
294 hx = xx.i[3];
295 xx.i[1] = xx.i[2] = xx.i[3] = 0;
296 ex = -95;
297 }
298
299 while ((hx & 0x10000) == 0) {
300 hx = (hx << 1) | (xx.i[1] >> 31);
301 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
302 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
303 xx.i[3] <<= 1;
304 ex--;
305 }
306 } else {
/* normal number: make the leading significand bit explicit */
307 hx |= 0x10000;
308 }
309
310 ey = hy >> 16;
311 hy &= 0xffff;
312
/* the same normalization, applied to y */
313 if (!ey) {
314 if (hy | (yy.i[1] & 0xfffe0000)) {
315 ey = 1;
316 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
317 hy = yy.i[1];
318 yy.i[1] = yy.i[2];
319 yy.i[2] = yy.i[3];
320 yy.i[3] = 0;
321 ey = -31;
322 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
323 hy = yy.i[2];
324 yy.i[1] = yy.i[3];
325 yy.i[2] = yy.i[3] = 0;
326 ey = -63;
327 } else {
328 hy = yy.i[3];
329 yy.i[1] = yy.i[2] = yy.i[3] = 0;
330 ey = -95;
331 }
332
333 while ((hy & 0x10000) == 0) {
334 hy = (hy << 1) | (yy.i[1] >> 31);
335 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
336 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
337 yy.i[3] <<= 1;
338 ey--;
339 }
340 } else {
341 hy |= 0x10000;
342 }
343
/* exponent of the product, with one copy of the 0x3fff bias removed */
344 exy = ex + ey - 0x3fff;
345
346 /* convert the significands of x and y to doubles */
/*
 * each significand is cut into five pieces: the 17 bits held in
 * hx/hy followed by four 24-bit pieces, each scaled by c
 */
347 c = twom16;
348 dx[0] = (double)((int)hx) * c;
349 dy[0] = (double)((int)hy) * c;
350
351 c *= twom24;
352 dx[1] = (double)((int)(xx.i[1] >> 8)) * c;
353 dy[1] = (double)((int)(yy.i[1] >> 8)) * c;
354
355 c *= twom24;
356 dx[2] = (double)((int)(((xx.i[1] << 16) | (xx.i[2] >> 16)) &
357 0xffffff)) * c;
358 dy[2] = (double)((int)(((yy.i[1] << 16) | (yy.i[2] >> 16)) &
359 0xffffff)) * c;
360
361 c *= twom24;
362 dx[3] = (double)((int)(((xx.i[2] << 8) | (xx.i[3] >> 24)) & 0xffffff)) *
363 c;
364 dy[3] = (double)((int)(((yy.i[2] << 8) | (yy.i[3] >> 24)) & 0xffffff)) *
365 c;
366
367 c *= twom24;
368 dx[4] = (double)((int)(xx.i[3] & 0xffffff)) * c;
369 dy[4] = (double)((int)(yy.i[3] & 0xffffff)) * c;
370
371 /* form the "digits" of the product */
372 dxy[0] = dx[0] * dy[0];
373 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
374 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
375 dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + dx[3] * dy[0];
376 dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + dx[3] * dy[1] +
377 dx[4] * dy[0];
378 dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + dx[4] * dy[1];
379 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
380 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
381 dxy[8] = dx[4] * dy[4];
382
383 /* split odd-numbered terms and combine into even-numbered terms */
/* adding and then subtracting a power of two isolates the high part */
384 c = (dxy[1] + two20) - two20;
385 dxy[0] += c;
386 dxy[1] -= c;
387 c = (dxy[3] + twom28) - twom28;
388 dxy[2] += c + dxy[1];
389 dxy[3] -= c;
390 c = (dxy[5] + twom76) - twom76;
391 dxy[4] += c + dxy[3];
392 dxy[5] -= c;
393 c = (dxy[7] + twom124) - twom124;
394 dxy[6] += c + dxy[5];
395 dxy[8] += (dxy[7] - c);
396
397 /* propagate carries, adjusting the exponent if need be */
398 dxy[7] = dxy[6] + dxy[8];
399 dxy[5] = dxy[4] + dxy[7];
400 dxy[3] = dxy[2] + dxy[5];
401 dxy[1] = dxy[0] + dxy[3];
402
/* product significand reached 2: halve every term and bump exy */
403 if (dxy[1] >= two) {
404 dxy[0] *= half;
405 dxy[1] *= half;
406 dxy[2] *= half;
407 dxy[3] *= half;
408 dxy[4] *= half;
409 dxy[5] *= half;
410 dxy[6] *= half;
411 dxy[7] *= half;
412 dxy[8] *= half;
413 exy++;
414 }
415
416 /* extract the significand of x*y */
/*
 * adding s to a term and reading the second word of the resulting
 * double yields one 32-bit piece of the significand; s is scaled by
 * 2^-32 for each following piece
 */
417 s = two36;
418 u.d = c = dxy[1] + s;
419 xy0 = u.i[1];
420 c -= s;
421 dxy[1] -= c;
/*
 * NOTE(review): the embedded numbering jumps from 421 to 452 here; the
 * statements that fill xy1 through xy4 are not visible in this copy --
 * confirm against a pristine copy of the file.
 */
422 dxy[0] -= c;
452 s *= twom32;
453 u.d = c = dxy[7] + s;
454 xy5 = u.i[1];
455 c -= s;
456 dxy[8] += (dxy[6] - c);
457
458 s *= twom32;
459 u.d = c = dxy[8] + s;
460 xy6 = u.i[1];
461 c -= s;
462 dxy[8] -= c;
463
464 s *= twom32;
465 u.d = c = dxy[8] + s;
466 xy7 = u.i[1];
467
468 /* extract the sign, exponent, and significand of z */
469 sz = zz.i[0] & 0x80000000;
470 ez = hz >> 16;
471 z0 = hz & 0xffff;
472
/* subnormal z is normalized the same way as x and y above */
473 if (!ez) {
474 if (z0 | (zz.i[1] & 0xfffe0000)) {
475 z1 = zz.i[1];
476 z2 = zz.i[2];
477 z3 = zz.i[3];
478 ez = 1;
479 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
480 z0 = zz.i[1];
481 z1 = zz.i[2];
482 z2 = zz.i[3];
483 z3 = 0;
484 ez = -31;
485 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
486 z0 = zz.i[2];
487 z1 = zz.i[3];
488 z2 = z3 = 0;
489 ez = -63;
490 } else {
491 z0 = zz.i[3];
492 z1 = z2 = z3 = 0;
493 ez = -95;
494 }
495
496 while ((z0 & 0x10000) == 0) {
497 z0 = (z0 << 1) | (z1 >> 31);
498 z1 = (z1 << 1) | (z2 >> 31);
499 z2 = (z2 << 1) | (z3 >> 31);
500 z3 <<= 1;
501 ez--;
502 }
503 } else {
504 z0 |= 0x10000;
505 z1 = zz.i[1];
506 z2 = zz.i[2];
507 z3 = zz.i[3];
508 }
509
/* z has no bits below its fourth word */
510 z4 = z5 = z6 = z7 = 0;
511
512 /*
513 * now x*y is represented by sxy, exy, and xy[0-7], and z is
514 * represented likewise; swap if need be so |xy| <= |z|
515 */
516 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
517 (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || (xy3 == z3 &&
518 (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
/* e serves as the scratch variable for the swaps */
519 e = sxy;
520 sxy = sz;
521 sz = e;
522 e = exy;
523 exy = ez;
524 ez = e;
525 e = xy0;
526 xy0 = z0;
527 z0 = e;
528 e = xy1;
529 xy1 = z1;
530 z1 = e;
531 e = xy2;
532 xy2 = z2;
533 z2 = e;
534 e = xy3;
535 xy3 = z3;
536 z3 = e;
/* the low words of the old z were zero, so these are plain moves */
537 z4 = xy4;
538 xy4 = 0;
539 z5 = xy5;
540 xy5 = 0;
541 z6 = xy6;
542 xy6 = 0;
543 z7 = xy7;
544 xy7 = 0;
545 }
546
547 /* shift the significand of xy keeping a sticky bit */
/*
 * e is the right shift needed to align xy with z.  Each branch moves
 * whole 32-bit words first and then shifts by the remaining bit count;
 * any nonzero bit shifted out is recorded in bit 0 of xy7.
 */
548 e = ez - exy;
549
550 if (e > 236) {
551 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
552 xy7 = 1;
553 } else if (e >= 224) {
554 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | ((xy0 <<
555 1) << (255 - e));
556 xy7 = xy0 >> (e - 224);
557
558 if (sticky)
559 xy7 |= 1;
560
561 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
562 } else if (e >= 192) {
563 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | ((xy1 << 1) <<
564 (223 - e));
565 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
566
567 if (sticky)
568 xy7 |= 1;
569
570 xy6 = xy0 >> (e - 192);
571 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
572 } else if (e >= 160) {
573 sticky = xy7 | xy6 | xy5 | xy4 | xy3 | ((xy2 << 1) << (191 -
574 e));
575 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
576
577 if (sticky)
578 xy7 |= 1;
579
580 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
581 xy5 = xy0 >> (e - 160);
582 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
583 } else if (e >= 128) {
584 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
585 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
586
587 if (sticky)
588 xy7 |= 1;
589
590 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
591 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
592 xy4 = xy0 >> (e - 128);
593 xy0 = xy1 = xy2 = xy3 = 0;
594 } else if (e >= 96) {
595 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
596 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
597
598 if (sticky)
599 xy7 |= 1;
600
601 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
602 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
603 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
604 xy3 = xy0 >> (e - 96);
605 xy0 = xy1 = xy2 = 0;
606 } else if (e >= 64) {
607 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
608 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
609
610 if (sticky)
611 xy7 |= 1;
612
613 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
614 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
615 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
616 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
617 xy2 = xy0 >> (e - 64);
618 xy0 = xy1 = 0;
619 } else if (e >= 32) {
620 sticky = xy7 | ((xy6 << 1) << (63 - e));
621 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
622
623 if (sticky)
624 xy7 |= 1;
625
626 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
627 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
628 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
629 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
630 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
631 xy1 = xy0 >> (e - 32);
632 xy0 = 0;
633 } else if (e) {
634 sticky = (xy7 << 1) << (31 - e);
635 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
636
637 if (sticky)
638 xy7 |= 1;
639
640 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
641 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
642 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
643 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
644 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
645 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
646 xy0 >>= e;
647 }
648
649 /* if this is a magnitude subtract, negate the significand of xy */
/*
 * two's complement over the eight words: invert the upper seven,
 * negate the lowest, and ripple the +1 upward while words wrap to 0
 */
650 if (sxy ^ sz) {
651 xy0 = ~xy0;
652 xy1 = ~xy1;
653 xy2 = ~xy2;
654 xy3 = ~xy3;
655 xy4 = ~xy4;
656 xy5 = ~xy5;
657 xy6 = ~xy6;
658 xy7 = -xy7;
659
660 if (xy7 == 0)
661 if (++xy6 == 0)
662 if (++xy5 == 0)
663 if (++xy4 == 0)
664 if (++xy3 == 0)
665 if (++xy2 == 0)
666 if (++xy1 == 0)
667 xy0++;
668 }
669
670 /* add, propagating carries */
/* e is reused as the carry passed from each word to the next */
671 z7 += xy7;
672 e = (z7 < xy7);
673 z6 += xy6;
674
675 if (e) {
676 z6++;
677 e = (z6 <= xy6);
678 } else {
679 e = (z6 < xy6);
680 }
681
682 z5 += xy5;
683
684 if (e) {
685 z5++;
686 e = (z5 <= xy5);
687 } else {
688 e = (z5 < xy5);
689 }
690
691 z4 += xy4;
692
693 if (e) {
694 z4++;
695 e = (z4 <= xy4);
696 } else {
697 e = (z4 < xy4);
698 }
699
700 z3 += xy3;
701
702 if (e) {
703 z3++;
704 e = (z3 <= xy3);
705 } else {
706 e = (z3 < xy3);
707 }
708
709 z2 += xy2;
710
711 if (e) {
712 z2++;
713 e = (z2 <= xy2);
714 } else {
715 e = (z2 < xy2);
716 }
717
718 z1 += xy1;
719
720 if (e) {
721 z1++;
722 e = (z1 <= xy1);
723 } else {
724 e = (z1 < xy1);
725 }
726
727 z0 += xy0;
728
729 if (e)
730 z0++;
731
732 /* postnormalize and collect rounding information into z4 */
733 if (ez < 1) {
734 /* result is tiny; shift right until exponent is within range */
735 e = 1 - ez;
736
737 if (e > 116) {
738 z4 = 1; /* result can't be exactly zero */
739 z0 = z1 = z2 = z3 = 0;
740 } else if (e >= 96) {
741 sticky = z7 | z6 | z5 | z4 | z3 | z2 | ((z1 << 1) <<
742 (127 - e));
743 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
744
745 if (sticky)
746 z4 |= 1;
747
748 z3 = z0 >> (e - 96);
749 z0 = z1 = z2 = 0;
750 } else if (e >= 64) {
751 sticky = z7 | z6 | z5 | z4 | z3 | ((z2 << 1) << (95 -
752 e));
753 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
754
755 if (sticky)
756 z4 |= 1;
757
758 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
759 z2 = z0 >> (e - 64);
760 z0 = z1 = 0;
761 } else if (e >= 32) {
762 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
763 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
764
765 if (sticky)
766 z4 |= 1;
767
768 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
769 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
770 z1 = z0 >> (e - 32);
771 z0 = 0;
772 } else {
773 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
774 z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
775
776 if (sticky)
777 z4 |= 1;
778
779 z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
780 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
781 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
782 z0 >>= e;
783 }
784
785 ez = 1;
786 } else if (z0 >= 0x20000) {
787 /* carry out; shift right by one */
788 sticky = (z4 & 1) | z5 | z6 | z7;
789 z4 = (z4 >> 1) | (z3 << 31);
790
791 if (sticky)
792 z4 |= 1;
793
794 z3 = (z3 >> 1) | (z2 << 31);
795 z2 = (z2 >> 1) | (z1 << 31);
796 z1 = (z1 >> 1) | (z0 << 31);
797 z0 >>= 1;
798 ez++;
799 } else {
800 if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) !=
801 0) {
802 /*
803 * borrow/cancellation; shift left as much as
804 * exponent allows
805 */
/* whole-word steps first, while ez can afford them */
806 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
807 z0 = z1;
808 z1 = z2;
809 z2 = z3;
810 z3 = z4;
811 z4 = z5;
812 z5 = z6;
813 z6 = z7;
814 z7 = 0;
815 ez -= 32;
816 }
817
/* then single-bit steps, stopping at ez == 1 */
818 while (z0 < 0x10000 && ez > 1) {
819 z0 = (z0 << 1) | (z1 >> 31);
820 z1 = (z1 << 1) | (z2 >> 31);
821 z2 = (z2 << 1) | (z3 >> 31);
822 z3 = (z3 << 1) | (z4 >> 31);
823 z4 = (z4 << 1) | (z5 >> 31);
824 z5 = (z5 << 1) | (z6 >> 31);
825 z6 = (z6 << 1) | (z7 >> 31);
826 z7 <<= 1;
827 ez--;
828 }
829 }
830
/* fold the words below z4 into its sticky bit */
831 if (z5 | z6 | z7)
832 z4 |= 1;
833 }
834
835 /* get the rounding mode */
/* taken from the fsr value saved on entry, not the mode set above */
836 rm = fsr >> 30;
837
838 /* strip off the integer bit, if there is one */
839 ibit = z0 & 0x10000;
840
841 if (ibit) {
842 z0 -= 0x10000;
843 } else {
/* no integer bit: the result is subnormal or zero */
844 ez = 0;
845
846 if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
847 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
848 zz.i[1] = zz.i[2] = zz.i[3] = 0;
849 __fenv_setfsr32(&fsr);
850 return (zz.q);
851 }
852 }
853
854 /*
855 * flip the sense of directed roundings if the result is negative;
856 * the logic below applies to a positive result
 *
 * NOTE(review): the xor below swaps the values 2 and 3 and leaves
 * 0 and 1 alone, which presumes FSR_RP and FSR_RM are encoded as
 * 2 and 3 -- they are defined outside this file; confirm.
857 */
858 if (sz)
859 rm ^= rm >> 1;
860
861 /* round and raise exceptions */
862 if (z4) {
863 fsr |= FSR_NXC;
864
865 /* decide whether to round the fraction up */
/* for FSR_RN: above half, or exactly half with an odd last bit */
866 if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || (z4 ==
867 0x80000000u && (z3 & 1))))) {
868 /* round up and renormalize if necessary */
869 if (++z3 == 0)
870 if (++z2 == 0)
871 if (++z1 == 0)
872 if (++z0 == 0x10000) {
873 z0 = 0;
874 ez++;
875 }
876 }
877 }
878
879 /* check for under/overflow */
880 if (ez >= 0x7fff) {
/* overflow: infinity or the largest finite value, depending on rm */
881 if (rm == FSR_RN || rm == FSR_RP) {
882 zz.i[0] = sz | 0x7fff0000;
883 zz.i[1] = zz.i[2] = zz.i[3] = 0;
884 } else {
885 zz.i[0] = sz | 0x7ffeffff;
886 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
887 }
888
889 fsr |= FSR_OFC | FSR_NXC;
890 } else {
891 zz.i[0] = sz | (ez << 16) | z0;
892 zz.i[1] = z1;
893 zz.i[2] = z2;
894 zz.i[3] = z3;
895
896 /*
897 * !ibit => exact result was tiny before rounding,
898 * z4 nonzero => result delivered is inexact
899 */
900 if (!ibit) {
901 if (z4)
902 fsr |= FSR_UFC | FSR_NXC;
903 else if (fsr & FSR_UFM)
904 fsr |= FSR_UFC;
905 }
906 }
907
908 /* restore the fsr and emulate exceptions as needed */
/*
 * NOTE(review): the test below lines the bits of fsr from bit 23 up
 * against the FSR_CEXC bits, presumably the trap enable mask.  When
 * they overlap, the fsr is restored and a dummy operation on volatile
 * doubles raises the matching exception; otherwise the low five bits
 * are also ORed into bits 5-9 (presumably the accrued exception field)
 * before the fsr is restored.  Confirm the field layout against the
 * SPARC documentation.
 */
909 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
910 __fenv_setfsr32(&fsr);
911
912 if (fsr & FSR_OFC) {
913 dummy = huge;
914 dummy *= huge;
915 } else if (fsr & FSR_UFC) {
916 dummy = tiny;
917
918 if (fsr & FSR_NXC)
919 dummy *= tiny;
920 else
921 dummy -= tiny2;
922 } else {
923 dummy = huge;
924 dummy += tiny;
925 }
926 } else {
927 fsr |= (fsr & 0x1f) << 5;
928 __fenv_setfsr32(&fsr);
929 }
930
931 return (zz.q);
932 }
933 #elif defined(__x86)
/*
 * Table of double precision constants for the x86 implementation.
 * Each entry is written as two 32-bit words with the word holding the
 * sign and exponent second, and is read back as a double through the
 * macros below.
 */
934 static const union {
935 unsigned i[2];
936 double d;
937 } C[] = {
938 { 0, 0x3fe00000u }, /* 0.5 */
939 { 0, 0x40000000u }, /* 2.0 */
940 { 0, 0x3df00000u }, /* 2^-32 */
941 { 0, 0x3bf00000u }, /* 2^-64 */
942 { 0, 0x41f00000u }, /* 2^32 */
943 { 0, 0x43e00000u }, /* 2^63 */
944 { 0, 0x7fe00000u }, /* 2^1023 */
945 { 0, 0x00100000u }, /* 2^-1022, the smallest normal double */
946 { 0, 0x00100001u } /* slightly larger than 2^-1022 */
947 };
948
949 #define half C[0].d
950 #define two C[1].d
951 #define twom32 C[2].d
952 #define twom64 C[3].d
953 #define two32 C[4].d
954 #define two63 C[5].d
955 #define huge C[6].d
956 #define tiny C[7].d
957 #define tiny2 C[8].d
958
/*
 * NI is the number of 32-bit words in the union that overlays a long
 * double in __fmal: four when __amd64 is defined, three otherwise.
 */
959 #if defined(__amd64)
960 #define NI 4
961 #else
962 #define NI 3
963 #endif
964
965 /*
966 * fmal for x86: 80-bit extended double precision, little-endian
 *
 * Returns x * y + z.  Zero, infinite and nan operands are handed to
 * ordinary floating point expressions.  Otherwise the significand of
 * x * y is carried in 32-bit words (xy0-xy4), aligned against the
 * significand of z, added or subtracted with integer arithmetic, and
 * rounded using the rounding direction taken from the control word
 * saved on entry.  The saved control and status words are restored
 * before the result is assembled, and overflow, underflow and inexact
 * are then raised with dummy operations on a volatile double.
967 */
968 long double
969 __fmal(long double x, long double y, long double z)
970 {
971 union {
972 unsigned i[NI];
973 long double e;
974 } xx, yy, zz;
975
976 long double xhi, yhi, xlo, ylo, t;
977 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
978 unsigned oldcwsw, cwsw, rm, sticky, carry;
979 int ex, ey, ez, exy, sxy, sz, e, tinyafter;
/* volatile, presumably so the flag-raising operations are not elided */
980 volatile double dummy;
981
982 /* extract the exponents of the arguments */
/* word 2 carries the sign in bit 0x8000 and the exponent below it */
983 xx.e = x;
984 yy.e = y;
985 zz.e = z;
986 ex = xx.i[2] & 0x7fff;
987 ey = yy.i[2] & 0x7fff;
988 ez = zz.i[2] & 0x7fff;
989
990 /* dispense with inf, nan, and zero cases */
991 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
992 (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
993 return (x * y + z);
994
995 if (ez == 0x7fff) /* z is inf or nan */
996 return (x + z); /* avoid spurious under/overflow in x * y */
997
998 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */
999 /*
1000 * x * y isn't zero but could underflow to zero,
1001 * so don't add z, lest we perturb the sign
1002 */
1003 return (x * y);
1004
1005 /*
1006 * now x, y, and z are all finite and nonzero; extract signs and
1007 * normalize the significands (this will raise the denormal operand
1008 * exception if need be)
1009 */
1010 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
1011 sz = zz.i[2] & 0x8000;
1012
/*
 * a zero exponent field marks a subnormal: scale by 2^63 and take 63
 * back off the exponent
 */
1013 if (!ex) {
1014 xx.e = x * two63;
1015 ex = (xx.i[2] & 0x7fff) - 63;
1016 }
1017
1018 if (!ey) {
1019 yy.e = y * two63;
1020 ey = (yy.i[2] & 0x7fff) - 63;
1021 }
1022
1023 if (!ez) {
1024 zz.e = z * two63;
1025 ez = (zz.i[2] & 0x7fff) - 63;
1026 }
1027
1028 /*
1029 * save the control and status words, mask all exceptions, and
1030 * set rounding to 64-bit precision and toward-zero
1031 */
1032 __fenv_getcwsw(&oldcwsw);
1033 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
1034 __fenv_setcwsw(&cwsw);
1035
1036 /* multiply x*y to 128 bits */
1037 exy = ex + ey - 0x3fff;
/* force both operands to exponent 0x3fff with a positive sign */
1038 xx.i[2] = 0x3fff;
1039 yy.i[2] = 0x3fff;
1040 x = xx.e;
1041 y = yy.e;
/* split each operand into a high part and the remainder */
1042 xhi = ((x + twom32) + two32) - two32;
1043 yhi = ((y + twom32) + two32) - two32;
1044 xlo = x - xhi;
1045 ylo = y - yhi;
/* x becomes the rounded product, y the remainder of the product */
1046 x *= y;
1047 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
1048
/* product reached 2: halve both parts and bump the exponent */
1049 if (x >= two) {
1050 x *= half;
1051 y *= half;
1052 exy++;
1053 }
1054
1055 /* extract the significands */
1056 xx.e = x;
1057 xy0 = xx.i[1];
1058 xy1 = xx.i[0];
/* adding 2^-32, then 2^-64, exposes the next two words of the product */
1059 yy.e = t = y + twom32;
1060 xy2 = yy.i[0];
1061 yy.e = (y - (t - twom32)) + twom64;
1062 xy3 = yy.i[0];
1063 xy4 = 0;
1064 z0 = zz.i[1];
1065 z1 = zz.i[0];
1066 z2 = z3 = z4 = 0;
1067
1068 /*
1069 * now x*y is represented by sxy, exy, and xy[0-4], and z is
1070 * represented likewise; swap if need be so |xy| <= |z|
1071 */
1072 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
1073 (xy1 == z1 && (xy2 | xy3) != 0)))))) {
/* e serves as the scratch variable for the swaps */
1074 e = sxy;
1075 sxy = sz;
1076 sz = e;
1077 e = exy;
1078 exy = ez;
1079 ez = e;
1080 e = xy0;
1081 xy0 = z0;
1082 z0 = e;
1083 e = xy1;
1084 xy1 = z1;
1085 z1 = e;
/* the low words of the old z were zero, so these are plain moves */
1086 z2 = xy2;
1087 xy2 = 0;
1088 z3 = xy3;
1089 xy3 = 0;
1090 }
1091
1092 /* shift the significand of xy keeping a sticky bit */
/*
 * e is the right shift needed to align xy with z.  Each branch moves
 * whole 32-bit words first and then shifts by the remaining bit count;
 * any nonzero bit shifted out is recorded in bit 0 of xy4.
 */
1093 e = ez - exy;
1094
1095 if (e > 130) {
1096 xy0 = xy1 = xy2 = xy3 = 0;
1097 xy4 = 1;
1098 } else if (e >= 128) {
1099 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
1100 xy4 = xy0 >> (e - 128);
1101
1102 if (sticky)
1103 xy4 |= 1;
1104
1105 xy0 = xy1 = xy2 = xy3 = 0;
1106 } else if (e >= 96) {
1107 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
1108 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
1109
1110 if (sticky)
1111 xy4 |= 1;
1112
1113 xy3 = xy0 >> (e - 96);
1114 xy0 = xy1 = xy2 = 0;
1115 } else if (e >= 64) {
1116 sticky = xy3 | ((xy2 << 1) << (95 - e));
1117 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1118
1119 if (sticky)
1120 xy4 |= 1;
1121
1122 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1123 xy2 = xy0 >> (e - 64);
1124 xy0 = xy1 = 0;
1125 } else if (e >= 32) {
1126 sticky = (xy3 << 1) << (63 - e);
1127 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1128
1129 if (sticky)
1130 xy4 |= 1;
1131
1132 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1133 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1134 xy1 = xy0 >> (e - 32);
1135 xy0 = 0;
1136 } else if (e) {
/* under 32 bits: every shifted-out bit lands in xy4, nothing is lost */
1137 xy4 = (xy3 << 1) << (31 - e);
1138 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1139 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1140 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1141 xy0 >>= e;
1142 }
1143
1144 /* if this is a magnitude subtract, negate the significand of xy */
/*
 * two's complement over the five words: invert the upper four,
 * negate the lowest, and ripple the +1 upward while words wrap to 0
 */
1145 if (sxy ^ sz) {
1146 xy0 = ~xy0;
1147 xy1 = ~xy1;
1148 xy2 = ~xy2;
1149 xy3 = ~xy3;
1150 xy4 = -xy4;
1151
1152 if (xy4 == 0)
1153 if (++xy3 == 0)
1154 if (++xy2 == 0)
1155 if (++xy1 == 0)
1156 xy0++;
1157 }
1158
1159 /* add, propagating carries */
1160 z4 += xy4;
1161 carry = (z4 < xy4);
1162 z3 += xy3;
1163
1164 if (carry) {
1165 z3++;
1166 carry = (z3 <= xy3);
1167 } else {
1168 carry = (z3 < xy3);
1169 }
1170
1171 z2 += xy2;
1172
1173 if (carry) {
1174 z2++;
1175 carry = (z2 <= xy2);
1176 } else {
1177 carry = (z2 < xy2);
1178 }
1179
1180 z1 += xy1;
1181
1182 if (carry) {
1183 z1++;
1184 carry = (z1 <= xy1);
1185 } else {
1186 carry = (z1 < xy1);
1187 }
1188
1189 z0 += xy0;
1190
1191 if (carry) {
1192 z0++;
1193 carry = (z0 <= xy0);
1194 } else {
1195 carry = (z0 < xy0);
1196 }
1197
1198 /* for a magnitude subtract, ignore the last carry out */
1199 if (sxy ^ sz)
1200 carry = 0;
1201
1202 /* postnormalize and collect rounding information into z2 */
1203 if (ez < 1) {
1204 /* result is tiny; shift right until exponent is within range */
/* carry is treated as one more bit sitting above z0 */
1205 e = 1 - ez;
1206
1207 if (e > 67) {
1208 z2 = 1; /* result can't be exactly zero */
1209 z0 = z1 = 0;
1210 } else if (e >= 64) {
1211 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1212 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1213
1214 if (sticky)
1215 z2 |= 1;
1216
1217 z1 = carry >> (e - 64);
1218 z0 = 0;
1219 } else if (e >= 32) {
1220 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1221 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1222
1223 if (sticky)
1224 z2 |= 1;
1225
1226 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1227 z0 = carry >> (e - 32);
1228 } else {
1229 sticky = z4 | z3 | (z2 << 1) << (31 - e);
1230 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1231
1232 if (sticky)
1233 z2 |= 1;
1234
1235 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1236 z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1237 }
1238
1239 ez = 1;
1240 } else if (carry) {
1241 /* carry out; shift right by one */
1242 sticky = (z2 & 1) | z3 | z4;
1243 z2 = (z2 >> 1) | (z1 << 31);
1244
1245 if (sticky)
1246 z2 |= 1;
1247
1248 z1 = (z1 >> 1) | (z0 << 31);
/* the carried-out bit becomes the new top bit of z0 */
1249 z0 = (z0 >> 1) | 0x80000000;
1250 ez++;
1251 } else {
1252 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1253 /*
1254 * borrow/cancellation; shift left as much as
1255 * exponent allows
1256 */
/* whole-word steps first, while ez can afford them */
1257 while (!z0 && ez >= 33) {
1258 z0 = z1;
1259 z1 = z2;
1260 z2 = z3;
1261 z3 = z4;
1262 z4 = 0;
1263 ez -= 32;
1264 }
1265
/* then single-bit steps, stopping at ez == 1 */
1266 while (z0 < 0x80000000u && ez > 1) {
1267 z0 = (z0 << 1) | (z1 >> 31);
1268 z1 = (z1 << 1) | (z2 >> 31);
1269 z2 = (z2 << 1) | (z3 >> 31);
1270 z3 = (z3 << 1) | (z4 >> 31);
1271 z4 <<= 1;
1272 ez--;
1273 }
1274 }
1275
/* fold the words below z2 into its sticky bit */
1276 if (z3 | z4)
1277 z2 |= 1;
1278 }
1279
1280 /* get the rounding mode */
/* taken from the control word saved on entry, not the one set above */
1281 rm = oldcwsw & 0x0c000000;
1282
1283 /* adjust exponent if result is subnormal */
1284 tinyafter = 0;
1285
/* top bit of z0 clear: the significand is not normalized */
1286 if (!(z0 & 0x80000000)) {
1287 ez = 0;
1288 tinyafter = 1;
1289
1290 if (!(z0 | z1 | z2)) { /* exact zero */
1291 zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1292 zz.i[1] = zz.i[0] = 0;
1293 __fenv_setcwsw(&oldcwsw);
1294 return (zz.e);
1295 }
1296 }
1297
1298 /*
1299 * flip the sense of directed roundings if the result is negative;
1300 * the logic below applies to a positive result
1301 */
1302 if (sz && (rm == FCW_RM || rm == FCW_RP))
1303 rm = (FCW_RM + FCW_RP) - rm;
1304
1305 /* round */
1306 if (z2) {
/* for FCW_RN: above half, or exactly half with an odd last bit */
1307 if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || (z2 ==
1308 0x80000000u && (z1 & 1))))) {
1309 /* round up and renormalize if necessary */
1310 if (++z1 == 0) {
1311 if (++z0 == 0) {
/* significand wrapped: restore the top bit, bump ez */
1312 z0 = 0x80000000;
1313 ez++;
1314 } else if (z0 == 0x80000000) {
1315 /* rounded up to smallest normal */
1316 ez = 1;
1317
1318 if ((rm == FCW_RP && z2 >
1319 0x80000000u) || (rm == FCW_RN &&
1320 z2 >= 0xc0000000u))
1321 /*
1322 * would have rounded up to
1323 * smallest normal even with
1324 * unbounded range
1325 */
1326 tinyafter = 0;
1327 }
1328 }
1329 }
1330 }
1331
1332 /* restore the control and status words, check for over/underflow */
1333 __fenv_setcwsw(&oldcwsw);
1334
1335 if (ez >= 0x7fff) {
/* overflow: infinity or the largest finite value, depending on rm */
1336 if (rm == FCW_RN || rm == FCW_RP) {
1337 zz.i[2] = sz | 0x7fff;
1338 zz.i[1] = 0x80000000;
1339 zz.i[0] = 0;
1340 } else {
1341 zz.i[2] = sz | 0x7ffe;
1342 zz.i[1] = 0xffffffff;
1343 zz.i[0] = 0xffffffff;
1344 }
1345
/* huge * huge overflows, raising overflow and inexact */
1346 dummy = huge;
1347 dummy *= huge;
1348 } else {
1349 zz.i[2] = sz | ez;
1350 zz.i[1] = z0;
1351 zz.i[0] = z1;
1352
1353 /*
1354 * tinyafter => result rounded w/ unbounded range would be tiny,
1355 * z2 nonzero => result delivered is inexact
1356 */
1357 if (tinyafter) {
1358 dummy = tiny;
1359
1360 if (z2)
1361 dummy *= tiny;
1362 else
1363 dummy -= tiny2;
1364 } else if (z2) {
/* huge + tiny is not exact, which raises inexact only */
1365 dummy = huge;
1366 dummy += tiny;
1367 }
1368 }
1369
1370 return (zz.e);
1371 }
1372 #else
1373 #error Unknown architecture
1374 #endif
|