5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #pragma weak fma = __fma
31
32 #include "libm.h"
33 #include "fma.h"
34 #include "fenv_inlines.h"
35
36 #if defined(__sparc)
37
/*
 * Double-precision constants for the SPARC code path.  Each entry is
 * written as a { high word, low word } pair; the values noted beside
 * the entries assume i[0] overlays the most significant word of d
 * (big-endian layout).
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ { 0x3fe00000u, 0x00000000u } },	/* [0] 2^-1 */
	{ { 0x40000000u, 0x00000000u } },	/* [1] 2^1 */
	{ { 0x43300000u, 0x00000000u } },	/* [2] 2^52 */
	{ { 0x41a00000u, 0x00000000u } },	/* [3] 2^27 */
	{ { 0x3e500000u, 0x00000000u } },	/* [4] 2^-26 */
	{ { 0x3df00000u, 0x00000000u } },	/* [5] 2^-32 */
	{ { 0x3bf00000u, 0x00000000u } },	/* [6] 2^-64 */
	{ { 0x7fe00000u, 0x00000000u } },	/* [7] 2^1023 */
	{ { 0x00100000u, 0x00000000u } },	/* [8] 2^-1022 */
	{ { 0x00100001u, 0x00000000u } }	/* [9] 2^-1022 * (1 + 2^-20) */
};

/* symbolic names for the table entries; indices must track the table */
#define	half	C[0].d
#define	two	C[1].d
#define	two52	C[2].d
#define	two27	C[3].d
#define	twom26	C[4].d
#define	twom32	C[5].d
#define	twom64	C[6].d
#define	huge	C[7].d
#define	tiny	C[8].d
#define	tiny2	C[9].d

/*
 * FSR image loaded while the significands are manipulated; only the
 * top two bits (the field later read back as fsr >> 30) are set.
 */
static const unsigned int fsr_rm = 0xc0000000u;
66
67 /*
68 * fma for SPARC: 64-bit double precision, big-endian
 *
 * Returns x * y + z.  Outline of the code below:
 *   - if x or y is inf, nan, or zero, or z is inf, nan, or zero, the
 *     result is produced with ordinary floating-point arithmetic;
 *   - otherwise the fsr is saved and replaced by fsr_rm, x*y is formed
 *     to 106 bits and held in the four 32-bit words xy0..xy3, and z is
 *     held in z0..z3;
 *   - the operand of smaller magnitude is shifted right (keeping a
 *     sticky bit) and added to or subtracted from the larger one;
 *   - the sum is normalized and rounded a single time in integer code,
 *     using the rounding direction taken from the saved fsr;
 *   - the saved fsr, updated with the exceptions detected here, is
 *     restored before returning.
69 */
70 double
71 __fma(double x, double y, double z) {
72 union {
73 unsigned i[2];
74 double d;
75 } xx, yy, zz;
76 double xhi, yhi, xlo, ylo, t;
77 unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
78 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
79 volatile double dummy;
80
81 /* extract the high order words of the arguments */
82 xx.d = x;
83 yy.d = y;
84 zz.d = z;
85 hx = xx.i[0] & ~0x80000000;
86 hy = yy.i[0] & ~0x80000000;
87 hz = zz.i[0] & ~0x80000000;
88
89 /* dispense with inf, nan, and zero cases */
90 if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 ||
91 (hy | yy.i[1]) == 0) /* x or y is inf, nan, or zero */
92 return (x * y + z);
93
94 if (hz >= 0x7ff00000) /* z is inf or nan */
95 return (x + z); /* avoid spurious under/overflow in x * y */
96
97 if ((hz | zz.i[1]) == 0) /* z is zero */
98 /*
99 * x * y isn't zero but could underflow to zero,
100 * so don't add z, lest we perturb the sign
101 */
102 return (x * y);
103
104 /*
105 * now x, y, and z are all finite and nonzero; save the fsr and
106 * set round-to-negative-infinity mode (and clear nonstandard
107 * mode before we try to scale subnormal operands)
108 */
109 __fenv_getfsr32(&fsr);
110 __fenv_setfsr32(&fsr_rm);
111
112 /* extract signs and exponents, and normalize subnormals */
113 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
114 sz = zz.i[0] & 0x80000000;
115 ex = hx >> 20;
/*
 * zero was handled above, so a zero exponent field means the operand
 * is subnormal: scale it by two52 and take 52 back off the exponent
 */
116 if (!ex) {
117 xx.d = x * two52;
118 ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
119 }
120 ey = hy >> 20;
121 if (!ey) {
122 yy.d = y * two52;
123 ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
124 }
125 ez = hz >> 20;
126 if (!ez) {
127 zz.d = z * two52;
128 ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
129 }
130
131 /* multiply x*y to 106 bits */
132 exy = ex + ey - 0x3ff;
/* force sign 0 and exponent field 0x3ff so that x and y lie in [1, 2) */
133 xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;
134 yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
135 x = xx.d;
136 y = yy.d;
/*
 * split each operand into a high and a low piece; the partial products
 * of the pieces are recombined below into y, the low-order part of
 * the product, while x receives the rounded product
 */
137 xhi = ((x + twom26) + two27) - two27;
138 yhi = ((y + twom26) + two27) - two27;
139 xlo = x - xhi;
140 ylo = y - yhi;
141 x *= y;
142 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
/* the product of two values in [1, 2) may reach [2, 4); rescale it */
143 if (x >= two) {
144 x *= half;
145 y *= half;
146 exy++;
147 }
148
149 /* extract the significands */
150 xx.d = x;
151 xy0 = (xx.i[0] & 0xfffff) | 0x100000;
152 xy1 = xx.i[1];
/*
 * NOTE(review): adding twom32, and then twom64 to what is left over,
 * appears intended to place successive 32-bit slices of the low-order
 * part in the low word of a double; this looks dependent on the
 * round-to-negative-infinity mode set above -- confirm
 */
153 yy.d = t = y + twom32;
154 xy2 = yy.i[1];
155 yy.d = (y - (t - twom32)) + twom64;
156 xy3 = yy.i[1];
157 z0 = (zz.i[0] & 0xfffff) | 0x100000;
158 z1 = zz.i[1];
159 z2 = z3 = 0;
160
161 /*
162 * now x*y is represented by sxy, exy, and xy[0-3], and z is
163 * represented likewise; swap if need be so |xy| <= |z|
164 */
165 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
166 (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
167 e = sxy; sxy = sz; sz = e;
168 e = exy; exy = ez; ez = e;
169 e = xy0; xy0 = z0; z0 = e;
170 e = xy1; xy1 = z1; z1 = e;
171 z2 = xy2; xy2 = 0;
172 z3 = xy3; xy3 = 0;
173 }
174
175 /* shift the significand of xy keeping a sticky bit */
176 e = ez - exy;
/* beyond 116 places only the sticky bit of xy survives */
177 if (e > 116) {
178 xy0 = xy1 = xy2 = 0;
179 xy3 = 1;
180 } else if (e >= 96) {
181 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
182 xy3 = xy0 >> (e - 96);
183 if (sticky)
184 xy3 |= 1;
185 xy0 = xy1 = xy2 = 0;
186 } else if (e >= 64) {
187 sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
188 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
189 if (sticky)
190 xy3 |= 1;
191 xy2 = xy0 >> (e - 64);
192 xy0 = xy1 = 0;
193 } else if (e >= 32) {
194 sticky = xy3 | ((xy2 << 1) << (63 - e));
195 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
196 if (sticky)
197 xy3 |= 1;
198 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
199 xy1 = xy0 >> (e - 32);
200 xy0 = 0;
201 } else if (e) {
202 sticky = (xy3 << 1) << (31 - e);
203 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
204 if (sticky)
205 xy3 |= 1;
206 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
207 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
208 xy0 >>= e;
209 }
210
211 /* if this is a magnitude subtract, negate the significand of xy */
212 if (sxy ^ sz) {
/* two's-complement negation of the 128-bit value xy0:xy1:xy2:xy3 */
213 xy0 = ~xy0;
214 xy1 = ~xy1;
215 xy2 = ~xy2;
216 xy3 = -xy3;
217 if (xy3 == 0)
218 if (++xy2 == 0)
219 if (++xy1 == 0)
220 xy0++;
221 }
222
223 /* add, propagating carries */
/* e is reused here as the carry out of each 32-bit word */
224 z3 += xy3;
225 e = (z3 < xy3);
226 z2 += xy2;
227 if (e) {
228 z2++;
229 e = (z2 <= xy2);
230 } else
231 e = (z2 < xy2);
232 z1 += xy1;
233 if (e) {
234 z1++;
235 e = (z1 <= xy1);
236 } else
237 e = (z1 < xy1);
238 z0 += xy0;
239 if (e)
240 z0++;
241
242 /* postnormalize and collect rounding information into z2 */
243 if (ez < 1) {
244 /* result is tiny; shift right until exponent is within range */
245 e = 1 - ez;
246 if (e > 56) {
247 z2 = 1; /* result can't be exactly zero */
248 z0 = z1 = 0;
249 } else if (e >= 32) {
250 sticky = z3 | z2 | ((z1 << 1) << (63 - e));
251 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
252 if (sticky)
253 z2 |= 1;
254 z1 = z0 >> (e - 32);
255 z0 = 0;
256 } else {
257 sticky = z3 | (z2 << 1) << (31 - e);
258 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
259 if (sticky)
260 z2 |= 1;
261 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
262 z0 >>= e;
263 }
264 ez = 1;
265 } else if (z0 >= 0x200000) {
266 /* carry out; shift right by one */
267 sticky = (z2 & 1) | z3;
268 z2 = (z2 >> 1) | (z1 << 31);
269 if (sticky)
270 z2 |= 1;
271 z1 = (z1 >> 1) | (z0 << 31);
272 z0 >>= 1;
273 ez++;
274 } else {
275 if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
276 /*
277 * borrow/cancellation; shift left as much as
278 * exponent allows
279 */
/* first by whole 32-bit words, then one bit at a time */
280 while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
281 z0 = z1;
282 z1 = z2;
283 z2 = z3;
284 z3 = 0;
285 ez -= 32;
286 }
287 while (z0 < 0x100000 && ez > 1) {
288 z0 = (z0 << 1) | (z1 >> 31);
289 z1 = (z1 << 1) | (z2 >> 31);
290 z2 = (z2 << 1) | (z3 >> 31);
291 z3 <<= 1;
292 ez--;
293 }
294 }
/* fold whatever remains in z3 into the sticky bit of z2 */
295 if (z3)
296 z2 |= 1;
297 }
298
299 /* get the rounding mode and clear current exceptions */
300 rm = fsr >> 30;
301 fsr &= ~FSR_CEXC;
302
303 /* strip off the integer bit, if there is one */
304 ibit = z0 & 0x100000;
305 if (ibit)
306 z0 -= 0x100000;
307 else {
308 ez = 0;
309 if (!(z0 | z1 | z2)) { /* exact zero */
/*
 * the zero is given a negative sign only when the saved rounding mode
 * equals FSR_RM (presumably round-to-negative-infinity)
 */
310 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
311 zz.i[1] = 0;
312 __fenv_setfsr32(&fsr);
313 return (zz.d);
314 }
315 }
316
317 /*
318 * flip the sense of directed roundings if the result is negative;
319 * the logic below applies to a positive result
320 */
321 if (sz)
322 rm ^= rm >> 1;
323
324 /* round and raise exceptions */
325 if (z2) {
326 fsr |= FSR_NXC;
327
328 /* decide whether to round the fraction up */
329 if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u ||
330 (z2 == 0x80000000u && (z1 & 1))))) {
331 /* round up and renormalize if necessary */
332 if (++z1 == 0) {
333 if (++z0 == 0x100000) {
334 z0 = 0;
335 ez++;
336 }
337 }
338 }
339 }
340
341 /* check for under/overflow */
342 if (ez >= 0x7ff) {
/* deliver infinity or the largest finite number, depending on rm */
343 if (rm == FSR_RN || rm == FSR_RP) {
344 zz.i[0] = sz | 0x7ff00000;
345 zz.i[1] = 0;
346 } else {
347 zz.i[0] = sz | 0x7fefffff;
348 zz.i[1] = 0xffffffff;
349 }
350 fsr |= FSR_OFC | FSR_NXC;
351 } else {
352 zz.i[0] = sz | (ez << 20) | z0;
353 zz.i[1] = z1;
354
355 /*
356 * !ibit => exact result was tiny before rounding,
357 * z2 nonzero => result delivered is inexact
358 */
359 if (!ibit) {
360 if (z2)
361 fsr |= FSR_UFC | FSR_NXC;
362 else if (fsr & FSR_UFM)
363 fsr |= FSR_UFC;
364 }
365 }
366
367 /* restore the fsr and emulate exceptions as needed */
/*
 * NOTE(review): fsr >> 23 presumably lines the trap-enable bits up with
 * the current-exception bits, so this tests for a raised exception
 * whose trap is enabled -- confirm against the FSR layout
 */
368 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
369 __fenv_setfsr32(&fsr);
/*
 * operate on the volatile dummy so that, presumably, the hardware
 * raises the corresponding exception itself
 */
370 if (fsr & FSR_OFC) {
371 dummy = huge;
372 dummy *= huge;
373 } else if (fsr & FSR_UFC) {
374 dummy = tiny;
375 if (fsr & FSR_NXC)
376 dummy *= tiny;
377 else
378 dummy -= tiny2;
379 } else {
380 dummy = huge;
381 dummy += tiny;
382 }
383 } else {
/*
 * copy the low five bits of fsr into the five bits above them
 * (presumably current exceptions into the accrued field)
 */
384 fsr |= (fsr & 0x1f) << 5;
385 __fenv_setfsr32(&fsr);
386 }
387 return (zz.d);
388 }
389
390 #elif defined(__x86)
391
/*
 * NI is the number of unsigned words in the unions that overlay a
 * long double in __fma below; it is larger when building for amd64.
 */
#ifdef __amd64
#define	NI	4
#else
#define	NI	3
#endif
397
398 /*
399 * fma for x86: 64-bit double precision, little-endian
400 */
401 double
402 __fma(double x, double y, double z) {
403 union {
404 unsigned i[NI];
405 long double e;
406 } xx, yy, zz;
407 long double xe, ye, xhi, xlo, yhi, ylo;
408 int ex, ey, ez;
409 unsigned cwsw, oldcwsw, rm;
410
411 /* convert the operands to double extended */
412 xx.e = (long double) x;
413 yy.e = (long double) y;
414 zz.e = (long double) z;
415
416 /* extract the exponents of the arguments */
417 ex = xx.i[2] & 0x7fff;
418 ey = yy.i[2] & 0x7fff;
419 ez = zz.i[2] & 0x7fff;
420
421 /* dispense with inf, nan, and zero cases */
422 if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
423 /* x or y is inf, nan, or zero */
424 return ((double) (xx.e * yy.e + zz.e));
425
426 if (ez >= 0x7fff) /* z is inf or nan */
427 return ((double) (xx.e + zz.e));
428 /* avoid spurious inexact in x * y */
429
430 /*
431 * save the control and status words, mask all exceptions, and
432 * set rounding to 64-bit precision and to-nearest
433 */
434 __fenv_getcwsw(&oldcwsw);
435 cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
436 __fenv_setcwsw(&cwsw);
437
438 /* multiply x*y to 106 bits */
439 xe = xx.e;
440 xx.i[0] = 0;
441 xhi = xx.e; /* hi 32 bits */
442 xlo = xe - xhi; /* lo 21 bits */
443 ye = yy.e;
444 yy.i[0] = 0;
445 yhi = yy.e;
446 ylo = ye - yhi;
447 xe = xe * ye;
451 xhi = ye + zz.e;
452 yhi = xhi - ye;
453 xlo = (zz.e - yhi) + (ye - (xhi - yhi));
454 /* now (xhi,xlo) = ye + z */
455
456 yhi = xe + xhi;
457 ye = yhi - xe;
458 ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */
459
460 xhi = xlo + ylo;
461 xe = xhi - xlo;
462 xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */
463
464 yy.e = yhi + xhi;
465 ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */
466
467 if (yy.i[1] != 0) { /* yy.e is nonzero */
468 /* perturb yy.e if its least significant 10 bits are zero */
469 if (!(yy.i[0] & 0x3ff)) {
470 xx.e = ylo + xlo;
471 if (xx.i[1] != 0) {
472 xx.i[2] = (xx.i[2] & 0x8000) |
473 ((yy.i[2] & 0x7fff) - 63);
474 xx.i[1] = 0x80000000;
475 xx.i[0] = 0;
476 yy.e += xx.e;
477 }
478 }
479 } else {
480 /* set sign of zero result according to rounding direction */
481 rm = oldcwsw & 0x0c000000;
482 yy.i[2] = ((rm == FCW_RM)? 0x8000 : 0);
483 }
484
485 /*
486 * restore the control and status words and convert the result
487 * to double
488 */
489 __fenv_setcwsw(&oldcwsw);
490 return ((double) yy.e);
491 }
492
493 #else
494 #error Unknown architecture
495 #endif
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #pragma weak fma = __fma
32
33 #include "libm.h"
34 #include "fma.h"
35 #include "fenv_inlines.h"
36
37 #if defined(__sparc)
/*
 * Double-precision constants for the SPARC code path.  Each entry is
 * written as a { high word, low word } pair; the values noted beside
 * the entries assume i[0] overlays the most significant word of d
 * (big-endian layout).
 */
static const union {
	unsigned i[2];
	double d;
} C[] = {
	{ { 0x3fe00000u, 0x00000000u } },	/* [0] 2^-1 */
	{ { 0x40000000u, 0x00000000u } },	/* [1] 2^1 */
	{ { 0x43300000u, 0x00000000u } },	/* [2] 2^52 */
	{ { 0x41a00000u, 0x00000000u } },	/* [3] 2^27 */
	{ { 0x3e500000u, 0x00000000u } },	/* [4] 2^-26 */
	{ { 0x3df00000u, 0x00000000u } },	/* [5] 2^-32 */
	{ { 0x3bf00000u, 0x00000000u } },	/* [6] 2^-64 */
	{ { 0x7fe00000u, 0x00000000u } },	/* [7] 2^1023 */
	{ { 0x00100000u, 0x00000000u } },	/* [8] 2^-1022 */
	{ { 0x00100001u, 0x00000000u } }	/* [9] 2^-1022 * (1 + 2^-20) */
};

/* symbolic names for the table entries; indices must track the table */
#define	half	C[0].d
#define	two	C[1].d
#define	two52	C[2].d
#define	two27	C[3].d
#define	twom26	C[4].d
#define	twom32	C[5].d
#define	twom64	C[6].d
#define	huge	C[7].d
#define	tiny	C[8].d
#define	tiny2	C[9].d

/*
 * FSR image loaded while the significands are manipulated; only the
 * top two bits (the field later read back as fsr >> 30) are set.
 */
static const unsigned int fsr_rm = 0xc0000000u;
66
67 /*
68 * fma for SPARC: 64-bit double precision, big-endian
 *
 * Returns x * y + z.  Outline of the code below:
 *   - if x or y is inf, nan, or zero, or z is inf, nan, or zero, the
 *     result is produced with ordinary floating-point arithmetic;
 *   - otherwise the fsr is saved and replaced by fsr_rm, x*y is formed
 *     to 106 bits and held in the four 32-bit words xy0..xy3, and z is
 *     held in z0..z3;
 *   - the operand of smaller magnitude is shifted right (keeping a
 *     sticky bit) and added to or subtracted from the larger one;
 *   - the sum is normalized and rounded a single time in integer code,
 *     using the rounding direction taken from the saved fsr;
 *   - the saved fsr, updated with the exceptions detected here, is
 *     restored before returning.
69 */
70 double
71 __fma(double x, double y, double z)
72 {
73 union {
74 unsigned i[2];
75 double d;
76 } xx, yy, zz;
77
78 double xhi, yhi, xlo, ylo, t;
79 unsigned int xy0, xy1, xy2, xy3, z0, z1, z2, z3, fsr, rm, sticky;
80 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
81 volatile double dummy;
82
83 /* extract the high order words of the arguments */
84 xx.d = x;
85 yy.d = y;
86 zz.d = z;
87 hx = xx.i[0] & ~0x80000000;
88 hy = yy.i[0] & ~0x80000000;
89 hz = zz.i[0] & ~0x80000000;
90
91 /* dispense with inf, nan, and zero cases */
92 if (hx >= 0x7ff00000 || hy >= 0x7ff00000 || (hx | xx.i[1]) == 0 || (hy |
93 yy.i[1]) == 0) /* x or y is inf, nan, or zero */
94 return (x * y + z);
95
96 if (hz >= 0x7ff00000) /* z is inf or nan */
97 return (x + z); /* avoid spurious under/overflow in x * y */
98
99 if ((hz | zz.i[1]) == 0) /* z is zero */
100 /*
101 * x * y isn't zero but could underflow to zero,
102 * so don't add z, lest we perturb the sign
103 */
104 return (x * y);
105
106 /*
107 * now x, y, and z are all finite and nonzero; save the fsr and
108 * set round-to-negative-infinity mode (and clear nonstandard
109 * mode before we try to scale subnormal operands)
110 */
111 __fenv_getfsr32(&fsr);
112 __fenv_setfsr32(&fsr_rm);
113
114 /* extract signs and exponents, and normalize subnormals */
115 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
116 sz = zz.i[0] & 0x80000000;
117 ex = hx >> 20;
118
/*
 * zero was handled above, so a zero exponent field means the operand
 * is subnormal: scale it by two52 and take 52 back off the exponent
 */
119 if (!ex) {
120 xx.d = x * two52;
121 ex = ((xx.i[0] & ~0x80000000) >> 20) - 52;
122 }
123
124 ey = hy >> 20;
125
126 if (!ey) {
127 yy.d = y * two52;
128 ey = ((yy.i[0] & ~0x80000000) >> 20) - 52;
129 }
130
131 ez = hz >> 20;
132
133 if (!ez) {
134 zz.d = z * two52;
135 ez = ((zz.i[0] & ~0x80000000) >> 20) - 52;
136 }
137
138 /* multiply x*y to 106 bits */
139 exy = ex + ey - 0x3ff;
/* force sign 0 and exponent field 0x3ff so that x and y lie in [1, 2) */
140 xx.i[0] = (xx.i[0] & 0xfffff) | 0x3ff00000;
141 yy.i[0] = (yy.i[0] & 0xfffff) | 0x3ff00000;
142 x = xx.d;
143 y = yy.d;
/*
 * split each operand into a high and a low piece; the partial products
 * of the pieces are recombined below into y, the low-order part of
 * the product, while x receives the rounded product
 */
144 xhi = ((x + twom26) + two27) - two27;
145 yhi = ((y + twom26) + two27) - two27;
146 xlo = x - xhi;
147 ylo = y - yhi;
148 x *= y;
149 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
150
/* the product of two values in [1, 2) may reach [2, 4); rescale it */
151 if (x >= two) {
152 x *= half;
153 y *= half;
154 exy++;
155 }
156
157 /* extract the significands */
158 xx.d = x;
159 xy0 = (xx.i[0] & 0xfffff) | 0x100000;
160 xy1 = xx.i[1];
/*
 * NOTE(review): adding twom32, and then twom64 to what is left over,
 * appears intended to place successive 32-bit slices of the low-order
 * part in the low word of a double; this looks dependent on the
 * round-to-negative-infinity mode set above -- confirm
 */
161 yy.d = t = y + twom32;
162 xy2 = yy.i[1];
163 yy.d = (y - (t - twom32)) + twom64;
164 xy3 = yy.i[1];
165 z0 = (zz.i[0] & 0xfffff) | 0x100000;
166 z1 = zz.i[1];
167 z2 = z3 = 0;
168
169 /*
170 * now x*y is represented by sxy, exy, and xy[0-3], and z is
171 * represented likewise; swap if need be so |xy| <= |z|
172 */
173 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
174 (xy1 == z1 && (xy2 | xy3) != 0)))))) {
175 e = sxy;
176 sxy = sz;
177 sz = e;
178 e = exy;
179 exy = ez;
180 ez = e;
181 e = xy0;
182 xy0 = z0;
183 z0 = e;
184 e = xy1;
185 xy1 = z1;
186 z1 = e;
187 z2 = xy2;
188 xy2 = 0;
189 z3 = xy3;
190 xy3 = 0;
191 }
192
193 /* shift the significand of xy keeping a sticky bit */
194 e = ez - exy;
195
/* beyond 116 places only the sticky bit of xy survives */
196 if (e > 116) {
197 xy0 = xy1 = xy2 = 0;
198 xy3 = 1;
199 } else if (e >= 96) {
200 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (127 - e));
201 xy3 = xy0 >> (e - 96);
202
203 if (sticky)
204 xy3 |= 1;
205
206 xy0 = xy1 = xy2 = 0;
207 } else if (e >= 64) {
208 sticky = xy3 | xy2 | ((xy1 << 1) << (95 - e));
209 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
210
211 if (sticky)
212 xy3 |= 1;
213
214 xy2 = xy0 >> (e - 64);
215 xy0 = xy1 = 0;
216 } else if (e >= 32) {
217 sticky = xy3 | ((xy2 << 1) << (63 - e));
218 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
219
220 if (sticky)
221 xy3 |= 1;
222
223 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
224 xy1 = xy0 >> (e - 32);
225 xy0 = 0;
226 } else if (e) {
227 sticky = (xy3 << 1) << (31 - e);
228 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
229
230 if (sticky)
231 xy3 |= 1;
232
233 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
234 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
235 xy0 >>= e;
236 }
237
238 /* if this is a magnitude subtract, negate the significand of xy */
239 if (sxy ^ sz) {
/* two's-complement negation of the 128-bit value xy0:xy1:xy2:xy3 */
240 xy0 = ~xy0;
241 xy1 = ~xy1;
242 xy2 = ~xy2;
243 xy3 = -xy3;
244
245 if (xy3 == 0)
246 if (++xy2 == 0)
247 if (++xy1 == 0)
248 xy0++;
249 }
250
251 /* add, propagating carries */
/* e is reused here as the carry out of each 32-bit word */
252 z3 += xy3;
253 e = (z3 < xy3);
254 z2 += xy2;
255
256 if (e) {
257 z2++;
258 e = (z2 <= xy2);
259 } else {
260 e = (z2 < xy2);
261 }
262
263 z1 += xy1;
264
265 if (e) {
266 z1++;
267 e = (z1 <= xy1);
268 } else {
269 e = (z1 < xy1);
270 }
271
272 z0 += xy0;
273
274 if (e)
275 z0++;
276
277 /* postnormalize and collect rounding information into z2 */
278 if (ez < 1) {
279 /* result is tiny; shift right until exponent is within range */
280 e = 1 - ez;
281
282 if (e > 56) {
283 z2 = 1; /* result can't be exactly zero */
284 z0 = z1 = 0;
285 } else if (e >= 32) {
286 sticky = z3 | z2 | ((z1 << 1) << (63 - e));
287 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
288
289 if (sticky)
290 z2 |= 1;
291
292 z1 = z0 >> (e - 32);
293 z0 = 0;
294 } else {
295 sticky = z3 | (z2 << 1) << (31 - e);
296 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
297
298 if (sticky)
299 z2 |= 1;
300
301 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
302 z0 >>= e;
303 }
304
305 ez = 1;
306 } else if (z0 >= 0x200000) {
307 /* carry out; shift right by one */
308 sticky = (z2 & 1) | z3;
309 z2 = (z2 >> 1) | (z1 << 31);
310
311 if (sticky)
312 z2 |= 1;
313
314 z1 = (z1 >> 1) | (z0 << 31);
315 z0 >>= 1;
316 ez++;
317 } else {
318 if (z0 < 0x100000 && (z0 | z1 | z2 | z3) != 0) {
319 /*
320 * borrow/cancellation; shift left as much as
321 * exponent allows
322 */
/* first by whole 32-bit words, then one bit at a time */
323 while (!(z0 | (z1 & 0xffe00000)) && ez >= 33) {
324 z0 = z1;
325 z1 = z2;
326 z2 = z3;
327 z3 = 0;
328 ez -= 32;
329 }
330
331 while (z0 < 0x100000 && ez > 1) {
332 z0 = (z0 << 1) | (z1 >> 31);
333 z1 = (z1 << 1) | (z2 >> 31);
334 z2 = (z2 << 1) | (z3 >> 31);
335 z3 <<= 1;
336 ez--;
337 }
338 }
339
/* fold whatever remains in z3 into the sticky bit of z2 */
340 if (z3)
341 z2 |= 1;
342 }
343
344 /* get the rounding mode and clear current exceptions */
345 rm = fsr >> 30;
346 fsr &= ~FSR_CEXC;
347
348 /* strip off the integer bit, if there is one */
349 ibit = z0 & 0x100000;
350
351 if (ibit) {
352 z0 -= 0x100000;
353 } else {
354 ez = 0;
355
356 if (!(z0 | z1 | z2)) { /* exact zero */
/*
 * the zero is given a negative sign only when the saved rounding mode
 * equals FSR_RM (presumably round-to-negative-infinity)
 */
357 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
358 zz.i[1] = 0;
359 __fenv_setfsr32(&fsr);
360 return (zz.d);
361 }
362 }
363
364 /*
365 * flip the sense of directed roundings if the result is negative;
366 * the logic below applies to a positive result
367 */
368 if (sz)
369 rm ^= rm >> 1;
370
371 /* round and raise exceptions */
372 if (z2) {
373 fsr |= FSR_NXC;
374
375 /* decide whether to round the fraction up */
376 if (rm == FSR_RP || (rm == FSR_RN && (z2 > 0x80000000u || (z2 ==
377 0x80000000u && (z1 & 1))))) {
378 /* round up and renormalize if necessary */
379 if (++z1 == 0) {
380 if (++z0 == 0x100000) {
381 z0 = 0;
382 ez++;
383 }
384 }
385 }
386 }
387
388 /* check for under/overflow */
389 if (ez >= 0x7ff) {
/* deliver infinity or the largest finite number, depending on rm */
390 if (rm == FSR_RN || rm == FSR_RP) {
391 zz.i[0] = sz | 0x7ff00000;
392 zz.i[1] = 0;
393 } else {
394 zz.i[0] = sz | 0x7fefffff;
395 zz.i[1] = 0xffffffff;
396 }
397
398 fsr |= FSR_OFC | FSR_NXC;
399 } else {
400 zz.i[0] = sz | (ez << 20) | z0;
401 zz.i[1] = z1;
402
403 /*
404 * !ibit => exact result was tiny before rounding,
405 * z2 nonzero => result delivered is inexact
406 */
407 if (!ibit) {
408 if (z2)
409 fsr |= FSR_UFC | FSR_NXC;
410 else if (fsr & FSR_UFM)
411 fsr |= FSR_UFC;
412 }
413 }
414
415 /* restore the fsr and emulate exceptions as needed */
/*
 * NOTE(review): fsr >> 23 presumably lines the trap-enable bits up with
 * the current-exception bits, so this tests for a raised exception
 * whose trap is enabled -- confirm against the FSR layout
 */
416 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
417 __fenv_setfsr32(&fsr);
418
/*
 * operate on the volatile dummy so that, presumably, the hardware
 * raises the corresponding exception itself
 */
419 if (fsr & FSR_OFC) {
420 dummy = huge;
421 dummy *= huge;
422 } else if (fsr & FSR_UFC) {
423 dummy = tiny;
424
425 if (fsr & FSR_NXC)
426 dummy *= tiny;
427 else
428 dummy -= tiny2;
429 } else {
430 dummy = huge;
431 dummy += tiny;
432 }
433 } else {
/*
 * copy the low five bits of fsr into the five bits above them
 * (presumably current exceptions into the accrued field)
 */
434 fsr |= (fsr & 0x1f) << 5;
435 __fenv_setfsr32(&fsr);
436 }
437
438 return (zz.d);
439 }
440 #elif defined(__x86)
/*
 * NI is the number of unsigned words in the unions that overlay a
 * long double in __fma below; it is larger when building for amd64.
 */
#ifdef __amd64
#define	NI	4
#else
#define	NI	3
#endif
446
447 /*
448 * fma for x86: 64-bit double precision, little-endian
449 */
450 double
451 __fma(double x, double y, double z)
452 {
453 union {
454 unsigned i[NI];
455 long double e;
456 } xx, yy, zz;
457
458 long double xe, ye, xhi, xlo, yhi, ylo;
459 int ex, ey, ez;
460 unsigned cwsw, oldcwsw, rm;
461
462 /* convert the operands to double extended */
463 xx.e = (long double)x;
464 yy.e = (long double)y;
465 zz.e = (long double)z;
466
467 /* extract the exponents of the arguments */
468 ex = xx.i[2] & 0x7fff;
469 ey = yy.i[2] & 0x7fff;
470 ez = zz.i[2] & 0x7fff;
471
472 /* dispense with inf, nan, and zero cases */
473 if (ex == 0x7fff || ey == 0x7fff || ex == 0 || ey == 0)
474 /* x or y is inf, nan, or zero */
475 return ((double)(xx.e * yy.e + zz.e));
476
477 if (ez >= 0x7fff) /* z is inf or nan */
478 return ((double)(xx.e + zz.e));
479
480 /* avoid spurious inexact in x * y */
481
482 /*
483 * save the control and status words, mask all exceptions, and
484 * set rounding to 64-bit precision and to-nearest
485 */
486 __fenv_getcwsw(&oldcwsw);
487 cwsw = (oldcwsw & 0xf0c0ffff) | 0x033f0000;
488 __fenv_setcwsw(&cwsw);
489
490 /* multiply x*y to 106 bits */
491 xe = xx.e;
492 xx.i[0] = 0;
493 xhi = xx.e; /* hi 32 bits */
494 xlo = xe - xhi; /* lo 21 bits */
495 ye = yy.e;
496 yy.i[0] = 0;
497 yhi = yy.e;
498 ylo = ye - yhi;
499 xe = xe * ye;
503 xhi = ye + zz.e;
504 yhi = xhi - ye;
505 xlo = (zz.e - yhi) + (ye - (xhi - yhi));
506 /* now (xhi,xlo) = ye + z */
507
508 yhi = xe + xhi;
509 ye = yhi - xe;
510 ylo = (xhi - ye) + (xe - (yhi - ye)); /* now (yhi,ylo) = xe + xhi */
511
512 xhi = xlo + ylo;
513 xe = xhi - xlo;
514 xlo = (ylo - xe) + (xlo - (xhi - xe)); /* now (xhi,xlo) = xlo + ylo */
515
516 yy.e = yhi + xhi;
517 ylo = (yhi - yy.e) + xhi; /* now (yy.e,ylo) = xhi + yhi */
518
519 if (yy.i[1] != 0) { /* yy.e is nonzero */
520 /* perturb yy.e if its least significant 10 bits are zero */
521 if (!(yy.i[0] & 0x3ff)) {
522 xx.e = ylo + xlo;
523
524 if (xx.i[1] != 0) {
525 xx.i[2] = (xx.i[2] & 0x8000) | ((yy.i[2] &
526 0x7fff) - 63);
527 xx.i[1] = 0x80000000;
528 xx.i[0] = 0;
529 yy.e += xx.e;
530 }
531 }
532 } else {
533 /* set sign of zero result according to rounding direction */
534 rm = oldcwsw & 0x0c000000;
535 yy.i[2] = ((rm == FCW_RM) ? 0x8000 : 0);
536 }
537
538 /*
539 * restore the control and status words and convert the result
540 * to double
541 */
542 __fenv_setcwsw(&oldcwsw);
543 return ((double)yy.e);
544 }
545 #else
546 #error Unknown architecture
547 #endif
|