Print this page
11210 libm should be cstyle(1ONBLD) clean
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/fmal.c
+++ new/usr/src/lib/libm/common/m9x/fmal.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 +
25 26 /*
26 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 28 * Use is subject to license terms.
28 29 */
29 30
30 31 #pragma weak fmal = __fmal
31 32
32 33 #include "libm.h"
33 34 #include "fma.h"
34 35 #include "fenv_inlines.h"
35 36
36 37 #if defined(__sparc)
37 -
38 38 static const union {
39 39 unsigned i[2];
40 40 double d;
41 41 } C[] = {
42 42 { 0x3fe00000u, 0 },
43 43 { 0x40000000u, 0 },
44 44 { 0x3ef00000u, 0 },
45 45 { 0x3e700000u, 0 },
46 46 { 0x41300000u, 0 },
47 47 { 0x3e300000u, 0 },
48 48 { 0x3b300000u, 0 },
49 49 { 0x38300000u, 0 },
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
50 50 { 0x42300000u, 0 },
51 51 { 0x3df00000u, 0 },
52 52 { 0x7fe00000u, 0 },
53 53 { 0x00100000u, 0 },
54 54 { 0x00100001u, 0 },
55 55 { 0, 0 },
56 56 { 0x7ff00000u, 0 },
57 57 { 0x7ff00001u, 0 }
58 58 };
59 59
60 -#define half C[0].d
61 -#define two C[1].d
62 -#define twom16 C[2].d
63 -#define twom24 C[3].d
64 -#define two20 C[4].d
65 -#define twom28 C[5].d
66 -#define twom76 C[6].d
67 -#define twom124 C[7].d
68 -#define two36 C[8].d
69 -#define twom32 C[9].d
70 -#define huge C[10].d
71 -#define tiny C[11].d
72 -#define tiny2 C[12].d
73 -#define zero C[13].d
74 -#define inf C[14].d
75 -#define snan C[15].d
60 +#define half C[0].d
61 +#define two C[1].d
62 +#define twom16 C[2].d
63 +#define twom24 C[3].d
64 +#define two20 C[4].d
65 +#define twom28 C[5].d
66 +#define twom76 C[6].d
67 +#define twom124 C[7].d
68 +#define two36 C[8].d
69 +#define twom32 C[9].d
70 +#define huge C[10].d
71 +#define tiny C[11].d
72 +#define tiny2 C[12].d
73 +#define zero C[13].d
74 +#define inf C[14].d
75 +#define snan C[15].d
76 76
77 77 static const unsigned int fsr_rm = 0xc0000000u;
78 78
79 79 /*
80 80 * fmal for SPARC: 128-bit quad precision, big-endian
81 81 */
82 82 long double
83 -__fmal(long double x, long double y, long double z) {
83 +__fmal(long double x, long double y, long double z)
84 +{
84 85 union {
85 86 unsigned int i[4];
86 87 long double q;
87 88 } xx, yy, zz;
88 89 union {
89 90 unsigned int i[2];
90 91 double d;
91 92 } u;
93 +
92 94 double dx[5], dy[5], dxy[9], c, s;
93 95 unsigned int xy0, xy1, xy2, xy3, xy4, xy5, xy6, xy7;
94 96 unsigned int z0, z1, z2, z3, z4, z5, z6, z7;
95 97 unsigned int rm, sticky;
96 98 unsigned int fsr;
97 99 int hx, hy, hz, ex, ey, ez, exy, sxy, sz, e, ibit;
98 100 int cx, cy, cz;
99 - volatile double dummy;
101 + volatile double dummy;
100 102
101 103 /* extract the high order words of the arguments */
102 104 xx.q = x;
103 105 yy.q = y;
104 106 zz.q = z;
105 107 hx = xx.i[0] & ~0x80000000;
106 108 hy = yy.i[0] & ~0x80000000;
107 109 hz = zz.i[0] & ~0x80000000;
108 110
109 111 /*
110 112 * distinguish zero, finite nonzero, infinite, and quiet nan
111 113 * arguments; raise invalid and return for signaling nans
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
112 114 */
113 115 if (hx >= 0x7fff0000) {
114 116 if ((hx & 0xffff) | xx.i[1] | xx.i[2] | xx.i[3]) {
115 117 if (!(hx & 0x8000)) {
116 118 /* signaling nan, raise invalid */
117 119 dummy = snan;
118 120 dummy += snan;
119 121 xx.i[0] |= 0x8000;
120 122 return (xx.q);
121 123 }
122 - cx = 3; /* quiet nan */
123 - } else
124 - cx = 2; /* inf */
124 +
125 + cx = 3; /* quiet nan */
126 + } else {
127 + cx = 2; /* inf */
128 + }
125 129 } else if (hx == 0) {
126 130 cx = (xx.i[1] | xx.i[2] | xx.i[3]) ? 1 : 0;
127 - /* subnormal or zero */
128 - } else
129 - cx = 1; /* finite nonzero */
131 + /* subnormal or zero */
132 + } else {
133 + cx = 1; /* finite nonzero */
134 + }
130 135
131 136 if (hy >= 0x7fff0000) {
132 137 if ((hy & 0xffff) | yy.i[1] | yy.i[2] | yy.i[3]) {
133 138 if (!(hy & 0x8000)) {
134 139 dummy = snan;
135 140 dummy += snan;
136 141 yy.i[0] |= 0x8000;
137 142 return (yy.q);
138 143 }
144 +
139 145 cy = 3;
140 - } else
146 + } else {
141 147 cy = 2;
148 + }
142 149 } else if (hy == 0) {
143 150 cy = (yy.i[1] | yy.i[2] | yy.i[3]) ? 1 : 0;
144 - } else
151 + } else {
145 152 cy = 1;
153 + }
146 154
147 155 if (hz >= 0x7fff0000) {
148 156 if ((hz & 0xffff) | zz.i[1] | zz.i[2] | zz.i[3]) {
149 157 if (!(hz & 0x8000)) {
150 158 dummy = snan;
151 159 dummy += snan;
152 160 zz.i[0] |= 0x8000;
153 161 return (zz.q);
154 162 }
163 +
155 164 cz = 3;
156 - } else
165 + } else {
157 166 cz = 2;
167 + }
158 168 } else if (hz == 0) {
159 169 cz = (zz.i[1] | zz.i[2] | zz.i[3]) ? 1 : 0;
160 - } else
170 + } else {
161 171 cz = 1;
172 + }
162 173
163 174 /* get the fsr and clear current exceptions */
164 175 __fenv_getfsr32(&fsr);
165 176 fsr &= ~FSR_CEXC;
166 177
167 178 /* handle all other zero, inf, and nan cases */
168 179 if (cx != 1 || cy != 1 || cz != 1) {
169 180 /* if x or y is a quiet nan, return it */
170 181 if (cx == 3) {
171 182 __fenv_setfsr32(&fsr);
172 183 return (x);
173 184 }
185 +
174 186 if (cy == 3) {
175 187 __fenv_setfsr32(&fsr);
176 188 return (y);
177 189 }
178 190
179 191 /* if x*y is 0*inf, raise invalid and return the default nan */
180 192 if ((cx == 0 && cy == 2) || (cx == 2 && cy == 0)) {
181 193 dummy = zero;
182 194 dummy *= inf;
183 195 zz.i[0] = 0x7fffffff;
184 196 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
185 197 return (zz.q);
186 198 }
187 199
188 200 /* if z is a quiet nan, return it */
189 201 if (cz == 3) {
190 202 __fenv_setfsr32(&fsr);
191 203 return (z);
192 204 }
193 205
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
194 206 /*
195 207 * now none of x, y, or z is nan; handle cases where x or y
196 208 * is inf
197 209 */
198 210 if (cx == 2 || cy == 2) {
199 211 /*
200 212 * if z is also inf, either we have inf-inf or
201 213 * the result is the same as z depending on signs
202 214 */
203 215 if (cz == 2) {
204 - if ((int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
216 + if ((int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) < 0) {
205 217 dummy = inf;
206 218 dummy -= inf;
207 219 zz.i[0] = 0x7fffffff;
208 220 zz.i[1] = zz.i[2] = zz.i[3] =
209 - 0xffffffff;
221 + 0xffffffff;
210 222 return (zz.q);
211 223 }
224 +
212 225 __fenv_setfsr32(&fsr);
213 226 return (z);
214 227 }
215 228
216 229 /* otherwise the result is inf with appropriate sign */
217 230 zz.i[0] = ((xx.i[0] ^ yy.i[0]) & 0x80000000) |
218 - 0x7fff0000;
231 + 0x7fff0000;
219 232 zz.i[1] = zz.i[2] = zz.i[3] = 0;
220 233 __fenv_setfsr32(&fsr);
221 234 return (zz.q);
222 235 }
223 236
224 237 /* if z is inf, return it */
225 238 if (cz == 2) {
226 239 __fenv_setfsr32(&fsr);
227 240 return (z);
228 241 }
229 242
230 243 /*
231 244 * now x, y, and z are all finite; handle cases where x or y
232 245 * is zero
233 246 */
234 247 if (cx == 0 || cy == 0) {
235 248 /* either we have 0-0 or the result is the same as z */
236 - if (cz == 0 && (int) ((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
237 - 0) {
249 + if (cz == 0 && (int)((xx.i[0] ^ yy.i[0]) ^ zz.i[0]) <
250 + 0) {
238 251 zz.i[0] = (fsr >> 30) == FSR_RM ? 0x80000000 :
239 - 0;
252 + 0;
240 253 __fenv_setfsr32(&fsr);
241 254 return (zz.q);
242 255 }
256 +
243 257 __fenv_setfsr32(&fsr);
244 258 return (z);
245 259 }
246 260
247 261 /* if we get here, x and y are nonzero finite, z must be zero */
248 262 return (x * y);
249 263 }
250 264
251 265 /*
252 266 * now x, y, and z are all finite and nonzero; set round-to-
253 267 * negative-infinity mode
↓ open down ↓ |
1 lines elided |
↑ open up ↑ |
254 268 */
255 269 __fenv_setfsr32(&fsr_rm);
256 270
257 271 /*
258 272 * get the signs and exponents and normalize the significands
259 273 * of x and y
260 274 */
261 275 sxy = (xx.i[0] ^ yy.i[0]) & 0x80000000;
262 276 ex = hx >> 16;
263 277 hx &= 0xffff;
278 +
264 279 if (!ex) {
265 280 if (hx | (xx.i[1] & 0xfffe0000)) {
266 281 ex = 1;
267 282 } else if (xx.i[1] | (xx.i[2] & 0xfffe0000)) {
268 283 hx = xx.i[1];
269 284 xx.i[1] = xx.i[2];
270 285 xx.i[2] = xx.i[3];
271 286 xx.i[3] = 0;
272 287 ex = -31;
273 288 } else if (xx.i[2] | (xx.i[3] & 0xfffe0000)) {
274 289 hx = xx.i[2];
275 290 xx.i[1] = xx.i[3];
276 291 xx.i[2] = xx.i[3] = 0;
277 292 ex = -63;
278 293 } else {
279 294 hx = xx.i[3];
280 295 xx.i[1] = xx.i[2] = xx.i[3] = 0;
281 296 ex = -95;
282 297 }
298 +
283 299 while ((hx & 0x10000) == 0) {
284 300 hx = (hx << 1) | (xx.i[1] >> 31);
285 301 xx.i[1] = (xx.i[1] << 1) | (xx.i[2] >> 31);
286 302 xx.i[2] = (xx.i[2] << 1) | (xx.i[3] >> 31);
287 303 xx.i[3] <<= 1;
288 304 ex--;
289 305 }
290 - } else
306 + } else {
291 307 hx |= 0x10000;
308 + }
309 +
292 310 ey = hy >> 16;
293 311 hy &= 0xffff;
312 +
294 313 if (!ey) {
295 314 if (hy | (yy.i[1] & 0xfffe0000)) {
296 315 ey = 1;
297 316 } else if (yy.i[1] | (yy.i[2] & 0xfffe0000)) {
298 317 hy = yy.i[1];
299 318 yy.i[1] = yy.i[2];
300 319 yy.i[2] = yy.i[3];
301 320 yy.i[3] = 0;
302 321 ey = -31;
303 322 } else if (yy.i[2] | (yy.i[3] & 0xfffe0000)) {
304 323 hy = yy.i[2];
305 324 yy.i[1] = yy.i[3];
306 325 yy.i[2] = yy.i[3] = 0;
307 326 ey = -63;
308 327 } else {
309 328 hy = yy.i[3];
310 329 yy.i[1] = yy.i[2] = yy.i[3] = 0;
311 330 ey = -95;
312 331 }
332 +
313 333 while ((hy & 0x10000) == 0) {
314 334 hy = (hy << 1) | (yy.i[1] >> 31);
315 335 yy.i[1] = (yy.i[1] << 1) | (yy.i[2] >> 31);
316 336 yy.i[2] = (yy.i[2] << 1) | (yy.i[3] >> 31);
317 337 yy.i[3] <<= 1;
318 338 ey--;
319 339 }
320 - } else
340 + } else {
321 341 hy |= 0x10000;
342 + }
343 +
322 344 exy = ex + ey - 0x3fff;
323 345
324 346 /* convert the significands of x and y to doubles */
325 347 c = twom16;
326 - dx[0] = (double) ((int) hx) * c;
327 - dy[0] = (double) ((int) hy) * c;
348 + dx[0] = (double)((int)hx) * c;
349 + dy[0] = (double)((int)hy) * c;
328 350
329 351 c *= twom24;
330 - dx[1] = (double) ((int) (xx.i[1] >> 8)) * c;
331 - dy[1] = (double) ((int) (yy.i[1] >> 8)) * c;
352 + dx[1] = (double)((int)(xx.i[1] >> 8)) * c;
353 + dy[1] = (double)((int)(yy.i[1] >> 8)) * c;
332 354
333 355 c *= twom24;
334 - dx[2] = (double) ((int) (((xx.i[1] << 16) | (xx.i[2] >> 16)) &
356 + dx[2] = (double)((int)(((xx.i[1] << 16) | (xx.i[2] >> 16)) &
335 357 0xffffff)) * c;
336 - dy[2] = (double) ((int) (((yy.i[1] << 16) | (yy.i[2] >> 16)) &
358 + dy[2] = (double)((int)(((yy.i[1] << 16) | (yy.i[2] >> 16)) &
337 359 0xffffff)) * c;
338 360
339 361 c *= twom24;
340 - dx[3] = (double) ((int) (((xx.i[2] << 8) | (xx.i[3] >> 24)) &
341 - 0xffffff)) * c;
342 - dy[3] = (double) ((int) (((yy.i[2] << 8) | (yy.i[3] >> 24)) &
343 - 0xffffff)) * c;
362 + dx[3] = (double)((int)(((xx.i[2] << 8) | (xx.i[3] >> 24)) & 0xffffff)) *
363 + c;
364 + dy[3] = (double)((int)(((yy.i[2] << 8) | (yy.i[3] >> 24)) & 0xffffff)) *
365 + c;
344 366
345 367 c *= twom24;
346 - dx[4] = (double) ((int) (xx.i[3] & 0xffffff)) * c;
347 - dy[4] = (double) ((int) (yy.i[3] & 0xffffff)) * c;
368 + dx[4] = (double)((int)(xx.i[3] & 0xffffff)) * c;
369 + dy[4] = (double)((int)(yy.i[3] & 0xffffff)) * c;
348 370
349 371 /* form the "digits" of the product */
350 372 dxy[0] = dx[0] * dy[0];
351 373 dxy[1] = dx[0] * dy[1] + dx[1] * dy[0];
352 374 dxy[2] = dx[0] * dy[2] + dx[1] * dy[1] + dx[2] * dy[0];
353 - dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] +
354 - dx[3] * dy[0];
355 - dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] +
356 - dx[3] * dy[1] + dx[4] * dy[0];
357 - dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] +
358 - dx[4] * dy[1];
375 + dxy[3] = dx[0] * dy[3] + dx[1] * dy[2] + dx[2] * dy[1] + dx[3] * dy[0];
376 + dxy[4] = dx[0] * dy[4] + dx[1] * dy[3] + dx[2] * dy[2] + dx[3] * dy[1] +
377 + dx[4] * dy[0];
378 + dxy[5] = dx[1] * dy[4] + dx[2] * dy[3] + dx[3] * dy[2] + dx[4] * dy[1];
359 379 dxy[6] = dx[2] * dy[4] + dx[3] * dy[3] + dx[4] * dy[2];
360 380 dxy[7] = dx[3] * dy[4] + dx[4] * dy[3];
361 381 dxy[8] = dx[4] * dy[4];
362 382
363 383 /* split odd-numbered terms and combine into even-numbered terms */
364 384 c = (dxy[1] + two20) - two20;
365 385 dxy[0] += c;
366 386 dxy[1] -= c;
367 387 c = (dxy[3] + twom28) - twom28;
368 388 dxy[2] += c + dxy[1];
369 389 dxy[3] -= c;
370 390 c = (dxy[5] + twom76) - twom76;
371 391 dxy[4] += c + dxy[3];
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
372 392 dxy[5] -= c;
373 393 c = (dxy[7] + twom124) - twom124;
374 394 dxy[6] += c + dxy[5];
375 395 dxy[8] += (dxy[7] - c);
376 396
377 397 /* propagate carries, adjusting the exponent if need be */
378 398 dxy[7] = dxy[6] + dxy[8];
379 399 dxy[5] = dxy[4] + dxy[7];
380 400 dxy[3] = dxy[2] + dxy[5];
381 401 dxy[1] = dxy[0] + dxy[3];
402 +
382 403 if (dxy[1] >= two) {
383 404 dxy[0] *= half;
384 405 dxy[1] *= half;
385 406 dxy[2] *= half;
386 407 dxy[3] *= half;
387 408 dxy[4] *= half;
388 409 dxy[5] *= half;
389 410 dxy[6] *= half;
390 411 dxy[7] *= half;
391 412 dxy[8] *= half;
392 413 exy++;
393 414 }
394 415
395 416 /* extract the significand of x*y */
396 417 s = two36;
397 418 u.d = c = dxy[1] + s;
398 419 xy0 = u.i[1];
399 420 c -= s;
400 421 dxy[1] -= c;
401 422 dxy[0] -= c;
402 423
403 424 s *= twom32;
404 425 u.d = c = dxy[1] + s;
405 426 xy1 = u.i[1];
406 427 c -= s;
407 428 dxy[2] += (dxy[0] - c);
408 429 dxy[3] = dxy[2] + dxy[5];
409 430
410 431 s *= twom32;
411 432 u.d = c = dxy[3] + s;
412 433 xy2 = u.i[1];
413 434 c -= s;
414 435 dxy[4] += (dxy[2] - c);
415 436 dxy[5] = dxy[4] + dxy[7];
416 437
417 438 s *= twom32;
418 439 u.d = c = dxy[5] + s;
419 440 xy3 = u.i[1];
420 441 c -= s;
421 442 dxy[4] -= c;
422 443 dxy[5] = dxy[4] + dxy[7];
423 444
424 445 s *= twom32;
425 446 u.d = c = dxy[5] + s;
426 447 xy4 = u.i[1];
427 448 c -= s;
428 449 dxy[6] += (dxy[4] - c);
429 450 dxy[7] = dxy[6] + dxy[8];
430 451
431 452 s *= twom32;
432 453 u.d = c = dxy[7] + s;
433 454 xy5 = u.i[1];
434 455 c -= s;
435 456 dxy[8] += (dxy[6] - c);
436 457
437 458 s *= twom32;
438 459 u.d = c = dxy[8] + s;
439 460 xy6 = u.i[1];
440 461 c -= s;
↓ open down ↓ |
49 lines elided |
↑ open up ↑ |
441 462 dxy[8] -= c;
442 463
443 464 s *= twom32;
444 465 u.d = c = dxy[8] + s;
445 466 xy7 = u.i[1];
446 467
447 468 /* extract the sign, exponent, and significand of z */
448 469 sz = zz.i[0] & 0x80000000;
449 470 ez = hz >> 16;
450 471 z0 = hz & 0xffff;
472 +
451 473 if (!ez) {
452 474 if (z0 | (zz.i[1] & 0xfffe0000)) {
453 475 z1 = zz.i[1];
454 476 z2 = zz.i[2];
455 477 z3 = zz.i[3];
456 478 ez = 1;
457 479 } else if (zz.i[1] | (zz.i[2] & 0xfffe0000)) {
458 480 z0 = zz.i[1];
459 481 z1 = zz.i[2];
460 482 z2 = zz.i[3];
461 483 z3 = 0;
462 484 ez = -31;
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
463 485 } else if (zz.i[2] | (zz.i[3] & 0xfffe0000)) {
464 486 z0 = zz.i[2];
465 487 z1 = zz.i[3];
466 488 z2 = z3 = 0;
467 489 ez = -63;
468 490 } else {
469 491 z0 = zz.i[3];
470 492 z1 = z2 = z3 = 0;
471 493 ez = -95;
472 494 }
495 +
473 496 while ((z0 & 0x10000) == 0) {
474 497 z0 = (z0 << 1) | (z1 >> 31);
475 498 z1 = (z1 << 1) | (z2 >> 31);
476 499 z2 = (z2 << 1) | (z3 >> 31);
477 500 z3 <<= 1;
478 501 ez--;
479 502 }
480 503 } else {
481 504 z0 |= 0x10000;
482 505 z1 = zz.i[1];
483 506 z2 = zz.i[2];
484 507 z3 = zz.i[3];
485 508 }
509 +
486 510 z4 = z5 = z6 = z7 = 0;
487 511
488 512 /*
489 513 * now x*y is represented by sxy, exy, and xy[0-7], and z is
490 514 * represented likewise; swap if need be so |xy| <= |z|
491 515 */
492 516 if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
493 - (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 ||
494 - (xy3 == z3 && (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
495 - e = sxy; sxy = sz; sz = e;
496 - e = exy; exy = ez; ez = e;
497 - e = xy0; xy0 = z0; z0 = e;
498 - e = xy1; xy1 = z1; z1 = e;
499 - e = xy2; xy2 = z2; z2 = e;
500 - e = xy3; xy3 = z3; z3 = e;
501 - z4 = xy4; xy4 = 0;
502 - z5 = xy5; xy5 = 0;
503 - z6 = xy6; xy6 = 0;
504 - z7 = xy7; xy7 = 0;
517 + (xy1 == z1 && (xy2 > z2 || (xy2 == z2 && (xy3 > z3 || (xy3 == z3 &&
518 + (xy4 | xy5 | xy6 | xy7) != 0)))))))))) {
519 + e = sxy;
520 + sxy = sz;
521 + sz = e;
522 + e = exy;
523 + exy = ez;
524 + ez = e;
525 + e = xy0;
526 + xy0 = z0;
527 + z0 = e;
528 + e = xy1;
529 + xy1 = z1;
530 + z1 = e;
531 + e = xy2;
532 + xy2 = z2;
533 + z2 = e;
534 + e = xy3;
535 + xy3 = z3;
536 + z3 = e;
537 + z4 = xy4;
538 + xy4 = 0;
539 + z5 = xy5;
540 + xy5 = 0;
541 + z6 = xy6;
542 + xy6 = 0;
543 + z7 = xy7;
544 + xy7 = 0;
505 545 }
506 546
507 547 /* shift the significand of xy keeping a sticky bit */
508 548 e = ez - exy;
549 +
509 550 if (e > 236) {
510 551 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
511 552 xy7 = 1;
512 553 } else if (e >= 224) {
513 - sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 |
514 - ((xy0 << 1) << (255 - e));
554 + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | xy1 | ((xy0 <<
555 + 1) << (255 - e));
515 556 xy7 = xy0 >> (e - 224);
557 +
516 558 if (sticky)
517 559 xy7 |= 1;
560 +
518 561 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = xy6 = 0;
519 562 } else if (e >= 192) {
520 - sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 |
521 - ((xy1 << 1) << (223 - e));
563 + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | xy2 | ((xy1 << 1) <<
564 + (223 - e));
522 565 xy7 = (xy1 >> (e - 192)) | ((xy0 << 1) << (223 - e));
566 +
523 567 if (sticky)
524 568 xy7 |= 1;
569 +
525 570 xy6 = xy0 >> (e - 192);
526 571 xy0 = xy1 = xy2 = xy3 = xy4 = xy5 = 0;
527 572 } else if (e >= 160) {
528 - sticky = xy7 | xy6 | xy5 | xy4 | xy3 |
529 - ((xy2 << 1) << (191 - e));
573 + sticky = xy7 | xy6 | xy5 | xy4 | xy3 | ((xy2 << 1) << (191 -
574 + e));
530 575 xy7 = (xy2 >> (e - 160)) | ((xy1 << 1) << (191 - e));
576 +
531 577 if (sticky)
532 578 xy7 |= 1;
579 +
533 580 xy6 = (xy1 >> (e - 160)) | ((xy0 << 1) << (191 - e));
534 581 xy5 = xy0 >> (e - 160);
535 582 xy0 = xy1 = xy2 = xy3 = xy4 = 0;
536 583 } else if (e >= 128) {
537 584 sticky = xy7 | xy6 | xy5 | xy4 | ((xy3 << 1) << (159 - e));
538 585 xy7 = (xy3 >> (e - 128)) | ((xy2 << 1) << (159 - e));
586 +
539 587 if (sticky)
540 588 xy7 |= 1;
589 +
541 590 xy6 = (xy2 >> (e - 128)) | ((xy1 << 1) << (159 - e));
542 591 xy5 = (xy1 >> (e - 128)) | ((xy0 << 1) << (159 - e));
543 592 xy4 = xy0 >> (e - 128);
544 593 xy0 = xy1 = xy2 = xy3 = 0;
545 594 } else if (e >= 96) {
546 595 sticky = xy7 | xy6 | xy5 | ((xy4 << 1) << (127 - e));
547 596 xy7 = (xy4 >> (e - 96)) | ((xy3 << 1) << (127 - e));
597 +
548 598 if (sticky)
549 599 xy7 |= 1;
600 +
550 601 xy6 = (xy3 >> (e - 96)) | ((xy2 << 1) << (127 - e));
551 602 xy5 = (xy2 >> (e - 96)) | ((xy1 << 1) << (127 - e));
552 603 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
553 604 xy3 = xy0 >> (e - 96);
554 605 xy0 = xy1 = xy2 = 0;
555 606 } else if (e >= 64) {
556 607 sticky = xy7 | xy6 | ((xy5 << 1) << (95 - e));
557 608 xy7 = (xy5 >> (e - 64)) | ((xy4 << 1) << (95 - e));
609 +
558 610 if (sticky)
559 611 xy7 |= 1;
612 +
560 613 xy6 = (xy4 >> (e - 64)) | ((xy3 << 1) << (95 - e));
561 614 xy5 = (xy3 >> (e - 64)) | ((xy2 << 1) << (95 - e));
562 615 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
563 616 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
564 617 xy2 = xy0 >> (e - 64);
565 618 xy0 = xy1 = 0;
566 619 } else if (e >= 32) {
567 620 sticky = xy7 | ((xy6 << 1) << (63 - e));
568 621 xy7 = (xy6 >> (e - 32)) | ((xy5 << 1) << (63 - e));
622 +
569 623 if (sticky)
570 624 xy7 |= 1;
625 +
571 626 xy6 = (xy5 >> (e - 32)) | ((xy4 << 1) << (63 - e));
572 627 xy5 = (xy4 >> (e - 32)) | ((xy3 << 1) << (63 - e));
573 628 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
574 629 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
575 630 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
576 631 xy1 = xy0 >> (e - 32);
577 632 xy0 = 0;
578 633 } else if (e) {
579 634 sticky = (xy7 << 1) << (31 - e);
580 635 xy7 = (xy7 >> e) | ((xy6 << 1) << (31 - e));
636 +
581 637 if (sticky)
582 638 xy7 |= 1;
639 +
583 640 xy6 = (xy6 >> e) | ((xy5 << 1) << (31 - e));
584 641 xy5 = (xy5 >> e) | ((xy4 << 1) << (31 - e));
585 642 xy4 = (xy4 >> e) | ((xy3 << 1) << (31 - e));
586 643 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
587 644 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
588 645 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
589 646 xy0 >>= e;
590 647 }
591 648
592 649 /* if this is a magnitude subtract, negate the significand of xy */
593 650 if (sxy ^ sz) {
594 651 xy0 = ~xy0;
595 652 xy1 = ~xy1;
596 653 xy2 = ~xy2;
597 654 xy3 = ~xy3;
598 655 xy4 = ~xy4;
599 656 xy5 = ~xy5;
600 657 xy6 = ~xy6;
601 658 xy7 = -xy7;
659 +
602 660 if (xy7 == 0)
603 661 if (++xy6 == 0)
604 662 if (++xy5 == 0)
605 663 if (++xy4 == 0)
606 664 if (++xy3 == 0)
607 665 if (++xy2 == 0)
608 666 if (++xy1 == 0)
609 667 xy0++;
610 668 }
611 669
612 670 /* add, propagating carries */
613 671 z7 += xy7;
614 672 e = (z7 < xy7);
615 673 z6 += xy6;
674 +
616 675 if (e) {
617 676 z6++;
618 677 e = (z6 <= xy6);
619 - } else
678 + } else {
620 679 e = (z6 < xy6);
680 + }
681 +
621 682 z5 += xy5;
683 +
622 684 if (e) {
623 685 z5++;
624 686 e = (z5 <= xy5);
625 - } else
687 + } else {
626 688 e = (z5 < xy5);
689 + }
690 +
627 691 z4 += xy4;
692 +
628 693 if (e) {
629 694 z4++;
630 695 e = (z4 <= xy4);
631 - } else
696 + } else {
632 697 e = (z4 < xy4);
698 + }
699 +
633 700 z3 += xy3;
701 +
634 702 if (e) {
635 703 z3++;
636 704 e = (z3 <= xy3);
637 - } else
705 + } else {
638 706 e = (z3 < xy3);
707 + }
708 +
639 709 z2 += xy2;
710 +
640 711 if (e) {
641 712 z2++;
642 713 e = (z2 <= xy2);
643 - } else
714 + } else {
644 715 e = (z2 < xy2);
716 + }
717 +
645 718 z1 += xy1;
719 +
646 720 if (e) {
647 721 z1++;
648 722 e = (z1 <= xy1);
649 - } else
723 + } else {
650 724 e = (z1 < xy1);
725 + }
726 +
651 727 z0 += xy0;
728 +
652 729 if (e)
653 730 z0++;
654 731
655 732 /* postnormalize and collect rounding information into z4 */
656 733 if (ez < 1) {
657 734 /* result is tiny; shift right until exponent is within range */
658 735 e = 1 - ez;
736 +
659 737 if (e > 116) {
660 - z4 = 1; /* result can't be exactly zero */
738 + z4 = 1; /* result can't be exactly zero */
661 739 z0 = z1 = z2 = z3 = 0;
662 740 } else if (e >= 96) {
663 - sticky = z7 | z6 | z5 | z4 | z3 | z2 |
664 - ((z1 << 1) << (127 - e));
741 + sticky = z7 | z6 | z5 | z4 | z3 | z2 | ((z1 << 1) <<
742 + (127 - e));
665 743 z4 = (z1 >> (e - 96)) | ((z0 << 1) << (127 - e));
744 +
666 745 if (sticky)
667 746 z4 |= 1;
747 +
668 748 z3 = z0 >> (e - 96);
669 749 z0 = z1 = z2 = 0;
670 750 } else if (e >= 64) {
671 - sticky = z7 | z6 | z5 | z4 | z3 |
672 - ((z2 << 1) << (95 - e));
751 + sticky = z7 | z6 | z5 | z4 | z3 | ((z2 << 1) << (95 -
752 + e));
673 753 z4 = (z2 >> (e - 64)) | ((z1 << 1) << (95 - e));
754 +
674 755 if (sticky)
675 756 z4 |= 1;
757 +
676 758 z3 = (z1 >> (e - 64)) | ((z0 << 1) << (95 - e));
677 759 z2 = z0 >> (e - 64);
678 760 z0 = z1 = 0;
679 761 } else if (e >= 32) {
680 762 sticky = z7 | z6 | z5 | z4 | ((z3 << 1) << (63 - e));
681 763 z4 = (z3 >> (e - 32)) | ((z2 << 1) << (63 - e));
764 +
682 765 if (sticky)
683 766 z4 |= 1;
767 +
684 768 z3 = (z2 >> (e - 32)) | ((z1 << 1) << (63 - e));
685 769 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
686 770 z1 = z0 >> (e - 32);
687 771 z0 = 0;
688 772 } else {
689 773 sticky = z7 | z6 | z5 | (z4 << 1) << (31 - e);
690 774 z4 = (z4 >> e) | ((z3 << 1) << (31 - e));
775 +
691 776 if (sticky)
692 777 z4 |= 1;
778 +
693 779 z3 = (z3 >> e) | ((z2 << 1) << (31 - e));
694 780 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
695 781 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
696 782 z0 >>= e;
697 783 }
784 +
698 785 ez = 1;
699 786 } else if (z0 >= 0x20000) {
700 787 /* carry out; shift right by one */
701 788 sticky = (z4 & 1) | z5 | z6 | z7;
702 789 z4 = (z4 >> 1) | (z3 << 31);
790 +
703 791 if (sticky)
704 792 z4 |= 1;
793 +
705 794 z3 = (z3 >> 1) | (z2 << 31);
706 795 z2 = (z2 >> 1) | (z1 << 31);
707 796 z1 = (z1 >> 1) | (z0 << 31);
708 797 z0 >>= 1;
709 798 ez++;
710 799 } else {
711 - if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7)
712 - != 0) {
800 + if (z0 < 0x10000 && (z0 | z1 | z2 | z3 | z4 | z5 | z6 | z7) !=
801 + 0) {
713 802 /*
714 803 * borrow/cancellation; shift left as much as
715 804 * exponent allows
716 805 */
717 806 while (!(z0 | (z1 & 0xfffe0000)) && ez >= 33) {
718 807 z0 = z1;
719 808 z1 = z2;
720 809 z2 = z3;
721 810 z3 = z4;
722 811 z4 = z5;
723 812 z5 = z6;
724 813 z6 = z7;
725 814 z7 = 0;
726 815 ez -= 32;
727 816 }
817 +
728 818 while (z0 < 0x10000 && ez > 1) {
729 819 z0 = (z0 << 1) | (z1 >> 31);
730 820 z1 = (z1 << 1) | (z2 >> 31);
731 821 z2 = (z2 << 1) | (z3 >> 31);
732 822 z3 = (z3 << 1) | (z4 >> 31);
733 823 z4 = (z4 << 1) | (z5 >> 31);
734 824 z5 = (z5 << 1) | (z6 >> 31);
735 825 z6 = (z6 << 1) | (z7 >> 31);
736 826 z7 <<= 1;
737 827 ez--;
738 828 }
739 829 }
830 +
740 831 if (z5 | z6 | z7)
741 832 z4 |= 1;
742 833 }
743 834
744 835 /* get the rounding mode */
745 836 rm = fsr >> 30;
746 837
747 838 /* strip off the integer bit, if there is one */
748 839 ibit = z0 & 0x10000;
749 - if (ibit)
840 +
841 + if (ibit) {
750 842 z0 -= 0x10000;
751 - else {
843 + } else {
752 844 ez = 0;
753 - if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
845 +
846 + if (!(z0 | z1 | z2 | z3 | z4)) { /* exact zero */
754 847 zz.i[0] = rm == FSR_RM ? 0x80000000 : 0;
755 848 zz.i[1] = zz.i[2] = zz.i[3] = 0;
756 849 __fenv_setfsr32(&fsr);
757 850 return (zz.q);
758 851 }
759 852 }
760 853
761 854 /*
762 855 * flip the sense of directed roundings if the result is negative;
763 856 * the logic below applies to a positive result
764 857 */
765 858 if (sz)
766 859 rm ^= rm >> 1;
767 860
768 861 /* round and raise exceptions */
769 862 if (z4) {
770 863 fsr |= FSR_NXC;
771 864
772 865 /* decide whether to round the fraction up */
773 - if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u ||
774 - (z4 == 0x80000000u && (z3 & 1))))) {
866 + if (rm == FSR_RP || (rm == FSR_RN && (z4 > 0x80000000u || (z4 ==
867 + 0x80000000u && (z3 & 1))))) {
775 868 /* round up and renormalize if necessary */
776 869 if (++z3 == 0)
777 870 if (++z2 == 0)
778 871 if (++z1 == 0)
779 872 if (++z0 == 0x10000) {
780 873 z0 = 0;
781 874 ez++;
782 875 }
783 876 }
784 877 }
785 878
786 879 /* check for under/overflow */
787 880 if (ez >= 0x7fff) {
788 881 if (rm == FSR_RN || rm == FSR_RP) {
789 882 zz.i[0] = sz | 0x7fff0000;
790 883 zz.i[1] = zz.i[2] = zz.i[3] = 0;
791 884 } else {
792 885 zz.i[0] = sz | 0x7ffeffff;
793 886 zz.i[1] = zz.i[2] = zz.i[3] = 0xffffffff;
794 887 }
888 +
795 889 fsr |= FSR_OFC | FSR_NXC;
796 890 } else {
797 891 zz.i[0] = sz | (ez << 16) | z0;
798 892 zz.i[1] = z1;
799 893 zz.i[2] = z2;
800 894 zz.i[3] = z3;
801 895
802 896 /*
803 897 * !ibit => exact result was tiny before rounding,
804 898 * z4 nonzero => result delivered is inexact
805 899 */
806 900 if (!ibit) {
↓ open down ↓ |
2 lines elided |
↑ open up ↑ |
807 901 if (z4)
808 902 fsr |= FSR_UFC | FSR_NXC;
809 903 else if (fsr & FSR_UFM)
810 904 fsr |= FSR_UFC;
811 905 }
812 906 }
813 907
814 908 /* restore the fsr and emulate exceptions as needed */
815 909 if ((fsr & FSR_CEXC) & (fsr >> 23)) {
816 910 __fenv_setfsr32(&fsr);
911 +
817 912 if (fsr & FSR_OFC) {
818 913 dummy = huge;
819 914 dummy *= huge;
820 915 } else if (fsr & FSR_UFC) {
821 916 dummy = tiny;
917 +
822 918 if (fsr & FSR_NXC)
823 919 dummy *= tiny;
824 920 else
825 921 dummy -= tiny2;
826 922 } else {
827 923 dummy = huge;
828 924 dummy += tiny;
829 925 }
830 926 } else {
831 927 fsr |= (fsr & 0x1f) << 5;
832 928 __fenv_setfsr32(&fsr);
833 929 }
930 +
834 931 return (zz.q);
835 932 }
836 -
837 933 #elif defined(__x86)
838 -
839 934 static const union {
840 935 unsigned i[2];
841 936 double d;
842 937 } C[] = {
843 938 { 0, 0x3fe00000u },
844 939 { 0, 0x40000000u },
845 940 { 0, 0x3df00000u },
846 941 { 0, 0x3bf00000u },
847 942 { 0, 0x41f00000u },
848 943 { 0, 0x43e00000u },
849 944 { 0, 0x7fe00000u },
850 945 { 0, 0x00100000u },
851 946 { 0, 0x00100001u }
852 947 };
853 948
854 -#define half C[0].d
855 -#define two C[1].d
856 -#define twom32 C[2].d
857 -#define twom64 C[3].d
858 -#define two32 C[4].d
859 -#define two63 C[5].d
860 -#define huge C[6].d
861 -#define tiny C[7].d
862 -#define tiny2 C[8].d
949 +#define half C[0].d
950 +#define two C[1].d
951 +#define twom32 C[2].d
952 +#define twom64 C[3].d
953 +#define two32 C[4].d
954 +#define two63 C[5].d
955 +#define huge C[6].d
956 +#define tiny C[7].d
957 +#define tiny2 C[8].d
863 958
864 959 #if defined(__amd64)
865 -#define NI 4
960 +#define NI 4
866 961 #else
867 -#define NI 3
962 +#define NI 3
868 963 #endif
869 964
870 965 /*
871 966 * fmal for x86: 80-bit extended double precision, little-endian
872 967 */
873 968 long double
874 -__fmal(long double x, long double y, long double z) {
969 +__fmal(long double x, long double y, long double z)
970 +{
875 971 union {
876 972 unsigned i[NI];
877 973 long double e;
878 974 } xx, yy, zz;
975 +
879 976 long double xhi, yhi, xlo, ylo, t;
880 977 unsigned xy0, xy1, xy2, xy3, xy4, z0, z1, z2, z3, z4;
881 978 unsigned oldcwsw, cwsw, rm, sticky, carry;
882 979 int ex, ey, ez, exy, sxy, sz, e, tinyafter;
883 - volatile double dummy;
980 + volatile double dummy;
884 981
885 982 /* extract the exponents of the arguments */
886 983 xx.e = x;
887 984 yy.e = y;
888 985 zz.e = z;
889 986 ex = xx.i[2] & 0x7fff;
890 987 ey = yy.i[2] & 0x7fff;
891 988 ez = zz.i[2] & 0x7fff;
892 989
893 990 /* dispense with inf, nan, and zero cases */
894 991 if (ex == 0x7fff || ey == 0x7fff || (ex | xx.i[1] | xx.i[0]) == 0 ||
895 - (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
992 + (ey | yy.i[1] | yy.i[0]) == 0) /* x or y is inf, nan, or 0 */
896 993 return (x * y + z);
897 994
898 - if (ez == 0x7fff) /* z is inf or nan */
995 + if (ez == 0x7fff) /* z is inf or nan */
899 996 return (x + z); /* avoid spurious under/overflow in x * y */
900 997
901 998 if ((ez | zz.i[1] | zz.i[0]) == 0) /* z is zero */
902 999 /*
903 1000 * x * y isn't zero but could underflow to zero,
904 1001 * so don't add z, lest we perturb the sign
905 1002 */
906 1003 return (x * y);
907 1004
908 1005 /*
909 1006 * now x, y, and z are all finite and nonzero; extract signs and
910 1007 * normalize the significands (this will raise the denormal operand
911 1008 * exception if need be)
912 1009 */
913 1010 sxy = (xx.i[2] ^ yy.i[2]) & 0x8000;
914 1011 sz = zz.i[2] & 0x8000;
1012 +
915 1013 if (!ex) {
916 1014 xx.e = x * two63;
917 1015 ex = (xx.i[2] & 0x7fff) - 63;
918 1016 }
1017 +
919 1018 if (!ey) {
920 1019 yy.e = y * two63;
921 1020 ey = (yy.i[2] & 0x7fff) - 63;
922 1021 }
1022 +
923 1023 if (!ez) {
924 1024 zz.e = z * two63;
925 1025 ez = (zz.i[2] & 0x7fff) - 63;
926 1026 }
927 1027
928 1028 /*
929 1029 * save the control and status words, mask all exceptions, and
930 1030 * set rounding to 64-bit precision and toward-zero
931 1031 */
932 1032 __fenv_getcwsw(&oldcwsw);
933 1033 cwsw = (oldcwsw & 0xf0c0ffff) | 0x0f3f0000;
934 1034 __fenv_setcwsw(&cwsw);
935 1035
936 1036 /* multiply x*y to 128 bits */
937 1037 exy = ex + ey - 0x3fff;
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
938 1038 xx.i[2] = 0x3fff;
939 1039 yy.i[2] = 0x3fff;
940 1040 x = xx.e;
941 1041 y = yy.e;
942 1042 xhi = ((x + twom32) + two32) - two32;
943 1043 yhi = ((y + twom32) + two32) - two32;
944 1044 xlo = x - xhi;
945 1045 ylo = y - yhi;
946 1046 x *= y;
947 1047 y = ((xhi * yhi - x) + xhi * ylo + xlo * yhi) + xlo * ylo;
1048 +
948 1049 if (x >= two) {
949 1050 x *= half;
950 1051 y *= half;
951 1052 exy++;
952 1053 }
953 1054
954 1055 /* extract the significands */
955 1056 xx.e = x;
956 1057 xy0 = xx.i[1];
957 1058 xy1 = xx.i[0];
958 1059 yy.e = t = y + twom32;
959 1060 xy2 = yy.i[0];
960 1061 yy.e = (y - (t - twom32)) + twom64;
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
961 1062 xy3 = yy.i[0];
962 1063 xy4 = 0;
963 1064 z0 = zz.i[1];
964 1065 z1 = zz.i[0];
965 1066 z2 = z3 = z4 = 0;
966 1067
967 1068 /*
968 1069 * now x*y is represented by sxy, exy, and xy[0-4], and z is
969 1070 * represented likewise; swap if need be so |xy| <= |z|
970 1071 */
971 - if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 &&
972 - (xy1 > z1 || (xy1 == z1 && (xy2 | xy3) != 0)))))) {
973 - e = sxy; sxy = sz; sz = e;
974 - e = exy; exy = ez; ez = e;
975 - e = xy0; xy0 = z0; z0 = e;
976 - e = xy1; xy1 = z1; z1 = e;
977 - z2 = xy2; xy2 = 0;
978 - z3 = xy3; xy3 = 0;
1072 + if (exy > ez || (exy == ez && (xy0 > z0 || (xy0 == z0 && (xy1 > z1 ||
1073 + (xy1 == z1 && (xy2 | xy3) != 0)))))) {
1074 + e = sxy;
1075 + sxy = sz;
1076 + sz = e;
1077 + e = exy;
1078 + exy = ez;
1079 + ez = e;
1080 + e = xy0;
1081 + xy0 = z0;
1082 + z0 = e;
1083 + e = xy1;
1084 + xy1 = z1;
1085 + z1 = e;
1086 + z2 = xy2;
1087 + xy2 = 0;
1088 + z3 = xy3;
1089 + xy3 = 0;
979 1090 }
980 1091
981 1092 /* shift the significand of xy keeping a sticky bit */
982 1093 e = ez - exy;
1094 +
983 1095 if (e > 130) {
984 1096 xy0 = xy1 = xy2 = xy3 = 0;
985 1097 xy4 = 1;
986 1098 } else if (e >= 128) {
987 1099 sticky = xy3 | xy2 | xy1 | ((xy0 << 1) << (159 - e));
988 1100 xy4 = xy0 >> (e - 128);
1101 +
989 1102 if (sticky)
990 1103 xy4 |= 1;
1104 +
991 1105 xy0 = xy1 = xy2 = xy3 = 0;
992 1106 } else if (e >= 96) {
993 1107 sticky = xy3 | xy2 | ((xy1 << 1) << (127 - e));
994 1108 xy4 = (xy1 >> (e - 96)) | ((xy0 << 1) << (127 - e));
1109 +
995 1110 if (sticky)
996 1111 xy4 |= 1;
1112 +
997 1113 xy3 = xy0 >> (e - 96);
998 1114 xy0 = xy1 = xy2 = 0;
999 1115 } else if (e >= 64) {
1000 1116 sticky = xy3 | ((xy2 << 1) << (95 - e));
1001 1117 xy4 = (xy2 >> (e - 64)) | ((xy1 << 1) << (95 - e));
1118 +
1002 1119 if (sticky)
1003 1120 xy4 |= 1;
1121 +
1004 1122 xy3 = (xy1 >> (e - 64)) | ((xy0 << 1) << (95 - e));
1005 1123 xy2 = xy0 >> (e - 64);
1006 1124 xy0 = xy1 = 0;
1007 1125 } else if (e >= 32) {
1008 1126 sticky = (xy3 << 1) << (63 - e);
1009 1127 xy4 = (xy3 >> (e - 32)) | ((xy2 << 1) << (63 - e));
1128 +
1010 1129 if (sticky)
1011 1130 xy4 |= 1;
1131 +
1012 1132 xy3 = (xy2 >> (e - 32)) | ((xy1 << 1) << (63 - e));
1013 1133 xy2 = (xy1 >> (e - 32)) | ((xy0 << 1) << (63 - e));
1014 1134 xy1 = xy0 >> (e - 32);
1015 1135 xy0 = 0;
1016 1136 } else if (e) {
1017 1137 xy4 = (xy3 << 1) << (31 - e);
1018 1138 xy3 = (xy3 >> e) | ((xy2 << 1) << (31 - e));
1019 1139 xy2 = (xy2 >> e) | ((xy1 << 1) << (31 - e));
1020 1140 xy1 = (xy1 >> e) | ((xy0 << 1) << (31 - e));
1021 1141 xy0 >>= e;
1022 1142 }
1023 1143
1024 1144 /* if this is a magnitude subtract, negate the significand of xy */
1025 1145 if (sxy ^ sz) {
1026 1146 xy0 = ~xy0;
1027 1147 xy1 = ~xy1;
1028 1148 xy2 = ~xy2;
1029 1149 xy3 = ~xy3;
1030 1150 xy4 = -xy4;
1151 +
1031 1152 if (xy4 == 0)
1032 1153 if (++xy3 == 0)
1033 1154 if (++xy2 == 0)
1034 1155 if (++xy1 == 0)
1035 1156 xy0++;
1036 1157 }
1037 1158
1038 1159 /* add, propagating carries */
1039 1160 z4 += xy4;
1040 1161 carry = (z4 < xy4);
1041 1162 z3 += xy3;
1163 +
1042 1164 if (carry) {
1043 1165 z3++;
1044 1166 carry = (z3 <= xy3);
1045 - } else
1167 + } else {
1046 1168 carry = (z3 < xy3);
1169 + }
1170 +
1047 1171 z2 += xy2;
1172 +
1048 1173 if (carry) {
1049 1174 z2++;
1050 1175 carry = (z2 <= xy2);
1051 - } else
1176 + } else {
1052 1177 carry = (z2 < xy2);
1178 + }
1179 +
1053 1180 z1 += xy1;
1181 +
1054 1182 if (carry) {
1055 1183 z1++;
1056 1184 carry = (z1 <= xy1);
1057 - } else
1185 + } else {
1058 1186 carry = (z1 < xy1);
1187 + }
1188 +
1059 1189 z0 += xy0;
1190 +
1060 1191 if (carry) {
1061 1192 z0++;
1062 1193 carry = (z0 <= xy0);
1063 - } else
1194 + } else {
1064 1195 carry = (z0 < xy0);
1196 + }
1065 1197
1066 1198 /* for a magnitude subtract, ignore the last carry out */
1067 1199 if (sxy ^ sz)
1068 1200 carry = 0;
1069 1201
1070 1202 /* postnormalize and collect rounding information into z2 */
1071 1203 if (ez < 1) {
1072 1204 /* result is tiny; shift right until exponent is within range */
1073 1205 e = 1 - ez;
1206 +
1074 1207 if (e > 67) {
1075 - z2 = 1; /* result can't be exactly zero */
1208 + z2 = 1; /* result can't be exactly zero */
1076 1209 z0 = z1 = 0;
1077 1210 } else if (e >= 64) {
1078 1211 sticky = z4 | z3 | z2 | z1 | ((z0 << 1) << (95 - e));
1079 1212 z2 = (z0 >> (e - 64)) | ((carry << 1) << (95 - e));
1213 +
1080 1214 if (sticky)
1081 1215 z2 |= 1;
1216 +
1082 1217 z1 = carry >> (e - 64);
1083 1218 z0 = 0;
1084 1219 } else if (e >= 32) {
1085 1220 sticky = z4 | z3 | z2 | ((z1 << 1) << (63 - e));
1086 1221 z2 = (z1 >> (e - 32)) | ((z0 << 1) << (63 - e));
1222 +
1087 1223 if (sticky)
1088 1224 z2 |= 1;
1225 +
1089 1226 z1 = (z0 >> (e - 32)) | ((carry << 1) << (63 - e));
1090 1227 z0 = carry >> (e - 32);
1091 1228 } else {
1092 1229 sticky = z4 | z3 | (z2 << 1) << (31 - e);
1093 1230 z2 = (z2 >> e) | ((z1 << 1) << (31 - e));
1231 +
1094 1232 if (sticky)
1095 1233 z2 |= 1;
1234 +
1096 1235 z1 = (z1 >> e) | ((z0 << 1) << (31 - e));
1097 1236 z0 = (z0 >> e) | ((carry << 1) << (31 - e));
1098 1237 }
1238 +
1099 1239 ez = 1;
1100 1240 } else if (carry) {
1101 1241 /* carry out; shift right by one */
1102 1242 sticky = (z2 & 1) | z3 | z4;
1103 1243 z2 = (z2 >> 1) | (z1 << 31);
1244 +
1104 1245 if (sticky)
1105 1246 z2 |= 1;
1247 +
1106 1248 z1 = (z1 >> 1) | (z0 << 31);
1107 1249 z0 = (z0 >> 1) | 0x80000000;
1108 1250 ez++;
1109 1251 } else {
1110 1252 if (z0 < 0x80000000u && (z0 | z1 | z2 | z3 | z4) != 0) {
1111 1253 /*
1112 1254 * borrow/cancellation; shift left as much as
1113 1255 * exponent allows
1114 1256 */
1115 1257 while (!z0 && ez >= 33) {
1116 1258 z0 = z1;
1117 1259 z1 = z2;
1118 1260 z2 = z3;
1119 1261 z3 = z4;
1120 1262 z4 = 0;
1121 1263 ez -= 32;
1122 1264 }
1265 +
1123 1266 while (z0 < 0x80000000u && ez > 1) {
1124 1267 z0 = (z0 << 1) | (z1 >> 31);
1125 1268 z1 = (z1 << 1) | (z2 >> 31);
1126 1269 z2 = (z2 << 1) | (z3 >> 31);
1127 1270 z3 = (z3 << 1) | (z4 >> 31);
1128 1271 z4 <<= 1;
1129 1272 ez--;
1130 1273 }
1131 1274 }
1275 +
1132 1276 if (z3 | z4)
1133 1277 z2 |= 1;
1134 1278 }
1135 1279
1136 1280 /* get the rounding mode */
1137 1281 rm = oldcwsw & 0x0c000000;
1138 1282
1139 1283 /* adjust exponent if result is subnormal */
1140 1284 tinyafter = 0;
1285 +
1141 1286 if (!(z0 & 0x80000000)) {
1142 1287 ez = 0;
1143 1288 tinyafter = 1;
1144 - if (!(z0 | z1 | z2)) { /* exact zero */
1289 +
1290 + if (!(z0 | z1 | z2)) { /* exact zero */
1145 1291 zz.i[2] = rm == FCW_RM ? 0x8000 : 0;
1146 1292 zz.i[1] = zz.i[0] = 0;
1147 1293 __fenv_setcwsw(&oldcwsw);
1148 1294 return (zz.e);
1149 1295 }
1150 1296 }
1151 1297
1152 1298 /*
1153 1299 * flip the sense of directed roundings if the result is negative;
1154 1300 * the logic below applies to a positive result
1155 1301 */
1156 1302 if (sz && (rm == FCW_RM || rm == FCW_RP))
1157 1303 rm = (FCW_RM + FCW_RP) - rm;
1158 1304
1159 1305 /* round */
1160 1306 if (z2) {
1161 - if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u ||
1162 - (z2 == 0x80000000u && (z1 & 1))))) {
1307 + if (rm == FCW_RP || (rm == FCW_RN && (z2 > 0x80000000u || (z2 ==
1308 + 0x80000000u && (z1 & 1))))) {
1163 1309 /* round up and renormalize if necessary */
1164 1310 if (++z1 == 0) {
1165 1311 if (++z0 == 0) {
1166 1312 z0 = 0x80000000;
1167 1313 ez++;
1168 1314 } else if (z0 == 0x80000000) {
1169 1315 /* rounded up to smallest normal */
1170 1316 ez = 1;
1317 +
1171 1318 if ((rm == FCW_RP && z2 >
1172 - 0x80000000u) || (rm == FCW_RN &&
1173 - z2 >= 0xc0000000u))
1319 + 0x80000000u) || (rm == FCW_RN &&
1320 + z2 >= 0xc0000000u))
1174 1321 /*
1175 1322 * would have rounded up to
1176 1323 * smallest normal even with
1177 1324 * unbounded range
1178 1325 */
1179 1326 tinyafter = 0;
1180 1327 }
1181 1328 }
1182 1329 }
1183 1330 }
1184 1331
1185 1332 /* restore the control and status words, check for over/underflow */
1186 1333 __fenv_setcwsw(&oldcwsw);
1334 +
1187 1335 if (ez >= 0x7fff) {
1188 1336 if (rm == FCW_RN || rm == FCW_RP) {
1189 1337 zz.i[2] = sz | 0x7fff;
1190 1338 zz.i[1] = 0x80000000;
1191 1339 zz.i[0] = 0;
1192 1340 } else {
1193 1341 zz.i[2] = sz | 0x7ffe;
1194 1342 zz.i[1] = 0xffffffff;
1195 1343 zz.i[0] = 0xffffffff;
1196 1344 }
1345 +
1197 1346 dummy = huge;
1198 1347 dummy *= huge;
1199 1348 } else {
1200 1349 zz.i[2] = sz | ez;
1201 1350 zz.i[1] = z0;
1202 1351 zz.i[0] = z1;
1203 1352
1204 1353 /*
1205 1354 * tinyafter => result rounded w/ unbounded range would be tiny,
1206 1355 * z2 nonzero => result delivered is inexact
1207 1356 */
1208 1357 if (tinyafter) {
1209 1358 dummy = tiny;
1359 +
1210 1360 if (z2)
1211 1361 dummy *= tiny;
1212 1362 else
1213 1363 dummy -= tiny2;
1214 1364 } else if (z2) {
1215 1365 dummy = huge;
1216 1366 dummy += tiny;
1217 1367 }
1218 1368 }
1219 1369
1220 1370 return (zz.e);
1221 1371 }
1222 -
1223 1372 #else
1224 1373 #error Unknown architecture
1225 1374 #endif
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX