5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #pragma weak fmaf = __fmaf
31
32 #include "libm.h"
33 #include "fma.h"
34 #include "fenv_inlines.h"
35
36 #if defined(__sparc)
37
38 /*
39 * fmaf for SPARC: 32-bit single precision, big-endian
40 */
41 float
42 __fmaf(float x, float y, float z) {
43 union {
44 unsigned i[2];
45 double d;
46 } xy, zz;
47 unsigned u, s;
48 int exy, ez;
49
50 /*
51 * the following operations can only raise the invalid exception,
52 * and then only if either x*y is of the form Inf*0 or one of x,
53 * y, or z is a signaling NaN
54 */
55 xy.d = (double) x * y;
56 zz.d = (double) z;
57
58 /*
59 * if the sum xy + z will be exact, just compute it and cast the
60 * result to float
61 */
62 exy = (xy.i[0] >> 20) & 0x7ff;
63 ez = (zz.i[0] >> 20) & 0x7ff;
64 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
65 ez == 0x7ff || ez == 0) {
66 return ((float) (xy.d + zz.d));
67 }
68
69 /*
70 * collapse the tail of the smaller summand into a "sticky bit"
71 * so that the sum can be computed without error
72 */
73 if (ez > exy) {
74 if (ez - exy < 31) {
75 u = xy.i[1];
76 s = 2 << (ez - exy);
77 if (u & (s - 1))
78 u |= s;
79 xy.i[1] = u & ~(s - 1);
80 } else if (ez - exy < 51) {
81 u = xy.i[0];
82 s = 1 << (ez - exy - 31);
83 if ((u & (s - 1)) | xy.i[1])
84 u |= s;
85 xy.i[0] = u & ~(s - 1);
86 xy.i[1] = 0;
87 } else {
88 /* collapse all of xy into a single bit */
89 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
90 xy.i[1] = 0;
91 }
92 } else {
93 if (exy - ez < 31) {
94 u = zz.i[1];
95 s = 2 << (exy - ez);
96 if (u & (s - 1))
97 u |= s;
98 zz.i[1] = u & ~(s - 1);
99 } else if (exy - ez < 51) {
100 u = zz.i[0];
101 s = 1 << (exy - ez - 31);
102 if ((u & (s - 1)) | zz.i[1])
103 u |= s;
104 zz.i[0] = u & ~(s - 1);
105 zz.i[1] = 0;
106 } else {
107 /* collapse all of zz into a single bit */
108 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
109 zz.i[1] = 0;
110 }
111 }
112
113 return ((float) (xy.d + zz.d));
114 }
115
116 #elif defined(__x86)
117
118 #if defined(__amd64)
119 #define NI 4
120 #else
121 #define NI 3
122 #endif
123
124 /*
125 * fmaf for x86: 32-bit single precision, little-endian
126 */
127 float
128 __fmaf(float x, float y, float z) {
129 union {
130 unsigned i[NI];
131 long double e;
132 } xy, zz;
133 unsigned u, s, cwsw, oldcwsw;
134 int exy, ez;
135
136 /* set rounding precision to 64 bits */
137 __fenv_getcwsw(&oldcwsw);
138 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
139 __fenv_setcwsw(&cwsw);
140
141 /*
142 * the following operations can only raise the invalid exception,
143 * and then only if either x*y is of the form Inf*0 or one of x,
144 * y, or z is a signaling NaN
145 */
146 xy.e = (long double) x * y;
147 zz.e = (long double) z;
148
149 /*
150 * if the sum xy + z will be exact, just compute it and cast the
151 * result to float
152 */
153 exy = xy.i[2] & 0x7fff;
154 ez = zz.i[2] & 0x7fff;
155 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
156 ez == 0x7fff || ez == 0) {
157 goto cont;
158 }
159
160 /*
161 * collapse the tail of the smaller summand into a "sticky bit"
162 * so that the sum can be computed without error
163 */
164 if (ez > exy) {
165 if (ez - exy < 31) {
166 u = xy.i[0];
167 s = 2 << (ez - exy);
168 if (u & (s - 1))
169 u |= s;
170 xy.i[0] = u & ~(s - 1);
171 } else if (ez - exy < 62) {
172 u = xy.i[1];
173 s = 1 << (ez - exy - 31);
174 if ((u & (s - 1)) | xy.i[0])
175 u |= s;
176 xy.i[1] = u & ~(s - 1);
177 xy.i[0] = 0;
178 } else {
179 /* collapse all of xy into a single bit */
180 xy.i[0] = 0;
181 xy.i[1] = 0x80000000;
182 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
183 }
184 } else {
185 if (exy - ez < 62) {
186 u = zz.i[1];
187 s = 1 << (exy - ez - 31);
188 if ((u & (s - 1)) | zz.i[0])
189 u |= s;
190 zz.i[1] = u & ~(s - 1);
191 zz.i[0] = 0;
192 } else {
193 /* collapse all of zz into a single bit */
194 zz.i[0] = 0;
195 zz.i[1] = 0x80000000;
196 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
197 }
198 }
199
200 cont:
201 xy.e += zz.e;
202
203 /* restore the rounding precision */
204 __fenv_getcwsw(&cwsw);
205 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
206 __fenv_setcwsw(&cwsw);
207
208 return ((float) xy.e);
209 }
210
211 #if 0
212 /*
213 * another fmaf for x86: assumes return value will be left in
214 * long double (80-bit double extended) precision
215 */
216 long double
217 __fmaf(float x, float y, float z) {
218 /*
219 * Note: This implementation assumes the rounding precision mode
220 * is set to the default, rounding to 64 bit precision. If this
221 * routine must work in non-default rounding precision modes, do
222 * the following instead:
223 *
224 * long double t;
225 *
226 * <set rp mode to round to 64 bit precision>
227 * t = x * y;
228 * <restore rp mode>
229 * return t + z;
230 *
231 * Note that the code to change rounding precision must not alter
232 * the exception masks or flags, since the product x * y may raise
233 * an invalid operation exception.
234 */
235 return ((long double) x * y + z);
236 }
237 #endif
238
239 #else
240 #error Unknown architecture
241 #endif
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #pragma weak fmaf = __fmaf
32
33 #include "libm.h"
34 #include "fma.h"
35 #include "fenv_inlines.h"
36
37 #if defined(__sparc)
38 /*
39 * fmaf for SPARC: 32-bit single precision, big-endian
40 */
41 float
42 __fmaf(float x, float y, float z)
43 {
44 union {
45 unsigned i[2];
46 double d;
47 } xy, zz;
48
49 unsigned u, s;
50 int exy, ez;
51
52 /*
53 * the following operations can only raise the invalid exception,
54 * and then only if either x*y is of the form Inf*0 or one of x,
55 * y, or z is a signaling NaN
56 */
57 xy.d = (double)x * y;
58 zz.d = (double)z;
59
60 /*
61 * if the sum xy + z will be exact, just compute it and cast the
62 * result to float
63 */
64 exy = (xy.i[0] >> 20) & 0x7ff;
65 ez = (zz.i[0] >> 20) & 0x7ff;
66
67 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
68 ez == 0x7ff || ez == 0)
69 return ((float)(xy.d + zz.d));
70
71 /*
72 * collapse the tail of the smaller summand into a "sticky bit"
73 * so that the sum can be computed without error
74 */
75 if (ez > exy) {
76 if (ez - exy < 31) {
77 u = xy.i[1];
78 s = 2 << (ez - exy);
79
80 if (u & (s - 1))
81 u |= s;
82
83 xy.i[1] = u & ~(s - 1);
84 } else if (ez - exy < 51) {
85 u = xy.i[0];
86 s = 1 << (ez - exy - 31);
87
88 if ((u & (s - 1)) | xy.i[1])
89 u |= s;
90
91 xy.i[0] = u & ~(s - 1);
92 xy.i[1] = 0;
93 } else {
94 /* collapse all of xy into a single bit */
95 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
96 xy.i[1] = 0;
97 }
98 } else {
99 if (exy - ez < 31) {
100 u = zz.i[1];
101 s = 2 << (exy - ez);
102
103 if (u & (s - 1))
104 u |= s;
105
106 zz.i[1] = u & ~(s - 1);
107 } else if (exy - ez < 51) {
108 u = zz.i[0];
109 s = 1 << (exy - ez - 31);
110
111 if ((u & (s - 1)) | zz.i[1])
112 u |= s;
113
114 zz.i[0] = u & ~(s - 1);
115 zz.i[1] = 0;
116 } else {
117 /* collapse all of zz into a single bit */
118 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
119 zz.i[1] = 0;
120 }
121 }
122
123 return ((float)(xy.d + zz.d));
124 }
125 #elif defined(__x86)
126 #if defined(__amd64)
127 #define NI 4
128 #else
129 #define NI 3
130 #endif
131
132 /*
133 * fmaf for x86: 32-bit single precision, little-endian
134 */
135 float
136 __fmaf(float x, float y, float z)
137 {
138 union {
139 unsigned i[NI];
140 long double e;
141 } xy, zz;
142
143 unsigned u, s, cwsw, oldcwsw;
144 int exy, ez;
145
146 /* set rounding precision to 64 bits */
147 __fenv_getcwsw(&oldcwsw);
148 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
149 __fenv_setcwsw(&cwsw);
150
151 /*
152 * the following operations can only raise the invalid exception,
153 * and then only if either x*y is of the form Inf*0 or one of x,
154 * y, or z is a signaling NaN
155 */
156 xy.e = (long double)x * y;
157 zz.e = (long double)z;
158
159 /*
160 * if the sum xy + z will be exact, just compute it and cast the
161 * result to float
162 */
163 exy = xy.i[2] & 0x7fff;
164 ez = zz.i[2] & 0x7fff;
165
166 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
167 ez == 0x7fff || ez == 0)
168 goto cont;
169
170 /*
171 * collapse the tail of the smaller summand into a "sticky bit"
172 * so that the sum can be computed without error
173 */
174 if (ez > exy) {
175 if (ez - exy < 31) {
176 u = xy.i[0];
177 s = 2 << (ez - exy);
178
179 if (u & (s - 1))
180 u |= s;
181
182 xy.i[0] = u & ~(s - 1);
183 } else if (ez - exy < 62) {
184 u = xy.i[1];
185 s = 1 << (ez - exy - 31);
186
187 if ((u & (s - 1)) | xy.i[0])
188 u |= s;
189
190 xy.i[1] = u & ~(s - 1);
191 xy.i[0] = 0;
192 } else {
193 /* collapse all of xy into a single bit */
194 xy.i[0] = 0;
195 xy.i[1] = 0x80000000;
196 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
197 }
198 } else {
199 if (exy - ez < 62) {
200 u = zz.i[1];
201 s = 1 << (exy - ez - 31);
202
203 if ((u & (s - 1)) | zz.i[0])
204 u |= s;
205
206 zz.i[1] = u & ~(s - 1);
207 zz.i[0] = 0;
208 } else {
209 /* collapse all of zz into a single bit */
210 zz.i[0] = 0;
211 zz.i[1] = 0x80000000;
212 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
213 }
214 }
215
216 cont:
217 xy.e += zz.e;
218
219 /* restore the rounding precision */
220 __fenv_getcwsw(&cwsw);
221 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
222 __fenv_setcwsw(&cwsw);
223
224 return ((float)xy.e);
225 }
226
227 #if 0
228 /*
229 * another fmaf for x86: assumes return value will be left in
230 * long double (80-bit double extended) precision
231 */
232 long double
233 __fmaf(float x, float y, float z)
234 {
235 /*
236 * Note: This implementation assumes the rounding precision mode
237 * is set to the default, rounding to 64 bit precision. If this
238 * routine must work in non-default rounding precision modes, do
239 * the following instead:
240 *
241 * long double t;
242 *
243 * <set rp mode to round to 64 bit precision>
244 * t = x * y;
245 * <restore rp mode>
246 * return t + z;
247 *
248 * Note that the code to change rounding precision must not alter
249 * the exception masks or flags, since the product x * y may raise
250 * an invalid operation exception.
251 */
252 return ((long double)x * y + z);
253 }
254 #endif
255 #else
256 #error Unknown architecture
257 #endif
|