1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #if defined(ELFOBJ)
31 #pragma weak fmaf = __fmaf
32 #endif
33
34 #include "libm.h"
35 #include "fma.h"
36 #include "fenv_inlines.h"
37
38 #if defined(__sparc)
39
40 /*
41 * fmaf for SPARC: 32-bit single precision, big-endian
42 */
43 float
44 __fmaf(float x, float y, float z) {
45 union {
46 unsigned i[2];
47 double d;
48 } xy, zz;
49 unsigned u, s;
50 int exy, ez;
51
52 /*
53 * the following operations can only raise the invalid exception,
54 * and then only if either x*y is of the form Inf*0 or one of x,
55 * y, or z is a signaling NaN
56 */
57 xy.d = (double) x * y;
58 zz.d = (double) z;
59
60 /*
61 * if the sum xy + z will be exact, just compute it and cast the
62 * result to float
63 */
64 exy = (xy.i[0] >> 20) & 0x7ff;
65 ez = (zz.i[0] >> 20) & 0x7ff;
66 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
67 ez == 0x7ff || ez == 0) {
68 return ((float) (xy.d + zz.d));
69 }
70
71 /*
72 * collapse the tail of the smaller summand into a "sticky bit"
73 * so that the sum can be computed without error
74 */
75 if (ez > exy) {
76 if (ez - exy < 31) {
77 u = xy.i[1];
78 s = 2 << (ez - exy);
79 if (u & (s - 1))
80 u |= s;
81 xy.i[1] = u & ~(s - 1);
82 } else if (ez - exy < 51) {
83 u = xy.i[0];
84 s = 1 << (ez - exy - 31);
85 if ((u & (s - 1)) | xy.i[1])
86 u |= s;
87 xy.i[0] = u & ~(s - 1);
88 xy.i[1] = 0;
89 } else {
90 /* collapse all of xy into a single bit */
91 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
92 xy.i[1] = 0;
93 }
94 } else {
95 if (exy - ez < 31) {
96 u = zz.i[1];
97 s = 2 << (exy - ez);
98 if (u & (s - 1))
99 u |= s;
100 zz.i[1] = u & ~(s - 1);
101 } else if (exy - ez < 51) {
102 u = zz.i[0];
103 s = 1 << (exy - ez - 31);
104 if ((u & (s - 1)) | zz.i[1])
105 u |= s;
106 zz.i[0] = u & ~(s - 1);
107 zz.i[1] = 0;
108 } else {
109 /* collapse all of zz into a single bit */
110 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
111 zz.i[1] = 0;
112 }
113 }
114
115 return ((float) (xy.d + zz.d));
116 }
117
118 #elif defined(__x86)
119
120 #if defined(__amd64)
121 #define NI 4
122 #else
123 #define NI 3
124 #endif
125
126 /*
127 * fmaf for x86: 32-bit single precision, little-endian
128 */
129 float
130 __fmaf(float x, float y, float z) {
131 union {
132 unsigned i[NI];
133 long double e;
134 } xy, zz;
135 unsigned u, s, cwsw, oldcwsw;
136 int exy, ez;
137
138 /* set rounding precision to 64 bits */
139 __fenv_getcwsw(&oldcwsw);
140 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
141 __fenv_setcwsw(&cwsw);
142
143 /*
144 * the following operations can only raise the invalid exception,
145 * and then only if either x*y is of the form Inf*0 or one of x,
146 * y, or z is a signaling NaN
147 */
148 xy.e = (long double) x * y;
149 zz.e = (long double) z;
150
151 /*
152 * if the sum xy + z will be exact, just compute it and cast the
153 * result to float
154 */
155 exy = xy.i[2] & 0x7fff;
156 ez = zz.i[2] & 0x7fff;
157 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
158 ez == 0x7fff || ez == 0) {
159 goto cont;
160 }
161
162 /*
163 * collapse the tail of the smaller summand into a "sticky bit"
164 * so that the sum can be computed without error
165 */
166 if (ez > exy) {
167 if (ez - exy < 31) {
168 u = xy.i[0];
169 s = 2 << (ez - exy);
170 if (u & (s - 1))
171 u |= s;
172 xy.i[0] = u & ~(s - 1);
173 } else if (ez - exy < 62) {
174 u = xy.i[1];
175 s = 1 << (ez - exy - 31);
176 if ((u & (s - 1)) | xy.i[0])
177 u |= s;
178 xy.i[1] = u & ~(s - 1);
179 xy.i[0] = 0;
180 } else {
181 /* collapse all of xy into a single bit */
182 xy.i[0] = 0;
183 xy.i[1] = 0x80000000;
184 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
185 }
186 } else {
187 if (exy - ez < 62) {
188 u = zz.i[1];
189 s = 1 << (exy - ez - 31);
190 if ((u & (s - 1)) | zz.i[0])
191 u |= s;
192 zz.i[1] = u & ~(s - 1);
193 zz.i[0] = 0;
194 } else {
195 /* collapse all of zz into a single bit */
196 zz.i[0] = 0;
197 zz.i[1] = 0x80000000;
198 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
199 }
200 }
201
202 cont:
203 xy.e += zz.e;
204
205 /* restore the rounding precision */
206 __fenv_getcwsw(&cwsw);
207 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
208 __fenv_setcwsw(&cwsw);
209
210 return ((float) xy.e);
211 }
212
213 #if 0
214 /*
215 * another fmaf for x86: assumes return value will be left in
216 * long double (80-bit double extended) precision
217 */
218 long double
219 __fmaf(float x, float y, float z) {
220 /*
221 * Note: This implementation assumes the rounding precision mode
222 * is set to the default, rounding to 64 bit precision. If this
223 * routine must work in non-default rounding precision modes, do
224 * the following instead:
225 *
226 * long double t;
227 *
228 * <set rp mode to round to 64 bit precision>
229 * t = x * y;
230 * <restore rp mode>
231 * return t + z;
232 *
233 * Note that the code to change rounding precision must not alter
234 * the exception masks or flags, since the product x * y may raise
235 * an invalid operation exception.
236 */
237 return ((long double) x * y + z);
238 }
239 #endif
240
241 #else
242 #error Unknown architecture
243 #endif