Print this page
11210 libm should be cstyle(1ONBLD) clean
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/fmaf.c
+++ new/usr/src/lib/libm/common/m9x/fmaf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 +
25 26 /*
26 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 28 * Use is subject to license terms.
28 29 */
29 30
30 31 #pragma weak fmaf = __fmaf
31 32
32 33 #include "libm.h"
33 34 #include "fma.h"
34 35 #include "fenv_inlines.h"
35 36
36 37 #if defined(__sparc)
37 -
38 38 /*
39 39 * fmaf for SPARC: 32-bit single precision, big-endian
40 40 */
41 41 float
42 -__fmaf(float x, float y, float z) {
42 +__fmaf(float x, float y, float z)
43 +{
43 44 union {
44 45 unsigned i[2];
45 46 double d;
46 47 } xy, zz;
48 +
47 49 unsigned u, s;
48 50 int exy, ez;
49 51
50 52 /*
51 53 * the following operations can only raise the invalid exception,
52 54 * and then only if either x*y is of the form Inf*0 or one of x,
53 55 * y, or z is a signaling NaN
54 56 */
55 - xy.d = (double) x * y;
56 - zz.d = (double) z;
57 + xy.d = (double)x * y;
58 + zz.d = (double)z;
57 59
58 60 /*
59 61 * if the sum xy + z will be exact, just compute it and cast the
60 62 * result to float
61 63 */
62 64 exy = (xy.i[0] >> 20) & 0x7ff;
63 65 ez = (zz.i[0] >> 20) & 0x7ff;
66 +
64 67 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
65 - ez == 0x7ff || ez == 0) {
66 - return ((float) (xy.d + zz.d));
67 - }
68 + ez == 0x7ff || ez == 0)
69 + return ((float)(xy.d + zz.d));
68 70
69 71 /*
70 72 * collapse the tail of the smaller summand into a "sticky bit"
71 73 * so that the sum can be computed without error
72 74 */
73 75 if (ez > exy) {
74 76 if (ez - exy < 31) {
75 77 u = xy.i[1];
76 78 s = 2 << (ez - exy);
79 +
77 80 if (u & (s - 1))
78 81 u |= s;
82 +
79 83 xy.i[1] = u & ~(s - 1);
80 84 } else if (ez - exy < 51) {
81 85 u = xy.i[0];
82 86 s = 1 << (ez - exy - 31);
87 +
83 88 if ((u & (s - 1)) | xy.i[1])
84 89 u |= s;
90 +
85 91 xy.i[0] = u & ~(s - 1);
86 92 xy.i[1] = 0;
87 93 } else {
88 94 /* collapse all of xy into a single bit */
89 95 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
90 96 xy.i[1] = 0;
91 97 }
92 98 } else {
93 99 if (exy - ez < 31) {
94 100 u = zz.i[1];
95 101 s = 2 << (exy - ez);
102 +
96 103 if (u & (s - 1))
97 104 u |= s;
105 +
98 106 zz.i[1] = u & ~(s - 1);
99 107 } else if (exy - ez < 51) {
100 108 u = zz.i[0];
101 109 s = 1 << (exy - ez - 31);
110 +
102 111 if ((u & (s - 1)) | zz.i[1])
103 112 u |= s;
113 +
104 114 zz.i[0] = u & ~(s - 1);
105 115 zz.i[1] = 0;
106 116 } else {
107 117 /* collapse all of zz into a single bit */
108 118 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
109 119 zz.i[1] = 0;
110 120 }
111 121 }
112 122
113 - return ((float) (xy.d + zz.d));
123 + return ((float)(xy.d + zz.d));
114 124 }
115 -
116 125 #elif defined(__x86)
117 -
118 126 #if defined(__amd64)
119 -#define NI 4
127 +#define NI 4
120 128 #else
121 -#define NI 3
129 +#define NI 3
122 130 #endif
123 131
124 132 /*
125 133 * fmaf for x86: 32-bit single precision, little-endian
126 134 */
127 135 float
128 -__fmaf(float x, float y, float z) {
136 +__fmaf(float x, float y, float z)
137 +{
129 138 union {
130 139 unsigned i[NI];
131 140 long double e;
132 141 } xy, zz;
142 +
133 143 unsigned u, s, cwsw, oldcwsw;
134 144 int exy, ez;
135 145
136 146 /* set rounding precision to 64 bits */
137 147 __fenv_getcwsw(&oldcwsw);
138 148 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
139 149 __fenv_setcwsw(&cwsw);
140 150
141 151 /*
142 152 * the following operations can only raise the invalid exception,
143 153 * and then only if either x*y is of the form Inf*0 or one of x,
144 154 * y, or z is a signaling NaN
145 155 */
146 - xy.e = (long double) x * y;
147 - zz.e = (long double) z;
156 + xy.e = (long double)x * y;
157 + zz.e = (long double)z;
148 158
149 159 /*
150 160 * if the sum xy + z will be exact, just compute it and cast the
151 161 * result to float
152 162 */
153 163 exy = xy.i[2] & 0x7fff;
154 164 ez = zz.i[2] & 0x7fff;
165 +
155 166 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
156 - ez == 0x7fff || ez == 0) {
167 + ez == 0x7fff || ez == 0)
157 168 goto cont;
158 - }
159 169
160 170 /*
161 171 * collapse the tail of the smaller summand into a "sticky bit"
162 172 * so that the sum can be computed without error
163 173 */
164 174 if (ez > exy) {
165 175 if (ez - exy < 31) {
166 176 u = xy.i[0];
167 177 s = 2 << (ez - exy);
178 +
168 179 if (u & (s - 1))
169 180 u |= s;
181 +
170 182 xy.i[0] = u & ~(s - 1);
171 183 } else if (ez - exy < 62) {
172 184 u = xy.i[1];
173 185 s = 1 << (ez - exy - 31);
186 +
174 187 if ((u & (s - 1)) | xy.i[0])
175 188 u |= s;
189 +
176 190 xy.i[1] = u & ~(s - 1);
177 191 xy.i[0] = 0;
178 192 } else {
179 193 /* collapse all of xy into a single bit */
180 194 xy.i[0] = 0;
181 195 xy.i[1] = 0x80000000;
182 196 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
183 197 }
184 198 } else {
185 199 if (exy - ez < 62) {
186 200 u = zz.i[1];
187 201 s = 1 << (exy - ez - 31);
202 +
188 203 if ((u & (s - 1)) | zz.i[0])
189 204 u |= s;
205 +
190 206 zz.i[1] = u & ~(s - 1);
191 207 zz.i[0] = 0;
192 208 } else {
193 209 /* collapse all of zz into a single bit */
194 210 zz.i[0] = 0;
195 211 zz.i[1] = 0x80000000;
196 212 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
197 213 }
198 214 }
199 215
200 216 cont:
201 217 xy.e += zz.e;
202 218
203 219 /* restore the rounding precision */
204 220 __fenv_getcwsw(&cwsw);
205 221 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
206 222 __fenv_setcwsw(&cwsw);
207 223
208 - return ((float) xy.e);
224 + return ((float)xy.e);
209 225 }
210 226
211 227 #if 0
212 228 /*
213 229 * another fmaf for x86: assumes return value will be left in
214 230 * long double (80-bit double extended) precision
215 231 */
216 232 long double
217 -__fmaf(float x, float y, float z) {
233 +__fmaf(float x, float y, float z)
234 +{
218 235 /*
219 236 * Note: This implementation assumes the rounding precision mode
220 237 * is set to the default, rounding to 64 bit precision. If this
221 238 * routine must work in non-default rounding precision modes, do
222 239 * the following instead:
223 240 *
224 241 * long double t;
225 242 *
226 243 * <set rp mode to round to 64 bit precision>
227 244 * t = x * y;
228 245 * <restore rp mode>
229 246 * return t + z;
230 247 *
231 248 * Note that the code to change rounding precision must not alter
232 249 * the exception masks or flags, since the product x * y may raise
233 250 * an invalid operation exception.
234 251 */
235 - return ((long double) x * y + z);
252 + return ((long double)x * y + z);
236 253 }
237 254 #endif
238 -
239 255 #else
240 256 #error Unknown architecture
241 257 #endif
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX