Print this page
5262 libm needs to be carefully unifdef'd
5268 libm doesn't need to hide symbols which are already local
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/fmaf.c
+++ new/usr/src/lib/libm/common/m9x/fmaf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 -#if defined(ELFOBJ)
31 30 #pragma weak fmaf = __fmaf
32 -#endif
33 31
34 32 #include "libm.h"
35 33 #include "fma.h"
36 34 #include "fenv_inlines.h"
37 35
38 36 #if defined(__sparc)
39 37
40 38 /*
41 39 * fmaf for SPARC: 32-bit single precision, big-endian
42 40 */
43 41 float
44 42 __fmaf(float x, float y, float z) {
45 43 union {
46 44 unsigned i[2];
47 45 double d;
48 46 } xy, zz;
49 47 unsigned u, s;
50 48 int exy, ez;
51 49
52 50 /*
53 51 * the following operations can only raise the invalid exception,
54 52 * and then only if either x*y is of the form Inf*0 or one of x,
55 53 * y, or z is a signaling NaN
56 54 */
57 55 xy.d = (double) x * y;
58 56 zz.d = (double) z;
59 57
60 58 /*
61 59 * if the sum xy + z will be exact, just compute it and cast the
62 60 * result to float
63 61 */
64 62 exy = (xy.i[0] >> 20) & 0x7ff;
65 63 ez = (zz.i[0] >> 20) & 0x7ff;
66 64 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
67 65 ez == 0x7ff || ez == 0) {
68 66 return ((float) (xy.d + zz.d));
69 67 }
70 68
71 69 /*
72 70 * collapse the tail of the smaller summand into a "sticky bit"
73 71 * so that the sum can be computed without error
74 72 */
75 73 if (ez > exy) {
76 74 if (ez - exy < 31) {
77 75 u = xy.i[1];
78 76 s = 2 << (ez - exy);
79 77 if (u & (s - 1))
80 78 u |= s;
81 79 xy.i[1] = u & ~(s - 1);
82 80 } else if (ez - exy < 51) {
83 81 u = xy.i[0];
84 82 s = 1 << (ez - exy - 31);
85 83 if ((u & (s - 1)) | xy.i[1])
86 84 u |= s;
87 85 xy.i[0] = u & ~(s - 1);
88 86 xy.i[1] = 0;
89 87 } else {
90 88 /* collapse all of xy into a single bit */
91 89 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
92 90 xy.i[1] = 0;
93 91 }
94 92 } else {
95 93 if (exy - ez < 31) {
96 94 u = zz.i[1];
97 95 s = 2 << (exy - ez);
98 96 if (u & (s - 1))
99 97 u |= s;
100 98 zz.i[1] = u & ~(s - 1);
101 99 } else if (exy - ez < 51) {
102 100 u = zz.i[0];
103 101 s = 1 << (exy - ez - 31);
104 102 if ((u & (s - 1)) | zz.i[1])
105 103 u |= s;
106 104 zz.i[0] = u & ~(s - 1);
107 105 zz.i[1] = 0;
108 106 } else {
109 107 /* collapse all of zz into a single bit */
110 108 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
111 109 zz.i[1] = 0;
112 110 }
113 111 }
114 112
115 113 return ((float) (xy.d + zz.d));
116 114 }
117 115
118 116 #elif defined(__x86)
119 117
120 118 #if defined(__amd64)
121 119 #define NI 4
122 120 #else
123 121 #define NI 3
124 122 #endif
125 123
126 124 /*
127 125 * fmaf for x86: 32-bit single precision, little-endian
128 126 */
129 127 float
130 128 __fmaf(float x, float y, float z) {
131 129 union {
132 130 unsigned i[NI];
133 131 long double e;
134 132 } xy, zz;
135 133 unsigned u, s, cwsw, oldcwsw;
136 134 int exy, ez;
137 135
138 136 /* set rounding precision to 64 bits */
139 137 __fenv_getcwsw(&oldcwsw);
140 138 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
141 139 __fenv_setcwsw(&cwsw);
142 140
143 141 /*
144 142 * the following operations can only raise the invalid exception,
145 143 * and then only if either x*y is of the form Inf*0 or one of x,
146 144 * y, or z is a signaling NaN
147 145 */
148 146 xy.e = (long double) x * y;
149 147 zz.e = (long double) z;
150 148
151 149 /*
152 150 * if the sum xy + z will be exact, just compute it and cast the
153 151 * result to float
154 152 */
155 153 exy = xy.i[2] & 0x7fff;
156 154 ez = zz.i[2] & 0x7fff;
157 155 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
158 156 ez == 0x7fff || ez == 0) {
159 157 goto cont;
160 158 }
161 159
162 160 /*
163 161 * collapse the tail of the smaller summand into a "sticky bit"
164 162 * so that the sum can be computed without error
165 163 */
166 164 if (ez > exy) {
167 165 if (ez - exy < 31) {
168 166 u = xy.i[0];
169 167 s = 2 << (ez - exy);
170 168 if (u & (s - 1))
171 169 u |= s;
172 170 xy.i[0] = u & ~(s - 1);
173 171 } else if (ez - exy < 62) {
174 172 u = xy.i[1];
175 173 s = 1 << (ez - exy - 31);
176 174 if ((u & (s - 1)) | xy.i[0])
177 175 u |= s;
178 176 xy.i[1] = u & ~(s - 1);
179 177 xy.i[0] = 0;
180 178 } else {
181 179 /* collapse all of xy into a single bit */
182 180 xy.i[0] = 0;
183 181 xy.i[1] = 0x80000000;
184 182 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
185 183 }
186 184 } else {
187 185 if (exy - ez < 62) {
188 186 u = zz.i[1];
189 187 s = 1 << (exy - ez - 31);
190 188 if ((u & (s - 1)) | zz.i[0])
191 189 u |= s;
192 190 zz.i[1] = u & ~(s - 1);
193 191 zz.i[0] = 0;
194 192 } else {
195 193 /* collapse all of zz into a single bit */
196 194 zz.i[0] = 0;
197 195 zz.i[1] = 0x80000000;
198 196 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
199 197 }
200 198 }
201 199
202 200 cont:
203 201 xy.e += zz.e;
204 202
205 203 /* restore the rounding precision */
206 204 __fenv_getcwsw(&cwsw);
207 205 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
208 206 __fenv_setcwsw(&cwsw);
209 207
210 208 return ((float) xy.e);
211 209 }
212 210
213 211 #if 0
214 212 /*
215 213 * another fmaf for x86: assumes return value will be left in
216 214 * long double (80-bit double extended) precision
217 215 */
218 216 long double
219 217 __fmaf(float x, float y, float z) {
220 218 /*
221 219 * Note: This implementation assumes the rounding precision mode
222 220 * is set to the default, rounding to 64 bit precision. If this
223 221 * routine must work in non-default rounding precision modes, do
224 222 * the following instead:
225 223 *
226 224 * long double t;
227 225 *
228 226 * <set rp mode to round to 64 bit precision>
229 227 * t = x * y;
230 228 * <restore rp mode>
231 229 * return t + z;
232 230 *
233 231 * Note that the code to change rounding precision must not alter
234 232 * the exception masks or flags, since the product x * y may raise
235 233 * an invalid operation exception.
236 234 */
237 235 return ((long double) x * y + z);
238 236 }
239 237 #endif
240 238
241 239 #else
242 240 #error Unknown architecture
243 241 #endif
↓ open down ↓ |
201 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX