Print this page
5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vhypot.c
+++ new/usr/src/lib/libmvec/common/__vhypot.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 -#include "libm_synonyms.h"
32 31 #include "libm_inlines.h"
33 32
/*
 * HI(x) / LO(x): given a pointer x to a double, access the two int-sized
 * halves of that object in place (usable as lvalues). HI is read as a
 * signed int and is the half that the code below masks with 0x7fffffff and
 * compares against exponent patterns; LO is read as unsigned. Which half
 * sits at offset 0 is selected by _LITTLE_ENDIAN.
 * NOTE(review): assumes int is 32 bits and double is 64 bits -- confirm for
 * the target ABI. The macro argument is not parenthesized, so callers must
 * wrap compound expressions themselves, e.g. LO((px - stridex)).
 */
34 33 #ifdef _LITTLE_ENDIAN
35 34 #define HI(x) *(1+(int*)x)
36 35 #define LO(x) *(unsigned*)x
37 36 #else
38 37 #define HI(x) *(int*)x
39 38 #define LO(x) *(1+(unsigned*)x)
40 39 #endif
41 40
/*
 * Map the `restrict' qualifier used in the signatures below onto _Restrict
 * when __RESTRICT is defined; otherwise make it expand to nothing.
 */
42 41 #ifdef __RESTRICT
43 42 #define restrict _Restrict
44 43 #else
45 44 #define restrict
46 45 #endif
47 46
48 47 /* double hypot(double x, double y)
49 48 *
50 49 * Method :
51 50 * 1. Special cases:
52 51 * x or y is +Inf or -Inf => +Inf
53 52 * x or y is NaN => QNaN
54 53 * 2. Computes hypot(x,y):
55 54 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
56 55 * Where:
57 56 * m = max(|x|,|y|)
58 57 * xnm = x * (1/m)
↓ open down ↓ |
17 lines elided |
↑ open up ↑ |
59 58 * ynm = y * (1/m)
60 59 *
61 60 * Compute xnm * xnm + ynm * ynm by simulating
62 61 * multi-precision arithmetic.
63 62 *
64 63 * Accuracy:
65 64 * Maximum error observed: less than 0.872 ulp after 16.777.216.000
66 65 * results.
67 66 */
68 67
69 -#define sqrt __sqrt
70 -
/* Prototypes for the scalar math routines called by the functions below. */
71 68 extern double sqrt(double);
72 69 extern double fabs(double);
73 70
/*
 * Bit patterns of the double constants used for scaling and splitting.
 * The functions below read them back as doubles through a (double *) cast,
 * indexing this table in the order listed here.
 */
74 71 static const unsigned long long LCONST[] = {
75 72 0x41b0000000000000ULL, /* D2ON28 = 2 ** 28 */
76 73 0x0010000000000000ULL, /* D2ONM1022 = 2 ** -1022 */
77 74 0x7fd0000000000000ULL /* D2ONP1022 = 2 ** 1022 */
78 75 };
79 76
/*
 * Forward declaration of the file-local helper that __vhypot() calls on
 * runs of elements needing no special-case handling; it is defined at the
 * end of this file.
 * NOTE(review): the pragma below is compiler-specific and presumably asks
 * the compiler not to inline the helper -- confirm against the toolchain
 * documentation.
 */
80 77 static void
81 78 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
82 79 int stridey, double * restrict pz, int stridez);
83 80
84 81 #pragma no_inline(__vhypot_n)
85 82
/*
 * RETURN(ret): finish an element that __vhypot() computed inline inside its
 * inner for loop. It depends on that function's locals (px, py, pz, n, n_n,
 * hx0, hy0, spx, spy, spz) and on being expanded directly in the for body.
 *
 * It stores ret through pz and steps py and pz by their strides (px has
 * already been stepped at the top of the loop body). Then:
 *  - if no deferred elements are pending (n_n == 0), it reloads the high
 *    words for the next element, restarts the run at the current position,
 *    and `continue's, so the for loop's own n-- accounts for this element;
 *  - otherwise it decrements n itself and `break's out of the for loop, so
 *    the caller can hand the pending run to __vhypot_n().
 */
86 83 #define RETURN(ret) \
87 84 { \
88 85 *pz = (ret); \
89 86 py += stridey; \
90 87 pz += stridez; \
91 88 if (n_n == 0) \
92 89 { \
93 90 hx0 = HI(px); \
94 91 hy0 = HI(py); \
95 92 spx = px; spy = py; spz = pz; \
96 93 continue; \
97 94 } \
98 95 n--; \
99 96 break; \
100 97 }
101 98
/*
 * __vhypot: vector hypot. For each of the n elements, reads one x through
 * px and one y through py, and stores the hypot result through pz; the
 * pointers advance by stridex, stridey and stridez doubles per element.
 * Nothing is read or written when n <= 0.
 *
 * All but the last element are handled by the main loop, which classifies
 * each element from the high words of x and y:
 *  - huge, Inf or NaN operands and the both-subnormal case are computed
 *    inline and stored via RETURN();
 *  - every other element is only counted (n_n) and later processed as part
 *    of a contiguous run by __vhypot_n().
 * The main loop looks at the high word of the following x (hx1), which is
 * presumably why it stops one element early; the last element is handled
 * separately after the loop with the same case analysis.
 */
102 99 void
103 100 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
104 101 int stridey, double * restrict pz, int stridez)
105 102 {
106 103 int hx0, hx1, hy0, j0, diff;
107 104 double x_hi, x_lo, y_hi, y_lo;
/* scl starts as 0 so that only its high word needs to be set later. */
108 105 double scl = 0;
109 106 double x, y, res;
/* spx/spy/spz remember where the current run of deferred elements starts. */
110 107 double *spx, *spy, *spz;
/* n_n counts the deferred elements in the current run. */
111 108 int n_n;
112 109 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
113 110 double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */
114 111 double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */
115 112
/* Each pass of this outer loop starts a fresh run of deferred elements. */
116 113 while (n > 1)
117 114 {
118 115 n_n = 0;
119 116 spx = px;
120 117 spy = py;
121 118 spz = pz;
122 119 hx0 = HI(px);
123 120 hy0 = HI(py);
124 121 for (; n > 1 ; n--)
125 122 {
/* px is stepped first; the current x is therefore at (px - stridex). */
126 123 px += stridex;
/* Drop the sign bits so the high words can be compared as magnitudes. */
127 124 hx0 &= 0x7fffffff;
128 125 hy0 &= 0x7fffffff;
129 126
130 127 if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
131 128 {
/*
 * Branch-free max: j0 becomes the exponent field of the larger of hx0 and
 * hy0. (diff >> 31) is used as an all-ones/zero mask, which assumes an
 * arithmetic right shift of negative ints.
 */
132 129 diff = hy0 - hx0;
133 130 j0 = diff >> 31;
134 131 j0 = hy0 - (diff & j0);
135 132 j0 &= 0x7ff00000;
136 133 x = *(px - stridex);
137 134 y = *py;
138 135 x = fabs(x);
139 136 y = fabs(y);
140 137 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
141 138 {
142 139 int lx = LO((px - stridex));
143 140 int ly = LO(py);
/*
 * If either operand is an infinity (exponent all ones, low word zero) the
 * result is that infinity, even when the other operand is NaN; otherwise
 * x + y is used, which yields NaN here.
 */
144 141 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
145 142 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
146 143 else res = x + y;
147 144 RETURN (res)
148 145 }
149 146 else
150 147 {
/* ((diff ^ j0) - j0) is |diff|, again using j0 as a sign mask. */
151 148 j0 = diff >> 31;
152 149 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
153 150 {
/* Scale both operands down by 2**-1022 before squaring. */
154 151 x *= D2ONM1022;
155 152 y *= D2ONM1022;
156 153
/*
 * Split each operand into a high part (low-order bits rounded away by
 * adding and subtracting 2**28) and the remainder, then form
 * x*x + y*y as (x_hi*x_hi + y_hi*y_hi) plus the correction
 * (x + x_hi)*x_lo + (y + y_hi)*y_lo.
 */
157 154 x_hi = (x + D2ON28) - D2ON28;
158 155 x_lo = x - x_hi;
159 156 y_hi = (y + D2ON28) - D2ON28;
160 157 y_lo = y - y_hi;
161 158 res = (x_hi * x_hi + y_hi * y_hi);
162 159 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
163 160
164 161 res = sqrt (res);
165 162
/* Undo the scaling. */
166 163 res = D2ONP1022 * res;
167 164 RETURN (res)
168 165 }
/* Exponents too far apart: the result is taken to be x + y. */
169 166 else RETURN (x + y)
170 167 }
171 168 }
/* Same handling as above, entered when Y (rather than X) is the huge one. */
172 169 if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
173 170 {
174 171 diff = hy0 - hx0;
175 172 j0 = diff >> 31;
176 173 j0 = hy0 - (diff & j0);
177 174 j0 &= 0x7ff00000;
178 175 x = *(px - stridex);
179 176 y = *py;
180 177 x = fabs(x);
181 178 y = fabs(y);
182 179 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
183 180 {
184 181 int lx = LO((px - stridex));
185 182 int ly = LO(py);
186 183 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
187 184 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
188 185 else res = x + y;
189 186 RETURN (res)
190 187 }
191 188 else
192 189 {
193 190 j0 = diff >> 31;
194 191 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
195 192 {
196 193 x *= D2ONM1022;
197 194 y *= D2ONM1022;
198 195
199 196 x_hi = (x + D2ON28) - D2ON28;
200 197 x_lo = x - x_hi;
201 198 y_hi = (y + D2ON28) - D2ON28;
202 199 y_lo = y - y_hi;
203 200 res = (x_hi * x_hi + y_hi * y_hi);
204 201 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
205 202
206 203 res = sqrt (res);
207 204
208 205 res = D2ONP1022 * res;
209 206 RETURN (res)
210 207 }
211 208 else RETURN (x + y)
212 209 }
213 210 }
214 211
/* High word of the next x (px was already stepped at the top of the body). */
215 212 hx1 = HI(px);
216 213
217 214 if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
218 215 {
219 216 x = *(px - stridex);
220 217 y = *py;
221 218
/* Scale up by 2**1022, compute as above, then scale the result back down. */
222 219 x *= D2ONP1022;
223 220 y *= D2ONP1022;
224 221
225 222 x_hi = (x + D2ON28) - D2ON28;
226 223 x_lo = x - x_hi;
227 224 y_hi = (y + D2ON28) - D2ON28;
228 225 y_lo = y - y_hi;
229 226 res = (x_hi * x_hi + y_hi * y_hi);
230 227 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
231 228
232 229 res = sqrt(res);
233 230
234 231 res = D2ONM1022 * res;
235 232 RETURN (res)
236 233 }
237 234
/*
 * No special case applies: defer this element to the current run and move
 * on to the next one.
 */
238 235 hx0 = hx1;
239 236 py += stridey;
240 237 pz += stridez;
241 238 n_n++;
242 239 hy0 = HI(py);
243 240 }
/* Process the run of deferred elements collected by the loop above. */
244 241 if (n_n > 0)
245 242 __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
246 243 }
247 244
/* Last element: same case analysis as the main loop, done inline. */
248 245 if (n > 0)
249 246 {
250 247 x = *px;
251 248 y = *py;
252 249 hx0 = HI(px);
253 250 hy0 = HI(py);
254 251
255 252 hx0 &= 0x7fffffff;
256 253 hy0 &= 0x7fffffff;
257 254
/* j0 = exponent field of the larger of hx0 and hy0 (see the main loop). */
258 255 diff = hy0 - hx0;
259 256 j0 = diff >> 31;
260 257 j0 = hy0 - (diff & j0);
261 258 j0 &= 0x7ff00000;
262 259
263 260 if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
264 261 {
265 262 x = fabs(x);
266 263 y = fabs(y);
267 264 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
268 265 {
269 266 int lx = LO(px);
270 267 int ly = LO(py);
271 268 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
272 269 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
273 270 else res = x + y;
274 271 *pz = res;
275 272 return;
276 273 }
277 274 else
278 275 {
279 276 j0 = diff >> 31;
280 277 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
281 278 {
282 279 x *= D2ONM1022;
283 280 y *= D2ONM1022;
284 281
285 282 x_hi = (x + D2ON28) - D2ON28;
286 283 x_lo = x - x_hi;
287 284 y_hi = (y + D2ON28) - D2ON28;
288 285 y_lo = y - y_hi;
289 286 res = (x_hi * x_hi + y_hi * y_hi);
290 287 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
291 288
292 289 res = sqrt (res);
293 290
294 291 res = D2ONP1022 * res;
295 292 *pz = res;
296 293 return;
297 294 }
298 295 else
299 296 {
300 297 *pz = x + y;
301 298 return;
302 299 }
303 300 }
304 301 }
305 302
/* j0 is the larger exponent field, so this means both are subnormal. */
306 303 if (j0 < 0x00100000) /* X and Y are subnormal */
307 304 {
308 305 x *= D2ONP1022;
309 306 y *= D2ONP1022;
310 307
311 308 x_hi = (x + D2ON28) - D2ON28;
312 309 x_lo = x - x_hi;
313 310 y_hi = (y + D2ON28) - D2ON28;
314 311 y_lo = y - y_hi;
315 312 res = (x_hi * x_hi + y_hi * y_hi);
316 313 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
317 314
318 315 res = sqrt(res);
319 316
320 317 res = D2ONM1022 * res;
321 318 *pz = res;
322 319 return;
323 320 }
324 321
/*
 * General case. Writing (0x7fe00000 - j0) into the high word of scl (whose
 * low word is 0) makes scl a power of two that brings the larger operand's
 * exponent field to 0x3ff before squaring.
 */
325 322 HI(&scl) = (0x7fe00000 - j0);
326 323
327 324 x *= scl;
328 325 y *= scl;
329 326
330 327 x_hi = (x + D2ON28) - D2ON28;
331 328 y_hi = (y + D2ON28) - D2ON28;
332 329 x_lo = x - x_hi;
333 330 y_lo = y - y_hi;
334 331
335 332 res = (x_hi * x_hi + y_hi * y_hi);
336 333 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
337 334
338 335 res = sqrt(res);
339 336
/* Turn scl into the power of two with exponent field j0 to undo the scaling. */
340 337 HI(&scl) = j0;
341 338
342 339 res = scl * res;
343 340 *pz = res;
344 341 }
345 342 }
346 343
/*
 * __vhypot_n: compute hypot for n elements with no special-case checks.
 * It reads x through px and y through py, stores the result through pz, and
 * steps the pointers by stridex, stridey and stridez doubles per element.
 *
 * The only caller visible in this file, __vhypot(), passes runs of
 * elements that it did not handle inline, i.e. that took neither its
 * huge/Inf/NaN branches nor its both-subnormal branch.
 *
 * Per element: scale both operands by a power of two derived from the
 * larger exponent field, form x*x + y*y from high/low splits of the scaled
 * operands, take the square root, and scale the result back.
 */
347 344 static void
348 345 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
349 346 int stridey, double * restrict pz, int stridez)
350 347 {
351 348 int hx0, hy0, j0, diff0;
/* scl0 starts as 0 so that only its high word needs to be set below. */
352 349 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
353 350 double x0, y0, res0;
354 351 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
355 352
356 353 for(; n > 0 ; n--)
357 354 {
358 355 x0 = *px;
359 356 y0 = *py;
360 357 hx0 = HI(px);
361 358 hy0 = HI(py);
362 359
/* Drop the sign bits of the high words. */
363 360 hx0 &= 0x7fffffff;
364 361 hy0 &= 0x7fffffff;
365 362
/*
 * j0 = exponent field of the larger of hx0 and hy0. (diff0 >> 31) serves as
 * an all-ones/zero mask, which assumes an arithmetic right shift of
 * negative ints.
 */
366 363 diff0 = hy0 - hx0;
367 364 j0 = diff0 >> 31;
368 365 j0 = hy0 - (diff0 & j0);
369 366 j0 &= 0x7ff00000;
370 367
371 368 px += stridex;
372 369 py += stridey;
373 370
/* Power-of-two scale that brings the larger exponent field to 0x3ff. */
374 371 HI(&scl0) = (0x7fe00000 - j0);
375 372
376 373 x0 *= scl0;
377 374 y0 *= scl0;
378 375
/*
 * Split each scaled operand into a high part (low-order bits rounded away
 * by adding and subtracting 2**28) and the remainder.
 */
379 376 x_hi0 = (x0 + D2ON28) - D2ON28;
380 377 y_hi0 = (y0 + D2ON28) - D2ON28;
381 378 x_lo0 = x0 - x_hi0;
382 379 y_lo0 = y0 - y_hi0;
383 380
/* x*x + y*y as the sum of the high squares plus the correction terms. */
384 381 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
385 382 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
386 383
↓ open down ↓ |
306 lines elided |
↑ open up ↑ |
387 384 res0 = sqrt(res0);
388 385
/* Turn scl0 into the power of two with exponent field j0 to undo the scaling. */
389 386 HI(&scl0) = j0;
390 387
391 388 res0 = scl0 * res0;
392 389 *pz = res0;
393 390
394 391 pz += stridez;
395 392 }
396 393 }
397 -
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX