1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include "libm_synonyms.h"
32 #include "libm_inlines.h"
33
/*
 * HI(x) / LO(x): 32-bit word views of the double that x points to.
 *
 * x must be a pointer to double.  HI() selects the word at int offset 1 on
 * little-endian builds and offset 0 otherwise, i.e. the word that carries
 * the sign, the exponent and the top mantissa bits; LO() selects the other
 * word as an unsigned value.  Both expand to lvalues, so they can be
 * assigned to as well as read (e.g. HI(&scl) = ...).
 *
 * The argument and the whole expansion are parenthesized so that an
 * argument such as "p + 1" and a use inside a larger expression bind as
 * intended.
 *
 * NOTE(review): assumes int/unsigned are 32 bits and that _LITTLE_ENDIAN
 * is supplied by <sys/isa_defs.h> on little-endian targets - confirm.
 */
#ifdef _LITTLE_ENDIAN
#define	HI(x)	(*(1 + (int *)(x)))
#define	LO(x)	(*(unsigned *)(x))
#else
#define	HI(x)	(*(int *)(x))
#define	LO(x)	(*(1 + (unsigned *)(x)))
#endif

/*
 * Map the "restrict" qualifier used in the prototypes below onto _Restrict
 * when __RESTRICT is defined, and onto nothing otherwise.
 */
#ifdef __RESTRICT
#define	restrict	_Restrict
#else
#define	restrict
#endif
47
48 /* double hypot(double x, double y)
49 *
50 * Method :
51 * 1. Special cases:
52 * x or y is +Inf or -Inf => +Inf
53 * x or y is NaN => QNaN
54 * 2. Computes hypot(x,y):
55 * hypot(x,y) = m * sqrt(xnm * xnm + ynm * ynm)
56 * Where:
57 * m = max(|x|,|y|)
58 * xnm = x * (1/m)
59 * ynm = y * (1/m)
60 *
 * Compute xnm * xnm + ynm * ynm by simulating
 * multi-precision arithmetic.
63 *
64 * Accuracy:
65 * Maximum error observed: less than 0.872 ulp after 16.777.216.000
66 * results.
67 */
68
/*
 * Every sqrt() written in this file is renamed to __sqrt by the
 * preprocessor, so the declaration below declares __sqrt.
 * NOTE(review): __sqrt is presumably libm's internal sqrt entry - confirm.
 */
#define	sqrt	__sqrt

extern double sqrt(double);
extern double fabs(double);

/*
 * Scaling constants, indexed as ((double *)LCONST)[i] by the routines below.
 *
 * They are stored as doubles (exact hexadecimal floating constants) rather
 * than as 64-bit integer bit patterns, so reading them through a double
 * pointer does not reinterpret an object of another type.  The values are
 * the same as the former patterns 0x41b0000000000000, 0x0010000000000000
 * and 0x7fd0000000000000.
 */
static const double LCONST[] = {
	0x1.0p+28,	/* D2ON28    = 2 ** 28 */
	0x1.0p-1022,	/* D2ONM1022 = 2 ** -1022 */
	0x1.0p+1022	/* D2ONP1022 = 2 ** 1022 */
};

/*
 * Worker for runs of elements that need no special-case handling;
 * defined at the end of the file and kept out of line.
 */
static void
__vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
    int stridey, double * restrict pz, int stridez);

#pragma no_inline(__vhypot_n)
85
/*
 * RETURN(ret): finish the current element from inside the scanning loop of
 * __vhypot().
 *
 * Stores ret through pz and steps py and pz to the next element (the caller
 * has already stepped px).  Then:
 *   - if no ordinary elements are pending (n_n == 0), the next element
 *     becomes the start of a new run: its high words are loaded into
 *     hx0/hy0, the run start pointers spx/spy/spz are reset, and the
 *     enclosing loop continues;
 *   - otherwise the element is counted off (n--) and the enclosing loop is
 *     left with break, so the pending run can be processed.
 *
 * Because it ends in continue/break, the macro must be expanded directly in
 * the body of that for loop, and it is used without a trailing semicolon.
 * It relies on these names from the enclosing scope: n, n_n, px, py, pz,
 * stridey, stridez, hx0, hy0, spx, spy, spz.
 */
#define	RETURN(ret)				\
{						\
	*pz = (ret);				\
	pz += stridez;				\
	py += stridey;				\
	if (n_n != 0) {				\
		n--;				\
		break;				\
	}					\
	spx = px;				\
	spy = py;				\
	spz = pz;				\
	hx0 = HI(px);				\
	hy0 = HI(py);				\
	continue;				\
}
101
102 void
103 __vhypot(int n, double * restrict px, int stridex, double * restrict py,
104 int stridey, double * restrict pz, int stridez)
105 {
106 int hx0, hx1, hy0, j0, diff;
107 double x_hi, x_lo, y_hi, y_lo;
108 double scl = 0;
109 double x, y, res;
110 double *spx, *spy, *spz;
111 int n_n;
112 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
113 double D2ONM1022 = ((double*)LCONST)[1]; /* 2 **-1022 */
114 double D2ONP1022 = ((double*)LCONST)[2]; /* 2 ** 1022 */
115
116 while (n > 1)
117 {
118 n_n = 0;
119 spx = px;
120 spy = py;
121 spz = pz;
122 hx0 = HI(px);
123 hy0 = HI(py);
124 for (; n > 1 ; n--)
125 {
126 px += stridex;
127 hx0 &= 0x7fffffff;
128 hy0 &= 0x7fffffff;
129
130 if (hx0 >= 0x7fe00000) /* |X| >= 2**1023 or Inf or NaN */
131 {
132 diff = hy0 - hx0;
133 j0 = diff >> 31;
134 j0 = hy0 - (diff & j0);
135 j0 &= 0x7ff00000;
136 x = *(px - stridex);
137 y = *py;
138 x = fabs(x);
139 y = fabs(y);
140 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
141 {
142 int lx = LO((px - stridex));
143 int ly = LO(py);
144 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
145 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
146 else res = x + y;
147 RETURN (res)
148 }
149 else
150 {
151 j0 = diff >> 31;
152 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
153 {
154 x *= D2ONM1022;
155 y *= D2ONM1022;
156
157 x_hi = (x + D2ON28) - D2ON28;
158 x_lo = x - x_hi;
159 y_hi = (y + D2ON28) - D2ON28;
160 y_lo = y - y_hi;
161 res = (x_hi * x_hi + y_hi * y_hi);
162 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
163
164 res = sqrt (res);
165
166 res = D2ONP1022 * res;
167 RETURN (res)
168 }
169 else RETURN (x + y)
170 }
171 }
172 if (hy0 >= 0x7fe00000) /* |Y| >= 2**1023 or Inf or NaN */
173 {
174 diff = hy0 - hx0;
175 j0 = diff >> 31;
176 j0 = hy0 - (diff & j0);
177 j0 &= 0x7ff00000;
178 x = *(px - stridex);
179 y = *py;
180 x = fabs(x);
181 y = fabs(y);
182 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
183 {
184 int lx = LO((px - stridex));
185 int ly = LO(py);
186 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
187 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
188 else res = x + y;
189 RETURN (res)
190 }
191 else
192 {
193 j0 = diff >> 31;
194 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
195 {
196 x *= D2ONM1022;
197 y *= D2ONM1022;
198
199 x_hi = (x + D2ON28) - D2ON28;
200 x_lo = x - x_hi;
201 y_hi = (y + D2ON28) - D2ON28;
202 y_lo = y - y_hi;
203 res = (x_hi * x_hi + y_hi * y_hi);
204 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
205
206 res = sqrt (res);
207
208 res = D2ONP1022 * res;
209 RETURN (res)
210 }
211 else RETURN (x + y)
212 }
213 }
214
215 hx1 = HI(px);
216
217 if (hx0 < 0x00100000 && hy0 < 0x00100000) /* X and Y are subnormal */
218 {
219 x = *(px - stridex);
220 y = *py;
221
222 x *= D2ONP1022;
223 y *= D2ONP1022;
224
225 x_hi = (x + D2ON28) - D2ON28;
226 x_lo = x - x_hi;
227 y_hi = (y + D2ON28) - D2ON28;
228 y_lo = y - y_hi;
229 res = (x_hi * x_hi + y_hi * y_hi);
230 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
231
232 res = sqrt(res);
233
234 res = D2ONM1022 * res;
235 RETURN (res)
236 }
237
238 hx0 = hx1;
239 py += stridey;
240 pz += stridez;
241 n_n++;
242 hy0 = HI(py);
243 }
244 if (n_n > 0)
245 __vhypot_n (n_n, spx, stridex, spy, stridey, spz, stridez);
246 }
247
248 if (n > 0)
249 {
250 x = *px;
251 y = *py;
252 hx0 = HI(px);
253 hy0 = HI(py);
254
255 hx0 &= 0x7fffffff;
256 hy0 &= 0x7fffffff;
257
258 diff = hy0 - hx0;
259 j0 = diff >> 31;
260 j0 = hy0 - (diff & j0);
261 j0 &= 0x7ff00000;
262
263 if (j0 >= 0x7fe00000) /* max(|X|,|Y|) >= 2**1023 or X or Y = Inf or NaN */
264 {
265 x = fabs(x);
266 y = fabs(y);
267 if (j0 >= 0x7ff00000) /* |X| or |Y| = Inf or NaN */
268 {
269 int lx = LO(px);
270 int ly = LO(py);
271 if (hx0 == 0x7ff00000 && lx == 0) res = x == y ? y : x;
272 else if (hy0 == 0x7ff00000 && ly == 0) res = x == y ? x : y;
273 else res = x + y;
274 *pz = res;
275 return;
276 }
277 else
278 {
279 j0 = diff >> 31;
280 if (((diff ^ j0) - j0) < 0x03600000) /* max(|X|,|Y|)/min(|X|,|Y|) < 2**54 */
281 {
282 x *= D2ONM1022;
283 y *= D2ONM1022;
284
285 x_hi = (x + D2ON28) - D2ON28;
286 x_lo = x - x_hi;
287 y_hi = (y + D2ON28) - D2ON28;
288 y_lo = y - y_hi;
289 res = (x_hi * x_hi + y_hi * y_hi);
290 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
291
292 res = sqrt (res);
293
294 res = D2ONP1022 * res;
295 *pz = res;
296 return;
297 }
298 else
299 {
300 *pz = x + y;
301 return;
302 }
303 }
304 }
305
306 if (j0 < 0x00100000) /* X and Y are subnormal */
307 {
308 x *= D2ONP1022;
309 y *= D2ONP1022;
310
311 x_hi = (x + D2ON28) - D2ON28;
312 x_lo = x - x_hi;
313 y_hi = (y + D2ON28) - D2ON28;
314 y_lo = y - y_hi;
315 res = (x_hi * x_hi + y_hi * y_hi);
316 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
317
318 res = sqrt(res);
319
320 res = D2ONM1022 * res;
321 *pz = res;
322 return;
323 }
324
325 HI(&scl) = (0x7fe00000 - j0);
326
327 x *= scl;
328 y *= scl;
329
330 x_hi = (x + D2ON28) - D2ON28;
331 y_hi = (y + D2ON28) - D2ON28;
332 x_lo = x - x_hi;
333 y_lo = y - y_hi;
334
335 res = (x_hi * x_hi + y_hi * y_hi);
336 res += ((x + x_hi) * x_lo + (y + y_hi) * y_lo);
337
338 res = sqrt(res);
339
340 HI(&scl) = j0;
341
342 res = scl * res;
343 *pz = res;
344 }
345 }
346
347 static void
348 __vhypot_n(int n, double * restrict px, int stridex, double * restrict py,
349 int stridey, double * restrict pz, int stridez)
350 {
351 int hx0, hy0, j0, diff0;
352 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
353 double x0, y0, res0;
354 double D2ON28 = ((double*)LCONST)[0]; /* 2 ** 28 */
355
356 for(; n > 0 ; n--)
357 {
358 x0 = *px;
359 y0 = *py;
360 hx0 = HI(px);
361 hy0 = HI(py);
362
363 hx0 &= 0x7fffffff;
364 hy0 &= 0x7fffffff;
365
366 diff0 = hy0 - hx0;
367 j0 = diff0 >> 31;
368 j0 = hy0 - (diff0 & j0);
369 j0 &= 0x7ff00000;
370
371 px += stridex;
372 py += stridey;
373
374 HI(&scl0) = (0x7fe00000 - j0);
375
376 x0 *= scl0;
377 y0 *= scl0;
378
379 x_hi0 = (x0 + D2ON28) - D2ON28;
380 y_hi0 = (y0 + D2ON28) - D2ON28;
381 x_lo0 = x0 - x_hi0;
382 y_lo0 = y0 - y_hi0;
383
384 res0 = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
385 res0 += ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
386
387 res0 = sqrt(res0);
388
389 HI(&scl0) = j0;
390
391 res0 = scl0 * res0;
392 *pz = res0;
393
394 pz += stridez;
395 }
396 }
397