Print this page
5261 libm should stop using synonyms.h
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vrhypot.c
+++ new/usr/src/lib/libmvec/common/__vrhypot.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 -#include "libm_synonyms.h"
32 31 #include "libm_inlines.h"
33 32
34 33 #ifdef _LITTLE_ENDIAN
35 34 #define HI(x) *(1+(int*)x)
36 35 #define LO(x) *(unsigned*)x
37 36 #else
38 37 #define HI(x) *(int*)x
39 38 #define LO(x) *(1+(unsigned*)x)
40 39 #endif
41 40
42 41 #ifdef __RESTRICT
43 42 #define restrict _Restrict
44 43 #else
45 44 #define restrict
46 45 #endif
47 46
48 47 /* double rhypot(double x, double y)
49 48 *
50 49 * Method :
51 50 * 1. Special cases:
52 51 * x or y = Inf => 0
53 52 * x or y = NaN => QNaN
54 53 * x and y = 0 => Inf + divide-by-zero
55 54 * 2. Computes rhypot(x,y):
56 55 * rhypot(x,y) = m * sqrt(1/(xnm * xnm + ynm * ynm))
57 56 * Where:
58 57 * m = 1/max(|x|,|y|)
59 58 * xnm = x * m
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
60 59 * ynm = y * m
61 60 *
62 61 * Compute 1/(xnm * xnm + ynm * ynm) by simulating
63 62 * muti-precision arithmetic.
64 63 *
65 64 * Accuracy:
66 65 * Maximum error observed: less than 0.869 ulp after 1.000.000.000
67 66 * results.
68 67 */
69 68
70 -#define sqrt __sqrt
71 -
72 69 extern double sqrt(double);
73 -
74 70 extern double fabs(double);
75 71
76 72 static const int __vlibm_TBL_rhypot[] = {
77 73 /* i = [0,127]
78 74 * TBL[i] = 0x3ff00000 + *(int*)&(1.0 / *(double*)&(0x3ff0000000000000ULL + (i << 45))); */
79 75 0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
80 76 0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
81 77 0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
82 78 0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
83 79 0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
84 80 0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
85 81 0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
86 82 0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
87 83 0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
88 84 0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
89 85 0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
90 86 0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
91 87 0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
92 88 0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
93 89 0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
94 90 0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
95 91 0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
96 92 0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
97 93 0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
98 94 0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
99 95 0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
100 96 0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
101 97 0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
102 98 0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
103 99 0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
104 100 0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
105 101 0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
106 102 0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
107 103 0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
108 104 0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
109 105 0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
110 106 0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
111 107 };
112 108
113 109 static const unsigned long long LCONST[] = {
114 110 0x3ff0000000000000ULL, /* DONE = 1.0 */
115 111 0x4000000000000000ULL, /* DTWO = 2.0 */
116 112 0x4230000000000000ULL, /* D2ON36 = 2**36 */
117 113 0x7fd0000000000000ULL, /* D2ON1022 = 2**1022 */
118 114 0x3cb0000000000000ULL, /* D2ONM52 = 2**-52 */
119 115 };
120 116
121 117 #define RET_SC(I) \
122 118 px += stridex; \
123 119 py += stridey; \
124 120 pz += stridez; \
125 121 if (--n <= 0) \
126 122 break; \
127 123 goto start##I;
128 124
129 125 #define RETURN(I, ret) \
130 126 { \
131 127 pz[0] = (ret); \
132 128 RET_SC(I) \
133 129 }
134 130
135 131 #define PREP(I) \
136 132 hx##I = HI(px); \
137 133 hy##I = HI(py); \
138 134 hx##I &= 0x7fffffff; \
139 135 hy##I &= 0x7fffffff; \
140 136 pz##I = pz; \
141 137 if (hx##I >= 0x7ff00000 || hy##I >= 0x7ff00000) /* |X| or |Y| = Inf,NaN */ \
142 138 { \
143 139 lx = LO(px); \
144 140 ly = LO(py); \
145 141 x = *px; \
146 142 y = *py; \
147 143 if (hx##I == 0x7ff00000 && lx == 0) res0 = 0.0; /* |X| = Inf */ \
148 144 else if (hy##I == 0x7ff00000 && ly == 0) res0 = 0.0; /* |Y| = Inf */ \
149 145 else res0 = fabs(x) + fabs(y); \
150 146 \
151 147 RETURN (I, res0) \
152 148 } \
153 149 x##I = *px; \
154 150 y##I = *py; \
155 151 diff0 = hy##I - hx##I; \
156 152 j0 = diff0 >> 31; \
157 153 if (hx##I < 0x00100000 && hy##I < 0x00100000) /* |X| and |Y| = subnormal or zero */ \
158 154 { \
159 155 lx = LO(px); \
160 156 ly = LO(py); \
161 157 x = x##I; \
162 158 y = y##I; \
163 159 \
164 160 if ((hx##I | hy##I | lx | ly) == 0) /* |X| and |Y| = 0 */ \
165 161 RETURN (I, DONE / 0.0) \
166 162 \
167 163 x = fabs(x); \
168 164 y = fabs(y); \
169 165 \
170 166 x = *(long long*)&x; \
171 167 y = *(long long*)&y; \
172 168 \
173 169 x *= D2ONM52; \
174 170 y *= D2ONM52; \
175 171 \
176 172 x_hi0 = (x + D2ON36) - D2ON36; \
177 173 y_hi0 = (y + D2ON36) - D2ON36; \
178 174 x_lo0 = x - x_hi0; \
179 175 y_lo0 = y - y_hi0; \
180 176 res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0); \
181 177 res0_lo = ((x + x_hi0) * x_lo0 + (y + y_hi0) * y_lo0); \
182 178 \
183 179 dres0 = res0_hi + res0_lo; \
184 180 \
185 181 iarr0 = HI(&dres0); \
186 182 iexp0 = iarr0 & 0xfff00000; \
187 183 \
188 184 iarr0 = (iarr0 >> 11) & 0x1fc; \
189 185 itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0]; \
190 186 itbl0 -= iexp0; \
191 187 HI(&dd0) = itbl0; \
192 188 LO(&dd0) = 0; \
193 189 \
194 190 dd0 = dd0 * (DTWO - dd0 * dres0); \
195 191 dd0 = dd0 * (DTWO - dd0 * dres0); \
196 192 dres0 = dd0 * (DTWO - dd0 * dres0); \
197 193 \
198 194 HI(&res0) = HI(&dres0) & 0xffffff00; \
199 195 LO(&res0) = 0; \
200 196 res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0; \
201 197 res0 = sqrt (res0); \
202 198 \
203 199 res0 = D2ON1022 * res0; \
204 200 RETURN (I, res0) \
205 201 } \
206 202 j0 = hy##I - (diff0 & j0); \
207 203 j0 &= 0x7ff00000; \
208 204 HI(&scl##I) = 0x7ff00000 - j0;
209 205
210 206 void
211 207 __vrhypot(int n, double * restrict px, int stridex, double * restrict py,
212 208 int stridey, double * restrict pz, int stridez)
213 209 {
214 210 int i = 0;
215 211 double x, y;
216 212 double x_hi0, x_lo0, y_hi0, y_lo0, scl0 = 0;
217 213 double x0, y0, res0, dd0;
218 214 double res0_hi,res0_lo, dres0;
219 215 double x_hi1, x_lo1, y_hi1, y_lo1, scl1 = 0;
220 216 double x1 = 0.0L, y1 = 0.0L, res1, dd1;
221 217 double res1_hi,res1_lo, dres1;
222 218 double x_hi2, x_lo2, y_hi2, y_lo2, scl2 = 0;
223 219 double x2, y2, res2, dd2;
224 220 double res2_hi,res2_lo, dres2;
225 221
226 222 int hx0, hy0, j0, diff0;
227 223 int iarr0, iexp0, itbl0;
228 224 int hx1, hy1;
229 225 int iarr1, iexp1, itbl1;
230 226 int hx2, hy2;
231 227 int iarr2, iexp2, itbl2;
232 228
233 229 int lx, ly;
234 230
235 231 double DONE = ((double*)LCONST)[0];
236 232 double DTWO = ((double*)LCONST)[1];
237 233 double D2ON36 = ((double*)LCONST)[2];
238 234 double D2ON1022 = ((double*)LCONST)[3];
239 235 double D2ONM52 = ((double*)LCONST)[4];
240 236
241 237 double *pz0, *pz1 = 0, *pz2;
242 238
243 239 do
244 240 {
245 241 start0:
246 242 PREP(0)
247 243 px += stridex;
248 244 py += stridey;
249 245 pz += stridez;
250 246 i = 1;
251 247 if (--n <= 0)
252 248 break;
253 249
254 250 start1:
255 251 PREP(1)
256 252 px += stridex;
257 253 py += stridey;
258 254 pz += stridez;
259 255 i = 2;
260 256 if (--n <= 0)
261 257 break;
262 258
263 259 start2:
264 260 PREP(2)
265 261
266 262 x0 *= scl0;
267 263 y0 *= scl0;
268 264 x1 *= scl1;
269 265 y1 *= scl1;
270 266 x2 *= scl2;
271 267 y2 *= scl2;
272 268
273 269 x_hi0 = (x0 + D2ON36) - D2ON36;
274 270 y_hi0 = (y0 + D2ON36) - D2ON36;
275 271 x_hi1 = (x1 + D2ON36) - D2ON36;
276 272 y_hi1 = (y1 + D2ON36) - D2ON36;
277 273 x_hi2 = (x2 + D2ON36) - D2ON36;
278 274 y_hi2 = (y2 + D2ON36) - D2ON36;
279 275 x_lo0 = x0 - x_hi0;
280 276 y_lo0 = y0 - y_hi0;
281 277 x_lo1 = x1 - x_hi1;
282 278 y_lo1 = y1 - y_hi1;
283 279 x_lo2 = x2 - x_hi2;
284 280 y_lo2 = y2 - y_hi2;
285 281 res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
286 282 res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
287 283 res2_hi = (x_hi2 * x_hi2 + y_hi2 * y_hi2);
288 284 res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
289 285 res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
290 286 res2_lo = ((x2 + x_hi2) * x_lo2 + (y2 + y_hi2) * y_lo2);
291 287
292 288 dres0 = res0_hi + res0_lo;
293 289 dres1 = res1_hi + res1_lo;
294 290 dres2 = res2_hi + res2_lo;
295 291
296 292 iarr0 = HI(&dres0);
297 293 iarr1 = HI(&dres1);
298 294 iarr2 = HI(&dres2);
299 295 iexp0 = iarr0 & 0xfff00000;
300 296 iexp1 = iarr1 & 0xfff00000;
301 297 iexp2 = iarr2 & 0xfff00000;
302 298
303 299 iarr0 = (iarr0 >> 11) & 0x1fc;
304 300 iarr1 = (iarr1 >> 11) & 0x1fc;
305 301 iarr2 = (iarr2 >> 11) & 0x1fc;
306 302 itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
307 303 itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
308 304 itbl2 = ((int*)((char*)__vlibm_TBL_rhypot + iarr2))[0];
309 305 itbl0 -= iexp0;
310 306 itbl1 -= iexp1;
311 307 itbl2 -= iexp2;
312 308 HI(&dd0) = itbl0;
313 309 HI(&dd1) = itbl1;
314 310 HI(&dd2) = itbl2;
315 311 LO(&dd0) = 0;
316 312 LO(&dd1) = 0;
317 313 LO(&dd2) = 0;
318 314
319 315 dd0 = dd0 * (DTWO - dd0 * dres0);
320 316 dd1 = dd1 * (DTWO - dd1 * dres1);
321 317 dd2 = dd2 * (DTWO - dd2 * dres2);
322 318 dd0 = dd0 * (DTWO - dd0 * dres0);
323 319 dd1 = dd1 * (DTWO - dd1 * dres1);
324 320 dd2 = dd2 * (DTWO - dd2 * dres2);
325 321 dres0 = dd0 * (DTWO - dd0 * dres0);
326 322 dres1 = dd1 * (DTWO - dd1 * dres1);
327 323 dres2 = dd2 * (DTWO - dd2 * dres2);
328 324
329 325 HI(&res0) = HI(&dres0) & 0xffffff00;
330 326 HI(&res1) = HI(&dres1) & 0xffffff00;
331 327 HI(&res2) = HI(&dres2) & 0xffffff00;
332 328 LO(&res0) = 0;
333 329 LO(&res1) = 0;
334 330 LO(&res2) = 0;
335 331 res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
336 332 res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
337 333 res2 += (DONE - res2_hi * res2 - res2_lo * res2) * dres2;
338 334 res0 = sqrt (res0);
339 335 res1 = sqrt (res1);
340 336 res2 = sqrt (res2);
341 337
342 338 res0 = scl0 * res0;
343 339 res1 = scl1 * res1;
344 340 res2 = scl2 * res2;
345 341
346 342 *pz0 = res0;
347 343 *pz1 = res1;
348 344 *pz2 = res2;
349 345
350 346 px += stridex;
351 347 py += stridey;
352 348 pz += stridez;
353 349 i = 0;
354 350
355 351 } while (--n > 0);
356 352
357 353 if (i > 0)
358 354 {
359 355 x0 *= scl0;
360 356 y0 *= scl0;
361 357
362 358 x_hi0 = (x0 + D2ON36) - D2ON36;
363 359 y_hi0 = (y0 + D2ON36) - D2ON36;
364 360 x_lo0 = x0 - x_hi0;
365 361 y_lo0 = y0 - y_hi0;
366 362 res0_hi = (x_hi0 * x_hi0 + y_hi0 * y_hi0);
367 363 res0_lo = ((x0 + x_hi0) * x_lo0 + (y0 + y_hi0) * y_lo0);
368 364
369 365 dres0 = res0_hi + res0_lo;
370 366
371 367 iarr0 = HI(&dres0);
372 368 iexp0 = iarr0 & 0xfff00000;
373 369
374 370 iarr0 = (iarr0 >> 11) & 0x1fc;
375 371 itbl0 = ((int*)((char*)__vlibm_TBL_rhypot + iarr0))[0];
376 372 itbl0 -= iexp0;
377 373 HI(&dd0) = itbl0;
378 374 LO(&dd0) = 0;
379 375
380 376 dd0 = dd0 * (DTWO - dd0 * dres0);
381 377 dd0 = dd0 * (DTWO - dd0 * dres0);
382 378 dres0 = dd0 * (DTWO - dd0 * dres0);
383 379
384 380 HI(&res0) = HI(&dres0) & 0xffffff00;
385 381 LO(&res0) = 0;
386 382 res0 += (DONE - res0_hi * res0 - res0_lo * res0) * dres0;
387 383 res0 = sqrt (res0);
388 384
389 385 res0 = scl0 * res0;
390 386
391 387 *pz0 = res0;
392 388
393 389 if (i > 1)
394 390 {
395 391 x1 *= scl1;
396 392 y1 *= scl1;
397 393
398 394 x_hi1 = (x1 + D2ON36) - D2ON36;
399 395 y_hi1 = (y1 + D2ON36) - D2ON36;
400 396 x_lo1 = x1 - x_hi1;
401 397 y_lo1 = y1 - y_hi1;
402 398 res1_hi = (x_hi1 * x_hi1 + y_hi1 * y_hi1);
403 399 res1_lo = ((x1 + x_hi1) * x_lo1 + (y1 + y_hi1) * y_lo1);
404 400
405 401 dres1 = res1_hi + res1_lo;
406 402
407 403 iarr1 = HI(&dres1);
408 404 iexp1 = iarr1 & 0xfff00000;
409 405
410 406 iarr1 = (iarr1 >> 11) & 0x1fc;
411 407 itbl1 = ((int*)((char*)__vlibm_TBL_rhypot + iarr1))[0];
412 408 itbl1 -= iexp1;
413 409 HI(&dd1) = itbl1;
414 410 LO(&dd1) = 0;
415 411
416 412 dd1 = dd1 * (DTWO - dd1 * dres1);
417 413 dd1 = dd1 * (DTWO - dd1 * dres1);
418 414 dres1 = dd1 * (DTWO - dd1 * dres1);
419 415
420 416 HI(&res1) = HI(&dres1) & 0xffffff00;
↓ open down ↓ |
337 lines elided |
↑ open up ↑ |
421 417 LO(&res1) = 0;
422 418 res1 += (DONE - res1_hi * res1 - res1_lo * res1) * dres1;
423 419 res1 = sqrt (res1);
424 420
425 421 res1 = scl1 * res1;
426 422
427 423 *pz1 = res1;
428 424 }
429 425 }
430 426 }
431 -
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX