11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include "libm_synonyms.h"
32 #include "libm_inlines.h"
33
34 #ifdef _LITTLE_ENDIAN
35 #define HI(x) *(1+(int*)x)
36 #define LO(x) *(unsigned*)x
37 #else
38 #define HI(x) *(int*)x
39 #define LO(x) *(1+(unsigned*)x)
40 #endif
41
42 #ifdef __RESTRICT
43 #define restrict _Restrict
44 #else
45 #define restrict
46 #endif
47
48 /* double rsqrt(double x)
49 *
50 * Method :
51 * 1. Special cases:
66 * Where:
67 * m = m0 + dm,
68 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
69 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
70 * m0 = 2.0 for m = [1.0+127/128, 2.0), k = 128.
71 * Then:
72 * 1/sqrt(m0) is looked up in a table,
73 * 1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)).
74 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
75 * 1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3)
76 * * z + a2) * z + a1) * z + a0
77 * where z = [-1/128, 1/128].
78 *
79 * Accuracy:
80 * The maximum relative error for the approximating
81 * polynomial is 2**(-56.26).
82 * Maximum error observed: less than 0.563 ulp after 1.500.000.000
83 * results.
84 */
85
86 #define sqrt __sqrt
87
88 extern double sqrt (double);
89 extern const double __vlibm_TBL_rsqrt[];
90
91 static void
92 __vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey);
93
94 #pragma no_inline(__vrsqrt_n)
95
96 #define RETURN(ret) \
97 { \
98 *py = (ret); \
99 py += stridey; \
100 if (n_n == 0) \
101 { \
102 spx = px; spy = py; \
103 hx = HI(px); \
104 continue; \
105 } \
106 n--; \
107 break; \
395 LO(&res_c0) = 0;
396
397 px += stridex;
398
399 dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
400 dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
401 xx0 = dexp_hi0 * dexp_hi0;
402 xx0 = (res0 - res_c0) * xx0;
403 res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
404
405 res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
406
407 HI(&dsqrt_exp0) = sqrt_exp0;
408 LO(&dsqrt_exp0) = 0;
409 res0 *= dsqrt_exp0;
410
411 *py = res0;
412 py += stridey;
413 }
414 }
415
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include "libm_inlines.h"
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /* double rsqrt(double x)
48 *
49 * Method :
50 * 1. Special cases:
65 * Where:
66 * m = m0 + dm,
67 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
68 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
69 * m0 = 2.0 for m = [1.0+127/128, 2.0), k = 128.
70 * Then:
71 * 1/sqrt(m0) is looked up in a table,
72 * 1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)).
73 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
74 * 1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3)
75 * * z + a2) * z + a1) * z + a0
76 * where z = [-1/128, 1/128].
77 *
78 * Accuracy:
79 * The maximum relative error for the approximating
80 * polynomial is 2**(-56.26).
81 * Maximum error observed: less than 0.563 ulp after 1.500.000.000
82 * results.
83 */
84
85 extern double sqrt (double);
86 extern const double __vlibm_TBL_rsqrt[];
87
88 static void
89 __vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey);
90
91 #pragma no_inline(__vrsqrt_n)
92
93 #define RETURN(ret) \
94 { \
95 *py = (ret); \
96 py += stridey; \
97 if (n_n == 0) \
98 { \
99 spx = px; spy = py; \
100 hx = HI(px); \
101 continue; \
102 } \
103 n--; \
104 break; \
392 LO(&res_c0) = 0;
393
394 px += stridex;
395
396 dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
397 dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
398 xx0 = dexp_hi0 * dexp_hi0;
399 xx0 = (res0 - res_c0) * xx0;
400 res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
401
402 res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
403
404 HI(&dsqrt_exp0) = sqrt_exp0;
405 LO(&dsqrt_exp0) = 0;
406 res0 *= dsqrt_exp0;
407
408 *py = res0;
409 py += stridey;
410 }
411 }
|