10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include "libm_synonyms.h"
31 #include "libm_inlines.h"
32
33 #ifdef __RESTRICT
34 #define restrict _Restrict
35 #else
36 #define restrict
37 #endif
38
39 /* float rsqrtf(float x)
40 *
41 * Method :
42 * 1. Special cases:
43 * for x = NaN => QNaN;
44 * for x = +Inf => 0;
45 * for x is negative, -Inf => QNaN + invalid;
46 * for x = +0 => +Inf + divide-by-zero;
47 * for x = -0 => -Inf + divide-by-zero.
48 * 2. Computes reciprocal square root from:
49 * x = m * 2**n
50 * Where:
54 * rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
55 * 2. Computes 1/sqrt(m) from:
56 * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
57 * Where:
58 * m = m0 + dm,
59 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
60 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
61 * Then:
62 * 1/sqrt(m0), 1/m0 are looked up in a table,
63 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
64 * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
65 * where z = [-1/64, 1/64].
66 *
67 * Accuracy:
68 * The maximum relative error for the approximating
69 * polynomial is 2**(-27.87).
70 * Maximum error observed: less than 0.534 ulp for the
71 * whole float type range.
72 */
73
74 #define sqrtf __sqrtf
75
76 extern float sqrtf(float);
77
78 static const double __TBL_rsqrtf[] = {
79 /*
80 i = [0,63]
81 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
82 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
83 i = [64,127]
84 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
85 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
86 */
87 1.1920928955078125000e-07, 1.4142135623730951455e+00,
88 1.1737530048076923728e-07, 1.4032928308912466786e+00,
89 1.1559688683712121533e-07, 1.3926212476455828160e+00,
90 1.1387156016791044559e-07, 1.3821894809301762397e+00,
91 1.1219697840073529256e-07, 1.3719886811400707760e+00,
92 1.1057093523550724772e-07, 1.3620104492139977204e+00,
93 1.0899135044642856803e-07, 1.3522468075656264297e+00,
94 1.0745626100352112918e-07, 1.3426901732747025253e+00,
95 1.0596381293402777190e-07, 1.3333333333333332593e+00,
486
487 iexp0 = ax0 >> 24;
488 iexp0 = 0x3f - iexp0;
489 iexp0 = iexp0 << 23;
490
491 si0 = (ax0 >> 13) & 0x7f0;
492
493 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
494 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
495 iax0 = ax0 & 0x7ffe0000;
496 iax0 = ax0 - iax0;
497 xx0 = iax0 * tbl_div0;
498 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
499
500 fres0 = res0;
501 iexp0 += *(int*)&fres0;
502 *(int*)py = iexp0;
503 py += stridey;
504 }
505 }
506
|
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include "libm_inlines.h"
31
32 #ifdef __RESTRICT
33 #define restrict _Restrict
34 #else
35 #define restrict
36 #endif
37
38 /* float rsqrtf(float x)
39 *
40 * Method :
41 * 1. Special cases:
42 * for x = NaN => QNaN;
43 * for x = +Inf => 0;
44 * for x is negative, -Inf => QNaN + invalid;
45 * for x = +0 => +Inf + divide-by-zero;
46 * for x = -0 => -Inf + divide-by-zero.
47 * 2. Computes reciprocal square root from:
48 * x = m * 2**n
49 * Where:
53 * rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
54 * 2. Computes 1/sqrt(m) from:
55 * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
56 * Where:
57 * m = m0 + dm,
58 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
59 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
60 * Then:
61 * 1/sqrt(m0), 1/m0 are looked up in a table,
62 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
63 * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
64 * where z = [-1/64, 1/64].
65 *
66 * Accuracy:
67 * The maximum relative error for the approximating
68 * polynomial is 2**(-27.87).
69 * Maximum error observed: less than 0.534 ulp for the
70 * whole float type range.
71 */
72
73 extern float sqrtf(float);
74
75 static const double __TBL_rsqrtf[] = {
76 /*
77 i = [0,63]
78 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
79 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
80 i = [64,127]
81 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
82 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
83 */
84 1.1920928955078125000e-07, 1.4142135623730951455e+00,
85 1.1737530048076923728e-07, 1.4032928308912466786e+00,
86 1.1559688683712121533e-07, 1.3926212476455828160e+00,
87 1.1387156016791044559e-07, 1.3821894809301762397e+00,
88 1.1219697840073529256e-07, 1.3719886811400707760e+00,
89 1.1057093523550724772e-07, 1.3620104492139977204e+00,
90 1.0899135044642856803e-07, 1.3522468075656264297e+00,
91 1.0745626100352112918e-07, 1.3426901732747025253e+00,
92 1.0596381293402777190e-07, 1.3333333333333332593e+00,
483
484 iexp0 = ax0 >> 24;
485 iexp0 = 0x3f - iexp0;
486 iexp0 = iexp0 << 23;
487
488 si0 = (ax0 >> 13) & 0x7f0;
489
490 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
491 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
492 iax0 = ax0 & 0x7ffe0000;
493 iax0 = ax0 - iax0;
494 xx0 = iax0 * tbl_div0;
495 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
496
497 fres0 = res0;
498 iexp0 += *(int*)&fres0;
499 *(int*)py = iexp0;
500 py += stridey;
501 }
502 }
|