11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x) *(1+(int*)x)
34 #define LO(x) *(unsigned*)x
35 #else
36 #define HI(x) *(int*)x
37 #define LO(x) *(1+(unsigned*)x)
38 #endif
39
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
45
46 /*
47 * vcos.1.c
48 *
49 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
50 * in the primary range part.
81 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
82
83 /* Don't __ the following; acomp will handle it */
84 extern double fabs( double );
85 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
86
87 /*
88 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
89 *
90 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
91 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
92 *
93 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
94 */
95 void
96 __vcos( int n, double * restrict x, int stridex, double * restrict y,
97 int stridey )
98 {
99 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
100 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
101 double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
102 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
103 int i, biguns, nsave, sxsave, sysave;
104
105 nsave = n;
106 xsave = x;
107 sxsave = stridex;
108 ysave = y;
109 sysave = stridey;
110 biguns = 0;
111
112 do /* MAIN LOOP */
113 {
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 goto MEDIUM;
122 }
123 if ( hx0 < 0x3e400000 ) {
124 /* Too small. cos x ~ 1. */
125 volatile int v = *x;
126 *y = 1.0;
127 x += stridex;
128 y += stridey;
129 i = 0;
130 if ( --n <= 0 )
131 break;
132 goto LOOP0;
133 }
134 x0 = *x;
135 py0 = y;
136 x += stridex;
137 y += stridey;
138 i = 1;
139 if ( --n <= 0 )
140 break;
141
142 LOOP1: /* Get second arg, same as above. */
143 xsb1 = HI(x);
144 hx1 = xsb1 & ~0x80000000;
145 if ( hx1 > 0x3fe921fb )
146 {
147 biguns = 2;
148 goto MEDIUM;
149 }
150 if ( hx1 < 0x3e400000 )
151 {
152 volatile int v = *x;
153 *y = 1.0;
154 x += stridex;
155 y += stridey;
156 i = 1;
157 if ( --n <= 0 )
158 break;
159 goto LOOP1;
160 }
161 x1 = *x;
162 py1 = y;
163 x += stridex;
164 y += stridey;
165 i = 2;
166 if ( --n <= 0 )
167 break;
168
169 LOOP2: /* Get third arg, same as above. */
170 xsb2 = HI(x);
171 hx2 = xsb2 & ~0x80000000;
172 if ( hx2 > 0x3fe921fb )
173 {
174 biguns = 3;
175 goto MEDIUM;
176 }
177 if ( hx2 < 0x3e400000 )
178 {
179 volatile int v = *x;
180 *y = 1.0;
181 x += stridex;
182 y += stridey;
183 i = 2;
184 if ( --n <= 0 )
185 break;
186 goto LOOP2;
187 }
188 x2 = *x;
189 py2 = y;
190
191 /*
192 * 0x3fc40000 = 5/32 ~ 0.15625
193 * Get msb after subtraction. Will be 1 only if
194 * hx0 - 5/32 is negative.
195 */
196 i = ( hx0 - 0x3fc40000 ) >> 31;
197 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
198 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
199 switch ( i )
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vcos.1.c
49 *
50 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
51 * in the primary range part.
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs( double );
86 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
87
88 /*
89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90 *
91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93 *
94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95 */
96 void
97 __vcos( int n, double * restrict x, int stridex, double * restrict y,
98 int stridey )
99 {
100 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 int i, biguns, nsave, sxsave, sysave;
105 nsave = n;
106 xsave = x;
107 sxsave = stridex;
108 ysave = y;
109 sysave = stridey;
110 biguns = 0;
111
112 do /* MAIN LOOP */
113 {
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 goto MEDIUM;
122 }
123 if ( hx0 < 0x3e400000 ) {
124 /* Too small. cos x ~ 1. */
125 *y = 1.0;
126 x += stridex;
127 y += stridey;
128 i = 0;
129 if ( --n <= 0 )
130 break;
131 goto LOOP0;
132 }
133 x0 = *x;
134 py0 = y;
135 x += stridex;
136 y += stridey;
137 i = 1;
138 if ( --n <= 0 )
139 break;
140
141 LOOP1: /* Get second arg, same as above. */
142 xsb1 = HI(x);
143 hx1 = xsb1 & ~0x80000000;
144 if ( hx1 > 0x3fe921fb )
145 {
146 biguns = 2;
147 goto MEDIUM;
148 }
149 if ( hx1 < 0x3e400000 )
150 {
151 *y = 1.0;
152 x += stridex;
153 y += stridey;
154 i = 1;
155 if ( --n <= 0 )
156 break;
157 goto LOOP1;
158 }
159 x1 = *x;
160 py1 = y;
161 x += stridex;
162 y += stridey;
163 i = 2;
164 if ( --n <= 0 )
165 break;
166
167 LOOP2: /* Get third arg, same as above. */
168 xsb2 = HI(x);
169 hx2 = xsb2 & ~0x80000000;
170 if ( hx2 > 0x3fe921fb )
171 {
172 biguns = 3;
173 goto MEDIUM;
174 }
175 if ( hx2 < 0x3e400000 )
176 {
177 *y = 1.0;
178 x += stridex;
179 y += stridey;
180 i = 2;
181 if ( --n <= 0 )
182 break;
183 goto LOOP2;
184 }
185 x2 = *x;
186 py2 = y;
187
188 /*
189 * 0x3fc40000 = 5/32 ~ 0.15625
190 * Get msb after subtraction. Will be 1 only if
191 * hx0 - 5/32 is negative.
192 */
193 i = ( hx0 - 0x3fc40000 ) >> 31;
194 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
195 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
196 switch ( i )
|