11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x) *(1+(int*)x)
34 #define LO(x) *(unsigned*)x
35 #else
36 #define HI(x) *(int*)x
37 #define LO(x) *(1+(unsigned*)x)
38 #endif
39
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
45
46 /*
47 * vsincos.c
48 *
49 * Vector sine and cosine function. Just slight modifications to vcos.c.
50 */
81 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
82 *
83 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
84 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
85 *
86 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
87 */
88 void
89 __vsincos( int n, double * restrict x, int stridex,
90 double * restrict y, int stridey,
91 double * restrict c, int stridec )
92 {
93 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
94 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
95 double x0, x1, x2,
96 *py0, *py1, *py2,
97 *pc0, *pc1, *pc2,
98 *xsave, *ysave, *csave;
99 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
100 int i, biguns, nsave, sxsave, sysave, scsave;
101
102 nsave = n;
103 xsave = x;
104 sxsave = stridex;
105 ysave = y;
106 sysave = stridey;
107 csave = c;
108 scsave = stridec;
109 biguns = 0;
110
111 do /* MAIN LOOP */
112 {
113
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 x += stridex;
122 y += stridey;
123 c += stridec;
124 i = 0;
125 if ( --n <= 0 )
126 break;
127 goto LOOP0;
128 }
129 if ( hx0 < 0x3e400000 ) {
130 /* Too small. cos x ~ 1, sin x ~ x. */
131 volatile int v = *x;
132 *c = 1.0;
133 *y = *x;
134 x += stridex;
135 y += stridey;
136 c += stridec;
137 i = 0;
138 if ( --n <= 0 )
139 break;
140 goto LOOP0;
141 }
142 x0 = *x;
143 py0 = y;
144 pc0 = c;
145 x += stridex;
146 y += stridey;
147 c += stridec;
148 i = 1;
149 if ( --n <= 0 )
150 break;
151
152 LOOP1: /* Get second arg, same as above. */
153 xsb1 = HI(x);
154 hx1 = xsb1 & ~0x80000000;
155 if ( hx1 > 0x3fe921fb )
156 {
157 biguns = 1;
158 x += stridex;
159 y += stridey;
160 c += stridec;
161 i = 1;
162 if ( --n <= 0 )
163 break;
164 goto LOOP1;
165 }
166 if ( hx1 < 0x3e400000 )
167 {
168 volatile int v = *x;
169 *c = 1.0;
170 *y = *x;
171 x += stridex;
172 y += stridey;
173 c += stridec;
174 i = 1;
175 if ( --n <= 0 )
176 break;
177 goto LOOP1;
178 }
179 x1 = *x;
180 py1 = y;
181 pc1 = c;
182 x += stridex;
183 y += stridey;
184 c += stridec;
185 i = 2;
186 if ( --n <= 0 )
187 break;
188
189 LOOP2: /* Get third arg, same as above. */
190 xsb2 = HI(x);
191 hx2 = xsb2 & ~0x80000000;
192 if ( hx2 > 0x3fe921fb )
193 {
194 biguns = 1;
195 x += stridex;
196 y += stridey;
197 c += stridec;
198 i = 2;
199 if ( --n <= 0 )
200 break;
201 goto LOOP2;
202 }
203 if ( hx2 < 0x3e400000 )
204 {
205 volatile int v = *x;
206 *c = 1.0;
207 *y = *x;
208 x += stridex;
209 y += stridey;
210 c += stridec;
211 i = 2;
212 if ( --n <= 0 )
213 break;
214 goto LOOP2;
215 }
216 x2 = *x;
217 py2 = y;
218 pc2 = c;
219
220 /*
221 * 0x3fc40000 = 5/32 ~ 0.15625
222 * Get msb after subtraction. Will be 1 only if
223 * hx0 - 5/32 is negative.
224 */
225 i = ( hx2 - 0x3fc40000 ) >> 31;
|
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vsincos.c
49 *
50 * Vector sine and cosine function. Just slight modifications to vcos.c.
51 */
82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
83 *
84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
86 *
87 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
88 */
89 void
90 __vsincos( int n, double * restrict x, int stridex,
91 double * restrict y, int stridey,
92 double * restrict c, int stridec )
93 {
94 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
95 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
96 double x0, x1, x2,
97 *py0, *py1, *py2,
98 *pc0, *pc1, *pc2,
99 *xsave, *ysave, *csave;
100 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
101 int i, biguns, nsave, sxsave, sysave, scsave;
102 nsave = n;
103 xsave = x;
104 sxsave = stridex;
105 ysave = y;
106 sysave = stridey;
107 csave = c;
108 scsave = stridec;
109 biguns = 0;
110
111 do /* MAIN LOOP */
112 {
113
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 x += stridex;
122 y += stridey;
123 c += stridec;
124 i = 0;
125 if ( --n <= 0 )
126 break;
127 goto LOOP0;
128 }
129 if ( hx0 < 0x3e400000 ) {
130 /* Too small. cos x ~ 1, sin x ~ x. */
131 *c = 1.0;
132 *y = *x;
133 x += stridex;
134 y += stridey;
135 c += stridec;
136 i = 0;
137 if ( --n <= 0 )
138 break;
139 goto LOOP0;
140 }
141 x0 = *x;
142 py0 = y;
143 pc0 = c;
144 x += stridex;
145 y += stridey;
146 c += stridec;
147 i = 1;
148 if ( --n <= 0 )
149 break;
150
151 LOOP1: /* Get second arg, same as above. */
152 xsb1 = HI(x);
153 hx1 = xsb1 & ~0x80000000;
154 if ( hx1 > 0x3fe921fb )
155 {
156 biguns = 1;
157 x += stridex;
158 y += stridey;
159 c += stridec;
160 i = 1;
161 if ( --n <= 0 )
162 break;
163 goto LOOP1;
164 }
165 if ( hx1 < 0x3e400000 )
166 {
167 *c = 1.0;
168 *y = *x;
169 x += stridex;
170 y += stridey;
171 c += stridec;
172 i = 1;
173 if ( --n <= 0 )
174 break;
175 goto LOOP1;
176 }
177 x1 = *x;
178 py1 = y;
179 pc1 = c;
180 x += stridex;
181 y += stridey;
182 c += stridec;
183 i = 2;
184 if ( --n <= 0 )
185 break;
186
187 LOOP2: /* Get third arg, same as above. */
188 xsb2 = HI(x);
189 hx2 = xsb2 & ~0x80000000;
190 if ( hx2 > 0x3fe921fb )
191 {
192 biguns = 1;
193 x += stridex;
194 y += stridey;
195 c += stridec;
196 i = 2;
197 if ( --n <= 0 )
198 break;
199 goto LOOP2;
200 }
201 if ( hx2 < 0x3e400000 )
202 {
203 *c = 1.0;
204 *y = *x;
205 x += stridex;
206 y += stridey;
207 c += stridec;
208 i = 2;
209 if ( --n <= 0 )
210 break;
211 goto LOOP2;
212 }
213 x2 = *x;
214 py2 = y;
215 pc2 = c;
216
217 /*
218 * 0x3fc40000 = 5/32 ~ 0.15625
219 * Get msb after subtraction. Will be 1 only if
220 * hx0 - 5/32 is negative.
221 */
222 i = ( hx2 - 0x3fc40000 ) >> 31;
|