1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vcos.1.c
49 *
50 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
51 * in the primary range part.
52 *
53 * Modification to primary range processing. If an argument that does not
54 * fall in the primary range is encountered, then processing is continued
55 * in the medium range.
56 *
57 */
58
59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
60
61 static const double
62 half[2] = { 0.5, -0.5 },
63 one = 1.0,
64 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
65 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
66 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
67 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
68 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
69 pp1 = -1.666666666605760465276263943134982554676e-0001,
70 pp2 = 8.333261209690963126718376566146180944442e-0003,
71 qq1 = -4.999999999977710986407023955908711557870e-0001,
72 qq2 = 4.166654863857219350645055881018842089580e-0002,
73 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
74 -4.999999999999931701464060878888294524481e-0001 },
75 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
76 4.166666666394861917535640593963708222319e-0002 },
77 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
78 -1.388888552656142867832756687736851681462e-0003 },
79 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
80 2.478519423681460796618128289454530524759e-0005 };
81
82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
83
84 /* Don't __ the following; acomp will handle it */
85 extern double fabs( double );
86 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
87
88 /*
89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
90 *
91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
93 *
94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
95 */
96 void
97 __vcos( int n, double * restrict x, int stridex, double * restrict y,
98 int stridey )
99 {
100 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
101 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
102 double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 int i, biguns, nsave, sxsave, sysave;
105 nsave = n;
106 xsave = x;
107 sxsave = stridex;
108 ysave = y;
109 sysave = stridey;
110 biguns = 0;
111
112 do /* MAIN LOOP */
113 {
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 goto MEDIUM;
122 }
123 if ( hx0 < 0x3e400000 ) {
124 /* Too small. cos x ~ 1. */
125 *y = 1.0;
126 x += stridex;
127 y += stridey;
128 i = 0;
129 if ( --n <= 0 )
130 break;
131 goto LOOP0;
132 }
133 x0 = *x;
134 py0 = y;
135 x += stridex;
136 y += stridey;
137 i = 1;
138 if ( --n <= 0 )
139 break;
140
141 LOOP1: /* Get second arg, same as above. */
142 xsb1 = HI(x);
143 hx1 = xsb1 & ~0x80000000;
144 if ( hx1 > 0x3fe921fb )
145 {
146 biguns = 2;
147 goto MEDIUM;
148 }
149 if ( hx1 < 0x3e400000 )
150 {
151 *y = 1.0;
152 x += stridex;
153 y += stridey;
154 i = 1;
155 if ( --n <= 0 )
156 break;
157 goto LOOP1;
158 }
159 x1 = *x;
160 py1 = y;
161 x += stridex;
162 y += stridey;
163 i = 2;
164 if ( --n <= 0 )
165 break;
166
167 LOOP2: /* Get third arg, same as above. */
168 xsb2 = HI(x);
169 hx2 = xsb2 & ~0x80000000;
170 if ( hx2 > 0x3fe921fb )
171 {
172 biguns = 3;
173 goto MEDIUM;
174 }
175 if ( hx2 < 0x3e400000 )
176 {
177 *y = 1.0;
178 x += stridex;
179 y += stridey;
180 i = 2;
181 if ( --n <= 0 )
182 break;
183 goto LOOP2;
184 }
185 x2 = *x;
186 py2 = y;
187
188 /*
189 * 0x3fc40000 = 5/32 ~ 0.15625
190 * Get msb after subtraction. Will be 1 only if
191 * hx0 - 5/32 is negative.
192 */
193 i = ( hx0 - 0x3fc40000 ) >> 31;
194 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
195 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
196 switch ( i )
197 {
198 double a0, a1, a2, w0, w1, w2;
199 double t0, t1, t2, z0, z1, z2;
200 unsigned j0, j1, j2;
201
202 case 0: /* All are > 5/32 */
203 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
204 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
205 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
206 HI(&t0) = j0;
207 HI(&t1) = j1;
208 HI(&t2) = j2;
209 LO(&t0) = 0;
210 LO(&t1) = 0;
211 LO(&t2) = 0;
212 x0 -= t0;
213 x1 -= t1;
214 x2 -= t2;
215 z0 = x0 * x0;
216 z1 = x1 * x1;
217 z2 = x2 * x2;
218 t0 = z0 * ( qq1 + z0 * qq2 );
219 t1 = z1 * ( qq1 + z1 * qq2 );
220 t2 = z2 * ( qq1 + z2 * qq2 );
221 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
222 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
223 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
224 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
225 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
226 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
227 xsb0 = ( xsb0 >> 30 ) & 2;
228 xsb1 = ( xsb1 >> 30 ) & 2;
229 xsb2 = ( xsb2 >> 30 ) & 2;
230 a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
231 a1 = __vlibm_TBL_sincos_hi[j1+1];
232 a2 = __vlibm_TBL_sincos_hi[j2+1];
233 /* cos_lo(t) sin_hi(t) */
234 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
235 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
236 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
237
238 *py0 = a0 + t0;
239 *py1 = a1 + t1;
240 *py2 = a2 + t2;
241 break;
242
243 case 1:
244 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
245 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
246 HI(&t1) = j1;
247 HI(&t2) = j2;
248 LO(&t1) = 0;
249 LO(&t2) = 0;
250 x1 -= t1;
251 x2 -= t2;
252 z0 = x0 * x0;
253 z1 = x1 * x1;
254 z2 = x2 * x2;
255 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
256 t1 = z1 * ( qq1 + z1 * qq2 );
257 t2 = z2 * ( qq1 + z2 * qq2 );
258 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
259 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
260 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
261 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
262 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
263 xsb1 = ( xsb1 >> 30 ) & 2;
264 xsb2 = ( xsb2 >> 30 ) & 2;
265 a1 = __vlibm_TBL_sincos_hi[j1+1];
266 a2 = __vlibm_TBL_sincos_hi[j2+1];
267 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
268 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
269 *py0 = one + t0;
270 *py1 = a1 + t1;
271 *py2 = a2 + t2;
272 break;
273
274 case 2:
275 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
276 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
277 HI(&t0) = j0;
278 HI(&t2) = j2;
279 LO(&t0) = 0;
280 LO(&t2) = 0;
281 x0 -= t0;
282 x2 -= t2;
283 z0 = x0 * x0;
284 z1 = x1 * x1;
285 z2 = x2 * x2;
286 t0 = z0 * ( qq1 + z0 * qq2 );
287 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
288 t2 = z2 * ( qq1 + z2 * qq2 );
289 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
290 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
291 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
292 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
293 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
294 xsb0 = ( xsb0 >> 30 ) & 2;
295 xsb2 = ( xsb2 >> 30 ) & 2;
296 a0 = __vlibm_TBL_sincos_hi[j0+1];
297 a2 = __vlibm_TBL_sincos_hi[j2+1];
298 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
299 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
300 *py0 = a0 + t0;
301 *py1 = one + t1;
302 *py2 = a2 + t2;
303 break;
304
305 case 3:
306 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
307 HI(&t2) = j2;
308 LO(&t2) = 0;
309 x2 -= t2;
310 z0 = x0 * x0;
311 z1 = x1 * x1;
312 z2 = x2 * x2;
313 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
314 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
315 t2 = z2 * ( qq1 + z2 * qq2 );
316 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
317 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
318 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
319 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
320 xsb2 = ( xsb2 >> 30 ) & 2;
321 a2 = __vlibm_TBL_sincos_hi[j2+1];
322 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
323 *py0 = one + t0;
324 *py1 = one + t1;
325 *py2 = a2 + t2;
326 break;
327
328 case 4:
329 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
330 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
331 HI(&t0) = j0;
332 HI(&t1) = j1;
333 LO(&t0) = 0;
334 LO(&t1) = 0;
335 x0 -= t0;
336 x1 -= t1;
337 z0 = x0 * x0;
338 z1 = x1 * x1;
339 z2 = x2 * x2;
340 t0 = z0 * ( qq1 + z0 * qq2 );
341 t1 = z1 * ( qq1 + z1 * qq2 );
342 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
343 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
344 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
345 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
346 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
347 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
348 xsb0 = ( xsb0 >> 30 ) & 2;
349 xsb1 = ( xsb1 >> 30 ) & 2;
350 a0 = __vlibm_TBL_sincos_hi[j0+1];
351 a1 = __vlibm_TBL_sincos_hi[j1+1];
352 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
353 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
354 *py0 = a0 + t0;
355 *py1 = a1 + t1;
356 *py2 = one + t2;
357 break;
358
359 case 5:
360 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
361 HI(&t1) = j1;
362 LO(&t1) = 0;
363 x1 -= t1;
364 z0 = x0 * x0;
365 z1 = x1 * x1;
366 z2 = x2 * x2;
367 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
368 t1 = z1 * ( qq1 + z1 * qq2 );
369 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
370 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
371 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
372 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
373 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
374 xsb1 = ( xsb1 >> 30 ) & 2;
375 a1 = __vlibm_TBL_sincos_hi[j1+1];
376 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
377 *py0 = one + t0;
378 *py1 = a1 + t1;
379 *py2 = one + t2;
380 break;
381
382 case 6:
383 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
384 HI(&t0) = j0;
385 LO(&t0) = 0;
386 x0 -= t0;
387 z0 = x0 * x0;
388 z1 = x1 * x1;
389 z2 = x2 * x2;
390 t0 = z0 * ( qq1 + z0 * qq2 );
391 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
392 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
393 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
394 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
395 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
396 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
397 xsb0 = ( xsb0 >> 30 ) & 2;
398 a0 = __vlibm_TBL_sincos_hi[j0+1];
399 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
400 *py0 = a0 + t0;
401 *py1 = one + t1;
402 *py2 = one + t2;
403 break;
404
405 case 7: /* All are < 5/32 */
406 z0 = x0 * x0;
407 z1 = x1 * x1;
408 z2 = x2 * x2;
409 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
410 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
411 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
412 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
413 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
414 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
415 *py0 = one + t0;
416 *py1 = one + t1;
417 *py2 = one + t2;
418 break;
419 }
420
421 x += stridex;
422 y += stridey;
423 i = 0;
424 } while ( --n > 0 ); /* END MAIN LOOP */
425
426 /*
427 * CLEAN UP last 0, 1, or 2 elts.
428 */
429 if ( i > 0 ) /* Clean up elts at tail. i < 3. */
430 {
431 double a0, a1, w0, w1;
432 double t0, t1, z0, z1;
433 unsigned j0, j1;
434
435 if ( i > 1 )
436 {
437 if ( hx1 < 0x3fc40000 )
438 {
439 z1 = x1 * x1;
440 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
441 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
442 t1 = one + t1;
443 *py1 = t1;
444 }
445 else
446 {
447 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
448 HI(&t1) = j1;
449 LO(&t1) = 0;
450 x1 -= t1;
451 z1 = x1 * x1;
452 t1 = z1 * ( qq1 + z1 * qq2 );
453 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
454 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
455 xsb1 = ( xsb1 >> 30 ) & 2;
456 a1 = __vlibm_TBL_sincos_hi[j1+1];
457 t1 = __vlibm_TBL_sincos_lo[j1+1]
458 - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
459 *py1 = a1 + t1;
460 }
461 }
462 if ( hx0 < 0x3fc40000 )
463 {
464 z0 = x0 * x0;
465 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
466 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
467 t0 = one + t0;
468 *py0 = t0;
469 }
470 else
471 {
472 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
473 HI(&t0) = j0;
474 LO(&t0) = 0;
475 x0 -= t0;
476 z0 = x0 * x0;
477 t0 = z0 * ( qq1 + z0 * qq2 );
478 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
479 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
480 xsb0 = ( xsb0 >> 30 ) & 2;
481 a0 = __vlibm_TBL_sincos_hi[j0+1];
482 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
483 *py0 = a0 + t0;
484 }
485 } /* END CLEAN UP */
486
487 return;
488
489 /*
490 * Take care of BIGUNS.
491 *
492 * We have jumped here in the middle of processing after having
493 * encountered a medium range argument. Therefore things are in a
494 * bit of a tizzy.
495 */
496
497 MEDIUM:
498
499 x0_or_one[1] = 1.0;
500 x1_or_one[1] = 1.0;
501 x2_or_one[1] = 1.0;
502 x0_or_one[3] = -1.0;
503 x1_or_one[3] = -1.0;
504 x2_or_one[3] = -1.0;
505 y0_or_zero[1] = 0.0;
506 y1_or_zero[1] = 0.0;
507 y2_or_zero[1] = 0.0;
508 y0_or_zero[3] = 0.0;
509 y1_or_zero[3] = 0.0;
510 y2_or_zero[3] = 0.0;
511
512 if ( biguns == 3 )
513 {
514 biguns = 0;
515 xsb0 = xsb0 >> 31;
516 xsb1 = xsb1 >> 31;
517 goto loop2;
518 }
519 else if ( biguns == 2 )
520 {
521 xsb0 = xsb0 >> 31;
522 biguns = 0;
523 goto loop1;
524 }
525 biguns = 0;
526
527 do
528 {
529 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
530 unsigned hx;
531 int n0, n1, n2;
532
533 /*
534 * Find 3 more to work on: Not already done, not too big.
535 */
536
537 loop0:
538 hx = HI(x);
539 xsb0 = hx >> 31;
540 hx &= ~0x80000000;
541 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
542 {
543 if ( hx >= 0x7ff00000 ) /* Inf or NaN */
544 {
545 x0 = *x;
546 *y = x0 - x0;
547 }
548 else
549 biguns = 1;
550 x += stridex;
551 y += stridey;
552 i = 0;
553 if ( --n <= 0 )
554 break;
555 goto loop0;
556 }
557 x0 = *x;
558 py0 = y;
559 x += stridex;
560 y += stridey;
561 i = 1;
562 if ( --n <= 0 )
563 break;
564
565 loop1:
566 hx = HI(x);
567 xsb1 = hx >> 31;
568 hx &= ~0x80000000;
569 if ( hx > 0x413921fb )
570 {
571 if ( hx >= 0x7ff00000 )
572 {
573 x1 = *x;
574 *y = x1 - x1;
575 }
576 else
577 biguns = 1;
578 x += stridex;
579 y += stridey;
580 i = 1;
581 if ( --n <= 0 )
582 break;
583 goto loop1;
584 }
585 x1 = *x;
586 py1 = y;
587 x += stridex;
588 y += stridey;
589 i = 2;
590 if ( --n <= 0 )
591 break;
592
593 loop2:
594 hx = HI(x);
595 xsb2 = hx >> 31;
596 hx &= ~0x80000000;
597 if ( hx > 0x413921fb )
598 {
599 if ( hx >= 0x7ff00000 )
600 {
601 x2 = *x;
602 *y = x2 - x2;
603 }
604 else
605 biguns = 1;
606 x += stridex;
607 y += stridey;
608 i = 2;
609 if ( --n <= 0 )
610 break;
611 goto loop2;
612 }
613 x2 = *x;
614 py2 = y;
615
616 n0 = (int) ( x0 * invpio2 + half[xsb0] );
617 n1 = (int) ( x1 * invpio2 + half[xsb1] );
618 n2 = (int) ( x2 * invpio2 + half[xsb2] );
619 fn0 = (double) n0;
620 fn1 = (double) n1;
621 fn2 = (double) n2;
622 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
623 n1 = (n1 + 1) & 3;
624 n2 = (n2 + 1) & 3;
625 a0 = x0 - fn0 * pio2_1;
626 a1 = x1 - fn1 * pio2_1;
627 a2 = x2 - fn2 * pio2_1;
628 w0 = fn0 * pio2_2;
629 w1 = fn1 * pio2_2;
630 w2 = fn2 * pio2_2;
631 x0 = a0 - w0;
632 x1 = a1 - w1;
633 x2 = a2 - w2;
634 y0 = ( a0 - x0 ) - w0;
635 y1 = ( a1 - x1 ) - w1;
636 y2 = ( a2 - x2 ) - w2;
637 a0 = x0;
638 a1 = x1;
639 a2 = x2;
640 w0 = fn0 * pio2_3 - y0;
641 w1 = fn1 * pio2_3 - y1;
642 w2 = fn2 * pio2_3 - y2;
643 x0 = a0 - w0;
644 x1 = a1 - w1;
645 x2 = a2 - w2;
646 y0 = ( a0 - x0 ) - w0;
647 y1 = ( a1 - x1 ) - w1;
648 y2 = ( a2 - x2 ) - w2;
649 a0 = x0;
650 a1 = x1;
651 a2 = x2;
652 w0 = fn0 * pio2_3t - y0;
653 w1 = fn1 * pio2_3t - y1;
654 w2 = fn2 * pio2_3t - y2;
655 x0 = a0 - w0;
656 x1 = a1 - w1;
657 x2 = a2 - w2;
658 y0 = ( a0 - x0 ) - w0;
659 y1 = ( a1 - x1 ) - w1;
660 y2 = ( a2 - x2 ) - w2;
661 xsb0 = HI(&x0);
662 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
663 xsb1 = HI(&x1);
664 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
665 xsb2 = HI(&x2);
666 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
667 switch ( i )
668 {
669 double t0, t1, t2, z0, z1, z2;
670 unsigned j0, j1, j2;
671
672 case 0:
673 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
674 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
675 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
676 HI(&t0) = j0;
677 HI(&t1) = j1;
678 HI(&t2) = j2;
679 LO(&t0) = 0;
680 LO(&t1) = 0;
681 LO(&t2) = 0;
682 x0 = ( x0 - t0 ) + y0;
683 x1 = ( x1 - t1 ) + y1;
684 x2 = ( x2 - t2 ) + y2;
685 z0 = x0 * x0;
686 z1 = x1 * x1;
687 z2 = x2 * x2;
688 t0 = z0 * ( qq1 + z0 * qq2 );
689 t1 = z1 * ( qq1 + z1 * qq2 );
690 t2 = z2 * ( qq1 + z2 * qq2 );
691 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
692 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
693 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
694 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
695 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
696 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
697 xsb0 = ( xsb0 >> 30 ) & 2;
698 xsb1 = ( xsb1 >> 30 ) & 2;
699 xsb2 = ( xsb2 >> 30 ) & 2;
700 n0 ^= ( xsb0 & ~( n0 << 1 ) );
701 n1 ^= ( xsb1 & ~( n1 << 1 ) );
702 n2 ^= ( xsb2 & ~( n2 << 1 ) );
703 xsb0 |= 1;
704 xsb1 |= 1;
705 xsb2 |= 1;
706 a0 = __vlibm_TBL_sincos_hi[j0+n0];
707 a1 = __vlibm_TBL_sincos_hi[j1+n1];
708 a2 = __vlibm_TBL_sincos_hi[j2+n2];
709 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
710 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
711 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
712 *py0 = ( a0 + t0 );
713 *py1 = ( a1 + t1 );
714 *py2 = ( a2 + t2 );
715 break;
716
717 case 1:
718 j0 = n0 & 1;
719 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
720 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
721 HI(&t1) = j1;
722 HI(&t2) = j2;
723 LO(&t1) = 0;
724 LO(&t2) = 0;
725 x0_or_one[0] = x0;
726 x0_or_one[2] = -x0;
727 y0_or_zero[0] = y0;
728 y0_or_zero[2] = -y0;
729 x1 = ( x1 - t1 ) + y1;
730 x2 = ( x2 - t2 ) + y2;
731 z0 = x0 * x0;
732 z1 = x1 * x1;
733 z2 = x2 * x2;
734 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
735 t1 = z1 * ( qq1 + z1 * qq2 );
736 t2 = z2 * ( qq1 + z2 * qq2 );
737 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
738 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
739 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
740 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
741 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
742 xsb1 = ( xsb1 >> 30 ) & 2;
743 xsb2 = ( xsb2 >> 30 ) & 2;
744 n1 ^= ( xsb1 & ~( n1 << 1 ) );
745 n2 ^= ( xsb2 & ~( n2 << 1 ) );
746 xsb1 |= 1;
747 xsb2 |= 1;
748 a1 = __vlibm_TBL_sincos_hi[j1+n1];
749 a2 = __vlibm_TBL_sincos_hi[j2+n2];
750 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
751 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
752 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
753 *py0 = t0;
754 *py1 = ( a1 + t1 );
755 *py2 = ( a2 + t2 );
756 break;
757
758 case 2:
759 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
760 j1 = n1 & 1;
761 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
762 HI(&t0) = j0;
763 HI(&t2) = j2;
764 LO(&t0) = 0;
765 LO(&t2) = 0;
766 x1_or_one[0] = x1;
767 x1_or_one[2] = -x1;
768 x0 = ( x0 - t0 ) + y0;
769 y1_or_zero[0] = y1;
770 y1_or_zero[2] = -y1;
771 x2 = ( x2 - t2 ) + y2;
772 z0 = x0 * x0;
773 z1 = x1 * x1;
774 z2 = x2 * x2;
775 t0 = z0 * ( qq1 + z0 * qq2 );
776 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
777 t2 = z2 * ( qq1 + z2 * qq2 );
778 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
779 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
780 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
781 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
782 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
783 xsb0 = ( xsb0 >> 30 ) & 2;
784 xsb2 = ( xsb2 >> 30 ) & 2;
785 n0 ^= ( xsb0 & ~( n0 << 1 ) );
786 n2 ^= ( xsb2 & ~( n2 << 1 ) );
787 xsb0 |= 1;
788 xsb2 |= 1;
789 a0 = __vlibm_TBL_sincos_hi[j0+n0];
790 a2 = __vlibm_TBL_sincos_hi[j2+n2];
791 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
792 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
793 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
794 *py0 = ( a0 + t0 );
795 *py1 = t1;
796 *py2 = ( a2 + t2 );
797 break;
798
799 case 3:
800 j0 = n0 & 1;
801 j1 = n1 & 1;
802 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
803 HI(&t2) = j2;
804 LO(&t2) = 0;
805 x0_or_one[0] = x0;
806 x0_or_one[2] = -x0;
807 x1_or_one[0] = x1;
808 x1_or_one[2] = -x1;
809 y0_or_zero[0] = y0;
810 y0_or_zero[2] = -y0;
811 y1_or_zero[0] = y1;
812 y1_or_zero[2] = -y1;
813 x2 = ( x2 - t2 ) + y2;
814 z0 = x0 * x0;
815 z1 = x1 * x1;
816 z2 = x2 * x2;
817 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
818 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
819 t2 = z2 * ( qq1 + z2 * qq2 );
820 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
821 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
822 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
823 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
824 xsb2 = ( xsb2 >> 30 ) & 2;
825 n2 ^= ( xsb2 & ~( n2 << 1 ) );
826 xsb2 |= 1;
827 a2 = __vlibm_TBL_sincos_hi[j2+n2];
828 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
829 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
830 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
831 *py0 = t0;
832 *py1 = t1;
833 *py2 = ( a2 + t2 );
834 break;
835
836 case 4:
837 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
838 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
839 j2 = n2 & 1;
840 HI(&t0) = j0;
841 HI(&t1) = j1;
842 LO(&t0) = 0;
843 LO(&t1) = 0;
844 x2_or_one[0] = x2;
845 x2_or_one[2] = -x2;
846 x0 = ( x0 - t0 ) + y0;
847 x1 = ( x1 - t1 ) + y1;
848 y2_or_zero[0] = y2;
849 y2_or_zero[2] = -y2;
850 z0 = x0 * x0;
851 z1 = x1 * x1;
852 z2 = x2 * x2;
853 t0 = z0 * ( qq1 + z0 * qq2 );
854 t1 = z1 * ( qq1 + z1 * qq2 );
855 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
856 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
857 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
858 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
859 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
860 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
861 xsb0 = ( xsb0 >> 30 ) & 2;
862 xsb1 = ( xsb1 >> 30 ) & 2;
863 n0 ^= ( xsb0 & ~( n0 << 1 ) );
864 n1 ^= ( xsb1 & ~( n1 << 1 ) );
865 xsb0 |= 1;
866 xsb1 |= 1;
867 a0 = __vlibm_TBL_sincos_hi[j0+n0];
868 a1 = __vlibm_TBL_sincos_hi[j1+n1];
869 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
870 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
871 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
872 *py0 = ( a0 + t0 );
873 *py1 = ( a1 + t1 );
874 *py2 = t2;
875 break;
876
877 case 5:
878 j0 = n0 & 1;
879 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
880 j2 = n2 & 1;
881 HI(&t1) = j1;
882 LO(&t1) = 0;
883 x0_or_one[0] = x0;
884 x0_or_one[2] = -x0;
885 x2_or_one[0] = x2;
886 x2_or_one[2] = -x2;
887 y0_or_zero[0] = y0;
888 y0_or_zero[2] = -y0;
889 x1 = ( x1 - t1 ) + y1;
890 y2_or_zero[0] = y2;
891 y2_or_zero[2] = -y2;
892 z0 = x0 * x0;
893 z1 = x1 * x1;
894 z2 = x2 * x2;
895 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
896 t1 = z1 * ( qq1 + z1 * qq2 );
897 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
898 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
899 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
900 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
901 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
902 xsb1 = ( xsb1 >> 30 ) & 2;
903 n1 ^= ( xsb1 & ~( n1 << 1 ) );
904 xsb1 |= 1;
905 a1 = __vlibm_TBL_sincos_hi[j1+n1];
906 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
907 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
908 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
909 *py0 = t0;
910 *py1 = ( a1 + t1 );
911 *py2 = t2;
912 break;
913
914 case 6:
915 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
916 j1 = n1 & 1;
917 j2 = n2 & 1;
918 HI(&t0) = j0;
919 LO(&t0) = 0;
920 x1_or_one[0] = x1;
921 x1_or_one[2] = -x1;
922 x2_or_one[0] = x2;
923 x2_or_one[2] = -x2;
924 x0 = ( x0 - t0 ) + y0;
925 y1_or_zero[0] = y1;
926 y1_or_zero[2] = -y1;
927 y2_or_zero[0] = y2;
928 y2_or_zero[2] = -y2;
929 z0 = x0 * x0;
930 z1 = x1 * x1;
931 z2 = x2 * x2;
932 t0 = z0 * ( qq1 + z0 * qq2 );
933 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
934 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
935 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
936 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
937 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
938 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
939 xsb0 = ( xsb0 >> 30 ) & 2;
940 n0 ^= ( xsb0 & ~( n0 << 1 ) );
941 xsb0 |= 1;
942 a0 = __vlibm_TBL_sincos_hi[j0+n0];
943 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
944 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
945 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
946 *py0 = ( a0 + t0 );
947 *py1 = t1;
948 *py2 = t2;
949 break;
950
951 case 7:
952 j0 = n0 & 1;
953 j1 = n1 & 1;
954 j2 = n2 & 1;
955 x0_or_one[0] = x0;
956 x0_or_one[2] = -x0;
957 x1_or_one[0] = x1;
958 x1_or_one[2] = -x1;
959 x2_or_one[0] = x2;
960 x2_or_one[2] = -x2;
961 y0_or_zero[0] = y0;
962 y0_or_zero[2] = -y0;
963 y1_or_zero[0] = y1;
964 y1_or_zero[2] = -y1;
965 y2_or_zero[0] = y2;
966 y2_or_zero[2] = -y2;
967 z0 = x0 * x0;
968 z1 = x1 * x1;
969 z2 = x2 * x2;
970 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
971 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
972 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
973 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
974 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
975 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
976 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
977 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
978 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
979 *py0 = t0;
980 *py1 = t1;
981 *py2 = t2;
982 break;
983 }
984
985 x += stridex;
986 y += stridey;
987 i = 0;
988 } while ( --n > 0 );
989
990 if ( i > 0 )
991 {
992 double fn0, fn1, a0, a1, w0, w1, y0, y1;
993 double t0, t1, z0, z1;
994 unsigned j0, j1;
995 int n0, n1;
996
997 if ( i > 1 )
998 {
999 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1000 fn1 = (double) n1;
1001 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1002 a1 = x1 - fn1 * pio2_1;
1003 w1 = fn1 * pio2_2;
1004 x1 = a1 - w1;
1005 y1 = ( a1 - x1 ) - w1;
1006 a1 = x1;
1007 w1 = fn1 * pio2_3 - y1;
1008 x1 = a1 - w1;
1009 y1 = ( a1 - x1 ) - w1;
1010 a1 = x1;
1011 w1 = fn1 * pio2_3t - y1;
1012 x1 = a1 - w1;
1013 y1 = ( a1 - x1 ) - w1;
1014 xsb1 = HI(&x1);
1015 if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
1016 {
1017 j1 = n1 & 1;
1018 x1_or_one[0] = x1;
1019 x1_or_one[2] = -x1;
1020 y1_or_zero[0] = y1;
1021 y1_or_zero[2] = -y1;
1022 z1 = x1 * x1;
1023 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1024 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1025 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1026 *py1 = t1;
1027 }
1028 else
1029 {
1030 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1031 HI(&t1) = j1;
1032 LO(&t1) = 0;
1033 x1 = ( x1 - t1 ) + y1;
1034 z1 = x1 * x1;
1035 t1 = z1 * ( qq1 + z1 * qq2 );
1036 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1037 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1038 xsb1 = ( xsb1 >> 30 ) & 2;
1039 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1040 xsb1 |= 1;
1041 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1042 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
1043 *py1 = ( a1 + t1 );
1044 }
1045 }
1046 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1047 fn0 = (double) n0;
1048 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1049 a0 = x0 - fn0 * pio2_1;
1050 w0 = fn0 * pio2_2;
1051 x0 = a0 - w0;
1052 y0 = ( a0 - x0 ) - w0;
1053 a0 = x0;
1054 w0 = fn0 * pio2_3 - y0;
1055 x0 = a0 - w0;
1056 y0 = ( a0 - x0 ) - w0;
1057 a0 = x0;
1058 w0 = fn0 * pio2_3t - y0;
1059 x0 = a0 - w0;
1060 y0 = ( a0 - x0 ) - w0;
1061 xsb0 = HI(&x0);
1062 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
1063 {
1064 j0 = n0 & 1;
1065 x0_or_one[0] = x0;
1066 x0_or_one[2] = -x0;
1067 y0_or_zero[0] = y0;
1068 y0_or_zero[2] = -y0;
1069 z0 = x0 * x0;
1070 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1071 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1072 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1073 *py0 = t0;
1074 }
1075 else
1076 {
1077 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1078 HI(&t0) = j0;
1079 LO(&t0) = 0;
1080 x0 = ( x0 - t0 ) + y0;
1081 z0 = x0 * x0;
1082 t0 = z0 * ( qq1 + z0 * qq2 );
1083 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1084 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1085 xsb0 = ( xsb0 >> 30 ) & 2;
1086 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1087 xsb0 |= 1;
1088 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1089 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
1090 *py0 = ( a0 + t0 );
1091 }
1092 }
1093
1094 if ( biguns )
1095 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
1096 }