1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31 #include <sys/ccompile.h>
32
33 #ifdef _LITTLE_ENDIAN
34 #define HI(x) *(1+(int*)x)
35 #define LO(x) *(unsigned*)x
36 #else
37 #define HI(x) *(int*)x
38 #define LO(x) *(1+(unsigned*)x)
39 #endif
40
41 #ifdef __RESTRICT
42 #define restrict _Restrict
43 #else
44 #define restrict
45 #endif
46
47 /*
48 * vsincos.c
49 *
50 * Vector sine and cosine function. Just slight modifications to vcos.c.
51 */
52
53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
54
55 static const double
56 half[2] = { 0.5, -0.5 },
57 one = 1.0,
58 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
59 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
60 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
61 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
62 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
63 pp1 = -1.666666666605760465276263943134982554676e-0001,
64 pp2 = 8.333261209690963126718376566146180944442e-0003,
65 qq1 = -4.999999999977710986407023955908711557870e-0001,
66 qq2 = 4.166654863857219350645055881018842089580e-0002,
67 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
68 -4.999999999999931701464060878888294524481e-0001 },
69 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
70 4.166666666394861917535640593963708222319e-0002 },
71 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
72 -1.388888552656142867832756687736851681462e-0003 },
73 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
74 2.478519423681460796618128289454530524759e-0005 };
75
76 /* Don't __ the following; acomp will handle it */
77 extern double fabs( double );
78 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
79
80 /*
81 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
83 *
84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
86 *
87 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
88 */
89 void
90 __vsincos( int n, double * restrict x, int stridex,
91 double * restrict y, int stridey,
92 double * restrict c, int stridec )
93 {
94 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
95 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
96 double x0, x1, x2,
97 *py0, *py1, *py2,
98 *pc0, *pc1, *pc2,
99 *xsave, *ysave, *csave;
100 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
101 int i, biguns, nsave, sxsave, sysave, scsave;
102 nsave = n;
103 xsave = x;
104 sxsave = stridex;
105 ysave = y;
106 sysave = stridey;
107 csave = c;
108 scsave = stridec;
109 biguns = 0;
110
111 do /* MAIN LOOP */
112 {
113
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 x += stridex;
122 y += stridey;
123 c += stridec;
124 i = 0;
125 if ( --n <= 0 )
126 break;
127 goto LOOP0;
128 }
129 if ( hx0 < 0x3e400000 ) {
130 /* Too small. cos x ~ 1, sin x ~ x. */
131 *c = 1.0;
132 *y = *x;
133 x += stridex;
134 y += stridey;
135 c += stridec;
136 i = 0;
137 if ( --n <= 0 )
138 break;
139 goto LOOP0;
140 }
141 x0 = *x;
142 py0 = y;
143 pc0 = c;
144 x += stridex;
145 y += stridey;
146 c += stridec;
147 i = 1;
148 if ( --n <= 0 )
149 break;
150
151 LOOP1: /* Get second arg, same as above. */
152 xsb1 = HI(x);
153 hx1 = xsb1 & ~0x80000000;
154 if ( hx1 > 0x3fe921fb )
155 {
156 biguns = 1;
157 x += stridex;
158 y += stridey;
159 c += stridec;
160 i = 1;
161 if ( --n <= 0 )
162 break;
163 goto LOOP1;
164 }
165 if ( hx1 < 0x3e400000 )
166 {
167 *c = 1.0;
168 *y = *x;
169 x += stridex;
170 y += stridey;
171 c += stridec;
172 i = 1;
173 if ( --n <= 0 )
174 break;
175 goto LOOP1;
176 }
177 x1 = *x;
178 py1 = y;
179 pc1 = c;
180 x += stridex;
181 y += stridey;
182 c += stridec;
183 i = 2;
184 if ( --n <= 0 )
185 break;
186
187 LOOP2: /* Get third arg, same as above. */
188 xsb2 = HI(x);
189 hx2 = xsb2 & ~0x80000000;
190 if ( hx2 > 0x3fe921fb )
191 {
192 biguns = 1;
193 x += stridex;
194 y += stridey;
195 c += stridec;
196 i = 2;
197 if ( --n <= 0 )
198 break;
199 goto LOOP2;
200 }
201 if ( hx2 < 0x3e400000 )
202 {
203 *c = 1.0;
204 *y = *x;
205 x += stridex;
206 y += stridey;
207 c += stridec;
208 i = 2;
209 if ( --n <= 0 )
210 break;
211 goto LOOP2;
212 }
213 x2 = *x;
214 py2 = y;
215 pc2 = c;
216
217 /*
218 * 0x3fc40000 = 5/32 ~ 0.15625
219 * Get msb after subtraction. Will be 1 only if
220 * hx0 - 5/32 is negative.
221 */
222 i = ( hx2 - 0x3fc40000 ) >> 31;
223 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
224 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
225 switch ( i )
226 {
227 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
228 double w0, w1, w2;
229 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
230 double z0, z1, z2;
231 unsigned j0, j1, j2;
232
233 case 0: /* All are > 5/32 */
234 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
235 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
236 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
237
238 HI(&t0) = j0;
239 HI(&t1) = j1;
240 HI(&t2) = j2;
241 LO(&t0) = 0;
242 LO(&t1) = 0;
243 LO(&t2) = 0;
244
245 x0 -= t0;
246 x1 -= t1;
247 x2 -= t2;
248
249 z0 = x0 * x0;
250 z1 = x1 * x1;
251 z2 = x2 * x2;
252
253 t0 = z0 * ( qq1 + z0 * qq2 );
254 t1 = z1 * ( qq1 + z1 * qq2 );
255 t2 = z2 * ( qq1 + z2 * qq2 );
256
257 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
258 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
259 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
260
261 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
262 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
263 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
264
265 xsb0 = ( xsb0 >> 30 ) & 2;
266 xsb1 = ( xsb1 >> 30 ) & 2;
267 xsb2 = ( xsb2 >> 30 ) & 2;
268
269 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
270 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
271 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
272
273 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
274 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
275 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
276 /* cos_lo(t) */
277 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
278 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
279 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
280
281 *pc0 = a2_0 + t2_0;
282 *pc1 = a2_1 + t2_1;
283 *pc2 = a2_2 + t2_2;
284
285 t1_0 = a2_0*w0 + a1_0*t0;
286 t1_1 = a2_1*w1 + a1_1*t1;
287 t1_2 = a2_2*w2 + a1_2*t2;
288
289 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
290 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
291 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
292
293 *py0 = a1_0 + t1_0;
294 *py1 = a1_1 + t1_1;
295 *py2 = a1_2 + t1_2;
296
297 break;
298
299 case 1:
300 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
301 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
302 HI(&t0) = j0;
303 HI(&t1) = j1;
304 LO(&t0) = 0;
305 LO(&t1) = 0;
306 x0 -= t0;
307 x1 -= t1;
308 z0 = x0 * x0;
309 z1 = x1 * x1;
310 z2 = x2 * x2;
311 t0 = z0 * ( qq1 + z0 * qq2 );
312 t1 = z1 * ( qq1 + z1 * qq2 );
313 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
314 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
315 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
316 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
317 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
318 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
319 xsb0 = ( xsb0 >> 30 ) & 2;
320 xsb1 = ( xsb1 >> 30 ) & 2;
321
322 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
323 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
324
325 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
326 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
327 /* cos_lo(t) */
328 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
329 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
330
331 *pc0 = a2_0 + t2_0;
332 *pc1 = a2_1 + t2_1;
333 *pc2 = one + t2;
334
335 t1_0 = a2_0*w0 + a1_0*t0;
336 t1_1 = a2_1*w1 + a1_1*t1;
337 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
338
339 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
340 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
341 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
342
343 *py0 = a1_0 + t1_0;
344 *py1 = a1_1 + t1_1;
345 t2 = x2 + x2 * t2;
346 *py2 = t2;
347
348 break;
349
350 case 2:
351 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
352 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
353 HI(&t0) = j0;
354 HI(&t2) = j2;
355 LO(&t0) = 0;
356 LO(&t2) = 0;
357 x0 -= t0;
358 x2 -= t2;
359 z0 = x0 * x0;
360 z1 = x1 * x1;
361 z2 = x2 * x2;
362 t0 = z0 * ( qq1 + z0 * qq2 );
363 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
364 t2 = z2 * ( qq1 + z2 * qq2 );
365 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
366 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
367 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
368 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
369 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
370 xsb0 = ( xsb0 >> 30 ) & 2;
371 xsb2 = ( xsb2 >> 30 ) & 2;
372
373 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
374 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
375
376 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
377 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
378 /* cos_lo(t) */
379 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
380 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
381
382 *pc0 = a2_0 + t2_0;
383 *pc1 = one + t1;
384 *pc2 = a2_2 + t2_2;
385
386 t1_0 = a2_0*w0 + a1_0*t0;
387 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
388 t1_2 = a2_2*w2 + a1_2*t2;
389
390 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
391 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
392 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
393
394 *py0 = a1_0 + t1_0;
395 t1 = x1 + x1 * t1;
396 *py1 = t1;
397 *py2 = a1_2 + t1_2;
398
399 break;
400
401 case 3:
402 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
403 HI(&t0) = j0;
404 LO(&t0) = 0;
405 x0 -= t0;
406 z0 = x0 * x0;
407 z1 = x1 * x1;
408 z2 = x2 * x2;
409 t0 = z0 * ( qq1 + z0 * qq2 );
410 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
411 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
412 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
413 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
414 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
415 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
416 xsb0 = ( xsb0 >> 30 ) & 2;
417 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
418
419 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
420
421 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
422
423 *pc0 = a2_0 + t2_0;
424 *pc1 = one + t1;
425 *pc2 = one + t2;
426
427 t1_0 = a2_0*w0 + a1_0*t0;
428 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
429 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
430
431 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
432 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
433 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
434
435 *py0 = a1_0 + t1_0;
436 t1 = x1 + x1 * t1;
437 *py1 = t1;
438 t2 = x2 + x2 * t2;
439 *py2 = t2;
440
441 break;
442
443 case 4:
444 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
445 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
446 HI(&t1) = j1;
447 HI(&t2) = j2;
448 LO(&t1) = 0;
449 LO(&t2) = 0;
450 x1 -= t1;
451 x2 -= t2;
452 z0 = x0 * x0;
453 z1 = x1 * x1;
454 z2 = x2 * x2;
455 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
456 t1 = z1 * ( qq1 + z1 * qq2 );
457 t2 = z2 * ( qq1 + z2 * qq2 );
458 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
459 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
460 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
461 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
462 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
463 xsb1 = ( xsb1 >> 30 ) & 2;
464 xsb2 = ( xsb2 >> 30 ) & 2;
465
466 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
467 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
468
469 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
470 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
471 /* cos_lo(t) */
472 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
473 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
474
475 *pc0 = one + t0;
476 *pc1 = a2_1 + t2_1;
477 *pc2 = a2_2 + t2_2;
478
479 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
480 t1_1 = a2_1*w1 + a1_1*t1;
481 t1_2 = a2_2*w2 + a1_2*t2;
482
483 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
484 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
485 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
486
487 t0 = x0 + x0 * t0;
488 *py0 = t0;
489 *py1 = a1_1 + t1_1;
490 *py2 = a1_2 + t1_2;
491
492 break;
493
494 case 5:
495 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
496 HI(&t1) = j1;
497 LO(&t1) = 0;
498 x1 -= t1;
499 z0 = x0 * x0;
500 z1 = x1 * x1;
501 z2 = x2 * x2;
502 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
503 t1 = z1 * ( qq1 + z1 * qq2 );
504 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
505 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
506 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
507 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
508 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
509 xsb1 = ( xsb1 >> 30 ) & 2;
510
511 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
512
513 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
514
515 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
516
517 *pc0 = one + t0;
518 *pc1 = a2_1 + t2_1;
519 *pc2 = one + t2;
520
521 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
522 t1_1 = a2_1*w1 + a1_1*t1;
523 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
524
525 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
526 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
527 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
528
529 t0 = x0 + x0 * t0;
530 *py0 = t0;
531 *py1 = a1_1 + t1_1;
532 t2 = x2 + x2 * t2;
533 *py2 = t2;
534
535 break;
536
537 case 6:
538 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
539 HI(&t2) = j2;
540 LO(&t2) = 0;
541 x2 -= t2;
542 z0 = x0 * x0;
543 z1 = x1 * x1;
544 z2 = x2 * x2;
545 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
546 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
547 t2 = z2 * ( qq1 + z2 * qq2 );
548 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
549 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
550 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
551 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
552 xsb2 = ( xsb2 >> 30 ) & 2;
553 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
554
555 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
556
557 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
558
559 *pc0 = one + t0;
560 *pc1 = one + t1;
561 *pc2 = a2_2 + t2_2;
562
563 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
564 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
565 t1_2 = a2_2*w2 + a1_2*t2;
566
567 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
568 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
569 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
570
571 t0 = x0 + x0 * t0;
572 *py0 = t0;
573 t1 = x1 + x1 * t1;
574 *py1 = t1;
575 *py2 = a1_2 + t1_2;
576
577 break;
578
579 case 7: /* All are < 5/32 */
580 z0 = x0 * x0;
581 z1 = x1 * x1;
582 z2 = x2 * x2;
583 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
584 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
585 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
586 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
587 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
588 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
589 *pc0 = one + t0;
590 *pc1 = one + t1;
591 *pc2 = one + t2;
592 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
593 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
594 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
595 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
596 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
597 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
598 t0 = x0 + x0 * t0;
599 t1 = x1 + x1 * t1;
600 t2 = x2 + x2 * t2;
601 *py0 = t0;
602 *py1 = t1;
603 *py2 = t2;
604 break;
605 }
606
607 x += stridex;
608 y += stridey;
609 c += stridec;
610 i = 0;
611 } while ( --n > 0 ); /* END MAIN LOOP */
612
613 /*
614 * CLEAN UP last 0, 1, or 2 elts.
615 */
616 if ( i > 0 ) /* Clean up elts at tail. i < 3. */
617 {
618 double a1_0, a1_1, a2_0, a2_1;
619 double w0, w1;
620 double t0, t1, t1_0, t1_1, t2_0, t2_1;
621 double z0, z1;
622 unsigned j0, j1;
623
624 if ( i > 1 )
625 {
626 if ( hx1 < 0x3fc40000 )
627 {
628 z1 = x1 * x1;
629 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
630 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
631 t1 = one + t1;
632 *pc1 = t1;
633 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
634 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
635 t1 = x1 + x1 * t1;
636 *py1 = t1;
637 }
638 else
639 {
640 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
641 HI(&t1) = j1;
642 LO(&t1) = 0;
643 x1 -= t1;
644 z1 = x1 * x1;
645 t1 = z1 * ( qq1 + z1 * qq2 );
646 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
647 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
648 xsb1 = ( xsb1 >> 30 ) & 2;
649 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
650 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
651 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
652 *pc1 = a2_1 + t2_1;
653 t1_1 = a2_1*w1 + a1_1*t1;
654 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
655 *py1 = a1_1 + t1_1;
656 }
657 }
658 if ( hx0 < 0x3fc40000 )
659 {
660 z0 = x0 * x0;
661 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
662 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
663 t0 = one + t0;
664 *pc0 = t0;
665 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
666 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
667 t0 = x0 + x0 * t0;
668 *py0 = t0;
669 }
670 else
671 {
672 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
673 HI(&t0) = j0;
674 LO(&t0) = 0;
675 x0 -= t0;
676 z0 = x0 * x0;
677 t0 = z0 * ( qq1 + z0 * qq2 );
678 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
679 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
680 xsb0 = ( xsb0 >> 30 ) & 2;
681 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
682 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
683 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
684 *pc0 = a2_0 + t2_0;
685 t1_0 = a2_0*w0 + a1_0*t0;
686 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
687 *py0 = a1_0 + t1_0;
688 }
689 } /* END CLEAN UP */
690
691 if ( !biguns )
692 return;
693
694 /*
695 * Take care of BIGUNS.
696 */
697 n = nsave;
698 x = xsave;
699 stridex = sxsave;
700 y = ysave;
701 stridey = sysave;
702 c = csave;
703 stridec = scsave;
704 biguns = 0;
705
706 x0_or_one[1] = 1.0;
707 x1_or_one[1] = 1.0;
708 x2_or_one[1] = 1.0;
709 x0_or_one[3] = -1.0;
710 x1_or_one[3] = -1.0;
711 x2_or_one[3] = -1.0;
712 y0_or_zero[1] = 0.0;
713 y1_or_zero[1] = 0.0;
714 y2_or_zero[1] = 0.0;
715 y0_or_zero[3] = 0.0;
716 y1_or_zero[3] = 0.0;
717 y2_or_zero[3] = 0.0;
718
719 do
720 {
721 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
722 unsigned hx;
723 int n0, n1, n2;
724
725 /*
726 * Find 3 more to work on: Not already done, not too big.
727 */
728 loop0:
729 hx = HI(x);
730 xsb0 = hx >> 31;
731 hx &= ~0x80000000;
732 if ( hx <= 0x3fe921fb ) /* Done above. */
733 {
734 x += stridex;
735 y += stridey;
736 c += stridec;
737 i = 0;
738 if ( --n <= 0 )
739 break;
740 goto loop0;
741 }
742 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
743 {
744 if ( hx >= 0x7ff00000 ) /* Inf or NaN */
745 {
746 x0 = *x;
747 *y = x0 - x0;
748 *c = x0 - x0;
749 }
750 else {
751 biguns = 1;
752 }
753 x += stridex;
754 y += stridey;
755 c += stridec;
756 i = 0;
757 if ( --n <= 0 )
758 break;
759 goto loop0;
760 }
761 x0 = *x;
762 py0 = y;
763 pc0 = c;
764 x += stridex;
765 y += stridey;
766 c += stridec;
767 i = 1;
768 if ( --n <= 0 )
769 break;
770
771 loop1:
772 hx = HI(x);
773 xsb1 = hx >> 31;
774 hx &= ~0x80000000;
775 if ( hx <= 0x3fe921fb )
776 {
777 x += stridex;
778 y += stridey;
779 c += stridec;
780 i = 1;
781 if ( --n <= 0 )
782 break;
783 goto loop1;
784 }
785 if ( hx > 0x413921fb )
786 {
787 if ( hx >= 0x7ff00000 )
788 {
789 x1 = *x;
790 *y = x1 - x1;
791 *c = x1 - x1;
792 }
793 else {
794 biguns = 1;
795 }
796 x += stridex;
797 y += stridey;
798 c += stridec;
799 i = 1;
800 if ( --n <= 0 )
801 break;
802 goto loop1;
803 }
804 x1 = *x;
805 py1 = y;
806 pc1 = c;
807 x += stridex;
808 y += stridey;
809 c += stridec;
810 i = 2;
811 if ( --n <= 0 )
812 break;
813
814 loop2:
815 hx = HI(x);
816 xsb2 = hx >> 31;
817 hx &= ~0x80000000;
818 if ( hx <= 0x3fe921fb )
819 {
820 x += stridex;
821 y += stridey;
822 c += stridec;
823 i = 2;
824 if ( --n <= 0 )
825 break;
826 goto loop2;
827 }
828 if ( hx > 0x413921fb )
829 {
830 if ( hx >= 0x7ff00000 )
831 {
832 x2 = *x;
833 *y = x2 - x2;
834 *c = x2 - x2;
835 }
836 else {
837 biguns = 1;
838 }
839 x += stridex;
840 y += stridey;
841 c += stridec;
842 i = 2;
843 if ( --n <= 0 )
844 break;
845 goto loop2;
846 }
847 x2 = *x;
848 py2 = y;
849 pc2 = c;
850
851 n0 = (int) ( x0 * invpio2 + half[xsb0] );
852 n1 = (int) ( x1 * invpio2 + half[xsb1] );
853 n2 = (int) ( x2 * invpio2 + half[xsb2] );
854 fn0 = (double) n0;
855 fn1 = (double) n1;
856 fn2 = (double) n2;
857 n0 &= 3;
858 n1 &= 3;
859 n2 &= 3;
860 a0 = x0 - fn0 * pio2_1;
861 a1 = x1 - fn1 * pio2_1;
862 a2 = x2 - fn2 * pio2_1;
863 w0 = fn0 * pio2_2;
864 w1 = fn1 * pio2_2;
865 w2 = fn2 * pio2_2;
866 x0 = a0 - w0;
867 x1 = a1 - w1;
868 x2 = a2 - w2;
869 y0 = ( a0 - x0 ) - w0;
870 y1 = ( a1 - x1 ) - w1;
871 y2 = ( a2 - x2 ) - w2;
872 a0 = x0;
873 a1 = x1;
874 a2 = x2;
875 w0 = fn0 * pio2_3 - y0;
876 w1 = fn1 * pio2_3 - y1;
877 w2 = fn2 * pio2_3 - y2;
878 x0 = a0 - w0;
879 x1 = a1 - w1;
880 x2 = a2 - w2;
881 y0 = ( a0 - x0 ) - w0;
882 y1 = ( a1 - x1 ) - w1;
883 y2 = ( a2 - x2 ) - w2;
884 a0 = x0;
885 a1 = x1;
886 a2 = x2;
887 w0 = fn0 * pio2_3t - y0;
888 w1 = fn1 * pio2_3t - y1;
889 w2 = fn2 * pio2_3t - y2;
890 x0 = a0 - w0;
891 x1 = a1 - w1;
892 x2 = a2 - w2;
893 y0 = ( a0 - x0 ) - w0;
894 y1 = ( a1 - x1 ) - w1;
895 y2 = ( a2 - x2 ) - w2;
896 xsb2 = HI(&x2);
897 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
898 xsb1 = HI(&x1);
899 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
900 xsb0 = HI(&x0);
901 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
902 switch ( i )
903 {
904 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
905 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
906 double z0, z1, z2;
907 unsigned j0, j1, j2;
908
909 case 0:
910 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
911 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
912 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
913 HI(&t0) = j0;
914 HI(&t1) = j1;
915 HI(&t2) = j2;
916 LO(&t0) = 0;
917 LO(&t1) = 0;
918 LO(&t2) = 0;
919 x0 = ( x0 - t0 ) + y0;
920 x1 = ( x1 - t1 ) + y1;
921 x2 = ( x2 - t2 ) + y2;
922 z0 = x0 * x0;
923 z1 = x1 * x1;
924 z2 = x2 * x2;
925 t0 = z0 * ( qq1 + z0 * qq2 );
926 t1 = z1 * ( qq1 + z1 * qq2 );
927 t2 = z2 * ( qq1 + z2 * qq2 );
928 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
929 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
930 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
931 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
932 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
933 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
934 xsb0 = ( xsb0 >> 30 ) & 2;
935 xsb1 = ( xsb1 >> 30 ) & 2;
936 xsb2 = ( xsb2 >> 30 ) & 2;
937 n0 ^= ( xsb0 & ~( n0 << 1 ) );
938 n1 ^= ( xsb1 & ~( n1 << 1 ) );
939 n2 ^= ( xsb2 & ~( n2 << 1 ) );
940 xsb0 |= 1;
941 xsb1 |= 1;
942 xsb2 |= 1;
943
944 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
945 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
946 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
947
948 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
949 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
950 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
951
952 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
953 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
954 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
955
956 w0 *= a2_0;
957 w1 *= a2_1;
958 w2 *= a2_2;
959
960 *pc0 = a2_0 + t2_0;
961 *pc1 = a2_1 + t2_1;
962 *pc2 = a2_2 + t2_2;
963
964 t1_0 = w0 + a1_0*t0;
965 t1_1 = w1 + a1_1*t1;
966 t1_2 = w2 + a1_2*t2;
967
968 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
969 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
970 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
971
972 *py0 = a1_0 + t1_0;
973 *py1 = a1_1 + t1_1;
974 *py2 = a1_2 + t1_2;
975
976 break;
977
978 case 1:
979 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
980 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
981 j2 = n2 & 1;
982 HI(&t0) = j0;
983 HI(&t1) = j1;
984 LO(&t0) = 0;
985 LO(&t1) = 0;
986 x2_or_one[0] = x2;
987 x2_or_one[2] = -x2;
988 x0 = ( x0 - t0 ) + y0;
989 x1 = ( x1 - t1 ) + y1;
990 y2_or_zero[0] = y2;
991 y2_or_zero[2] = -y2;
992 z0 = x0 * x0;
993 z1 = x1 * x1;
994 z2 = x2 * x2;
995 t0 = z0 * ( qq1 + z0 * qq2 );
996 t1 = z1 * ( qq1 + z1 * qq2 );
997 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
998 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
999 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1000 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1001 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1002 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1003 xsb0 = ( xsb0 >> 30 ) & 2;
1004 xsb1 = ( xsb1 >> 30 ) & 2;
1005 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1006 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1007 xsb0 |= 1;
1008 xsb1 |= 1;
1009 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1010 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1011
1012 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1013 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1014
1015 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1016 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1017 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1018
1019 *pc0 = a2_0 + t2_0;
1020 *pc1 = a2_1 + t2_1;
1021 *py2 = t2;
1022
1023 n2 = (n2 + 1) & 3;
1024 j2 = (j2 + 1) & 1;
1025 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1026
1027 t1_0 = a2_0*w0 + a1_0*t0;
1028 t1_1 = a2_1*w1 + a1_1*t1;
1029 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1030
1031 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1032 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1033 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1034
1035 *py0 = a1_0 + t1_0;
1036 *py1 = a1_1 + t1_1;
1037 *pc2 = t2;
1038
1039 break;
1040
1041 case 2:
1042 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1043 j1 = n1 & 1;
1044 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1045 HI(&t0) = j0;
1046 HI(&t2) = j2;
1047 LO(&t0) = 0;
1048 LO(&t2) = 0;
1049 x1_or_one[0] = x1;
1050 x1_or_one[2] = -x1;
1051 x0 = ( x0 - t0 ) + y0;
1052 y1_or_zero[0] = y1;
1053 y1_or_zero[2] = -y1;
1054 x2 = ( x2 - t2 ) + y2;
1055 z0 = x0 * x0;
1056 z1 = x1 * x1;
1057 z2 = x2 * x2;
1058 t0 = z0 * ( qq1 + z0 * qq2 );
1059 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1060 t2 = z2 * ( qq1 + z2 * qq2 );
1061 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1062 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1063 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1064 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1065 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1066 xsb0 = ( xsb0 >> 30 ) & 2;
1067 xsb2 = ( xsb2 >> 30 ) & 2;
1068 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1069 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1070 xsb0 |= 1;
1071 xsb2 |= 1;
1072
1073 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1074 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1075
1076 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1077 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1078
1079 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1080 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1081 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1082
1083 *pc0 = a2_0 + t2_0;
1084 *py1 = t1;
1085 *pc2 = a2_2 + t2_2;
1086
1087 n1 = (n1 + 1) & 3;
1088 j1 = (j1 + 1) & 1;
1089 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1090
1091 t1_0 = a2_0*w0 + a1_0*t0;
1092 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1093 t1_2 = a2_2*w2 + a1_2*t2;
1094
1095 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1096 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1097 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1098
1099 *py0 = a1_0 + t1_0;
1100 *pc1 = t1;
1101 *py2 = a1_2 + t1_2;
1102
1103 break;
1104
1105 case 3:
1106 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1107 j1 = n1 & 1;
1108 j2 = n2 & 1;
1109 HI(&t0) = j0;
1110 LO(&t0) = 0;
1111 x1_or_one[0] = x1;
1112 x1_or_one[2] = -x1;
1113 x2_or_one[0] = x2;
1114 x2_or_one[2] = -x2;
1115 x0 = ( x0 - t0 ) + y0;
1116 y1_or_zero[0] = y1;
1117 y1_or_zero[2] = -y1;
1118 y2_or_zero[0] = y2;
1119 y2_or_zero[2] = -y2;
1120 z0 = x0 * x0;
1121 z1 = x1 * x1;
1122 z2 = x2 * x2;
1123 t0 = z0 * ( qq1 + z0 * qq2 );
1124 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1125 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1126 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1127 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1128 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1129 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1130 xsb0 = ( xsb0 >> 30 ) & 2;
1131 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1132 xsb0 |= 1;
1133
1134 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1135 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1136
1137 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1138 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1139 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1140
1141 *pc0 = a2_0 + t2_0;
1142 *py1 = t1;
1143 *py2 = t2;
1144
1145 n1 = (n1 + 1) & 3;
1146 n2 = (n2 + 1) & 3;
1147 j1 = (j1 + 1) & 1;
1148 j2 = (j2 + 1) & 1;
1149
1150 t1_0 = a2_0*w0 + a1_0*t0;
1151 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1152 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1153
1154 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1155 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1156 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1157
1158 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1159 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1160
1161 *py0 = a1_0 + t1_0;
1162 *pc1 = t1;
1163 *pc2 = t2;
1164
1165 break;
1166
1167 case 4:
1168 j0 = n0 & 1;
1169 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1170 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1171 HI(&t1) = j1;
1172 HI(&t2) = j2;
1173 LO(&t1) = 0;
1174 LO(&t2) = 0;
1175 x0_or_one[0] = x0;
1176 x0_or_one[2] = -x0;
1177 y0_or_zero[0] = y0;
1178 y0_or_zero[2] = -y0;
1179 x1 = ( x1 - t1 ) + y1;
1180 x2 = ( x2 - t2 ) + y2;
1181 z0 = x0 * x0;
1182 z1 = x1 * x1;
1183 z2 = x2 * x2;
1184 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1185 t1 = z1 * ( qq1 + z1 * qq2 );
1186 t2 = z2 * ( qq1 + z2 * qq2 );
1187 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1188 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1189 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1190 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1191 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1192 xsb1 = ( xsb1 >> 30 ) & 2;
1193 xsb2 = ( xsb2 >> 30 ) & 2;
1194 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1195 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1196 xsb1 |= 1;
1197 xsb2 |= 1;
1198
1199 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1200 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1201
1202 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1203 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1204
1205 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1206 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1207 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1208
1209 *py0 = t0;
1210 *pc1 = a2_1 + t2_1;
1211 *pc2 = a2_2 + t2_2;
1212
1213 n0 = (n0 + 1) & 3;
1214 j0 = (j0 + 1) & 1;
1215 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1216
1217 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1218 t1_1 = a2_1*w1 + a1_1*t1;
1219 t1_2 = a2_2*w2 + a1_2*t2;
1220
1221 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1222 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1223 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1224
1225 *py1 = a1_1 + t1_1;
1226 *py2 = a1_2 + t1_2;
1227 *pc0 = t0;
1228
1229 break;
1230
1231 case 5:
1232 j0 = n0 & 1;
1233 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1234 j2 = n2 & 1;
1235 HI(&t1) = j1;
1236 LO(&t1) = 0;
1237 x0_or_one[0] = x0;
1238 x0_or_one[2] = -x0;
1239 x2_or_one[0] = x2;
1240 x2_or_one[2] = -x2;
1241 y0_or_zero[0] = y0;
1242 y0_or_zero[2] = -y0;
1243 x1 = ( x1 - t1 ) + y1;
1244 y2_or_zero[0] = y2;
1245 y2_or_zero[2] = -y2;
1246 z0 = x0 * x0;
1247 z1 = x1 * x1;
1248 z2 = x2 * x2;
1249 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1250 t1 = z1 * ( qq1 + z1 * qq2 );
1251 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1252 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1253 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1254 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1255 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1256 xsb1 = ( xsb1 >> 30 ) & 2;
1257 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1258 xsb1 |= 1;
1259
1260 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1261 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1262
1263 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1264 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1265 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1266
1267 *py0 = t0;
1268 *pc1 = a2_1 + t2_1;
1269 *py2 = t2;
1270
1271 n0 = (n0 + 1) & 3;
1272 n2 = (n2 + 1) & 3;
1273 j0 = (j0 + 1) & 1;
1274 j2 = (j2 + 1) & 1;
1275
1276 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1277 t1_1 = a2_1*w1 + a1_1*t1;
1278 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1279
1280 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1281 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1282 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1283
1284 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1285 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1286
1287 *pc0 = t0;
1288 *py1 = a1_1 + t1_1;
1289 *pc2 = t2;
1290
1291 break;
1292
1293 case 6:
1294 j0 = n0 & 1;
1295 j1 = n1 & 1;
1296 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1297 HI(&t2) = j2;
1298 LO(&t2) = 0;
1299 x0_or_one[0] = x0;
1300 x0_or_one[2] = -x0;
1301 x1_or_one[0] = x1;
1302 x1_or_one[2] = -x1;
1303 y0_or_zero[0] = y0;
1304 y0_or_zero[2] = -y0;
1305 y1_or_zero[0] = y1;
1306 y1_or_zero[2] = -y1;
1307 x2 = ( x2 - t2 ) + y2;
1308 z0 = x0 * x0;
1309 z1 = x1 * x1;
1310 z2 = x2 * x2;
1311 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1312 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1313 t2 = z2 * ( qq1 + z2 * qq2 );
1314 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1315 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1316 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1317 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1318 xsb2 = ( xsb2 >> 30 ) & 2;
1319 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1320 xsb2 |= 1;
1321
1322 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1323 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1324
1325 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1326 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1327 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1328
1329 *py0 = t0;
1330 *py1 = t1;
1331 *pc2 = a2_2 + t2_2;
1332
1333 n0 = (n0 + 1) & 3;
1334 n1 = (n1 + 1) & 3;
1335 j0 = (j0 + 1) & 1;
1336 j1 = (j1 + 1) & 1;
1337
1338 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1339 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1340 t1_2 = a2_2*w2 + a1_2*t2;
1341
1342 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1343 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1344 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1345
1346 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1347 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1348
1349 *pc0 = t0;
1350 *pc1 = t1;
1351 *py2 = a1_2 + t1_2;
1352
1353 break;
1354
1355 case 7:
1356 j0 = n0 & 1;
1357 j1 = n1 & 1;
1358 j2 = n2 & 1;
1359 x0_or_one[0] = x0;
1360 x0_or_one[2] = -x0;
1361 x1_or_one[0] = x1;
1362 x1_or_one[2] = -x1;
1363 x2_or_one[0] = x2;
1364 x2_or_one[2] = -x2;
1365 y0_or_zero[0] = y0;
1366 y0_or_zero[2] = -y0;
1367 y1_or_zero[0] = y1;
1368 y1_or_zero[2] = -y1;
1369 y2_or_zero[0] = y2;
1370 y2_or_zero[2] = -y2;
1371 z0 = x0 * x0;
1372 z1 = x1 * x1;
1373 z2 = x2 * x2;
1374 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1375 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1376 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1377 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1378 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1379 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1380 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1381 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1382 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1383 *py0 = t0;
1384 *py1 = t1;
1385 *py2 = t2;
1386
1387 n0 = (n0 + 1) & 3;
1388 n1 = (n1 + 1) & 3;
1389 n2 = (n2 + 1) & 3;
1390 j0 = (j0 + 1) & 1;
1391 j1 = (j1 + 1) & 1;
1392 j2 = (j2 + 1) & 1;
1393 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1394 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1395 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1396 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1397 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1398 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1399 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1400 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1401 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1402 *pc0 = t0;
1403 *pc1 = t1;
1404 *pc2 = t2;
1405 break;
1406 }
1407
1408 x += stridex;
1409 y += stridey;
1410 c += stridec;
1411 i = 0;
1412 } while ( --n > 0 );
1413
1414 if ( i > 0 )
1415 {
1416 double a1_0, a1_1, a2_0, a2_1;
1417 double t0, t1, t1_0, t1_1, t2_0, t2_1;
1418 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1419 double z0, z1;
1420 unsigned j0, j1;
1421 int n0, n1;
1422
1423 if ( i > 1 )
1424 {
1425 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1426 fn1 = (double) n1;
1427 n1 &= 3;
1428 a1 = x1 - fn1 * pio2_1;
1429 w1 = fn1 * pio2_2;
1430 x1 = a1 - w1;
1431 y1 = ( a1 - x1 ) - w1;
1432 a1 = x1;
1433 w1 = fn1 * pio2_3 - y1;
1434 x1 = a1 - w1;
1435 y1 = ( a1 - x1 ) - w1;
1436 a1 = x1;
1437 w1 = fn1 * pio2_3t - y1;
1438 x1 = a1 - w1;
1439 y1 = ( a1 - x1 ) - w1;
1440 xsb1 = HI(&x1);
1441 if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1442 {
1443 j1 = n1 & 1;
1444 x1_or_one[0] = x1;
1445 x1_or_one[2] = -x1;
1446 y1_or_zero[0] = y1;
1447 y1_or_zero[2] = -y1;
1448 z1 = x1 * x1;
1449 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1450 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1451 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1452 *py1 = t1;
1453 n1 = (n1 + 1) & 3;
1454 j1 = (j1 + 1) & 1;
1455 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1456 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1457 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1458 *pc1 = t1;
1459 }
1460 else
1461 {
1462 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1463 HI(&t1) = j1;
1464 LO(&t1) = 0;
1465 x1 = ( x1 - t1 ) + y1;
1466 z1 = x1 * x1;
1467 t1 = z1 * ( qq1 + z1 * qq2 );
1468 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1469 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1470 xsb1 = ( xsb1 >> 30 ) & 2;
1471 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1472 xsb1 |= 1;
1473 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1474 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1475 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1476 *pc1 = a2_1 + t2_1;
1477 t1_1 = a2_1*w1 + a1_1*t1;
1478 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1479 *py1 = a1_1 + t1_1;
1480 }
1481 }
1482 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1483 fn0 = (double) n0;
1484 n0 &= 3;
1485 a0 = x0 - fn0 * pio2_1;
1486 w0 = fn0 * pio2_2;
1487 x0 = a0 - w0;
1488 y0 = ( a0 - x0 ) - w0;
1489 a0 = x0;
1490 w0 = fn0 * pio2_3 - y0;
1491 x0 = a0 - w0;
1492 y0 = ( a0 - x0 ) - w0;
1493 a0 = x0;
1494 w0 = fn0 * pio2_3t - y0;
1495 x0 = a0 - w0;
1496 y0 = ( a0 - x0 ) - w0;
1497 xsb0 = HI(&x0);
1498 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1499 {
1500 j0 = n0 & 1;
1501 x0_or_one[0] = x0;
1502 x0_or_one[2] = -x0;
1503 y0_or_zero[0] = y0;
1504 y0_or_zero[2] = -y0;
1505 z0 = x0 * x0;
1506 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1507 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1508 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1509 *py0 = t0;
1510 n0 = (n0 + 1) & 3;
1511 j0 = (j0 + 1) & 1;
1512 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1513 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1514 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1515 *pc0 = t0;
1516 }
1517 else
1518 {
1519 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1520 HI(&t0) = j0;
1521 LO(&t0) = 0;
1522 x0 = ( x0 - t0 ) + y0;
1523 z0 = x0 * x0;
1524 t0 = z0 * ( qq1 + z0 * qq2 );
1525 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1526 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1527 xsb0 = ( xsb0 >> 30 ) & 2;
1528 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1529 xsb0 |= 1;
1530 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1531 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1532 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1533 *pc0 = a2_0 + t2_0;
1534 t1_0 = a2_0*w0 + a1_0*t0;
1535 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1536 *py0 = a1_0 + t1_0;
1537 }
1538 }
1539
1540 if ( biguns ) {
1541 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1542 }
1543 }