1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <sys/isa_defs.h>
31
32 #ifdef _LITTLE_ENDIAN
33 #define HI(x) *(1+(int*)x)
34 #define LO(x) *(unsigned*)x
35 #else
36 #define HI(x) *(int*)x
37 #define LO(x) *(1+(unsigned*)x)
38 #endif
39
40 #ifdef __RESTRICT
41 #define restrict _Restrict
42 #else
43 #define restrict
44 #endif
45
46 /*
47 * vsincos.c
48 *
49 * Vector sine and cosine function. Just slight modifications to vcos.c.
50 */
51
52 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
53
54 static const double
55 half[2] = { 0.5, -0.5 },
56 one = 1.0,
57 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
58 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
59 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
60 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
61 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
62 pp1 = -1.666666666605760465276263943134982554676e-0001,
63 pp2 = 8.333261209690963126718376566146180944442e-0003,
64 qq1 = -4.999999999977710986407023955908711557870e-0001,
65 qq2 = 4.166654863857219350645055881018842089580e-0002,
66 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
67 -4.999999999999931701464060878888294524481e-0001 },
68 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
69 4.166666666394861917535640593963708222319e-0002 },
70 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
71 -1.388888552656142867832756687736851681462e-0003 },
72 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
73 2.478519423681460796618128289454530524759e-0005 };
74
75 /* Don't __ the following; acomp will handle it */
76 extern double fabs( double );
77 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
78
79 /*
80 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
81 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
82 *
83 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
84 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
85 *
86 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
87 */
88 void
89 __vsincos( int n, double * restrict x, int stridex,
90 double * restrict y, int stridey,
91 double * restrict c, int stridec )
92 {
93 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
94 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
95 double x0, x1, x2,
96 *py0, *py1, *py2,
97 *pc0, *pc1, *pc2,
98 *xsave, *ysave, *csave;
99 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
100 int i, biguns, nsave, sxsave, sysave, scsave;
101
102 nsave = n;
103 xsave = x;
104 sxsave = stridex;
105 ysave = y;
106 sysave = stridey;
107 csave = c;
108 scsave = stridec;
109 biguns = 0;
110
111 do /* MAIN LOOP */
112 {
113
114 /* Gotos here so _break_ exits MAIN LOOP. */
115 LOOP0: /* Find first arg in right range. */
116 xsb0 = HI(x); /* get most significant word */
117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 if ( hx0 > 0x3fe921fb ) {
119 /* Too big: arg reduction needed, so leave for second part */
120 biguns = 1;
121 x += stridex;
122 y += stridey;
123 c += stridec;
124 i = 0;
125 if ( --n <= 0 )
126 break;
127 goto LOOP0;
128 }
129 if ( hx0 < 0x3e400000 ) {
130 /* Too small. cos x ~ 1, sin x ~ x. */
131 volatile int v = *x;
132 *c = 1.0;
133 *y = *x;
134 x += stridex;
135 y += stridey;
136 c += stridec;
137 i = 0;
138 if ( --n <= 0 )
139 break;
140 goto LOOP0;
141 }
142 x0 = *x;
143 py0 = y;
144 pc0 = c;
145 x += stridex;
146 y += stridey;
147 c += stridec;
148 i = 1;
149 if ( --n <= 0 )
150 break;
151
152 LOOP1: /* Get second arg, same as above. */
153 xsb1 = HI(x);
154 hx1 = xsb1 & ~0x80000000;
155 if ( hx1 > 0x3fe921fb )
156 {
157 biguns = 1;
158 x += stridex;
159 y += stridey;
160 c += stridec;
161 i = 1;
162 if ( --n <= 0 )
163 break;
164 goto LOOP1;
165 }
166 if ( hx1 < 0x3e400000 )
167 {
168 volatile int v = *x;
169 *c = 1.0;
170 *y = *x;
171 x += stridex;
172 y += stridey;
173 c += stridec;
174 i = 1;
175 if ( --n <= 0 )
176 break;
177 goto LOOP1;
178 }
179 x1 = *x;
180 py1 = y;
181 pc1 = c;
182 x += stridex;
183 y += stridey;
184 c += stridec;
185 i = 2;
186 if ( --n <= 0 )
187 break;
188
189 LOOP2: /* Get third arg, same as above. */
190 xsb2 = HI(x);
191 hx2 = xsb2 & ~0x80000000;
192 if ( hx2 > 0x3fe921fb )
193 {
194 biguns = 1;
195 x += stridex;
196 y += stridey;
197 c += stridec;
198 i = 2;
199 if ( --n <= 0 )
200 break;
201 goto LOOP2;
202 }
203 if ( hx2 < 0x3e400000 )
204 {
205 volatile int v = *x;
206 *c = 1.0;
207 *y = *x;
208 x += stridex;
209 y += stridey;
210 c += stridec;
211 i = 2;
212 if ( --n <= 0 )
213 break;
214 goto LOOP2;
215 }
216 x2 = *x;
217 py2 = y;
218 pc2 = c;
219
220 /*
221 * 0x3fc40000 = 5/32 ~ 0.15625
222 * Get msb after subtraction. Will be 1 only if
223 * hx0 - 5/32 is negative.
224 */
225 i = ( hx2 - 0x3fc40000 ) >> 31;
226 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
227 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
228 switch ( i )
229 {
230 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
231 double w0, w1, w2;
232 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
233 double z0, z1, z2;
234 unsigned j0, j1, j2;
235
236 case 0: /* All are > 5/32 */
237 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
238 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
239 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
240
241 HI(&t0) = j0;
242 HI(&t1) = j1;
243 HI(&t2) = j2;
244 LO(&t0) = 0;
245 LO(&t1) = 0;
246 LO(&t2) = 0;
247
248 x0 -= t0;
249 x1 -= t1;
250 x2 -= t2;
251
252 z0 = x0 * x0;
253 z1 = x1 * x1;
254 z2 = x2 * x2;
255
256 t0 = z0 * ( qq1 + z0 * qq2 );
257 t1 = z1 * ( qq1 + z1 * qq2 );
258 t2 = z2 * ( qq1 + z2 * qq2 );
259
260 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
261 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
262 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
263
264 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
265 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
266 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
267
268 xsb0 = ( xsb0 >> 30 ) & 2;
269 xsb1 = ( xsb1 >> 30 ) & 2;
270 xsb2 = ( xsb2 >> 30 ) & 2;
271
272 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
273 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
274 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
275
276 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
277 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
278 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
279 /* cos_lo(t) */
280 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
281 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
282 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
283
284 *pc0 = a2_0 + t2_0;
285 *pc1 = a2_1 + t2_1;
286 *pc2 = a2_2 + t2_2;
287
288 t1_0 = a2_0*w0 + a1_0*t0;
289 t1_1 = a2_1*w1 + a1_1*t1;
290 t1_2 = a2_2*w2 + a1_2*t2;
291
292 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
293 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
294 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
295
296 *py0 = a1_0 + t1_0;
297 *py1 = a1_1 + t1_1;
298 *py2 = a1_2 + t1_2;
299
300 break;
301
302 case 1:
303 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
304 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
305 HI(&t0) = j0;
306 HI(&t1) = j1;
307 LO(&t0) = 0;
308 LO(&t1) = 0;
309 x0 -= t0;
310 x1 -= t1;
311 z0 = x0 * x0;
312 z1 = x1 * x1;
313 z2 = x2 * x2;
314 t0 = z0 * ( qq1 + z0 * qq2 );
315 t1 = z1 * ( qq1 + z1 * qq2 );
316 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
317 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
318 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
319 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
320 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
321 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
322 xsb0 = ( xsb0 >> 30 ) & 2;
323 xsb1 = ( xsb1 >> 30 ) & 2;
324
325 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
326 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
327
328 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
329 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
330 /* cos_lo(t) */
331 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
332 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
333
334 *pc0 = a2_0 + t2_0;
335 *pc1 = a2_1 + t2_1;
336 *pc2 = one + t2;
337
338 t1_0 = a2_0*w0 + a1_0*t0;
339 t1_1 = a2_1*w1 + a1_1*t1;
340 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
341
342 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
343 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
344 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
345
346 *py0 = a1_0 + t1_0;
347 *py1 = a1_1 + t1_1;
348 t2 = x2 + x2 * t2;
349 *py2 = t2;
350
351 break;
352
353 case 2:
354 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
355 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
356 HI(&t0) = j0;
357 HI(&t2) = j2;
358 LO(&t0) = 0;
359 LO(&t2) = 0;
360 x0 -= t0;
361 x2 -= t2;
362 z0 = x0 * x0;
363 z1 = x1 * x1;
364 z2 = x2 * x2;
365 t0 = z0 * ( qq1 + z0 * qq2 );
366 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
367 t2 = z2 * ( qq1 + z2 * qq2 );
368 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
369 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
370 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
371 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
372 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
373 xsb0 = ( xsb0 >> 30 ) & 2;
374 xsb2 = ( xsb2 >> 30 ) & 2;
375
376 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
377 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
378
379 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
380 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
381 /* cos_lo(t) */
382 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
383 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
384
385 *pc0 = a2_0 + t2_0;
386 *pc1 = one + t1;
387 *pc2 = a2_2 + t2_2;
388
389 t1_0 = a2_0*w0 + a1_0*t0;
390 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
391 t1_2 = a2_2*w2 + a1_2*t2;
392
393 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
394 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
395 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
396
397 *py0 = a1_0 + t1_0;
398 t1 = x1 + x1 * t1;
399 *py1 = t1;
400 *py2 = a1_2 + t1_2;
401
402 break;
403
404 case 3:
405 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
406 HI(&t0) = j0;
407 LO(&t0) = 0;
408 x0 -= t0;
409 z0 = x0 * x0;
410 z1 = x1 * x1;
411 z2 = x2 * x2;
412 t0 = z0 * ( qq1 + z0 * qq2 );
413 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
414 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
415 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
416 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
417 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
418 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
419 xsb0 = ( xsb0 >> 30 ) & 2;
420 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
421
422 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
423
424 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
425
426 *pc0 = a2_0 + t2_0;
427 *pc1 = one + t1;
428 *pc2 = one + t2;
429
430 t1_0 = a2_0*w0 + a1_0*t0;
431 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
432 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
433
434 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
435 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
436 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
437
438 *py0 = a1_0 + t1_0;
439 t1 = x1 + x1 * t1;
440 *py1 = t1;
441 t2 = x2 + x2 * t2;
442 *py2 = t2;
443
444 break;
445
446 case 4:
447 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
448 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
449 HI(&t1) = j1;
450 HI(&t2) = j2;
451 LO(&t1) = 0;
452 LO(&t2) = 0;
453 x1 -= t1;
454 x2 -= t2;
455 z0 = x0 * x0;
456 z1 = x1 * x1;
457 z2 = x2 * x2;
458 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
459 t1 = z1 * ( qq1 + z1 * qq2 );
460 t2 = z2 * ( qq1 + z2 * qq2 );
461 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
462 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
463 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
464 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
465 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
466 xsb1 = ( xsb1 >> 30 ) & 2;
467 xsb2 = ( xsb2 >> 30 ) & 2;
468
469 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
470 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
471
472 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
473 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
474 /* cos_lo(t) */
475 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
476 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
477
478 *pc0 = one + t0;
479 *pc1 = a2_1 + t2_1;
480 *pc2 = a2_2 + t2_2;
481
482 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
483 t1_1 = a2_1*w1 + a1_1*t1;
484 t1_2 = a2_2*w2 + a1_2*t2;
485
486 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
487 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
488 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
489
490 t0 = x0 + x0 * t0;
491 *py0 = t0;
492 *py1 = a1_1 + t1_1;
493 *py2 = a1_2 + t1_2;
494
495 break;
496
497 case 5:
498 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
499 HI(&t1) = j1;
500 LO(&t1) = 0;
501 x1 -= t1;
502 z0 = x0 * x0;
503 z1 = x1 * x1;
504 z2 = x2 * x2;
505 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
506 t1 = z1 * ( qq1 + z1 * qq2 );
507 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
508 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
509 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
510 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
511 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
512 xsb1 = ( xsb1 >> 30 ) & 2;
513
514 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
515
516 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
517
518 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
519
520 *pc0 = one + t0;
521 *pc1 = a2_1 + t2_1;
522 *pc2 = one + t2;
523
524 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
525 t1_1 = a2_1*w1 + a1_1*t1;
526 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
527
528 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
529 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
530 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
531
532 t0 = x0 + x0 * t0;
533 *py0 = t0;
534 *py1 = a1_1 + t1_1;
535 t2 = x2 + x2 * t2;
536 *py2 = t2;
537
538 break;
539
540 case 6:
541 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
542 HI(&t2) = j2;
543 LO(&t2) = 0;
544 x2 -= t2;
545 z0 = x0 * x0;
546 z1 = x1 * x1;
547 z2 = x2 * x2;
548 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
549 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
550 t2 = z2 * ( qq1 + z2 * qq2 );
551 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
552 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
553 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
554 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
555 xsb2 = ( xsb2 >> 30 ) & 2;
556 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
557
558 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
559
560 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
561
562 *pc0 = one + t0;
563 *pc1 = one + t1;
564 *pc2 = a2_2 + t2_2;
565
566 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
567 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
568 t1_2 = a2_2*w2 + a1_2*t2;
569
570 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
571 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
572 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
573
574 t0 = x0 + x0 * t0;
575 *py0 = t0;
576 t1 = x1 + x1 * t1;
577 *py1 = t1;
578 *py2 = a1_2 + t1_2;
579
580 break;
581
582 case 7: /* All are < 5/32 */
583 z0 = x0 * x0;
584 z1 = x1 * x1;
585 z2 = x2 * x2;
586 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
587 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
588 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
589 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
590 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
591 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
592 *pc0 = one + t0;
593 *pc1 = one + t1;
594 *pc2 = one + t2;
595 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
596 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
597 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
598 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
599 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
600 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
601 t0 = x0 + x0 * t0;
602 t1 = x1 + x1 * t1;
603 t2 = x2 + x2 * t2;
604 *py0 = t0;
605 *py1 = t1;
606 *py2 = t2;
607 break;
608 }
609
610 x += stridex;
611 y += stridey;
612 c += stridec;
613 i = 0;
614 } while ( --n > 0 ); /* END MAIN LOOP */
615
616 /*
617 * CLEAN UP last 0, 1, or 2 elts.
618 */
619 if ( i > 0 ) /* Clean up elts at tail. i < 3. */
620 {
621 double a1_0, a1_1, a2_0, a2_1;
622 double w0, w1;
623 double t0, t1, t1_0, t1_1, t2_0, t2_1;
624 double z0, z1;
625 unsigned j0, j1;
626
627 if ( i > 1 )
628 {
629 if ( hx1 < 0x3fc40000 )
630 {
631 z1 = x1 * x1;
632 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
633 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
634 t1 = one + t1;
635 *pc1 = t1;
636 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
637 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
638 t1 = x1 + x1 * t1;
639 *py1 = t1;
640 }
641 else
642 {
643 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
644 HI(&t1) = j1;
645 LO(&t1) = 0;
646 x1 -= t1;
647 z1 = x1 * x1;
648 t1 = z1 * ( qq1 + z1 * qq2 );
649 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
650 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
651 xsb1 = ( xsb1 >> 30 ) & 2;
652 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
653 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
654 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
655 *pc1 = a2_1 + t2_1;
656 t1_1 = a2_1*w1 + a1_1*t1;
657 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
658 *py1 = a1_1 + t1_1;
659 }
660 }
661 if ( hx0 < 0x3fc40000 )
662 {
663 z0 = x0 * x0;
664 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
665 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
666 t0 = one + t0;
667 *pc0 = t0;
668 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
669 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
670 t0 = x0 + x0 * t0;
671 *py0 = t0;
672 }
673 else
674 {
675 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
676 HI(&t0) = j0;
677 LO(&t0) = 0;
678 x0 -= t0;
679 z0 = x0 * x0;
680 t0 = z0 * ( qq1 + z0 * qq2 );
681 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
682 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
683 xsb0 = ( xsb0 >> 30 ) & 2;
684 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
685 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
686 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
687 *pc0 = a2_0 + t2_0;
688 t1_0 = a2_0*w0 + a1_0*t0;
689 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
690 *py0 = a1_0 + t1_0;
691 }
692 } /* END CLEAN UP */
693
694 if ( !biguns )
695 return;
696
697 /*
698 * Take care of BIGUNS.
699 */
700 n = nsave;
701 x = xsave;
702 stridex = sxsave;
703 y = ysave;
704 stridey = sysave;
705 c = csave;
706 stridec = scsave;
707 biguns = 0;
708
709 x0_or_one[1] = 1.0;
710 x1_or_one[1] = 1.0;
711 x2_or_one[1] = 1.0;
712 x0_or_one[3] = -1.0;
713 x1_or_one[3] = -1.0;
714 x2_or_one[3] = -1.0;
715 y0_or_zero[1] = 0.0;
716 y1_or_zero[1] = 0.0;
717 y2_or_zero[1] = 0.0;
718 y0_or_zero[3] = 0.0;
719 y1_or_zero[3] = 0.0;
720 y2_or_zero[3] = 0.0;
721
722 do
723 {
724 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
725 unsigned hx;
726 int n0, n1, n2;
727
728 /*
729 * Find 3 more to work on: Not already done, not too big.
730 */
731 loop0:
732 hx = HI(x);
733 xsb0 = hx >> 31;
734 hx &= ~0x80000000;
735 if ( hx <= 0x3fe921fb ) /* Done above. */
736 {
737 x += stridex;
738 y += stridey;
739 c += stridec;
740 i = 0;
741 if ( --n <= 0 )
742 break;
743 goto loop0;
744 }
745 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
746 {
747 if ( hx >= 0x7ff00000 ) /* Inf or NaN */
748 {
749 x0 = *x;
750 *y = x0 - x0;
751 *c = x0 - x0;
752 }
753 else {
754 biguns = 1;
755 }
756 x += stridex;
757 y += stridey;
758 c += stridec;
759 i = 0;
760 if ( --n <= 0 )
761 break;
762 goto loop0;
763 }
764 x0 = *x;
765 py0 = y;
766 pc0 = c;
767 x += stridex;
768 y += stridey;
769 c += stridec;
770 i = 1;
771 if ( --n <= 0 )
772 break;
773
774 loop1:
775 hx = HI(x);
776 xsb1 = hx >> 31;
777 hx &= ~0x80000000;
778 if ( hx <= 0x3fe921fb )
779 {
780 x += stridex;
781 y += stridey;
782 c += stridec;
783 i = 1;
784 if ( --n <= 0 )
785 break;
786 goto loop1;
787 }
788 if ( hx > 0x413921fb )
789 {
790 if ( hx >= 0x7ff00000 )
791 {
792 x1 = *x;
793 *y = x1 - x1;
794 *c = x1 - x1;
795 }
796 else {
797 biguns = 1;
798 }
799 x += stridex;
800 y += stridey;
801 c += stridec;
802 i = 1;
803 if ( --n <= 0 )
804 break;
805 goto loop1;
806 }
807 x1 = *x;
808 py1 = y;
809 pc1 = c;
810 x += stridex;
811 y += stridey;
812 c += stridec;
813 i = 2;
814 if ( --n <= 0 )
815 break;
816
817 loop2:
818 hx = HI(x);
819 xsb2 = hx >> 31;
820 hx &= ~0x80000000;
821 if ( hx <= 0x3fe921fb )
822 {
823 x += stridex;
824 y += stridey;
825 c += stridec;
826 i = 2;
827 if ( --n <= 0 )
828 break;
829 goto loop2;
830 }
831 if ( hx > 0x413921fb )
832 {
833 if ( hx >= 0x7ff00000 )
834 {
835 x2 = *x;
836 *y = x2 - x2;
837 *c = x2 - x2;
838 }
839 else {
840 biguns = 1;
841 }
842 x += stridex;
843 y += stridey;
844 c += stridec;
845 i = 2;
846 if ( --n <= 0 )
847 break;
848 goto loop2;
849 }
850 x2 = *x;
851 py2 = y;
852 pc2 = c;
853
854 n0 = (int) ( x0 * invpio2 + half[xsb0] );
855 n1 = (int) ( x1 * invpio2 + half[xsb1] );
856 n2 = (int) ( x2 * invpio2 + half[xsb2] );
857 fn0 = (double) n0;
858 fn1 = (double) n1;
859 fn2 = (double) n2;
860 n0 &= 3;
861 n1 &= 3;
862 n2 &= 3;
863 a0 = x0 - fn0 * pio2_1;
864 a1 = x1 - fn1 * pio2_1;
865 a2 = x2 - fn2 * pio2_1;
866 w0 = fn0 * pio2_2;
867 w1 = fn1 * pio2_2;
868 w2 = fn2 * pio2_2;
869 x0 = a0 - w0;
870 x1 = a1 - w1;
871 x2 = a2 - w2;
872 y0 = ( a0 - x0 ) - w0;
873 y1 = ( a1 - x1 ) - w1;
874 y2 = ( a2 - x2 ) - w2;
875 a0 = x0;
876 a1 = x1;
877 a2 = x2;
878 w0 = fn0 * pio2_3 - y0;
879 w1 = fn1 * pio2_3 - y1;
880 w2 = fn2 * pio2_3 - y2;
881 x0 = a0 - w0;
882 x1 = a1 - w1;
883 x2 = a2 - w2;
884 y0 = ( a0 - x0 ) - w0;
885 y1 = ( a1 - x1 ) - w1;
886 y2 = ( a2 - x2 ) - w2;
887 a0 = x0;
888 a1 = x1;
889 a2 = x2;
890 w0 = fn0 * pio2_3t - y0;
891 w1 = fn1 * pio2_3t - y1;
892 w2 = fn2 * pio2_3t - y2;
893 x0 = a0 - w0;
894 x1 = a1 - w1;
895 x2 = a2 - w2;
896 y0 = ( a0 - x0 ) - w0;
897 y1 = ( a1 - x1 ) - w1;
898 y2 = ( a2 - x2 ) - w2;
899 xsb2 = HI(&x2);
900 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
901 xsb1 = HI(&x1);
902 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
903 xsb0 = HI(&x0);
904 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
905 switch ( i )
906 {
907 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
908 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
909 double z0, z1, z2;
910 unsigned j0, j1, j2;
911
912 case 0:
913 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
914 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
915 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
916 HI(&t0) = j0;
917 HI(&t1) = j1;
918 HI(&t2) = j2;
919 LO(&t0) = 0;
920 LO(&t1) = 0;
921 LO(&t2) = 0;
922 x0 = ( x0 - t0 ) + y0;
923 x1 = ( x1 - t1 ) + y1;
924 x2 = ( x2 - t2 ) + y2;
925 z0 = x0 * x0;
926 z1 = x1 * x1;
927 z2 = x2 * x2;
928 t0 = z0 * ( qq1 + z0 * qq2 );
929 t1 = z1 * ( qq1 + z1 * qq2 );
930 t2 = z2 * ( qq1 + z2 * qq2 );
931 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
932 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
933 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
934 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
935 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
936 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
937 xsb0 = ( xsb0 >> 30 ) & 2;
938 xsb1 = ( xsb1 >> 30 ) & 2;
939 xsb2 = ( xsb2 >> 30 ) & 2;
940 n0 ^= ( xsb0 & ~( n0 << 1 ) );
941 n1 ^= ( xsb1 & ~( n1 << 1 ) );
942 n2 ^= ( xsb2 & ~( n2 << 1 ) );
943 xsb0 |= 1;
944 xsb1 |= 1;
945 xsb2 |= 1;
946
947 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
948 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
949 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
950
951 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
952 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
953 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
954
955 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
956 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
957 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
958
959 w0 *= a2_0;
960 w1 *= a2_1;
961 w2 *= a2_2;
962
963 *pc0 = a2_0 + t2_0;
964 *pc1 = a2_1 + t2_1;
965 *pc2 = a2_2 + t2_2;
966
967 t1_0 = w0 + a1_0*t0;
968 t1_1 = w1 + a1_1*t1;
969 t1_2 = w2 + a1_2*t2;
970
971 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
972 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
973 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
974
975 *py0 = a1_0 + t1_0;
976 *py1 = a1_1 + t1_1;
977 *py2 = a1_2 + t1_2;
978
979 break;
980
981 case 1:
982 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
983 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
984 j2 = n2 & 1;
985 HI(&t0) = j0;
986 HI(&t1) = j1;
987 LO(&t0) = 0;
988 LO(&t1) = 0;
989 x2_or_one[0] = x2;
990 x2_or_one[2] = -x2;
991 x0 = ( x0 - t0 ) + y0;
992 x1 = ( x1 - t1 ) + y1;
993 y2_or_zero[0] = y2;
994 y2_or_zero[2] = -y2;
995 z0 = x0 * x0;
996 z1 = x1 * x1;
997 z2 = x2 * x2;
998 t0 = z0 * ( qq1 + z0 * qq2 );
999 t1 = z1 * ( qq1 + z1 * qq2 );
1000 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1001 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1002 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1003 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1004 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1005 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1006 xsb0 = ( xsb0 >> 30 ) & 2;
1007 xsb1 = ( xsb1 >> 30 ) & 2;
1008 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1009 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1010 xsb0 |= 1;
1011 xsb1 |= 1;
1012 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1013 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1014
1015 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1016 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1017
1018 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1019 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1020 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1021
1022 *pc0 = a2_0 + t2_0;
1023 *pc1 = a2_1 + t2_1;
1024 *py2 = t2;
1025
1026 n2 = (n2 + 1) & 3;
1027 j2 = (j2 + 1) & 1;
1028 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1029
1030 t1_0 = a2_0*w0 + a1_0*t0;
1031 t1_1 = a2_1*w1 + a1_1*t1;
1032 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1033
1034 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1035 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1036 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1037
1038 *py0 = a1_0 + t1_0;
1039 *py1 = a1_1 + t1_1;
1040 *pc2 = t2;
1041
1042 break;
1043
1044 case 2:
1045 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1046 j1 = n1 & 1;
1047 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1048 HI(&t0) = j0;
1049 HI(&t2) = j2;
1050 LO(&t0) = 0;
1051 LO(&t2) = 0;
1052 x1_or_one[0] = x1;
1053 x1_or_one[2] = -x1;
1054 x0 = ( x0 - t0 ) + y0;
1055 y1_or_zero[0] = y1;
1056 y1_or_zero[2] = -y1;
1057 x2 = ( x2 - t2 ) + y2;
1058 z0 = x0 * x0;
1059 z1 = x1 * x1;
1060 z2 = x2 * x2;
1061 t0 = z0 * ( qq1 + z0 * qq2 );
1062 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1063 t2 = z2 * ( qq1 + z2 * qq2 );
1064 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1065 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1066 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1067 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1068 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1069 xsb0 = ( xsb0 >> 30 ) & 2;
1070 xsb2 = ( xsb2 >> 30 ) & 2;
1071 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1072 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1073 xsb0 |= 1;
1074 xsb2 |= 1;
1075
1076 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1077 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1078
1079 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1080 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1081
1082 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1083 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1084 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1085
1086 *pc0 = a2_0 + t2_0;
1087 *py1 = t1;
1088 *pc2 = a2_2 + t2_2;
1089
1090 n1 = (n1 + 1) & 3;
1091 j1 = (j1 + 1) & 1;
1092 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1093
1094 t1_0 = a2_0*w0 + a1_0*t0;
1095 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1096 t1_2 = a2_2*w2 + a1_2*t2;
1097
1098 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1099 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1100 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1101
1102 *py0 = a1_0 + t1_0;
1103 *pc1 = t1;
1104 *py2 = a1_2 + t1_2;
1105
1106 break;
1107
1108 case 3:
1109 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1110 j1 = n1 & 1;
1111 j2 = n2 & 1;
1112 HI(&t0) = j0;
1113 LO(&t0) = 0;
1114 x1_or_one[0] = x1;
1115 x1_or_one[2] = -x1;
1116 x2_or_one[0] = x2;
1117 x2_or_one[2] = -x2;
1118 x0 = ( x0 - t0 ) + y0;
1119 y1_or_zero[0] = y1;
1120 y1_or_zero[2] = -y1;
1121 y2_or_zero[0] = y2;
1122 y2_or_zero[2] = -y2;
1123 z0 = x0 * x0;
1124 z1 = x1 * x1;
1125 z2 = x2 * x2;
1126 t0 = z0 * ( qq1 + z0 * qq2 );
1127 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1128 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1129 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1130 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1131 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1132 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1133 xsb0 = ( xsb0 >> 30 ) & 2;
1134 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1135 xsb0 |= 1;
1136
1137 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1138 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1139
1140 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1141 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1142 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1143
1144 *pc0 = a2_0 + t2_0;
1145 *py1 = t1;
1146 *py2 = t2;
1147
1148 n1 = (n1 + 1) & 3;
1149 n2 = (n2 + 1) & 3;
1150 j1 = (j1 + 1) & 1;
1151 j2 = (j2 + 1) & 1;
1152
1153 t1_0 = a2_0*w0 + a1_0*t0;
1154 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1155 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1156
1157 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1158 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1159 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1160
1161 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1162 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1163
1164 *py0 = a1_0 + t1_0;
1165 *pc1 = t1;
1166 *pc2 = t2;
1167
1168 break;
1169
1170 case 4:
1171 j0 = n0 & 1;
1172 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1173 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1174 HI(&t1) = j1;
1175 HI(&t2) = j2;
1176 LO(&t1) = 0;
1177 LO(&t2) = 0;
1178 x0_or_one[0] = x0;
1179 x0_or_one[2] = -x0;
1180 y0_or_zero[0] = y0;
1181 y0_or_zero[2] = -y0;
1182 x1 = ( x1 - t1 ) + y1;
1183 x2 = ( x2 - t2 ) + y2;
1184 z0 = x0 * x0;
1185 z1 = x1 * x1;
1186 z2 = x2 * x2;
1187 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1188 t1 = z1 * ( qq1 + z1 * qq2 );
1189 t2 = z2 * ( qq1 + z2 * qq2 );
1190 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1191 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1192 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1193 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1194 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1195 xsb1 = ( xsb1 >> 30 ) & 2;
1196 xsb2 = ( xsb2 >> 30 ) & 2;
1197 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1198 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1199 xsb1 |= 1;
1200 xsb2 |= 1;
1201
1202 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1203 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1204
1205 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1206 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1207
1208 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1209 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1210 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1211
1212 *py0 = t0;
1213 *pc1 = a2_1 + t2_1;
1214 *pc2 = a2_2 + t2_2;
1215
1216 n0 = (n0 + 1) & 3;
1217 j0 = (j0 + 1) & 1;
1218 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1219
1220 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1221 t1_1 = a2_1*w1 + a1_1*t1;
1222 t1_2 = a2_2*w2 + a1_2*t2;
1223
1224 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1225 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1226 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1227
1228 *py1 = a1_1 + t1_1;
1229 *py2 = a1_2 + t1_2;
1230 *pc0 = t0;
1231
1232 break;
1233
1234 case 5:
1235 j0 = n0 & 1;
1236 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1237 j2 = n2 & 1;
1238 HI(&t1) = j1;
1239 LO(&t1) = 0;
1240 x0_or_one[0] = x0;
1241 x0_or_one[2] = -x0;
1242 x2_or_one[0] = x2;
1243 x2_or_one[2] = -x2;
1244 y0_or_zero[0] = y0;
1245 y0_or_zero[2] = -y0;
1246 x1 = ( x1 - t1 ) + y1;
1247 y2_or_zero[0] = y2;
1248 y2_or_zero[2] = -y2;
1249 z0 = x0 * x0;
1250 z1 = x1 * x1;
1251 z2 = x2 * x2;
1252 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1253 t1 = z1 * ( qq1 + z1 * qq2 );
1254 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1255 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1256 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1257 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1258 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1259 xsb1 = ( xsb1 >> 30 ) & 2;
1260 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1261 xsb1 |= 1;
1262
1263 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1264 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1265
1266 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1267 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1268 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1269
1270 *py0 = t0;
1271 *pc1 = a2_1 + t2_1;
1272 *py2 = t2;
1273
1274 n0 = (n0 + 1) & 3;
1275 n2 = (n2 + 1) & 3;
1276 j0 = (j0 + 1) & 1;
1277 j2 = (j2 + 1) & 1;
1278
1279 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1280 t1_1 = a2_1*w1 + a1_1*t1;
1281 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1282
1283 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1284 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1285 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1286
1287 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1288 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1289
1290 *pc0 = t0;
1291 *py1 = a1_1 + t1_1;
1292 *pc2 = t2;
1293
1294 break;
1295
1296 case 6:
1297 j0 = n0 & 1;
1298 j1 = n1 & 1;
1299 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1300 HI(&t2) = j2;
1301 LO(&t2) = 0;
1302 x0_or_one[0] = x0;
1303 x0_or_one[2] = -x0;
1304 x1_or_one[0] = x1;
1305 x1_or_one[2] = -x1;
1306 y0_or_zero[0] = y0;
1307 y0_or_zero[2] = -y0;
1308 y1_or_zero[0] = y1;
1309 y1_or_zero[2] = -y1;
1310 x2 = ( x2 - t2 ) + y2;
1311 z0 = x0 * x0;
1312 z1 = x1 * x1;
1313 z2 = x2 * x2;
1314 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1315 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1316 t2 = z2 * ( qq1 + z2 * qq2 );
1317 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1318 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1319 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1320 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1321 xsb2 = ( xsb2 >> 30 ) & 2;
1322 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1323 xsb2 |= 1;
1324
1325 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1326 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1327
1328 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1329 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1330 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1331
1332 *py0 = t0;
1333 *py1 = t1;
1334 *pc2 = a2_2 + t2_2;
1335
1336 n0 = (n0 + 1) & 3;
1337 n1 = (n1 + 1) & 3;
1338 j0 = (j0 + 1) & 1;
1339 j1 = (j1 + 1) & 1;
1340
1341 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1342 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1343 t1_2 = a2_2*w2 + a1_2*t2;
1344
1345 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1346 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1347 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1348
1349 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1350 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1351
1352 *pc0 = t0;
1353 *pc1 = t1;
1354 *py2 = a1_2 + t1_2;
1355
1356 break;
1357
1358 case 7:
1359 j0 = n0 & 1;
1360 j1 = n1 & 1;
1361 j2 = n2 & 1;
1362 x0_or_one[0] = x0;
1363 x0_or_one[2] = -x0;
1364 x1_or_one[0] = x1;
1365 x1_or_one[2] = -x1;
1366 x2_or_one[0] = x2;
1367 x2_or_one[2] = -x2;
1368 y0_or_zero[0] = y0;
1369 y0_or_zero[2] = -y0;
1370 y1_or_zero[0] = y1;
1371 y1_or_zero[2] = -y1;
1372 y2_or_zero[0] = y2;
1373 y2_or_zero[2] = -y2;
1374 z0 = x0 * x0;
1375 z1 = x1 * x1;
1376 z2 = x2 * x2;
1377 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1378 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1379 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1380 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1381 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1382 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1383 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1384 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1385 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1386 *py0 = t0;
1387 *py1 = t1;
1388 *py2 = t2;
1389
1390 n0 = (n0 + 1) & 3;
1391 n1 = (n1 + 1) & 3;
1392 n2 = (n2 + 1) & 3;
1393 j0 = (j0 + 1) & 1;
1394 j1 = (j1 + 1) & 1;
1395 j2 = (j2 + 1) & 1;
1396 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1397 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1398 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1399 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1400 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1401 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1402 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1403 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1404 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1405 *pc0 = t0;
1406 *pc1 = t1;
1407 *pc2 = t2;
1408 break;
1409 }
1410
1411 x += stridex;
1412 y += stridey;
1413 c += stridec;
1414 i = 0;
1415 } while ( --n > 0 );
1416
1417 if ( i > 0 )
1418 {
1419 double a1_0, a1_1, a2_0, a2_1;
1420 double t0, t1, t1_0, t1_1, t2_0, t2_1;
1421 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1422 double z0, z1;
1423 unsigned j0, j1;
1424 int n0, n1;
1425
1426 if ( i > 1 )
1427 {
1428 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1429 fn1 = (double) n1;
1430 n1 &= 3;
1431 a1 = x1 - fn1 * pio2_1;
1432 w1 = fn1 * pio2_2;
1433 x1 = a1 - w1;
1434 y1 = ( a1 - x1 ) - w1;
1435 a1 = x1;
1436 w1 = fn1 * pio2_3 - y1;
1437 x1 = a1 - w1;
1438 y1 = ( a1 - x1 ) - w1;
1439 a1 = x1;
1440 w1 = fn1 * pio2_3t - y1;
1441 x1 = a1 - w1;
1442 y1 = ( a1 - x1 ) - w1;
1443 xsb1 = HI(&x1);
1444 if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1445 {
1446 j1 = n1 & 1;
1447 x1_or_one[0] = x1;
1448 x1_or_one[2] = -x1;
1449 y1_or_zero[0] = y1;
1450 y1_or_zero[2] = -y1;
1451 z1 = x1 * x1;
1452 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1453 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1454 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1455 *py1 = t1;
1456 n1 = (n1 + 1) & 3;
1457 j1 = (j1 + 1) & 1;
1458 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1459 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1460 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1461 *pc1 = t1;
1462 }
1463 else
1464 {
1465 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1466 HI(&t1) = j1;
1467 LO(&t1) = 0;
1468 x1 = ( x1 - t1 ) + y1;
1469 z1 = x1 * x1;
1470 t1 = z1 * ( qq1 + z1 * qq2 );
1471 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1472 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1473 xsb1 = ( xsb1 >> 30 ) & 2;
1474 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1475 xsb1 |= 1;
1476 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1477 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1478 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1479 *pc1 = a2_1 + t2_1;
1480 t1_1 = a2_1*w1 + a1_1*t1;
1481 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1482 *py1 = a1_1 + t1_1;
1483 }
1484 }
1485 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1486 fn0 = (double) n0;
1487 n0 &= 3;
1488 a0 = x0 - fn0 * pio2_1;
1489 w0 = fn0 * pio2_2;
1490 x0 = a0 - w0;
1491 y0 = ( a0 - x0 ) - w0;
1492 a0 = x0;
1493 w0 = fn0 * pio2_3 - y0;
1494 x0 = a0 - w0;
1495 y0 = ( a0 - x0 ) - w0;
1496 a0 = x0;
1497 w0 = fn0 * pio2_3t - y0;
1498 x0 = a0 - w0;
1499 y0 = ( a0 - x0 ) - w0;
1500 xsb0 = HI(&x0);
1501 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1502 {
1503 j0 = n0 & 1;
1504 x0_or_one[0] = x0;
1505 x0_or_one[2] = -x0;
1506 y0_or_zero[0] = y0;
1507 y0_or_zero[2] = -y0;
1508 z0 = x0 * x0;
1509 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1510 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1511 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1512 *py0 = t0;
1513 n0 = (n0 + 1) & 3;
1514 j0 = (j0 + 1) & 1;
1515 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1516 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1517 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1518 *pc0 = t0;
1519 }
1520 else
1521 {
1522 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1523 HI(&t0) = j0;
1524 LO(&t0) = 0;
1525 x0 = ( x0 - t0 ) + y0;
1526 z0 = x0 * x0;
1527 t0 = z0 * ( qq1 + z0 * qq2 );
1528 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1529 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1530 xsb0 = ( xsb0 >> 30 ) & 2;
1531 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1532 xsb0 |= 1;
1533 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1534 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1535 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1536 *pc0 = a2_0 + t2_0;
1537 t1_0 = a2_0*w0 + a1_0*t0;
1538 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1539 *py0 = a1_0 + t1_0;
1540 }
1541 }
1542
1543 if ( biguns ) {
1544 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1545 }
1546 }