Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vsincos.c
+++ new/usr/src/lib/libmvec/common/__vsincos.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 +#include <sys/ccompile.h>
31 32
32 33 #ifdef _LITTLE_ENDIAN
33 34 #define HI(x) *(1+(int*)x)
34 35 #define LO(x) *(unsigned*)x
35 36 #else
36 37 #define HI(x) *(int*)x
37 38 #define LO(x) *(1+(unsigned*)x)
38 39 #endif
39 40
40 41 #ifdef __RESTRICT
41 42 #define restrict _Restrict
42 43 #else
43 44 #define restrict
44 45 #endif
45 46
46 47 /*
47 48 * vsincos.c
48 49 *
49 50 * Vector sine and cosine function. Just slight modifications to vcos.c.
50 51 */
51 52
52 53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
53 54
54 55 static const double
55 56 half[2] = { 0.5, -0.5 },
56 57 one = 1.0,
57 58 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
58 59 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
59 60 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
60 61 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
61 62 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
62 63 pp1 = -1.666666666605760465276263943134982554676e-0001,
63 64 pp2 = 8.333261209690963126718376566146180944442e-0003,
64 65 qq1 = -4.999999999977710986407023955908711557870e-0001,
65 66 qq2 = 4.166654863857219350645055881018842089580e-0002,
66 67 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
67 68 -4.999999999999931701464060878888294524481e-0001 },
68 69 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
69 70 4.166666666394861917535640593963708222319e-0002 },
70 71 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
71 72 -1.388888552656142867832756687736851681462e-0003 },
72 73 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
73 74 2.478519423681460796618128289454530524759e-0005 };
74 75
75 76 /* Don't __ the following; acomp will handle it */
76 77 extern double fabs( double );
77 78 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
78 79
79 80 /*
80 81 * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
81 82 * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
82 83 *
83 84 * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
84 85 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
85 86 *
↓ open down ↓ |
45 lines elided |
↑ open up ↑ |
86 87 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
87 88 */
88 89 void
89 90 __vsincos( int n, double * restrict x, int stridex,
90 91 double * restrict y, int stridey,
91 92 double * restrict c, int stridec )
92 93 {
93 94 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
94 95 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
95 96 double x0, x1, x2,
96 - *py0, *py1, *py2,
97 - *pc0, *pc1, *pc2,
98 - *xsave, *ysave, *csave;
97 + *py0, *py1, *py2,
98 + *pc0, *pc1, *pc2,
99 + *xsave, *ysave, *csave;
99 100 unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
100 - int i, biguns, nsave, sxsave, sysave, scsave;
101 -
101 + int i, biguns, nsave, sxsave, sysave, scsave;
102 102 nsave = n;
103 103 xsave = x;
104 104 sxsave = stridex;
105 105 ysave = y;
106 106 sysave = stridey;
107 107 csave = c;
108 108 scsave = stridec;
109 109 biguns = 0;
110 110
111 111 do /* MAIN LOOP */
112 112 {
113 113
114 114 /* Gotos here so _break_ exits MAIN LOOP. */
115 115 LOOP0: /* Find first arg in right range. */
116 116 xsb0 = HI(x); /* get most significant word */
117 117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 118 if ( hx0 > 0x3fe921fb ) {
119 119 /* Too big: arg reduction needed, so leave for second part */
120 120 biguns = 1;
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
121 121 x += stridex;
122 122 y += stridey;
123 123 c += stridec;
124 124 i = 0;
125 125 if ( --n <= 0 )
126 126 break;
127 127 goto LOOP0;
128 128 }
129 129 if ( hx0 < 0x3e400000 ) {
130 130 /* Too small. cos x ~ 1, sin x ~ x. */
131 - volatile int v = *x;
132 131 *c = 1.0;
133 132 *y = *x;
134 133 x += stridex;
135 134 y += stridey;
136 135 c += stridec;
137 136 i = 0;
138 137 if ( --n <= 0 )
139 138 break;
140 139 goto LOOP0;
141 140 }
142 141 x0 = *x;
143 142 py0 = y;
144 143 pc0 = c;
145 144 x += stridex;
146 145 y += stridey;
147 146 c += stridec;
148 147 i = 1;
149 148 if ( --n <= 0 )
150 149 break;
151 150
152 151 LOOP1: /* Get second arg, same as above. */
153 152 xsb1 = HI(x);
154 153 hx1 = xsb1 & ~0x80000000;
155 154 if ( hx1 > 0x3fe921fb )
156 155 {
157 156 biguns = 1;
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
158 157 x += stridex;
159 158 y += stridey;
160 159 c += stridec;
161 160 i = 1;
162 161 if ( --n <= 0 )
163 162 break;
164 163 goto LOOP1;
165 164 }
166 165 if ( hx1 < 0x3e400000 )
167 166 {
168 - volatile int v = *x;
169 167 *c = 1.0;
170 168 *y = *x;
171 169 x += stridex;
172 170 y += stridey;
173 171 c += stridec;
174 172 i = 1;
175 173 if ( --n <= 0 )
176 174 break;
177 175 goto LOOP1;
178 176 }
179 177 x1 = *x;
180 178 py1 = y;
181 179 pc1 = c;
182 180 x += stridex;
183 181 y += stridey;
184 182 c += stridec;
185 183 i = 2;
186 184 if ( --n <= 0 )
187 185 break;
188 186
189 187 LOOP2: /* Get third arg, same as above. */
190 188 xsb2 = HI(x);
191 189 hx2 = xsb2 & ~0x80000000;
192 190 if ( hx2 > 0x3fe921fb )
193 191 {
194 192 biguns = 1;
↓ open down ↓ |
16 lines elided |
↑ open up ↑ |
195 193 x += stridex;
196 194 y += stridey;
197 195 c += stridec;
198 196 i = 2;
199 197 if ( --n <= 0 )
200 198 break;
201 199 goto LOOP2;
202 200 }
203 201 if ( hx2 < 0x3e400000 )
204 202 {
205 - volatile int v = *x;
206 203 *c = 1.0;
207 204 *y = *x;
208 205 x += stridex;
209 206 y += stridey;
210 207 c += stridec;
211 208 i = 2;
212 209 if ( --n <= 0 )
213 210 break;
214 211 goto LOOP2;
215 212 }
216 213 x2 = *x;
217 214 py2 = y;
218 215 pc2 = c;
219 216
220 217 /*
221 218 * 0x3fc40000 = 5/32 ~ 0.15625
222 219 * Get msb after subtraction. Will be 1 only if
223 220 * hx0 - 5/32 is negative.
224 221 */
225 222 i = ( hx2 - 0x3fc40000 ) >> 31;
226 223 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
227 224 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
228 225 switch ( i )
229 226 {
230 227 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
231 228 double w0, w1, w2;
232 229 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
233 230 double z0, z1, z2;
234 231 unsigned j0, j1, j2;
235 232
236 233 case 0: /* All are > 5/32 */
237 234 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
238 235 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
239 236 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
240 237
241 238 HI(&t0) = j0;
242 239 HI(&t1) = j1;
243 240 HI(&t2) = j2;
244 241 LO(&t0) = 0;
245 242 LO(&t1) = 0;
246 243 LO(&t2) = 0;
247 244
248 245 x0 -= t0;
249 246 x1 -= t1;
250 247 x2 -= t2;
251 248
252 249 z0 = x0 * x0;
253 250 z1 = x1 * x1;
254 251 z2 = x2 * x2;
255 252
256 253 t0 = z0 * ( qq1 + z0 * qq2 );
257 254 t1 = z1 * ( qq1 + z1 * qq2 );
258 255 t2 = z2 * ( qq1 + z2 * qq2 );
259 256
260 257 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
261 258 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
262 259 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
263 260
264 261 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
265 262 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
266 263 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
267 264
268 265 xsb0 = ( xsb0 >> 30 ) & 2;
269 266 xsb1 = ( xsb1 >> 30 ) & 2;
270 267 xsb2 = ( xsb2 >> 30 ) & 2;
271 268
272 269 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
273 270 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
274 271 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
275 272
276 273 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
277 274 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
278 275 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
279 276 /* cos_lo(t) */
280 277 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
281 278 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
282 279 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
283 280
284 281 *pc0 = a2_0 + t2_0;
285 282 *pc1 = a2_1 + t2_1;
286 283 *pc2 = a2_2 + t2_2;
287 284
288 285 t1_0 = a2_0*w0 + a1_0*t0;
289 286 t1_1 = a2_1*w1 + a1_1*t1;
290 287 t1_2 = a2_2*w2 + a1_2*t2;
291 288
292 289 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
293 290 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
294 291 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
295 292
296 293 *py0 = a1_0 + t1_0;
297 294 *py1 = a1_1 + t1_1;
298 295 *py2 = a1_2 + t1_2;
299 296
300 297 break;
301 298
302 299 case 1:
303 300 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
304 301 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
305 302 HI(&t0) = j0;
306 303 HI(&t1) = j1;
307 304 LO(&t0) = 0;
308 305 LO(&t1) = 0;
309 306 x0 -= t0;
310 307 x1 -= t1;
311 308 z0 = x0 * x0;
312 309 z1 = x1 * x1;
313 310 z2 = x2 * x2;
314 311 t0 = z0 * ( qq1 + z0 * qq2 );
315 312 t1 = z1 * ( qq1 + z1 * qq2 );
316 313 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
317 314 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
318 315 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
319 316 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
320 317 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
321 318 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
322 319 xsb0 = ( xsb0 >> 30 ) & 2;
323 320 xsb1 = ( xsb1 >> 30 ) & 2;
324 321
325 322 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
326 323 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
327 324
328 325 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
329 326 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
330 327 /* cos_lo(t) */
331 328 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
332 329 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
333 330
334 331 *pc0 = a2_0 + t2_0;
335 332 *pc1 = a2_1 + t2_1;
336 333 *pc2 = one + t2;
337 334
338 335 t1_0 = a2_0*w0 + a1_0*t0;
339 336 t1_1 = a2_1*w1 + a1_1*t1;
340 337 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
341 338
342 339 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
343 340 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
344 341 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
345 342
346 343 *py0 = a1_0 + t1_0;
347 344 *py1 = a1_1 + t1_1;
348 345 t2 = x2 + x2 * t2;
349 346 *py2 = t2;
350 347
351 348 break;
352 349
353 350 case 2:
354 351 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
355 352 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
356 353 HI(&t0) = j0;
357 354 HI(&t2) = j2;
358 355 LO(&t0) = 0;
359 356 LO(&t2) = 0;
360 357 x0 -= t0;
361 358 x2 -= t2;
362 359 z0 = x0 * x0;
363 360 z1 = x1 * x1;
364 361 z2 = x2 * x2;
365 362 t0 = z0 * ( qq1 + z0 * qq2 );
366 363 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
367 364 t2 = z2 * ( qq1 + z2 * qq2 );
368 365 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
369 366 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
370 367 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
371 368 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
372 369 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
373 370 xsb0 = ( xsb0 >> 30 ) & 2;
374 371 xsb2 = ( xsb2 >> 30 ) & 2;
375 372
376 373 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
377 374 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
378 375
379 376 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
380 377 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
381 378 /* cos_lo(t) */
382 379 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
383 380 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
384 381
385 382 *pc0 = a2_0 + t2_0;
386 383 *pc1 = one + t1;
387 384 *pc2 = a2_2 + t2_2;
388 385
389 386 t1_0 = a2_0*w0 + a1_0*t0;
390 387 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
391 388 t1_2 = a2_2*w2 + a1_2*t2;
392 389
393 390 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
394 391 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
395 392 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
396 393
397 394 *py0 = a1_0 + t1_0;
398 395 t1 = x1 + x1 * t1;
399 396 *py1 = t1;
400 397 *py2 = a1_2 + t1_2;
401 398
402 399 break;
403 400
404 401 case 3:
405 402 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
406 403 HI(&t0) = j0;
407 404 LO(&t0) = 0;
408 405 x0 -= t0;
409 406 z0 = x0 * x0;
410 407 z1 = x1 * x1;
411 408 z2 = x2 * x2;
412 409 t0 = z0 * ( qq1 + z0 * qq2 );
413 410 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
414 411 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
415 412 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
416 413 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
417 414 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
418 415 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
419 416 xsb0 = ( xsb0 >> 30 ) & 2;
420 417 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
421 418
422 419 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
423 420
424 421 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
425 422
426 423 *pc0 = a2_0 + t2_0;
427 424 *pc1 = one + t1;
428 425 *pc2 = one + t2;
429 426
430 427 t1_0 = a2_0*w0 + a1_0*t0;
431 428 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
432 429 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
433 430
434 431 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
435 432 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
436 433 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
437 434
438 435 *py0 = a1_0 + t1_0;
439 436 t1 = x1 + x1 * t1;
440 437 *py1 = t1;
441 438 t2 = x2 + x2 * t2;
442 439 *py2 = t2;
443 440
444 441 break;
445 442
446 443 case 4:
447 444 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
448 445 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
449 446 HI(&t1) = j1;
450 447 HI(&t2) = j2;
451 448 LO(&t1) = 0;
452 449 LO(&t2) = 0;
453 450 x1 -= t1;
454 451 x2 -= t2;
455 452 z0 = x0 * x0;
456 453 z1 = x1 * x1;
457 454 z2 = x2 * x2;
458 455 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
459 456 t1 = z1 * ( qq1 + z1 * qq2 );
460 457 t2 = z2 * ( qq1 + z2 * qq2 );
461 458 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
462 459 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
463 460 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
464 461 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
465 462 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
466 463 xsb1 = ( xsb1 >> 30 ) & 2;
467 464 xsb2 = ( xsb2 >> 30 ) & 2;
468 465
469 466 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
470 467 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
471 468
472 469 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
473 470 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
474 471 /* cos_lo(t) */
475 472 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
476 473 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
477 474
478 475 *pc0 = one + t0;
479 476 *pc1 = a2_1 + t2_1;
480 477 *pc2 = a2_2 + t2_2;
481 478
482 479 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
483 480 t1_1 = a2_1*w1 + a1_1*t1;
484 481 t1_2 = a2_2*w2 + a1_2*t2;
485 482
486 483 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
487 484 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
488 485 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
489 486
490 487 t0 = x0 + x0 * t0;
491 488 *py0 = t0;
492 489 *py1 = a1_1 + t1_1;
493 490 *py2 = a1_2 + t1_2;
494 491
495 492 break;
496 493
497 494 case 5:
498 495 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
499 496 HI(&t1) = j1;
500 497 LO(&t1) = 0;
501 498 x1 -= t1;
502 499 z0 = x0 * x0;
503 500 z1 = x1 * x1;
504 501 z2 = x2 * x2;
505 502 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
506 503 t1 = z1 * ( qq1 + z1 * qq2 );
507 504 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
508 505 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
509 506 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
510 507 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
511 508 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
512 509 xsb1 = ( xsb1 >> 30 ) & 2;
513 510
514 511 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
515 512
516 513 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
517 514
518 515 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
519 516
520 517 *pc0 = one + t0;
521 518 *pc1 = a2_1 + t2_1;
522 519 *pc2 = one + t2;
523 520
524 521 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
525 522 t1_1 = a2_1*w1 + a1_1*t1;
526 523 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
527 524
528 525 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
529 526 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
530 527 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
531 528
532 529 t0 = x0 + x0 * t0;
533 530 *py0 = t0;
534 531 *py1 = a1_1 + t1_1;
535 532 t2 = x2 + x2 * t2;
536 533 *py2 = t2;
537 534
538 535 break;
539 536
540 537 case 6:
541 538 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
542 539 HI(&t2) = j2;
543 540 LO(&t2) = 0;
544 541 x2 -= t2;
545 542 z0 = x0 * x0;
546 543 z1 = x1 * x1;
547 544 z2 = x2 * x2;
548 545 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
549 546 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
550 547 t2 = z2 * ( qq1 + z2 * qq2 );
551 548 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
552 549 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
553 550 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
554 551 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
555 552 xsb2 = ( xsb2 >> 30 ) & 2;
556 553 a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
557 554
558 555 a2_2 = __vlibm_TBL_sincos_hi[j2+1];
559 556
560 557 t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
561 558
562 559 *pc0 = one + t0;
563 560 *pc1 = one + t1;
564 561 *pc2 = a2_2 + t2_2;
565 562
566 563 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
567 564 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
568 565 t1_2 = a2_2*w2 + a1_2*t2;
569 566
570 567 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
571 568 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
572 569 t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
573 570
574 571 t0 = x0 + x0 * t0;
575 572 *py0 = t0;
576 573 t1 = x1 + x1 * t1;
577 574 *py1 = t1;
578 575 *py2 = a1_2 + t1_2;
579 576
580 577 break;
581 578
582 579 case 7: /* All are < 5/32 */
583 580 z0 = x0 * x0;
584 581 z1 = x1 * x1;
585 582 z2 = x2 * x2;
586 583 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
587 584 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
588 585 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
589 586 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
590 587 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
591 588 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
592 589 *pc0 = one + t0;
593 590 *pc1 = one + t1;
594 591 *pc2 = one + t2;
595 592 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
596 593 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
597 594 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
598 595 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
599 596 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
600 597 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
601 598 t0 = x0 + x0 * t0;
602 599 t1 = x1 + x1 * t1;
603 600 t2 = x2 + x2 * t2;
604 601 *py0 = t0;
605 602 *py1 = t1;
606 603 *py2 = t2;
607 604 break;
608 605 }
609 606
610 607 x += stridex;
611 608 y += stridey;
612 609 c += stridec;
613 610 i = 0;
614 611 } while ( --n > 0 ); /* END MAIN LOOP */
615 612
616 613 /*
617 614 * CLEAN UP last 0, 1, or 2 elts.
618 615 */
619 616 if ( i > 0 ) /* Clean up elts at tail. i < 3. */
620 617 {
621 618 double a1_0, a1_1, a2_0, a2_1;
622 619 double w0, w1;
623 620 double t0, t1, t1_0, t1_1, t2_0, t2_1;
624 621 double z0, z1;
625 622 unsigned j0, j1;
626 623
627 624 if ( i > 1 )
628 625 {
629 626 if ( hx1 < 0x3fc40000 )
630 627 {
631 628 z1 = x1 * x1;
632 629 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
633 630 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
634 631 t1 = one + t1;
635 632 *pc1 = t1;
636 633 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
637 634 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
638 635 t1 = x1 + x1 * t1;
639 636 *py1 = t1;
640 637 }
641 638 else
642 639 {
643 640 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
644 641 HI(&t1) = j1;
645 642 LO(&t1) = 0;
646 643 x1 -= t1;
647 644 z1 = x1 * x1;
648 645 t1 = z1 * ( qq1 + z1 * qq2 );
649 646 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
650 647 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
651 648 xsb1 = ( xsb1 >> 30 ) & 2;
652 649 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
653 650 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
654 651 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
655 652 *pc1 = a2_1 + t2_1;
656 653 t1_1 = a2_1*w1 + a1_1*t1;
657 654 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
658 655 *py1 = a1_1 + t1_1;
659 656 }
660 657 }
661 658 if ( hx0 < 0x3fc40000 )
662 659 {
663 660 z0 = x0 * x0;
664 661 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
665 662 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
666 663 t0 = one + t0;
667 664 *pc0 = t0;
668 665 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
669 666 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
670 667 t0 = x0 + x0 * t0;
671 668 *py0 = t0;
672 669 }
673 670 else
674 671 {
675 672 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
676 673 HI(&t0) = j0;
677 674 LO(&t0) = 0;
678 675 x0 -= t0;
679 676 z0 = x0 * x0;
680 677 t0 = z0 * ( qq1 + z0 * qq2 );
681 678 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
682 679 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
683 680 xsb0 = ( xsb0 >> 30 ) & 2;
684 681 a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
685 682 a2_0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
686 683 t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
687 684 *pc0 = a2_0 + t2_0;
688 685 t1_0 = a2_0*w0 + a1_0*t0;
689 686 t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
690 687 *py0 = a1_0 + t1_0;
691 688 }
692 689 } /* END CLEAN UP */
693 690
694 691 if ( !biguns )
695 692 return;
696 693
697 694 /*
698 695 * Take care of BIGUNS.
699 696 */
700 697 n = nsave;
701 698 x = xsave;
702 699 stridex = sxsave;
703 700 y = ysave;
704 701 stridey = sysave;
705 702 c = csave;
706 703 stridec = scsave;
707 704 biguns = 0;
708 705
709 706 x0_or_one[1] = 1.0;
710 707 x1_or_one[1] = 1.0;
711 708 x2_or_one[1] = 1.0;
712 709 x0_or_one[3] = -1.0;
713 710 x1_or_one[3] = -1.0;
714 711 x2_or_one[3] = -1.0;
715 712 y0_or_zero[1] = 0.0;
716 713 y1_or_zero[1] = 0.0;
717 714 y2_or_zero[1] = 0.0;
718 715 y0_or_zero[3] = 0.0;
719 716 y1_or_zero[3] = 0.0;
720 717 y2_or_zero[3] = 0.0;
721 718
722 719 do
723 720 {
724 721 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
725 722 unsigned hx;
726 723 int n0, n1, n2;
727 724
728 725 /*
729 726 * Find 3 more to work on: Not already done, not too big.
730 727 */
731 728 loop0:
732 729 hx = HI(x);
733 730 xsb0 = hx >> 31;
734 731 hx &= ~0x80000000;
735 732 if ( hx <= 0x3fe921fb ) /* Done above. */
736 733 {
737 734 x += stridex;
738 735 y += stridey;
739 736 c += stridec;
740 737 i = 0;
741 738 if ( --n <= 0 )
742 739 break;
743 740 goto loop0;
744 741 }
745 742 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
746 743 {
747 744 if ( hx >= 0x7ff00000 ) /* Inf or NaN */
748 745 {
749 746 x0 = *x;
750 747 *y = x0 - x0;
751 748 *c = x0 - x0;
752 749 }
753 750 else {
754 751 biguns = 1;
755 752 }
756 753 x += stridex;
757 754 y += stridey;
758 755 c += stridec;
759 756 i = 0;
760 757 if ( --n <= 0 )
761 758 break;
762 759 goto loop0;
763 760 }
764 761 x0 = *x;
765 762 py0 = y;
766 763 pc0 = c;
767 764 x += stridex;
768 765 y += stridey;
769 766 c += stridec;
770 767 i = 1;
771 768 if ( --n <= 0 )
772 769 break;
773 770
774 771 loop1:
775 772 hx = HI(x);
776 773 xsb1 = hx >> 31;
777 774 hx &= ~0x80000000;
778 775 if ( hx <= 0x3fe921fb )
779 776 {
780 777 x += stridex;
781 778 y += stridey;
782 779 c += stridec;
783 780 i = 1;
784 781 if ( --n <= 0 )
785 782 break;
786 783 goto loop1;
787 784 }
788 785 if ( hx > 0x413921fb )
789 786 {
790 787 if ( hx >= 0x7ff00000 )
791 788 {
792 789 x1 = *x;
793 790 *y = x1 - x1;
794 791 *c = x1 - x1;
795 792 }
796 793 else {
797 794 biguns = 1;
798 795 }
799 796 x += stridex;
800 797 y += stridey;
801 798 c += stridec;
802 799 i = 1;
803 800 if ( --n <= 0 )
804 801 break;
805 802 goto loop1;
806 803 }
807 804 x1 = *x;
808 805 py1 = y;
809 806 pc1 = c;
810 807 x += stridex;
811 808 y += stridey;
812 809 c += stridec;
813 810 i = 2;
814 811 if ( --n <= 0 )
815 812 break;
816 813
817 814 loop2:
818 815 hx = HI(x);
819 816 xsb2 = hx >> 31;
820 817 hx &= ~0x80000000;
821 818 if ( hx <= 0x3fe921fb )
822 819 {
823 820 x += stridex;
824 821 y += stridey;
825 822 c += stridec;
826 823 i = 2;
827 824 if ( --n <= 0 )
828 825 break;
829 826 goto loop2;
830 827 }
831 828 if ( hx > 0x413921fb )
832 829 {
833 830 if ( hx >= 0x7ff00000 )
834 831 {
835 832 x2 = *x;
836 833 *y = x2 - x2;
837 834 *c = x2 - x2;
838 835 }
839 836 else {
840 837 biguns = 1;
841 838 }
842 839 x += stridex;
843 840 y += stridey;
844 841 c += stridec;
845 842 i = 2;
846 843 if ( --n <= 0 )
847 844 break;
848 845 goto loop2;
849 846 }
850 847 x2 = *x;
851 848 py2 = y;
852 849 pc2 = c;
853 850
854 851 n0 = (int) ( x0 * invpio2 + half[xsb0] );
855 852 n1 = (int) ( x1 * invpio2 + half[xsb1] );
856 853 n2 = (int) ( x2 * invpio2 + half[xsb2] );
857 854 fn0 = (double) n0;
858 855 fn1 = (double) n1;
859 856 fn2 = (double) n2;
860 857 n0 &= 3;
861 858 n1 &= 3;
862 859 n2 &= 3;
863 860 a0 = x0 - fn0 * pio2_1;
864 861 a1 = x1 - fn1 * pio2_1;
865 862 a2 = x2 - fn2 * pio2_1;
866 863 w0 = fn0 * pio2_2;
867 864 w1 = fn1 * pio2_2;
868 865 w2 = fn2 * pio2_2;
869 866 x0 = a0 - w0;
870 867 x1 = a1 - w1;
871 868 x2 = a2 - w2;
872 869 y0 = ( a0 - x0 ) - w0;
873 870 y1 = ( a1 - x1 ) - w1;
874 871 y2 = ( a2 - x2 ) - w2;
875 872 a0 = x0;
876 873 a1 = x1;
877 874 a2 = x2;
878 875 w0 = fn0 * pio2_3 - y0;
879 876 w1 = fn1 * pio2_3 - y1;
880 877 w2 = fn2 * pio2_3 - y2;
881 878 x0 = a0 - w0;
882 879 x1 = a1 - w1;
883 880 x2 = a2 - w2;
884 881 y0 = ( a0 - x0 ) - w0;
885 882 y1 = ( a1 - x1 ) - w1;
886 883 y2 = ( a2 - x2 ) - w2;
887 884 a0 = x0;
888 885 a1 = x1;
889 886 a2 = x2;
890 887 w0 = fn0 * pio2_3t - y0;
891 888 w1 = fn1 * pio2_3t - y1;
892 889 w2 = fn2 * pio2_3t - y2;
893 890 x0 = a0 - w0;
894 891 x1 = a1 - w1;
895 892 x2 = a2 - w2;
896 893 y0 = ( a0 - x0 ) - w0;
897 894 y1 = ( a1 - x1 ) - w1;
898 895 y2 = ( a2 - x2 ) - w2;
899 896 xsb2 = HI(&x2);
900 897 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
901 898 xsb1 = HI(&x1);
902 899 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
903 900 xsb0 = HI(&x0);
904 901 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
905 902 switch ( i )
906 903 {
907 904 double a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
908 905 double t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
909 906 double z0, z1, z2;
910 907 unsigned j0, j1, j2;
911 908
912 909 case 0:
913 910 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
914 911 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
915 912 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
916 913 HI(&t0) = j0;
917 914 HI(&t1) = j1;
918 915 HI(&t2) = j2;
919 916 LO(&t0) = 0;
920 917 LO(&t1) = 0;
921 918 LO(&t2) = 0;
922 919 x0 = ( x0 - t0 ) + y0;
923 920 x1 = ( x1 - t1 ) + y1;
924 921 x2 = ( x2 - t2 ) + y2;
925 922 z0 = x0 * x0;
926 923 z1 = x1 * x1;
927 924 z2 = x2 * x2;
928 925 t0 = z0 * ( qq1 + z0 * qq2 );
929 926 t1 = z1 * ( qq1 + z1 * qq2 );
930 927 t2 = z2 * ( qq1 + z2 * qq2 );
931 928 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
932 929 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
933 930 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
934 931 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
935 932 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
936 933 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
937 934 xsb0 = ( xsb0 >> 30 ) & 2;
938 935 xsb1 = ( xsb1 >> 30 ) & 2;
939 936 xsb2 = ( xsb2 >> 30 ) & 2;
940 937 n0 ^= ( xsb0 & ~( n0 << 1 ) );
941 938 n1 ^= ( xsb1 & ~( n1 << 1 ) );
942 939 n2 ^= ( xsb2 & ~( n2 << 1 ) );
943 940 xsb0 |= 1;
944 941 xsb1 |= 1;
945 942 xsb2 |= 1;
946 943
947 944 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
948 945 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
949 946 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
950 947
951 948 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
952 949 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
953 950 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
954 951
955 952 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
956 953 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
957 954 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
958 955
959 956 w0 *= a2_0;
960 957 w1 *= a2_1;
961 958 w2 *= a2_2;
962 959
963 960 *pc0 = a2_0 + t2_0;
964 961 *pc1 = a2_1 + t2_1;
965 962 *pc2 = a2_2 + t2_2;
966 963
967 964 t1_0 = w0 + a1_0*t0;
968 965 t1_1 = w1 + a1_1*t1;
969 966 t1_2 = w2 + a1_2*t2;
970 967
971 968 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
972 969 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
973 970 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
974 971
975 972 *py0 = a1_0 + t1_0;
976 973 *py1 = a1_1 + t1_1;
977 974 *py2 = a1_2 + t1_2;
978 975
979 976 break;
980 977
981 978 case 1:
982 979 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
983 980 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
984 981 j2 = n2 & 1;
985 982 HI(&t0) = j0;
986 983 HI(&t1) = j1;
987 984 LO(&t0) = 0;
988 985 LO(&t1) = 0;
989 986 x2_or_one[0] = x2;
990 987 x2_or_one[2] = -x2;
991 988 x0 = ( x0 - t0 ) + y0;
992 989 x1 = ( x1 - t1 ) + y1;
993 990 y2_or_zero[0] = y2;
994 991 y2_or_zero[2] = -y2;
995 992 z0 = x0 * x0;
996 993 z1 = x1 * x1;
997 994 z2 = x2 * x2;
998 995 t0 = z0 * ( qq1 + z0 * qq2 );
999 996 t1 = z1 * ( qq1 + z1 * qq2 );
1000 997 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1001 998 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1002 999 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1003 1000 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1004 1001 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1005 1002 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1006 1003 xsb0 = ( xsb0 >> 30 ) & 2;
1007 1004 xsb1 = ( xsb1 >> 30 ) & 2;
1008 1005 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1009 1006 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1010 1007 xsb0 |= 1;
1011 1008 xsb1 |= 1;
1012 1009 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1013 1010 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1014 1011
1015 1012 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1016 1013 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1017 1014
1018 1015 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1019 1016 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1020 1017 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1021 1018
1022 1019 *pc0 = a2_0 + t2_0;
1023 1020 *pc1 = a2_1 + t2_1;
1024 1021 *py2 = t2;
1025 1022
1026 1023 n2 = (n2 + 1) & 3;
1027 1024 j2 = (j2 + 1) & 1;
1028 1025 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1029 1026
1030 1027 t1_0 = a2_0*w0 + a1_0*t0;
1031 1028 t1_1 = a2_1*w1 + a1_1*t1;
1032 1029 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1033 1030
1034 1031 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1035 1032 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1036 1033 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1037 1034
1038 1035 *py0 = a1_0 + t1_0;
1039 1036 *py1 = a1_1 + t1_1;
1040 1037 *pc2 = t2;
1041 1038
1042 1039 break;
1043 1040
1044 1041 case 2:
1045 1042 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1046 1043 j1 = n1 & 1;
1047 1044 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1048 1045 HI(&t0) = j0;
1049 1046 HI(&t2) = j2;
1050 1047 LO(&t0) = 0;
1051 1048 LO(&t2) = 0;
1052 1049 x1_or_one[0] = x1;
1053 1050 x1_or_one[2] = -x1;
1054 1051 x0 = ( x0 - t0 ) + y0;
1055 1052 y1_or_zero[0] = y1;
1056 1053 y1_or_zero[2] = -y1;
1057 1054 x2 = ( x2 - t2 ) + y2;
1058 1055 z0 = x0 * x0;
1059 1056 z1 = x1 * x1;
1060 1057 z2 = x2 * x2;
1061 1058 t0 = z0 * ( qq1 + z0 * qq2 );
1062 1059 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1063 1060 t2 = z2 * ( qq1 + z2 * qq2 );
1064 1061 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1065 1062 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1066 1063 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1067 1064 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1068 1065 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1069 1066 xsb0 = ( xsb0 >> 30 ) & 2;
1070 1067 xsb2 = ( xsb2 >> 30 ) & 2;
1071 1068 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1072 1069 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1073 1070 xsb0 |= 1;
1074 1071 xsb2 |= 1;
1075 1072
1076 1073 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1077 1074 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1078 1075
1079 1076 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1080 1077 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1081 1078
1082 1079 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1083 1080 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1084 1081 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1085 1082
1086 1083 *pc0 = a2_0 + t2_0;
1087 1084 *py1 = t1;
1088 1085 *pc2 = a2_2 + t2_2;
1089 1086
1090 1087 n1 = (n1 + 1) & 3;
1091 1088 j1 = (j1 + 1) & 1;
1092 1089 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1093 1090
1094 1091 t1_0 = a2_0*w0 + a1_0*t0;
1095 1092 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1096 1093 t1_2 = a2_2*w2 + a1_2*t2;
1097 1094
1098 1095 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1099 1096 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1100 1097 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1101 1098
1102 1099 *py0 = a1_0 + t1_0;
1103 1100 *pc1 = t1;
1104 1101 *py2 = a1_2 + t1_2;
1105 1102
1106 1103 break;
1107 1104
1108 1105 case 3:
1109 1106 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1110 1107 j1 = n1 & 1;
1111 1108 j2 = n2 & 1;
1112 1109 HI(&t0) = j0;
1113 1110 LO(&t0) = 0;
1114 1111 x1_or_one[0] = x1;
1115 1112 x1_or_one[2] = -x1;
1116 1113 x2_or_one[0] = x2;
1117 1114 x2_or_one[2] = -x2;
1118 1115 x0 = ( x0 - t0 ) + y0;
1119 1116 y1_or_zero[0] = y1;
1120 1117 y1_or_zero[2] = -y1;
1121 1118 y2_or_zero[0] = y2;
1122 1119 y2_or_zero[2] = -y2;
1123 1120 z0 = x0 * x0;
1124 1121 z1 = x1 * x1;
1125 1122 z2 = x2 * x2;
1126 1123 t0 = z0 * ( qq1 + z0 * qq2 );
1127 1124 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1128 1125 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1129 1126 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1130 1127 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1131 1128 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1132 1129 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1133 1130 xsb0 = ( xsb0 >> 30 ) & 2;
1134 1131 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1135 1132 xsb0 |= 1;
1136 1133
1137 1134 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1138 1135 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1139 1136
1140 1137 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1141 1138 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1142 1139 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1143 1140
1144 1141 *pc0 = a2_0 + t2_0;
1145 1142 *py1 = t1;
1146 1143 *py2 = t2;
1147 1144
1148 1145 n1 = (n1 + 1) & 3;
1149 1146 n2 = (n2 + 1) & 3;
1150 1147 j1 = (j1 + 1) & 1;
1151 1148 j2 = (j2 + 1) & 1;
1152 1149
1153 1150 t1_0 = a2_0*w0 + a1_0*t0;
1154 1151 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1155 1152 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1156 1153
1157 1154 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1158 1155 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1159 1156 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1160 1157
1161 1158 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1162 1159 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1163 1160
1164 1161 *py0 = a1_0 + t1_0;
1165 1162 *pc1 = t1;
1166 1163 *pc2 = t2;
1167 1164
1168 1165 break;
1169 1166
1170 1167 case 4:
1171 1168 j0 = n0 & 1;
1172 1169 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1173 1170 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1174 1171 HI(&t1) = j1;
1175 1172 HI(&t2) = j2;
1176 1173 LO(&t1) = 0;
1177 1174 LO(&t2) = 0;
1178 1175 x0_or_one[0] = x0;
1179 1176 x0_or_one[2] = -x0;
1180 1177 y0_or_zero[0] = y0;
1181 1178 y0_or_zero[2] = -y0;
1182 1179 x1 = ( x1 - t1 ) + y1;
1183 1180 x2 = ( x2 - t2 ) + y2;
1184 1181 z0 = x0 * x0;
1185 1182 z1 = x1 * x1;
1186 1183 z2 = x2 * x2;
1187 1184 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1188 1185 t1 = z1 * ( qq1 + z1 * qq2 );
1189 1186 t2 = z2 * ( qq1 + z2 * qq2 );
1190 1187 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1191 1188 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1192 1189 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1193 1190 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1194 1191 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1195 1192 xsb1 = ( xsb1 >> 30 ) & 2;
1196 1193 xsb2 = ( xsb2 >> 30 ) & 2;
1197 1194 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1198 1195 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1199 1196 xsb1 |= 1;
1200 1197 xsb2 |= 1;
1201 1198
1202 1199 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1203 1200 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1204 1201
1205 1202 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1206 1203 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1207 1204
1208 1205 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1209 1206 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1210 1207 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1211 1208
1212 1209 *py0 = t0;
1213 1210 *pc1 = a2_1 + t2_1;
1214 1211 *pc2 = a2_2 + t2_2;
1215 1212
1216 1213 n0 = (n0 + 1) & 3;
1217 1214 j0 = (j0 + 1) & 1;
1218 1215 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1219 1216
1220 1217 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1221 1218 t1_1 = a2_1*w1 + a1_1*t1;
1222 1219 t1_2 = a2_2*w2 + a1_2*t2;
1223 1220
1224 1221 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1225 1222 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1226 1223 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1227 1224
1228 1225 *py1 = a1_1 + t1_1;
1229 1226 *py2 = a1_2 + t1_2;
1230 1227 *pc0 = t0;
1231 1228
1232 1229 break;
1233 1230
1234 1231 case 5:
1235 1232 j0 = n0 & 1;
1236 1233 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1237 1234 j2 = n2 & 1;
1238 1235 HI(&t1) = j1;
1239 1236 LO(&t1) = 0;
1240 1237 x0_or_one[0] = x0;
1241 1238 x0_or_one[2] = -x0;
1242 1239 x2_or_one[0] = x2;
1243 1240 x2_or_one[2] = -x2;
1244 1241 y0_or_zero[0] = y0;
1245 1242 y0_or_zero[2] = -y0;
1246 1243 x1 = ( x1 - t1 ) + y1;
1247 1244 y2_or_zero[0] = y2;
1248 1245 y2_or_zero[2] = -y2;
1249 1246 z0 = x0 * x0;
1250 1247 z1 = x1 * x1;
1251 1248 z2 = x2 * x2;
1252 1249 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1253 1250 t1 = z1 * ( qq1 + z1 * qq2 );
1254 1251 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1255 1252 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1256 1253 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1257 1254 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1258 1255 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1259 1256 xsb1 = ( xsb1 >> 30 ) & 2;
1260 1257 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1261 1258 xsb1 |= 1;
1262 1259
1263 1260 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1264 1261 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1265 1262
1266 1263 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1267 1264 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1268 1265 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1269 1266
1270 1267 *py0 = t0;
1271 1268 *pc1 = a2_1 + t2_1;
1272 1269 *py2 = t2;
1273 1270
1274 1271 n0 = (n0 + 1) & 3;
1275 1272 n2 = (n2 + 1) & 3;
1276 1273 j0 = (j0 + 1) & 1;
1277 1274 j2 = (j2 + 1) & 1;
1278 1275
1279 1276 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1280 1277 t1_1 = a2_1*w1 + a1_1*t1;
1281 1278 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1282 1279
1283 1280 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1284 1281 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1285 1282 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1286 1283
1287 1284 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1288 1285 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1289 1286
1290 1287 *pc0 = t0;
1291 1288 *py1 = a1_1 + t1_1;
1292 1289 *pc2 = t2;
1293 1290
1294 1291 break;
1295 1292
1296 1293 case 6:
1297 1294 j0 = n0 & 1;
1298 1295 j1 = n1 & 1;
1299 1296 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1300 1297 HI(&t2) = j2;
1301 1298 LO(&t2) = 0;
1302 1299 x0_or_one[0] = x0;
1303 1300 x0_or_one[2] = -x0;
1304 1301 x1_or_one[0] = x1;
1305 1302 x1_or_one[2] = -x1;
1306 1303 y0_or_zero[0] = y0;
1307 1304 y0_or_zero[2] = -y0;
1308 1305 y1_or_zero[0] = y1;
1309 1306 y1_or_zero[2] = -y1;
1310 1307 x2 = ( x2 - t2 ) + y2;
1311 1308 z0 = x0 * x0;
1312 1309 z1 = x1 * x1;
1313 1310 z2 = x2 * x2;
1314 1311 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1315 1312 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1316 1313 t2 = z2 * ( qq1 + z2 * qq2 );
1317 1314 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1318 1315 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1319 1316 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1320 1317 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1321 1318 xsb2 = ( xsb2 >> 30 ) & 2;
1322 1319 n2 ^= ( xsb2 & ~( n2 << 1 ) );
1323 1320 xsb2 |= 1;
1324 1321
1325 1322 a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1326 1323 a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1327 1324
1328 1325 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1329 1326 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1330 1327 t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1331 1328
1332 1329 *py0 = t0;
1333 1330 *py1 = t1;
1334 1331 *pc2 = a2_2 + t2_2;
1335 1332
1336 1333 n0 = (n0 + 1) & 3;
1337 1334 n1 = (n1 + 1) & 3;
1338 1335 j0 = (j0 + 1) & 1;
1339 1336 j1 = (j1 + 1) & 1;
1340 1337
1341 1338 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1342 1339 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1343 1340 t1_2 = a2_2*w2 + a1_2*t2;
1344 1341
1345 1342 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1346 1343 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1347 1344 t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1348 1345
1349 1346 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1350 1347 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1351 1348
1352 1349 *pc0 = t0;
1353 1350 *pc1 = t1;
1354 1351 *py2 = a1_2 + t1_2;
1355 1352
1356 1353 break;
1357 1354
1358 1355 case 7:
1359 1356 j0 = n0 & 1;
1360 1357 j1 = n1 & 1;
1361 1358 j2 = n2 & 1;
1362 1359 x0_or_one[0] = x0;
1363 1360 x0_or_one[2] = -x0;
1364 1361 x1_or_one[0] = x1;
1365 1362 x1_or_one[2] = -x1;
1366 1363 x2_or_one[0] = x2;
1367 1364 x2_or_one[2] = -x2;
1368 1365 y0_or_zero[0] = y0;
1369 1366 y0_or_zero[2] = -y0;
1370 1367 y1_or_zero[0] = y1;
1371 1368 y1_or_zero[2] = -y1;
1372 1369 y2_or_zero[0] = y2;
1373 1370 y2_or_zero[2] = -y2;
1374 1371 z0 = x0 * x0;
1375 1372 z1 = x1 * x1;
1376 1373 z2 = x2 * x2;
1377 1374 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1378 1375 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1379 1376 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1380 1377 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1381 1378 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1382 1379 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1383 1380 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1384 1381 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1385 1382 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1386 1383 *py0 = t0;
1387 1384 *py1 = t1;
1388 1385 *py2 = t2;
1389 1386
1390 1387 n0 = (n0 + 1) & 3;
1391 1388 n1 = (n1 + 1) & 3;
1392 1389 n2 = (n2 + 1) & 3;
1393 1390 j0 = (j0 + 1) & 1;
1394 1391 j1 = (j1 + 1) & 1;
1395 1392 j2 = (j2 + 1) & 1;
1396 1393 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1397 1394 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1398 1395 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1399 1396 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1400 1397 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1401 1398 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1402 1399 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1403 1400 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1404 1401 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1405 1402 *pc0 = t0;
1406 1403 *pc1 = t1;
1407 1404 *pc2 = t2;
1408 1405 break;
1409 1406 }
1410 1407
1411 1408 x += stridex;
1412 1409 y += stridey;
1413 1410 c += stridec;
1414 1411 i = 0;
1415 1412 } while ( --n > 0 );
1416 1413
1417 1414 if ( i > 0 )
1418 1415 {
1419 1416 double a1_0, a1_1, a2_0, a2_1;
1420 1417 double t0, t1, t1_0, t1_1, t2_0, t2_1;
1421 1418 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1422 1419 double z0, z1;
1423 1420 unsigned j0, j1;
1424 1421 int n0, n1;
1425 1422
1426 1423 if ( i > 1 )
1427 1424 {
1428 1425 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1429 1426 fn1 = (double) n1;
1430 1427 n1 &= 3;
1431 1428 a1 = x1 - fn1 * pio2_1;
1432 1429 w1 = fn1 * pio2_2;
1433 1430 x1 = a1 - w1;
1434 1431 y1 = ( a1 - x1 ) - w1;
1435 1432 a1 = x1;
1436 1433 w1 = fn1 * pio2_3 - y1;
1437 1434 x1 = a1 - w1;
1438 1435 y1 = ( a1 - x1 ) - w1;
1439 1436 a1 = x1;
1440 1437 w1 = fn1 * pio2_3t - y1;
1441 1438 x1 = a1 - w1;
1442 1439 y1 = ( a1 - x1 ) - w1;
1443 1440 xsb1 = HI(&x1);
1444 1441 if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1445 1442 {
1446 1443 j1 = n1 & 1;
1447 1444 x1_or_one[0] = x1;
1448 1445 x1_or_one[2] = -x1;
1449 1446 y1_or_zero[0] = y1;
1450 1447 y1_or_zero[2] = -y1;
1451 1448 z1 = x1 * x1;
1452 1449 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1453 1450 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1454 1451 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1455 1452 *py1 = t1;
1456 1453 n1 = (n1 + 1) & 3;
1457 1454 j1 = (j1 + 1) & 1;
1458 1455 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1459 1456 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1460 1457 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1461 1458 *pc1 = t1;
1462 1459 }
1463 1460 else
1464 1461 {
1465 1462 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1466 1463 HI(&t1) = j1;
1467 1464 LO(&t1) = 0;
1468 1465 x1 = ( x1 - t1 ) + y1;
1469 1466 z1 = x1 * x1;
1470 1467 t1 = z1 * ( qq1 + z1 * qq2 );
1471 1468 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1472 1469 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1473 1470 xsb1 = ( xsb1 >> 30 ) & 2;
1474 1471 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1475 1472 xsb1 |= 1;
1476 1473 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1477 1474 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1478 1475 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1479 1476 *pc1 = a2_1 + t2_1;
1480 1477 t1_1 = a2_1*w1 + a1_1*t1;
1481 1478 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1482 1479 *py1 = a1_1 + t1_1;
1483 1480 }
1484 1481 }
1485 1482 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1486 1483 fn0 = (double) n0;
1487 1484 n0 &= 3;
1488 1485 a0 = x0 - fn0 * pio2_1;
1489 1486 w0 = fn0 * pio2_2;
1490 1487 x0 = a0 - w0;
1491 1488 y0 = ( a0 - x0 ) - w0;
1492 1489 a0 = x0;
1493 1490 w0 = fn0 * pio2_3 - y0;
1494 1491 x0 = a0 - w0;
1495 1492 y0 = ( a0 - x0 ) - w0;
1496 1493 a0 = x0;
1497 1494 w0 = fn0 * pio2_3t - y0;
1498 1495 x0 = a0 - w0;
1499 1496 y0 = ( a0 - x0 ) - w0;
1500 1497 xsb0 = HI(&x0);
1501 1498 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1502 1499 {
1503 1500 j0 = n0 & 1;
1504 1501 x0_or_one[0] = x0;
1505 1502 x0_or_one[2] = -x0;
1506 1503 y0_or_zero[0] = y0;
1507 1504 y0_or_zero[2] = -y0;
1508 1505 z0 = x0 * x0;
1509 1506 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1510 1507 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1511 1508 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1512 1509 *py0 = t0;
1513 1510 n0 = (n0 + 1) & 3;
1514 1511 j0 = (j0 + 1) & 1;
1515 1512 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1516 1513 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1517 1514 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1518 1515 *pc0 = t0;
1519 1516 }
1520 1517 else
1521 1518 {
1522 1519 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1523 1520 HI(&t0) = j0;
1524 1521 LO(&t0) = 0;
1525 1522 x0 = ( x0 - t0 ) + y0;
1526 1523 z0 = x0 * x0;
1527 1524 t0 = z0 * ( qq1 + z0 * qq2 );
1528 1525 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1529 1526 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1530 1527 xsb0 = ( xsb0 >> 30 ) & 2;
1531 1528 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1532 1529 xsb0 |= 1;
1533 1530 a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1534 1531 a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1535 1532 t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1536 1533 *pc0 = a2_0 + t2_0;
1537 1534 t1_0 = a2_0*w0 + a1_0*t0;
1538 1535 t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1539 1536 *py0 = a1_0 + t1_0;
1540 1537 }
1541 1538 }
1542 1539
1543 1540 if ( biguns ) {
1544 1541 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1545 1542 }
1546 1543 }
↓ open down ↓ |
1331 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX