Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vcos.c
+++ new/usr/src/lib/libmvec/common/__vcos.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 +#include <sys/ccompile.h>
31 32
32 33 #ifdef _LITTLE_ENDIAN
33 34 #define HI(x) *(1+(int*)x)
34 35 #define LO(x) *(unsigned*)x
35 36 #else
36 37 #define HI(x) *(int*)x
37 38 #define LO(x) *(1+(unsigned*)x)
38 39 #endif
39 40
40 41 #ifdef __RESTRICT
41 42 #define restrict _Restrict
42 43 #else
43 44 #define restrict
44 45 #endif
45 46
46 47 /*
47 48 * vcos.1.c
48 49 *
49 50 * Vector cosine function. Just slight modifications to vsin.8.c, mainly
50 51 * in the primary range part.
51 52 *
52 53 * Modification to primary range processing. If an argument that does not
53 54 * fall in the primary range is encountered, then processing is continued
54 55 * in the medium range.
55 56 *
56 57 */
57 58
58 59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
59 60
60 61 static const double
61 62 half[2] = { 0.5, -0.5 },
62 63 one = 1.0,
63 64 invpio2 = 0.636619772367581343075535, /* 53 bits of pi/2 */
64 65 pio2_1 = 1.570796326734125614166, /* first 33 bits of pi/2 */
65 66 pio2_2 = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
66 67 pio2_3 = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
67 68 pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
68 69 pp1 = -1.666666666605760465276263943134982554676e-0001,
69 70 pp2 = 8.333261209690963126718376566146180944442e-0003,
70 71 qq1 = -4.999999999977710986407023955908711557870e-0001,
71 72 qq2 = 4.166654863857219350645055881018842089580e-0002,
72 73 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
73 74 -4.999999999999931701464060878888294524481e-0001 },
74 75 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
75 76 4.166666666394861917535640593963708222319e-0002 },
76 77 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
77 78 -1.388888552656142867832756687736851681462e-0003 },
78 79 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
79 80 2.478519423681460796618128289454530524759e-0005 };
80 81
81 82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
82 83
83 84 /* Don't __ the following; acomp will handle it */
84 85 extern double fabs( double );
85 86 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
86 87
87 88 /*
88 89 * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
89 90 *
90 91 * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
↓ open down ↓ |
50 lines elided |
↑ open up ↑ |
91 92 * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
92 93 *
93 94 * elts < 2^-27 use the approximation 1.0 ~ cos(x).
94 95 */
95 96 void
96 97 __vcos( int n, double * restrict x, int stridex, double * restrict y,
97 98 int stridey )
98 99 {
99 100 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
100 101 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
101 - double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
102 - unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
103 - int i, biguns, nsave, sxsave, sysave;
104 -
102 + double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
103 + unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
104 + int i, biguns, nsave, sxsave, sysave;
105 105 nsave = n;
106 106 xsave = x;
107 107 sxsave = stridex;
108 108 ysave = y;
109 109 sysave = stridey;
110 110 biguns = 0;
111 111
112 112 do /* MAIN LOOP */
113 113 {
114 114 /* Gotos here so _break_ exits MAIN LOOP. */
115 115 LOOP0: /* Find first arg in right range. */
116 116 xsb0 = HI(x); /* get most significant word */
117 117 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
118 118 if ( hx0 > 0x3fe921fb ) {
119 119 /* Too big: arg reduction needed, so leave for second part */
120 120 biguns = 1;
121 121 goto MEDIUM;
122 122 }
123 123 if ( hx0 < 0x3e400000 ) {
124 124 /* Too small. cos x ~ 1. */
125 - volatile int v = *x;
126 125 *y = 1.0;
127 126 x += stridex;
128 127 y += stridey;
129 128 i = 0;
130 129 if ( --n <= 0 )
131 130 break;
132 131 goto LOOP0;
133 132 }
134 133 x0 = *x;
135 134 py0 = y;
136 135 x += stridex;
137 136 y += stridey;
138 137 i = 1;
139 138 if ( --n <= 0 )
140 139 break;
141 140
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
142 141 LOOP1: /* Get second arg, same as above. */
143 142 xsb1 = HI(x);
144 143 hx1 = xsb1 & ~0x80000000;
145 144 if ( hx1 > 0x3fe921fb )
146 145 {
147 146 biguns = 2;
148 147 goto MEDIUM;
149 148 }
150 149 if ( hx1 < 0x3e400000 )
151 150 {
152 - volatile int v = *x;
153 151 *y = 1.0;
154 152 x += stridex;
155 153 y += stridey;
156 154 i = 1;
157 155 if ( --n <= 0 )
158 156 break;
159 157 goto LOOP1;
160 158 }
161 159 x1 = *x;
162 160 py1 = y;
163 161 x += stridex;
164 162 y += stridey;
165 163 i = 2;
166 164 if ( --n <= 0 )
167 165 break;
168 166
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
169 167 LOOP2: /* Get third arg, same as above. */
170 168 xsb2 = HI(x);
171 169 hx2 = xsb2 & ~0x80000000;
172 170 if ( hx2 > 0x3fe921fb )
173 171 {
174 172 biguns = 3;
175 173 goto MEDIUM;
176 174 }
177 175 if ( hx2 < 0x3e400000 )
178 176 {
179 - volatile int v = *x;
180 177 *y = 1.0;
181 178 x += stridex;
182 179 y += stridey;
183 180 i = 2;
184 181 if ( --n <= 0 )
185 182 break;
186 183 goto LOOP2;
187 184 }
188 185 x2 = *x;
189 186 py2 = y;
190 187
191 188 /*
192 189 * 0x3fc40000 = 5/32 ~ 0.15625
193 190 * Get msb after subtraction. Will be 1 only if
194 191 * hx0 - 5/32 is negative.
195 192 */
196 193 i = ( hx0 - 0x3fc40000 ) >> 31;
197 194 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
198 195 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
199 196 switch ( i )
200 197 {
201 198 double a0, a1, a2, w0, w1, w2;
202 199 double t0, t1, t2, z0, z1, z2;
203 200 unsigned j0, j1, j2;
204 201
205 202 case 0: /* All are > 5/32 */
206 203 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
207 204 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
208 205 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
209 206 HI(&t0) = j0;
210 207 HI(&t1) = j1;
211 208 HI(&t2) = j2;
212 209 LO(&t0) = 0;
213 210 LO(&t1) = 0;
214 211 LO(&t2) = 0;
215 212 x0 -= t0;
216 213 x1 -= t1;
217 214 x2 -= t2;
218 215 z0 = x0 * x0;
219 216 z1 = x1 * x1;
220 217 z2 = x2 * x2;
221 218 t0 = z0 * ( qq1 + z0 * qq2 );
222 219 t1 = z1 * ( qq1 + z1 * qq2 );
223 220 t2 = z2 * ( qq1 + z2 * qq2 );
224 221 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
225 222 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
226 223 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
227 224 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
228 225 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
229 226 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
230 227 xsb0 = ( xsb0 >> 30 ) & 2;
231 228 xsb1 = ( xsb1 >> 30 ) & 2;
232 229 xsb2 = ( xsb2 >> 30 ) & 2;
233 230 a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
234 231 a1 = __vlibm_TBL_sincos_hi[j1+1];
235 232 a2 = __vlibm_TBL_sincos_hi[j2+1];
236 233 /* cos_lo(t) sin_hi(t) */
237 234 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
238 235 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
239 236 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
240 237
241 238 *py0 = a0 + t0;
242 239 *py1 = a1 + t1;
243 240 *py2 = a2 + t2;
244 241 break;
245 242
246 243 case 1:
247 244 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
248 245 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
249 246 HI(&t1) = j1;
250 247 HI(&t2) = j2;
251 248 LO(&t1) = 0;
252 249 LO(&t2) = 0;
253 250 x1 -= t1;
254 251 x2 -= t2;
255 252 z0 = x0 * x0;
256 253 z1 = x1 * x1;
257 254 z2 = x2 * x2;
258 255 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
259 256 t1 = z1 * ( qq1 + z1 * qq2 );
260 257 t2 = z2 * ( qq1 + z2 * qq2 );
261 258 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
262 259 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
263 260 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
264 261 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
265 262 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
266 263 xsb1 = ( xsb1 >> 30 ) & 2;
267 264 xsb2 = ( xsb2 >> 30 ) & 2;
268 265 a1 = __vlibm_TBL_sincos_hi[j1+1];
269 266 a2 = __vlibm_TBL_sincos_hi[j2+1];
270 267 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
271 268 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
272 269 *py0 = one + t0;
273 270 *py1 = a1 + t1;
274 271 *py2 = a2 + t2;
275 272 break;
276 273
277 274 case 2:
278 275 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
279 276 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
280 277 HI(&t0) = j0;
281 278 HI(&t2) = j2;
282 279 LO(&t0) = 0;
283 280 LO(&t2) = 0;
284 281 x0 -= t0;
285 282 x2 -= t2;
286 283 z0 = x0 * x0;
287 284 z1 = x1 * x1;
288 285 z2 = x2 * x2;
289 286 t0 = z0 * ( qq1 + z0 * qq2 );
290 287 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
291 288 t2 = z2 * ( qq1 + z2 * qq2 );
292 289 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
293 290 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
294 291 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
295 292 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
296 293 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
297 294 xsb0 = ( xsb0 >> 30 ) & 2;
298 295 xsb2 = ( xsb2 >> 30 ) & 2;
299 296 a0 = __vlibm_TBL_sincos_hi[j0+1];
300 297 a2 = __vlibm_TBL_sincos_hi[j2+1];
301 298 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
302 299 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
303 300 *py0 = a0 + t0;
304 301 *py1 = one + t1;
305 302 *py2 = a2 + t2;
306 303 break;
307 304
308 305 case 3:
309 306 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
310 307 HI(&t2) = j2;
311 308 LO(&t2) = 0;
312 309 x2 -= t2;
313 310 z0 = x0 * x0;
314 311 z1 = x1 * x1;
315 312 z2 = x2 * x2;
316 313 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
317 314 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
318 315 t2 = z2 * ( qq1 + z2 * qq2 );
319 316 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
320 317 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
321 318 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
322 319 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
323 320 xsb2 = ( xsb2 >> 30 ) & 2;
324 321 a2 = __vlibm_TBL_sincos_hi[j2+1];
325 322 t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
326 323 *py0 = one + t0;
327 324 *py1 = one + t1;
328 325 *py2 = a2 + t2;
329 326 break;
330 327
331 328 case 4:
332 329 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
333 330 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
334 331 HI(&t0) = j0;
335 332 HI(&t1) = j1;
336 333 LO(&t0) = 0;
337 334 LO(&t1) = 0;
338 335 x0 -= t0;
339 336 x1 -= t1;
340 337 z0 = x0 * x0;
341 338 z1 = x1 * x1;
342 339 z2 = x2 * x2;
343 340 t0 = z0 * ( qq1 + z0 * qq2 );
344 341 t1 = z1 * ( qq1 + z1 * qq2 );
345 342 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
346 343 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
347 344 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
348 345 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
349 346 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
350 347 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
351 348 xsb0 = ( xsb0 >> 30 ) & 2;
352 349 xsb1 = ( xsb1 >> 30 ) & 2;
353 350 a0 = __vlibm_TBL_sincos_hi[j0+1];
354 351 a1 = __vlibm_TBL_sincos_hi[j1+1];
355 352 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
356 353 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
357 354 *py0 = a0 + t0;
358 355 *py1 = a1 + t1;
359 356 *py2 = one + t2;
360 357 break;
361 358
362 359 case 5:
363 360 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
364 361 HI(&t1) = j1;
365 362 LO(&t1) = 0;
366 363 x1 -= t1;
367 364 z0 = x0 * x0;
368 365 z1 = x1 * x1;
369 366 z2 = x2 * x2;
370 367 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
371 368 t1 = z1 * ( qq1 + z1 * qq2 );
372 369 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
373 370 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
374 371 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
375 372 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
376 373 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
377 374 xsb1 = ( xsb1 >> 30 ) & 2;
378 375 a1 = __vlibm_TBL_sincos_hi[j1+1];
379 376 t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
380 377 *py0 = one + t0;
381 378 *py1 = a1 + t1;
382 379 *py2 = one + t2;
383 380 break;
384 381
385 382 case 6:
386 383 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
387 384 HI(&t0) = j0;
388 385 LO(&t0) = 0;
389 386 x0 -= t0;
390 387 z0 = x0 * x0;
391 388 z1 = x1 * x1;
392 389 z2 = x2 * x2;
393 390 t0 = z0 * ( qq1 + z0 * qq2 );
394 391 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
395 392 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
396 393 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
397 394 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
398 395 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
399 396 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
400 397 xsb0 = ( xsb0 >> 30 ) & 2;
401 398 a0 = __vlibm_TBL_sincos_hi[j0+1];
402 399 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
403 400 *py0 = a0 + t0;
404 401 *py1 = one + t1;
405 402 *py2 = one + t2;
406 403 break;
407 404
408 405 case 7: /* All are < 5/32 */
409 406 z0 = x0 * x0;
410 407 z1 = x1 * x1;
411 408 z2 = x2 * x2;
412 409 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
413 410 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
414 411 t2 = z2 * ( poly3[1] + z2 * poly4[1] );
415 412 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
416 413 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
417 414 t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
418 415 *py0 = one + t0;
419 416 *py1 = one + t1;
420 417 *py2 = one + t2;
421 418 break;
422 419 }
423 420
424 421 x += stridex;
425 422 y += stridey;
426 423 i = 0;
427 424 } while ( --n > 0 ); /* END MAIN LOOP */
428 425
429 426 /*
430 427 * CLEAN UP last 0, 1, or 2 elts.
431 428 */
432 429 if ( i > 0 ) /* Clean up elts at tail. i < 3. */
433 430 {
434 431 double a0, a1, w0, w1;
435 432 double t0, t1, z0, z1;
436 433 unsigned j0, j1;
437 434
438 435 if ( i > 1 )
439 436 {
440 437 if ( hx1 < 0x3fc40000 )
441 438 {
442 439 z1 = x1 * x1;
443 440 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
444 441 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
445 442 t1 = one + t1;
446 443 *py1 = t1;
447 444 }
448 445 else
449 446 {
450 447 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
451 448 HI(&t1) = j1;
452 449 LO(&t1) = 0;
453 450 x1 -= t1;
454 451 z1 = x1 * x1;
455 452 t1 = z1 * ( qq1 + z1 * qq2 );
456 453 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
457 454 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
458 455 xsb1 = ( xsb1 >> 30 ) & 2;
459 456 a1 = __vlibm_TBL_sincos_hi[j1+1];
460 457 t1 = __vlibm_TBL_sincos_lo[j1+1]
461 458 - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
462 459 *py1 = a1 + t1;
463 460 }
464 461 }
465 462 if ( hx0 < 0x3fc40000 )
466 463 {
467 464 z0 = x0 * x0;
468 465 t0 = z0 * ( poly3[1] + z0 * poly4[1] );
469 466 t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
470 467 t0 = one + t0;
471 468 *py0 = t0;
472 469 }
473 470 else
474 471 {
475 472 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
476 473 HI(&t0) = j0;
477 474 LO(&t0) = 0;
478 475 x0 -= t0;
479 476 z0 = x0 * x0;
480 477 t0 = z0 * ( qq1 + z0 * qq2 );
481 478 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
482 479 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
483 480 xsb0 = ( xsb0 >> 30 ) & 2;
484 481 a0 = __vlibm_TBL_sincos_hi[j0+1];
485 482 t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
486 483 *py0 = a0 + t0;
487 484 }
488 485 } /* END CLEAN UP */
489 486
490 487 return;
491 488
492 489 /*
493 490 * Take care of BIGUNS.
494 491 *
495 492 * We have jumped here in the middle of processing after having
496 493 * encountered a medium range argument. Therefore things are in a
497 494 * bit of a tizzy.
498 495 */
499 496
500 497 MEDIUM:
501 498
502 499 x0_or_one[1] = 1.0;
503 500 x1_or_one[1] = 1.0;
504 501 x2_or_one[1] = 1.0;
505 502 x0_or_one[3] = -1.0;
506 503 x1_or_one[3] = -1.0;
507 504 x2_or_one[3] = -1.0;
508 505 y0_or_zero[1] = 0.0;
509 506 y1_or_zero[1] = 0.0;
510 507 y2_or_zero[1] = 0.0;
511 508 y0_or_zero[3] = 0.0;
512 509 y1_or_zero[3] = 0.0;
513 510 y2_or_zero[3] = 0.0;
514 511
515 512 if ( biguns == 3 )
516 513 {
517 514 biguns = 0;
518 515 xsb0 = xsb0 >> 31;
519 516 xsb1 = xsb1 >> 31;
520 517 goto loop2;
521 518 }
522 519 else if ( biguns == 2 )
523 520 {
524 521 xsb0 = xsb0 >> 31;
525 522 biguns = 0;
526 523 goto loop1;
527 524 }
528 525 biguns = 0;
529 526
530 527 do
531 528 {
532 529 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
533 530 unsigned hx;
534 531 int n0, n1, n2;
535 532
536 533 /*
537 534 * Find 3 more to work on: Not already done, not too big.
538 535 */
539 536
540 537 loop0:
541 538 hx = HI(x);
542 539 xsb0 = hx >> 31;
543 540 hx &= ~0x80000000;
544 541 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
545 542 {
546 543 if ( hx >= 0x7ff00000 ) /* Inf or NaN */
547 544 {
548 545 x0 = *x;
549 546 *y = x0 - x0;
550 547 }
551 548 else
552 549 biguns = 1;
553 550 x += stridex;
554 551 y += stridey;
555 552 i = 0;
556 553 if ( --n <= 0 )
557 554 break;
558 555 goto loop0;
559 556 }
560 557 x0 = *x;
561 558 py0 = y;
562 559 x += stridex;
563 560 y += stridey;
564 561 i = 1;
565 562 if ( --n <= 0 )
566 563 break;
567 564
568 565 loop1:
569 566 hx = HI(x);
570 567 xsb1 = hx >> 31;
571 568 hx &= ~0x80000000;
572 569 if ( hx > 0x413921fb )
573 570 {
574 571 if ( hx >= 0x7ff00000 )
575 572 {
576 573 x1 = *x;
577 574 *y = x1 - x1;
578 575 }
579 576 else
580 577 biguns = 1;
581 578 x += stridex;
582 579 y += stridey;
583 580 i = 1;
584 581 if ( --n <= 0 )
585 582 break;
586 583 goto loop1;
587 584 }
588 585 x1 = *x;
589 586 py1 = y;
590 587 x += stridex;
591 588 y += stridey;
592 589 i = 2;
593 590 if ( --n <= 0 )
594 591 break;
595 592
596 593 loop2:
597 594 hx = HI(x);
598 595 xsb2 = hx >> 31;
599 596 hx &= ~0x80000000;
600 597 if ( hx > 0x413921fb )
601 598 {
602 599 if ( hx >= 0x7ff00000 )
603 600 {
604 601 x2 = *x;
605 602 *y = x2 - x2;
606 603 }
607 604 else
608 605 biguns = 1;
609 606 x += stridex;
610 607 y += stridey;
611 608 i = 2;
612 609 if ( --n <= 0 )
613 610 break;
614 611 goto loop2;
615 612 }
616 613 x2 = *x;
617 614 py2 = y;
618 615
619 616 n0 = (int) ( x0 * invpio2 + half[xsb0] );
620 617 n1 = (int) ( x1 * invpio2 + half[xsb1] );
621 618 n2 = (int) ( x2 * invpio2 + half[xsb2] );
622 619 fn0 = (double) n0;
623 620 fn1 = (double) n1;
624 621 fn2 = (double) n2;
625 622 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
626 623 n1 = (n1 + 1) & 3;
627 624 n2 = (n2 + 1) & 3;
628 625 a0 = x0 - fn0 * pio2_1;
629 626 a1 = x1 - fn1 * pio2_1;
630 627 a2 = x2 - fn2 * pio2_1;
631 628 w0 = fn0 * pio2_2;
632 629 w1 = fn1 * pio2_2;
633 630 w2 = fn2 * pio2_2;
634 631 x0 = a0 - w0;
635 632 x1 = a1 - w1;
636 633 x2 = a2 - w2;
637 634 y0 = ( a0 - x0 ) - w0;
638 635 y1 = ( a1 - x1 ) - w1;
639 636 y2 = ( a2 - x2 ) - w2;
640 637 a0 = x0;
641 638 a1 = x1;
642 639 a2 = x2;
643 640 w0 = fn0 * pio2_3 - y0;
644 641 w1 = fn1 * pio2_3 - y1;
645 642 w2 = fn2 * pio2_3 - y2;
646 643 x0 = a0 - w0;
647 644 x1 = a1 - w1;
648 645 x2 = a2 - w2;
649 646 y0 = ( a0 - x0 ) - w0;
650 647 y1 = ( a1 - x1 ) - w1;
651 648 y2 = ( a2 - x2 ) - w2;
652 649 a0 = x0;
653 650 a1 = x1;
654 651 a2 = x2;
655 652 w0 = fn0 * pio2_3t - y0;
656 653 w1 = fn1 * pio2_3t - y1;
657 654 w2 = fn2 * pio2_3t - y2;
658 655 x0 = a0 - w0;
659 656 x1 = a1 - w1;
660 657 x2 = a2 - w2;
661 658 y0 = ( a0 - x0 ) - w0;
662 659 y1 = ( a1 - x1 ) - w1;
663 660 y2 = ( a2 - x2 ) - w2;
664 661 xsb0 = HI(&x0);
665 662 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
666 663 xsb1 = HI(&x1);
667 664 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
668 665 xsb2 = HI(&x2);
669 666 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
670 667 switch ( i )
671 668 {
672 669 double t0, t1, t2, z0, z1, z2;
673 670 unsigned j0, j1, j2;
674 671
675 672 case 0:
676 673 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
677 674 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
678 675 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
679 676 HI(&t0) = j0;
680 677 HI(&t1) = j1;
681 678 HI(&t2) = j2;
682 679 LO(&t0) = 0;
683 680 LO(&t1) = 0;
684 681 LO(&t2) = 0;
685 682 x0 = ( x0 - t0 ) + y0;
686 683 x1 = ( x1 - t1 ) + y1;
687 684 x2 = ( x2 - t2 ) + y2;
688 685 z0 = x0 * x0;
689 686 z1 = x1 * x1;
690 687 z2 = x2 * x2;
691 688 t0 = z0 * ( qq1 + z0 * qq2 );
692 689 t1 = z1 * ( qq1 + z1 * qq2 );
693 690 t2 = z2 * ( qq1 + z2 * qq2 );
694 691 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
695 692 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
696 693 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
697 694 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
698 695 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
699 696 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
700 697 xsb0 = ( xsb0 >> 30 ) & 2;
701 698 xsb1 = ( xsb1 >> 30 ) & 2;
702 699 xsb2 = ( xsb2 >> 30 ) & 2;
703 700 n0 ^= ( xsb0 & ~( n0 << 1 ) );
704 701 n1 ^= ( xsb1 & ~( n1 << 1 ) );
705 702 n2 ^= ( xsb2 & ~( n2 << 1 ) );
706 703 xsb0 |= 1;
707 704 xsb1 |= 1;
708 705 xsb2 |= 1;
709 706 a0 = __vlibm_TBL_sincos_hi[j0+n0];
710 707 a1 = __vlibm_TBL_sincos_hi[j1+n1];
711 708 a2 = __vlibm_TBL_sincos_hi[j2+n2];
712 709 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
713 710 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
714 711 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
715 712 *py0 = ( a0 + t0 );
716 713 *py1 = ( a1 + t1 );
717 714 *py2 = ( a2 + t2 );
718 715 break;
719 716
720 717 case 1:
721 718 j0 = n0 & 1;
722 719 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
723 720 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
724 721 HI(&t1) = j1;
725 722 HI(&t2) = j2;
726 723 LO(&t1) = 0;
727 724 LO(&t2) = 0;
728 725 x0_or_one[0] = x0;
729 726 x0_or_one[2] = -x0;
730 727 y0_or_zero[0] = y0;
731 728 y0_or_zero[2] = -y0;
732 729 x1 = ( x1 - t1 ) + y1;
733 730 x2 = ( x2 - t2 ) + y2;
734 731 z0 = x0 * x0;
735 732 z1 = x1 * x1;
736 733 z2 = x2 * x2;
737 734 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
738 735 t1 = z1 * ( qq1 + z1 * qq2 );
739 736 t2 = z2 * ( qq1 + z2 * qq2 );
740 737 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
741 738 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
742 739 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
743 740 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
744 741 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
745 742 xsb1 = ( xsb1 >> 30 ) & 2;
746 743 xsb2 = ( xsb2 >> 30 ) & 2;
747 744 n1 ^= ( xsb1 & ~( n1 << 1 ) );
748 745 n2 ^= ( xsb2 & ~( n2 << 1 ) );
749 746 xsb1 |= 1;
750 747 xsb2 |= 1;
751 748 a1 = __vlibm_TBL_sincos_hi[j1+n1];
752 749 a2 = __vlibm_TBL_sincos_hi[j2+n2];
753 750 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
754 751 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
755 752 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
756 753 *py0 = t0;
757 754 *py1 = ( a1 + t1 );
758 755 *py2 = ( a2 + t2 );
759 756 break;
760 757
761 758 case 2:
762 759 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
763 760 j1 = n1 & 1;
764 761 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
765 762 HI(&t0) = j0;
766 763 HI(&t2) = j2;
767 764 LO(&t0) = 0;
768 765 LO(&t2) = 0;
769 766 x1_or_one[0] = x1;
770 767 x1_or_one[2] = -x1;
771 768 x0 = ( x0 - t0 ) + y0;
772 769 y1_or_zero[0] = y1;
773 770 y1_or_zero[2] = -y1;
774 771 x2 = ( x2 - t2 ) + y2;
775 772 z0 = x0 * x0;
776 773 z1 = x1 * x1;
777 774 z2 = x2 * x2;
778 775 t0 = z0 * ( qq1 + z0 * qq2 );
779 776 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
780 777 t2 = z2 * ( qq1 + z2 * qq2 );
781 778 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
782 779 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
783 780 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
784 781 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
785 782 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
786 783 xsb0 = ( xsb0 >> 30 ) & 2;
787 784 xsb2 = ( xsb2 >> 30 ) & 2;
788 785 n0 ^= ( xsb0 & ~( n0 << 1 ) );
789 786 n2 ^= ( xsb2 & ~( n2 << 1 ) );
790 787 xsb0 |= 1;
791 788 xsb2 |= 1;
792 789 a0 = __vlibm_TBL_sincos_hi[j0+n0];
793 790 a2 = __vlibm_TBL_sincos_hi[j2+n2];
794 791 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
795 792 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
796 793 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
797 794 *py0 = ( a0 + t0 );
798 795 *py1 = t1;
799 796 *py2 = ( a2 + t2 );
800 797 break;
801 798
802 799 case 3:
803 800 j0 = n0 & 1;
804 801 j1 = n1 & 1;
805 802 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
806 803 HI(&t2) = j2;
807 804 LO(&t2) = 0;
808 805 x0_or_one[0] = x0;
809 806 x0_or_one[2] = -x0;
810 807 x1_or_one[0] = x1;
811 808 x1_or_one[2] = -x1;
812 809 y0_or_zero[0] = y0;
813 810 y0_or_zero[2] = -y0;
814 811 y1_or_zero[0] = y1;
815 812 y1_or_zero[2] = -y1;
816 813 x2 = ( x2 - t2 ) + y2;
817 814 z0 = x0 * x0;
818 815 z1 = x1 * x1;
819 816 z2 = x2 * x2;
820 817 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
821 818 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
822 819 t2 = z2 * ( qq1 + z2 * qq2 );
823 820 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
824 821 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
825 822 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
826 823 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
827 824 xsb2 = ( xsb2 >> 30 ) & 2;
828 825 n2 ^= ( xsb2 & ~( n2 << 1 ) );
829 826 xsb2 |= 1;
830 827 a2 = __vlibm_TBL_sincos_hi[j2+n2];
831 828 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
832 829 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
833 830 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
834 831 *py0 = t0;
835 832 *py1 = t1;
836 833 *py2 = ( a2 + t2 );
837 834 break;
838 835
839 836 case 4:
840 837 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
841 838 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
842 839 j2 = n2 & 1;
843 840 HI(&t0) = j0;
844 841 HI(&t1) = j1;
845 842 LO(&t0) = 0;
846 843 LO(&t1) = 0;
847 844 x2_or_one[0] = x2;
848 845 x2_or_one[2] = -x2;
849 846 x0 = ( x0 - t0 ) + y0;
850 847 x1 = ( x1 - t1 ) + y1;
851 848 y2_or_zero[0] = y2;
852 849 y2_or_zero[2] = -y2;
853 850 z0 = x0 * x0;
854 851 z1 = x1 * x1;
855 852 z2 = x2 * x2;
856 853 t0 = z0 * ( qq1 + z0 * qq2 );
857 854 t1 = z1 * ( qq1 + z1 * qq2 );
858 855 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
859 856 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
860 857 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
861 858 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
862 859 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
863 860 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
864 861 xsb0 = ( xsb0 >> 30 ) & 2;
865 862 xsb1 = ( xsb1 >> 30 ) & 2;
866 863 n0 ^= ( xsb0 & ~( n0 << 1 ) );
867 864 n1 ^= ( xsb1 & ~( n1 << 1 ) );
868 865 xsb0 |= 1;
869 866 xsb1 |= 1;
870 867 a0 = __vlibm_TBL_sincos_hi[j0+n0];
871 868 a1 = __vlibm_TBL_sincos_hi[j1+n1];
872 869 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
873 870 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
874 871 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
875 872 *py0 = ( a0 + t0 );
876 873 *py1 = ( a1 + t1 );
877 874 *py2 = t2;
878 875 break;
879 876
880 877 case 5:
881 878 j0 = n0 & 1;
882 879 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
883 880 j2 = n2 & 1;
884 881 HI(&t1) = j1;
885 882 LO(&t1) = 0;
886 883 x0_or_one[0] = x0;
887 884 x0_or_one[2] = -x0;
888 885 x2_or_one[0] = x2;
889 886 x2_or_one[2] = -x2;
890 887 y0_or_zero[0] = y0;
891 888 y0_or_zero[2] = -y0;
892 889 x1 = ( x1 - t1 ) + y1;
893 890 y2_or_zero[0] = y2;
894 891 y2_or_zero[2] = -y2;
895 892 z0 = x0 * x0;
896 893 z1 = x1 * x1;
897 894 z2 = x2 * x2;
898 895 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
899 896 t1 = z1 * ( qq1 + z1 * qq2 );
900 897 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
901 898 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
902 899 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
903 900 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
904 901 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
905 902 xsb1 = ( xsb1 >> 30 ) & 2;
906 903 n1 ^= ( xsb1 & ~( n1 << 1 ) );
907 904 xsb1 |= 1;
908 905 a1 = __vlibm_TBL_sincos_hi[j1+n1];
909 906 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
910 907 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
911 908 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
912 909 *py0 = t0;
913 910 *py1 = ( a1 + t1 );
914 911 *py2 = t2;
915 912 break;
916 913
917 914 case 6:
918 915 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
919 916 j1 = n1 & 1;
920 917 j2 = n2 & 1;
921 918 HI(&t0) = j0;
922 919 LO(&t0) = 0;
923 920 x1_or_one[0] = x1;
924 921 x1_or_one[2] = -x1;
925 922 x2_or_one[0] = x2;
926 923 x2_or_one[2] = -x2;
927 924 x0 = ( x0 - t0 ) + y0;
928 925 y1_or_zero[0] = y1;
929 926 y1_or_zero[2] = -y1;
930 927 y2_or_zero[0] = y2;
931 928 y2_or_zero[2] = -y2;
932 929 z0 = x0 * x0;
933 930 z1 = x1 * x1;
934 931 z2 = x2 * x2;
935 932 t0 = z0 * ( qq1 + z0 * qq2 );
936 933 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
937 934 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
938 935 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
939 936 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
940 937 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
941 938 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
942 939 xsb0 = ( xsb0 >> 30 ) & 2;
943 940 n0 ^= ( xsb0 & ~( n0 << 1 ) );
944 941 xsb0 |= 1;
945 942 a0 = __vlibm_TBL_sincos_hi[j0+n0];
946 943 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
947 944 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
948 945 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
949 946 *py0 = ( a0 + t0 );
950 947 *py1 = t1;
951 948 *py2 = t2;
952 949 break;
953 950
954 951 case 7:
955 952 j0 = n0 & 1;
956 953 j1 = n1 & 1;
957 954 j2 = n2 & 1;
958 955 x0_or_one[0] = x0;
959 956 x0_or_one[2] = -x0;
960 957 x1_or_one[0] = x1;
961 958 x1_or_one[2] = -x1;
962 959 x2_or_one[0] = x2;
963 960 x2_or_one[2] = -x2;
964 961 y0_or_zero[0] = y0;
965 962 y0_or_zero[2] = -y0;
966 963 y1_or_zero[0] = y1;
967 964 y1_or_zero[2] = -y1;
968 965 y2_or_zero[0] = y2;
969 966 y2_or_zero[2] = -y2;
970 967 z0 = x0 * x0;
971 968 z1 = x1 * x1;
972 969 z2 = x2 * x2;
973 970 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
974 971 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
975 972 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
976 973 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
977 974 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
978 975 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
979 976 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
980 977 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
981 978 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
982 979 *py0 = t0;
983 980 *py1 = t1;
984 981 *py2 = t2;
985 982 break;
986 983 }
987 984
988 985 x += stridex;
989 986 y += stridey;
990 987 i = 0;
991 988 } while ( --n > 0 );
992 989
993 990 if ( i > 0 )
994 991 {
995 992 double fn0, fn1, a0, a1, w0, w1, y0, y1;
996 993 double t0, t1, z0, z1;
997 994 unsigned j0, j1;
998 995 int n0, n1;
999 996
1000 997 if ( i > 1 )
1001 998 {
1002 999 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1003 1000 fn1 = (double) n1;
1004 1001 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1005 1002 a1 = x1 - fn1 * pio2_1;
1006 1003 w1 = fn1 * pio2_2;
1007 1004 x1 = a1 - w1;
1008 1005 y1 = ( a1 - x1 ) - w1;
1009 1006 a1 = x1;
1010 1007 w1 = fn1 * pio2_3 - y1;
1011 1008 x1 = a1 - w1;
1012 1009 y1 = ( a1 - x1 ) - w1;
1013 1010 a1 = x1;
1014 1011 w1 = fn1 * pio2_3t - y1;
1015 1012 x1 = a1 - w1;
1016 1013 y1 = ( a1 - x1 ) - w1;
1017 1014 xsb1 = HI(&x1);
1018 1015 if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
1019 1016 {
1020 1017 j1 = n1 & 1;
1021 1018 x1_or_one[0] = x1;
1022 1019 x1_or_one[2] = -x1;
1023 1020 y1_or_zero[0] = y1;
1024 1021 y1_or_zero[2] = -y1;
1025 1022 z1 = x1 * x1;
1026 1023 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1027 1024 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1028 1025 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1029 1026 *py1 = t1;
1030 1027 }
1031 1028 else
1032 1029 {
1033 1030 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1034 1031 HI(&t1) = j1;
1035 1032 LO(&t1) = 0;
1036 1033 x1 = ( x1 - t1 ) + y1;
1037 1034 z1 = x1 * x1;
1038 1035 t1 = z1 * ( qq1 + z1 * qq2 );
1039 1036 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1040 1037 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1041 1038 xsb1 = ( xsb1 >> 30 ) & 2;
1042 1039 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1043 1040 xsb1 |= 1;
1044 1041 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1045 1042 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
1046 1043 *py1 = ( a1 + t1 );
1047 1044 }
1048 1045 }
1049 1046 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1050 1047 fn0 = (double) n0;
1051 1048 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1052 1049 a0 = x0 - fn0 * pio2_1;
1053 1050 w0 = fn0 * pio2_2;
1054 1051 x0 = a0 - w0;
1055 1052 y0 = ( a0 - x0 ) - w0;
1056 1053 a0 = x0;
1057 1054 w0 = fn0 * pio2_3 - y0;
1058 1055 x0 = a0 - w0;
1059 1056 y0 = ( a0 - x0 ) - w0;
1060 1057 a0 = x0;
1061 1058 w0 = fn0 * pio2_3t - y0;
1062 1059 x0 = a0 - w0;
1063 1060 y0 = ( a0 - x0 ) - w0;
1064 1061 xsb0 = HI(&x0);
1065 1062 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
1066 1063 {
1067 1064 j0 = n0 & 1;
1068 1065 x0_or_one[0] = x0;
1069 1066 x0_or_one[2] = -x0;
1070 1067 y0_or_zero[0] = y0;
1071 1068 y0_or_zero[2] = -y0;
1072 1069 z0 = x0 * x0;
1073 1070 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1074 1071 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1075 1072 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1076 1073 *py0 = t0;
1077 1074 }
1078 1075 else
1079 1076 {
1080 1077 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1081 1078 HI(&t0) = j0;
1082 1079 LO(&t0) = 0;
1083 1080 x0 = ( x0 - t0 ) + y0;
1084 1081 z0 = x0 * x0;
1085 1082 t0 = z0 * ( qq1 + z0 * qq2 );
1086 1083 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1087 1084 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1088 1085 xsb0 = ( xsb0 >> 30 ) & 2;
1089 1086 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1090 1087 xsb0 |= 1;
1091 1088 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1092 1089 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
1093 1090 *py0 = ( a0 + t0 );
1094 1091 }
1095 1092 }
1096 1093
1097 1094 if ( biguns )
1098 1095 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
1099 1096 }
↓ open down ↓ |
910 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX