Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vsin.c
+++ new/usr/src/lib/libmvec/common/__vsin.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
↓ open down ↓ |
20 lines elided |
↑ open up ↑ |
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 +#include <sys/ccompile.h>
31 32
32 33 #ifdef _LITTLE_ENDIAN
33 34 #define HI(x) *(1+(int*)x)
34 35 #define LO(x) *(unsigned*)x
35 36 #else
36 37 #define HI(x) *(int*)x
37 38 #define LO(x) *(1+(unsigned*)x)
38 39 #endif
39 40
40 41 #ifdef __RESTRICT
41 42 #define restrict _Restrict
42 43 #else
43 44 #define restrict
44 45 #endif
45 46
46 47 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
47 48
48 49 static const double
49 50 half[2] = { 0.5, -0.5 },
50 51 one = 1.0,
51 52 invpio2 = 0.636619772367581343075535,
52 53 pio2_1 = 1.570796326734125614166,
53 54 pio2_2 = 6.077100506303965976596e-11,
54 55 pio2_3 = 2.022266248711166455796e-21,
55 56 pio2_3t = 8.478427660368899643959e-32,
56 57 pp1 = -1.666666666605760465276263943134982554676e-0001,
57 58 pp2 = 8.333261209690963126718376566146180944442e-0003,
58 59 qq1 = -4.999999999977710986407023955908711557870e-0001,
59 60 qq2 = 4.166654863857219350645055881018842089580e-0002,
60 61 poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
61 62 -4.999999999999931701464060878888294524481e-0001 },
62 63 poly2[2]= { 8.333333332390951295683993455280336376663e-0003,
63 64 4.166666666394861917535640593963708222319e-0002 },
64 65 poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
65 66 -1.388888552656142867832756687736851681462e-0003 },
66 67 poly4[2]= { 2.753403624854277237649987622848330351110e-0006,
67 68 2.478519423681460796618128289454530524759e-0005 };
68 69
69 70 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
70 71
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
71 72 /* Don't __ the following; acomp will handle it */
72 73 extern double fabs( double );
73 74 extern void __vlibm_vsin_big( int, double *, int, double *, int, int );
74 75
75 76 void
76 77 __vsin( int n, double * restrict x, int stridex, double * restrict y,
77 78 int stridey )
78 79 {
79 80 double x0_or_one[4], x1_or_one[4], x2_or_one[4];
80 81 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
81 - double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
82 - unsigned hx0, hx1, hx2, xsb0, xsb1, xsb2;
83 - int i, biguns, nsave, sxsave, sysave;
84 -
82 + double x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
83 + unsigned hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
84 + int i, biguns, nsave, sxsave, sysave;
85 85 nsave = n;
86 86 xsave = x;
87 87 sxsave = stridex;
88 88 ysave = y;
89 89 sysave = stridey;
90 90 biguns = 0;
91 91
92 92 do
93 93 {
94 94 LOOP0:
95 95 xsb0 = HI(x);
96 96 hx0 = xsb0 & ~0x80000000;
97 97 if ( hx0 > 0x3fe921fb )
98 98 {
99 99 biguns = 1;
100 100 goto MEDIUM;
101 101 }
102 102 if ( hx0 < 0x3e400000 )
103 103 {
104 - volatile int v = *x;
105 104 *y = *x;
106 105 x += stridex;
107 106 y += stridey;
108 107 i = 0;
109 108 if ( --n <= 0 )
110 109 break;
111 110 goto LOOP0;
112 111 }
113 112 x0 = *x;
114 113 py0 = y;
115 114 x += stridex;
116 115 y += stridey;
117 116 i = 1;
118 117 if ( --n <= 0 )
119 118 break;
120 119
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
121 120 LOOP1:
122 121 xsb1 = HI(x);
123 122 hx1 = xsb1 & ~0x80000000;
124 123 if ( hx1 > 0x3fe921fb )
125 124 {
126 125 biguns = 2;
127 126 goto MEDIUM;
128 127 }
129 128 if ( hx1 < 0x3e400000 )
130 129 {
131 - volatile int v = *x;
132 130 *y = *x;
133 131 x += stridex;
134 132 y += stridey;
135 133 i = 1;
136 134 if ( --n <= 0 )
137 135 break;
138 136 goto LOOP1;
139 137 }
140 138 x1 = *x;
141 139 py1 = y;
142 140 x += stridex;
143 141 y += stridey;
144 142 i = 2;
145 143 if ( --n <= 0 )
146 144 break;
147 145
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
148 146 LOOP2:
149 147 xsb2 = HI(x);
150 148 hx2 = xsb2 & ~0x80000000;
151 149 if ( hx2 > 0x3fe921fb )
152 150 {
153 151 biguns = 3;
154 152 goto MEDIUM;
155 153 }
156 154 if ( hx2 < 0x3e400000 )
157 155 {
158 - volatile int v = *x;
159 156 *y = *x;
160 157 x += stridex;
161 158 y += stridey;
162 159 i = 2;
163 160 if ( --n <= 0 )
164 161 break;
165 162 goto LOOP2;
166 163 }
167 164 x2 = *x;
168 165 py2 = y;
169 166
170 167 i = ( hx0 - 0x3fc90000 ) >> 31;
171 168 i |= ( ( hx1 - 0x3fc90000 ) >> 30 ) & 2;
172 169 i |= ( ( hx2 - 0x3fc90000 ) >> 29 ) & 4;
173 170 switch ( i )
174 171 {
175 172 double a0, a1, a2, w0, w1, w2;
176 173 double t0, t1, t2, z0, z1, z2;
177 174 unsigned j0, j1, j2;
178 175
179 176 case 0:
180 177 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
181 178 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
182 179 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
183 180 HI(&t0) = j0;
184 181 HI(&t1) = j1;
185 182 HI(&t2) = j2;
186 183 LO(&t0) = 0;
187 184 LO(&t1) = 0;
188 185 LO(&t2) = 0;
189 186 x0 -= t0;
190 187 x1 -= t1;
191 188 x2 -= t2;
192 189 z0 = x0 * x0;
193 190 z1 = x1 * x1;
194 191 z2 = x2 * x2;
195 192 t0 = z0 * ( qq1 + z0 * qq2 );
196 193 t1 = z1 * ( qq1 + z1 * qq2 );
197 194 t2 = z2 * ( qq1 + z2 * qq2 );
198 195 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
199 196 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
200 197 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
201 198 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
202 199 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
203 200 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
204 201 xsb0 = ( xsb0 >> 30 ) & 2;
205 202 xsb1 = ( xsb1 >> 30 ) & 2;
206 203 xsb2 = ( xsb2 >> 30 ) & 2;
207 204 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
208 205 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
209 206 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
210 207 t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0];
211 208 t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1];
212 209 t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2];
213 210 *py0 = a0 + t0;
214 211 *py1 = a1 + t1;
215 212 *py2 = a2 + t2;
216 213 break;
217 214
218 215 case 1:
219 216 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
220 217 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
221 218 HI(&t1) = j1;
222 219 HI(&t2) = j2;
223 220 LO(&t1) = 0;
224 221 LO(&t2) = 0;
225 222 x1 -= t1;
226 223 x2 -= t2;
227 224 z0 = x0 * x0;
228 225 z1 = x1 * x1;
229 226 z2 = x2 * x2;
230 227 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
231 228 t1 = z1 * ( qq1 + z1 * qq2 );
232 229 t2 = z2 * ( qq1 + z2 * qq2 );
233 230 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
234 231 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
235 232 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
236 233 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
237 234 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
238 235 xsb1 = ( xsb1 >> 30 ) & 2;
239 236 xsb2 = ( xsb2 >> 30 ) & 2;
240 237 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
241 238 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
242 239 t0 = x0 + x0 * t0;
243 240 t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1];
244 241 t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2];
245 242 *py0 = t0;
246 243 *py1 = a1 + t1;
247 244 *py2 = a2 + t2;
248 245 break;
249 246
250 247 case 2:
251 248 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
252 249 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
253 250 HI(&t0) = j0;
254 251 HI(&t2) = j2;
255 252 LO(&t0) = 0;
256 253 LO(&t2) = 0;
257 254 x0 -= t0;
258 255 x2 -= t2;
259 256 z0 = x0 * x0;
260 257 z1 = x1 * x1;
261 258 z2 = x2 * x2;
262 259 t0 = z0 * ( qq1 + z0 * qq2 );
263 260 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
264 261 t2 = z2 * ( qq1 + z2 * qq2 );
265 262 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
266 263 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
267 264 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
268 265 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
269 266 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
270 267 xsb0 = ( xsb0 >> 30 ) & 2;
271 268 xsb2 = ( xsb2 >> 30 ) & 2;
272 269 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
273 270 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
274 271 t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0];
275 272 t1 = x1 + x1 * t1;
276 273 t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2];
277 274 *py0 = a0 + t0;
278 275 *py1 = t1;
279 276 *py2 = a2 + t2;
280 277 break;
281 278
282 279 case 3:
283 280 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
284 281 HI(&t2) = j2;
285 282 LO(&t2) = 0;
286 283 x2 -= t2;
287 284 z0 = x0 * x0;
288 285 z1 = x1 * x1;
289 286 z2 = x2 * x2;
290 287 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
291 288 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
292 289 t2 = z2 * ( qq1 + z2 * qq2 );
293 290 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
294 291 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
295 292 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
296 293 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
297 294 xsb2 = ( xsb2 >> 30 ) & 2;
298 295 a2 = __vlibm_TBL_sincos_hi[j2+xsb2];
299 296 t0 = x0 + x0 * t0;
300 297 t1 = x1 + x1 * t1;
301 298 t2 = ( __vlibm_TBL_sincos_hi[j2+1] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+xsb2];
302 299 *py0 = t0;
303 300 *py1 = t1;
304 301 *py2 = a2 + t2;
305 302 break;
306 303
307 304 case 4:
308 305 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
309 306 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
310 307 HI(&t0) = j0;
311 308 HI(&t1) = j1;
312 309 LO(&t0) = 0;
313 310 LO(&t1) = 0;
314 311 x0 -= t0;
315 312 x1 -= t1;
316 313 z0 = x0 * x0;
317 314 z1 = x1 * x1;
318 315 z2 = x2 * x2;
319 316 t0 = z0 * ( qq1 + z0 * qq2 );
320 317 t1 = z1 * ( qq1 + z1 * qq2 );
321 318 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
322 319 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
323 320 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
324 321 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
325 322 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
326 323 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
327 324 xsb0 = ( xsb0 >> 30 ) & 2;
328 325 xsb1 = ( xsb1 >> 30 ) & 2;
329 326 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
330 327 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
331 328 t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0];
332 329 t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1];
333 330 t2 = x2 + x2 * t2;
334 331 *py0 = a0 + t0;
335 332 *py1 = a1 + t1;
336 333 *py2 = t2;
337 334 break;
338 335
339 336 case 5:
340 337 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
341 338 HI(&t1) = j1;
342 339 LO(&t1) = 0;
343 340 x1 -= t1;
344 341 z0 = x0 * x0;
345 342 z1 = x1 * x1;
346 343 z2 = x2 * x2;
347 344 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
348 345 t1 = z1 * ( qq1 + z1 * qq2 );
349 346 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
350 347 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
351 348 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
352 349 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
353 350 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
354 351 xsb1 = ( xsb1 >> 30 ) & 2;
355 352 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
356 353 t0 = x0 + x0 * t0;
357 354 t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1];
358 355 t2 = x2 + x2 * t2;
359 356 *py0 = t0;
360 357 *py1 = a1 + t1;
361 358 *py2 = t2;
362 359 break;
363 360
364 361 case 6:
365 362 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
366 363 HI(&t0) = j0;
367 364 LO(&t0) = 0;
368 365 x0 -= t0;
369 366 z0 = x0 * x0;
370 367 z1 = x1 * x1;
371 368 z2 = x2 * x2;
372 369 t0 = z0 * ( qq1 + z0 * qq2 );
373 370 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
374 371 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
375 372 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
376 373 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
377 374 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
378 375 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
379 376 xsb0 = ( xsb0 >> 30 ) & 2;
380 377 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
381 378 t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0];
382 379 t1 = x1 + x1 * t1;
383 380 t2 = x2 + x2 * t2;
384 381 *py0 = a0 + t0;
385 382 *py1 = t1;
386 383 *py2 = t2;
387 384 break;
388 385
389 386 case 7:
390 387 z0 = x0 * x0;
391 388 z1 = x1 * x1;
392 389 z2 = x2 * x2;
393 390 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
394 391 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
395 392 t2 = z2 * ( poly3[0] + z2 * poly4[0] );
396 393 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
397 394 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
398 395 t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
399 396 t0 = x0 + x0 * t0;
400 397 t1 = x1 + x1 * t1;
401 398 t2 = x2 + x2 * t2;
402 399 *py0 = t0;
403 400 *py1 = t1;
404 401 *py2 = t2;
405 402 break;
406 403 }
407 404
408 405 x += stridex;
409 406 y += stridey;
410 407 i = 0;
411 408 } while ( --n > 0 );
412 409
413 410 if ( i > 0 )
414 411 {
415 412 double a0, a1, w0, w1;
416 413 double t0, t1, z0, z1;
417 414 unsigned j0, j1;
418 415
419 416 if ( i > 1 )
420 417 {
421 418 if ( hx1 < 0x3fc90000 )
422 419 {
423 420 z1 = x1 * x1;
424 421 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
425 422 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
426 423 t1 = x1 + x1 * t1;
427 424 *py1 = t1;
428 425 }
429 426 else
430 427 {
431 428 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
432 429 HI(&t1) = j1;
433 430 LO(&t1) = 0;
434 431 x1 -= t1;
435 432 z1 = x1 * x1;
436 433 t1 = z1 * ( qq1 + z1 * qq2 );
437 434 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
438 435 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
439 436 xsb1 = ( xsb1 >> 30 ) & 2;
440 437 a1 = __vlibm_TBL_sincos_hi[j1+xsb1];
441 438 t1 = ( __vlibm_TBL_sincos_hi[j1+1] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+xsb1];
442 439 *py1 = a1 + t1;
443 440 }
444 441 }
445 442 if ( hx0 < 0x3fc90000 )
446 443 {
447 444 z0 = x0 * x0;
448 445 t0 = z0 * ( poly3[0] + z0 * poly4[0] );
449 446 t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
450 447 t0 = x0 + x0 * t0;
451 448 *py0 = t0;
452 449 }
453 450 else
454 451 {
455 452 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
456 453 HI(&t0) = j0;
457 454 LO(&t0) = 0;
458 455 x0 -= t0;
459 456 z0 = x0 * x0;
460 457 t0 = z0 * ( qq1 + z0 * qq2 );
461 458 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
462 459 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
463 460 xsb0 = ( xsb0 >> 30 ) & 2;
464 461 a0 = __vlibm_TBL_sincos_hi[j0+xsb0];
465 462 t0 = ( __vlibm_TBL_sincos_hi[j0+1] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+xsb0];
466 463 *py0 = a0 + t0;
467 464 }
468 465 }
469 466
470 467 return;
471 468
472 469 /*
473 470 * MEDIUM RANGE PROCESSING
474 471 * Jump here at first sign of medium range argument. We are a bit
475 472 * confused due to the jump.. fix up several variables and jump into
476 473 * the nth loop, same as was being processed above.
477 474 */
478 475
479 476 MEDIUM:
480 477
481 478 x0_or_one[1] = 1.0;
482 479 x1_or_one[1] = 1.0;
483 480 x2_or_one[1] = 1.0;
484 481 x0_or_one[3] = -1.0;
485 482 x1_or_one[3] = -1.0;
486 483 x2_or_one[3] = -1.0;
487 484 y0_or_zero[1] = 0.0;
488 485 y1_or_zero[1] = 0.0;
489 486 y2_or_zero[1] = 0.0;
490 487 y0_or_zero[3] = 0.0;
491 488 y1_or_zero[3] = 0.0;
492 489 y2_or_zero[3] = 0.0;
493 490
494 491 if ( biguns == 3 )
495 492 {
496 493 biguns = 0;
497 494 xsb0 = xsb0 >> 31;
498 495 xsb1 = xsb1 >> 31;
499 496 goto loop2;
500 497 }
501 498 else if ( biguns == 2 )
502 499 {
↓ open down ↓ |
334 lines elided |
↑ open up ↑ |
503 500 xsb0 = xsb0 >> 31;
504 501 biguns = 0;
505 502 goto loop1;
506 503 }
507 504 biguns = 0;
508 505
509 506 do
510 507 {
511 508 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
512 509 unsigned hx;
513 - int n0, n1, n2;
510 + int n0, n1, n2;
514 511
515 512 loop0:
516 513 hx = HI(x);
517 514 xsb0 = hx >> 31;
518 515 hx &= ~0x80000000;
519 516 if ( hx < 0x3e400000 )
520 517 {
521 - volatile int v = *x;
522 518 *y = *x;
523 519 x += stridex;
524 520 y += stridey;
525 521 i = 0;
526 522 if ( --n <= 0 )
527 523 break;
528 524 goto loop0;
529 525 }
530 526 if ( hx > 0x413921fb )
531 527 {
532 528 if ( hx >= 0x7ff00000 )
533 529 {
534 530 x0 = *x;
535 531 *y = x0 - x0;
536 532 }
537 533 else
538 534 biguns = 1;
539 535 x += stridex;
540 536 y += stridey;
541 537 i = 0;
542 538 if ( --n <= 0 )
543 539 break;
544 540 goto loop0;
545 541 }
546 542 x0 = *x;
547 543 py0 = y;
548 544 x += stridex;
549 545 y += stridey;
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
550 546 i = 1;
551 547 if ( --n <= 0 )
552 548 break;
553 549
554 550 loop1:
555 551 hx = HI(x);
556 552 xsb1 = hx >> 31;
557 553 hx &= ~0x80000000;
558 554 if ( hx < 0x3e400000 )
559 555 {
560 - volatile int v = *x;
561 556 *y = *x;
562 557 x += stridex;
563 558 y += stridey;
564 559 i = 1;
565 560 if ( --n <= 0 )
566 561 break;
567 562 goto loop1;
568 563 }
569 564 if ( hx > 0x413921fb )
570 565 {
571 566 if ( hx >= 0x7ff00000 )
572 567 {
573 568 x1 = *x;
574 569 *y = x1 - x1;
575 570 }
576 571 else
577 572 biguns = 1;
578 573 x += stridex;
579 574 y += stridey;
580 575 i = 1;
581 576 if ( --n <= 0 )
582 577 break;
583 578 goto loop1;
584 579 }
585 580 x1 = *x;
586 581 py1 = y;
587 582 x += stridex;
588 583 y += stridey;
↓ open down ↓ |
18 lines elided |
↑ open up ↑ |
589 584 i = 2;
590 585 if ( --n <= 0 )
591 586 break;
592 587
593 588 loop2:
594 589 hx = HI(x);
595 590 xsb2 = hx >> 31;
596 591 hx &= ~0x80000000;
597 592 if ( hx < 0x3e400000 )
598 593 {
599 - volatile int v = *x;
600 594 *y = *x;
601 595 x += stridex;
602 596 y += stridey;
603 597 i = 2;
604 598 if ( --n <= 0 )
605 599 break;
606 600 goto loop2;
607 601 }
608 602 if ( hx > 0x413921fb )
609 603 {
610 604 if ( hx >= 0x7ff00000 )
611 605 {
612 606 x2 = *x;
613 607 *y = x2 - x2;
614 608 }
615 609 else
616 610 biguns = 1;
617 611 x += stridex;
618 612 y += stridey;
619 613 i = 2;
620 614 if ( --n <= 0 )
621 615 break;
622 616 goto loop2;
623 617 }
624 618 x2 = *x;
625 619 py2 = y;
626 620
627 621 n0 = (int) ( x0 * invpio2 + half[xsb0] );
628 622 n1 = (int) ( x1 * invpio2 + half[xsb1] );
629 623 n2 = (int) ( x2 * invpio2 + half[xsb2] );
630 624 fn0 = (double) n0;
631 625 fn1 = (double) n1;
632 626 fn2 = (double) n2;
633 627 n0 &= 3;
634 628 n1 &= 3;
635 629 n2 &= 3;
636 630 a0 = x0 - fn0 * pio2_1;
637 631 a1 = x1 - fn1 * pio2_1;
638 632 a2 = x2 - fn2 * pio2_1;
639 633 w0 = fn0 * pio2_2;
640 634 w1 = fn1 * pio2_2;
641 635 w2 = fn2 * pio2_2;
642 636 x0 = a0 - w0;
643 637 x1 = a1 - w1;
644 638 x2 = a2 - w2;
645 639 y0 = ( a0 - x0 ) - w0;
646 640 y1 = ( a1 - x1 ) - w1;
647 641 y2 = ( a2 - x2 ) - w2;
648 642 a0 = x0;
649 643 a1 = x1;
650 644 a2 = x2;
651 645 w0 = fn0 * pio2_3 - y0;
652 646 w1 = fn1 * pio2_3 - y1;
653 647 w2 = fn2 * pio2_3 - y2;
654 648 x0 = a0 - w0;
655 649 x1 = a1 - w1;
656 650 x2 = a2 - w2;
657 651 y0 = ( a0 - x0 ) - w0;
658 652 y1 = ( a1 - x1 ) - w1;
659 653 y2 = ( a2 - x2 ) - w2;
660 654 a0 = x0;
661 655 a1 = x1;
662 656 a2 = x2;
663 657 w0 = fn0 * pio2_3t - y0;
664 658 w1 = fn1 * pio2_3t - y1;
665 659 w2 = fn2 * pio2_3t - y2;
666 660 x0 = a0 - w0;
667 661 x1 = a1 - w1;
668 662 x2 = a2 - w2;
669 663 y0 = ( a0 - x0 ) - w0;
670 664 y1 = ( a1 - x1 ) - w1;
671 665 y2 = ( a2 - x2 ) - w2;
672 666 xsb0 = HI(&x0);
673 667 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
674 668 xsb1 = HI(&x1);
675 669 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
676 670 xsb2 = HI(&x2);
677 671 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
678 672 switch ( i )
679 673 {
680 674 double t0, t1, t2, z0, z1, z2;
681 675 unsigned j0, j1, j2;
682 676
683 677 case 0:
684 678 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
685 679 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
686 680 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
687 681 HI(&t0) = j0;
688 682 HI(&t1) = j1;
689 683 HI(&t2) = j2;
690 684 LO(&t0) = 0;
691 685 LO(&t1) = 0;
692 686 LO(&t2) = 0;
693 687 x0 = ( x0 - t0 ) + y0;
694 688 x1 = ( x1 - t1 ) + y1;
695 689 x2 = ( x2 - t2 ) + y2;
696 690 z0 = x0 * x0;
697 691 z1 = x1 * x1;
698 692 z2 = x2 * x2;
699 693 t0 = z0 * ( qq1 + z0 * qq2 );
700 694 t1 = z1 * ( qq1 + z1 * qq2 );
701 695 t2 = z2 * ( qq1 + z2 * qq2 );
702 696 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
703 697 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
704 698 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
705 699 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
706 700 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
707 701 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
708 702 xsb0 = ( xsb0 >> 30 ) & 2;
709 703 xsb1 = ( xsb1 >> 30 ) & 2;
710 704 xsb2 = ( xsb2 >> 30 ) & 2;
711 705 n0 ^= ( xsb0 & ~( n0 << 1 ) );
712 706 n1 ^= ( xsb1 & ~( n1 << 1 ) );
713 707 n2 ^= ( xsb2 & ~( n2 << 1 ) );
714 708 xsb0 |= 1;
715 709 xsb1 |= 1;
716 710 xsb2 |= 1;
717 711 a0 = __vlibm_TBL_sincos_hi[j0+n0];
718 712 a1 = __vlibm_TBL_sincos_hi[j1+n1];
719 713 a2 = __vlibm_TBL_sincos_hi[j2+n2];
720 714 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
721 715 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
722 716 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
723 717 *py0 = ( a0 + t0 );
724 718 *py1 = ( a1 + t1 );
725 719 *py2 = ( a2 + t2 );
726 720 break;
727 721
728 722 case 1:
729 723 j0 = n0 & 1;
730 724 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
731 725 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
732 726 HI(&t1) = j1;
733 727 HI(&t2) = j2;
734 728 LO(&t1) = 0;
735 729 LO(&t2) = 0;
736 730 x0_or_one[0] = x0;
737 731 x0_or_one[2] = -x0;
738 732 y0_or_zero[0] = y0;
739 733 y0_or_zero[2] = -y0;
740 734 x1 = ( x1 - t1 ) + y1;
741 735 x2 = ( x2 - t2 ) + y2;
742 736 z0 = x0 * x0;
743 737 z1 = x1 * x1;
744 738 z2 = x2 * x2;
745 739 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
746 740 t1 = z1 * ( qq1 + z1 * qq2 );
747 741 t2 = z2 * ( qq1 + z2 * qq2 );
748 742 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
749 743 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
750 744 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
751 745 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
752 746 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
753 747 xsb1 = ( xsb1 >> 30 ) & 2;
754 748 xsb2 = ( xsb2 >> 30 ) & 2;
755 749 n1 ^= ( xsb1 & ~( n1 << 1 ) );
756 750 n2 ^= ( xsb2 & ~( n2 << 1 ) );
757 751 xsb1 |= 1;
758 752 xsb2 |= 1;
759 753 a1 = __vlibm_TBL_sincos_hi[j1+n1];
760 754 a2 = __vlibm_TBL_sincos_hi[j2+n2];
761 755 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
762 756 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
763 757 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
764 758 *py0 = t0;
765 759 *py1 = ( a1 + t1 );
766 760 *py2 = ( a2 + t2 );
767 761 break;
768 762
769 763 case 2:
770 764 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
771 765 j1 = n1 & 1;
772 766 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
773 767 HI(&t0) = j0;
774 768 HI(&t2) = j2;
775 769 LO(&t0) = 0;
776 770 LO(&t2) = 0;
777 771 x1_or_one[0] = x1;
778 772 x1_or_one[2] = -x1;
779 773 x0 = ( x0 - t0 ) + y0;
780 774 y1_or_zero[0] = y1;
781 775 y1_or_zero[2] = -y1;
782 776 x2 = ( x2 - t2 ) + y2;
783 777 z0 = x0 * x0;
784 778 z1 = x1 * x1;
785 779 z2 = x2 * x2;
786 780 t0 = z0 * ( qq1 + z0 * qq2 );
787 781 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
788 782 t2 = z2 * ( qq1 + z2 * qq2 );
789 783 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
790 784 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
791 785 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
792 786 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
793 787 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
794 788 xsb0 = ( xsb0 >> 30 ) & 2;
795 789 xsb2 = ( xsb2 >> 30 ) & 2;
796 790 n0 ^= ( xsb0 & ~( n0 << 1 ) );
797 791 n2 ^= ( xsb2 & ~( n2 << 1 ) );
798 792 xsb0 |= 1;
799 793 xsb2 |= 1;
800 794 a0 = __vlibm_TBL_sincos_hi[j0+n0];
801 795 a2 = __vlibm_TBL_sincos_hi[j2+n2];
802 796 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
803 797 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
804 798 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
805 799 *py0 = ( a0 + t0 );
806 800 *py1 = t1;
807 801 *py2 = ( a2 + t2 );
808 802 break;
809 803
810 804 case 3:
811 805 j0 = n0 & 1;
812 806 j1 = n1 & 1;
813 807 j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
814 808 HI(&t2) = j2;
815 809 LO(&t2) = 0;
816 810 x0_or_one[0] = x0;
817 811 x0_or_one[2] = -x0;
818 812 x1_or_one[0] = x1;
819 813 x1_or_one[2] = -x1;
820 814 y0_or_zero[0] = y0;
821 815 y0_or_zero[2] = -y0;
822 816 y1_or_zero[0] = y1;
823 817 y1_or_zero[2] = -y1;
824 818 x2 = ( x2 - t2 ) + y2;
825 819 z0 = x0 * x0;
826 820 z1 = x1 * x1;
827 821 z2 = x2 * x2;
828 822 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
829 823 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
830 824 t2 = z2 * ( qq1 + z2 * qq2 );
831 825 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
832 826 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
833 827 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
834 828 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
835 829 xsb2 = ( xsb2 >> 30 ) & 2;
836 830 n2 ^= ( xsb2 & ~( n2 << 1 ) );
837 831 xsb2 |= 1;
838 832 a2 = __vlibm_TBL_sincos_hi[j2+n2];
839 833 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
840 834 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
841 835 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
842 836 *py0 = t0;
843 837 *py1 = t1;
844 838 *py2 = ( a2 + t2 );
845 839 break;
846 840
847 841 case 4:
848 842 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
849 843 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
850 844 j2 = n2 & 1;
851 845 HI(&t0) = j0;
852 846 HI(&t1) = j1;
853 847 LO(&t0) = 0;
854 848 LO(&t1) = 0;
855 849 x2_or_one[0] = x2;
856 850 x2_or_one[2] = -x2;
857 851 x0 = ( x0 - t0 ) + y0;
858 852 x1 = ( x1 - t1 ) + y1;
859 853 y2_or_zero[0] = y2;
860 854 y2_or_zero[2] = -y2;
861 855 z0 = x0 * x0;
862 856 z1 = x1 * x1;
863 857 z2 = x2 * x2;
864 858 t0 = z0 * ( qq1 + z0 * qq2 );
865 859 t1 = z1 * ( qq1 + z1 * qq2 );
866 860 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
867 861 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
868 862 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
869 863 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
870 864 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
871 865 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
872 866 xsb0 = ( xsb0 >> 30 ) & 2;
873 867 xsb1 = ( xsb1 >> 30 ) & 2;
874 868 n0 ^= ( xsb0 & ~( n0 << 1 ) );
875 869 n1 ^= ( xsb1 & ~( n1 << 1 ) );
876 870 xsb0 |= 1;
877 871 xsb1 |= 1;
878 872 a0 = __vlibm_TBL_sincos_hi[j0+n0];
879 873 a1 = __vlibm_TBL_sincos_hi[j1+n1];
880 874 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
881 875 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
882 876 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
883 877 *py0 = ( a0 + t0 );
884 878 *py1 = ( a1 + t1 );
885 879 *py2 = t2;
886 880 break;
887 881
888 882 case 5:
889 883 j0 = n0 & 1;
890 884 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
891 885 j2 = n2 & 1;
892 886 HI(&t1) = j1;
893 887 LO(&t1) = 0;
894 888 x0_or_one[0] = x0;
895 889 x0_or_one[2] = -x0;
896 890 x2_or_one[0] = x2;
897 891 x2_or_one[2] = -x2;
898 892 y0_or_zero[0] = y0;
899 893 y0_or_zero[2] = -y0;
900 894 x1 = ( x1 - t1 ) + y1;
901 895 y2_or_zero[0] = y2;
902 896 y2_or_zero[2] = -y2;
903 897 z0 = x0 * x0;
904 898 z1 = x1 * x1;
905 899 z2 = x2 * x2;
906 900 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
907 901 t1 = z1 * ( qq1 + z1 * qq2 );
908 902 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
909 903 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
910 904 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
911 905 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
912 906 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
913 907 xsb1 = ( xsb1 >> 30 ) & 2;
914 908 n1 ^= ( xsb1 & ~( n1 << 1 ) );
915 909 xsb1 |= 1;
916 910 a1 = __vlibm_TBL_sincos_hi[j1+n1];
917 911 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
918 912 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
919 913 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
920 914 *py0 = t0;
921 915 *py1 = ( a1 + t1 );
922 916 *py2 = t2;
923 917 break;
924 918
925 919 case 6:
926 920 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
927 921 j1 = n1 & 1;
928 922 j2 = n2 & 1;
929 923 HI(&t0) = j0;
930 924 LO(&t0) = 0;
931 925 x1_or_one[0] = x1;
932 926 x1_or_one[2] = -x1;
933 927 x2_or_one[0] = x2;
934 928 x2_or_one[2] = -x2;
935 929 x0 = ( x0 - t0 ) + y0;
936 930 y1_or_zero[0] = y1;
937 931 y1_or_zero[2] = -y1;
938 932 y2_or_zero[0] = y2;
939 933 y2_or_zero[2] = -y2;
940 934 z0 = x0 * x0;
941 935 z1 = x1 * x1;
942 936 z2 = x2 * x2;
943 937 t0 = z0 * ( qq1 + z0 * qq2 );
944 938 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
945 939 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
946 940 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
947 941 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
948 942 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
949 943 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
950 944 xsb0 = ( xsb0 >> 30 ) & 2;
951 945 n0 ^= ( xsb0 & ~( n0 << 1 ) );
952 946 xsb0 |= 1;
953 947 a0 = __vlibm_TBL_sincos_hi[j0+n0];
954 948 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
955 949 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
956 950 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
957 951 *py0 = ( a0 + t0 );
958 952 *py1 = t1;
959 953 *py2 = t2;
960 954 break;
961 955
962 956 case 7:
963 957 j0 = n0 & 1;
964 958 j1 = n1 & 1;
965 959 j2 = n2 & 1;
966 960 x0_or_one[0] = x0;
967 961 x0_or_one[2] = -x0;
968 962 x1_or_one[0] = x1;
969 963 x1_or_one[2] = -x1;
970 964 x2_or_one[0] = x2;
971 965 x2_or_one[2] = -x2;
972 966 y0_or_zero[0] = y0;
973 967 y0_or_zero[2] = -y0;
974 968 y1_or_zero[0] = y1;
975 969 y1_or_zero[2] = -y1;
976 970 y2_or_zero[0] = y2;
977 971 y2_or_zero[2] = -y2;
978 972 z0 = x0 * x0;
979 973 z1 = x1 * x1;
980 974 z2 = x2 * x2;
981 975 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
982 976 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
983 977 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
984 978 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
985 979 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
986 980 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
987 981 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
988 982 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
989 983 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
990 984 *py0 = t0;
991 985 *py1 = t1;
992 986 *py2 = t2;
993 987 break;
994 988 }
995 989
996 990 x += stridex;
997 991 y += stridey;
998 992 i = 0;
999 993 } while ( --n > 0 );
1000 994
1001 995 if ( i > 0 )
1002 996 {
1003 997 double fn0, fn1, a0, a1, w0, w1, y0, y1;
1004 998 double t0, t1, z0, z1;
1005 999 unsigned j0, j1;
1006 1000 int n0, n1;
1007 1001
1008 1002 if ( i > 1 )
1009 1003 {
1010 1004 n1 = (int) ( x1 * invpio2 + half[xsb1] );
1011 1005 fn1 = (double) n1;
1012 1006 n1 &= 3;
1013 1007 a1 = x1 - fn1 * pio2_1;
1014 1008 w1 = fn1 * pio2_2;
1015 1009 x1 = a1 - w1;
1016 1010 y1 = ( a1 - x1 ) - w1;
1017 1011 a1 = x1;
1018 1012 w1 = fn1 * pio2_3 - y1;
1019 1013 x1 = a1 - w1;
1020 1014 y1 = ( a1 - x1 ) - w1;
1021 1015 a1 = x1;
1022 1016 w1 = fn1 * pio2_3t - y1;
1023 1017 x1 = a1 - w1;
1024 1018 y1 = ( a1 - x1 ) - w1;
1025 1019 xsb1 = HI(&x1);
1026 1020 if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
1027 1021 {
1028 1022 j1 = n1 & 1;
1029 1023 x1_or_one[0] = x1;
1030 1024 x1_or_one[2] = -x1;
1031 1025 y1_or_zero[0] = y1;
1032 1026 y1_or_zero[2] = -y1;
1033 1027 z1 = x1 * x1;
1034 1028 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1035 1029 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1036 1030 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1037 1031 *py1 = t1;
1038 1032 }
1039 1033 else
1040 1034 {
1041 1035 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1042 1036 HI(&t1) = j1;
1043 1037 LO(&t1) = 0;
1044 1038 x1 = ( x1 - t1 ) + y1;
1045 1039 z1 = x1 * x1;
1046 1040 t1 = z1 * ( qq1 + z1 * qq2 );
1047 1041 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1048 1042 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1049 1043 xsb1 = ( xsb1 >> 30 ) & 2;
1050 1044 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1051 1045 xsb1 |= 1;
1052 1046 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1053 1047 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
1054 1048 *py1 = ( a1 + t1 );
1055 1049 }
1056 1050 }
1057 1051 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1058 1052 fn0 = (double) n0;
1059 1053 n0 &= 3;
1060 1054 a0 = x0 - fn0 * pio2_1;
1061 1055 w0 = fn0 * pio2_2;
1062 1056 x0 = a0 - w0;
1063 1057 y0 = ( a0 - x0 ) - w0;
1064 1058 a0 = x0;
1065 1059 w0 = fn0 * pio2_3 - y0;
1066 1060 x0 = a0 - w0;
1067 1061 y0 = ( a0 - x0 ) - w0;
1068 1062 a0 = x0;
1069 1063 w0 = fn0 * pio2_3t - y0;
1070 1064 x0 = a0 - w0;
1071 1065 y0 = ( a0 - x0 ) - w0;
1072 1066 xsb0 = HI(&x0);
1073 1067 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
1074 1068 {
1075 1069 j0 = n0 & 1;
1076 1070 x0_or_one[0] = x0;
1077 1071 x0_or_one[2] = -x0;
1078 1072 y0_or_zero[0] = y0;
1079 1073 y0_or_zero[2] = -y0;
1080 1074 z0 = x0 * x0;
1081 1075 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1082 1076 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1083 1077 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1084 1078 *py0 = t0;
1085 1079 }
1086 1080 else
1087 1081 {
1088 1082 j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1089 1083 HI(&t0) = j0;
1090 1084 LO(&t0) = 0;
1091 1085 x0 = ( x0 - t0 ) + y0;
1092 1086 z0 = x0 * x0;
1093 1087 t0 = z0 * ( qq1 + z0 * qq2 );
1094 1088 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1095 1089 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1096 1090 xsb0 = ( xsb0 >> 30 ) & 2;
1097 1091 n0 ^= ( xsb0 & ~( n0 << 1 ) );
1098 1092 xsb0 |= 1;
1099 1093 a0 = __vlibm_TBL_sincos_hi[j0+n0];
1100 1094 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
1101 1095 *py0 = ( a0 + t0 );
1102 1096 }
1103 1097 }
1104 1098
1105 1099 if ( biguns )
1106 1100 __vlibm_vsin_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
1107 1101 }
↓ open down ↓ |
498 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX