1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <sys/isa_defs.h> 31 32 #ifdef _LITTLE_ENDIAN 33 #define HI(x) *(1+(int*)x) 34 #define LO(x) *(unsigned*)x 35 #else 36 #define HI(x) *(int*)x 37 #define LO(x) *(1+(unsigned*)x) 38 #endif 39 40 #ifdef __RESTRICT 41 #define restrict _Restrict 42 #else 43 #define restrict 44 #endif 45 46 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[]; 47 48 static const double 49 half[2] = { 0.5, -0.5 }, 50 one = 1.0, 51 invpio2 = 0.636619772367581343075535, 52 pio2_1 = 1.570796326734125614166, 53 pio2_2 = 6.077100506303965976596e-11, 54 pio2_3 = 2.022266248711166455796e-21, 55 pio2_3t = 8.478427660368899643959e-32, 56 pp1 = -1.666666666605760465276263943134982554676e-0001, 57 pp2 = 8.333261209690963126718376566146180944442e-0003, 58 qq1 = -4.999999999977710986407023955908711557870e-0001, 59 qq2 = 4.166654863857219350645055881018842089580e-0002, 60 poly1[2]= { -1.666666666666629669805215138920301589656e-0001, 61 -4.999999999999931701464060878888294524481e-0001 }, 62 poly2[2]= { 8.333333332390951295683993455280336376663e-0003, 63 4.166666666394861917535640593963708222319e-0002 }, 64 poly3[2]= { -1.984126237997976692791551778230098403960e-0004, 65 -1.388888552656142867832756687736851681462e-0003 }, 66 poly4[2]= { 2.753403624854277237649987622848330351110e-0006, 67 2.478519423681460796618128289454530524759e-0005 }; 68 69 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 }; 70 71 extern void __vlibm_vcos_big( int, double *, int, double *, int, int ); 72 73 void 74 __vlibm_vcos_big_ultra3( int n, double * restrict x, int stridex, double * restrict y, 75 int stridey, int pthresh ) 76 { 77 double x0_or_one[4], x1_or_one[4], x2_or_one[4]; 78 double y0_or_zero[4], y1_or_zero[4], y2_or_zero[4]; 79 double x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave; 80 unsigned xsb0, xsb1, xsb2; 81 int i, biguns, nsave, sxsave, sysave; 82 83 nsave = n; 84 xsave = x; 85 sxsave = stridex; 86 ysave = y; 87 sysave = stridey; 88 biguns = 0; 89 90 x0_or_one[1] = 1.0; 91 x1_or_one[1] = 1.0; 92 x2_or_one[1] = 1.0; 93 x0_or_one[3] = -1.0; 94 x1_or_one[3] = -1.0; 95 x2_or_one[3] = -1.0; 96 y0_or_zero[1] = 0.0; 97 y1_or_zero[1] = 0.0; 98 y2_or_zero[1] = 0.0; 99 y0_or_zero[3] = 0.0; 100 y1_or_zero[3] = 0.0; 101 y2_or_zero[3] = 0.0; 102 103 do 104 { 105 double fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2; 106 unsigned hx; 107 int n0, n1, n2; 108 109 loop0: 110 hx = HI(x); 111 xsb0 = hx >> 31; 112 hx &= ~0x80000000; 113 if ( hx <= pthresh || hx > 0x413921fb ) 114 { 115 if ( hx > 0x413921fb && hx < 0x7ff00000) 116 biguns = 1; 117 x += stridex; 118 y += stridey; 119 i = 0; 120 if ( --n <= 0 ) 121 break; 122 goto loop0; 123 } 124 x0 = *x; 125 py0 = y; 126 x += stridex; 127 y += stridey; 128 i = 1; 129 if ( --n <= 0 ) 130 break; 131 132 loop1: 133 hx = HI(x); 134 xsb1 = hx >> 31; 135 hx &= ~0x80000000; 136 if ( hx <= pthresh || hx > 0x413921fb ) 137 { 138 if ( hx > 0x413921fb && hx < 0x7ff00000) 139 biguns = 1; 140 x += stridex; 141 y += stridey; 142 i = 1; 143 if ( --n <= 0 ) 144 break; 145 goto loop1; 146 } 147 x1 = *x; 148 py1 = y; 149 x += stridex; 150 y += stridey; 151 i = 2; 152 if ( --n <= 0 ) 153 break; 154 155 loop2: 156 hx = HI(x); 157 xsb2 = hx >> 31; 158 hx &= ~0x80000000; 159 if ( hx <= pthresh || hx > 0x413921fb ) 160 { 161 if ( hx > 0x413921fb && hx < 0x7ff00000) 162 biguns = 1; 163 x += stridex; 164 y += stridey; 165 i = 2; 166 if ( --n <= 0 ) 167 break; 168 goto loop2; 169 } 170 x2 = *x; 171 py2 = y; 172 173 n0 = (int) ( x0 * invpio2 + half[xsb0] ); 174 n1 = (int) ( x1 * invpio2 + half[xsb1] ); 175 n2 = (int) ( x2 * invpio2 + half[xsb2] ); 176 fn0 = (double) n0; 177 fn1 = (double) n1; 178 fn2 = (double) n2; 179 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 180 n1 = (n1 + 1) & 3; 181 n2 = (n2 + 1) & 3; 182 a0 = x0 - fn0 * pio2_1; 183 a1 = x1 - fn1 * pio2_1; 184 a2 = x2 - fn2 * pio2_1; 185 w0 = fn0 * pio2_2; 186 w1 = fn1 * pio2_2; 187 w2 = fn2 * pio2_2; 188 x0 = a0 - w0; 189 x1 = a1 - w1; 190 x2 = a2 - w2; 191 y0 = ( a0 - x0 ) - w0; 192 y1 = ( a1 - x1 ) - w1; 193 y2 = ( a2 - x2 ) - w2; 194 a0 = x0; 195 a1 = x1; 196 a2 = x2; 197 w0 = fn0 * pio2_3 - y0; 198 w1 = fn1 * pio2_3 - y1; 199 w2 = fn2 * pio2_3 - y2; 200 x0 = a0 - w0; 201 x1 = a1 - w1; 202 x2 = a2 - w2; 203 y0 = ( a0 - x0 ) - w0; 204 y1 = ( a1 - x1 ) - w1; 205 y2 = ( a2 - x2 ) - w2; 206 a0 = x0; 207 a1 = x1; 208 a2 = x2; 209 w0 = fn0 * pio2_3t - y0; 210 w1 = fn1 * pio2_3t - y1; 211 w2 = fn2 * pio2_3t - y2; 212 x0 = a0 - w0; 213 x1 = a1 - w1; 214 x2 = a2 - w2; 215 y0 = ( a0 - x0 ) - w0; 216 y1 = ( a1 - x1 ) - w1; 217 y2 = ( a2 - x2 ) - w2; 218 xsb0 = HI(&x0); 219 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31; 220 xsb1 = HI(&x1); 221 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2; 222 xsb2 = HI(&x2); 223 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4; 224 switch ( i ) 225 { 226 double t0, t1, t2, z0, z1, z2; 227 unsigned j0, j1, j2; 228 229 case 0: 230 j0 = ( xsb0 + 0x4000 ) & 0xffff8000; 231 j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 232 j2 = ( xsb2 + 0x4000 ) & 0xffff8000; 233 HI(&t0) = j0; 234 HI(&t1) = j1; 235 HI(&t2) = j2; 236 LO(&t0) = 0; 237 LO(&t1) = 0; 238 LO(&t2) = 0; 239 x0 = ( x0 - t0 ) + y0; 240 x1 = ( x1 - t1 ) + y1; 241 x2 = ( x2 - t2 ) + y2; 242 z0 = x0 * x0; 243 z1 = x1 * x1; 244 z2 = x2 * x2; 245 t0 = z0 * ( qq1 + z0 * qq2 ); 246 t1 = z1 * ( qq1 + z1 * qq2 ); 247 t2 = z2 * ( qq1 + z2 * qq2 ); 248 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); 249 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); 250 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); 251 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 252 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 253 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 254 xsb0 = ( xsb0 >> 30 ) & 2; 255 xsb1 = ( xsb1 >> 30 ) & 2; 256 xsb2 = ( xsb2 >> 30 ) & 2; 257 n0 ^= ( xsb0 & ~( n0 << 1 ) ); 258 n1 ^= ( xsb1 & ~( n1 << 1 ) ); 259 n2 ^= ( xsb2 & ~( n2 << 1 ) ); 260 xsb0 |= 1; 261 xsb1 |= 1; 262 xsb2 |= 1; 263 a0 = __vlibm_TBL_sincos_hi[j0+n0]; 264 a1 = __vlibm_TBL_sincos_hi[j1+n1]; 265 a2 = __vlibm_TBL_sincos_hi[j2+n2]; 266 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; 267 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; 268 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; 269 *py0 = ( a0 + t0 ); 270 *py1 = ( a1 + t1 ); 271 *py2 = ( a2 + t2 ); 272 break; 273 274 case 1: 275 j0 = n0 & 1; 276 j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 277 j2 = ( xsb2 + 0x4000 ) & 0xffff8000; 278 HI(&t1) = j1; 279 HI(&t2) = j2; 280 LO(&t1) = 0; 281 LO(&t2) = 0; 282 x0_or_one[0] = x0; 283 x0_or_one[2] = -x0; 284 y0_or_zero[0] = y0; 285 y0_or_zero[2] = -y0; 286 x1 = ( x1 - t1 ) + y1; 287 x2 = ( x2 - t2 ) + y2; 288 z0 = x0 * x0; 289 z1 = x1 * x1; 290 z2 = x2 * x2; 291 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); 292 t1 = z1 * ( qq1 + z1 * qq2 ); 293 t2 = z2 * ( qq1 + z2 * qq2 ); 294 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); 295 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); 296 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); 297 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 298 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 299 xsb1 = ( xsb1 >> 30 ) & 2; 300 xsb2 = ( xsb2 >> 30 ) & 2; 301 n1 ^= ( xsb1 & ~( n1 << 1 ) ); 302 n2 ^= ( xsb2 & ~( n2 << 1 ) ); 303 xsb1 |= 1; 304 xsb2 |= 1; 305 a1 = __vlibm_TBL_sincos_hi[j1+n1]; 306 a2 = __vlibm_TBL_sincos_hi[j2+n2]; 307 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); 308 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; 309 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; 310 *py0 = t0; 311 *py1 = ( a1 + t1 ); 312 *py2 = ( a2 + t2 ); 313 break; 314 315 case 2: 316 j0 = ( xsb0 + 0x4000 ) & 0xffff8000; 317 j1 = n1 & 1; 318 j2 = ( xsb2 + 0x4000 ) & 0xffff8000; 319 HI(&t0) = j0; 320 HI(&t2) = j2; 321 LO(&t0) = 0; 322 LO(&t2) = 0; 323 x1_or_one[0] = x1; 324 x1_or_one[2] = -x1; 325 x0 = ( x0 - t0 ) + y0; 326 y1_or_zero[0] = y1; 327 y1_or_zero[2] = -y1; 328 x2 = ( x2 - t2 ) + y2; 329 z0 = x0 * x0; 330 z1 = x1 * x1; 331 z2 = x2 * x2; 332 t0 = z0 * ( qq1 + z0 * qq2 ); 333 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); 334 t2 = z2 * ( qq1 + z2 * qq2 ); 335 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); 336 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); 337 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); 338 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 339 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 340 xsb0 = ( xsb0 >> 30 ) & 2; 341 xsb2 = ( xsb2 >> 30 ) & 2; 342 n0 ^= ( xsb0 & ~( n0 << 1 ) ); 343 n2 ^= ( xsb2 & ~( n2 << 1 ) ); 344 xsb0 |= 1; 345 xsb2 |= 1; 346 a0 = __vlibm_TBL_sincos_hi[j0+n0]; 347 a2 = __vlibm_TBL_sincos_hi[j2+n2]; 348 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; 349 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); 350 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; 351 *py0 = ( a0 + t0 ); 352 *py1 = t1; 353 *py2 = ( a2 + t2 ); 354 break; 355 356 case 3: 357 j0 = n0 & 1; 358 j1 = n1 & 1; 359 j2 = ( xsb2 + 0x4000 ) & 0xffff8000; 360 HI(&t2) = j2; 361 LO(&t2) = 0; 362 x0_or_one[0] = x0; 363 x0_or_one[2] = -x0; 364 x1_or_one[0] = x1; 365 x1_or_one[2] = -x1; 366 y0_or_zero[0] = y0; 367 y0_or_zero[2] = -y0; 368 y1_or_zero[0] = y1; 369 y1_or_zero[2] = -y1; 370 x2 = ( x2 - t2 ) + y2; 371 z0 = x0 * x0; 372 z1 = x1 * x1; 373 z2 = x2 * x2; 374 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); 375 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); 376 t2 = z2 * ( qq1 + z2 * qq2 ); 377 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); 378 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); 379 w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) ); 380 j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 381 xsb2 = ( xsb2 >> 30 ) & 2; 382 n2 ^= ( xsb2 & ~( n2 << 1 ) ); 383 xsb2 |= 1; 384 a2 = __vlibm_TBL_sincos_hi[j2+n2]; 385 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); 386 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); 387 t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2]; 388 *py0 = t0; 389 *py1 = t1; 390 *py2 = ( a2 + t2 ); 391 break; 392 393 case 4: 394 j0 = ( xsb0 + 0x4000 ) & 0xffff8000; 395 j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 396 j2 = n2 & 1; 397 HI(&t0) = j0; 398 HI(&t1) = j1; 399 LO(&t0) = 0; 400 LO(&t1) = 0; 401 x2_or_one[0] = x2; 402 x2_or_one[2] = -x2; 403 x0 = ( x0 - t0 ) + y0; 404 x1 = ( x1 - t1 ) + y1; 405 y2_or_zero[0] = y2; 406 y2_or_zero[2] = -y2; 407 z0 = x0 * x0; 408 z1 = x1 * x1; 409 z2 = x2 * x2; 410 t0 = z0 * ( qq1 + z0 * qq2 ); 411 t1 = z1 * ( qq1 + z1 * qq2 ); 412 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); 413 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); 414 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); 415 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); 416 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 417 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 418 xsb0 = ( xsb0 >> 30 ) & 2; 419 xsb1 = ( xsb1 >> 30 ) & 2; 420 n0 ^= ( xsb0 & ~( n0 << 1 ) ); 421 n1 ^= ( xsb1 & ~( n1 << 1 ) ); 422 xsb0 |= 1; 423 xsb1 |= 1; 424 a0 = __vlibm_TBL_sincos_hi[j0+n0]; 425 a1 = __vlibm_TBL_sincos_hi[j1+n1]; 426 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; 427 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; 428 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); 429 *py0 = ( a0 + t0 ); 430 *py1 = ( a1 + t1 ); 431 *py2 = t2; 432 break; 433 434 case 5: 435 j0 = n0 & 1; 436 j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 437 j2 = n2 & 1; 438 HI(&t1) = j1; 439 LO(&t1) = 0; 440 x0_or_one[0] = x0; 441 x0_or_one[2] = -x0; 442 x2_or_one[0] = x2; 443 x2_or_one[2] = -x2; 444 y0_or_zero[0] = y0; 445 y0_or_zero[2] = -y0; 446 x1 = ( x1 - t1 ) + y1; 447 y2_or_zero[0] = y2; 448 y2_or_zero[2] = -y2; 449 z0 = x0 * x0; 450 z1 = x1 * x1; 451 z2 = x2 * x2; 452 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); 453 t1 = z1 * ( qq1 + z1 * qq2 ); 454 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); 455 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); 456 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); 457 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); 458 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 459 xsb1 = ( xsb1 >> 30 ) & 2; 460 n1 ^= ( xsb1 & ~( n1 << 1 ) ); 461 xsb1 |= 1; 462 a1 = __vlibm_TBL_sincos_hi[j1+n1]; 463 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); 464 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; 465 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); 466 *py0 = t0; 467 *py1 = ( a1 + t1 ); 468 *py2 = t2; 469 break; 470 471 case 6: 472 j0 = ( xsb0 + 0x4000 ) & 0xffff8000; 473 j1 = n1 & 1; 474 j2 = n2 & 1; 475 HI(&t0) = j0; 476 LO(&t0) = 0; 477 x1_or_one[0] = x1; 478 x1_or_one[2] = -x1; 479 x2_or_one[0] = x2; 480 x2_or_one[2] = -x2; 481 x0 = ( x0 - t0 ) + y0; 482 y1_or_zero[0] = y1; 483 y1_or_zero[2] = -y1; 484 y2_or_zero[0] = y2; 485 y2_or_zero[2] = -y2; 486 z0 = x0 * x0; 487 z1 = x1 * x1; 488 z2 = x2 * x2; 489 t0 = z0 * ( qq1 + z0 * qq2 ); 490 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); 491 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); 492 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); 493 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); 494 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); 495 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 496 xsb0 = ( xsb0 >> 30 ) & 2; 497 n0 ^= ( xsb0 & ~( n0 << 1 ) ); 498 xsb0 |= 1; 499 a0 = __vlibm_TBL_sincos_hi[j0+n0]; 500 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; 501 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); 502 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); 503 *py0 = ( a0 + t0 ); 504 *py1 = t1; 505 *py2 = t2; 506 break; 507 508 case 7: 509 j0 = n0 & 1; 510 j1 = n1 & 1; 511 j2 = n2 & 1; 512 x0_or_one[0] = x0; 513 x0_or_one[2] = -x0; 514 x1_or_one[0] = x1; 515 x1_or_one[2] = -x1; 516 x2_or_one[0] = x2; 517 x2_or_one[2] = -x2; 518 y0_or_zero[0] = y0; 519 y0_or_zero[2] = -y0; 520 y1_or_zero[0] = y1; 521 y1_or_zero[2] = -y1; 522 y2_or_zero[0] = y2; 523 y2_or_zero[2] = -y2; 524 z0 = x0 * x0; 525 z1 = x1 * x1; 526 z2 = x2 * x2; 527 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); 528 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); 529 t2 = z2 * ( poly3[j2] + z2 * poly4[j2] ); 530 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); 531 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); 532 t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) ); 533 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); 534 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); 535 t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 ); 536 *py0 = t0; 537 *py1 = t1; 538 *py2 = t2; 539 break; 540 } 541 542 x += stridex; 543 y += stridey; 544 i = 0; 545 } while ( --n > 0 ); 546 547 if ( i > 0 ) 548 { 549 double fn0, fn1, a0, a1, w0, w1, y0, y1; 550 double t0, t1, z0, z1; 551 unsigned j0, j1; 552 int n0, n1; 553 554 if ( i > 1 ) 555 { 556 n1 = (int) ( x1 * invpio2 + half[xsb1] ); 557 fn1 = (double) n1; 558 n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 559 a1 = x1 - fn1 * pio2_1; 560 w1 = fn1 * pio2_2; 561 x1 = a1 - w1; 562 y1 = ( a1 - x1 ) - w1; 563 a1 = x1; 564 w1 = fn1 * pio2_3 - y1; 565 x1 = a1 - w1; 566 y1 = ( a1 - x1 ) - w1; 567 a1 = x1; 568 w1 = fn1 * pio2_3t - y1; 569 x1 = a1 - w1; 570 y1 = ( a1 - x1 ) - w1; 571 xsb1 = HI(&x1); 572 if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] ) 573 { 574 j1 = n1 & 1; 575 x1_or_one[0] = x1; 576 x1_or_one[2] = -x1; 577 y1_or_zero[0] = y1; 578 y1_or_zero[2] = -y1; 579 z1 = x1 * x1; 580 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] ); 581 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) ); 582 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 ); 583 *py1 = t1; 584 } 585 else 586 { 587 j1 = ( xsb1 + 0x4000 ) & 0xffff8000; 588 HI(&t1) = j1; 589 LO(&t1) = 0; 590 x1 = ( x1 - t1 ) + y1; 591 z1 = x1 * x1; 592 t1 = z1 * ( qq1 + z1 * qq2 ); 593 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) ); 594 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 595 xsb1 = ( xsb1 >> 30 ) & 2; 596 n1 ^= ( xsb1 & ~( n1 << 1 ) ); 597 xsb1 |= 1; 598 a1 = __vlibm_TBL_sincos_hi[j1+n1]; 599 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1]; 600 *py1 = ( a1 + t1 ); 601 } 602 } 603 n0 = (int) ( x0 * invpio2 + half[xsb0] ); 604 fn0 = (double) n0; 605 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */ 606 a0 = x0 - fn0 * pio2_1; 607 w0 = fn0 * pio2_2; 608 x0 = a0 - w0; 609 y0 = ( a0 - x0 ) - w0; 610 a0 = x0; 611 w0 = fn0 * pio2_3 - y0; 612 x0 = a0 - w0; 613 y0 = ( a0 - x0 ) - w0; 614 a0 = x0; 615 w0 = fn0 * pio2_3t - y0; 616 x0 = a0 - w0; 617 y0 = ( a0 - x0 ) - w0; 618 xsb0 = HI(&x0); 619 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] ) 620 { 621 j0 = n0 & 1; 622 x0_or_one[0] = x0; 623 x0_or_one[2] = -x0; 624 y0_or_zero[0] = y0; 625 y0_or_zero[2] = -y0; 626 z0 = x0 * x0; 627 t0 = z0 * ( poly3[j0] + z0 * poly4[j0] ); 628 t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) ); 629 t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 ); 630 *py0 = t0; 631 } 632 else 633 { 634 j0 = ( xsb0 + 0x4000 ) & 0xffff8000; 635 HI(&t0) = j0; 636 LO(&t0) = 0; 637 x0 = ( x0 - t0 ) + y0; 638 z0 = x0 * x0; 639 t0 = z0 * ( qq1 + z0 * qq2 ); 640 w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) ); 641 j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3; 642 xsb0 = ( xsb0 >> 30 ) & 2; 643 n0 ^= ( xsb0 & ~( n0 << 1 ) ); 644 xsb0 |= 1; 645 a0 = __vlibm_TBL_sincos_hi[j0+n0]; 646 t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0]; 647 *py0 = ( a0 + t0 ); 648 } 649 } 650 651 if ( biguns ) 652 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb ); 653 }