1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 
  32 #ifdef _LITTLE_ENDIAN
  33 #define HI(x)   *(1+(int*)x)
  34 #define LO(x)   *(unsigned*)x
  35 #else
  36 #define HI(x)   *(int*)x
  37 #define LO(x)   *(1+(unsigned*)x)
  38 #endif
  39 
  40 #ifdef __RESTRICT
  41 #define restrict _Restrict
  42 #else
  43 #define restrict
  44 #endif
  45 
  46 /*
  47  * vsincos.c
  48  *
  49  * Vector sine and cosine function.  Just slight modifications to vcos.c.
  50  */
  51 
  52 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  53 
  54 static const double
  55         half[2] = { 0.5, -0.5 },
  56         one             = 1.0,
  57         invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
  58         pio2_1  = 1.570796326734125614166,  /* first 33 bits of pi/2 */
  59         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  60         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  61         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
  62         pp1             = -1.666666666605760465276263943134982554676e-0001,
  63         pp2             =  8.333261209690963126718376566146180944442e-0003,
  64         qq1             = -4.999999999977710986407023955908711557870e-0001,
  65         qq2             =  4.166654863857219350645055881018842089580e-0002,
  66         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  67                                 -4.999999999999931701464060878888294524481e-0001 },
  68         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  69                                  4.166666666394861917535640593963708222319e-0002 },
  70         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  71                                 -1.388888552656142867832756687736851681462e-0003 },
  72         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  73                                  2.478519423681460796618128289454530524759e-0005 };
  74 
  75 /* Don't __ the following; acomp will handle it */
  76 extern double fabs( double );
  77 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
  78 
  79 /*
  80  * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
  81  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
  82  *
  83  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
  84  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  85  *
  86  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  87  */
  88 void
  89 __vsincos( int n, double * restrict x, int stridex,
  90                                 double * restrict y, int stridey,
  91                                 double * restrict c, int stridec )
  92 {
  93         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  94         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  95         double          x0, x1, x2,
  96                                 *py0, *py1, *py2,
  97                                 *pc0, *pc1, *pc2,
  98                                 *xsave, *ysave, *csave;
  99         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 100         int                     i, biguns, nsave, sxsave, sysave, scsave;
 101 
 102         nsave = n;
 103         xsave = x;
 104         sxsave = stridex;
 105         ysave = y;
 106         sysave = stridey;
 107         csave = c;
 108         scsave = stridec;
 109         biguns = 0;
 110 
 111         do /* MAIN LOOP */
 112         {
 113 
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         x += stridex;
 122                         y += stridey;
 123                         c += stridec;
 124                         i = 0;
 125                         if ( --n <= 0 )
 126                                 break;
 127                         goto LOOP0;
 128                 }
 129                 if ( hx0 < 0x3e400000 ) {
 130                         /* Too small.  cos x ~ 1, sin x ~ x. */
 131                         volatile int v = *x;
 132                         *c = 1.0;
 133                         *y = *x;
 134                         x += stridex;
 135                         y += stridey;
 136                         c += stridec;
 137                         i = 0;
 138                         if ( --n <= 0 )
 139                                 break;
 140                         goto LOOP0;
 141                 }
 142                 x0 = *x;
 143                 py0 = y;
 144                 pc0 = c;
 145                 x += stridex;
 146                 y += stridey;
 147                 c += stridec;
 148                 i = 1;
 149                 if ( --n <= 0 )
 150                         break;
 151 
 152 LOOP1: /* Get second arg, same as above. */
 153                 xsb1 = HI(x);
 154                 hx1 = xsb1 & ~0x80000000;
 155                 if ( hx1 > 0x3fe921fb )
 156                 {
 157                         biguns = 1;
 158                         x += stridex;
 159                         y += stridey;
 160                         c += stridec;
 161                         i = 1;
 162                         if ( --n <= 0 )
 163                                 break;
 164                         goto LOOP1;
 165                 }
 166                 if ( hx1 < 0x3e400000 )
 167                 {
 168                         volatile int v = *x;
 169                         *c = 1.0;
 170                         *y = *x;
 171                         x += stridex;
 172                         y += stridey;
 173                         c += stridec;
 174                         i = 1;
 175                         if ( --n <= 0 )
 176                                 break;
 177                         goto LOOP1;
 178                 }
 179                 x1 = *x;
 180                 py1 = y;
 181                 pc1 = c;
 182                 x += stridex;
 183                 y += stridey;
 184                 c += stridec;
 185                 i = 2;
 186                 if ( --n <= 0 )
 187                         break;
 188 
 189 LOOP2: /* Get third arg, same as above. */
 190                 xsb2 = HI(x);
 191                 hx2 = xsb2 & ~0x80000000;
 192                 if ( hx2 > 0x3fe921fb )
 193                 {
 194                         biguns = 1;
 195                         x += stridex;
 196                         y += stridey;
 197                         c += stridec;
 198                         i = 2;
 199                         if ( --n <= 0 )
 200                                 break;
 201                         goto LOOP2;
 202                 }
 203                 if ( hx2 < 0x3e400000 )
 204                 {
 205                         volatile int v = *x;
 206                         *c = 1.0;
 207                         *y = *x;
 208                         x += stridex;
 209                         y += stridey;
 210                         c += stridec;
 211                         i = 2;
 212                         if ( --n <= 0 )
 213                                 break;
 214                         goto LOOP2;
 215                 }
 216                 x2 = *x;
 217                 py2 = y;
 218                 pc2 = c;
 219 
 220                 /*
 221                  * 0x3fc40000 = 5/32 ~ 0.15625
 222                  * Get msb after subtraction.  Will be 1 only if
 223                  * hx0 - 5/32 is negative.
 224                  */
 225                 i = ( hx2 - 0x3fc40000 ) >> 31;
 226                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 227                 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
 228                 switch ( i )
 229                 {
 230                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 231                         double          w0, w1, w2;
 232                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 233                         double          z0, z1, z2;
 234                         unsigned        j0, j1, j2;
 235 
 236                 case 0: /* All are > 5/32 */
 237                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 238                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 239                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 240 
 241                         HI(&t0) = j0;
 242                         HI(&t1) = j1;
 243                         HI(&t2) = j2;
 244                         LO(&t0) = 0;
 245                         LO(&t1) = 0;
 246                         LO(&t2) = 0;
 247 
 248                         x0 -= t0;
 249                         x1 -= t1;
 250                         x2 -= t2;
 251 
 252                         z0 = x0 * x0;
 253                         z1 = x1 * x1;
 254                         z2 = x2 * x2;
 255 
 256                         t0 = z0 * ( qq1 + z0 * qq2 );
 257                         t1 = z1 * ( qq1 + z1 * qq2 );
 258                         t2 = z2 * ( qq1 + z2 * qq2 );
 259 
 260                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 261                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 262                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 263 
 264                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 265                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 266                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 267 
 268                         xsb0 = ( xsb0 >> 30 ) & 2;
 269                         xsb1 = ( xsb1 >> 30 ) & 2;
 270                         xsb2 = ( xsb2 >> 30 ) & 2;
 271 
 272                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 273                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 274                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 275 
 276                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 277                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 278                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 279                                 /* cos_lo(t) */
 280                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 281                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 282                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 283 
 284                         *pc0 = a2_0 + t2_0;
 285                         *pc1 = a2_1 + t2_1;
 286                         *pc2 = a2_2 + t2_2;
 287 
 288                         t1_0 = a2_0*w0 + a1_0*t0;
 289                         t1_1 = a2_1*w1 + a1_1*t1;
 290                         t1_2 = a2_2*w2 + a1_2*t2;
 291 
 292                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 293                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 294                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 295 
 296                         *py0 = a1_0 + t1_0;
 297                         *py1 = a1_1 + t1_1;
 298                         *py2 = a1_2 + t1_2;
 299 
 300                         break;
 301 
 302                 case 1:
 303                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 304                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 305                         HI(&t0) = j0;
 306                         HI(&t1) = j1;
 307                         LO(&t0) = 0;
 308                         LO(&t1) = 0;
 309                         x0 -= t0;
 310                         x1 -= t1;
 311                         z0 = x0 * x0;
 312                         z1 = x1 * x1;
 313                         z2 = x2 * x2;
 314                         t0 = z0 * ( qq1 + z0 * qq2 );
 315                         t1 = z1 * ( qq1 + z1 * qq2 );
 316                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 317                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 318                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 319                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 320                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 321                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 322                         xsb0 = ( xsb0 >> 30 ) & 2;
 323                         xsb1 = ( xsb1 >> 30 ) & 2;
 324 
 325                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 326                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 327 
 328                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 329                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 330                                 /* cos_lo(t) */
 331                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 332                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 333 
 334                         *pc0 = a2_0 + t2_0;
 335                         *pc1 = a2_1 + t2_1;
 336                         *pc2 = one + t2;
 337 
 338                         t1_0 = a2_0*w0 + a1_0*t0;
 339                         t1_1 = a2_1*w1 + a1_1*t1;
 340                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 341 
 342                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 343                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 344                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 345 
 346                         *py0 = a1_0 + t1_0;
 347                         *py1 = a1_1 + t1_1;
 348                         t2 = x2 + x2 * t2;
 349                         *py2 = t2;
 350 
 351                         break;
 352 
 353                 case 2:
 354                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 355                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 356                         HI(&t0) = j0;
 357                         HI(&t2) = j2;
 358                         LO(&t0) = 0;
 359                         LO(&t2) = 0;
 360                         x0 -= t0;
 361                         x2 -= t2;
 362                         z0 = x0 * x0;
 363                         z1 = x1 * x1;
 364                         z2 = x2 * x2;
 365                         t0 = z0 * ( qq1 + z0 * qq2 );
 366                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 367                         t2 = z2 * ( qq1 + z2 * qq2 );
 368                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 369                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 370                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 371                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 372                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 373                         xsb0 = ( xsb0 >> 30 ) & 2;
 374                         xsb2 = ( xsb2 >> 30 ) & 2;
 375 
 376                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 377                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 378 
 379                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 380                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 381                                 /* cos_lo(t) */
 382                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 383                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 384 
 385                         *pc0 = a2_0 + t2_0;
 386                         *pc1 = one + t1;
 387                         *pc2 = a2_2 + t2_2;
 388 
 389                         t1_0 = a2_0*w0 + a1_0*t0;
 390                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 391                         t1_2 = a2_2*w2 + a1_2*t2;
 392 
 393                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 394                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 395                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 396 
 397                         *py0 = a1_0 + t1_0;
 398                         t1 = x1 + x1 * t1;
 399                         *py1 = t1;
 400                         *py2 = a1_2 + t1_2;
 401 
 402                         break;
 403 
 404                 case 3:
 405                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 406                         HI(&t0) = j0;
 407                         LO(&t0) = 0;
 408                         x0 -= t0;
 409                         z0 = x0 * x0;
 410                         z1 = x1 * x1;
 411                         z2 = x2 * x2;
 412                         t0 = z0 * ( qq1 + z0 * qq2 );
 413                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 414                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 415                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 416                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 417                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 418                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 419                         xsb0 = ( xsb0 >> 30 ) & 2;
 420                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 421 
 422                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 423 
 424                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 425 
 426                         *pc0 = a2_0 + t2_0;
 427                         *pc1 = one + t1;
 428                         *pc2 = one + t2;
 429 
 430                         t1_0 = a2_0*w0 + a1_0*t0;
 431                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 432                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 433 
 434                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 435                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 436                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 437 
 438                         *py0 = a1_0 + t1_0;
 439                         t1 = x1 + x1 * t1;
 440                         *py1 = t1;
 441                         t2 = x2 + x2 * t2;
 442                         *py2 = t2;
 443 
 444                         break;
 445 
 446                 case 4:
 447                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 448                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 449                         HI(&t1) = j1;
 450                         HI(&t2) = j2;
 451                         LO(&t1) = 0;
 452                         LO(&t2) = 0;
 453                         x1 -= t1;
 454                         x2 -= t2;
 455                         z0 = x0 * x0;
 456                         z1 = x1 * x1;
 457                         z2 = x2 * x2;
 458                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 459                         t1 = z1 * ( qq1 + z1 * qq2 );
 460                         t2 = z2 * ( qq1 + z2 * qq2 );
 461                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 462                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 463                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 464                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 465                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 466                         xsb1 = ( xsb1 >> 30 ) & 2;
 467                         xsb2 = ( xsb2 >> 30 ) & 2;
 468 
 469                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 470                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 471 
 472                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 473                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 474                                 /* cos_lo(t) */
 475                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 476                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 477 
 478                         *pc0 = one + t0;
 479                         *pc1 = a2_1 + t2_1;
 480                         *pc2 = a2_2 + t2_2;
 481 
 482                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 483                         t1_1 = a2_1*w1 + a1_1*t1;
 484                         t1_2 = a2_2*w2 + a1_2*t2;
 485 
 486                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 487                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 488                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 489 
 490                         t0 = x0 + x0 * t0;
 491                         *py0 = t0;
 492                         *py1 = a1_1 + t1_1;
 493                         *py2 = a1_2 + t1_2;
 494 
 495                         break;
 496 
 497                 case 5:
 498                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 499                         HI(&t1) = j1;
 500                         LO(&t1) = 0;
 501                         x1 -= t1;
 502                         z0 = x0 * x0;
 503                         z1 = x1 * x1;
 504                         z2 = x2 * x2;
 505                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 506                         t1 = z1 * ( qq1 + z1 * qq2 );
 507                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 508                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 509                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 510                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 511                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 512                         xsb1 = ( xsb1 >> 30 ) & 2;
 513 
 514                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 515 
 516                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 517 
 518                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 519 
 520                         *pc0 = one + t0;
 521                         *pc1 = a2_1 + t2_1;
 522                         *pc2 = one + t2;
 523 
 524                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 525                         t1_1 = a2_1*w1 + a1_1*t1;
 526                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 527 
 528                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 529                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 530                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 531 
 532                         t0 = x0 + x0 * t0;
 533                         *py0 = t0;
 534                         *py1 = a1_1 + t1_1;
 535                         t2 = x2 + x2 * t2;
 536                         *py2 = t2;
 537 
 538                         break;
 539 
 540                 case 6:
 541                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 542                         HI(&t2) = j2;
 543                         LO(&t2) = 0;
 544                         x2 -= t2;
 545                         z0 = x0 * x0;
 546                         z1 = x1 * x1;
 547                         z2 = x2 * x2;
 548                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 549                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 550                         t2 = z2 * ( qq1 + z2 * qq2 );
 551                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 552                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 553                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 554                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 555                         xsb2 = ( xsb2 >> 30 ) & 2;
 556                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 557 
 558                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 559 
 560                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 561 
 562                         *pc0 = one + t0;
 563                         *pc1 = one + t1;
 564                         *pc2 = a2_2 + t2_2;
 565 
 566                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 567                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 568                         t1_2 = a2_2*w2 + a1_2*t2;
 569 
 570                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 571                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 572                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 573 
 574                         t0 = x0 + x0 * t0;
 575                         *py0 = t0;
 576                         t1 = x1 + x1 * t1;
 577                         *py1 = t1;
 578                         *py2 = a1_2 + t1_2;
 579 
 580                         break;
 581 
 582                 case 7: /* All are < 5/32 */
 583                         z0 = x0 * x0;
 584                         z1 = x1 * x1;
 585                         z2 = x2 * x2;
 586                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 587                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 588                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 589                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 590                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 591                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 592                         *pc0 = one + t0;
 593                         *pc1 = one + t1;
 594                         *pc2 = one + t2;
 595                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 596                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 597                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 598                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 599                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 600                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 601                         t0 = x0 + x0 * t0;
 602                         t1 = x1 + x1 * t1;
 603                         t2 = x2 + x2 * t2;
 604                         *py0 = t0;
 605                         *py1 = t1;
 606                         *py2 = t2;
 607                         break;
 608                 }
 609 
 610                 x += stridex;
 611                 y += stridey;
 612                 c += stridec;
 613                 i = 0;
 614         } while ( --n > 0 ); /* END MAIN LOOP */
 615 
 616         /*
 617          * CLEAN UP last 0, 1, or 2 elts.
 618          */
 619         if ( i > 0 ) /* Clean up elts at tail.  i < 3. */
 620         {
 621                 double          a1_0, a1_1, a2_0, a2_1;
 622                 double          w0, w1;
 623                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
 624                 double          z0, z1;
 625                 unsigned        j0, j1;
 626 
 627                 if ( i > 1 )
 628                 {
 629                         if ( hx1 < 0x3fc40000 )
 630                         {
 631                                 z1 = x1 * x1;
 632                                 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 633                                 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 634                                 t1 = one + t1;
 635                                 *pc1 = t1;
 636                                 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 637                                 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 638                                 t1 = x1 + x1 * t1;
 639                                 *py1 = t1;
 640                         }
 641                         else
 642                         {
 643                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 644                                 HI(&t1) = j1;
 645                                 LO(&t1) = 0;
 646                                 x1 -= t1;
 647                                 z1 = x1 * x1;
 648                                 t1 = z1 * ( qq1 + z1 * qq2 );
 649                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 650                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 651                                 xsb1 = ( xsb1 >> 30 ) & 2;
 652                                 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 653                                 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 654                                 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 655                                 *pc1 = a2_1 + t2_1;
 656                                 t1_1 = a2_1*w1 + a1_1*t1;
 657                                 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 658                                 *py1 = a1_1 + t1_1;
 659                         }
 660                 }
 661                 if ( hx0 < 0x3fc40000 )
 662                 {
 663                         z0 = x0 * x0;
 664                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 665                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 666                         t0 = one + t0;
 667                         *pc0 = t0;
 668                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 669                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 670                         t0 = x0 + x0 * t0;
 671                         *py0 = t0;
 672                 }
 673                 else
 674                 {
 675                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 676                         HI(&t0) = j0;
 677                         LO(&t0) = 0;
 678                         x0 -= t0;
 679                         z0 = x0 * x0;
 680                         t0 = z0 * ( qq1 + z0 * qq2 );
 681                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 682                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 683                         xsb0 = ( xsb0 >> 30 ) & 2;
 684                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 685                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 686                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 687                         *pc0 = a2_0 + t2_0;
 688                         t1_0 = a2_0*w0 + a1_0*t0;
 689                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 690                         *py0 = a1_0 + t1_0;
 691                 }
 692         } /* END CLEAN UP */
 693 
 694         if ( !biguns )
 695                 return;
 696 
 697         /*
 698          * Take care of BIGUNS.
 699          */
 700         n = nsave;
 701         x = xsave;
 702         stridex = sxsave;
 703         y = ysave;
 704         stridey = sysave;
 705         c = csave;
 706         stridec = scsave;
 707         biguns = 0;
 708 
 709         x0_or_one[1] = 1.0;
 710         x1_or_one[1] = 1.0;
 711         x2_or_one[1] = 1.0;
 712         x0_or_one[3] = -1.0;
 713         x1_or_one[3] = -1.0;
 714         x2_or_one[3] = -1.0;
 715         y0_or_zero[1] = 0.0;
 716         y1_or_zero[1] = 0.0;
 717         y2_or_zero[1] = 0.0;
 718         y0_or_zero[3] = 0.0;
 719         y1_or_zero[3] = 0.0;
 720         y2_or_zero[3] = 0.0;
 721 
 722         do
 723         {
 724                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 725                 unsigned        hx;
 726                 int                     n0, n1, n2;
 727 
 728                 /*
 729                  * Find 3 more to work on: Not already done, not too big.
 730                  */
 731 loop0:
 732                 hx = HI(x);
 733                 xsb0 = hx >> 31;
 734                 hx &= ~0x80000000;
 735                 if ( hx <= 0x3fe921fb ) /* Done above. */
 736                 {
 737                         x += stridex;
 738                         y += stridey;
 739                         c += stridec;
 740                         i = 0;
 741                         if ( --n <= 0 )
 742                                 break;
 743                         goto loop0;
 744                 }
 745                 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
 746                 {
 747                         if ( hx >= 0x7ff00000 ) /* Inf or NaN */
 748                         {
 749                                 x0 = *x;
 750                                 *y = x0 - x0;
 751                                 *c = x0 - x0;
 752                         }
 753                         else {
 754                                 biguns = 1;
 755                         }
 756                         x += stridex;
 757                         y += stridey;
 758                         c += stridec;
 759                         i = 0;
 760                         if ( --n <= 0 )
 761                                 break;
 762                         goto loop0;
 763                 }
 764                 x0 = *x;
 765                 py0 = y;
 766                 pc0 = c;
 767                 x += stridex;
 768                 y += stridey;
 769                 c += stridec;
 770                 i = 1;
 771                 if ( --n <= 0 )
 772                         break;
 773 
 774 loop1:
 775                 hx = HI(x);
 776                 xsb1 = hx >> 31;
 777                 hx &= ~0x80000000;
 778                 if ( hx <= 0x3fe921fb )
 779                 {
 780                         x += stridex;
 781                         y += stridey;
 782                         c += stridec;
 783                         i = 1;
 784                         if ( --n <= 0 )
 785                                 break;
 786                         goto loop1;
 787                 }
 788                 if ( hx > 0x413921fb )
 789                 {
 790                         if ( hx >= 0x7ff00000 )
 791                         {
 792                                 x1 = *x;
 793                                 *y = x1 - x1;
 794                                 *c = x1 - x1;
 795                         }
 796                         else {
 797                                 biguns = 1;
 798                         }
 799                         x += stridex;
 800                         y += stridey;
 801                         c += stridec;
 802                         i = 1;
 803                         if ( --n <= 0 )
 804                                 break;
 805                         goto loop1;
 806                 }
 807                 x1 = *x;
 808                 py1 = y;
 809                 pc1 = c;
 810                 x += stridex;
 811                 y += stridey;
 812                 c += stridec;
 813                 i = 2;
 814                 if ( --n <= 0 )
 815                         break;
 816 
 817 loop2:
 818                 hx = HI(x);
 819                 xsb2 = hx >> 31;
 820                 hx &= ~0x80000000;
 821                 if ( hx <= 0x3fe921fb )
 822                 {
 823                         x += stridex;
 824                         y += stridey;
 825                         c += stridec;
 826                         i = 2;
 827                         if ( --n <= 0 )
 828                                 break;
 829                         goto loop2;
 830                 }
 831                 if ( hx > 0x413921fb )
 832                 {
 833                         if ( hx >= 0x7ff00000 )
 834                         {
 835                                 x2 = *x;
 836                                 *y = x2 - x2;
 837                                 *c = x2 - x2;
 838                         }
 839                         else {
 840                                 biguns = 1;
 841                         }
 842                         x += stridex;
 843                         y += stridey;
 844                         c += stridec;
 845                         i = 2;
 846                         if ( --n <= 0 )
 847                                 break;
 848                         goto loop2;
 849                 }
 850                 x2 = *x;
 851                 py2 = y;
 852                 pc2 = c;
 853 
 854                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 855                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 856                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 857                 fn0 = (double) n0;
 858                 fn1 = (double) n1;
 859                 fn2 = (double) n2;
 860                 n0 &= 3;
 861                 n1 &= 3;
 862                 n2 &= 3;
 863                 a0 = x0 - fn0 * pio2_1;
 864                 a1 = x1 - fn1 * pio2_1;
 865                 a2 = x2 - fn2 * pio2_1;
 866                 w0 = fn0 * pio2_2;
 867                 w1 = fn1 * pio2_2;
 868                 w2 = fn2 * pio2_2;
 869                 x0 = a0 - w0;
 870                 x1 = a1 - w1;
 871                 x2 = a2 - w2;
 872                 y0 = ( a0 - x0 ) - w0;
 873                 y1 = ( a1 - x1 ) - w1;
 874                 y2 = ( a2 - x2 ) - w2;
 875                 a0 = x0;
 876                 a1 = x1;
 877                 a2 = x2;
 878                 w0 = fn0 * pio2_3 - y0;
 879                 w1 = fn1 * pio2_3 - y1;
 880                 w2 = fn2 * pio2_3 - y2;
 881                 x0 = a0 - w0;
 882                 x1 = a1 - w1;
 883                 x2 = a2 - w2;
 884                 y0 = ( a0 - x0 ) - w0;
 885                 y1 = ( a1 - x1 ) - w1;
 886                 y2 = ( a2 - x2 ) - w2;
 887                 a0 = x0;
 888                 a1 = x1;
 889                 a2 = x2;
 890                 w0 = fn0 * pio2_3t - y0;
 891                 w1 = fn1 * pio2_3t - y1;
 892                 w2 = fn2 * pio2_3t - y2;
 893                 x0 = a0 - w0;
 894                 x1 = a1 - w1;
 895                 x2 = a2 - w2;
 896                 y0 = ( a0 - x0 ) - w0;
 897                 y1 = ( a1 - x1 ) - w1;
 898                 y2 = ( a2 - x2 ) - w2;
 899                 xsb2 = HI(&x2);
 900                 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
 901                 xsb1 = HI(&x1);
 902                 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
 903                 xsb0 = HI(&x0);
 904                 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
 905                 switch ( i )
 906                 {
 907                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 908                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 909                         double          z0, z1, z2;
 910                         unsigned        j0, j1, j2;
 911 
 912                 case 0:
 913                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 914                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 915                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 916                         HI(&t0) = j0;
 917                         HI(&t1) = j1;
 918                         HI(&t2) = j2;
 919                         LO(&t0) = 0;
 920                         LO(&t1) = 0;
 921                         LO(&t2) = 0;
 922                         x0 = ( x0 - t0 ) + y0;
 923                         x1 = ( x1 - t1 ) + y1;
 924                         x2 = ( x2 - t2 ) + y2;
 925                         z0 = x0 * x0;
 926                         z1 = x1 * x1;
 927                         z2 = x2 * x2;
 928                         t0 = z0 * ( qq1 + z0 * qq2 );
 929                         t1 = z1 * ( qq1 + z1 * qq2 );
 930                         t2 = z2 * ( qq1 + z2 * qq2 );
 931                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 932                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 933                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 934                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 935                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 936                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 937                         xsb0 = ( xsb0 >> 30 ) & 2;
 938                         xsb1 = ( xsb1 >> 30 ) & 2;
 939                         xsb2 = ( xsb2 >> 30 ) & 2;
 940                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 941                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 942                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 943                         xsb0 |= 1;
 944                         xsb1 |= 1;
 945                         xsb2 |= 1;
 946 
 947                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
 948                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
 949                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
 950 
 951                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
 952                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
 953                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
 954 
 955                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
 956                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
 957                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
 958 
 959                         w0 *= a2_0;
 960                         w1 *= a2_1;
 961                         w2 *= a2_2;
 962 
 963                         *pc0 = a2_0 + t2_0;
 964                         *pc1 = a2_1 + t2_1;
 965                         *pc2 = a2_2 + t2_2;
 966 
 967                         t1_0 = w0 + a1_0*t0;
 968                         t1_1 = w1 + a1_1*t1;
 969                         t1_2 = w2 + a1_2*t2;
 970 
 971                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
 972                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
 973                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
 974 
 975                         *py0 = a1_0 + t1_0;
 976                         *py1 = a1_1 + t1_1;
 977                         *py2 = a1_2 + t1_2;
 978 
 979                         break;
 980 
 981                 case 1:
 982                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 983                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 984                         j2 = n2 & 1;
 985                         HI(&t0) = j0;
 986                         HI(&t1) = j1;
 987                         LO(&t0) = 0;
 988                         LO(&t1) = 0;
 989                         x2_or_one[0] = x2;
 990                         x2_or_one[2] = -x2;
 991                         x0 = ( x0 - t0 ) + y0;
 992                         x1 = ( x1 - t1 ) + y1;
 993                         y2_or_zero[0] = y2;
 994                         y2_or_zero[2] = -y2;
 995                         z0 = x0 * x0;
 996                         z1 = x1 * x1;
 997                         z2 = x2 * x2;
 998                         t0 = z0 * ( qq1 + z0 * qq2 );
 999                         t1 = z1 * ( qq1 + z1 * qq2 );
1000                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1001                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1002                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1003                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1004                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1005                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1006                         xsb0 = ( xsb0 >> 30 ) & 2;
1007                         xsb1 = ( xsb1 >> 30 ) & 2;
1008                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1009                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1010                         xsb0 |= 1;
1011                         xsb1 |= 1;
1012                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1013                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1014 
1015                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1016                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1017 
1018                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1019                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1020                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1021 
1022                         *pc0 = a2_0 + t2_0;
1023                         *pc1 = a2_1 + t2_1;
1024                         *py2 = t2;
1025 
1026                         n2 = (n2 + 1) & 3;
1027                         j2 = (j2 + 1) & 1;
1028                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1029 
1030                         t1_0 = a2_0*w0 + a1_0*t0;
1031                         t1_1 = a2_1*w1 + a1_1*t1;
1032                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1033 
1034                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1035                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1036                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1037 
1038                         *py0 = a1_0 + t1_0;
1039                         *py1 = a1_1 + t1_1;
1040                         *pc2 = t2;
1041 
1042                         break;
1043 
1044                 case 2:
1045                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1046                         j1 = n1 & 1;
1047                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1048                         HI(&t0) = j0;
1049                         HI(&t2) = j2;
1050                         LO(&t0) = 0;
1051                         LO(&t2) = 0;
1052                         x1_or_one[0] = x1;
1053                         x1_or_one[2] = -x1;
1054                         x0 = ( x0 - t0 ) + y0;
1055                         y1_or_zero[0] = y1;
1056                         y1_or_zero[2] = -y1;
1057                         x2 = ( x2 - t2 ) + y2;
1058                         z0 = x0 * x0;
1059                         z1 = x1 * x1;
1060                         z2 = x2 * x2;
1061                         t0 = z0 * ( qq1 + z0 * qq2 );
1062                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1063                         t2 = z2 * ( qq1 + z2 * qq2 );
1064                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1065                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1066                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1067                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1068                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1069                         xsb0 = ( xsb0 >> 30 ) & 2;
1070                         xsb2 = ( xsb2 >> 30 ) & 2;
1071                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1072                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1073                         xsb0 |= 1;
1074                         xsb2 |= 1;
1075 
1076                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1077                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1078 
1079                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1080                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1081 
1082                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1083                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1084                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1085 
1086                         *pc0 = a2_0 + t2_0;
1087                         *py1 = t1;
1088                         *pc2 = a2_2 + t2_2;
1089 
1090                         n1 = (n1 + 1) & 3;
1091                         j1 = (j1 + 1) & 1;
1092                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1093 
1094                         t1_0 = a2_0*w0 + a1_0*t0;
1095                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1096                         t1_2 = a2_2*w2 + a1_2*t2;
1097 
1098                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1099                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1100                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1101 
1102                         *py0 = a1_0 + t1_0;
1103                         *pc1 = t1;
1104                         *py2 = a1_2 + t1_2;
1105 
1106                         break;
1107 
1108                 case 3:
1109                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1110                         j1 = n1 & 1;
1111                         j2 = n2 & 1;
1112                         HI(&t0) = j0;
1113                         LO(&t0) = 0;
1114                         x1_or_one[0] = x1;
1115                         x1_or_one[2] = -x1;
1116                         x2_or_one[0] = x2;
1117                         x2_or_one[2] = -x2;
1118                         x0 = ( x0 - t0 ) + y0;
1119                         y1_or_zero[0] = y1;
1120                         y1_or_zero[2] = -y1;
1121                         y2_or_zero[0] = y2;
1122                         y2_or_zero[2] = -y2;
1123                         z0 = x0 * x0;
1124                         z1 = x1 * x1;
1125                         z2 = x2 * x2;
1126                         t0 = z0 * ( qq1 + z0 * qq2 );
1127                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1128                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1129                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1130                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1131                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1132                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1133                         xsb0 = ( xsb0 >> 30 ) & 2;
1134                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1135                         xsb0 |= 1;
1136 
1137                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1138                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1139 
1140                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1141                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1142                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1143 
1144                         *pc0 = a2_0 + t2_0;
1145                         *py1 = t1;
1146                         *py2 = t2;
1147 
1148                         n1 = (n1 + 1) & 3;
1149                         n2 = (n2 + 1) & 3;
1150                         j1 = (j1 + 1) & 1;
1151                         j2 = (j2 + 1) & 1;
1152 
1153                         t1_0 = a2_0*w0 + a1_0*t0;
1154                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1155                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1156 
1157                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1158                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1159                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1160 
1161                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1162                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1163 
1164                         *py0 = a1_0 + t1_0;
1165                         *pc1 = t1;
1166                         *pc2 = t2;
1167 
1168                         break;
1169 
1170                 case 4:
1171                         j0 = n0 & 1;
1172                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1173                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1174                         HI(&t1) = j1;
1175                         HI(&t2) = j2;
1176                         LO(&t1) = 0;
1177                         LO(&t2) = 0;
1178                         x0_or_one[0] = x0;
1179                         x0_or_one[2] = -x0;
1180                         y0_or_zero[0] = y0;
1181                         y0_or_zero[2] = -y0;
1182                         x1 = ( x1 - t1 ) + y1;
1183                         x2 = ( x2 - t2 ) + y2;
1184                         z0 = x0 * x0;
1185                         z1 = x1 * x1;
1186                         z2 = x2 * x2;
1187                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1188                         t1 = z1 * ( qq1 + z1 * qq2 );
1189                         t2 = z2 * ( qq1 + z2 * qq2 );
1190                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1191                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1192                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1193                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1194                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1195                         xsb1 = ( xsb1 >> 30 ) & 2;
1196                         xsb2 = ( xsb2 >> 30 ) & 2;
1197                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1198                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1199                         xsb1 |= 1;
1200                         xsb2 |= 1;
1201 
1202                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1203                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1204 
1205                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1206                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1207 
1208                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1209                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1210                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1211 
1212                         *py0 = t0;
1213                         *pc1 = a2_1 + t2_1;
1214                         *pc2 = a2_2 + t2_2;
1215 
1216                         n0 = (n0 + 1) & 3;
1217                         j0 = (j0 + 1) & 1;
1218                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1219 
1220                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1221                         t1_1 = a2_1*w1 + a1_1*t1;
1222                         t1_2 = a2_2*w2 + a1_2*t2;
1223 
1224                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1225                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1226                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1227 
1228                         *py1 = a1_1 + t1_1;
1229                         *py2 = a1_2 + t1_2;
1230                         *pc0 = t0;
1231 
1232                         break;
1233 
1234                 case 5:
1235                         j0 = n0 & 1;
1236                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1237                         j2 = n2 & 1;
1238                         HI(&t1) = j1;
1239                         LO(&t1) = 0;
1240                         x0_or_one[0] = x0;
1241                         x0_or_one[2] = -x0;
1242                         x2_or_one[0] = x2;
1243                         x2_or_one[2] = -x2;
1244                         y0_or_zero[0] = y0;
1245                         y0_or_zero[2] = -y0;
1246                         x1 = ( x1 - t1 ) + y1;
1247                         y2_or_zero[0] = y2;
1248                         y2_or_zero[2] = -y2;
1249                         z0 = x0 * x0;
1250                         z1 = x1 * x1;
1251                         z2 = x2 * x2;
1252                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1253                         t1 = z1 * ( qq1 + z1 * qq2 );
1254                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1255                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1256                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1257                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1258                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1259                         xsb1 = ( xsb1 >> 30 ) & 2;
1260                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1261                         xsb1 |= 1;
1262 
1263                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1264                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1265 
1266                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1267                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1268                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1269 
1270                         *py0 = t0;
1271                         *pc1 = a2_1 + t2_1;
1272                         *py2 = t2;
1273 
1274                         n0 = (n0 + 1) & 3;
1275                         n2 = (n2 + 1) & 3;
1276                         j0 = (j0 + 1) & 1;
1277                         j2 = (j2 + 1) & 1;
1278 
1279                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1280                         t1_1 = a2_1*w1 + a1_1*t1;
1281                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1282 
1283                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1284                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1285                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1286 
1287                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1288                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1289 
1290                         *pc0 = t0;
1291                         *py1 = a1_1 + t1_1;
1292                         *pc2 = t2;
1293 
1294                         break;
1295 
1296                 case 6:
1297                         j0 = n0 & 1;
1298                         j1 = n1 & 1;
1299                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1300                         HI(&t2) = j2;
1301                         LO(&t2) = 0;
1302                         x0_or_one[0] = x0;
1303                         x0_or_one[2] = -x0;
1304                         x1_or_one[0] = x1;
1305                         x1_or_one[2] = -x1;
1306                         y0_or_zero[0] = y0;
1307                         y0_or_zero[2] = -y0;
1308                         y1_or_zero[0] = y1;
1309                         y1_or_zero[2] = -y1;
1310                         x2 = ( x2 - t2 ) + y2;
1311                         z0 = x0 * x0;
1312                         z1 = x1 * x1;
1313                         z2 = x2 * x2;
1314                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1315                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1316                         t2 = z2 * ( qq1 + z2 * qq2 );
1317                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1318                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1319                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1320                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1321                         xsb2 = ( xsb2 >> 30 ) & 2;
1322                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1323                         xsb2 |= 1;
1324 
1325                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1326                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1327 
1328                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1329                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1330                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1331 
1332                         *py0 = t0;
1333                         *py1 = t1;
1334                         *pc2 = a2_2 + t2_2;
1335 
1336                         n0 = (n0 + 1) & 3;
1337                         n1 = (n1 + 1) & 3;
1338                         j0 = (j0 + 1) & 1;
1339                         j1 = (j1 + 1) & 1;
1340 
1341                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1342                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1343                         t1_2 = a2_2*w2 + a1_2*t2;
1344 
1345                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1346                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1347                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1348 
1349                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1350                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1351 
1352                         *pc0 = t0;
1353                         *pc1 = t1;
1354                         *py2 = a1_2 + t1_2;
1355 
1356                         break;
1357 
1358                 case 7:
1359                         j0 = n0 & 1;
1360                         j1 = n1 & 1;
1361                         j2 = n2 & 1;
1362                         x0_or_one[0] = x0;
1363                         x0_or_one[2] = -x0;
1364                         x1_or_one[0] = x1;
1365                         x1_or_one[2] = -x1;
1366                         x2_or_one[0] = x2;
1367                         x2_or_one[2] = -x2;
1368                         y0_or_zero[0] = y0;
1369                         y0_or_zero[2] = -y0;
1370                         y1_or_zero[0] = y1;
1371                         y1_or_zero[2] = -y1;
1372                         y2_or_zero[0] = y2;
1373                         y2_or_zero[2] = -y2;
1374                         z0 = x0 * x0;
1375                         z1 = x1 * x1;
1376                         z2 = x2 * x2;
1377                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1378                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1379                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1380                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1381                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1382                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1383                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1384                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1385                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1386                         *py0 = t0;
1387                         *py1 = t1;
1388                         *py2 = t2;
1389 
1390                         n0 = (n0 + 1) & 3;
1391                         n1 = (n1 + 1) & 3;
1392                         n2 = (n2 + 1) & 3;
1393                         j0 = (j0 + 1) & 1;
1394                         j1 = (j1 + 1) & 1;
1395                         j2 = (j2 + 1) & 1;
1396                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1397                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1398                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1399                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1400                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1401                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1402                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1403                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1404                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1405                         *pc0 = t0;
1406                         *pc1 = t1;
1407                         *pc2 = t2;
1408                         break;
1409                 }
1410 
1411                 x += stridex;
1412                 y += stridey;
1413                 c += stridec;
1414                 i = 0;
1415         } while ( --n > 0 );
1416 
1417         if ( i > 0 )
1418         {
1419                 double          a1_0, a1_1, a2_0, a2_1;
1420                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
1421                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
1422                 double          z0, z1;
1423                 unsigned        j0, j1;
1424                 int             n0, n1;
1425 
1426                 if ( i > 1 )
1427                 {
1428                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
1429                         fn1 = (double) n1;
1430                         n1 &= 3;
1431                         a1 = x1 - fn1 * pio2_1;
1432                         w1 = fn1 * pio2_2;
1433                         x1 = a1 - w1;
1434                         y1 = ( a1 - x1 ) - w1;
1435                         a1 = x1;
1436                         w1 = fn1 * pio2_3 - y1;
1437                         x1 = a1 - w1;
1438                         y1 = ( a1 - x1 ) - w1;
1439                         a1 = x1;
1440                         w1 = fn1 * pio2_3t - y1;
1441                         x1 = a1 - w1;
1442                         y1 = ( a1 - x1 ) - w1;
1443                         xsb1 = HI(&x1);
1444                         if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1445                         {
1446                                 j1 = n1 & 1;
1447                                 x1_or_one[0] = x1;
1448                                 x1_or_one[2] = -x1;
1449                                 y1_or_zero[0] = y1;
1450                                 y1_or_zero[2] = -y1;
1451                                 z1 = x1 * x1;
1452                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1453                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1454                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1455                                 *py1 = t1;
1456                                 n1 = (n1 + 1) & 3;
1457                                 j1 = (j1 + 1) & 1;
1458                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1459                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1460                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1461                                 *pc1 = t1;
1462                         }
1463                         else
1464                         {
1465                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1466                                 HI(&t1) = j1;
1467                                 LO(&t1) = 0;
1468                                 x1 = ( x1 - t1 ) + y1;
1469                                 z1 = x1 * x1;
1470                                 t1 = z1 * ( qq1 + z1 * qq2 );
1471                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1472                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1473                                 xsb1 = ( xsb1 >> 30 ) & 2;
1474                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1475                                 xsb1 |= 1;
1476                                 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1477                                 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1478                                 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1479                                 *pc1 = a2_1 + t2_1;
1480                                 t1_1 = a2_1*w1 + a1_1*t1;
1481                                 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1482                                 *py1 = a1_1 + t1_1;
1483                         }
1484                 }
1485                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1486                 fn0 = (double) n0;
1487                 n0 &= 3;
1488                 a0 = x0 - fn0 * pio2_1;
1489                 w0 = fn0 * pio2_2;
1490                 x0 = a0 - w0;
1491                 y0 = ( a0 - x0 ) - w0;
1492                 a0 = x0;
1493                 w0 = fn0 * pio2_3 - y0;
1494                 x0 = a0 - w0;
1495                 y0 = ( a0 - x0 ) - w0;
1496                 a0 = x0;
1497                 w0 = fn0 * pio2_3t - y0;
1498                 x0 = a0 - w0;
1499                 y0 = ( a0 - x0 ) - w0;
1500                 xsb0 = HI(&x0);
1501                 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1502                 {
1503                         j0 = n0 & 1;
1504                         x0_or_one[0] = x0;
1505                         x0_or_one[2] = -x0;
1506                         y0_or_zero[0] = y0;
1507                         y0_or_zero[2] = -y0;
1508                         z0 = x0 * x0;
1509                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1510                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1511                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1512                         *py0 = t0;
1513                         n0 = (n0 + 1) & 3;
1514                         j0 = (j0 + 1) & 1;
1515                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1516                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1517                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1518                         *pc0 = t0;
1519                 }
1520                 else
1521                 {
1522                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1523                         HI(&t0) = j0;
1524                         LO(&t0) = 0;
1525                         x0 = ( x0 - t0 ) + y0;
1526                         z0 = x0 * x0;
1527                         t0 = z0 * ( qq1 + z0 * qq2 );
1528                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1529                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1530                         xsb0 = ( xsb0 >> 30 ) & 2;
1531                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1532                         xsb0 |= 1;
1533                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1534                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1535                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1536                         *pc0 = a2_0 + t2_0;
1537                         t1_0 = a2_0*w0 + a1_0*t0;
1538                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1539                         *py0 = a1_0 + t1_0;
1540                 }
1541         }
1542 
1543         if ( biguns ) {
1544                 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1545         }
1546 }