1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /*
  48  * vsincos.c
  49  *
  50  * Vector sine and cosine function.  Just slight modifications to vcos.c.
  51  */
  52 
  53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  54 
  55 static const double
  56         half[2] = { 0.5, -0.5 },
  57         one             = 1.0,
  58         invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
  59         pio2_1  = 1.570796326734125614166,  /* first 33 bits of pi/2 */
  60         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  61         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  62         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
  63         pp1             = -1.666666666605760465276263943134982554676e-0001,
  64         pp2             =  8.333261209690963126718376566146180944442e-0003,
  65         qq1             = -4.999999999977710986407023955908711557870e-0001,
  66         qq2             =  4.166654863857219350645055881018842089580e-0002,
  67         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  68                                 -4.999999999999931701464060878888294524481e-0001 },
  69         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  70                                  4.166666666394861917535640593963708222319e-0002 },
  71         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  72                                 -1.388888552656142867832756687736851681462e-0003 },
  73         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  74                                  2.478519423681460796618128289454530524759e-0005 };
  75 
  76 /* Don't __ the following; acomp will handle it */
  77 extern double fabs( double );
  78 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
  79 
  80 /*
  81  * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
  82  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
  83  *
  84  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
  85  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  86  *
  87  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  88  */
  89 void
  90 __vsincos( int n, double * restrict x, int stridex,
  91                                 double * restrict y, int stridey,
  92                                 double * restrict c, int stridec )
  93 {
  94         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  95         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  96         double          x0, x1, x2,
  97                         *py0, *py1, *py2,
  98                         *pc0, *pc1, *pc2,
  99                         *xsave, *ysave, *csave;
 100         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 101         int             i, biguns, nsave, sxsave, sysave, scsave;
 102         nsave = n;
 103         xsave = x;
 104         sxsave = stridex;
 105         ysave = y;
 106         sysave = stridey;
 107         csave = c;
 108         scsave = stridec;
 109         biguns = 0;
 110 
 111         do /* MAIN LOOP */
 112         {
 113 
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         x += stridex;
 122                         y += stridey;
 123                         c += stridec;
 124                         i = 0;
 125                         if ( --n <= 0 )
 126                                 break;
 127                         goto LOOP0;
 128                 }
 129                 if ( hx0 < 0x3e400000 ) {
 130                         /* Too small.  cos x ~ 1, sin x ~ x. */
 131                         *c = 1.0;
 132                         *y = *x;
 133                         x += stridex;
 134                         y += stridey;
 135                         c += stridec;
 136                         i = 0;
 137                         if ( --n <= 0 )
 138                                 break;
 139                         goto LOOP0;
 140                 }
 141                 x0 = *x;
 142                 py0 = y;
 143                 pc0 = c;
 144                 x += stridex;
 145                 y += stridey;
 146                 c += stridec;
 147                 i = 1;
 148                 if ( --n <= 0 )
 149                         break;
 150 
 151 LOOP1: /* Get second arg, same as above. */
 152                 xsb1 = HI(x);
 153                 hx1 = xsb1 & ~0x80000000;
 154                 if ( hx1 > 0x3fe921fb )
 155                 {
 156                         biguns = 1;
 157                         x += stridex;
 158                         y += stridey;
 159                         c += stridec;
 160                         i = 1;
 161                         if ( --n <= 0 )
 162                                 break;
 163                         goto LOOP1;
 164                 }
 165                 if ( hx1 < 0x3e400000 )
 166                 {
 167                         *c = 1.0;
 168                         *y = *x;
 169                         x += stridex;
 170                         y += stridey;
 171                         c += stridec;
 172                         i = 1;
 173                         if ( --n <= 0 )
 174                                 break;
 175                         goto LOOP1;
 176                 }
 177                 x1 = *x;
 178                 py1 = y;
 179                 pc1 = c;
 180                 x += stridex;
 181                 y += stridey;
 182                 c += stridec;
 183                 i = 2;
 184                 if ( --n <= 0 )
 185                         break;
 186 
 187 LOOP2: /* Get third arg, same as above. */
 188                 xsb2 = HI(x);
 189                 hx2 = xsb2 & ~0x80000000;
 190                 if ( hx2 > 0x3fe921fb )
 191                 {
 192                         biguns = 1;
 193                         x += stridex;
 194                         y += stridey;
 195                         c += stridec;
 196                         i = 2;
 197                         if ( --n <= 0 )
 198                                 break;
 199                         goto LOOP2;
 200                 }
 201                 if ( hx2 < 0x3e400000 )
 202                 {
 203                         *c = 1.0;
 204                         *y = *x;
 205                         x += stridex;
 206                         y += stridey;
 207                         c += stridec;
 208                         i = 2;
 209                         if ( --n <= 0 )
 210                                 break;
 211                         goto LOOP2;
 212                 }
 213                 x2 = *x;
 214                 py2 = y;
 215                 pc2 = c;
 216 
 217                 /*
 218                  * 0x3fc40000 = 5/32 ~ 0.15625
 219                  * Get msb after subtraction.  Will be 1 only if
 220                  * hx0 - 5/32 is negative.
 221                  */
 222                 i = ( hx2 - 0x3fc40000 ) >> 31;
 223                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 224                 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
 225                 switch ( i )
 226                 {
 227                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 228                         double          w0, w1, w2;
 229                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 230                         double          z0, z1, z2;
 231                         unsigned        j0, j1, j2;
 232 
 233                 case 0: /* All are > 5/32 */
 234                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 235                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 236                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 237 
 238                         HI(&t0) = j0;
 239                         HI(&t1) = j1;
 240                         HI(&t2) = j2;
 241                         LO(&t0) = 0;
 242                         LO(&t1) = 0;
 243                         LO(&t2) = 0;
 244 
 245                         x0 -= t0;
 246                         x1 -= t1;
 247                         x2 -= t2;
 248 
 249                         z0 = x0 * x0;
 250                         z1 = x1 * x1;
 251                         z2 = x2 * x2;
 252 
 253                         t0 = z0 * ( qq1 + z0 * qq2 );
 254                         t1 = z1 * ( qq1 + z1 * qq2 );
 255                         t2 = z2 * ( qq1 + z2 * qq2 );
 256 
 257                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 258                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 259                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 260 
 261                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 262                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 263                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 264 
 265                         xsb0 = ( xsb0 >> 30 ) & 2;
 266                         xsb1 = ( xsb1 >> 30 ) & 2;
 267                         xsb2 = ( xsb2 >> 30 ) & 2;
 268 
 269                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 270                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 271                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 272 
 273                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 274                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 275                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 276                                 /* cos_lo(t) */
 277                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 278                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 279                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 280 
 281                         *pc0 = a2_0 + t2_0;
 282                         *pc1 = a2_1 + t2_1;
 283                         *pc2 = a2_2 + t2_2;
 284 
 285                         t1_0 = a2_0*w0 + a1_0*t0;
 286                         t1_1 = a2_1*w1 + a1_1*t1;
 287                         t1_2 = a2_2*w2 + a1_2*t2;
 288 
 289                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 290                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 291                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 292 
 293                         *py0 = a1_0 + t1_0;
 294                         *py1 = a1_1 + t1_1;
 295                         *py2 = a1_2 + t1_2;
 296 
 297                         break;
 298 
 299                 case 1:
 300                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 301                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 302                         HI(&t0) = j0;
 303                         HI(&t1) = j1;
 304                         LO(&t0) = 0;
 305                         LO(&t1) = 0;
 306                         x0 -= t0;
 307                         x1 -= t1;
 308                         z0 = x0 * x0;
 309                         z1 = x1 * x1;
 310                         z2 = x2 * x2;
 311                         t0 = z0 * ( qq1 + z0 * qq2 );
 312                         t1 = z1 * ( qq1 + z1 * qq2 );
 313                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 314                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 315                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 316                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 317                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 318                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 319                         xsb0 = ( xsb0 >> 30 ) & 2;
 320                         xsb1 = ( xsb1 >> 30 ) & 2;
 321 
 322                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 323                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 324 
 325                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 326                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 327                                 /* cos_lo(t) */
 328                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 329                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 330 
 331                         *pc0 = a2_0 + t2_0;
 332                         *pc1 = a2_1 + t2_1;
 333                         *pc2 = one + t2;
 334 
 335                         t1_0 = a2_0*w0 + a1_0*t0;
 336                         t1_1 = a2_1*w1 + a1_1*t1;
 337                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 338 
 339                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 340                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 341                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 342 
 343                         *py0 = a1_0 + t1_0;
 344                         *py1 = a1_1 + t1_1;
 345                         t2 = x2 + x2 * t2;
 346                         *py2 = t2;
 347 
 348                         break;
 349 
 350                 case 2:
 351                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 352                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 353                         HI(&t0) = j0;
 354                         HI(&t2) = j2;
 355                         LO(&t0) = 0;
 356                         LO(&t2) = 0;
 357                         x0 -= t0;
 358                         x2 -= t2;
 359                         z0 = x0 * x0;
 360                         z1 = x1 * x1;
 361                         z2 = x2 * x2;
 362                         t0 = z0 * ( qq1 + z0 * qq2 );
 363                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 364                         t2 = z2 * ( qq1 + z2 * qq2 );
 365                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 366                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 367                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 368                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 369                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 370                         xsb0 = ( xsb0 >> 30 ) & 2;
 371                         xsb2 = ( xsb2 >> 30 ) & 2;
 372 
 373                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 374                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 375 
 376                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 377                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 378                                 /* cos_lo(t) */
 379                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 380                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 381 
 382                         *pc0 = a2_0 + t2_0;
 383                         *pc1 = one + t1;
 384                         *pc2 = a2_2 + t2_2;
 385 
 386                         t1_0 = a2_0*w0 + a1_0*t0;
 387                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 388                         t1_2 = a2_2*w2 + a1_2*t2;
 389 
 390                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 391                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 392                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 393 
 394                         *py0 = a1_0 + t1_0;
 395                         t1 = x1 + x1 * t1;
 396                         *py1 = t1;
 397                         *py2 = a1_2 + t1_2;
 398 
 399                         break;
 400 
 401                 case 3:
 402                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 403                         HI(&t0) = j0;
 404                         LO(&t0) = 0;
 405                         x0 -= t0;
 406                         z0 = x0 * x0;
 407                         z1 = x1 * x1;
 408                         z2 = x2 * x2;
 409                         t0 = z0 * ( qq1 + z0 * qq2 );
 410                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 411                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 412                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 413                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 414                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 415                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 416                         xsb0 = ( xsb0 >> 30 ) & 2;
 417                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 418 
 419                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 420 
 421                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 422 
 423                         *pc0 = a2_0 + t2_0;
 424                         *pc1 = one + t1;
 425                         *pc2 = one + t2;
 426 
 427                         t1_0 = a2_0*w0 + a1_0*t0;
 428                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 429                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 430 
 431                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 432                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 433                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 434 
 435                         *py0 = a1_0 + t1_0;
 436                         t1 = x1 + x1 * t1;
 437                         *py1 = t1;
 438                         t2 = x2 + x2 * t2;
 439                         *py2 = t2;
 440 
 441                         break;
 442 
 443                 case 4:
 444                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 445                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 446                         HI(&t1) = j1;
 447                         HI(&t2) = j2;
 448                         LO(&t1) = 0;
 449                         LO(&t2) = 0;
 450                         x1 -= t1;
 451                         x2 -= t2;
 452                         z0 = x0 * x0;
 453                         z1 = x1 * x1;
 454                         z2 = x2 * x2;
 455                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 456                         t1 = z1 * ( qq1 + z1 * qq2 );
 457                         t2 = z2 * ( qq1 + z2 * qq2 );
 458                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 459                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 460                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 461                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 462                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 463                         xsb1 = ( xsb1 >> 30 ) & 2;
 464                         xsb2 = ( xsb2 >> 30 ) & 2;
 465 
 466                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 467                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 468 
 469                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 470                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 471                                 /* cos_lo(t) */
 472                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 473                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 474 
 475                         *pc0 = one + t0;
 476                         *pc1 = a2_1 + t2_1;
 477                         *pc2 = a2_2 + t2_2;
 478 
 479                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 480                         t1_1 = a2_1*w1 + a1_1*t1;
 481                         t1_2 = a2_2*w2 + a1_2*t2;
 482 
 483                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 484                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 485                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 486 
 487                         t0 = x0 + x0 * t0;
 488                         *py0 = t0;
 489                         *py1 = a1_1 + t1_1;
 490                         *py2 = a1_2 + t1_2;
 491 
 492                         break;
 493 
 494                 case 5:
 495                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 496                         HI(&t1) = j1;
 497                         LO(&t1) = 0;
 498                         x1 -= t1;
 499                         z0 = x0 * x0;
 500                         z1 = x1 * x1;
 501                         z2 = x2 * x2;
 502                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 503                         t1 = z1 * ( qq1 + z1 * qq2 );
 504                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 505                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 506                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 507                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 508                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 509                         xsb1 = ( xsb1 >> 30 ) & 2;
 510 
 511                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 512 
 513                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 514 
 515                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 516 
 517                         *pc0 = one + t0;
 518                         *pc1 = a2_1 + t2_1;
 519                         *pc2 = one + t2;
 520 
 521                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 522                         t1_1 = a2_1*w1 + a1_1*t1;
 523                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 524 
 525                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 526                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 527                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 528 
 529                         t0 = x0 + x0 * t0;
 530                         *py0 = t0;
 531                         *py1 = a1_1 + t1_1;
 532                         t2 = x2 + x2 * t2;
 533                         *py2 = t2;
 534 
 535                         break;
 536 
 537                 case 6:
 538                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 539                         HI(&t2) = j2;
 540                         LO(&t2) = 0;
 541                         x2 -= t2;
 542                         z0 = x0 * x0;
 543                         z1 = x1 * x1;
 544                         z2 = x2 * x2;
 545                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 546                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 547                         t2 = z2 * ( qq1 + z2 * qq2 );
 548                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 549                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 550                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 551                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 552                         xsb2 = ( xsb2 >> 30 ) & 2;
 553                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 554 
 555                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 556 
 557                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 558 
 559                         *pc0 = one + t0;
 560                         *pc1 = one + t1;
 561                         *pc2 = a2_2 + t2_2;
 562 
 563                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 564                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 565                         t1_2 = a2_2*w2 + a1_2*t2;
 566 
 567                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 568                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 569                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 570 
 571                         t0 = x0 + x0 * t0;
 572                         *py0 = t0;
 573                         t1 = x1 + x1 * t1;
 574                         *py1 = t1;
 575                         *py2 = a1_2 + t1_2;
 576 
 577                         break;
 578 
 579                 case 7: /* All are < 5/32 */
 580                         z0 = x0 * x0;
 581                         z1 = x1 * x1;
 582                         z2 = x2 * x2;
 583                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 584                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 585                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 586                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 587                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 588                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 589                         *pc0 = one + t0;
 590                         *pc1 = one + t1;
 591                         *pc2 = one + t2;
 592                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 593                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 594                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 595                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 596                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 597                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 598                         t0 = x0 + x0 * t0;
 599                         t1 = x1 + x1 * t1;
 600                         t2 = x2 + x2 * t2;
 601                         *py0 = t0;
 602                         *py1 = t1;
 603                         *py2 = t2;
 604                         break;
 605                 }
 606 
 607                 x += stridex;
 608                 y += stridey;
 609                 c += stridec;
 610                 i = 0;
 611         } while ( --n > 0 ); /* END MAIN LOOP */
 612 
 613         /*
 614          * CLEAN UP last 0, 1, or 2 elts.
 615          */
 616         if ( i > 0 ) /* Clean up elts at tail.  i < 3. */
 617         {
 618                 double          a1_0, a1_1, a2_0, a2_1;
 619                 double          w0, w1;
 620                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
 621                 double          z0, z1;
 622                 unsigned        j0, j1;
 623 
 624                 if ( i > 1 )
 625                 {
 626                         if ( hx1 < 0x3fc40000 )
 627                         {
 628                                 z1 = x1 * x1;
 629                                 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 630                                 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 631                                 t1 = one + t1;
 632                                 *pc1 = t1;
 633                                 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 634                                 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 635                                 t1 = x1 + x1 * t1;
 636                                 *py1 = t1;
 637                         }
 638                         else
 639                         {
 640                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 641                                 HI(&t1) = j1;
 642                                 LO(&t1) = 0;
 643                                 x1 -= t1;
 644                                 z1 = x1 * x1;
 645                                 t1 = z1 * ( qq1 + z1 * qq2 );
 646                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 647                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 648                                 xsb1 = ( xsb1 >> 30 ) & 2;
 649                                 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 650                                 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 651                                 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 652                                 *pc1 = a2_1 + t2_1;
 653                                 t1_1 = a2_1*w1 + a1_1*t1;
 654                                 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 655                                 *py1 = a1_1 + t1_1;
 656                         }
 657                 }
 658                 if ( hx0 < 0x3fc40000 )
 659                 {
 660                         z0 = x0 * x0;
 661                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 662                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 663                         t0 = one + t0;
 664                         *pc0 = t0;
 665                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 666                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 667                         t0 = x0 + x0 * t0;
 668                         *py0 = t0;
 669                 }
 670                 else
 671                 {
 672                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 673                         HI(&t0) = j0;
 674                         LO(&t0) = 0;
 675                         x0 -= t0;
 676                         z0 = x0 * x0;
 677                         t0 = z0 * ( qq1 + z0 * qq2 );
 678                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 679                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 680                         xsb0 = ( xsb0 >> 30 ) & 2;
 681                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 682                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 683                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 684                         *pc0 = a2_0 + t2_0;
 685                         t1_0 = a2_0*w0 + a1_0*t0;
 686                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 687                         *py0 = a1_0 + t1_0;
 688                 }
 689         } /* END CLEAN UP */
 690 
 691         if ( !biguns )
 692                 return;
 693 
 694         /*
 695          * Take care of BIGUNS.
 696          */
 697         n = nsave;
 698         x = xsave;
 699         stridex = sxsave;
 700         y = ysave;
 701         stridey = sysave;
 702         c = csave;
 703         stridec = scsave;
 704         biguns = 0;
 705 
 706         x0_or_one[1] = 1.0;
 707         x1_or_one[1] = 1.0;
 708         x2_or_one[1] = 1.0;
 709         x0_or_one[3] = -1.0;
 710         x1_or_one[3] = -1.0;
 711         x2_or_one[3] = -1.0;
 712         y0_or_zero[1] = 0.0;
 713         y1_or_zero[1] = 0.0;
 714         y2_or_zero[1] = 0.0;
 715         y0_or_zero[3] = 0.0;
 716         y1_or_zero[3] = 0.0;
 717         y2_or_zero[3] = 0.0;
 718 
 719         do
 720         {
 721                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 722                 unsigned        hx;
 723                 int                     n0, n1, n2;
 724 
 725                 /*
 726                  * Find 3 more to work on: Not already done, not too big.
 727                  */
 728 loop0:
 729                 hx = HI(x);
 730                 xsb0 = hx >> 31;
 731                 hx &= ~0x80000000;
 732                 if ( hx <= 0x3fe921fb ) /* Done above. */
 733                 {
 734                         x += stridex;
 735                         y += stridey;
 736                         c += stridec;
 737                         i = 0;
 738                         if ( --n <= 0 )
 739                                 break;
 740                         goto loop0;
 741                 }
 742                 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
 743                 {
 744                         if ( hx >= 0x7ff00000 ) /* Inf or NaN */
 745                         {
 746                                 x0 = *x;
 747                                 *y = x0 - x0;
 748                                 *c = x0 - x0;
 749                         }
 750                         else {
 751                                 biguns = 1;
 752                         }
 753                         x += stridex;
 754                         y += stridey;
 755                         c += stridec;
 756                         i = 0;
 757                         if ( --n <= 0 )
 758                                 break;
 759                         goto loop0;
 760                 }
 761                 x0 = *x;
 762                 py0 = y;
 763                 pc0 = c;
 764                 x += stridex;
 765                 y += stridey;
 766                 c += stridec;
 767                 i = 1;
 768                 if ( --n <= 0 )
 769                         break;
 770 
 771 loop1:
 772                 hx = HI(x);
 773                 xsb1 = hx >> 31;
 774                 hx &= ~0x80000000;
 775                 if ( hx <= 0x3fe921fb )
 776                 {
 777                         x += stridex;
 778                         y += stridey;
 779                         c += stridec;
 780                         i = 1;
 781                         if ( --n <= 0 )
 782                                 break;
 783                         goto loop1;
 784                 }
 785                 if ( hx > 0x413921fb )
 786                 {
 787                         if ( hx >= 0x7ff00000 )
 788                         {
 789                                 x1 = *x;
 790                                 *y = x1 - x1;
 791                                 *c = x1 - x1;
 792                         }
 793                         else {
 794                                 biguns = 1;
 795                         }
 796                         x += stridex;
 797                         y += stridey;
 798                         c += stridec;
 799                         i = 1;
 800                         if ( --n <= 0 )
 801                                 break;
 802                         goto loop1;
 803                 }
 804                 x1 = *x;
 805                 py1 = y;
 806                 pc1 = c;
 807                 x += stridex;
 808                 y += stridey;
 809                 c += stridec;
 810                 i = 2;
 811                 if ( --n <= 0 )
 812                         break;
 813 
 814 loop2:
 815                 hx = HI(x);
 816                 xsb2 = hx >> 31;
 817                 hx &= ~0x80000000;
 818                 if ( hx <= 0x3fe921fb )
 819                 {
 820                         x += stridex;
 821                         y += stridey;
 822                         c += stridec;
 823                         i = 2;
 824                         if ( --n <= 0 )
 825                                 break;
 826                         goto loop2;
 827                 }
 828                 if ( hx > 0x413921fb )
 829                 {
 830                         if ( hx >= 0x7ff00000 )
 831                         {
 832                                 x2 = *x;
 833                                 *y = x2 - x2;
 834                                 *c = x2 - x2;
 835                         }
 836                         else {
 837                                 biguns = 1;
 838                         }
 839                         x += stridex;
 840                         y += stridey;
 841                         c += stridec;
 842                         i = 2;
 843                         if ( --n <= 0 )
 844                                 break;
 845                         goto loop2;
 846                 }
 847                 x2 = *x;
 848                 py2 = y;
 849                 pc2 = c;
 850 
 851                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 852                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 853                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 854                 fn0 = (double) n0;
 855                 fn1 = (double) n1;
 856                 fn2 = (double) n2;
 857                 n0 &= 3;
 858                 n1 &= 3;
 859                 n2 &= 3;
 860                 a0 = x0 - fn0 * pio2_1;
 861                 a1 = x1 - fn1 * pio2_1;
 862                 a2 = x2 - fn2 * pio2_1;
 863                 w0 = fn0 * pio2_2;
 864                 w1 = fn1 * pio2_2;
 865                 w2 = fn2 * pio2_2;
 866                 x0 = a0 - w0;
 867                 x1 = a1 - w1;
 868                 x2 = a2 - w2;
 869                 y0 = ( a0 - x0 ) - w0;
 870                 y1 = ( a1 - x1 ) - w1;
 871                 y2 = ( a2 - x2 ) - w2;
 872                 a0 = x0;
 873                 a1 = x1;
 874                 a2 = x2;
 875                 w0 = fn0 * pio2_3 - y0;
 876                 w1 = fn1 * pio2_3 - y1;
 877                 w2 = fn2 * pio2_3 - y2;
 878                 x0 = a0 - w0;
 879                 x1 = a1 - w1;
 880                 x2 = a2 - w2;
 881                 y0 = ( a0 - x0 ) - w0;
 882                 y1 = ( a1 - x1 ) - w1;
 883                 y2 = ( a2 - x2 ) - w2;
 884                 a0 = x0;
 885                 a1 = x1;
 886                 a2 = x2;
 887                 w0 = fn0 * pio2_3t - y0;
 888                 w1 = fn1 * pio2_3t - y1;
 889                 w2 = fn2 * pio2_3t - y2;
 890                 x0 = a0 - w0;
 891                 x1 = a1 - w1;
 892                 x2 = a2 - w2;
 893                 y0 = ( a0 - x0 ) - w0;
 894                 y1 = ( a1 - x1 ) - w1;
 895                 y2 = ( a2 - x2 ) - w2;
 896                 xsb2 = HI(&x2);
 897                 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
 898                 xsb1 = HI(&x1);
 899                 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
 900                 xsb0 = HI(&x0);
 901                 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
 902                 switch ( i )
 903                 {
 904                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 905                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 906                         double          z0, z1, z2;
 907                         unsigned        j0, j1, j2;
 908 
 909                 case 0:
 910                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 911                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 912                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 913                         HI(&t0) = j0;
 914                         HI(&t1) = j1;
 915                         HI(&t2) = j2;
 916                         LO(&t0) = 0;
 917                         LO(&t1) = 0;
 918                         LO(&t2) = 0;
 919                         x0 = ( x0 - t0 ) + y0;
 920                         x1 = ( x1 - t1 ) + y1;
 921                         x2 = ( x2 - t2 ) + y2;
 922                         z0 = x0 * x0;
 923                         z1 = x1 * x1;
 924                         z2 = x2 * x2;
 925                         t0 = z0 * ( qq1 + z0 * qq2 );
 926                         t1 = z1 * ( qq1 + z1 * qq2 );
 927                         t2 = z2 * ( qq1 + z2 * qq2 );
 928                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 929                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 930                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 931                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 932                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 933                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 934                         xsb0 = ( xsb0 >> 30 ) & 2;
 935                         xsb1 = ( xsb1 >> 30 ) & 2;
 936                         xsb2 = ( xsb2 >> 30 ) & 2;
 937                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 938                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 939                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 940                         xsb0 |= 1;
 941                         xsb1 |= 1;
 942                         xsb2 |= 1;
 943 
 944                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
 945                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
 946                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
 947 
 948                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
 949                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
 950                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
 951 
 952                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
 953                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
 954                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
 955 
 956                         w0 *= a2_0;
 957                         w1 *= a2_1;
 958                         w2 *= a2_2;
 959 
 960                         *pc0 = a2_0 + t2_0;
 961                         *pc1 = a2_1 + t2_1;
 962                         *pc2 = a2_2 + t2_2;
 963 
 964                         t1_0 = w0 + a1_0*t0;
 965                         t1_1 = w1 + a1_1*t1;
 966                         t1_2 = w2 + a1_2*t2;
 967 
 968                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
 969                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
 970                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
 971 
 972                         *py0 = a1_0 + t1_0;
 973                         *py1 = a1_1 + t1_1;
 974                         *py2 = a1_2 + t1_2;
 975 
 976                         break;
 977 
 978                 case 1:
 979                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 980                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 981                         j2 = n2 & 1;
 982                         HI(&t0) = j0;
 983                         HI(&t1) = j1;
 984                         LO(&t0) = 0;
 985                         LO(&t1) = 0;
 986                         x2_or_one[0] = x2;
 987                         x2_or_one[2] = -x2;
 988                         x0 = ( x0 - t0 ) + y0;
 989                         x1 = ( x1 - t1 ) + y1;
 990                         y2_or_zero[0] = y2;
 991                         y2_or_zero[2] = -y2;
 992                         z0 = x0 * x0;
 993                         z1 = x1 * x1;
 994                         z2 = x2 * x2;
 995                         t0 = z0 * ( qq1 + z0 * qq2 );
 996                         t1 = z1 * ( qq1 + z1 * qq2 );
 997                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 998                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 999                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1000                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1001                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1002                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1003                         xsb0 = ( xsb0 >> 30 ) & 2;
1004                         xsb1 = ( xsb1 >> 30 ) & 2;
1005                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1006                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1007                         xsb0 |= 1;
1008                         xsb1 |= 1;
1009                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1010                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1011 
1012                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1013                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1014 
1015                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1016                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1017                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1018 
1019                         *pc0 = a2_0 + t2_0;
1020                         *pc1 = a2_1 + t2_1;
1021                         *py2 = t2;
1022 
1023                         n2 = (n2 + 1) & 3;
1024                         j2 = (j2 + 1) & 1;
1025                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1026 
1027                         t1_0 = a2_0*w0 + a1_0*t0;
1028                         t1_1 = a2_1*w1 + a1_1*t1;
1029                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1030 
1031                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1032                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1033                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1034 
1035                         *py0 = a1_0 + t1_0;
1036                         *py1 = a1_1 + t1_1;
1037                         *pc2 = t2;
1038 
1039                         break;
1040 
1041                 case 2:
1042                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1043                         j1 = n1 & 1;
1044                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1045                         HI(&t0) = j0;
1046                         HI(&t2) = j2;
1047                         LO(&t0) = 0;
1048                         LO(&t2) = 0;
1049                         x1_or_one[0] = x1;
1050                         x1_or_one[2] = -x1;
1051                         x0 = ( x0 - t0 ) + y0;
1052                         y1_or_zero[0] = y1;
1053                         y1_or_zero[2] = -y1;
1054                         x2 = ( x2 - t2 ) + y2;
1055                         z0 = x0 * x0;
1056                         z1 = x1 * x1;
1057                         z2 = x2 * x2;
1058                         t0 = z0 * ( qq1 + z0 * qq2 );
1059                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1060                         t2 = z2 * ( qq1 + z2 * qq2 );
1061                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1062                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1063                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1064                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1065                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1066                         xsb0 = ( xsb0 >> 30 ) & 2;
1067                         xsb2 = ( xsb2 >> 30 ) & 2;
1068                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1069                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1070                         xsb0 |= 1;
1071                         xsb2 |= 1;
1072 
1073                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1074                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1075 
1076                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1077                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1078 
1079                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1080                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1081                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1082 
1083                         *pc0 = a2_0 + t2_0;
1084                         *py1 = t1;
1085                         *pc2 = a2_2 + t2_2;
1086 
1087                         n1 = (n1 + 1) & 3;
1088                         j1 = (j1 + 1) & 1;
1089                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1090 
1091                         t1_0 = a2_0*w0 + a1_0*t0;
1092                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1093                         t1_2 = a2_2*w2 + a1_2*t2;
1094 
1095                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1096                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1097                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1098 
1099                         *py0 = a1_0 + t1_0;
1100                         *pc1 = t1;
1101                         *py2 = a1_2 + t1_2;
1102 
1103                         break;
1104 
1105                 case 3:
1106                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1107                         j1 = n1 & 1;
1108                         j2 = n2 & 1;
1109                         HI(&t0) = j0;
1110                         LO(&t0) = 0;
1111                         x1_or_one[0] = x1;
1112                         x1_or_one[2] = -x1;
1113                         x2_or_one[0] = x2;
1114                         x2_or_one[2] = -x2;
1115                         x0 = ( x0 - t0 ) + y0;
1116                         y1_or_zero[0] = y1;
1117                         y1_or_zero[2] = -y1;
1118                         y2_or_zero[0] = y2;
1119                         y2_or_zero[2] = -y2;
1120                         z0 = x0 * x0;
1121                         z1 = x1 * x1;
1122                         z2 = x2 * x2;
1123                         t0 = z0 * ( qq1 + z0 * qq2 );
1124                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1125                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1126                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1127                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1128                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1129                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1130                         xsb0 = ( xsb0 >> 30 ) & 2;
1131                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1132                         xsb0 |= 1;
1133 
1134                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1135                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1136 
1137                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1138                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1139                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1140 
1141                         *pc0 = a2_0 + t2_0;
1142                         *py1 = t1;
1143                         *py2 = t2;
1144 
1145                         n1 = (n1 + 1) & 3;
1146                         n2 = (n2 + 1) & 3;
1147                         j1 = (j1 + 1) & 1;
1148                         j2 = (j2 + 1) & 1;
1149 
1150                         t1_0 = a2_0*w0 + a1_0*t0;
1151                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1152                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1153 
1154                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1155                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1156                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1157 
1158                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1159                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1160 
1161                         *py0 = a1_0 + t1_0;
1162                         *pc1 = t1;
1163                         *pc2 = t2;
1164 
1165                         break;
1166 
1167                 case 4:
1168                         j0 = n0 & 1;
1169                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1170                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1171                         HI(&t1) = j1;
1172                         HI(&t2) = j2;
1173                         LO(&t1) = 0;
1174                         LO(&t2) = 0;
1175                         x0_or_one[0] = x0;
1176                         x0_or_one[2] = -x0;
1177                         y0_or_zero[0] = y0;
1178                         y0_or_zero[2] = -y0;
1179                         x1 = ( x1 - t1 ) + y1;
1180                         x2 = ( x2 - t2 ) + y2;
1181                         z0 = x0 * x0;
1182                         z1 = x1 * x1;
1183                         z2 = x2 * x2;
1184                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1185                         t1 = z1 * ( qq1 + z1 * qq2 );
1186                         t2 = z2 * ( qq1 + z2 * qq2 );
1187                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1188                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1189                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1190                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1191                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1192                         xsb1 = ( xsb1 >> 30 ) & 2;
1193                         xsb2 = ( xsb2 >> 30 ) & 2;
1194                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1195                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1196                         xsb1 |= 1;
1197                         xsb2 |= 1;
1198 
1199                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1200                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1201 
1202                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1203                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1204 
1205                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1206                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1207                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1208 
1209                         *py0 = t0;
1210                         *pc1 = a2_1 + t2_1;
1211                         *pc2 = a2_2 + t2_2;
1212 
1213                         n0 = (n0 + 1) & 3;
1214                         j0 = (j0 + 1) & 1;
1215                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1216 
1217                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1218                         t1_1 = a2_1*w1 + a1_1*t1;
1219                         t1_2 = a2_2*w2 + a1_2*t2;
1220 
1221                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1222                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1223                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1224 
1225                         *py1 = a1_1 + t1_1;
1226                         *py2 = a1_2 + t1_2;
1227                         *pc0 = t0;
1228 
1229                         break;
1230 
1231                 case 5:
1232                         j0 = n0 & 1;
1233                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1234                         j2 = n2 & 1;
1235                         HI(&t1) = j1;
1236                         LO(&t1) = 0;
1237                         x0_or_one[0] = x0;
1238                         x0_or_one[2] = -x0;
1239                         x2_or_one[0] = x2;
1240                         x2_or_one[2] = -x2;
1241                         y0_or_zero[0] = y0;
1242                         y0_or_zero[2] = -y0;
1243                         x1 = ( x1 - t1 ) + y1;
1244                         y2_or_zero[0] = y2;
1245                         y2_or_zero[2] = -y2;
1246                         z0 = x0 * x0;
1247                         z1 = x1 * x1;
1248                         z2 = x2 * x2;
1249                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1250                         t1 = z1 * ( qq1 + z1 * qq2 );
1251                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1252                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1253                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1254                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1255                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1256                         xsb1 = ( xsb1 >> 30 ) & 2;
1257                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1258                         xsb1 |= 1;
1259 
1260                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1261                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1262 
1263                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1264                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1265                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1266 
1267                         *py0 = t0;
1268                         *pc1 = a2_1 + t2_1;
1269                         *py2 = t2;
1270 
1271                         n0 = (n0 + 1) & 3;
1272                         n2 = (n2 + 1) & 3;
1273                         j0 = (j0 + 1) & 1;
1274                         j2 = (j2 + 1) & 1;
1275 
1276                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1277                         t1_1 = a2_1*w1 + a1_1*t1;
1278                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1279 
1280                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1281                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1282                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1283 
1284                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1285                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1286 
1287                         *pc0 = t0;
1288                         *py1 = a1_1 + t1_1;
1289                         *pc2 = t2;
1290 
1291                         break;
1292 
1293                 case 6:
1294                         j0 = n0 & 1;
1295                         j1 = n1 & 1;
1296                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1297                         HI(&t2) = j2;
1298                         LO(&t2) = 0;
1299                         x0_or_one[0] = x0;
1300                         x0_or_one[2] = -x0;
1301                         x1_or_one[0] = x1;
1302                         x1_or_one[2] = -x1;
1303                         y0_or_zero[0] = y0;
1304                         y0_or_zero[2] = -y0;
1305                         y1_or_zero[0] = y1;
1306                         y1_or_zero[2] = -y1;
1307                         x2 = ( x2 - t2 ) + y2;
1308                         z0 = x0 * x0;
1309                         z1 = x1 * x1;
1310                         z2 = x2 * x2;
1311                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1312                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1313                         t2 = z2 * ( qq1 + z2 * qq2 );
1314                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1315                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1316                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1317                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1318                         xsb2 = ( xsb2 >> 30 ) & 2;
1319                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1320                         xsb2 |= 1;
1321 
1322                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1323                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1324 
1325                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1326                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1327                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1328 
1329                         *py0 = t0;
1330                         *py1 = t1;
1331                         *pc2 = a2_2 + t2_2;
1332 
1333                         n0 = (n0 + 1) & 3;
1334                         n1 = (n1 + 1) & 3;
1335                         j0 = (j0 + 1) & 1;
1336                         j1 = (j1 + 1) & 1;
1337 
1338                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1339                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1340                         t1_2 = a2_2*w2 + a1_2*t2;
1341 
1342                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1343                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1344                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1345 
1346                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1347                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1348 
1349                         *pc0 = t0;
1350                         *pc1 = t1;
1351                         *py2 = a1_2 + t1_2;
1352 
1353                         break;
1354 
1355                 case 7:
1356                         j0 = n0 & 1;
1357                         j1 = n1 & 1;
1358                         j2 = n2 & 1;
1359                         x0_or_one[0] = x0;
1360                         x0_or_one[2] = -x0;
1361                         x1_or_one[0] = x1;
1362                         x1_or_one[2] = -x1;
1363                         x2_or_one[0] = x2;
1364                         x2_or_one[2] = -x2;
1365                         y0_or_zero[0] = y0;
1366                         y0_or_zero[2] = -y0;
1367                         y1_or_zero[0] = y1;
1368                         y1_or_zero[2] = -y1;
1369                         y2_or_zero[0] = y2;
1370                         y2_or_zero[2] = -y2;
1371                         z0 = x0 * x0;
1372                         z1 = x1 * x1;
1373                         z2 = x2 * x2;
1374                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1375                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1376                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1377                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1378                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1379                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1380                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1381                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1382                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1383                         *py0 = t0;
1384                         *py1 = t1;
1385                         *py2 = t2;
1386 
1387                         n0 = (n0 + 1) & 3;
1388                         n1 = (n1 + 1) & 3;
1389                         n2 = (n2 + 1) & 3;
1390                         j0 = (j0 + 1) & 1;
1391                         j1 = (j1 + 1) & 1;
1392                         j2 = (j2 + 1) & 1;
1393                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1394                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1395                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1396                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1397                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1398                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1399                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1400                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1401                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1402                         *pc0 = t0;
1403                         *pc1 = t1;
1404                         *pc2 = t2;
1405                         break;
1406                 }
1407 
1408                 x += stridex;
1409                 y += stridey;
1410                 c += stridec;
1411                 i = 0;
1412         } while ( --n > 0 );
1413 
1414         if ( i > 0 )
1415         {
1416                 double          a1_0, a1_1, a2_0, a2_1;
1417                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
1418                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
1419                 double          z0, z1;
1420                 unsigned        j0, j1;
1421                 int             n0, n1;
1422 
1423                 if ( i > 1 )
1424                 {
1425                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
1426                         fn1 = (double) n1;
1427                         n1 &= 3;
1428                         a1 = x1 - fn1 * pio2_1;
1429                         w1 = fn1 * pio2_2;
1430                         x1 = a1 - w1;
1431                         y1 = ( a1 - x1 ) - w1;
1432                         a1 = x1;
1433                         w1 = fn1 * pio2_3 - y1;
1434                         x1 = a1 - w1;
1435                         y1 = ( a1 - x1 ) - w1;
1436                         a1 = x1;
1437                         w1 = fn1 * pio2_3t - y1;
1438                         x1 = a1 - w1;
1439                         y1 = ( a1 - x1 ) - w1;
1440                         xsb1 = HI(&x1);
1441                         if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1442                         {
1443                                 j1 = n1 & 1;
1444                                 x1_or_one[0] = x1;
1445                                 x1_or_one[2] = -x1;
1446                                 y1_or_zero[0] = y1;
1447                                 y1_or_zero[2] = -y1;
1448                                 z1 = x1 * x1;
1449                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1450                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1451                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1452                                 *py1 = t1;
1453                                 n1 = (n1 + 1) & 3;
1454                                 j1 = (j1 + 1) & 1;
1455                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1456                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1457                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1458                                 *pc1 = t1;
1459                         }
1460                         else
1461                         {
1462                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1463                                 HI(&t1) = j1;
1464                                 LO(&t1) = 0;
1465                                 x1 = ( x1 - t1 ) + y1;
1466                                 z1 = x1 * x1;
1467                                 t1 = z1 * ( qq1 + z1 * qq2 );
1468                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1469                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1470                                 xsb1 = ( xsb1 >> 30 ) & 2;
1471                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1472                                 xsb1 |= 1;
1473                                 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1474                                 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1475                                 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1476                                 *pc1 = a2_1 + t2_1;
1477                                 t1_1 = a2_1*w1 + a1_1*t1;
1478                                 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1479                                 *py1 = a1_1 + t1_1;
1480                         }
1481                 }
1482                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1483                 fn0 = (double) n0;
1484                 n0 &= 3;
1485                 a0 = x0 - fn0 * pio2_1;
1486                 w0 = fn0 * pio2_2;
1487                 x0 = a0 - w0;
1488                 y0 = ( a0 - x0 ) - w0;
1489                 a0 = x0;
1490                 w0 = fn0 * pio2_3 - y0;
1491                 x0 = a0 - w0;
1492                 y0 = ( a0 - x0 ) - w0;
1493                 a0 = x0;
1494                 w0 = fn0 * pio2_3t - y0;
1495                 x0 = a0 - w0;
1496                 y0 = ( a0 - x0 ) - w0;
1497                 xsb0 = HI(&x0);
1498                 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1499                 {
1500                         j0 = n0 & 1;
1501                         x0_or_one[0] = x0;
1502                         x0_or_one[2] = -x0;
1503                         y0_or_zero[0] = y0;
1504                         y0_or_zero[2] = -y0;
1505                         z0 = x0 * x0;
1506                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1507                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1508                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1509                         *py0 = t0;
1510                         n0 = (n0 + 1) & 3;
1511                         j0 = (j0 + 1) & 1;
1512                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1513                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1514                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1515                         *pc0 = t0;
1516                 }
1517                 else
1518                 {
1519                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1520                         HI(&t0) = j0;
1521                         LO(&t0) = 0;
1522                         x0 = ( x0 - t0 ) + y0;
1523                         z0 = x0 * x0;
1524                         t0 = z0 * ( qq1 + z0 * qq2 );
1525                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1526                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1527                         xsb0 = ( xsb0 >> 30 ) & 2;
1528                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1529                         xsb0 |= 1;
1530                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1531                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1532                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1533                         *pc0 = a2_0 + t2_0;
1534                         t1_0 = a2_0*w0 + a1_0*t0;
1535                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1536                         *py0 = a1_0 + t1_0;
1537                 }
1538         }
1539 
1540         if ( biguns ) {
1541                 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1542         }
1543 }