1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /*
  48  * vsincos.c
  49  *
  50  * Vector sine and cosine function.  Just slight modifications to vcos.c.
  51  */
  52 
  53 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  54 
  55 static const double
  56         half[2] = { 0.5, -0.5 },
  57         one             = 1.0,
  58         invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
  59         pio2_1  = 1.570796326734125614166,  /* first 33 bits of pi/2 */
  60         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  61         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  62         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
  63         pp1             = -1.666666666605760465276263943134982554676e-0001,
  64         pp2             =  8.333261209690963126718376566146180944442e-0003,
  65         qq1             = -4.999999999977710986407023955908711557870e-0001,
  66         qq2             =  4.166654863857219350645055881018842089580e-0002,
  67         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  68                                 -4.999999999999931701464060878888294524481e-0001 },
  69         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  70                                  4.166666666394861917535640593963708222319e-0002 },
  71         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  72                                 -1.388888552656142867832756687736851681462e-0003 },
  73         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  74                                  2.478519423681460796618128289454530524759e-0005 };
  75 
  76 /* Don't __ the following; acomp will handle it */
  77 extern double fabs( double );
  78 extern void __vlibm_vsincos_big( int, double *, int, double *, int, double *, int, int );
  79 
  80 /*
  81  * y[i*stridey] := sin( x[i*stridex] ), for i = 0..n.
  82  * c[i*stridec] := cos( x[i*stridex] ), for i = 0..n.
  83  *
  84  * Calls __vlibm_vsincos_big to handle all elts which have abs >~ 1.647e+06.
  85  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  86  *
  87  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  88  */
  89 void
  90 __vsincos( int n, double * restrict x, int stridex,
  91                                 double * restrict y, int stridey,
  92                                 double * restrict c, int stridec )
  93 {
  94         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  95         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  96         double          x0, x1, x2,
  97                         *py0, *py1, *py2,
  98                         *pc0, *pc1, *pc2,
  99                         *xsave, *ysave, *csave;
 100         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 101         int             i, biguns, nsave, sxsave, sysave, scsave;
 102         volatile int    v __GNU_UNUSED;
 103         nsave = n;
 104         xsave = x;
 105         sxsave = stridex;
 106         ysave = y;
 107         sysave = stridey;
 108         csave = c;
 109         scsave = stridec;
 110         biguns = 0;
 111 
 112         do /* MAIN LOOP */
 113         {
 114 
 115                 /* Gotos here so _break_ exits MAIN LOOP. */
 116 LOOP0:  /* Find first arg in right range. */
 117                 xsb0 = HI(x); /* get most significant word */
 118                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 119                 if ( hx0 > 0x3fe921fb ) {
 120                         /* Too big: arg reduction needed, so leave for second part */
 121                         biguns = 1;
 122                         x += stridex;
 123                         y += stridey;
 124                         c += stridec;
 125                         i = 0;
 126                         if ( --n <= 0 )
 127                                 break;
 128                         goto LOOP0;
 129                 }
 130                 if ( hx0 < 0x3e400000 ) {
 131                         /* Too small.  cos x ~ 1, sin x ~ x. */
 132                         v = *x;
 133                         *c = 1.0;
 134                         *y = *x;
 135                         x += stridex;
 136                         y += stridey;
 137                         c += stridec;
 138                         i = 0;
 139                         if ( --n <= 0 )
 140                                 break;
 141                         goto LOOP0;
 142                 }
 143                 x0 = *x;
 144                 py0 = y;
 145                 pc0 = c;
 146                 x += stridex;
 147                 y += stridey;
 148                 c += stridec;
 149                 i = 1;
 150                 if ( --n <= 0 )
 151                         break;
 152 
 153 LOOP1: /* Get second arg, same as above. */
 154                 xsb1 = HI(x);
 155                 hx1 = xsb1 & ~0x80000000;
 156                 if ( hx1 > 0x3fe921fb )
 157                 {
 158                         biguns = 1;
 159                         x += stridex;
 160                         y += stridey;
 161                         c += stridec;
 162                         i = 1;
 163                         if ( --n <= 0 )
 164                                 break;
 165                         goto LOOP1;
 166                 }
 167                 if ( hx1 < 0x3e400000 )
 168                 {
 169                         v = *x;
 170                         *c = 1.0;
 171                         *y = *x;
 172                         x += stridex;
 173                         y += stridey;
 174                         c += stridec;
 175                         i = 1;
 176                         if ( --n <= 0 )
 177                                 break;
 178                         goto LOOP1;
 179                 }
 180                 x1 = *x;
 181                 py1 = y;
 182                 pc1 = c;
 183                 x += stridex;
 184                 y += stridey;
 185                 c += stridec;
 186                 i = 2;
 187                 if ( --n <= 0 )
 188                         break;
 189 
 190 LOOP2: /* Get third arg, same as above. */
 191                 xsb2 = HI(x);
 192                 hx2 = xsb2 & ~0x80000000;
 193                 if ( hx2 > 0x3fe921fb )
 194                 {
 195                         biguns = 1;
 196                         x += stridex;
 197                         y += stridey;
 198                         c += stridec;
 199                         i = 2;
 200                         if ( --n <= 0 )
 201                                 break;
 202                         goto LOOP2;
 203                 }
 204                 if ( hx2 < 0x3e400000 )
 205                 {
 206                         v = *x;
 207                         *c = 1.0;
 208                         *y = *x;
 209                         x += stridex;
 210                         y += stridey;
 211                         c += stridec;
 212                         i = 2;
 213                         if ( --n <= 0 )
 214                                 break;
 215                         goto LOOP2;
 216                 }
 217                 x2 = *x;
 218                 py2 = y;
 219                 pc2 = c;
 220 
 221                 /*
 222                  * 0x3fc40000 = 5/32 ~ 0.15625
 223                  * Get msb after subtraction.  Will be 1 only if
 224                  * hx0 - 5/32 is negative.
 225                  */
 226                 i = ( hx2 - 0x3fc40000 ) >> 31;
 227                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 228                 i |= ( ( hx0 - 0x3fc40000 ) >> 29 ) & 4;
 229                 switch ( i )
 230                 {
 231                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 232                         double          w0, w1, w2;
 233                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 234                         double          z0, z1, z2;
 235                         unsigned        j0, j1, j2;
 236 
 237                 case 0: /* All are > 5/32 */
 238                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 239                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 240                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 241 
 242                         HI(&t0) = j0;
 243                         HI(&t1) = j1;
 244                         HI(&t2) = j2;
 245                         LO(&t0) = 0;
 246                         LO(&t1) = 0;
 247                         LO(&t2) = 0;
 248 
 249                         x0 -= t0;
 250                         x1 -= t1;
 251                         x2 -= t2;
 252 
 253                         z0 = x0 * x0;
 254                         z1 = x1 * x1;
 255                         z2 = x2 * x2;
 256 
 257                         t0 = z0 * ( qq1 + z0 * qq2 );
 258                         t1 = z1 * ( qq1 + z1 * qq2 );
 259                         t2 = z2 * ( qq1 + z2 * qq2 );
 260 
 261                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 262                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 263                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 264 
 265                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 266                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 267                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 268 
 269                         xsb0 = ( xsb0 >> 30 ) & 2;
 270                         xsb1 = ( xsb1 >> 30 ) & 2;
 271                         xsb2 = ( xsb2 >> 30 ) & 2;
 272 
 273                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 274                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 275                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 276 
 277                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 278                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 279                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 280                                 /* cos_lo(t) */
 281                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 282                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 283                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 284 
 285                         *pc0 = a2_0 + t2_0;
 286                         *pc1 = a2_1 + t2_1;
 287                         *pc2 = a2_2 + t2_2;
 288 
 289                         t1_0 = a2_0*w0 + a1_0*t0;
 290                         t1_1 = a2_1*w1 + a1_1*t1;
 291                         t1_2 = a2_2*w2 + a1_2*t2;
 292 
 293                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 294                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 295                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 296 
 297                         *py0 = a1_0 + t1_0;
 298                         *py1 = a1_1 + t1_1;
 299                         *py2 = a1_2 + t1_2;
 300 
 301                         break;
 302 
 303                 case 1:
 304                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 305                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 306                         HI(&t0) = j0;
 307                         HI(&t1) = j1;
 308                         LO(&t0) = 0;
 309                         LO(&t1) = 0;
 310                         x0 -= t0;
 311                         x1 -= t1;
 312                         z0 = x0 * x0;
 313                         z1 = x1 * x1;
 314                         z2 = x2 * x2;
 315                         t0 = z0 * ( qq1 + z0 * qq2 );
 316                         t1 = z1 * ( qq1 + z1 * qq2 );
 317                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 318                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 319                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 320                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 321                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 322                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 323                         xsb0 = ( xsb0 >> 30 ) & 2;
 324                         xsb1 = ( xsb1 >> 30 ) & 2;
 325 
 326                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 327                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 328 
 329                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 330                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 331                                 /* cos_lo(t) */
 332                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 333                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 334 
 335                         *pc0 = a2_0 + t2_0;
 336                         *pc1 = a2_1 + t2_1;
 337                         *pc2 = one + t2;
 338 
 339                         t1_0 = a2_0*w0 + a1_0*t0;
 340                         t1_1 = a2_1*w1 + a1_1*t1;
 341                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 342 
 343                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 344                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 345                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 346 
 347                         *py0 = a1_0 + t1_0;
 348                         *py1 = a1_1 + t1_1;
 349                         t2 = x2 + x2 * t2;
 350                         *py2 = t2;
 351 
 352                         break;
 353 
 354                 case 2:
 355                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 356                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 357                         HI(&t0) = j0;
 358                         HI(&t2) = j2;
 359                         LO(&t0) = 0;
 360                         LO(&t2) = 0;
 361                         x0 -= t0;
 362                         x2 -= t2;
 363                         z0 = x0 * x0;
 364                         z1 = x1 * x1;
 365                         z2 = x2 * x2;
 366                         t0 = z0 * ( qq1 + z0 * qq2 );
 367                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 368                         t2 = z2 * ( qq1 + z2 * qq2 );
 369                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 370                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 371                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 372                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 373                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 374                         xsb0 = ( xsb0 >> 30 ) & 2;
 375                         xsb2 = ( xsb2 >> 30 ) & 2;
 376 
 377                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 378                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 379 
 380                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 381                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 382                                 /* cos_lo(t) */
 383                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 384                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 385 
 386                         *pc0 = a2_0 + t2_0;
 387                         *pc1 = one + t1;
 388                         *pc2 = a2_2 + t2_2;
 389 
 390                         t1_0 = a2_0*w0 + a1_0*t0;
 391                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 392                         t1_2 = a2_2*w2 + a1_2*t2;
 393 
 394                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 395                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 396                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 397 
 398                         *py0 = a1_0 + t1_0;
 399                         t1 = x1 + x1 * t1;
 400                         *py1 = t1;
 401                         *py2 = a1_2 + t1_2;
 402 
 403                         break;
 404 
 405                 case 3:
 406                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 407                         HI(&t0) = j0;
 408                         LO(&t0) = 0;
 409                         x0 -= t0;
 410                         z0 = x0 * x0;
 411                         z1 = x1 * x1;
 412                         z2 = x2 * x2;
 413                         t0 = z0 * ( qq1 + z0 * qq2 );
 414                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 415                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 416                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 417                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 418                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 419                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 420                         xsb0 = ( xsb0 >> 30 ) & 2;
 421                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 422 
 423                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 424 
 425                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 426 
 427                         *pc0 = a2_0 + t2_0;
 428                         *pc1 = one + t1;
 429                         *pc2 = one + t2;
 430 
 431                         t1_0 = a2_0*w0 + a1_0*t0;
 432                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 433                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 434 
 435                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 436                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 437                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 438 
 439                         *py0 = a1_0 + t1_0;
 440                         t1 = x1 + x1 * t1;
 441                         *py1 = t1;
 442                         t2 = x2 + x2 * t2;
 443                         *py2 = t2;
 444 
 445                         break;
 446 
 447                 case 4:
 448                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 449                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 450                         HI(&t1) = j1;
 451                         HI(&t2) = j2;
 452                         LO(&t1) = 0;
 453                         LO(&t2) = 0;
 454                         x1 -= t1;
 455                         x2 -= t2;
 456                         z0 = x0 * x0;
 457                         z1 = x1 * x1;
 458                         z2 = x2 * x2;
 459                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 460                         t1 = z1 * ( qq1 + z1 * qq2 );
 461                         t2 = z2 * ( qq1 + z2 * qq2 );
 462                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 463                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 464                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 465                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 466                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 467                         xsb1 = ( xsb1 >> 30 ) & 2;
 468                         xsb2 = ( xsb2 >> 30 ) & 2;
 469 
 470                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 471                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 472 
 473                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 474                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 475                                 /* cos_lo(t) */
 476                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 477                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 478 
 479                         *pc0 = one + t0;
 480                         *pc1 = a2_1 + t2_1;
 481                         *pc2 = a2_2 + t2_2;
 482 
 483                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 484                         t1_1 = a2_1*w1 + a1_1*t1;
 485                         t1_2 = a2_2*w2 + a1_2*t2;
 486 
 487                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 488                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 489                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 490 
 491                         t0 = x0 + x0 * t0;
 492                         *py0 = t0;
 493                         *py1 = a1_1 + t1_1;
 494                         *py2 = a1_2 + t1_2;
 495 
 496                         break;
 497 
 498                 case 5:
 499                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 500                         HI(&t1) = j1;
 501                         LO(&t1) = 0;
 502                         x1 -= t1;
 503                         z0 = x0 * x0;
 504                         z1 = x1 * x1;
 505                         z2 = x2 * x2;
 506                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 507                         t1 = z1 * ( qq1 + z1 * qq2 );
 508                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 509                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 510                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 511                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 512                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 513                         xsb1 = ( xsb1 >> 30 ) & 2;
 514 
 515                         a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 516 
 517                         a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 518 
 519                         t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 520 
 521                         *pc0 = one + t0;
 522                         *pc1 = a2_1 + t2_1;
 523                         *pc2 = one + t2;
 524 
 525                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 526                         t1_1 = a2_1*w1 + a1_1*t1;
 527                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 528 
 529                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 530                         t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 531                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 532 
 533                         t0 = x0 + x0 * t0;
 534                         *py0 = t0;
 535                         *py1 = a1_1 + t1_1;
 536                         t2 = x2 + x2 * t2;
 537                         *py2 = t2;
 538 
 539                         break;
 540 
 541                 case 6:
 542                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 543                         HI(&t2) = j2;
 544                         LO(&t2) = 0;
 545                         x2 -= t2;
 546                         z0 = x0 * x0;
 547                         z1 = x1 * x1;
 548                         z2 = x2 * x2;
 549                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 550                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 551                         t2 = z2 * ( qq1 + z2 * qq2 );
 552                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 553                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 554                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 555                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 556                         xsb2 = ( xsb2 >> 30 ) & 2;
 557                         a1_2 = __vlibm_TBL_sincos_hi[j2+xsb2];
 558 
 559                         a2_2 = __vlibm_TBL_sincos_hi[j2+1];
 560 
 561                         t2_2 = __vlibm_TBL_sincos_lo[j2+1] - ( a1_2*w2 - a2_2*t2 );
 562 
 563                         *pc0 = one + t0;
 564                         *pc1 = one + t1;
 565                         *pc2 = a2_2 + t2_2;
 566 
 567                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 568                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 569                         t1_2 = a2_2*w2 + a1_2*t2;
 570 
 571                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 572                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 573                         t1_2 += __vlibm_TBL_sincos_lo[j2+xsb2];
 574 
 575                         t0 = x0 + x0 * t0;
 576                         *py0 = t0;
 577                         t1 = x1 + x1 * t1;
 578                         *py1 = t1;
 579                         *py2 = a1_2 + t1_2;
 580 
 581                         break;
 582 
 583                 case 7: /* All are < 5/32 */
 584                         z0 = x0 * x0;
 585                         z1 = x1 * x1;
 586                         z2 = x2 * x2;
 587                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 588                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 589                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 590                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 591                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 592                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 593                         *pc0 = one + t0;
 594                         *pc1 = one + t1;
 595                         *pc2 = one + t2;
 596                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 597                         t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 598                         t2 = z2 * ( poly3[0] + z2 * poly4[0] );
 599                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 600                         t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 601                         t2 = z2 * ( poly1[0] + z2 * ( poly2[0] + t2 ) );
 602                         t0 = x0 + x0 * t0;
 603                         t1 = x1 + x1 * t1;
 604                         t2 = x2 + x2 * t2;
 605                         *py0 = t0;
 606                         *py1 = t1;
 607                         *py2 = t2;
 608                         break;
 609                 }
 610 
 611                 x += stridex;
 612                 y += stridey;
 613                 c += stridec;
 614                 i = 0;
 615         } while ( --n > 0 ); /* END MAIN LOOP */
 616 
 617         /*
 618          * CLEAN UP last 0, 1, or 2 elts.
 619          */
 620         if ( i > 0 ) /* Clean up elts at tail.  i < 3. */
 621         {
 622                 double          a1_0, a1_1, a2_0, a2_1;
 623                 double          w0, w1;
 624                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
 625                 double          z0, z1;
 626                 unsigned        j0, j1;
 627 
 628                 if ( i > 1 )
 629                 {
 630                         if ( hx1 < 0x3fc40000 )
 631                         {
 632                                 z1 = x1 * x1;
 633                                 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 634                                 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 635                                 t1 = one + t1;
 636                                 *pc1 = t1;
 637                                 t1 = z1 * ( poly3[0] + z1 * poly4[0] );
 638                                 t1 = z1 * ( poly1[0] + z1 * ( poly2[0] + t1 ) );
 639                                 t1 = x1 + x1 * t1;
 640                                 *py1 = t1;
 641                         }
 642                         else
 643                         {
 644                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 645                                 HI(&t1) = j1;
 646                                 LO(&t1) = 0;
 647                                 x1 -= t1;
 648                                 z1 = x1 * x1;
 649                                 t1 = z1 * ( qq1 + z1 * qq2 );
 650                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 651                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 652                                 xsb1 = ( xsb1 >> 30 ) & 2;
 653                                 a1_1 = __vlibm_TBL_sincos_hi[j1+xsb1];
 654                                 a2_1 = __vlibm_TBL_sincos_hi[j1+1];
 655                                 t2_1 = __vlibm_TBL_sincos_lo[j1+1] - ( a1_1*w1 - a2_1*t1 );
 656                                 *pc1 = a2_1 + t2_1;
 657                                 t1_1 = a2_1*w1 + a1_1*t1;
 658                                 t1_1 += __vlibm_TBL_sincos_lo[j1+xsb1];
 659                                 *py1 = a1_1 + t1_1;
 660                         }
 661                 }
 662                 if ( hx0 < 0x3fc40000 )
 663                 {
 664                         z0 = x0 * x0;
 665                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 666                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 667                         t0 = one + t0;
 668                         *pc0 = t0;
 669                         t0 = z0 * ( poly3[0] + z0 * poly4[0] );
 670                         t0 = z0 * ( poly1[0] + z0 * ( poly2[0] + t0 ) );
 671                         t0 = x0 + x0 * t0;
 672                         *py0 = t0;
 673                 }
 674                 else
 675                 {
 676                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 677                         HI(&t0) = j0;
 678                         LO(&t0) = 0;
 679                         x0 -= t0;
 680                         z0 = x0 * x0;
 681                         t0 = z0 * ( qq1 + z0 * qq2 );
 682                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 683                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 684                         xsb0 = ( xsb0 >> 30 ) & 2;
 685                         a1_0 = __vlibm_TBL_sincos_hi[j0+xsb0]; /* sin_hi(t) */
 686                         a2_0 = __vlibm_TBL_sincos_hi[j0+1];     /* cos_hi(t) */
 687                         t2_0 = __vlibm_TBL_sincos_lo[j0+1] - ( a1_0*w0 - a2_0*t0 );
 688                         *pc0 = a2_0 + t2_0;
 689                         t1_0 = a2_0*w0 + a1_0*t0;
 690                         t1_0 += __vlibm_TBL_sincos_lo[j0+xsb0]; /* sin_lo(t) */
 691                         *py0 = a1_0 + t1_0;
 692                 }
 693         } /* END CLEAN UP */
 694 
 695         if ( !biguns )
 696                 return;
 697 
 698         /*
 699          * Take care of BIGUNS.
 700          */
 701         n = nsave;
 702         x = xsave;
 703         stridex = sxsave;
 704         y = ysave;
 705         stridey = sysave;
 706         c = csave;
 707         stridec = scsave;
 708         biguns = 0;
 709 
 710         x0_or_one[1] = 1.0;
 711         x1_or_one[1] = 1.0;
 712         x2_or_one[1] = 1.0;
 713         x0_or_one[3] = -1.0;
 714         x1_or_one[3] = -1.0;
 715         x2_or_one[3] = -1.0;
 716         y0_or_zero[1] = 0.0;
 717         y1_or_zero[1] = 0.0;
 718         y2_or_zero[1] = 0.0;
 719         y0_or_zero[3] = 0.0;
 720         y1_or_zero[3] = 0.0;
 721         y2_or_zero[3] = 0.0;
 722 
 723         do
 724         {
 725                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 726                 unsigned        hx;
 727                 int                     n0, n1, n2;
 728 
 729                 /*
 730                  * Find 3 more to work on: Not already done, not too big.
 731                  */
 732 loop0:
 733                 hx = HI(x);
 734                 xsb0 = hx >> 31;
 735                 hx &= ~0x80000000;
 736                 if ( hx <= 0x3fe921fb ) /* Done above. */
 737                 {
 738                         x += stridex;
 739                         y += stridey;
 740                         c += stridec;
 741                         i = 0;
 742                         if ( --n <= 0 )
 743                                 break;
 744                         goto loop0;
 745                 }
 746                 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
 747                 {
 748                         if ( hx >= 0x7ff00000 ) /* Inf or NaN */
 749                         {
 750                                 x0 = *x;
 751                                 *y = x0 - x0;
 752                                 *c = x0 - x0;
 753                         }
 754                         else {
 755                                 biguns = 1;
 756                         }
 757                         x += stridex;
 758                         y += stridey;
 759                         c += stridec;
 760                         i = 0;
 761                         if ( --n <= 0 )
 762                                 break;
 763                         goto loop0;
 764                 }
 765                 x0 = *x;
 766                 py0 = y;
 767                 pc0 = c;
 768                 x += stridex;
 769                 y += stridey;
 770                 c += stridec;
 771                 i = 1;
 772                 if ( --n <= 0 )
 773                         break;
 774 
 775 loop1:
 776                 hx = HI(x);
 777                 xsb1 = hx >> 31;
 778                 hx &= ~0x80000000;
 779                 if ( hx <= 0x3fe921fb )
 780                 {
 781                         x += stridex;
 782                         y += stridey;
 783                         c += stridec;
 784                         i = 1;
 785                         if ( --n <= 0 )
 786                                 break;
 787                         goto loop1;
 788                 }
 789                 if ( hx > 0x413921fb )
 790                 {
 791                         if ( hx >= 0x7ff00000 )
 792                         {
 793                                 x1 = *x;
 794                                 *y = x1 - x1;
 795                                 *c = x1 - x1;
 796                         }
 797                         else {
 798                                 biguns = 1;
 799                         }
 800                         x += stridex;
 801                         y += stridey;
 802                         c += stridec;
 803                         i = 1;
 804                         if ( --n <= 0 )
 805                                 break;
 806                         goto loop1;
 807                 }
 808                 x1 = *x;
 809                 py1 = y;
 810                 pc1 = c;
 811                 x += stridex;
 812                 y += stridey;
 813                 c += stridec;
 814                 i = 2;
 815                 if ( --n <= 0 )
 816                         break;
 817 
 818 loop2:
 819                 hx = HI(x);
 820                 xsb2 = hx >> 31;
 821                 hx &= ~0x80000000;
 822                 if ( hx <= 0x3fe921fb )
 823                 {
 824                         x += stridex;
 825                         y += stridey;
 826                         c += stridec;
 827                         i = 2;
 828                         if ( --n <= 0 )
 829                                 break;
 830                         goto loop2;
 831                 }
 832                 if ( hx > 0x413921fb )
 833                 {
 834                         if ( hx >= 0x7ff00000 )
 835                         {
 836                                 x2 = *x;
 837                                 *y = x2 - x2;
 838                                 *c = x2 - x2;
 839                         }
 840                         else {
 841                                 biguns = 1;
 842                         }
 843                         x += stridex;
 844                         y += stridey;
 845                         c += stridec;
 846                         i = 2;
 847                         if ( --n <= 0 )
 848                                 break;
 849                         goto loop2;
 850                 }
 851                 x2 = *x;
 852                 py2 = y;
 853                 pc2 = c;
 854 
 855                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 856                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 857                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 858                 fn0 = (double) n0;
 859                 fn1 = (double) n1;
 860                 fn2 = (double) n2;
 861                 n0 &= 3;
 862                 n1 &= 3;
 863                 n2 &= 3;
 864                 a0 = x0 - fn0 * pio2_1;
 865                 a1 = x1 - fn1 * pio2_1;
 866                 a2 = x2 - fn2 * pio2_1;
 867                 w0 = fn0 * pio2_2;
 868                 w1 = fn1 * pio2_2;
 869                 w2 = fn2 * pio2_2;
 870                 x0 = a0 - w0;
 871                 x1 = a1 - w1;
 872                 x2 = a2 - w2;
 873                 y0 = ( a0 - x0 ) - w0;
 874                 y1 = ( a1 - x1 ) - w1;
 875                 y2 = ( a2 - x2 ) - w2;
 876                 a0 = x0;
 877                 a1 = x1;
 878                 a2 = x2;
 879                 w0 = fn0 * pio2_3 - y0;
 880                 w1 = fn1 * pio2_3 - y1;
 881                 w2 = fn2 * pio2_3 - y2;
 882                 x0 = a0 - w0;
 883                 x1 = a1 - w1;
 884                 x2 = a2 - w2;
 885                 y0 = ( a0 - x0 ) - w0;
 886                 y1 = ( a1 - x1 ) - w1;
 887                 y2 = ( a2 - x2 ) - w2;
 888                 a0 = x0;
 889                 a1 = x1;
 890                 a2 = x2;
 891                 w0 = fn0 * pio2_3t - y0;
 892                 w1 = fn1 * pio2_3t - y1;
 893                 w2 = fn2 * pio2_3t - y2;
 894                 x0 = a0 - w0;
 895                 x1 = a1 - w1;
 896                 x2 = a2 - w2;
 897                 y0 = ( a0 - x0 ) - w0;
 898                 y1 = ( a1 - x1 ) - w1;
 899                 y2 = ( a2 - x2 ) - w2;
 900                 xsb2 = HI(&x2);
 901                 i = ( ( xsb2 & ~0x80000000 ) - 0x3fc40000 ) >> 31;
 902                 xsb1 = HI(&x1);
 903                 i |= ( ( ( xsb1 & ~0x80000000 ) - 0x3fc40000 ) >> 30 ) & 2;
 904                 xsb0 = HI(&x0);
 905                 i |= ( ( ( xsb0 & ~0x80000000 ) - 0x3fc40000 ) >> 29 ) & 4;
 906                 switch ( i )
 907                 {
 908                         double          a1_0, a1_1, a1_2, a2_0, a2_1, a2_2;
 909                         double          t0, t1, t2, t1_0, t1_1, t1_2, t2_0, t2_1, t2_2;
 910                         double          z0, z1, z2;
 911                         unsigned        j0, j1, j2;
 912 
 913                 case 0:
 914                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 915                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 916                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 917                         HI(&t0) = j0;
 918                         HI(&t1) = j1;
 919                         HI(&t2) = j2;
 920                         LO(&t0) = 0;
 921                         LO(&t1) = 0;
 922                         LO(&t2) = 0;
 923                         x0 = ( x0 - t0 ) + y0;
 924                         x1 = ( x1 - t1 ) + y1;
 925                         x2 = ( x2 - t2 ) + y2;
 926                         z0 = x0 * x0;
 927                         z1 = x1 * x1;
 928                         z2 = x2 * x2;
 929                         t0 = z0 * ( qq1 + z0 * qq2 );
 930                         t1 = z1 * ( qq1 + z1 * qq2 );
 931                         t2 = z2 * ( qq1 + z2 * qq2 );
 932                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 933                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 934                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 935                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 936                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 937                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 938                         xsb0 = ( xsb0 >> 30 ) & 2;
 939                         xsb1 = ( xsb1 >> 30 ) & 2;
 940                         xsb2 = ( xsb2 >> 30 ) & 2;
 941                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 942                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 943                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 944                         xsb0 |= 1;
 945                         xsb1 |= 1;
 946                         xsb2 |= 1;
 947 
 948                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
 949                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
 950                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
 951 
 952                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
 953                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
 954                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
 955 
 956                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
 957                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
 958                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
 959 
 960                         w0 *= a2_0;
 961                         w1 *= a2_1;
 962                         w2 *= a2_2;
 963 
 964                         *pc0 = a2_0 + t2_0;
 965                         *pc1 = a2_1 + t2_1;
 966                         *pc2 = a2_2 + t2_2;
 967 
 968                         t1_0 = w0 + a1_0*t0;
 969                         t1_1 = w1 + a1_1*t1;
 970                         t1_2 = w2 + a1_2*t2;
 971 
 972                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
 973                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
 974                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
 975 
 976                         *py0 = a1_0 + t1_0;
 977                         *py1 = a1_1 + t1_1;
 978                         *py2 = a1_2 + t1_2;
 979 
 980                         break;
 981 
 982                 case 1:
 983                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 984                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 985                         j2 = n2 & 1;
 986                         HI(&t0) = j0;
 987                         HI(&t1) = j1;
 988                         LO(&t0) = 0;
 989                         LO(&t1) = 0;
 990                         x2_or_one[0] = x2;
 991                         x2_or_one[2] = -x2;
 992                         x0 = ( x0 - t0 ) + y0;
 993                         x1 = ( x1 - t1 ) + y1;
 994                         y2_or_zero[0] = y2;
 995                         y2_or_zero[2] = -y2;
 996                         z0 = x0 * x0;
 997                         z1 = x1 * x1;
 998                         z2 = x2 * x2;
 999                         t0 = z0 * ( qq1 + z0 * qq2 );
1000                         t1 = z1 * ( qq1 + z1 * qq2 );
1001                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1002                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1003                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1004                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1005                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1006                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1007                         xsb0 = ( xsb0 >> 30 ) & 2;
1008                         xsb1 = ( xsb1 >> 30 ) & 2;
1009                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1010                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1011                         xsb0 |= 1;
1012                         xsb1 |= 1;
1013                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1014                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1015 
1016                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1017                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1018 
1019                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1020                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1021                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1022 
1023                         *pc0 = a2_0 + t2_0;
1024                         *pc1 = a2_1 + t2_1;
1025                         *py2 = t2;
1026 
1027                         n2 = (n2 + 1) & 3;
1028                         j2 = (j2 + 1) & 1;
1029                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1030 
1031                         t1_0 = a2_0*w0 + a1_0*t0;
1032                         t1_1 = a2_1*w1 + a1_1*t1;
1033                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1034 
1035                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1036                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1037                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1038 
1039                         *py0 = a1_0 + t1_0;
1040                         *py1 = a1_1 + t1_1;
1041                         *pc2 = t2;
1042 
1043                         break;
1044 
1045                 case 2:
1046                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1047                         j1 = n1 & 1;
1048                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1049                         HI(&t0) = j0;
1050                         HI(&t2) = j2;
1051                         LO(&t0) = 0;
1052                         LO(&t2) = 0;
1053                         x1_or_one[0] = x1;
1054                         x1_or_one[2] = -x1;
1055                         x0 = ( x0 - t0 ) + y0;
1056                         y1_or_zero[0] = y1;
1057                         y1_or_zero[2] = -y1;
1058                         x2 = ( x2 - t2 ) + y2;
1059                         z0 = x0 * x0;
1060                         z1 = x1 * x1;
1061                         z2 = x2 * x2;
1062                         t0 = z0 * ( qq1 + z0 * qq2 );
1063                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1064                         t2 = z2 * ( qq1 + z2 * qq2 );
1065                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1066                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1067                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1068                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1069                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1070                         xsb0 = ( xsb0 >> 30 ) & 2;
1071                         xsb2 = ( xsb2 >> 30 ) & 2;
1072                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1073                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1074                         xsb0 |= 1;
1075                         xsb2 |= 1;
1076 
1077                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1078                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1079 
1080                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1081                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1082 
1083                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1084                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1085                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1086 
1087                         *pc0 = a2_0 + t2_0;
1088                         *py1 = t1;
1089                         *pc2 = a2_2 + t2_2;
1090 
1091                         n1 = (n1 + 1) & 3;
1092                         j1 = (j1 + 1) & 1;
1093                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1094 
1095                         t1_0 = a2_0*w0 + a1_0*t0;
1096                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1097                         t1_2 = a2_2*w2 + a1_2*t2;
1098 
1099                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1100                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1101                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1102 
1103                         *py0 = a1_0 + t1_0;
1104                         *pc1 = t1;
1105                         *py2 = a1_2 + t1_2;
1106 
1107                         break;
1108 
1109                 case 3:
1110                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1111                         j1 = n1 & 1;
1112                         j2 = n2 & 1;
1113                         HI(&t0) = j0;
1114                         LO(&t0) = 0;
1115                         x1_or_one[0] = x1;
1116                         x1_or_one[2] = -x1;
1117                         x2_or_one[0] = x2;
1118                         x2_or_one[2] = -x2;
1119                         x0 = ( x0 - t0 ) + y0;
1120                         y1_or_zero[0] = y1;
1121                         y1_or_zero[2] = -y1;
1122                         y2_or_zero[0] = y2;
1123                         y2_or_zero[2] = -y2;
1124                         z0 = x0 * x0;
1125                         z1 = x1 * x1;
1126                         z2 = x2 * x2;
1127                         t0 = z0 * ( qq1 + z0 * qq2 );
1128                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1129                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1130                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1131                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1132                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1133                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1134                         xsb0 = ( xsb0 >> 30 ) & 2;
1135                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1136                         xsb0 |= 1;
1137 
1138                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1139                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1140 
1141                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1142                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1143                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1144 
1145                         *pc0 = a2_0 + t2_0;
1146                         *py1 = t1;
1147                         *py2 = t2;
1148 
1149                         n1 = (n1 + 1) & 3;
1150                         n2 = (n2 + 1) & 3;
1151                         j1 = (j1 + 1) & 1;
1152                         j2 = (j2 + 1) & 1;
1153 
1154                         t1_0 = a2_0*w0 + a1_0*t0;
1155                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1156                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1157 
1158                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1159                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1160                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1161 
1162                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1163                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1164 
1165                         *py0 = a1_0 + t1_0;
1166                         *pc1 = t1;
1167                         *pc2 = t2;
1168 
1169                         break;
1170 
1171                 case 4:
1172                         j0 = n0 & 1;
1173                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1174                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1175                         HI(&t1) = j1;
1176                         HI(&t2) = j2;
1177                         LO(&t1) = 0;
1178                         LO(&t2) = 0;
1179                         x0_or_one[0] = x0;
1180                         x0_or_one[2] = -x0;
1181                         y0_or_zero[0] = y0;
1182                         y0_or_zero[2] = -y0;
1183                         x1 = ( x1 - t1 ) + y1;
1184                         x2 = ( x2 - t2 ) + y2;
1185                         z0 = x0 * x0;
1186                         z1 = x1 * x1;
1187                         z2 = x2 * x2;
1188                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1189                         t1 = z1 * ( qq1 + z1 * qq2 );
1190                         t2 = z2 * ( qq1 + z2 * qq2 );
1191                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1192                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1193                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1194                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1195                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1196                         xsb1 = ( xsb1 >> 30 ) & 2;
1197                         xsb2 = ( xsb2 >> 30 ) & 2;
1198                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1199                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1200                         xsb1 |= 1;
1201                         xsb2 |= 1;
1202 
1203                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1204                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1205 
1206                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1207                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1208 
1209                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1210                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1211                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1212 
1213                         *py0 = t0;
1214                         *pc1 = a2_1 + t2_1;
1215                         *pc2 = a2_2 + t2_2;
1216 
1217                         n0 = (n0 + 1) & 3;
1218                         j0 = (j0 + 1) & 1;
1219                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1220 
1221                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1222                         t1_1 = a2_1*w1 + a1_1*t1;
1223                         t1_2 = a2_2*w2 + a1_2*t2;
1224 
1225                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1226                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1227                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1228 
1229                         *py1 = a1_1 + t1_1;
1230                         *py2 = a1_2 + t1_2;
1231                         *pc0 = t0;
1232 
1233                         break;
1234 
1235                 case 5:
1236                         j0 = n0 & 1;
1237                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1238                         j2 = n2 & 1;
1239                         HI(&t1) = j1;
1240                         LO(&t1) = 0;
1241                         x0_or_one[0] = x0;
1242                         x0_or_one[2] = -x0;
1243                         x2_or_one[0] = x2;
1244                         x2_or_one[2] = -x2;
1245                         y0_or_zero[0] = y0;
1246                         y0_or_zero[2] = -y0;
1247                         x1 = ( x1 - t1 ) + y1;
1248                         y2_or_zero[0] = y2;
1249                         y2_or_zero[2] = -y2;
1250                         z0 = x0 * x0;
1251                         z1 = x1 * x1;
1252                         z2 = x2 * x2;
1253                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1254                         t1 = z1 * ( qq1 + z1 * qq2 );
1255                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1256                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1257                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1258                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1259                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1260                         xsb1 = ( xsb1 >> 30 ) & 2;
1261                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
1262                         xsb1 |= 1;
1263 
1264                         a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1265                         a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1266 
1267                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1268                         t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1269                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1270 
1271                         *py0 = t0;
1272                         *pc1 = a2_1 + t2_1;
1273                         *py2 = t2;
1274 
1275                         n0 = (n0 + 1) & 3;
1276                         n2 = (n2 + 1) & 3;
1277                         j0 = (j0 + 1) & 1;
1278                         j2 = (j2 + 1) & 1;
1279 
1280                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1281                         t1_1 = a2_1*w1 + a1_1*t1;
1282                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1283 
1284                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1285                         t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1286                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1287 
1288                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1289                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1290 
1291                         *pc0 = t0;
1292                         *py1 = a1_1 + t1_1;
1293                         *pc2 = t2;
1294 
1295                         break;
1296 
1297                 case 6:
1298                         j0 = n0 & 1;
1299                         j1 = n1 & 1;
1300                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
1301                         HI(&t2) = j2;
1302                         LO(&t2) = 0;
1303                         x0_or_one[0] = x0;
1304                         x0_or_one[2] = -x0;
1305                         x1_or_one[0] = x1;
1306                         x1_or_one[2] = -x1;
1307                         y0_or_zero[0] = y0;
1308                         y0_or_zero[2] = -y0;
1309                         y1_or_zero[0] = y1;
1310                         y1_or_zero[2] = -y1;
1311                         x2 = ( x2 - t2 ) + y2;
1312                         z0 = x0 * x0;
1313                         z1 = x1 * x1;
1314                         z2 = x2 * x2;
1315                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1316                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1317                         t2 = z2 * ( qq1 + z2 * qq2 );
1318                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1319                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1320                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
1321                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1322                         xsb2 = ( xsb2 >> 30 ) & 2;
1323                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
1324                         xsb2 |= 1;
1325 
1326                         a1_2 = __vlibm_TBL_sincos_hi[j2+n2];
1327                         a2_2 = __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)];
1328 
1329                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1330                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1331                         t2_2 = __vlibm_TBL_sincos_lo[j2+((n2+xsb2)&3)] - ( a1_2*w2 - a2_2*t2 );
1332 
1333                         *py0 = t0;
1334                         *py1 = t1;
1335                         *pc2 = a2_2 + t2_2;
1336 
1337                         n0 = (n0 + 1) & 3;
1338                         n1 = (n1 + 1) & 3;
1339                         j0 = (j0 + 1) & 1;
1340                         j1 = (j1 + 1) & 1;
1341 
1342                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1343                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1344                         t1_2 = a2_2*w2 + a1_2*t2;
1345 
1346                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1347                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1348                         t1_2 += __vlibm_TBL_sincos_lo[j2+n2];
1349 
1350                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1351                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1352 
1353                         *pc0 = t0;
1354                         *pc1 = t1;
1355                         *py2 = a1_2 + t1_2;
1356 
1357                         break;
1358 
1359                 case 7:
1360                         j0 = n0 & 1;
1361                         j1 = n1 & 1;
1362                         j2 = n2 & 1;
1363                         x0_or_one[0] = x0;
1364                         x0_or_one[2] = -x0;
1365                         x1_or_one[0] = x1;
1366                         x1_or_one[2] = -x1;
1367                         x2_or_one[0] = x2;
1368                         x2_or_one[2] = -x2;
1369                         y0_or_zero[0] = y0;
1370                         y0_or_zero[2] = -y0;
1371                         y1_or_zero[0] = y1;
1372                         y1_or_zero[2] = -y1;
1373                         y2_or_zero[0] = y2;
1374                         y2_or_zero[2] = -y2;
1375                         z0 = x0 * x0;
1376                         z1 = x1 * x1;
1377                         z2 = x2 * x2;
1378                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1379                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1380                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1381                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1382                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1383                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1384                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1385                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1386                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1387                         *py0 = t0;
1388                         *py1 = t1;
1389                         *py2 = t2;
1390 
1391                         n0 = (n0 + 1) & 3;
1392                         n1 = (n1 + 1) & 3;
1393                         n2 = (n2 + 1) & 3;
1394                         j0 = (j0 + 1) & 1;
1395                         j1 = (j1 + 1) & 1;
1396                         j2 = (j2 + 1) & 1;
1397                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1398                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1399                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
1400                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1401                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1402                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
1403                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1404                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1405                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
1406                         *pc0 = t0;
1407                         *pc1 = t1;
1408                         *pc2 = t2;
1409                         break;
1410                 }
1411 
1412                 x += stridex;
1413                 y += stridey;
1414                 c += stridec;
1415                 i = 0;
1416         } while ( --n > 0 );
1417 
1418         if ( i > 0 )
1419         {
1420                 double          a1_0, a1_1, a2_0, a2_1;
1421                 double          t0, t1, t1_0, t1_1, t2_0, t2_1;
1422                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
1423                 double          z0, z1;
1424                 unsigned        j0, j1;
1425                 int             n0, n1;
1426 
1427                 if ( i > 1 )
1428                 {
1429                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
1430                         fn1 = (double) n1;
1431                         n1 &= 3;
1432                         a1 = x1 - fn1 * pio2_1;
1433                         w1 = fn1 * pio2_2;
1434                         x1 = a1 - w1;
1435                         y1 = ( a1 - x1 ) - w1;
1436                         a1 = x1;
1437                         w1 = fn1 * pio2_3 - y1;
1438                         x1 = a1 - w1;
1439                         y1 = ( a1 - x1 ) - w1;
1440                         a1 = x1;
1441                         w1 = fn1 * pio2_3t - y1;
1442                         x1 = a1 - w1;
1443                         y1 = ( a1 - x1 ) - w1;
1444                         xsb1 = HI(&x1);
1445                         if ( ( xsb1 & ~0x80000000 ) < 0x3fc40000 )
1446                         {
1447                                 j1 = n1 & 1;
1448                                 x1_or_one[0] = x1;
1449                                 x1_or_one[2] = -x1;
1450                                 y1_or_zero[0] = y1;
1451                                 y1_or_zero[2] = -y1;
1452                                 z1 = x1 * x1;
1453                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1454                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1455                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1456                                 *py1 = t1;
1457                                 n1 = (n1 + 1) & 3;
1458                                 j1 = (j1 + 1) & 1;
1459                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1460                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1461                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1462                                 *pc1 = t1;
1463                         }
1464                         else
1465                         {
1466                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1467                                 HI(&t1) = j1;
1468                                 LO(&t1) = 0;
1469                                 x1 = ( x1 - t1 ) + y1;
1470                                 z1 = x1 * x1;
1471                                 t1 = z1 * ( qq1 + z1 * qq2 );
1472                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1473                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1474                                 xsb1 = ( xsb1 >> 30 ) & 2;
1475                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1476                                 xsb1 |= 1;
1477                                 a1_1 = __vlibm_TBL_sincos_hi[j1+n1];
1478                                 a2_1 = __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)];
1479                                 t2_1 = __vlibm_TBL_sincos_lo[j1+((n1+xsb1)&3)] - ( a1_1*w1 - a2_1*t1 );
1480                                 *pc1 = a2_1 + t2_1;
1481                                 t1_1 = a2_1*w1 + a1_1*t1;
1482                                 t1_1 += __vlibm_TBL_sincos_lo[j1+n1];
1483                                 *py1 = a1_1 + t1_1;
1484                         }
1485                 }
1486                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1487                 fn0 = (double) n0;
1488                 n0 &= 3;
1489                 a0 = x0 - fn0 * pio2_1;
1490                 w0 = fn0 * pio2_2;
1491                 x0 = a0 - w0;
1492                 y0 = ( a0 - x0 ) - w0;
1493                 a0 = x0;
1494                 w0 = fn0 * pio2_3 - y0;
1495                 x0 = a0 - w0;
1496                 y0 = ( a0 - x0 ) - w0;
1497                 a0 = x0;
1498                 w0 = fn0 * pio2_3t - y0;
1499                 x0 = a0 - w0;
1500                 y0 = ( a0 - x0 ) - w0;
1501                 xsb0 = HI(&x0);
1502                 if ( ( xsb0 & ~0x80000000 ) < 0x3fc40000 )
1503                 {
1504                         j0 = n0 & 1;
1505                         x0_or_one[0] = x0;
1506                         x0_or_one[2] = -x0;
1507                         y0_or_zero[0] = y0;
1508                         y0_or_zero[2] = -y0;
1509                         z0 = x0 * x0;
1510                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1511                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1512                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1513                         *py0 = t0;
1514                         n0 = (n0 + 1) & 3;
1515                         j0 = (j0 + 1) & 1;
1516                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1517                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1518                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1519                         *pc0 = t0;
1520                 }
1521                 else
1522                 {
1523                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1524                         HI(&t0) = j0;
1525                         LO(&t0) = 0;
1526                         x0 = ( x0 - t0 ) + y0;
1527                         z0 = x0 * x0;
1528                         t0 = z0 * ( qq1 + z0 * qq2 );
1529                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1530                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1531                         xsb0 = ( xsb0 >> 30 ) & 2;
1532                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1533                         xsb0 |= 1;
1534                         a1_0 = __vlibm_TBL_sincos_hi[j0+n0];
1535                         a2_0 = __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)];
1536                         t2_0 = __vlibm_TBL_sincos_lo[j0+((n0+xsb0)&3)] - ( a1_0*w0 - a2_0*t0 );
1537                         *pc0 = a2_0 + t2_0;
1538                         t1_0 = a2_0*w0 + a1_0*t0;
1539                         t1_0 += __vlibm_TBL_sincos_lo[j0+n0];
1540                         *py0 = a1_0 + t1_0;
1541                 }
1542         }
1543 
1544         if ( biguns ) {
1545                 __vlibm_vsincos_big( nsave, xsave, sxsave, ysave, sysave, csave, scsave, 0x413921fb );
1546         }
1547 }