1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include <sys/ccompile.h>
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /*
  48  * vcos.1.c
  49  *
  50  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
  51  * in the primary range part.
  52  *
  53  * Modification to primary range processing.  If an argument that does not
  54  * fall in the primary range is encountered, then processing is continued
  55  * in the medium range.
  56  *
  57  */
  58 
  59 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  60 
  61 static const double
  62         half[2] = { 0.5, -0.5 },
  63         one             = 1.0,
  64         invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
  65         pio2_1  = 1.570796326734125614166,      /* first 33 bits of pi/2 */
  66         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  67         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  68         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
  69         pp1             = -1.666666666605760465276263943134982554676e-0001,
  70         pp2             =  8.333261209690963126718376566146180944442e-0003,
  71         qq1             = -4.999999999977710986407023955908711557870e-0001,
  72         qq2             =  4.166654863857219350645055881018842089580e-0002,
  73         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  74                                 -4.999999999999931701464060878888294524481e-0001 },
  75         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  76                                  4.166666666394861917535640593963708222319e-0002 },
  77         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  78                                 -1.388888552656142867832756687736851681462e-0003 },
  79         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  80                                  2.478519423681460796618128289454530524759e-0005 };
  81 
  82 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
  83 
  84 /* Don't __ the following; acomp will handle it */
  85 extern double fabs( double );
  86 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
  87 
  88 /*
  89  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
  90  *
  91  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
  92  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  93  *
  94  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  95  */
  96 void
  97 __vcos( int n, double * restrict x, int stridex, double * restrict y,
  98         int stridey )
  99 {
 100         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
 101         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
 102         double          x0, x1, x2, *py0 = 0, *py1 = 0, *py2, *xsave, *ysave;
 103         unsigned        hx0, hx1, hx2, xsb0, xsb1 = 0, xsb2;
 104         int             i, biguns, nsave, sxsave, sysave;
 105         volatile int    v __GNU_UNUSED;
 106         nsave = n;
 107         xsave = x;
 108         sxsave = stridex;
 109         ysave = y;
 110         sysave = stridey;
 111         biguns = 0;
 112 
 113         do /* MAIN LOOP */
 114         {
 115                 /* Gotos here so _break_ exits MAIN LOOP. */
 116 LOOP0:  /* Find first arg in right range. */
 117                 xsb0 = HI(x); /* get most significant word */
 118                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 119                 if ( hx0 > 0x3fe921fb ) {
 120                         /* Too big: arg reduction needed, so leave for second part */
 121                         biguns = 1;
 122                         goto MEDIUM;
 123                 }
 124                 if ( hx0 < 0x3e400000 ) {
 125                         /* Too small.  cos x ~ 1. */
 126                         v = *x;
 127                         *y = 1.0;
 128                         x += stridex;
 129                         y += stridey;
 130                         i = 0;
 131                         if ( --n <= 0 )
 132                                 break;
 133                         goto LOOP0;
 134                 }
 135                 x0 = *x;
 136                 py0 = y;
 137                 x += stridex;
 138                 y += stridey;
 139                 i = 1;
 140                 if ( --n <= 0 )
 141                         break;
 142 
 143 LOOP1: /* Get second arg, same as above. */
 144                 xsb1 = HI(x);
 145                 hx1 = xsb1 & ~0x80000000;
 146                 if ( hx1 > 0x3fe921fb )
 147                 {
 148                         biguns = 2;
 149                         goto MEDIUM;
 150                 }
 151                 if ( hx1 < 0x3e400000 )
 152                 {
 153                         v = *x;
 154                         *y = 1.0;
 155                         x += stridex;
 156                         y += stridey;
 157                         i = 1;
 158                         if ( --n <= 0 )
 159                                 break;
 160                         goto LOOP1;
 161                 }
 162                 x1 = *x;
 163                 py1 = y;
 164                 x += stridex;
 165                 y += stridey;
 166                 i = 2;
 167                 if ( --n <= 0 )
 168                         break;
 169 
 170 LOOP2: /* Get third arg, same as above. */
 171                 xsb2 = HI(x);
 172                 hx2 = xsb2 & ~0x80000000;
 173                 if ( hx2 > 0x3fe921fb )
 174                 {
 175                         biguns = 3;
 176                         goto MEDIUM;
 177                 }
 178                 if ( hx2 < 0x3e400000 )
 179                 {
 180                         v = *x;
 181                         *y = 1.0;
 182                         x += stridex;
 183                         y += stridey;
 184                         i = 2;
 185                         if ( --n <= 0 )
 186                                 break;
 187                         goto LOOP2;
 188                 }
 189                 x2 = *x;
 190                 py2 = y;
 191 
 192                 /*
 193                  * 0x3fc40000 = 5/32 ~ 0.15625
 194                  * Get msb after subtraction.  Will be 1 only if
 195                  * hx0 - 5/32 is negative.
 196                  */
 197                 i = ( hx0 - 0x3fc40000 ) >> 31;
 198                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 199                 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
 200                 switch ( i )
 201                 {
 202                         double          a0, a1, a2, w0, w1, w2;
 203                         double          t0, t1, t2, z0, z1, z2;
 204                         unsigned        j0, j1, j2;
 205 
 206                 case 0: /* All are > 5/32 */
 207                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 208                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 209                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 210                         HI(&t0) = j0;
 211                         HI(&t1) = j1;
 212                         HI(&t2) = j2;
 213                         LO(&t0) = 0;
 214                         LO(&t1) = 0;
 215                         LO(&t2) = 0;
 216                         x0 -= t0;
 217                         x1 -= t1;
 218                         x2 -= t2;
 219                         z0 = x0 * x0;
 220                         z1 = x1 * x1;
 221                         z2 = x2 * x2;
 222                         t0 = z0 * ( qq1 + z0 * qq2 );
 223                         t1 = z1 * ( qq1 + z1 * qq2 );
 224                         t2 = z2 * ( qq1 + z2 * qq2 );
 225                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 226                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 227                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 228                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 229                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 230                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 231                         xsb0 = ( xsb0 >> 30 ) & 2;
 232                         xsb1 = ( xsb1 >> 30 ) & 2;
 233                         xsb2 = ( xsb2 >> 30 ) & 2;
 234                         a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
 235                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 236                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 237                            /*   cos_lo(t)                        sin_hi(t) */
 238                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 239                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 240                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 241 
 242                         *py0 = a0 + t0;
 243                         *py1 = a1 + t1;
 244                         *py2 = a2 + t2;
 245                         break;
 246 
 247                 case 1:
 248                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 249                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 250                         HI(&t1) = j1;
 251                         HI(&t2) = j2;
 252                         LO(&t1) = 0;
 253                         LO(&t2) = 0;
 254                         x1 -= t1;
 255                         x2 -= t2;
 256                         z0 = x0 * x0;
 257                         z1 = x1 * x1;
 258                         z2 = x2 * x2;
 259                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 260                         t1 = z1 * ( qq1 + z1 * qq2 );
 261                         t2 = z2 * ( qq1 + z2 * qq2 );
 262                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 263                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 264                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 265                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 266                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 267                         xsb1 = ( xsb1 >> 30 ) & 2;
 268                         xsb2 = ( xsb2 >> 30 ) & 2;
 269                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 270                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 271                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 272                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 273                         *py0 = one + t0;
 274                         *py1 = a1 + t1;
 275                         *py2 = a2 + t2;
 276                         break;
 277 
 278                 case 2:
 279                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 280                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 281                         HI(&t0) = j0;
 282                         HI(&t2) = j2;
 283                         LO(&t0) = 0;
 284                         LO(&t2) = 0;
 285                         x0 -= t0;
 286                         x2 -= t2;
 287                         z0 = x0 * x0;
 288                         z1 = x1 * x1;
 289                         z2 = x2 * x2;
 290                         t0 = z0 * ( qq1 + z0 * qq2 );
 291                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 292                         t2 = z2 * ( qq1 + z2 * qq2 );
 293                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 294                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 295                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 296                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 297                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 298                         xsb0 = ( xsb0 >> 30 ) & 2;
 299                         xsb2 = ( xsb2 >> 30 ) & 2;
 300                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 301                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 302                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 303                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 304                         *py0 = a0 + t0;
 305                         *py1 = one + t1;
 306                         *py2 = a2 + t2;
 307                         break;
 308 
 309                 case 3:
 310                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 311                         HI(&t2) = j2;
 312                         LO(&t2) = 0;
 313                         x2 -= t2;
 314                         z0 = x0 * x0;
 315                         z1 = x1 * x1;
 316                         z2 = x2 * x2;
 317                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 318                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 319                         t2 = z2 * ( qq1 + z2 * qq2 );
 320                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 321                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 322                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 323                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 324                         xsb2 = ( xsb2 >> 30 ) & 2;
 325                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 326                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 327                         *py0 = one + t0;
 328                         *py1 = one + t1;
 329                         *py2 = a2 + t2;
 330                         break;
 331 
 332                 case 4:
 333                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 334                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 335                         HI(&t0) = j0;
 336                         HI(&t1) = j1;
 337                         LO(&t0) = 0;
 338                         LO(&t1) = 0;
 339                         x0 -= t0;
 340                         x1 -= t1;
 341                         z0 = x0 * x0;
 342                         z1 = x1 * x1;
 343                         z2 = x2 * x2;
 344                         t0 = z0 * ( qq1 + z0 * qq2 );
 345                         t1 = z1 * ( qq1 + z1 * qq2 );
 346                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 347                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 348                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 349                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 350                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 351                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 352                         xsb0 = ( xsb0 >> 30 ) & 2;
 353                         xsb1 = ( xsb1 >> 30 ) & 2;
 354                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 355                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 356                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 357                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 358                         *py0 = a0 + t0;
 359                         *py1 = a1 + t1;
 360                         *py2 = one + t2;
 361                         break;
 362 
 363                 case 5:
 364                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 365                         HI(&t1) = j1;
 366                         LO(&t1) = 0;
 367                         x1 -= t1;
 368                         z0 = x0 * x0;
 369                         z1 = x1 * x1;
 370                         z2 = x2 * x2;
 371                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 372                         t1 = z1 * ( qq1 + z1 * qq2 );
 373                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 374                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 375                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 376                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 377                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 378                         xsb1 = ( xsb1 >> 30 ) & 2;
 379                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 380                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 381                         *py0 = one + t0;
 382                         *py1 = a1 + t1;
 383                         *py2 = one + t2;
 384                         break;
 385 
 386                 case 6:
 387                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 388                         HI(&t0) = j0;
 389                         LO(&t0) = 0;
 390                         x0 -= t0;
 391                         z0 = x0 * x0;
 392                         z1 = x1 * x1;
 393                         z2 = x2 * x2;
 394                         t0 = z0 * ( qq1 + z0 * qq2 );
 395                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 396                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 397                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 398                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 399                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 400                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 401                         xsb0 = ( xsb0 >> 30 ) & 2;
 402                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 403                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 404                         *py0 = a0 + t0;
 405                         *py1 = one + t1;
 406                         *py2 = one + t2;
 407                         break;
 408 
 409                 case 7: /* All are < 5/32 */
 410                         z0 = x0 * x0;
 411                         z1 = x1 * x1;
 412                         z2 = x2 * x2;
 413                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 414                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 415                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 416                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 417                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 418                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 419                         *py0 = one + t0;
 420                         *py1 = one + t1;
 421                         *py2 = one + t2;
 422                         break;
 423                 }
 424 
 425                 x += stridex;
 426                 y += stridey;
 427                 i = 0;
 428         } while ( --n > 0 ); /* END MAIN LOOP */
 429 
 430         /*
 431          * CLEAN UP last 0, 1, or 2 elts.
 432          */
 433         if ( i > 0 ) /* Clean up elts at tail.  i < 3. */
 434         {
 435                 double          a0, a1, w0, w1;
 436                 double          t0, t1, z0, z1;
 437                 unsigned        j0, j1;
 438 
 439                 if ( i > 1 )
 440                 {
 441                         if ( hx1 < 0x3fc40000 )
 442                         {
 443                                 z1 = x1 * x1;
 444                                 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 445                                 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 446                                 t1 = one + t1;
 447                                 *py1 = t1;
 448                         }
 449                         else
 450                         {
 451                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 452                                 HI(&t1) = j1;
 453                                 LO(&t1) = 0;
 454                                 x1 -= t1;
 455                                 z1 = x1 * x1;
 456                                 t1 = z1 * ( qq1 + z1 * qq2 );
 457                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 458                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 459                                 xsb1 = ( xsb1 >> 30 ) & 2;
 460                                 a1 = __vlibm_TBL_sincos_hi[j1+1];
 461                                 t1 = __vlibm_TBL_sincos_lo[j1+1]
 462                                         - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 463                                 *py1 = a1 + t1;
 464                         }
 465                 }
 466                 if ( hx0 < 0x3fc40000 )
 467                 {
 468                         z0 = x0 * x0;
 469                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 470                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 471                         t0 = one + t0;
 472                         *py0 = t0;
 473                 }
 474                 else
 475                 {
 476                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 477                         HI(&t0) = j0;
 478                         LO(&t0) = 0;
 479                         x0 -= t0;
 480                         z0 = x0 * x0;
 481                         t0 = z0 * ( qq1 + z0 * qq2 );
 482                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 483                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 484                         xsb0 = ( xsb0 >> 30 ) & 2;
 485                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 486                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 487                         *py0 = a0 + t0;
 488                 }
 489         } /* END CLEAN UP */
 490 
 491         return;
 492 
 493         /*
 494          * Take care of BIGUNS.
 495          *
 496          * We have jumped here in the middle of processing after having
 497          * encountered a medium range argument.  Therefore things are in a
 498          * bit of a tizzy.
 499          */
 500 
 501 MEDIUM:
 502 
 503         x0_or_one[1] = 1.0;
 504         x1_or_one[1] = 1.0;
 505         x2_or_one[1] = 1.0;
 506         x0_or_one[3] = -1.0;
 507         x1_or_one[3] = -1.0;
 508         x2_or_one[3] = -1.0;
 509         y0_or_zero[1] = 0.0;
 510         y1_or_zero[1] = 0.0;
 511         y2_or_zero[1] = 0.0;
 512         y0_or_zero[3] = 0.0;
 513         y1_or_zero[3] = 0.0;
 514         y2_or_zero[3] = 0.0;
 515 
 516         if ( biguns == 3 )
 517         {
 518                 biguns = 0;
 519                 xsb0 = xsb0 >> 31;
 520                 xsb1 = xsb1 >> 31;
 521                 goto loop2;
 522         }
 523         else if ( biguns == 2 )
 524         {
 525                 xsb0 = xsb0 >> 31;
 526                 biguns = 0;
 527                 goto loop1;
 528         }
 529         biguns = 0;
 530 
 531         do
 532         {
 533                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 534                 unsigned        hx;
 535                 int                     n0, n1, n2;
 536 
 537                 /*
 538                  * Find 3 more to work on: Not already done, not too big.
 539                  */
 540 
 541 loop0:
 542                 hx = HI(x);
 543                 xsb0 = hx >> 31;
 544                 hx &= ~0x80000000;
 545                 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
 546                 {
 547                         if ( hx >= 0x7ff00000 ) /* Inf or NaN */
 548                         {
 549                                 x0 = *x;
 550                                 *y = x0 - x0;
 551                         }
 552                         else
 553                                 biguns = 1;
 554                         x += stridex;
 555                         y += stridey;
 556                         i = 0;
 557                         if ( --n <= 0 )
 558                                 break;
 559                         goto loop0;
 560                 }
 561                 x0 = *x;
 562                 py0 = y;
 563                 x += stridex;
 564                 y += stridey;
 565                 i = 1;
 566                 if ( --n <= 0 )
 567                         break;
 568 
 569 loop1:
 570                 hx = HI(x);
 571                 xsb1 = hx >> 31;
 572                 hx &= ~0x80000000;
 573                 if ( hx > 0x413921fb )
 574                 {
 575                         if ( hx >= 0x7ff00000 )
 576                         {
 577                                 x1 = *x;
 578                                 *y = x1 - x1;
 579                         }
 580                         else
 581                                 biguns = 1;
 582                         x += stridex;
 583                         y += stridey;
 584                         i = 1;
 585                         if ( --n <= 0 )
 586                                 break;
 587                         goto loop1;
 588                 }
 589                 x1 = *x;
 590                 py1 = y;
 591                 x += stridex;
 592                 y += stridey;
 593                 i = 2;
 594                 if ( --n <= 0 )
 595                         break;
 596 
 597 loop2:
 598                 hx = HI(x);
 599                 xsb2 = hx >> 31;
 600                 hx &= ~0x80000000;
 601                 if ( hx > 0x413921fb )
 602                 {
 603                         if ( hx >= 0x7ff00000 )
 604                         {
 605                                 x2 = *x;
 606                                 *y = x2 - x2;
 607                         }
 608                         else
 609                                 biguns = 1;
 610                         x += stridex;
 611                         y += stridey;
 612                         i = 2;
 613                         if ( --n <= 0 )
 614                                 break;
 615                         goto loop2;
 616                 }
 617                 x2 = *x;
 618                 py2 = y;
 619 
 620                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 621                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 622                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 623                 fn0 = (double) n0;
 624                 fn1 = (double) n1;
 625                 fn2 = (double) n2;
 626                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 627                 n1 = (n1 + 1) & 3;
 628                 n2 = (n2 + 1) & 3;
 629                 a0 = x0 - fn0 * pio2_1;
 630                 a1 = x1 - fn1 * pio2_1;
 631                 a2 = x2 - fn2 * pio2_1;
 632                 w0 = fn0 * pio2_2;
 633                 w1 = fn1 * pio2_2;
 634                 w2 = fn2 * pio2_2;
 635                 x0 = a0 - w0;
 636                 x1 = a1 - w1;
 637                 x2 = a2 - w2;
 638                 y0 = ( a0 - x0 ) - w0;
 639                 y1 = ( a1 - x1 ) - w1;
 640                 y2 = ( a2 - x2 ) - w2;
 641                 a0 = x0;
 642                 a1 = x1;
 643                 a2 = x2;
 644                 w0 = fn0 * pio2_3 - y0;
 645                 w1 = fn1 * pio2_3 - y1;
 646                 w2 = fn2 * pio2_3 - y2;
 647                 x0 = a0 - w0;
 648                 x1 = a1 - w1;
 649                 x2 = a2 - w2;
 650                 y0 = ( a0 - x0 ) - w0;
 651                 y1 = ( a1 - x1 ) - w1;
 652                 y2 = ( a2 - x2 ) - w2;
 653                 a0 = x0;
 654                 a1 = x1;
 655                 a2 = x2;
 656                 w0 = fn0 * pio2_3t - y0;
 657                 w1 = fn1 * pio2_3t - y1;
 658                 w2 = fn2 * pio2_3t - y2;
 659                 x0 = a0 - w0;
 660                 x1 = a1 - w1;
 661                 x2 = a2 - w2;
 662                 y0 = ( a0 - x0 ) - w0;
 663                 y1 = ( a1 - x1 ) - w1;
 664                 y2 = ( a2 - x2 ) - w2;
 665                 xsb0 = HI(&x0);
 666                 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
 667                 xsb1 = HI(&x1);
 668                 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
 669                 xsb2 = HI(&x2);
 670                 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
 671                 switch ( i )
 672                 {
 673                         double          t0, t1, t2, z0, z1, z2;
 674                         unsigned        j0, j1, j2;
 675 
 676                 case 0:
 677                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 678                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 679                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 680                         HI(&t0) = j0;
 681                         HI(&t1) = j1;
 682                         HI(&t2) = j2;
 683                         LO(&t0) = 0;
 684                         LO(&t1) = 0;
 685                         LO(&t2) = 0;
 686                         x0 = ( x0 - t0 ) + y0;
 687                         x1 = ( x1 - t1 ) + y1;
 688                         x2 = ( x2 - t2 ) + y2;
 689                         z0 = x0 * x0;
 690                         z1 = x1 * x1;
 691                         z2 = x2 * x2;
 692                         t0 = z0 * ( qq1 + z0 * qq2 );
 693                         t1 = z1 * ( qq1 + z1 * qq2 );
 694                         t2 = z2 * ( qq1 + z2 * qq2 );
 695                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 696                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 697                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 698                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 699                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 700                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 701                         xsb0 = ( xsb0 >> 30 ) & 2;
 702                         xsb1 = ( xsb1 >> 30 ) & 2;
 703                         xsb2 = ( xsb2 >> 30 ) & 2;
 704                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 705                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 706                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 707                         xsb0 |= 1;
 708                         xsb1 |= 1;
 709                         xsb2 |= 1;
 710                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 711                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 712                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 713                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 714                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 715                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 716                         *py0 = ( a0 + t0 );
 717                         *py1 = ( a1 + t1 );
 718                         *py2 = ( a2 + t2 );
 719                         break;
 720 
 721                 case 1:
 722                         j0 = n0 & 1;
 723                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 724                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 725                         HI(&t1) = j1;
 726                         HI(&t2) = j2;
 727                         LO(&t1) = 0;
 728                         LO(&t2) = 0;
 729                         x0_or_one[0] = x0;
 730                         x0_or_one[2] = -x0;
 731                         y0_or_zero[0] = y0;
 732                         y0_or_zero[2] = -y0;
 733                         x1 = ( x1 - t1 ) + y1;
 734                         x2 = ( x2 - t2 ) + y2;
 735                         z0 = x0 * x0;
 736                         z1 = x1 * x1;
 737                         z2 = x2 * x2;
 738                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 739                         t1 = z1 * ( qq1 + z1 * qq2 );
 740                         t2 = z2 * ( qq1 + z2 * qq2 );
 741                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 742                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 743                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 744                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 745                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 746                         xsb1 = ( xsb1 >> 30 ) & 2;
 747                         xsb2 = ( xsb2 >> 30 ) & 2;
 748                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 749                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 750                         xsb1 |= 1;
 751                         xsb2 |= 1;
 752                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 753                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 754                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 755                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 756                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 757                         *py0 = t0;
 758                         *py1 = ( a1 + t1 );
 759                         *py2 = ( a2 + t2 );
 760                         break;
 761 
 762                 case 2:
 763                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 764                         j1 = n1 & 1;
 765                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 766                         HI(&t0) = j0;
 767                         HI(&t2) = j2;
 768                         LO(&t0) = 0;
 769                         LO(&t2) = 0;
 770                         x1_or_one[0] = x1;
 771                         x1_or_one[2] = -x1;
 772                         x0 = ( x0 - t0 ) + y0;
 773                         y1_or_zero[0] = y1;
 774                         y1_or_zero[2] = -y1;
 775                         x2 = ( x2 - t2 ) + y2;
 776                         z0 = x0 * x0;
 777                         z1 = x1 * x1;
 778                         z2 = x2 * x2;
 779                         t0 = z0 * ( qq1 + z0 * qq2 );
 780                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 781                         t2 = z2 * ( qq1 + z2 * qq2 );
 782                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 783                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 784                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 785                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 786                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 787                         xsb0 = ( xsb0 >> 30 ) & 2;
 788                         xsb2 = ( xsb2 >> 30 ) & 2;
 789                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 790                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 791                         xsb0 |= 1;
 792                         xsb2 |= 1;
 793                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 794                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 795                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 796                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 797                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 798                         *py0 = ( a0 + t0 );
 799                         *py1 = t1;
 800                         *py2 = ( a2 + t2 );
 801                         break;
 802 
 803                 case 3:
 804                         j0 = n0 & 1;
 805                         j1 = n1 & 1;
 806                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 807                         HI(&t2) = j2;
 808                         LO(&t2) = 0;
 809                         x0_or_one[0] = x0;
 810                         x0_or_one[2] = -x0;
 811                         x1_or_one[0] = x1;
 812                         x1_or_one[2] = -x1;
 813                         y0_or_zero[0] = y0;
 814                         y0_or_zero[2] = -y0;
 815                         y1_or_zero[0] = y1;
 816                         y1_or_zero[2] = -y1;
 817                         x2 = ( x2 - t2 ) + y2;
 818                         z0 = x0 * x0;
 819                         z1 = x1 * x1;
 820                         z2 = x2 * x2;
 821                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 822                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 823                         t2 = z2 * ( qq1 + z2 * qq2 );
 824                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 825                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 826                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 827                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 828                         xsb2 = ( xsb2 >> 30 ) & 2;
 829                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 830                         xsb2 |= 1;
 831                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 832                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 833                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 834                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 835                         *py0 = t0;
 836                         *py1 = t1;
 837                         *py2 = ( a2 + t2 );
 838                         break;
 839 
 840                 case 4:
 841                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 842                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 843                         j2 = n2 & 1;
 844                         HI(&t0) = j0;
 845                         HI(&t1) = j1;
 846                         LO(&t0) = 0;
 847                         LO(&t1) = 0;
 848                         x2_or_one[0] = x2;
 849                         x2_or_one[2] = -x2;
 850                         x0 = ( x0 - t0 ) + y0;
 851                         x1 = ( x1 - t1 ) + y1;
 852                         y2_or_zero[0] = y2;
 853                         y2_or_zero[2] = -y2;
 854                         z0 = x0 * x0;
 855                         z1 = x1 * x1;
 856                         z2 = x2 * x2;
 857                         t0 = z0 * ( qq1 + z0 * qq2 );
 858                         t1 = z1 * ( qq1 + z1 * qq2 );
 859                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 860                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 861                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 862                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 863                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 864                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 865                         xsb0 = ( xsb0 >> 30 ) & 2;
 866                         xsb1 = ( xsb1 >> 30 ) & 2;
 867                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 868                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 869                         xsb0 |= 1;
 870                         xsb1 |= 1;
 871                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 872                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 873                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 874                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 875                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 876                         *py0 = ( a0 + t0 );
 877                         *py1 = ( a1 + t1 );
 878                         *py2 = t2;
 879                         break;
 880 
 881                 case 5:
 882                         j0 = n0 & 1;
 883                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 884                         j2 = n2 & 1;
 885                         HI(&t1) = j1;
 886                         LO(&t1) = 0;
 887                         x0_or_one[0] = x0;
 888                         x0_or_one[2] = -x0;
 889                         x2_or_one[0] = x2;
 890                         x2_or_one[2] = -x2;
 891                         y0_or_zero[0] = y0;
 892                         y0_or_zero[2] = -y0;
 893                         x1 = ( x1 - t1 ) + y1;
 894                         y2_or_zero[0] = y2;
 895                         y2_or_zero[2] = -y2;
 896                         z0 = x0 * x0;
 897                         z1 = x1 * x1;
 898                         z2 = x2 * x2;
 899                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 900                         t1 = z1 * ( qq1 + z1 * qq2 );
 901                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 902                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 903                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 904                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 905                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 906                         xsb1 = ( xsb1 >> 30 ) & 2;
 907                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 908                         xsb1 |= 1;
 909                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 910                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 911                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 912                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 913                         *py0 = t0;
 914                         *py1 = ( a1 + t1 );
 915                         *py2 = t2;
 916                         break;
 917 
 918                 case 6:
 919                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 920                         j1 = n1 & 1;
 921                         j2 = n2 & 1;
 922                         HI(&t0) = j0;
 923                         LO(&t0) = 0;
 924                         x1_or_one[0] = x1;
 925                         x1_or_one[2] = -x1;
 926                         x2_or_one[0] = x2;
 927                         x2_or_one[2] = -x2;
 928                         x0 = ( x0 - t0 ) + y0;
 929                         y1_or_zero[0] = y1;
 930                         y1_or_zero[2] = -y1;
 931                         y2_or_zero[0] = y2;
 932                         y2_or_zero[2] = -y2;
 933                         z0 = x0 * x0;
 934                         z1 = x1 * x1;
 935                         z2 = x2 * x2;
 936                         t0 = z0 * ( qq1 + z0 * qq2 );
 937                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 938                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 939                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 940                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 941                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 942                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 943                         xsb0 = ( xsb0 >> 30 ) & 2;
 944                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 945                         xsb0 |= 1;
 946                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 947                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 948                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 949                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 950                         *py0 = ( a0 + t0 );
 951                         *py1 = t1;
 952                         *py2 = t2;
 953                         break;
 954 
 955                 case 7:
 956                         j0 = n0 & 1;
 957                         j1 = n1 & 1;
 958                         j2 = n2 & 1;
 959                         x0_or_one[0] = x0;
 960                         x0_or_one[2] = -x0;
 961                         x1_or_one[0] = x1;
 962                         x1_or_one[2] = -x1;
 963                         x2_or_one[0] = x2;
 964                         x2_or_one[2] = -x2;
 965                         y0_or_zero[0] = y0;
 966                         y0_or_zero[2] = -y0;
 967                         y1_or_zero[0] = y1;
 968                         y1_or_zero[2] = -y1;
 969                         y2_or_zero[0] = y2;
 970                         y2_or_zero[2] = -y2;
 971                         z0 = x0 * x0;
 972                         z1 = x1 * x1;
 973                         z2 = x2 * x2;
 974                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 975                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 976                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 977                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 978                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 979                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 980                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 981                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 982                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 983                         *py0 = t0;
 984                         *py1 = t1;
 985                         *py2 = t2;
 986                         break;
 987                 }
 988 
 989                 x += stridex;
 990                 y += stridey;
 991                 i = 0;
 992         } while ( --n > 0 );
 993 
 994         if ( i > 0 )
 995         {
 996                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
 997                 double          t0, t1, z0, z1;
 998                 unsigned        j0, j1;
 999                 int                     n0, n1;
1000 
1001                 if ( i > 1 )
1002                 {
1003                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
1004                         fn1 = (double) n1;
1005                         n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1006                         a1 = x1 - fn1 * pio2_1;
1007                         w1 = fn1 * pio2_2;
1008                         x1 = a1 - w1;
1009                         y1 = ( a1 - x1 ) - w1;
1010                         a1 = x1;
1011                         w1 = fn1 * pio2_3 - y1;
1012                         x1 = a1 - w1;
1013                         y1 = ( a1 - x1 ) - w1;
1014                         a1 = x1;
1015                         w1 = fn1 * pio2_3t - y1;
1016                         x1 = a1 - w1;
1017                         y1 = ( a1 - x1 ) - w1;
1018                         xsb1 = HI(&x1);
1019                         if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
1020                         {
1021                                 j1 = n1 & 1;
1022                                 x1_or_one[0] = x1;
1023                                 x1_or_one[2] = -x1;
1024                                 y1_or_zero[0] = y1;
1025                                 y1_or_zero[2] = -y1;
1026                                 z1 = x1 * x1;
1027                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1028                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1029                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1030                                 *py1 = t1;
1031                         }
1032                         else
1033                         {
1034                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1035                                 HI(&t1) = j1;
1036                                 LO(&t1) = 0;
1037                                 x1 = ( x1 - t1 ) + y1;
1038                                 z1 = x1 * x1;
1039                                 t1 = z1 * ( qq1 + z1 * qq2 );
1040                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1041                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1042                                 xsb1 = ( xsb1 >> 30 ) & 2;
1043                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1044                                 xsb1 |= 1;
1045                                 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1046                                 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
1047                                 *py1 = ( a1 + t1 );
1048                         }
1049                 }
1050                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1051                 fn0 = (double) n0;
1052                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1053                 a0 = x0 - fn0 * pio2_1;
1054                 w0 = fn0 * pio2_2;
1055                 x0 = a0 - w0;
1056                 y0 = ( a0 - x0 ) - w0;
1057                 a0 = x0;
1058                 w0 = fn0 * pio2_3 - y0;
1059                 x0 = a0 - w0;
1060                 y0 = ( a0 - x0 ) - w0;
1061                 a0 = x0;
1062                 w0 = fn0 * pio2_3t - y0;
1063                 x0 = a0 - w0;
1064                 y0 = ( a0 - x0 ) - w0;
1065                 xsb0 = HI(&x0);
1066                 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
1067                 {
1068                         j0 = n0 & 1;
1069                         x0_or_one[0] = x0;
1070                         x0_or_one[2] = -x0;
1071                         y0_or_zero[0] = y0;
1072                         y0_or_zero[2] = -y0;
1073                         z0 = x0 * x0;
1074                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1075                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1076                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1077                         *py0 = t0;
1078                 }
1079                 else
1080                 {
1081                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1082                         HI(&t0) = j0;
1083                         LO(&t0) = 0;
1084                         x0 = ( x0 - t0 ) + y0;
1085                         z0 = x0 * x0;
1086                         t0 = z0 * ( qq1 + z0 * qq2 );
1087                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1088                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1089                         xsb0 = ( xsb0 >> 30 ) & 2;
1090                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1091                         xsb0 |= 1;
1092                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
1093                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
1094                         *py0 = ( a0 + t0 );
1095                 }
1096         }
1097 
1098         if ( biguns )
1099                 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
1100 }