1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 
  32 #ifdef _LITTLE_ENDIAN
  33 #define HI(x)   *(1+(int*)x)
  34 #define LO(x)   *(unsigned*)x
  35 #else
  36 #define HI(x)   *(int*)x
  37 #define LO(x)   *(1+(unsigned*)x)
  38 #endif
  39 
  40 #ifdef __RESTRICT
  41 #define restrict _Restrict
  42 #else
  43 #define restrict
  44 #endif
  45 
  46 /*
  47  * vcos.1.c
  48  *
  49  * Vector cosine function.  Just slight modifications to vsin.8.c, mainly
  50  * in the primary range part.
  51  *
  52  * Modification to primary range processing.  If an argument that does not
  53  * fall in the primary range is encountered, then processing is continued
  54  * in the medium range.
  55  *
  56  */
  57 
  58 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  59 
  60 static const double
  61         half[2] = { 0.5, -0.5 },
  62         one             = 1.0,
  63         invpio2 = 0.636619772367581343075535,  /* 53 bits of pi/2 */
  64         pio2_1  = 1.570796326734125614166,      /* first 33 bits of pi/2 */
  65         pio2_2  = 6.077100506303965976596e-11, /* second 33 bits of pi/2 */
  66         pio2_3  = 2.022266248711166455796e-21, /* third 33 bits of pi/2 */
  67         pio2_3t = 8.478427660368899643959e-32, /* pi/2 - pio2_3 */
  68         pp1             = -1.666666666605760465276263943134982554676e-0001,
  69         pp2             =  8.333261209690963126718376566146180944442e-0003,
  70         qq1             = -4.999999999977710986407023955908711557870e-0001,
  71         qq2             =  4.166654863857219350645055881018842089580e-0002,
  72         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  73                                 -4.999999999999931701464060878888294524481e-0001 },
  74         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  75                                  4.166666666394861917535640593963708222319e-0002 },
  76         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  77                                 -1.388888552656142867832756687736851681462e-0003 },
  78         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  79                                  2.478519423681460796618128289454530524759e-0005 };
  80 
  81 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
  82 
  83 /* Don't __ the following; acomp will handle it */
  84 extern double fabs( double );
  85 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
  86 
  87 /*
  88  * y[i*stridey] := cos( x[i*stridex] ), for i = 0..n.
  89  *
  90  * Calls __vlibm_vcos_big to handle all elts which have abs >~ 1.647e+06.
  91  * Argument reduction is done here for elts pi/4 < arg < 1.647e+06.
  92  *
  93  * elts < 2^-27 use the approximation 1.0 ~ cos(x).
  94  */
  95 void
  96 __vcos( int n, double * restrict x, int stridex, double * restrict y,
  97         int stridey )
  98 {
  99         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
 100         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
 101         double          x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
 102         unsigned        hx0, hx1, hx2, xsb0, xsb1, xsb2;
 103         int                     i, biguns, nsave, sxsave, sysave;
 104 
 105         nsave = n;
 106         xsave = x;
 107         sxsave = stridex;
 108         ysave = y;
 109         sysave = stridey;
 110         biguns = 0;
 111 
 112         do /* MAIN LOOP */
 113         {
 114                 /* Gotos here so _break_ exits MAIN LOOP. */
 115 LOOP0:  /* Find first arg in right range. */
 116                 xsb0 = HI(x); /* get most significant word */
 117                 hx0 = xsb0 & ~0x80000000; /* mask off sign bit */
 118                 if ( hx0 > 0x3fe921fb ) {
 119                         /* Too big: arg reduction needed, so leave for second part */
 120                         biguns = 1;
 121                         goto MEDIUM;
 122                 }
 123                 if ( hx0 < 0x3e400000 ) {
 124                         /* Too small.  cos x ~ 1. */
 125                         volatile int v = *x;
 126                         *y = 1.0;
 127                         x += stridex;
 128                         y += stridey;
 129                         i = 0;
 130                         if ( --n <= 0 )
 131                                 break;
 132                         goto LOOP0;
 133                 }
 134                 x0 = *x;
 135                 py0 = y;
 136                 x += stridex;
 137                 y += stridey;
 138                 i = 1;
 139                 if ( --n <= 0 )
 140                         break;
 141 
 142 LOOP1: /* Get second arg, same as above. */
 143                 xsb1 = HI(x);
 144                 hx1 = xsb1 & ~0x80000000;
 145                 if ( hx1 > 0x3fe921fb )
 146                 {
 147                         biguns = 2;
 148                         goto MEDIUM;
 149                 }
 150                 if ( hx1 < 0x3e400000 )
 151                 {
 152                         volatile int v = *x;
 153                         *y = 1.0;
 154                         x += stridex;
 155                         y += stridey;
 156                         i = 1;
 157                         if ( --n <= 0 )
 158                                 break;
 159                         goto LOOP1;
 160                 }
 161                 x1 = *x;
 162                 py1 = y;
 163                 x += stridex;
 164                 y += stridey;
 165                 i = 2;
 166                 if ( --n <= 0 )
 167                         break;
 168 
 169 LOOP2: /* Get third arg, same as above. */
 170                 xsb2 = HI(x);
 171                 hx2 = xsb2 & ~0x80000000;
 172                 if ( hx2 > 0x3fe921fb )
 173                 {
 174                         biguns = 3;
 175                         goto MEDIUM;
 176                 }
 177                 if ( hx2 < 0x3e400000 )
 178                 {
 179                         volatile int v = *x;
 180                         *y = 1.0;
 181                         x += stridex;
 182                         y += stridey;
 183                         i = 2;
 184                         if ( --n <= 0 )
 185                                 break;
 186                         goto LOOP2;
 187                 }
 188                 x2 = *x;
 189                 py2 = y;
 190 
 191                 /*
 192                  * 0x3fc40000 = 5/32 ~ 0.15625
 193                  * Get msb after subtraction.  Will be 1 only if
 194                  * hx0 - 5/32 is negative.
 195                  */
 196                 i = ( hx0 - 0x3fc40000 ) >> 31;
 197                 i |= ( ( hx1 - 0x3fc40000 ) >> 30 ) & 2;
 198                 i |= ( ( hx2 - 0x3fc40000 ) >> 29 ) & 4;
 199                 switch ( i )
 200                 {
 201                         double          a0, a1, a2, w0, w1, w2;
 202                         double          t0, t1, t2, z0, z1, z2;
 203                         unsigned        j0, j1, j2;
 204 
 205                 case 0: /* All are > 5/32 */
 206                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 207                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 208                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 209                         HI(&t0) = j0;
 210                         HI(&t1) = j1;
 211                         HI(&t2) = j2;
 212                         LO(&t0) = 0;
 213                         LO(&t1) = 0;
 214                         LO(&t2) = 0;
 215                         x0 -= t0;
 216                         x1 -= t1;
 217                         x2 -= t2;
 218                         z0 = x0 * x0;
 219                         z1 = x1 * x1;
 220                         z2 = x2 * x2;
 221                         t0 = z0 * ( qq1 + z0 * qq2 );
 222                         t1 = z1 * ( qq1 + z1 * qq2 );
 223                         t2 = z2 * ( qq1 + z2 * qq2 );
 224                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 225                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 226                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 227                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 228                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 229                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 230                         xsb0 = ( xsb0 >> 30 ) & 2;
 231                         xsb1 = ( xsb1 >> 30 ) & 2;
 232                         xsb2 = ( xsb2 >> 30 ) & 2;
 233                         a0 = __vlibm_TBL_sincos_hi[j0+1]; /* cos_hi(t) */
 234                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 235                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 236                            /*   cos_lo(t)                        sin_hi(t) */
 237                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 238                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 239                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 240 
 241                         *py0 = a0 + t0;
 242                         *py1 = a1 + t1;
 243                         *py2 = a2 + t2;
 244                         break;
 245 
 246                 case 1:
 247                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 248                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 249                         HI(&t1) = j1;
 250                         HI(&t2) = j2;
 251                         LO(&t1) = 0;
 252                         LO(&t2) = 0;
 253                         x1 -= t1;
 254                         x2 -= t2;
 255                         z0 = x0 * x0;
 256                         z1 = x1 * x1;
 257                         z2 = x2 * x2;
 258                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 259                         t1 = z1 * ( qq1 + z1 * qq2 );
 260                         t2 = z2 * ( qq1 + z2 * qq2 );
 261                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 262                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 263                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 264                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 265                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 266                         xsb1 = ( xsb1 >> 30 ) & 2;
 267                         xsb2 = ( xsb2 >> 30 ) & 2;
 268                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 269                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 270                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 271                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 272                         *py0 = one + t0;
 273                         *py1 = a1 + t1;
 274                         *py2 = a2 + t2;
 275                         break;
 276 
 277                 case 2:
 278                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 279                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 280                         HI(&t0) = j0;
 281                         HI(&t2) = j2;
 282                         LO(&t0) = 0;
 283                         LO(&t2) = 0;
 284                         x0 -= t0;
 285                         x2 -= t2;
 286                         z0 = x0 * x0;
 287                         z1 = x1 * x1;
 288                         z2 = x2 * x2;
 289                         t0 = z0 * ( qq1 + z0 * qq2 );
 290                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 291                         t2 = z2 * ( qq1 + z2 * qq2 );
 292                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 293                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 294                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 295                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 296                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 297                         xsb0 = ( xsb0 >> 30 ) & 2;
 298                         xsb2 = ( xsb2 >> 30 ) & 2;
 299                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 300                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 301                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 302                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 303                         *py0 = a0 + t0;
 304                         *py1 = one + t1;
 305                         *py2 = a2 + t2;
 306                         break;
 307 
 308                 case 3:
 309                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 310                         HI(&t2) = j2;
 311                         LO(&t2) = 0;
 312                         x2 -= t2;
 313                         z0 = x0 * x0;
 314                         z1 = x1 * x1;
 315                         z2 = x2 * x2;
 316                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 317                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 318                         t2 = z2 * ( qq1 + z2 * qq2 );
 319                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 320                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 321                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 322                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 323                         xsb2 = ( xsb2 >> 30 ) & 2;
 324                         a2 = __vlibm_TBL_sincos_hi[j2+1];
 325                         t2 = __vlibm_TBL_sincos_lo[j2+1] - ( __vlibm_TBL_sincos_hi[j2+xsb2]*w2 - a2*t2 );
 326                         *py0 = one + t0;
 327                         *py1 = one + t1;
 328                         *py2 = a2 + t2;
 329                         break;
 330 
 331                 case 4:
 332                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 333                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 334                         HI(&t0) = j0;
 335                         HI(&t1) = j1;
 336                         LO(&t0) = 0;
 337                         LO(&t1) = 0;
 338                         x0 -= t0;
 339                         x1 -= t1;
 340                         z0 = x0 * x0;
 341                         z1 = x1 * x1;
 342                         z2 = x2 * x2;
 343                         t0 = z0 * ( qq1 + z0 * qq2 );
 344                         t1 = z1 * ( qq1 + z1 * qq2 );
 345                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 346                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 347                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 348                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 349                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 350                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 351                         xsb0 = ( xsb0 >> 30 ) & 2;
 352                         xsb1 = ( xsb1 >> 30 ) & 2;
 353                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 354                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 355                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 356                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 357                         *py0 = a0 + t0;
 358                         *py1 = a1 + t1;
 359                         *py2 = one + t2;
 360                         break;
 361 
 362                 case 5:
 363                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 364                         HI(&t1) = j1;
 365                         LO(&t1) = 0;
 366                         x1 -= t1;
 367                         z0 = x0 * x0;
 368                         z1 = x1 * x1;
 369                         z2 = x2 * x2;
 370                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 371                         t1 = z1 * ( qq1 + z1 * qq2 );
 372                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 373                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 374                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 375                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 376                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 377                         xsb1 = ( xsb1 >> 30 ) & 2;
 378                         a1 = __vlibm_TBL_sincos_hi[j1+1];
 379                         t1 = __vlibm_TBL_sincos_lo[j1+1] - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 380                         *py0 = one + t0;
 381                         *py1 = a1 + t1;
 382                         *py2 = one + t2;
 383                         break;
 384 
 385                 case 6:
 386                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 387                         HI(&t0) = j0;
 388                         LO(&t0) = 0;
 389                         x0 -= t0;
 390                         z0 = x0 * x0;
 391                         z1 = x1 * x1;
 392                         z2 = x2 * x2;
 393                         t0 = z0 * ( qq1 + z0 * qq2 );
 394                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 395                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 396                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 397                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 398                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 399                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 400                         xsb0 = ( xsb0 >> 30 ) & 2;
 401                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 402                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 403                         *py0 = a0 + t0;
 404                         *py1 = one + t1;
 405                         *py2 = one + t2;
 406                         break;
 407 
 408                 case 7: /* All are < 5/32 */
 409                         z0 = x0 * x0;
 410                         z1 = x1 * x1;
 411                         z2 = x2 * x2;
 412                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 413                         t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 414                         t2 = z2 * ( poly3[1] + z2 * poly4[1] );
 415                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 416                         t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 417                         t2 = z2 * ( poly1[1] + z2 * ( poly2[1] + t2 ) );
 418                         *py0 = one + t0;
 419                         *py1 = one + t1;
 420                         *py2 = one + t2;
 421                         break;
 422                 }
 423 
 424                 x += stridex;
 425                 y += stridey;
 426                 i = 0;
 427         } while ( --n > 0 ); /* END MAIN LOOP */
 428 
 429         /*
 430          * CLEAN UP last 0, 1, or 2 elts.
 431          */
 432         if ( i > 0 ) /* Clean up elts at tail.  i < 3. */
 433         {
 434                 double          a0, a1, w0, w1;
 435                 double          t0, t1, z0, z1;
 436                 unsigned        j0, j1;
 437 
 438                 if ( i > 1 )
 439                 {
 440                         if ( hx1 < 0x3fc40000 )
 441                         {
 442                                 z1 = x1 * x1;
 443                                 t1 = z1 * ( poly3[1] + z1 * poly4[1] );
 444                                 t1 = z1 * ( poly1[1] + z1 * ( poly2[1] + t1 ) );
 445                                 t1 = one + t1;
 446                                 *py1 = t1;
 447                         }
 448                         else
 449                         {
 450                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 451                                 HI(&t1) = j1;
 452                                 LO(&t1) = 0;
 453                                 x1 -= t1;
 454                                 z1 = x1 * x1;
 455                                 t1 = z1 * ( qq1 + z1 * qq2 );
 456                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 457                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 458                                 xsb1 = ( xsb1 >> 30 ) & 2;
 459                                 a1 = __vlibm_TBL_sincos_hi[j1+1];
 460                                 t1 = __vlibm_TBL_sincos_lo[j1+1]
 461                                         - ( __vlibm_TBL_sincos_hi[j1+xsb1]*w1 - a1*t1 );
 462                                 *py1 = a1 + t1;
 463                         }
 464                 }
 465                 if ( hx0 < 0x3fc40000 )
 466                 {
 467                         z0 = x0 * x0;
 468                         t0 = z0 * ( poly3[1] + z0 * poly4[1] );
 469                         t0 = z0 * ( poly1[1] + z0 * ( poly2[1] + t0 ) );
 470                         t0 = one + t0;
 471                         *py0 = t0;
 472                 }
 473                 else
 474                 {
 475                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 476                         HI(&t0) = j0;
 477                         LO(&t0) = 0;
 478                         x0 -= t0;
 479                         z0 = x0 * x0;
 480                         t0 = z0 * ( qq1 + z0 * qq2 );
 481                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 482                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 483                         xsb0 = ( xsb0 >> 30 ) & 2;
 484                         a0 = __vlibm_TBL_sincos_hi[j0+1];
 485                         t0 = __vlibm_TBL_sincos_lo[j0+1] - ( __vlibm_TBL_sincos_hi[j0+xsb0]*w0 - a0*t0 );
 486                         *py0 = a0 + t0;
 487                 }
 488         } /* END CLEAN UP */
 489 
 490         return;
 491 
 492         /*
 493          * Take care of BIGUNS.
 494          *
 495          * We have jumped here in the middle of processing after having
 496          * encountered a medium range argument.  Therefore things are in a
 497          * bit of a tizzy.
 498          */
 499 
 500 MEDIUM:
 501 
 502         x0_or_one[1] = 1.0;
 503         x1_or_one[1] = 1.0;
 504         x2_or_one[1] = 1.0;
 505         x0_or_one[3] = -1.0;
 506         x1_or_one[3] = -1.0;
 507         x2_or_one[3] = -1.0;
 508         y0_or_zero[1] = 0.0;
 509         y1_or_zero[1] = 0.0;
 510         y2_or_zero[1] = 0.0;
 511         y0_or_zero[3] = 0.0;
 512         y1_or_zero[3] = 0.0;
 513         y2_or_zero[3] = 0.0;
 514 
 515         if ( biguns == 3 )
 516         {
 517                 biguns = 0;
 518                 xsb0 = xsb0 >> 31;
 519                 xsb1 = xsb1 >> 31;
 520                 goto loop2;
 521         }
 522         else if ( biguns == 2 )
 523         {
 524                 xsb0 = xsb0 >> 31;
 525                 biguns = 0;
 526                 goto loop1;
 527         }
 528         biguns = 0;
 529 
 530         do
 531         {
 532                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 533                 unsigned        hx;
 534                 int                     n0, n1, n2;
 535 
 536                 /*
 537                  * Find 3 more to work on: Not already done, not too big.
 538                  */
 539 
 540 loop0:
 541                 hx = HI(x);
 542                 xsb0 = hx >> 31;
 543                 hx &= ~0x80000000;
 544                 if ( hx > 0x413921fb ) /* (1.6471e+06) Too big: leave it. */
 545                 {
 546                         if ( hx >= 0x7ff00000 ) /* Inf or NaN */
 547                         {
 548                                 x0 = *x;
 549                                 *y = x0 - x0;
 550                         }
 551                         else
 552                                 biguns = 1;
 553                         x += stridex;
 554                         y += stridey;
 555                         i = 0;
 556                         if ( --n <= 0 )
 557                                 break;
 558                         goto loop0;
 559                 }
 560                 x0 = *x;
 561                 py0 = y;
 562                 x += stridex;
 563                 y += stridey;
 564                 i = 1;
 565                 if ( --n <= 0 )
 566                         break;
 567 
 568 loop1:
 569                 hx = HI(x);
 570                 xsb1 = hx >> 31;
 571                 hx &= ~0x80000000;
 572                 if ( hx > 0x413921fb )
 573                 {
 574                         if ( hx >= 0x7ff00000 )
 575                         {
 576                                 x1 = *x;
 577                                 *y = x1 - x1;
 578                         }
 579                         else
 580                                 biguns = 1;
 581                         x += stridex;
 582                         y += stridey;
 583                         i = 1;
 584                         if ( --n <= 0 )
 585                                 break;
 586                         goto loop1;
 587                 }
 588                 x1 = *x;
 589                 py1 = y;
 590                 x += stridex;
 591                 y += stridey;
 592                 i = 2;
 593                 if ( --n <= 0 )
 594                         break;
 595 
 596 loop2:
 597                 hx = HI(x);
 598                 xsb2 = hx >> 31;
 599                 hx &= ~0x80000000;
 600                 if ( hx > 0x413921fb )
 601                 {
 602                         if ( hx >= 0x7ff00000 )
 603                         {
 604                                 x2 = *x;
 605                                 *y = x2 - x2;
 606                         }
 607                         else
 608                                 biguns = 1;
 609                         x += stridex;
 610                         y += stridey;
 611                         i = 2;
 612                         if ( --n <= 0 )
 613                                 break;
 614                         goto loop2;
 615                 }
 616                 x2 = *x;
 617                 py2 = y;
 618 
 619                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 620                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 621                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 622                 fn0 = (double) n0;
 623                 fn1 = (double) n1;
 624                 fn2 = (double) n2;
 625                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 626                 n1 = (n1 + 1) & 3;
 627                 n2 = (n2 + 1) & 3;
 628                 a0 = x0 - fn0 * pio2_1;
 629                 a1 = x1 - fn1 * pio2_1;
 630                 a2 = x2 - fn2 * pio2_1;
 631                 w0 = fn0 * pio2_2;
 632                 w1 = fn1 * pio2_2;
 633                 w2 = fn2 * pio2_2;
 634                 x0 = a0 - w0;
 635                 x1 = a1 - w1;
 636                 x2 = a2 - w2;
 637                 y0 = ( a0 - x0 ) - w0;
 638                 y1 = ( a1 - x1 ) - w1;
 639                 y2 = ( a2 - x2 ) - w2;
 640                 a0 = x0;
 641                 a1 = x1;
 642                 a2 = x2;
 643                 w0 = fn0 * pio2_3 - y0;
 644                 w1 = fn1 * pio2_3 - y1;
 645                 w2 = fn2 * pio2_3 - y2;
 646                 x0 = a0 - w0;
 647                 x1 = a1 - w1;
 648                 x2 = a2 - w2;
 649                 y0 = ( a0 - x0 ) - w0;
 650                 y1 = ( a1 - x1 ) - w1;
 651                 y2 = ( a2 - x2 ) - w2;
 652                 a0 = x0;
 653                 a1 = x1;
 654                 a2 = x2;
 655                 w0 = fn0 * pio2_3t - y0;
 656                 w1 = fn1 * pio2_3t - y1;
 657                 w2 = fn2 * pio2_3t - y2;
 658                 x0 = a0 - w0;
 659                 x1 = a1 - w1;
 660                 x2 = a2 - w2;
 661                 y0 = ( a0 - x0 ) - w0;
 662                 y1 = ( a1 - x1 ) - w1;
 663                 y2 = ( a2 - x2 ) - w2;
 664                 xsb0 = HI(&x0);
 665                 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
 666                 xsb1 = HI(&x1);
 667                 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
 668                 xsb2 = HI(&x2);
 669                 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
 670                 switch ( i )
 671                 {
 672                         double          t0, t1, t2, z0, z1, z2;
 673                         unsigned        j0, j1, j2;
 674 
 675                 case 0:
 676                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 677                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 678                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 679                         HI(&t0) = j0;
 680                         HI(&t1) = j1;
 681                         HI(&t2) = j2;
 682                         LO(&t0) = 0;
 683                         LO(&t1) = 0;
 684                         LO(&t2) = 0;
 685                         x0 = ( x0 - t0 ) + y0;
 686                         x1 = ( x1 - t1 ) + y1;
 687                         x2 = ( x2 - t2 ) + y2;
 688                         z0 = x0 * x0;
 689                         z1 = x1 * x1;
 690                         z2 = x2 * x2;
 691                         t0 = z0 * ( qq1 + z0 * qq2 );
 692                         t1 = z1 * ( qq1 + z1 * qq2 );
 693                         t2 = z2 * ( qq1 + z2 * qq2 );
 694                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 695                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 696                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 697                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 698                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 699                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 700                         xsb0 = ( xsb0 >> 30 ) & 2;
 701                         xsb1 = ( xsb1 >> 30 ) & 2;
 702                         xsb2 = ( xsb2 >> 30 ) & 2;
 703                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 704                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 705                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 706                         xsb0 |= 1;
 707                         xsb1 |= 1;
 708                         xsb2 |= 1;
 709                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 710                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 711                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 712                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 713                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 714                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 715                         *py0 = ( a0 + t0 );
 716                         *py1 = ( a1 + t1 );
 717                         *py2 = ( a2 + t2 );
 718                         break;
 719 
 720                 case 1:
 721                         j0 = n0 & 1;
 722                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 723                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 724                         HI(&t1) = j1;
 725                         HI(&t2) = j2;
 726                         LO(&t1) = 0;
 727                         LO(&t2) = 0;
 728                         x0_or_one[0] = x0;
 729                         x0_or_one[2] = -x0;
 730                         y0_or_zero[0] = y0;
 731                         y0_or_zero[2] = -y0;
 732                         x1 = ( x1 - t1 ) + y1;
 733                         x2 = ( x2 - t2 ) + y2;
 734                         z0 = x0 * x0;
 735                         z1 = x1 * x1;
 736                         z2 = x2 * x2;
 737                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 738                         t1 = z1 * ( qq1 + z1 * qq2 );
 739                         t2 = z2 * ( qq1 + z2 * qq2 );
 740                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 741                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 742                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 743                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 744                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 745                         xsb1 = ( xsb1 >> 30 ) & 2;
 746                         xsb2 = ( xsb2 >> 30 ) & 2;
 747                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 748                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 749                         xsb1 |= 1;
 750                         xsb2 |= 1;
 751                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 752                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 753                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 754                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 755                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 756                         *py0 = t0;
 757                         *py1 = ( a1 + t1 );
 758                         *py2 = ( a2 + t2 );
 759                         break;
 760 
 761                 case 2:
 762                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 763                         j1 = n1 & 1;
 764                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 765                         HI(&t0) = j0;
 766                         HI(&t2) = j2;
 767                         LO(&t0) = 0;
 768                         LO(&t2) = 0;
 769                         x1_or_one[0] = x1;
 770                         x1_or_one[2] = -x1;
 771                         x0 = ( x0 - t0 ) + y0;
 772                         y1_or_zero[0] = y1;
 773                         y1_or_zero[2] = -y1;
 774                         x2 = ( x2 - t2 ) + y2;
 775                         z0 = x0 * x0;
 776                         z1 = x1 * x1;
 777                         z2 = x2 * x2;
 778                         t0 = z0 * ( qq1 + z0 * qq2 );
 779                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 780                         t2 = z2 * ( qq1 + z2 * qq2 );
 781                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 782                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 783                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 784                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 785                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 786                         xsb0 = ( xsb0 >> 30 ) & 2;
 787                         xsb2 = ( xsb2 >> 30 ) & 2;
 788                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 789                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 790                         xsb0 |= 1;
 791                         xsb2 |= 1;
 792                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 793                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 794                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 795                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 796                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 797                         *py0 = ( a0 + t0 );
 798                         *py1 = t1;
 799                         *py2 = ( a2 + t2 );
 800                         break;
 801 
 802                 case 3:
 803                         j0 = n0 & 1;
 804                         j1 = n1 & 1;
 805                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 806                         HI(&t2) = j2;
 807                         LO(&t2) = 0;
 808                         x0_or_one[0] = x0;
 809                         x0_or_one[2] = -x0;
 810                         x1_or_one[0] = x1;
 811                         x1_or_one[2] = -x1;
 812                         y0_or_zero[0] = y0;
 813                         y0_or_zero[2] = -y0;
 814                         y1_or_zero[0] = y1;
 815                         y1_or_zero[2] = -y1;
 816                         x2 = ( x2 - t2 ) + y2;
 817                         z0 = x0 * x0;
 818                         z1 = x1 * x1;
 819                         z2 = x2 * x2;
 820                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 821                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 822                         t2 = z2 * ( qq1 + z2 * qq2 );
 823                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 824                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 825                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 826                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 827                         xsb2 = ( xsb2 >> 30 ) & 2;
 828                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 829                         xsb2 |= 1;
 830                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 831                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 832                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 833                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 834                         *py0 = t0;
 835                         *py1 = t1;
 836                         *py2 = ( a2 + t2 );
 837                         break;
 838 
 839                 case 4:
 840                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 841                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 842                         j2 = n2 & 1;
 843                         HI(&t0) = j0;
 844                         HI(&t1) = j1;
 845                         LO(&t0) = 0;
 846                         LO(&t1) = 0;
 847                         x2_or_one[0] = x2;
 848                         x2_or_one[2] = -x2;
 849                         x0 = ( x0 - t0 ) + y0;
 850                         x1 = ( x1 - t1 ) + y1;
 851                         y2_or_zero[0] = y2;
 852                         y2_or_zero[2] = -y2;
 853                         z0 = x0 * x0;
 854                         z1 = x1 * x1;
 855                         z2 = x2 * x2;
 856                         t0 = z0 * ( qq1 + z0 * qq2 );
 857                         t1 = z1 * ( qq1 + z1 * qq2 );
 858                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 859                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 860                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 861                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 862                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 863                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 864                         xsb0 = ( xsb0 >> 30 ) & 2;
 865                         xsb1 = ( xsb1 >> 30 ) & 2;
 866                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 867                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 868                         xsb0 |= 1;
 869                         xsb1 |= 1;
 870                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 871                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 872                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 873                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 874                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 875                         *py0 = ( a0 + t0 );
 876                         *py1 = ( a1 + t1 );
 877                         *py2 = t2;
 878                         break;
 879 
 880                 case 5:
 881                         j0 = n0 & 1;
 882                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 883                         j2 = n2 & 1;
 884                         HI(&t1) = j1;
 885                         LO(&t1) = 0;
 886                         x0_or_one[0] = x0;
 887                         x0_or_one[2] = -x0;
 888                         x2_or_one[0] = x2;
 889                         x2_or_one[2] = -x2;
 890                         y0_or_zero[0] = y0;
 891                         y0_or_zero[2] = -y0;
 892                         x1 = ( x1 - t1 ) + y1;
 893                         y2_or_zero[0] = y2;
 894                         y2_or_zero[2] = -y2;
 895                         z0 = x0 * x0;
 896                         z1 = x1 * x1;
 897                         z2 = x2 * x2;
 898                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 899                         t1 = z1 * ( qq1 + z1 * qq2 );
 900                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 901                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 902                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 903                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 904                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 905                         xsb1 = ( xsb1 >> 30 ) & 2;
 906                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 907                         xsb1 |= 1;
 908                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 909                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 910                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 911                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 912                         *py0 = t0;
 913                         *py1 = ( a1 + t1 );
 914                         *py2 = t2;
 915                         break;
 916 
 917                 case 6:
 918                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 919                         j1 = n1 & 1;
 920                         j2 = n2 & 1;
 921                         HI(&t0) = j0;
 922                         LO(&t0) = 0;
 923                         x1_or_one[0] = x1;
 924                         x1_or_one[2] = -x1;
 925                         x2_or_one[0] = x2;
 926                         x2_or_one[2] = -x2;
 927                         x0 = ( x0 - t0 ) + y0;
 928                         y1_or_zero[0] = y1;
 929                         y1_or_zero[2] = -y1;
 930                         y2_or_zero[0] = y2;
 931                         y2_or_zero[2] = -y2;
 932                         z0 = x0 * x0;
 933                         z1 = x1 * x1;
 934                         z2 = x2 * x2;
 935                         t0 = z0 * ( qq1 + z0 * qq2 );
 936                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 937                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 938                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 939                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 940                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 941                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 942                         xsb0 = ( xsb0 >> 30 ) & 2;
 943                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 944                         xsb0 |= 1;
 945                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 946                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 947                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 948                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 949                         *py0 = ( a0 + t0 );
 950                         *py1 = t1;
 951                         *py2 = t2;
 952                         break;
 953 
 954                 case 7:
 955                         j0 = n0 & 1;
 956                         j1 = n1 & 1;
 957                         j2 = n2 & 1;
 958                         x0_or_one[0] = x0;
 959                         x0_or_one[2] = -x0;
 960                         x1_or_one[0] = x1;
 961                         x1_or_one[2] = -x1;
 962                         x2_or_one[0] = x2;
 963                         x2_or_one[2] = -x2;
 964                         y0_or_zero[0] = y0;
 965                         y0_or_zero[2] = -y0;
 966                         y1_or_zero[0] = y1;
 967                         y1_or_zero[2] = -y1;
 968                         y2_or_zero[0] = y2;
 969                         y2_or_zero[2] = -y2;
 970                         z0 = x0 * x0;
 971                         z1 = x1 * x1;
 972                         z2 = x2 * x2;
 973                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 974                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 975                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 976                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 977                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 978                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 979                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 980                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 981                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 982                         *py0 = t0;
 983                         *py1 = t1;
 984                         *py2 = t2;
 985                         break;
 986                 }
 987 
 988                 x += stridex;
 989                 y += stridey;
 990                 i = 0;
 991         } while ( --n > 0 );
 992 
 993         if ( i > 0 )
 994         {
 995                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
 996                 double          t0, t1, z0, z1;
 997                 unsigned        j0, j1;
 998                 int                     n0, n1;
 999 
1000                 if ( i > 1 )
1001                 {
1002                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
1003                         fn1 = (double) n1;
1004                         n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1005                         a1 = x1 - fn1 * pio2_1;
1006                         w1 = fn1 * pio2_2;
1007                         x1 = a1 - w1;
1008                         y1 = ( a1 - x1 ) - w1;
1009                         a1 = x1;
1010                         w1 = fn1 * pio2_3 - y1;
1011                         x1 = a1 - w1;
1012                         y1 = ( a1 - x1 ) - w1;
1013                         a1 = x1;
1014                         w1 = fn1 * pio2_3t - y1;
1015                         x1 = a1 - w1;
1016                         y1 = ( a1 - x1 ) - w1;
1017                         xsb1 = HI(&x1);
1018                         if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
1019                         {
1020                                 j1 = n1 & 1;
1021                                 x1_or_one[0] = x1;
1022                                 x1_or_one[2] = -x1;
1023                                 y1_or_zero[0] = y1;
1024                                 y1_or_zero[2] = -y1;
1025                                 z1 = x1 * x1;
1026                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
1027                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
1028                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
1029                                 *py1 = t1;
1030                         }
1031                         else
1032                         {
1033                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
1034                                 HI(&t1) = j1;
1035                                 LO(&t1) = 0;
1036                                 x1 = ( x1 - t1 ) + y1;
1037                                 z1 = x1 * x1;
1038                                 t1 = z1 * ( qq1 + z1 * qq2 );
1039                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
1040                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1041                                 xsb1 = ( xsb1 >> 30 ) & 2;
1042                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
1043                                 xsb1 |= 1;
1044                                 a1 = __vlibm_TBL_sincos_hi[j1+n1];
1045                                 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
1046                                 *py1 = ( a1 + t1 );
1047                         }
1048                 }
1049                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
1050                 fn0 = (double) n0;
1051                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
1052                 a0 = x0 - fn0 * pio2_1;
1053                 w0 = fn0 * pio2_2;
1054                 x0 = a0 - w0;
1055                 y0 = ( a0 - x0 ) - w0;
1056                 a0 = x0;
1057                 w0 = fn0 * pio2_3 - y0;
1058                 x0 = a0 - w0;
1059                 y0 = ( a0 - x0 ) - w0;
1060                 a0 = x0;
1061                 w0 = fn0 * pio2_3t - y0;
1062                 x0 = a0 - w0;
1063                 y0 = ( a0 - x0 ) - w0;
1064                 xsb0 = HI(&x0);
1065                 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
1066                 {
1067                         j0 = n0 & 1;
1068                         x0_or_one[0] = x0;
1069                         x0_or_one[2] = -x0;
1070                         y0_or_zero[0] = y0;
1071                         y0_or_zero[2] = -y0;
1072                         z0 = x0 * x0;
1073                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
1074                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
1075                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
1076                         *py0 = t0;
1077                 }
1078                 else
1079                 {
1080                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
1081                         HI(&t0) = j0;
1082                         LO(&t0) = 0;
1083                         x0 = ( x0 - t0 ) + y0;
1084                         z0 = x0 * x0;
1085                         t0 = z0 * ( qq1 + z0 * qq2 );
1086                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
1087                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
1088                         xsb0 = ( xsb0 >> 30 ) & 2;
1089                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
1090                         xsb0 |= 1;
1091                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
1092                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
1093                         *py0 = ( a0 + t0 );
1094                 }
1095         }
1096 
1097         if ( biguns )
1098                 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
1099 }