1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 
  32 #ifdef _LITTLE_ENDIAN
  33 #define HI(x)   *(1+(int*)x)
  34 #define LO(x)   *(unsigned*)x
  35 #else
  36 #define HI(x)   *(int*)x
  37 #define LO(x)   *(1+(unsigned*)x)
  38 #endif
  39 
  40 #ifdef __RESTRICT
  41 #define restrict _Restrict
  42 #else
  43 #define restrict
  44 #endif
  45 
  46 extern const double __vlibm_TBL_sincos_hi[], __vlibm_TBL_sincos_lo[];
  47 
  48 static const double
  49         half[2] = { 0.5, -0.5 },
  50         one             = 1.0,
  51         invpio2 = 0.636619772367581343075535,
  52         pio2_1  = 1.570796326734125614166,
  53         pio2_2  = 6.077100506303965976596e-11,
  54         pio2_3  = 2.022266248711166455796e-21,
  55         pio2_3t = 8.478427660368899643959e-32,
  56         pp1             = -1.666666666605760465276263943134982554676e-0001,
  57         pp2             =  8.333261209690963126718376566146180944442e-0003,
  58         qq1             = -4.999999999977710986407023955908711557870e-0001,
  59         qq2             =  4.166654863857219350645055881018842089580e-0002,
  60         poly1[2]= { -1.666666666666629669805215138920301589656e-0001,
  61                                 -4.999999999999931701464060878888294524481e-0001 },
  62         poly2[2]= {  8.333333332390951295683993455280336376663e-0003,
  63                                  4.166666666394861917535640593963708222319e-0002 },
  64         poly3[2]= { -1.984126237997976692791551778230098403960e-0004,
  65                                 -1.388888552656142867832756687736851681462e-0003 },
  66         poly4[2]= {  2.753403624854277237649987622848330351110e-0006,
  67                                  2.478519423681460796618128289454530524759e-0005 };
  68 
  69 static const unsigned thresh[2] = { 0x3fc90000, 0x3fc40000 };
  70 
  71 extern void __vlibm_vcos_big( int, double *, int, double *, int, int );
  72 
  73 void
  74 __vlibm_vcos_big_ultra3( int n, double * restrict x, int stridex, double * restrict y,
  75         int stridey, int pthresh )
  76 {
  77         double          x0_or_one[4], x1_or_one[4], x2_or_one[4];
  78         double          y0_or_zero[4], y1_or_zero[4], y2_or_zero[4];
  79         double          x0, x1, x2, *py0, *py1, *py2, *xsave, *ysave;
  80         unsigned        xsb0, xsb1, xsb2;
  81         int                     i, biguns, nsave, sxsave, sysave;
  82 
  83         nsave = n;
  84         xsave = x;
  85         sxsave = stridex;
  86         ysave = y;
  87         sysave = stridey;
  88         biguns = 0;
  89 
  90         x0_or_one[1] = 1.0;
  91         x1_or_one[1] = 1.0;
  92         x2_or_one[1] = 1.0;
  93         x0_or_one[3] = -1.0;
  94         x1_or_one[3] = -1.0;
  95         x2_or_one[3] = -1.0;
  96         y0_or_zero[1] = 0.0;
  97         y1_or_zero[1] = 0.0;
  98         y2_or_zero[1] = 0.0;
  99         y0_or_zero[3] = 0.0;
 100         y1_or_zero[3] = 0.0;
 101         y2_or_zero[3] = 0.0;
 102 
 103         do
 104         {
 105                 double          fn0, fn1, fn2, a0, a1, a2, w0, w1, w2, y0, y1, y2;
 106                 unsigned        hx;
 107                 int                     n0, n1, n2;
 108 
 109 loop0:
 110                 hx = HI(x);
 111                 xsb0 = hx >> 31;
 112                 hx &= ~0x80000000;
 113                 if ( hx <= pthresh || hx > 0x413921fb )
 114                 {
 115                         if ( hx > 0x413921fb && hx < 0x7ff00000)
 116                                 biguns = 1;
 117                         x += stridex;
 118                         y += stridey;
 119                         i = 0;
 120                         if ( --n <= 0 )
 121                                 break;
 122                         goto loop0;
 123                 }
 124                 x0 = *x;
 125                 py0 = y;
 126                 x += stridex;
 127                 y += stridey;
 128                 i = 1;
 129                 if ( --n <= 0 )
 130                         break;
 131 
 132 loop1:
 133                 hx = HI(x);
 134                 xsb1 = hx >> 31;
 135                 hx &= ~0x80000000;
 136                 if ( hx <= pthresh || hx > 0x413921fb )
 137                 {
 138                         if ( hx > 0x413921fb && hx < 0x7ff00000)
 139                                 biguns = 1;
 140                         x += stridex;
 141                         y += stridey;
 142                         i = 1;
 143                         if ( --n <= 0 )
 144                                 break;
 145                         goto loop1;
 146                 }
 147                 x1 = *x;
 148                 py1 = y;
 149                 x += stridex;
 150                 y += stridey;
 151                 i = 2;
 152                 if ( --n <= 0 )
 153                         break;
 154 
 155 loop2:
 156                 hx = HI(x);
 157                 xsb2 = hx >> 31;
 158                 hx &= ~0x80000000;
 159                 if ( hx <= pthresh || hx > 0x413921fb )
 160                 {
 161                         if ( hx > 0x413921fb && hx < 0x7ff00000)
 162                                 biguns = 1;
 163                         x += stridex;
 164                         y += stridey;
 165                         i = 2;
 166                         if ( --n <= 0 )
 167                                 break;
 168                         goto loop2;
 169                 }
 170                 x2 = *x;
 171                 py2 = y;
 172 
 173                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 174                 n1 = (int) ( x1 * invpio2 + half[xsb1] );
 175                 n2 = (int) ( x2 * invpio2 + half[xsb2] );
 176                 fn0 = (double) n0;
 177                 fn1 = (double) n1;
 178                 fn2 = (double) n2;
 179                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 180                 n1 = (n1 + 1) & 3;
 181                 n2 = (n2 + 1) & 3;
 182                 a0 = x0 - fn0 * pio2_1;
 183                 a1 = x1 - fn1 * pio2_1;
 184                 a2 = x2 - fn2 * pio2_1;
 185                 w0 = fn0 * pio2_2;
 186                 w1 = fn1 * pio2_2;
 187                 w2 = fn2 * pio2_2;
 188                 x0 = a0 - w0;
 189                 x1 = a1 - w1;
 190                 x2 = a2 - w2;
 191                 y0 = ( a0 - x0 ) - w0;
 192                 y1 = ( a1 - x1 ) - w1;
 193                 y2 = ( a2 - x2 ) - w2;
 194                 a0 = x0;
 195                 a1 = x1;
 196                 a2 = x2;
 197                 w0 = fn0 * pio2_3 - y0;
 198                 w1 = fn1 * pio2_3 - y1;
 199                 w2 = fn2 * pio2_3 - y2;
 200                 x0 = a0 - w0;
 201                 x1 = a1 - w1;
 202                 x2 = a2 - w2;
 203                 y0 = ( a0 - x0 ) - w0;
 204                 y1 = ( a1 - x1 ) - w1;
 205                 y2 = ( a2 - x2 ) - w2;
 206                 a0 = x0;
 207                 a1 = x1;
 208                 a2 = x2;
 209                 w0 = fn0 * pio2_3t - y0;
 210                 w1 = fn1 * pio2_3t - y1;
 211                 w2 = fn2 * pio2_3t - y2;
 212                 x0 = a0 - w0;
 213                 x1 = a1 - w1;
 214                 x2 = a2 - w2;
 215                 y0 = ( a0 - x0 ) - w0;
 216                 y1 = ( a1 - x1 ) - w1;
 217                 y2 = ( a2 - x2 ) - w2;
 218                 xsb0 = HI(&x0);
 219                 i = ( ( xsb0 & ~0x80000000 ) - thresh[n0&1] ) >> 31;
 220                 xsb1 = HI(&x1);
 221                 i |= ( ( ( xsb1 & ~0x80000000 ) - thresh[n1&1] ) >> 30 ) & 2;
 222                 xsb2 = HI(&x2);
 223                 i |= ( ( ( xsb2 & ~0x80000000 ) - thresh[n2&1] ) >> 29 ) & 4;
 224                 switch ( i )
 225                 {
 226                         double          t0, t1, t2, z0, z1, z2;
 227                         unsigned        j0, j1, j2;
 228 
 229                 case 0:
 230                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 231                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 232                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 233                         HI(&t0) = j0;
 234                         HI(&t1) = j1;
 235                         HI(&t2) = j2;
 236                         LO(&t0) = 0;
 237                         LO(&t1) = 0;
 238                         LO(&t2) = 0;
 239                         x0 = ( x0 - t0 ) + y0;
 240                         x1 = ( x1 - t1 ) + y1;
 241                         x2 = ( x2 - t2 ) + y2;
 242                         z0 = x0 * x0;
 243                         z1 = x1 * x1;
 244                         z2 = x2 * x2;
 245                         t0 = z0 * ( qq1 + z0 * qq2 );
 246                         t1 = z1 * ( qq1 + z1 * qq2 );
 247                         t2 = z2 * ( qq1 + z2 * qq2 );
 248                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 249                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 250                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 251                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 252                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 253                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 254                         xsb0 = ( xsb0 >> 30 ) & 2;
 255                         xsb1 = ( xsb1 >> 30 ) & 2;
 256                         xsb2 = ( xsb2 >> 30 ) & 2;
 257                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 258                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 259                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 260                         xsb0 |= 1;
 261                         xsb1 |= 1;
 262                         xsb2 |= 1;
 263                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 264                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 265                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 266                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 267                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 268                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 269                         *py0 = ( a0 + t0 );
 270                         *py1 = ( a1 + t1 );
 271                         *py2 = ( a2 + t2 );
 272                         break;
 273 
 274                 case 1:
 275                         j0 = n0 & 1;
 276                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 277                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 278                         HI(&t1) = j1;
 279                         HI(&t2) = j2;
 280                         LO(&t1) = 0;
 281                         LO(&t2) = 0;
 282                         x0_or_one[0] = x0;
 283                         x0_or_one[2] = -x0;
 284                         y0_or_zero[0] = y0;
 285                         y0_or_zero[2] = -y0;
 286                         x1 = ( x1 - t1 ) + y1;
 287                         x2 = ( x2 - t2 ) + y2;
 288                         z0 = x0 * x0;
 289                         z1 = x1 * x1;
 290                         z2 = x2 * x2;
 291                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 292                         t1 = z1 * ( qq1 + z1 * qq2 );
 293                         t2 = z2 * ( qq1 + z2 * qq2 );
 294                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 295                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 296                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 297                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 298                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 299                         xsb1 = ( xsb1 >> 30 ) & 2;
 300                         xsb2 = ( xsb2 >> 30 ) & 2;
 301                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 302                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 303                         xsb1 |= 1;
 304                         xsb2 |= 1;
 305                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 306                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 307                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 308                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 309                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 310                         *py0 = t0;
 311                         *py1 = ( a1 + t1 );
 312                         *py2 = ( a2 + t2 );
 313                         break;
 314 
 315                 case 2:
 316                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 317                         j1 = n1 & 1;
 318                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 319                         HI(&t0) = j0;
 320                         HI(&t2) = j2;
 321                         LO(&t0) = 0;
 322                         LO(&t2) = 0;
 323                         x1_or_one[0] = x1;
 324                         x1_or_one[2] = -x1;
 325                         x0 = ( x0 - t0 ) + y0;
 326                         y1_or_zero[0] = y1;
 327                         y1_or_zero[2] = -y1;
 328                         x2 = ( x2 - t2 ) + y2;
 329                         z0 = x0 * x0;
 330                         z1 = x1 * x1;
 331                         z2 = x2 * x2;
 332                         t0 = z0 * ( qq1 + z0 * qq2 );
 333                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 334                         t2 = z2 * ( qq1 + z2 * qq2 );
 335                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 336                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 337                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 338                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 339                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 340                         xsb0 = ( xsb0 >> 30 ) & 2;
 341                         xsb2 = ( xsb2 >> 30 ) & 2;
 342                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 343                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 344                         xsb0 |= 1;
 345                         xsb2 |= 1;
 346                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 347                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 348                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 349                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 350                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 351                         *py0 = ( a0 + t0 );
 352                         *py1 = t1;
 353                         *py2 = ( a2 + t2 );
 354                         break;
 355 
 356                 case 3:
 357                         j0 = n0 & 1;
 358                         j1 = n1 & 1;
 359                         j2 = ( xsb2 + 0x4000 ) & 0xffff8000;
 360                         HI(&t2) = j2;
 361                         LO(&t2) = 0;
 362                         x0_or_one[0] = x0;
 363                         x0_or_one[2] = -x0;
 364                         x1_or_one[0] = x1;
 365                         x1_or_one[2] = -x1;
 366                         y0_or_zero[0] = y0;
 367                         y0_or_zero[2] = -y0;
 368                         y1_or_zero[0] = y1;
 369                         y1_or_zero[2] = -y1;
 370                         x2 = ( x2 - t2 ) + y2;
 371                         z0 = x0 * x0;
 372                         z1 = x1 * x1;
 373                         z2 = x2 * x2;
 374                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 375                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 376                         t2 = z2 * ( qq1 + z2 * qq2 );
 377                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 378                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 379                         w2 = x2 * ( one + z2 * ( pp1 + z2 * pp2 ) );
 380                         j2 = ( ( ( j2 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 381                         xsb2 = ( xsb2 >> 30 ) & 2;
 382                         n2 ^= ( xsb2 & ~( n2 << 1 ) );
 383                         xsb2 |= 1;
 384                         a2 = __vlibm_TBL_sincos_hi[j2+n2];
 385                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 386                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 387                         t2 = ( __vlibm_TBL_sincos_hi[j2+((n2+xsb2)&3)] * w2 + a2 * t2 ) + __vlibm_TBL_sincos_lo[j2+n2];
 388                         *py0 = t0;
 389                         *py1 = t1;
 390                         *py2 = ( a2 + t2 );
 391                         break;
 392 
 393                 case 4:
 394                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 395                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 396                         j2 = n2 & 1;
 397                         HI(&t0) = j0;
 398                         HI(&t1) = j1;
 399                         LO(&t0) = 0;
 400                         LO(&t1) = 0;
 401                         x2_or_one[0] = x2;
 402                         x2_or_one[2] = -x2;
 403                         x0 = ( x0 - t0 ) + y0;
 404                         x1 = ( x1 - t1 ) + y1;
 405                         y2_or_zero[0] = y2;
 406                         y2_or_zero[2] = -y2;
 407                         z0 = x0 * x0;
 408                         z1 = x1 * x1;
 409                         z2 = x2 * x2;
 410                         t0 = z0 * ( qq1 + z0 * qq2 );
 411                         t1 = z1 * ( qq1 + z1 * qq2 );
 412                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 413                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 414                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 415                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 416                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 417                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 418                         xsb0 = ( xsb0 >> 30 ) & 2;
 419                         xsb1 = ( xsb1 >> 30 ) & 2;
 420                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 421                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 422                         xsb0 |= 1;
 423                         xsb1 |= 1;
 424                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 425                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 426                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 427                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 428                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 429                         *py0 = ( a0 + t0 );
 430                         *py1 = ( a1 + t1 );
 431                         *py2 = t2;
 432                         break;
 433 
 434                 case 5:
 435                         j0 = n0 & 1;
 436                         j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 437                         j2 = n2 & 1;
 438                         HI(&t1) = j1;
 439                         LO(&t1) = 0;
 440                         x0_or_one[0] = x0;
 441                         x0_or_one[2] = -x0;
 442                         x2_or_one[0] = x2;
 443                         x2_or_one[2] = -x2;
 444                         y0_or_zero[0] = y0;
 445                         y0_or_zero[2] = -y0;
 446                         x1 = ( x1 - t1 ) + y1;
 447                         y2_or_zero[0] = y2;
 448                         y2_or_zero[2] = -y2;
 449                         z0 = x0 * x0;
 450                         z1 = x1 * x1;
 451                         z2 = x2 * x2;
 452                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 453                         t1 = z1 * ( qq1 + z1 * qq2 );
 454                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 455                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 456                         w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 457                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 458                         j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 459                         xsb1 = ( xsb1 >> 30 ) & 2;
 460                         n1 ^= ( xsb1 & ~( n1 << 1 ) );
 461                         xsb1 |= 1;
 462                         a1 = __vlibm_TBL_sincos_hi[j1+n1];
 463                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 464                         t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 465                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 466                         *py0 = t0;
 467                         *py1 = ( a1 + t1 );
 468                         *py2 = t2;
 469                         break;
 470 
 471                 case 6:
 472                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 473                         j1 = n1 & 1;
 474                         j2 = n2 & 1;
 475                         HI(&t0) = j0;
 476                         LO(&t0) = 0;
 477                         x1_or_one[0] = x1;
 478                         x1_or_one[2] = -x1;
 479                         x2_or_one[0] = x2;
 480                         x2_or_one[2] = -x2;
 481                         x0 = ( x0 - t0 ) + y0;
 482                         y1_or_zero[0] = y1;
 483                         y1_or_zero[2] = -y1;
 484                         y2_or_zero[0] = y2;
 485                         y2_or_zero[2] = -y2;
 486                         z0 = x0 * x0;
 487                         z1 = x1 * x1;
 488                         z2 = x2 * x2;
 489                         t0 = z0 * ( qq1 + z0 * qq2 );
 490                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 491                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 492                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 493                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 494                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 495                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 496                         xsb0 = ( xsb0 >> 30 ) & 2;
 497                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 498                         xsb0 |= 1;
 499                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 500                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 501                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 502                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 503                         *py0 = ( a0 + t0 );
 504                         *py1 = t1;
 505                         *py2 = t2;
 506                         break;
 507 
 508                 case 7:
 509                         j0 = n0 & 1;
 510                         j1 = n1 & 1;
 511                         j2 = n2 & 1;
 512                         x0_or_one[0] = x0;
 513                         x0_or_one[2] = -x0;
 514                         x1_or_one[0] = x1;
 515                         x1_or_one[2] = -x1;
 516                         x2_or_one[0] = x2;
 517                         x2_or_one[2] = -x2;
 518                         y0_or_zero[0] = y0;
 519                         y0_or_zero[2] = -y0;
 520                         y1_or_zero[0] = y1;
 521                         y1_or_zero[2] = -y1;
 522                         y2_or_zero[0] = y2;
 523                         y2_or_zero[2] = -y2;
 524                         z0 = x0 * x0;
 525                         z1 = x1 * x1;
 526                         z2 = x2 * x2;
 527                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 528                         t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 529                         t2 = z2 * ( poly3[j2] + z2 * poly4[j2] );
 530                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 531                         t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 532                         t2 = z2 * ( poly1[j2] + z2 * ( poly2[j2] + t2 ) );
 533                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 534                         t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 535                         t2 = x2_or_one[n2] + ( y2_or_zero[n2] + x2_or_one[n2] * t2 );
 536                         *py0 = t0;
 537                         *py1 = t1;
 538                         *py2 = t2;
 539                         break;
 540                 }
 541 
 542                 x += stridex;
 543                 y += stridey;
 544                 i = 0;
 545         } while ( --n > 0 );
 546 
 547         if ( i > 0 )
 548         {
 549                 double          fn0, fn1, a0, a1, w0, w1, y0, y1;
 550                 double          t0, t1, z0, z1;
 551                 unsigned        j0, j1;
 552                 int                     n0, n1;
 553 
 554                 if ( i > 1 )
 555                 {
 556                         n1 = (int) ( x1 * invpio2 + half[xsb1] );
 557                         fn1 = (double) n1;
 558                         n1 = (n1 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 559                         a1 = x1 - fn1 * pio2_1;
 560                         w1 = fn1 * pio2_2;
 561                         x1 = a1 - w1;
 562                         y1 = ( a1 - x1 ) - w1;
 563                         a1 = x1;
 564                         w1 = fn1 * pio2_3 - y1;
 565                         x1 = a1 - w1;
 566                         y1 = ( a1 - x1 ) - w1;
 567                         a1 = x1;
 568                         w1 = fn1 * pio2_3t - y1;
 569                         x1 = a1 - w1;
 570                         y1 = ( a1 - x1 ) - w1;
 571                         xsb1 = HI(&x1);
 572                         if ( ( xsb1 & ~0x80000000 ) < thresh[n1&1] )
 573                         {
 574                                 j1 = n1 & 1;
 575                                 x1_or_one[0] = x1;
 576                                 x1_or_one[2] = -x1;
 577                                 y1_or_zero[0] = y1;
 578                                 y1_or_zero[2] = -y1;
 579                                 z1 = x1 * x1;
 580                                 t1 = z1 * ( poly3[j1] + z1 * poly4[j1] );
 581                                 t1 = z1 * ( poly1[j1] + z1 * ( poly2[j1] + t1 ) );
 582                                 t1 = x1_or_one[n1] + ( y1_or_zero[n1] + x1_or_one[n1] * t1 );
 583                                 *py1 = t1;
 584                         }
 585                         else
 586                         {
 587                                 j1 = ( xsb1 + 0x4000 ) & 0xffff8000;
 588                                 HI(&t1) = j1;
 589                                 LO(&t1) = 0;
 590                                 x1 = ( x1 - t1 ) + y1;
 591                                 z1 = x1 * x1;
 592                                 t1 = z1 * ( qq1 + z1 * qq2 );
 593                                 w1 = x1 * ( one + z1 * ( pp1 + z1 * pp2 ) );
 594                                 j1 = ( ( ( j1 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 595                                 xsb1 = ( xsb1 >> 30 ) & 2;
 596                                 n1 ^= ( xsb1 & ~( n1 << 1 ) );
 597                                 xsb1 |= 1;
 598                                 a1 = __vlibm_TBL_sincos_hi[j1+n1];
 599                                 t1 = ( __vlibm_TBL_sincos_hi[j1+((n1+xsb1)&3)] * w1 + a1 * t1 ) + __vlibm_TBL_sincos_lo[j1+n1];
 600                                 *py1 = ( a1 + t1 );
 601                         }
 602                 }
 603                 n0 = (int) ( x0 * invpio2 + half[xsb0] );
 604                 fn0 = (double) n0;
 605                 n0 = (n0 + 1) & 3; /* Add 1 (before the mod) to make sin into cos */
 606                 a0 = x0 - fn0 * pio2_1;
 607                 w0 = fn0 * pio2_2;
 608                 x0 = a0 - w0;
 609                 y0 = ( a0 - x0 ) - w0;
 610                 a0 = x0;
 611                 w0 = fn0 * pio2_3 - y0;
 612                 x0 = a0 - w0;
 613                 y0 = ( a0 - x0 ) - w0;
 614                 a0 = x0;
 615                 w0 = fn0 * pio2_3t - y0;
 616                 x0 = a0 - w0;
 617                 y0 = ( a0 - x0 ) - w0;
 618                 xsb0 = HI(&x0);
 619                 if ( ( xsb0 & ~0x80000000 ) < thresh[n0&1] )
 620                 {
 621                         j0 = n0 & 1;
 622                         x0_or_one[0] = x0;
 623                         x0_or_one[2] = -x0;
 624                         y0_or_zero[0] = y0;
 625                         y0_or_zero[2] = -y0;
 626                         z0 = x0 * x0;
 627                         t0 = z0 * ( poly3[j0] + z0 * poly4[j0] );
 628                         t0 = z0 * ( poly1[j0] + z0 * ( poly2[j0] + t0 ) );
 629                         t0 = x0_or_one[n0] + ( y0_or_zero[n0] + x0_or_one[n0] * t0 );
 630                         *py0 = t0;
 631                 }
 632                 else
 633                 {
 634                         j0 = ( xsb0 + 0x4000 ) & 0xffff8000;
 635                         HI(&t0) = j0;
 636                         LO(&t0) = 0;
 637                         x0 = ( x0 - t0 ) + y0;
 638                         z0 = x0 * x0;
 639                         t0 = z0 * ( qq1 + z0 * qq2 );
 640                         w0 = x0 * ( one + z0 * ( pp1 + z0 * pp2 ) );
 641                         j0 = ( ( ( j0 & ~0x80000000 ) - 0x3fc40000 ) >> 13 ) & ~0x3;
 642                         xsb0 = ( xsb0 >> 30 ) & 2;
 643                         n0 ^= ( xsb0 & ~( n0 << 1 ) );
 644                         xsb0 |= 1;
 645                         a0 = __vlibm_TBL_sincos_hi[j0+n0];
 646                         t0 = ( __vlibm_TBL_sincos_hi[j0+((n0+xsb0)&3)] * w0 + a0 * t0 ) + __vlibm_TBL_sincos_lo[j0+n0];
 647                         *py0 = ( a0 + t0 );
 648                 }
 649         }
 650 
 651         if ( biguns )
 652                 __vlibm_vcos_big( nsave, xsave, sxsave, ysave, sysave, 0x413921fb );
 653 }