1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include "libm_inlines.h"
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 extern const double __vlibm_TBL_atan2[];
  48 
  49 static const double
  50 zero    =  0.0,
  51 twom3   =  0.125,
  52 one             =  1.0,
  53 two110  =  1.2980742146337069071e+33,
  54 pio4    =  7.8539816339744827900e-01,
  55 pio2    =  1.5707963267948965580e+00,
  56 pio2_lo =  6.1232339957367658860e-17,
  57 pi              =  3.1415926535897931160e+00,
  58 pi_lo   =  1.2246467991473531772e-16,
  59 p1              = -3.33333333333327571893331786354179101074860633009e-0001,
  60 p2              =  1.99999999942671624230086497610394721817438631379e-0001,
  61 p3              = -1.42856965565428636896183013324727205980484158356e-0001,
  62 p4              =  1.10894981496317081405107718475040168084164825641e-0001;
  63 
  64 /* Don't __ the following; acomp will handle it */
  65 extern double fabs( double );
  66 
  67 void
  68 __vatan2( int n, double * restrict y, int stridey, double * restrict x,
  69         int stridex, double * restrict z, int stridez )
  70 {
  71         double          x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
  72         double          ah0, ah1, ah2, al0, al1, al2, t0, t1, t2;
  73         double          z0, z1, z2, sign0, sign1, sign2, xh;
  74         int                     i, k, hx, hy, sx, sy;
  75 
  76         do
  77         {
  78 loop0:
  79                 hy = HI(y);
  80                 sy = hy & 0x80000000;
  81                 hy &= ~0x80000000;
  82                 sign0 = ( sy )? -one : one;
  83 
  84                 hx = HI(x);
  85                 sx = hx & 0x80000000;
  86                 hx &= ~0x80000000;
  87 
  88                 if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) )
  89                 {
  90                         i = hx;
  91                         hx = hy;
  92                         hy = i;
  93                         x0 = fabs( *y );
  94                         y0 = fabs( *x );
  95                         if ( sx )
  96                         {
  97                                 ah0 = pio2;
  98                                 al0 = pio2_lo;
  99                         }
 100                         else
 101                         {
 102                                 ah0 = -pio2;
 103                                 al0 = -pio2_lo;
 104                                 sign0 = -sign0;
 105                         }
 106                 }
 107                 else
 108                 {
 109                         x0 = fabs( *x );
 110                         y0 = fabs( *y );
 111                         if ( sx )
 112                         {
 113                                 ah0 = -pi;
 114                                 al0 = -pi_lo;
 115                                 sign0 = -sign0;
 116                         }
 117                         else
 118                                 ah0 = al0 = zero;
 119                 }
 120 
 121                 if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 )
 122                 {
 123                         if ( hx >= 0x7ff00000 )
 124                         {
 125                                 if ( ( hx ^ 0x7ff00000 ) | LO(&x0) ) /* nan */
 126                                         ah0 =  x0 + y0;
 127                                 else if ( hy >= 0x7ff00000 )
 128                                         ah0 += pio4;
 129                                 *z = sign0 * ah0;
 130                                 x += stridex;
 131                                 y += stridey;
 132                                 z += stridez;
 133                                 i = 0;
 134                                 if ( --n <= 0 )
 135                                         break;
 136                                 goto loop0;
 137                         }
 138                         if ( hx - hy >= 0x03600000 )
 139                         {
 140                                 if ( (int) ah0 == 0 )
 141                                         ah0 = y0 / x0;
 142                                 *z = sign0 * ah0;
 143                                 x += stridex;
 144                                 y += stridey;
 145                                 z += stridez;
 146                                 i = 0;
 147                                 if ( --n <= 0 )
 148                                         break;
 149                                 goto loop0;
 150                         }
 151                         y0 *= twom3;
 152                         x0 *= twom3;
 153                         hy -= 0x00300000;
 154                         hx -= 0x00300000;
 155                 }
 156                 else if ( hy < 0x00100000 )
 157                 {
 158                         if ( ( hy | LO(&y0) ) == 0 )
 159                         {
 160                                 *z = sign0 * ah0;
 161                                 x += stridex;
 162                                 y += stridey;
 163                                 z += stridez;
 164                                 i = 0;
 165                                 if ( --n <= 0 )
 166                                         break;
 167                                 goto loop0;
 168                         }
 169                         y0 *= two110;
 170                         x0 *= two110;
 171                         hy = HI(&y0);
 172                         hx = HI(&x0);
 173                 }
 174 
 175                 k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3;
 176                 if ( k > 644 )
 177                         k = 644;
 178                 ah0 += __vlibm_TBL_atan2[k];
 179                 al0 += __vlibm_TBL_atan2[k+1];
 180                 t0 = __vlibm_TBL_atan2[k+2];
 181 
 182                 xh = x0;
 183                 LO(&xh) = 0;
 184                 z0 = ( ( y0 - t0 * xh ) - t0 * ( x0 - xh ) ) / ( x0 + y0 * t0 );
 185                 pz0 = z;
 186                 x += stridex;
 187                 y += stridey;
 188                 z += stridez;
 189                 i = 1;
 190                 if ( --n <= 0 )
 191                         break;
 192 
 193 loop1:
 194                 hy = HI(y);
 195                 sy = hy & 0x80000000;
 196                 hy &= ~0x80000000;
 197                 sign1 = ( sy )? -one : one;
 198 
 199                 hx = HI(x);
 200                 sx = hx & 0x80000000;
 201                 hx &= ~0x80000000;
 202 
 203                 if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) )
 204                 {
 205                         i = hx;
 206                         hx = hy;
 207                         hy = i;
 208                         x1 = fabs( *y );
 209                         y1 = fabs( *x );
 210                         if ( sx )
 211                         {
 212                                 ah1 = pio2;
 213                                 al1 = pio2_lo;
 214                         }
 215                         else
 216                         {
 217                                 ah1 = -pio2;
 218                                 al1 = -pio2_lo;
 219                                 sign1 = -sign1;
 220                         }
 221                 }
 222                 else
 223                 {
 224                         x1 = fabs( *x );
 225                         y1 = fabs( *y );
 226                         if ( sx )
 227                         {
 228                                 ah1 = -pi;
 229                                 al1 = -pi_lo;
 230                                 sign1 = -sign1;
 231                         }
 232                         else
 233                                 ah1 = al1 = zero;
 234                 }
 235 
 236                 if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 )
 237                 {
 238                         if ( hx >= 0x7ff00000 )
 239                         {
 240                                 if ( ( hx ^ 0x7ff00000 ) | LO(&x1) ) /* nan */
 241                                         ah1 =  x1 + y1;
 242                                 else if ( hy >= 0x7ff00000 )
 243                                         ah1 += pio4;
 244                                 *z = sign1 * ah1;
 245                                 x += stridex;
 246                                 y += stridey;
 247                                 z += stridez;
 248                                 i = 1;
 249                                 if ( --n <= 0 )
 250                                         break;
 251                                 goto loop1;
 252                         }
 253                         if ( hx - hy >= 0x03600000 )
 254                         {
 255                                 if ( (int) ah1 == 0 )
 256                                         ah1 = y1 / x1;
 257                                 *z = sign1 * ah1;
 258                                 x += stridex;
 259                                 y += stridey;
 260                                 z += stridez;
 261                                 i = 1;
 262                                 if ( --n <= 0 )
 263                                         break;
 264                                 goto loop1;
 265                         }
 266                         y1 *= twom3;
 267                         x1 *= twom3;
 268                         hy -= 0x00300000;
 269                         hx -= 0x00300000;
 270                 }
 271                 else if ( hy < 0x00100000 )
 272                 {
 273                         if ( ( hy | LO(&y1) ) == 0 )
 274                         {
 275                                 *z = sign1 * ah1;
 276                                 x += stridex;
 277                                 y += stridey;
 278                                 z += stridez;
 279                                 i = 1;
 280                                 if ( --n <= 0 )
 281                                         break;
 282                                 goto loop1;
 283                         }
 284                         y1 *= two110;
 285                         x1 *= two110;
 286                         hy = HI(&y1);
 287                         hx = HI(&x1);
 288                 }
 289 
 290                 k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3;
 291                 if ( k > 644 )
 292                         k = 644;
 293                 ah1 += __vlibm_TBL_atan2[k];
 294                 al1 += __vlibm_TBL_atan2[k+1];
 295                 t1 = __vlibm_TBL_atan2[k+2];
 296 
 297                 xh = x1;
 298                 LO(&xh) = 0;
 299                 z1 = ( ( y1 - t1 * xh ) - t1 * ( x1 - xh ) ) / ( x1 + y1 * t1 );
 300                 pz1 = z;
 301                 x += stridex;
 302                 y += stridey;
 303                 z += stridez;
 304                 i = 2;
 305                 if ( --n <= 0 )
 306                         break;
 307 
 308 loop2:
 309                 hy = HI(y);
 310                 sy = hy & 0x80000000;
 311                 hy &= ~0x80000000;
 312                 sign2 = ( sy )? -one : one;
 313 
 314                 hx = HI(x);
 315                 sx = hx & 0x80000000;
 316                 hx &= ~0x80000000;
 317 
 318                 if ( hy > hx || ( hy == hx && LO(y) > LO(x) ) )
 319                 {
 320                         i = hx;
 321                         hx = hy;
 322                         hy = i;
 323                         x2 = fabs( *y );
 324                         y2 = fabs( *x );
 325                         if ( sx )
 326                         {
 327                                 ah2 = pio2;
 328                                 al2 = pio2_lo;
 329                         }
 330                         else
 331                         {
 332                                 ah2 = -pio2;
 333                                 al2 = -pio2_lo;
 334                                 sign2 = -sign2;
 335                         }
 336                 }
 337                 else
 338                 {
 339                         x2 = fabs( *x );
 340                         y2 = fabs( *y );
 341                         if ( sx )
 342                         {
 343                                 ah2 = -pi;
 344                                 al2 = -pi_lo;
 345                                 sign2 = -sign2;
 346                         }
 347                         else
 348                                 ah2 = al2 = zero;
 349                 }
 350 
 351                 if ( hx >= 0x7fe00000 || hx - hy >= 0x03600000 )
 352                 {
 353                         if ( hx >= 0x7ff00000 )
 354                         {
 355                                 if ( ( hx ^ 0x7ff00000 ) | LO(&x2) ) /* nan */
 356                                         ah2 =  x2 + y2;
 357                                 else if ( hy >= 0x7ff00000 )
 358                                         ah2 += pio4;
 359                                 *z = sign2 * ah2;
 360                                 x += stridex;
 361                                 y += stridey;
 362                                 z += stridez;
 363                                 i = 2;
 364                                 if ( --n <= 0 )
 365                                         break;
 366                                 goto loop2;
 367                         }
 368                         if ( hx - hy >= 0x03600000 )
 369                         {
 370                                 if ( (int) ah2 == 0 )
 371                                         ah2 = y2 / x2;
 372                                 *z = sign2 * ah2;
 373                                 x += stridex;
 374                                 y += stridey;
 375                                 z += stridez;
 376                                 i = 2;
 377                                 if ( --n <= 0 )
 378                                         break;
 379                                 goto loop2;
 380                         }
 381                         y2 *= twom3;
 382                         x2 *= twom3;
 383                         hy -= 0x00300000;
 384                         hx -= 0x00300000;
 385                 }
 386                 else if ( hy < 0x00100000 )
 387                 {
 388                         if ( ( hy | LO(&y2) ) == 0 )
 389                         {
 390                                 *z = sign2 * ah2;
 391                                 x += stridex;
 392                                 y += stridey;
 393                                 z += stridez;
 394                                 i = 2;
 395                                 if ( --n <= 0 )
 396                                         break;
 397                                 goto loop2;
 398                         }
 399                         y2 *= two110;
 400                         x2 *= two110;
 401                         hy = HI(&y2);
 402                         hx = HI(&x2);
 403                 }
 404 
 405                 k = ( ( ( hx - hy ) + 0x00004000 ) >> 13 ) & ~0x3;
 406                 if ( k > 644 )
 407                         k = 644;
 408                 ah2 += __vlibm_TBL_atan2[k];
 409                 al2 += __vlibm_TBL_atan2[k+1];
 410                 t2 = __vlibm_TBL_atan2[k+2];
 411 
 412                 xh = x2;
 413                 LO(&xh) = 0;
 414                 z2 = ( ( y2 - t2 * xh ) - t2 * ( x2 - xh ) ) / ( x2 + y2 * t2 );
 415                 pz2 = z;
 416 
 417                 x0 = z0 * z0;
 418                 x1 = z1 * z1;
 419                 x2 = z2 * z2;
 420 
 421                 t0 = ah0 + ( z0 + ( al0 + ( z0 * x0 ) * ( p1 + x0 *
 422                         ( p2 + x0 * ( p3 + x0 * p4 ) ) ) ) );
 423                 t1 = ah1 + ( z1 + ( al1 + ( z1 * x1 ) * ( p1 + x1 *
 424                         ( p2 + x1 * ( p3 + x1 * p4 ) ) ) ) );
 425                 t2 = ah2 + ( z2 + ( al2 + ( z2 * x2 ) * ( p1 + x2 *
 426                         ( p2 + x2 * ( p3 + x2 * p4 ) ) ) ) );
 427 
 428                 *pz0 = sign0 * t0;
 429                 *pz1 = sign1 * t1;
 430                 *pz2 = sign2 * t2;
 431 
 432                 x += stridex;
 433                 y += stridey;
 434                 z += stridez;
 435                 i = 0;
 436         } while ( --n > 0 );
 437 
 438         if ( i > 0 )
 439         {
 440                 if ( i > 1 )
 441                 {
 442                         x1 = z1 * z1;
 443                         t1 = ah1 + ( z1 + ( al1 + ( z1 * x1 ) * ( p1 + x1 *
 444                                 ( p2 + x1 * ( p3 + x1 * p4 ) ) ) ) );
 445                         *pz1 = sign1 * t1;
 446                 }
 447 
 448                 x0 = z0 * z0;
 449                 t0 = ah0 + ( z0 + ( al0 + ( z0 * x0 ) * ( p1 + x0 *
 450                         ( p2 + x0 * ( p3 + x0 * p4 ) ) ) ) );
 451                 *pz0 = sign0 * t0;
 452         }
 453 }