1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #ifdef __RESTRICT
  31 #define restrict _Restrict
  32 #else
  33 #define restrict
  34 #endif
  35 
  36 extern const double __vlibm_TBL_atan1[];
  37 
  38 static const double
  39 pio4    =  7.8539816339744827900e-01,
  40 pio2    =  1.5707963267948965580e+00,
  41 pi      =  3.1415926535897931160e+00;
  42 
  43 static const float
  44 zero    =  0.0f,
  45 one     =  1.0f,
  46 q1      = -3.3333333333296428046e-01f,
  47 q2      =  1.9999999186853752618e-01f,
  48 twop24  =  16777216.0f;
  49 
  50 void
  51 __vatan2f( int n, float * restrict y, int stridey, float * restrict x,
  52         int stridex, float * restrict z, int stridez )
  53 {
  54         float           x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2;
  55         double          ah0, ah1, ah2;
  56         double          t0, t1, t2;
  57         double          sx0, sx1, sx2;
  58         double          sign0, sign1, sign2;
  59         int             i, k0 = 0, k1, k2, hx, sx, sy;
  60         int             hy0, hy1, hy2;
  61         float           base0 = 0.0, base1, base2;
  62         double          num0, num1, num2;
  63         double          den0, den1, den2;
  64         double          dx0, dx1, dx2;
  65         double          dy0, dy1, dy2;
  66         double          db0, db1, db2;
  67 
  68         do
  69         {
  70 loop0:
  71                 hy0 = *(int*)y;
  72                 hx = *(int*)x;
  73                 sign0 = one;
  74                 sy = hy0 & 0x80000000;
  75                 hy0 &= ~0x80000000;
  76 
  77                 sx = hx & 0x80000000;
  78                 hx &= ~0x80000000;
  79 
  80                 if ( hy0 > hx )
  81                 {
  82                         x0 = *y;
  83                         y0 = *x;
  84                         i = hx;
  85                         hx = hy0;
  86                         hy0 = i;
  87                         if ( sy ) 
  88                         {
  89                                 x0 = -x0;
  90                                 sign0 = -sign0;
  91                         }
  92                         if ( sx )
  93                         {
  94                                 y0 = -y0;
  95                                 ah0 = pio2;
  96                         }
  97                         else
  98                         {
  99                                 ah0 = -pio2;
 100                                 sign0 = -sign0;
 101                         }
 102                 }
 103                 else
 104                 {
 105                         y0 = *y;
 106                         x0 = *x;
 107                         if ( sy ) 
 108                         {
 109                                 y0 = -y0;
 110                                 sign0 = -sign0;
 111                         }
 112                         if ( sx )
 113                         {
 114                                 x0 = -x0;
 115                                 ah0 = -pi;
 116                                 sign0 = -sign0;
 117                         }
 118                         else
 119                                 ah0 = zero;
 120                 }
 121 
 122                 if ( hx >= 0x7f800000 || hx - hy0 >= 0x0c800000 )
 123                 {
 124                         if ( hx >= 0x7f800000 )
 125                         {
 126                                 if ( hx ^ 0x7f800000 ) /* nan */
 127                                         ah0 =  x0 + y0;
 128                                 else if ( hy0 >= 0x7f800000 )
 129                                         ah0 += pio4;
 130                         }
 131                         else if ( (int) ah0 == 0 )
 132                                 ah0 = y0 / x0;
 133                         *z = (sign0 == one) ? ah0 : -ah0; 
 134 /* sign0*ah0 would change nan behavior relative to previous release */
 135                         x += stridex;
 136                         y += stridey;
 137                         z += stridez;
 138                         i = 0;
 139                         if ( --n <= 0 )
 140                                 break;
 141                         goto loop0;
 142                 }
 143                 if (hy0 < 0x00800000) {
 144                         if ( hy0 == 0 )
 145                         {
 146                                 *z = sign0 * (float) ah0;
 147                                 x += stridex;
 148                                 y += stridey;
 149                                 z += stridez;
 150                                 i = 0;
 151                                 if ( --n <= 0 )
 152                                         break;
 153                                 goto loop0;
 154                         }
 155                         y0 *= twop24; /* scale subnormal y */
 156                         x0 *= twop24; /* scale possibly subnormal x */
 157                         hy0 = *(int*)&y0;
 158                         hx = *(int*)&x0;
 159                 }
 160                 pz0 = z;
 161 
 162                 k0 = ( hy0 - hx + 0x3f800000 ) & 0xfff80000;
 163                 if( k0 >= 0x3C800000 )          /* if |x| >= (1/64)... */
 164                 { 
 165                         *(int*)&base0 = k0;
 166                         k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
 167                         k0 += 4;
 168                                 /* skip over 0,0,pi/2,pi/2 */
 169                 }  
 170                 else                            /* |x| < 1/64 */
 171                 { 
 172                         k0 = 0;
 173                         base0 = zero;
 174                 }
 175 
 176                 x += stridex;
 177                 y += stridey;
 178                 z += stridez;
 179                 i = 1;
 180                 if ( --n <= 0 )
 181                         break;
 182 
 183 
 184 loop1:
 185                 hy1 = *(int*)y;
 186                 hx = *(int*)x;
 187                 sign1 = one;
 188                 sy = hy1 & 0x80000000;
 189                 hy1 &= ~0x80000000;
 190 
 191                 sx = hx & 0x80000000;
 192                 hx &= ~0x80000000;
 193 
 194                 if ( hy1 > hx )
 195                 {
 196                         x1 = *y;
 197                         y1 = *x;
 198                         i = hx;
 199                         hx = hy1;
 200                         hy1 = i;
 201                         if ( sy ) 
 202                         {
 203                                 x1 = -x1;
 204                                 sign1 = -sign1;
 205                         }
 206                         if ( sx )
 207                         {
 208                                 y1 = -y1;
 209                                 ah1 = pio2;
 210                         }
 211                         else
 212                         {
 213                                 ah1 = -pio2;
 214                                 sign1 = -sign1;
 215                         }
 216                 }
 217                 else
 218                 {
 219                         y1 = *y;
 220                         x1 = *x;
 221                         if ( sy ) 
 222                         {
 223                                 y1 = -y1;
 224                                 sign1 = -sign1;
 225                         }
 226                         if ( sx )
 227                         {
 228                                 x1 = -x1;
 229                                 ah1 = -pi;
 230                                 sign1 = -sign1;
 231                         }
 232                         else
 233                                 ah1 = zero;
 234                 }
 235 
 236                 if ( hx >= 0x7f800000 || hx - hy1 >= 0x0c800000 )
 237                 {
 238                         if ( hx >= 0x7f800000 )
 239                         {
 240                                 if ( hx ^ 0x7f800000 ) /* nan */
 241                                         ah1 =  x1 + y1;
 242                                 else if ( hy1 >= 0x7f800000 )
 243                                         ah1 += pio4;
 244                         }
 245                         else if ( (int) ah1 == 0 )
 246                                 ah1 = y1 / x1;
 247                         *z = (sign1 == one)? ah1 : -ah1;
 248                         x += stridex;
 249                         y += stridey;
 250                         z += stridez;
 251                         i = 1;
 252                         if ( --n <= 0 )
 253                                 break;
 254                         goto loop1;
 255                 }
 256                 if (hy1 < 0x00800000) {
 257                         if ( hy1 == 0 )
 258                         {
 259                                 *z = sign1 * (float) ah1;
 260                                 x += stridex;
 261                                 y += stridey;
 262                                 z += stridez;
 263                                 i = 1;
 264                                 if ( --n <= 0 )
 265                                         break;
 266                                 goto loop1;
 267                         }
 268                         y1 *= twop24; /* scale subnormal y */
 269                         x1 *= twop24; /* scale possibly subnormal x */
 270                         hy1 = *(int*)&y1;
 271                         hx = *(int*)&x1;
 272                 }
 273                 pz1 = z;
 274 
 275                 k1 = ( hy1 - hx + 0x3f800000 ) & 0xfff80000;
 276                 if( k1 >= 0x3C800000 )          /* if |x| >= (1/64)... */
 277                 { 
 278                         *(int*)&base1 = k1;
 279                         k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
 280                         k1 += 4;
 281                                 /* skip over 0,0,pi/2,pi/2 */
 282                 }  
 283                 else                            /* |x| < 1/64 */
 284                 { 
 285                         k1 = 0;
 286                         base1 = zero;
 287                 }
 288 
 289                 x += stridex;
 290                 y += stridey;
 291                 z += stridez;
 292                 i = 2;
 293                 if ( --n <= 0 )
 294                         break;
 295 
 296 loop2:
 297                 hy2 = *(int*)y;
 298                 hx = *(int*)x;
 299                 sign2 = one;
 300                 sy = hy2 & 0x80000000;
 301                 hy2 &= ~0x80000000;
 302 
 303                 sx = hx & 0x80000000;
 304                 hx &= ~0x80000000;
 305 
 306                 if ( hy2 > hx )
 307                 {
 308                         x2 = *y;
 309                         y2 = *x;
 310                         i = hx;
 311                         hx = hy2;
 312                         hy2 = i;
 313                         if ( sy ) 
 314                         {
 315                                 x2 = -x2;
 316                                 sign2 = -sign2;
 317                         }
 318                         if ( sx )
 319                         {
 320                                 y2 = -y2;
 321                                 ah2 = pio2;
 322                         }
 323                         else
 324                         {
 325                                 ah2 = -pio2;
 326                                 sign2 = -sign2;
 327                         }
 328                 }
 329                 else
 330                 {
 331                         y2 = *y;
 332                         x2 = *x;
 333                         if ( sy ) 
 334                         {
 335                                 y2 = -y2;
 336                                 sign2 = -sign2;
 337                         }
 338                         if ( sx )
 339                         {
 340                                 x2 = -x2;
 341                                 ah2 = -pi;
 342                                 sign2 = -sign2;
 343                         }
 344                         else
 345                                 ah2 = zero;
 346                 }
 347 
 348                 if ( hx >= 0x7f800000 || hx - hy2 >= 0x0c800000 )
 349                 {
 350                         if ( hx >= 0x7f800000 )
 351                         {
 352                                 if ( hx ^ 0x7f800000 ) /* nan */
 353                                         ah2 =  x2 + y2;
 354                                 else if ( hy2 >= 0x7f800000 )
 355                                         ah2 += pio4;
 356                         }
 357                         else if ( (int) ah2 == 0 )
 358                                 ah2 = y2 / x2;
 359                         *z = (sign2 == one)? ah2 : -ah2;
 360                         x += stridex;
 361                         y += stridey;
 362                         z += stridez;
 363                         i = 2;
 364                         if ( --n <= 0 )
 365                                 break;
 366                         goto loop2;
 367                 }
 368                 if (hy2 < 0x00800000) {
 369                         if ( hy2 == 0 )
 370                         {
 371                                 *z = sign2 * (float) ah2;
 372                                 x += stridex;
 373                                 y += stridey;
 374                                 z += stridez;
 375                                 i = 2;
 376                                 if ( --n <= 0 )
 377                                         break;
 378                                 goto loop2;
 379                         }
 380                         y2 *= twop24; /* scale subnormal y */
 381                         x2 *= twop24; /* scale possibly subnormal x */
 382                         hy2 = *(int*)&y2;
 383                         hx = *(int*)&x2;
 384                 }
 385 
 386                 pz2 = z;
 387 
 388                 k2 = ( hy2 - hx + 0x3f800000 ) & 0xfff80000;
 389                 if( k2 >= 0x3C800000 )          /* if |x| >= (1/64)... */
 390                 { 
 391                         *(int*)&base2 = k2;
 392                         k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
 393                         k2 += 4;
 394                                 /* skip over 0,0,pi/2,pi/2 */
 395                 }  
 396                 else                            /* |x| < 1/64 */
 397                 { 
 398                         k2 = 0;
 399                         base2 = zero;
 400                 }
 401 
 402                 goto endloop;
 403 
 404 endloop:
 405 
 406                 ah2 += __vlibm_TBL_atan1[k2];   
 407                 ah1 += __vlibm_TBL_atan1[k1];   
 408                 ah0 += __vlibm_TBL_atan1[k0];   
 409 
 410                 db2 = base2;
 411                 db1 = base1;
 412                 db0 = base0;
 413                 dy2 = y2;
 414                 dy1 = y1;
 415                 dy0 = y0;
 416                 dx2 = x2;
 417                 dx1 = x1;
 418                 dx0 = x0;
 419 
 420                 num2 = dy2 - dx2 * db2;
 421                 den2 = dx2 + dy2 * db2;
 422 
 423                 num1 = dy1 - dx1 * db1;
 424                 den1 = dx1 + dy1 * db1;
 425 
 426                 num0 = dy0 - dx0 * db0;
 427                 den0 = dx0 + dy0 * db0;
 428 
 429                 t2 = num2 / den2;
 430                 t1 = num1 / den1;
 431                 t0 = num0 / den0;
 432 
 433                 sx2 = t2 * t2;
 434                 sx1 = t1 * t1;
 435                 sx0 = t0 * t0;
 436  
 437                 t2 += t2 * sx2 * ( q1 + sx2 * q2 );
 438                 t1 += t1 * sx1 * ( q1 + sx1 * q2 );
 439                 t0 += t0 * sx0 * ( q1 + sx0 * q2 );
 440 
 441                 t2 += ah2;
 442                 t1 += ah1;
 443                 t0 += ah0;
 444 
 445                 *pz2 = sign2 * t2;
 446                 *pz1 = sign1 * t1;
 447                 *pz0 = sign0 * t0;
 448 
 449                 x += stridex;
 450                 y += stridey;
 451                 z += stridez;
 452                 i = 0;
 453         } while ( --n > 0 );
 454 
 455         if ( i > 1 )
 456         {
 457                 ah1 += __vlibm_TBL_atan1[k1];   
 458                 t1 = ( y1 - x1 * (double)base1 ) / 
 459                         ( x1 + y1 * (double)base1 );
 460                 sx1 = t1 * t1;
 461                 t1 += t1 * sx1 * ( q1 + sx1 * q2 );
 462                 t1 += ah1;
 463                 *pz1 = sign1 * t1;
 464         }
 465 
 466         if ( i > 0 )
 467         {
 468                 ah0 += __vlibm_TBL_atan1[k0];   
 469                 t0 = ( y0 - x0 * (double)base0 ) / 
 470                         ( x0 + y0 * (double)base0 );
 471                 sx0 = t0 * t0;
 472                 t0 += t0 * sx0 * ( q1 + sx0 * q2 );
 473                 t0 += ah0;
 474                 *pz0 = sign0 * t0;
 475         }
 476 }