webrev_693-libm-all-changes-since-richlowe Wdiff usr/src/lib/libmvec/common/__vatanf.c

Print this page

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libmvec/common/__vatanf.c
          +++ new/usr/src/lib/libmvec/common/__vatanf.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   27   * Use is subject to license terms.
  28   28   */
  29   29  
  30   30  #ifdef __RESTRICT

↓ open down ↓

30 lines elided

↑ open up ↑

  31   31  #define restrict _Restrict
  32   32  #else
  33   33  #define restrict
  34   34  #endif
  35   35  
  36   36  void
  37   37  __vatanf( int n, float * restrict x, int stridex, float * restrict y, int stridey )
  38   38  {
  39   39    extern const double __vlibm_TBL_atan1[];
  40   40    double  conup0, conup1, conup2;
  41      -  float dummy, ansf;
       41 +  float dummy, ansf = 0.0;
  42   42    float f0, f1, f2;
  43   43    float ans0, ans1, ans2;
  44   44    float poly0, poly1, poly2;
  45   45    float sign0, sign1, sign2;
  46   46    int intf, intz, argcount;
  47   47    int index0, index1, index2; 
  48   48    float z,*yaddr0,*yaddr1,*yaddr2;
  49   49    int *pz = (int *) &z;
  50   50  #ifdef UNROLL4
  51   51    double conup3;

  52   52    int index3;
  53   53    float f3, ans3, poly3, sign3, *yaddr3;
  54   54  #endif
  55   55  
  56   56  /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
  57   57   *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
  58   58  
  59   59    static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
  60   60    static const float pone = 1.0f;
  61   61  
  62   62    if( n <= 0 ) return;          /* if no. of elements is 0 or neg, do nothing */
  63   63    do
  64   64    {
  65   65    LOOP0:
  66   66  
  67   67          intf     = *(int *) x;          /* upper half of x, as integer */
  68   68          f0 = *x;
  69   69          sign0 = pone;
  70   70          if (intf < 0) {
  71   71                  intf = intf & ~0x80000000; /* abs(upper argument) */
  72   72                  f0 = -f0;
  73   73                  sign0 = -sign0;
  74   74          }
  75   75    
  76   76      if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
  77   77      {
  78   78        if( intf > 0x7f800000 ) 
  79   79        {  
  80   80          ansf  = f0- f0;                                 /* return NaN if x=NaN*/
  81   81        }
  82   82        else if( intf < 0x31800000 )              /* avoid underflow for small arg */
  83   83        {
  84   84          dummy = 1.0e37 + f0;
  85   85          dummy = dummy;
  86   86          ansf  = f0;
  87   87        }
  88   88        else if( intf > 0x5B000000 )              /* avoid underflow for big arg  */
  89   89        {
  90   90          index0= 2;
  91   91          ansf  = __vlibm_TBL_atan1[index0];/* pi/2 up */
  92   92        }
  93   93        *y      = sign0*ansf;             /* store answer, with sign bit  */
  94   94        x      += stridex;
  95   95        y      += stridey;
  96   96        argcount = 0;                             /* initialize argcount          */
  97   97        if ( --n <=0 ) break;                     /* we are done                  */
  98   98        goto LOOP0;                               /* otherwise, examine next arg  */
  99   99      }
 100  100    
 101  101      if (intf > 0x42800000)                      /* if(|x| > 64                  */
 102  102      { 
 103  103      f0 = -pone/f0;
 104  104          index0 = 2;                             /* point to pi/2 upper, lower   */
 105  105      }
 106  106      else if( intf >= 0x3C800000 )               /* if |x| >= (1/64)...          */
 107  107      {
 108  108        intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 109  109        pz[0]  = intz;                            /* store as a float (z)         */
 110  110      f0 = (f0 - z)/(pone + f0*z);
 111  111          index0 = (intz - 0x3C800000) >> 18;     /* (index >> 19) << 1)          */
 112  112          index0 = index0+ 4;                     /* skip over 0,0,pi/2,pi/2      */
 113  113      } 
 114  114      else                                        /* |x| < 1/64 */
 115  115      {
 116  116          index0   = 0;                           /* points to 0,0 in table       */
 117  117      }
 118  118      yaddr0   = y;                               /* address to store this answer */ 
 119  119      x       += stridex;                         /* point to next arg            */
 120  120      y       += stridey;                         /* point to next result         */
 121  121      argcount = 1;                               /* we now have 1 good argument  */
 122  122      if ( --n <=0 ) 
 123  123      {
 124  124        goto UNROLL;                              /* finish up with 1 good arg    */
 125  125      }
 126  126  
 127  127      /*--------------------------------------------------------------------------*/
 128  128      /*--------------------------------------------------------------------------*/
 129  129      /*--------------------------------------------------------------------------*/
 130  130  
 131  131    LOOP1:
 132  132  
 133  133          intf     = *(int *) x;          /* upper half of x, as integer */
 134  134          f1 = *x;
 135  135          sign1 = pone;
 136  136          if (intf < 0) {
 137  137                  intf = intf & ~0x80000000; /* abs(upper argument) */
 138  138                  f1 = -f1;
 139  139                  sign1 = -sign1;
 140  140          }
 141  141    
 142  142      if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
 143  143      {
 144  144        if( intf > 0x7f800000 ) 
 145  145        {  
 146  146          ansf   = f1 - f1;                       /* return NaN if x=NaN*/
 147  147        }
 148  148        else if( intf < 0x31800000 )              /* avoid underflow for small arg */
 149  149        {
 150  150          dummy = 1.0e37 + f1;
 151  151          dummy = dummy;
 152  152          ansf   = f1;
 153  153        }
 154  154        else if( intf > 0x5B000000 )              /* avoid underflow for big arg  */
 155  155        {
 156  156          index1 = 2;
 157  157          ansf   = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
 158  158        }
 159  159        *y      = sign1 * ansf;           /* store answer, with sign bit  */
 160  160        x      += stridex;
 161  161        y      += stridey;
 162  162        argcount = 1;                             /* we still have 1 good arg     */
 163  163        if ( --n <=0 ) 
 164  164        {
 165  165          goto UNROLL;                            /* finish up with 1 good arg    */
 166  166        }
 167  167        goto LOOP1;                               /* otherwise, examine next arg  */
 168  168      }
 169  169    
 170  170      if (intf > 0x42800000)                      /* if(|x| > 64                  */
 171  171      { 
 172  172      f1 = -pone/f1;
 173  173        index1 = 2;                               /* point to pi/2 upper, lower   */
 174  174      }
 175  175      else if( intf >= 0x3C800000 )               /* if |x| >= (1/64)...          */
 176  176      {
 177  177        intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 178  178        pz[0]  = intz;                            /* store as a float (z)         */
 179  179      f1 = (f1 - z)/(pone + f1*z); 
 180  180        index1 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 181  181        index1 = index1 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 182  182      }
 183  183      else
 184  184      {
 185  185          index1   = 0;                           /* points to 0,0 in table       */
 186  186      }
 187  187  
 188  188      yaddr1   = y;                               /* address to store this answer */ 
 189  189      x       += stridex;                         /* point to next arg            */
 190  190      y       += stridey;                         /* point to next result         */
 191  191      argcount = 2;                               /* we now have 2 good arguments */
 192  192      if ( --n <=0 ) 
 193  193      {
 194  194        goto UNROLL;                              /* finish up with 2 good args   */
 195  195      }
 196  196  
 197  197      /*--------------------------------------------------------------------------*/
 198  198      /*--------------------------------------------------------------------------*/
 199  199      /*--------------------------------------------------------------------------*/
 200  200  
 201  201    LOOP2:
 202  202  
 203  203          intf     = *(int *) x;          /* upper half of x, as integer */
 204  204          f2 = *x;
 205  205          sign2 = pone;
 206  206          if (intf < 0) {
 207  207                  intf = intf & ~0x80000000; /* abs(upper argument) */
 208  208                  f2 = -f2;
 209  209                  sign2 = -sign2;
 210  210          }
 211  211    
 212  212      if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
 213  213      {
 214  214        if( intf > 0x7f800000 ) 
 215  215        {  
 216  216          ansf   = f2 - f2;                       /* return NaN if x=NaN*/
 217  217        }
 218  218        else if( intf < 0x31800000 )              /* avoid underflow for small arg */
 219  219        {
 220  220          dummy = 1.0e37 + f2;
 221  221          dummy = dummy;
 222  222          ansf   = f2;
 223  223        }
 224  224        else if( intf > 0x5B000000 )              /* avoid underflow for big arg  */
 225  225        {
 226  226          index2 = 2;
 227  227          ansf   = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
 228  228        }
 229  229        *y      = sign2 * ansf;           /* store answer, with sign bit  */
 230  230        x      += stridex;
 231  231        y      += stridey;
 232  232        argcount = 2;                             /* we still have 2 good args    */
 233  233        if ( --n <=0 ) 
 234  234        {
 235  235          goto UNROLL;                            /* finish up with 2 good args   */
 236  236        }
 237  237        goto LOOP2;                               /* otherwise, examine next arg  */
 238  238      }
 239  239    
 240  240      if (intf > 0x42800000)                      /* if(|x| > 64                  */
 241  241      { 
 242  242      f2 = -pone/f2;
 243  243        index2 = 2;                               /* point to pi/2 upper, lower   */
 244  244      }
 245  245      else if( intf >= 0x3C800000 )               /* if |x| >= (1/64)...          */
 246  246      {
 247  247        intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 248  248        pz[0]  = intz;                            /* store as a float (z)         */
 249  249      f2 = (f2 - z)/(pone + f2*z);
 250  250        index2 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 251  251        index2 = index2 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 252  252      }
 253  253      else
 254  254      {
 255  255          index2   = 0;                           /* points to 0,0 in table       */
 256  256      }
 257  257      yaddr2   = y;                               /* address to store this answer */ 
 258  258      x       += stridex;                         /* point to next arg            */
 259  259      y       += stridey;                         /* point to next result         */
 260  260      argcount = 3;                               /* we now have 3 good arguments */
 261  261      if ( --n <=0 ) 
 262  262      {
 263  263        goto UNROLL;                              /* finish up with 2 good args   */
 264  264      }
 265  265  
 266  266  
 267  267      /*--------------------------------------------------------------------------*/
 268  268      /*--------------------------------------------------------------------------*/
 269  269      /*--------------------------------------------------------------------------*/
 270  270  
 271  271  #ifdef UNROLL4
 272  272    LOOP3:
 273  273  
 274  274          intf     = *(int *) x;          /* upper half of x, as integer */
 275  275          f3 = *x;
 276  276          sign3 = pone;
 277  277          if (intf < 0) {
 278  278                  intf = intf & ~0x80000000; /* abs(upper argument) */
 279  279                  f3 = -f3;
 280  280                  sign3 = -sign3;
 281  281          }
 282  282    
 283  283      if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
 284  284      {
 285  285        if( intf > 0x7f800000 ) 
 286  286        {  
 287  287          ansf   = f3 - f3;                       /* return NaN if x=NaN*/
 288  288        }
 289  289        else if( intf < 0x31800000 )              /* avoid underflow for small arg */
 290  290        {
 291  291          dummy = 1.0e37 + f3;
 292  292          dummy = dummy;
 293  293          ansf   = f3;
 294  294        }
 295  295        else if( intf > 0x5B000000 )              /* avoid underflow for big arg  */
 296  296        {
 297  297          index3 = 2;
 298  298          ansf   = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
 299  299        }
 300  300        *y      = sign3 * ansf;           /* store answer, with sign bit  */
 301  301        x      += stridex;
 302  302        y      += stridey;
 303  303        argcount = 3;                             /* we still have 3 good args    */
 304  304        if ( --n <=0 ) 
 305  305        {
 306  306          goto UNROLL;                            /* finish up with 3 good args   */
 307  307        }
 308  308        goto LOOP3;                               /* otherwise, examine next arg  */
 309  309      }
 310  310    
 311  311      if (intf > 0x42800000)                      /* if(|x| > 64                  */
 312  312      { 
 313  313          n3 = -pone;
 314  314          d3 = f3;
 315  315      f3 = n3/d3;
 316  316        index3 = 2;                               /* point to pi/2 upper, lower   */
 317  317      }
 318  318      else if( intf >= 0x3C800000 )               /* if |x| >= (1/64)...          */
 319  319      {
 320  320        intz   = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper        */
 321  321        pz[0]  = intz;                            /* store as a float (z)         */
 322  322          n3     = (f3 - z);
 323  323          d3     = (pone + f3*z);                 /* get reduced argument         */
 324  324      f3 = n3/d3;
 325  325        index3 = (intz - 0x3C800000) >> 18;       /* (index >> 19) << 1)          */
 326  326        index3 = index3 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 327  327      }
 328  328      else
 329  329      {
 330  330          n3 = f3;
 331  331          d3 = pone;
 332  332          index3   = 0;                           /* points to 0,0 in table       */
 333  333      }
 334  334      yaddr3   = y;                               /* address to store this answer */ 
 335  335      x       += stridex;                         /* point to next arg            */
 336  336      y       += stridey;                         /* point to next result         */
 337  337      argcount = 4;                               /* we now have 4 good arguments */
 338  338      if ( --n <=0 ) 
 339  339      {
 340  340        goto UNROLL;                              /* finish up with 3 good args   */
 341  341      }
 342  342  #endif /* UNROLL4 */
 343  343  
 344  344  /* here is the n-way unrolled section, 
 345  345     but we may actually have less than n 
 346  346     arguments at this point
 347  347  */
 348  348  
 349  349  UNROLL:
 350  350  
 351  351  #ifdef UNROLL4
 352  352      if (argcount == 4)
 353  353      {
 354  354      conup0   = __vlibm_TBL_atan1[index0];
 355  355      conup1   = __vlibm_TBL_atan1[index1];
 356  356      conup2   = __vlibm_TBL_atan1[index2];
 357  357      conup3   = __vlibm_TBL_atan1[index3];
 358  358      poly0    = p1*f0*f0*f0 + f0;
 359  359      ans0     = sign0 * (float)(conup0 + poly0);
 360  360      poly1    = p1*f1*f1*f1 + f1;
 361  361      ans1     = sign1 * (float)(conup1 + poly1);
 362  362      poly2    = p1*f2*f2*f2 + f2;
 363  363      ans2     = sign2 * (float)(conup2 + poly2);
 364  364      poly3    = p1*f3*f3*f3 + f3;
 365  365      ans3     = sign3 * (float)(conup3 + poly3);
 366  366      *yaddr0  = ans0;
 367  367      *yaddr1  = ans1;
 368  368      *yaddr2  = ans2;
 369  369      *yaddr3  = ans3;
 370  370      }
 371  371      else 
 372  372  #endif
 373  373      if (argcount == 3)
 374  374      {
 375  375      conup0   = __vlibm_TBL_atan1[index0];
 376  376      conup1   = __vlibm_TBL_atan1[index1];
 377  377      conup2   = __vlibm_TBL_atan1[index2];
 378  378      poly0    = p1*f0*f0*f0 + f0;
 379  379      poly1    = p1*f1*f1*f1 + f1;
 380  380      poly2    = p1*f2*f2*f2 + f2;
 381  381      ans0     = sign0 * (float)(conup0 + poly0);
 382  382      ans1     = sign1 * (float)(conup1 + poly1);
 383  383      ans2     = sign2 * (float)(conup2 + poly2);
 384  384      *yaddr0  = ans0;
 385  385      *yaddr1  = ans1;
 386  386      *yaddr2  = ans2;
 387  387      }
 388  388      else 
 389  389      if (argcount == 2)
 390  390      {
 391  391      conup0   = __vlibm_TBL_atan1[index0];
 392  392      conup1   = __vlibm_TBL_atan1[index1];
 393  393      poly0    = p1*f0*f0*f0 + f0;
 394  394      poly1    = p1*f1*f1*f1 + f1;
 395  395      ans0     = sign0 * (float)(conup0 + poly0);
 396  396      ans1     = sign1 * (float)(conup1 + poly1);
 397  397      *yaddr0  = ans0;
 398  398      *yaddr1  = ans1;
 399  399      }
 400  400      else 
 401  401      if (argcount == 1)
 402  402      {
 403  403      conup0   = __vlibm_TBL_atan1[index0];
 404  404      poly0    = p1*f0*f0*f0 + f0;
 405  405      ans0     = sign0 * (float)(conup0 + poly0);
 406  406      *yaddr0  = ans0;
 407  407       }
 408  408  
 409  409    }  while (n > 0);
 410  410  
 411  411  }

↓ open down ↓

360 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX