webrev_693-libm-all-changes-since-original-2012-v02 Wdiff usr/src/lib/libmvec/common/__vatan.c

Print this page

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libmvec/common/__vatan.c
          +++ new/usr/src/lib/libmvec/common/__vatan.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
  25   25  /*
  26   26   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   27   * Use is subject to license terms.
  28   28   */
  29   29  
  30   30  #include <sys/isa_defs.h>
  31   31  #include "libm_inlines.h"
  32   32  
  33   33  #ifdef _LITTLE_ENDIAN
  34   34  #define HI(x)   *(1+(int*)x)
  35   35  #define LO(x)   *(unsigned*)x
  36   36  #else
  37   37  #define HI(x)   *(int*)x
  38   38  #define LO(x)   *(1+(unsigned*)x)
  39   39  #endif

↓ open down ↓

39 lines elided

↑ open up ↑

  40   40  
  41   41  #ifdef __RESTRICT
  42   42  #define restrict _Restrict
  43   43  #else
  44   44  #define restrict
  45   45  #endif
  46   46  
  47   47  void
  48   48  __vatan( int n, double * restrict x, int stridex, double * restrict y, int stridey )
  49   49  {
  50      -  double  f , z, ans, ansu , ansl , tmp , poly , conup , conlo , dummy;
       50 +  double  f , z, ans = 0.0L, ansu , ansl , tmp , poly , conup , conlo , dummy;
  51   51    double  f1,   ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
  52   52    double  f2,   ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
  53   53    int index, sign, intf, intflo, intz, argcount;
  54      -  int index1, sign1 ;
  55      -  int index2, sign2 ;
  56      -  double *yaddr,*yaddr1,*yaddr2;
       54 +  int index1, sign1 = 0;
       55 +  int index2, sign2 = 0;
       56 +  double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
  57   57    extern const double __vlibm_TBL_atan1[];
  58   58    extern double fabs( double );
  59   59  
  60   60  /*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
  61   61   *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */
  62   62  
  63   63  /* define dummy names for readability.  Use parray to help compiler optimize loads */
  64   64  #define p3    parray[0]
  65   65  #define p2    parray[1]
  66   66  #define p1    parray[2]

  67   67  
  68   68    static const double parray[] = { 
  69   69     -1.428029046844299722E-01,           /* p[3]         */
  70   70      1.999999917247000615E-01,           /* p[2]         */
  71   71     -3.333333333329292858E-01,           /* p[1]         */
  72   72      1.0,                                /* not used for p[0], though            */
  73   73     -1.0,                                /* used to flip sign of answer          */
  74   74    };
  75   75  
  76   76    if( n <= 0 ) return;          /* if no. of elements is 0 or neg, do nothing */
  77   77    do
  78   78    {
  79   79    LOOP0:
  80   80  
  81   81      f        = fabs(*x);                        /* fetch argument               */
  82   82      intf     = HI(x);                   /* upper half of x, as integer  */
  83   83      intflo   = LO(x);                   /* lower half of x, as integer  */
  84   84      sign     = intf &  0x80000000;              /* sign of argument             */
  85   85      intf     = intf & ~0x80000000;              /* abs(upper argument)          */
  86   86    
  87   87      if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
  88   88      {
  89   89        if(  (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0) ) ) 
  90   90        {  
  91   91          ans   = f - f;                          /* return NaN if x=NaN*/
  92   92        }
  93   93        else if( intf < 0x3e300000 )              /* avoid underflow for small arg */
  94   94        {
  95   95          dummy = 1.0e37 + f;
  96   96          dummy = dummy;
  97   97          ans   = f;
  98   98        }
  99   99        else if( intf > 0x43600000 )              /* avoid underflow for big arg  */
 100  100        {
 101  101          index = 2;
 102  102          ans   = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low   */
 103  103        }
 104  104        *y      = (sign) ? -ans: ans;             /* store answer, with sign bit  */
 105  105        x      += stridex;
 106  106        y      += stridey;
 107  107        argcount = 0;                             /* initialize argcount          */
 108  108        if ( --n <=0 ) break;                     /* we are done                  */
 109  109        goto LOOP0;                               /* otherwise, examine next arg  */
 110  110      }
 111  111    
 112  112      index    = 0;                               /* points to 0,0 in table       */
 113  113      if (intf > 0x40500000)                      /* if(|x| > 64                  */
 114  114      { f = -1.0/f;
 115  115        index  = 2;                               /* point to pi/2 upper, lower   */
 116  116      }
 117  117      else if( intf >= 0x3f900000 )               /* if |x| >= (1/64)...          */
 118  118      {
 119  119        intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 120  120        HI(&z)  = intz;                           /* store as a double (z)        */
 121  121        LO(&z)  = 0;                              /* ...lower                     */
 122  122        f      = (f - z)/(1.0 + f*z);             /* get reduced argument         */
 123  123        index  = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 124  124        index  = index + 4;                       /* skip over 0,0,pi/2,pi/2      */
 125  125      }
 126  126      yaddr    = y;                               /* address to store this answer */ 
 127  127      x       += stridex;                         /* point to next arg            */
 128  128      y       += stridey;                         /* point to next result         */
 129  129      argcount = 1;                               /* we now have 1 good argument  */
 130  130      if ( --n <=0 ) 
 131  131      {
 132  132        f1      = 0.0;                            /* put dummy values in args 1,2 */
 133  133        f2      = 0.0;
 134  134        index1  = 0;
 135  135        index2  = 0;
 136  136        goto UNROLL3;                             /* finish up with 1 good arg    */
 137  137      }
 138  138  
 139  139      /*--------------------------------------------------------------------------*/
 140  140      /*--------------------------------------------------------------------------*/
 141  141      /*--------------------------------------------------------------------------*/
 142  142  
 143  143    LOOP1:
 144  144  
 145  145      f1       = fabs(*x);                        /* fetch argument               */
 146  146      intf     = HI(x);                   /* upper half of x, as integer  */
 147  147      intflo   = LO(x);                   /* lower half of x, as integer  */
 148  148      sign1    = intf &  0x80000000;              /* sign of argument             */
 149  149      intf     = intf & ~0x80000000;              /* abs(upper argument)          */
 150  150    
 151  151      if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
 152  152      {
 153  153        if(  (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0) ) ) 
 154  154        {  
 155  155          ans   = f1 - f1;                        /* return NaN if x=NaN*/
 156  156        }
 157  157        else if( intf < 0x3e300000 )              /* avoid underflow for small arg */
 158  158        {
 159  159          dummy = 1.0e37 + f1;
 160  160          dummy = dummy;
 161  161          ans   = f1;
 162  162        }
 163  163        else if( intf > 0x43600000 )              /* avoid underflow for big arg  */
 164  164        {
 165  165          index1 = 2;
 166  166          ans   = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low   */
 167  167        }
 168  168        *y      = (sign1) ? -ans: ans;            /* store answer, with sign bit  */
 169  169        x      += stridex;
 170  170        y      += stridey;
 171  171        argcount = 1;                             /* we still have 1 good arg     */
 172  172        if ( --n <=0 ) 
 173  173        {
 174  174          f1      = 0.0;                          /* put dummy values in args 1,2 */
 175  175          f2      = 0.0;
 176  176          index1  = 0;
 177  177          index2  = 0;
 178  178          goto UNROLL3;                           /* finish up with 1 good arg    */
 179  179        }
 180  180        goto LOOP1;                               /* otherwise, examine next arg  */
 181  181      }
 182  182    
 183  183      index1   = 0;                               /* points to 0,0 in table       */
 184  184      if (intf > 0x40500000)                      /* if(|x| > 64                  */
 185  185      { f1 = -1.0/f1;
 186  186        index1 = 2;                               /* point to pi/2 upper, lower   */
 187  187      }
 188  188      else if( intf >= 0x3f900000 )               /* if |x| >= (1/64)...          */
 189  189      {
 190  190        intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 191  191        HI(&z) = intz;                            /* store as a double (z)        */
 192  192        LO(&z) = 0;                               /* ...lower                     */
 193  193        f1     = (f1 - z)/(1.0 + f1*z);           /* get reduced argument         */
 194  194        index1 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 195  195        index1 = index1 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 196  196      }
 197  197      yaddr1   = y;                               /* address to store this answer */ 
 198  198      x       += stridex;                         /* point to next arg            */
 199  199      y       += stridey;                         /* point to next result         */
 200  200      argcount = 2;                               /* we now have 2 good arguments */
 201  201      if ( --n <=0 ) 
 202  202      {
 203  203        f2      = 0.0;                            /* put dummy value in arg 2 */
 204  204        index2  = 0;
 205  205        goto UNROLL3;                             /* finish up with 2 good args   */
 206  206      }
 207  207  
 208  208      /*--------------------------------------------------------------------------*/
 209  209      /*--------------------------------------------------------------------------*/
 210  210      /*--------------------------------------------------------------------------*/
 211  211  
 212  212    LOOP2:
 213  213  
 214  214      f2       = fabs(*x);                        /* fetch argument               */
 215  215      intf     = HI(x);                   /* upper half of x, as integer  */
 216  216      intflo   = LO(x);                   /* lower half of x, as integer  */
 217  217      sign2    = intf &  0x80000000;              /* sign of argument             */
 218  218      intf     = intf & ~0x80000000;              /* abs(upper argument)          */
 219  219    
 220  220      if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
 221  221      {
 222  222        if(  (intf > 0x7ff00000) || ((intf == 0x7ff00000) &&  (intflo !=0) ) ) 
 223  223        {  
 224  224          ans   = f2 - f2;                        /* return NaN if x=NaN*/
 225  225        }
 226  226        else if( intf < 0x3e300000 )              /* avoid underflow for small arg */
 227  227        {
 228  228          dummy = 1.0e37 + f2;
 229  229          dummy = dummy;
 230  230          ans   = f2;
 231  231        }
 232  232        else if( intf > 0x43600000 )              /* avoid underflow for big arg  */
 233  233        {
 234  234          index2 = 2;
 235  235          ans   = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low   */
 236  236        }
 237  237        *y      = (sign2) ? -ans: ans;            /* store answer, with sign bit  */
 238  238        x      += stridex;
 239  239        y      += stridey;
 240  240        argcount = 2;                             /* we still have 2 good args    */
 241  241        if ( --n <=0 ) 
 242  242        {
 243  243          f2      = 0.0;                          /* put dummy value in arg 2 */
 244  244          index2  = 0;
 245  245          goto UNROLL3;                           /* finish up with 2 good args   */
 246  246        }
 247  247        goto LOOP2;                               /* otherwise, examine next arg  */
 248  248      }
 249  249    
 250  250      index2   = 0;                               /* points to 0,0 in table       */
 251  251      if (intf > 0x40500000)                      /* if(|x| > 64                  */
 252  252      { f2 = -1.0/f2;
 253  253        index2 = 2;                               /* point to pi/2 upper, lower   */
 254  254      }
 255  255      else if( intf >= 0x3f900000 )               /* if |x| >= (1/64)...          */
 256  256      {
 257  257        intz   = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper        */
 258  258        HI(&z) = intz;                            /* store as a double (z)        */
 259  259        LO(&z) = 0;                               /* ...lower                     */
 260  260        f2     = (f2 - z)/(1.0 + f2*z);           /* get reduced argument         */
 261  261        index2 = (intz - 0x3f900000) >> 15;       /* (index >> 16) << 1)          */
 262  262        index2 = index2 + 4;                      /* skip over 0,0,pi/2,pi/2      */
 263  263      }
 264  264      yaddr2   = y;                               /* address to store this answer */ 
 265  265      x       += stridex;                         /* point to next arg            */
 266  266      y       += stridey;                         /* point to next result         */
 267  267      argcount = 3;                               /* we now have 3 good arguments */
 268  268  
 269  269  
 270  270  /* here is the 3 way unrolled section, 
 271  271     note, we may actually only have 
 272  272     1,2, or 3 'real' arguments at this point
 273  273  */
 274  274  
 275  275  UNROLL3:
 276  276  
 277  277      conup    = __vlibm_TBL_atan1[index ];       /* upper table                  */
 278  278      conup1   = __vlibm_TBL_atan1[index1];       /* upper table                  */
 279  279      conup2   = __vlibm_TBL_atan1[index2];       /* upper table                  */
 280  280  
 281  281      conlo    = __vlibm_TBL_atan1[index +1];     /* lower table                  */
 282  282      conlo1   = __vlibm_TBL_atan1[index1+1];     /* lower table                  */
 283  283      conlo2   = __vlibm_TBL_atan1[index2+1];     /* lower table                  */
 284  284  
 285  285      tmp      = f *f ;
 286  286      tmp1     = f1*f1;
 287  287      tmp2     = f2*f2;
 288  288  
 289  289      poly     = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
 290  290      poly1    = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
 291  291      poly2    = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
 292  292  
 293  293      ansu     = conup  + f ;                     /* compute atan(f)  upper       */
 294  294      ansu1    = conup1 + f1;                     /* compute atan(f)  upper       */
 295  295      ansu2    = conup2 + f2;                     /* compute atan(f)  upper       */
 296  296  
 297  297      ansl     = (((conup  - ansu ) + f ) + poly ) + conlo ;
 298  298      ansl1    = (((conup1 - ansu1) + f1) + poly1) + conlo1;
 299  299      ansl2    = (((conup2 - ansu2) + f2) + poly2) + conlo2;
 300  300  
 301  301      ans      = ansu  + ansl ;
 302  302      ans1     = ansu1 + ansl1;
 303  303      ans2     = ansu2 + ansl2;
 304  304  
 305  305  /* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
 306  306  
 307  307     *yaddr    = sign ? -ans: ans;                /* this one is always good      */
 308  308     if(argcount < 3) break;                      /* end loop and finish up       */
 309  309       *yaddr1   = sign1 ? -ans1: ans1;
 310  310       *yaddr2   = sign2 ? -ans2: ans2;
 311  311  
 312  312    }  while (--n > 0);
 313  313  
 314  314   if(argcount == 2) 
 315  315     {  *yaddr1  = sign1 ? -ans1: ans1;
 316  316     }
 317  317  }

↓ open down ↓

251 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX