il_11210 Wdiff usr/src/lib/libm/common/C/expm1.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/common/C/expm1.c
          +++ new/usr/src/lib/libm/common/C/expm1.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.

↓ open down ↓

14 lines elided

↑ open up ↑

  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
       25 +
  25   26  /*
  26   27   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   28   * Use is subject to license terms.
  28   29   */
  29   30  
  30   31  #pragma weak __expm1 = expm1
  31   32  
  32      -/* INDENT OFF */
       33 +
  33   34  /*
  34   35   * expm1(x)
  35   36   * Returns exp(x)-1, the exponential of x minus 1.
  36   37   *
  37   38   * Method
  38   39   *   1. Arugment reduction:
  39   40   *      Given x, find r and integer k such that
  40   41   *
  41   42   *               x = k*ln2 + r,  |r| <= 0.5*ln2 ~ 0.34658
  42   43   *

  43   44   *      Here a correction term c will be computed to compensate
  44   45   *      the error in r when rounded to a floating-point number.
  45   46   *
  46   47   *   2. Approximating expm1(r) by a special rational function on

↓ open down ↓

4 lines elided

↑ open up ↑

  47   48   *      the interval [0,0.34658]:
  48   49   *      Since
  49   50   *          r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 - r^4/360 + ...
  50   51   *      we define R1(r*r) by
  51   52   *          r*(exp(r)+1)/(exp(r)-1) = 2+ r^2/6 * R1(r*r)
  52   53   *      That is,
  53   54   *          R1(r**2) = 6/r *((exp(r)+1)/(exp(r)-1) - 2/r)
  54   55   *                   = 6/r * ( 1 + 2.0*(1/(exp(r)-1) - 1/r))
  55   56   *                   = 1 - r^2/60 + r^4/2520 - r^6/100800 + ...
  56   57   *      We use a special Reme algorithm on [0,0.347] to generate
  57      - *      a polynomial of degree 5 in r*r to approximate R1. The
       58 + *      a polynomial of degree 5 in r*r to approximate R1. The
  58   59   *      maximum error of this polynomial approximation is bounded
  59   60   *      by 2**-61. In other words,
  60   61   *          R1(z) ~ 1.0 + Q1*z + Q2*z**2 + Q3*z**3 + Q4*z**4 + Q5*z**5
  61      - *      where   Q1  =  -1.6666666666666567384E-2,
  62      - *              Q2  =   3.9682539681370365873E-4,
  63      - *              Q3  =  -9.9206344733435987357E-6,
  64      - *              Q4  =   2.5051361420808517002E-7,
  65      - *              Q5  =  -6.2843505682382617102E-9;
  66      - *      (where z=r*r, and the values of Q1 to Q5 are listed below)
       62 + *      where   Q1  =  -1.6666666666666567384E-2,
       63 + *              Q2  =   3.9682539681370365873E-4,
       64 + *              Q3  =  -9.9206344733435987357E-6,
       65 + *              Q4  =   2.5051361420808517002E-7,
       66 + *              Q5  =  -6.2843505682382617102E-9;
       67 + *      (where z=r*r, and the values of Q1 to Q5 are listed below)
  67   68   *      with error bounded by
  68   69   *          |                  5           |     -61
  69   70   *          | 1.0+Q1*z+...+Q5*z   -  R1(z) | <= 2
  70   71   *          |                              |
  71   72   *
  72   73   *      expm1(r) = exp(r)-1 is then computed by the following
  73      - *      specific way which minimize the accumulation rounding error:
       74 + *      specific way which minimize the accumulation rounding error:
  74   75   *                             2     3
  75   76   *                            r     r    [ 3 - (R1 + R1*r/2)  ]
  76   77   *            expm1(r) = r + --- + --- * [--------------------]
  77   78   *                            2     2    [ 6 - r*(3 - R1*r/2) ]
  78   79   *
  79   80   *      To compensate the error in the argument reduction, we use
  80   81   *              expm1(r+c) = expm1(r) + c + expm1(r)*c
  81   82   *                         ~ expm1(r) + c + r*c
  82   83   *      Thus c+r*c will be added in as the correction terms for
  83   84   *      expm1(r+c). Now rearrange the term to avoid optimization
  84      - *      screw up:
       85 + *      screw up:
  85   86   *                      (      2                                    2 )
  86   87   *                      ({  ( r    [ R1 -  (3 - R1*r/2) ]  )  }    r  )
  87   88   *       expm1(r+c)~r - ({r*(--- * [--------------------]-c)-c} - --- )
  88   89   *                      ({  ( 2    [ 6 - r*(3 - R1*r/2) ]  )  }    2  )
  89   90   *                      (                                             )
  90   91   *
  91   92   *                 = r - E
  92   93   *   3. Scale back to obtain expm1(x):
  93   94   *      From step 1, we have
  94   95   *         expm1(x) = either 2^k*[expm1(r)+1] - 1

  95   96   *                  = or     2^k*[expm1(r) + (1-2^-k)]
  96   97   *   4. Implementation notes:
  97   98   *      (A). To save one multiplication, we scale the coefficient Qi
  98   99   *           to Qi*2^i, and replace z by (x^2)/2.
  99  100   *      (B). To achieve maximum accuracy, we compute expm1(x) by
 100  101   *        (i)   if x < -56*ln2, return -1.0, (raise inexact if x != inf)
 101  102   *        (ii)  if k=0, return r-E
 102  103   *        (iii) if k=-1, return 0.5*(r-E)-0.5
 103  104   *        (iv)  if k=1 if r < -0.25, return 2*((r+0.5)- E)
 104  105   *                                      else         return  1.0+2.0*(r-E);
 105  106   *        (v)   if (k<-2||k>56) return 2^k(1-(E-r)) - 1 (or exp(x)-1)
 106  107   *        (vi)  if k <= 20, return 2^k((1-2^-k)-(E-r)), else
 107  108   *        (vii) return 2^k(1-((E+2^-k)-r))
 108  109   *
 109  110   * Special cases:
 110  111   *      expm1(INF) is INF, expm1(NaN) is NaN;
 111  112   *      expm1(-INF) is -1, and
 112  113   *      for finite argument, only expm1(0)=0 is exact.
 113  114   *
 114  115   * Accuracy:
 115  116   *      according to an error analysis, the error is always less than
 116  117   *      1 ulp (unit in the last place).
 117  118   *

↓ open down ↓

23 lines elided

↑ open up ↑

 118  119   * Misc. info.
 119  120   *      For IEEE double
 120  121   *          if x >  7.09782712893383973096e+02 then expm1(x) overflow
 121  122   *
 122  123   * Constants:
 123  124   * The hexadecimal values are the intended ones for the following
 124  125   * constants. The decimal values may be used, provided that the
 125  126   * compiler will convert from decimal to binary accurately enough
 126  127   * to produce the hexadecimal values shown.
 127  128   */
 128      -/* INDENT ON */
 129  129  
 130  130  #include "libm_macros.h"
 131  131  #include <math.h>
 132  132  
 133  133  static const double xxx[] = {
 134      -/* one */                1.0,
 135      -/* huge */               1.0e+300,
 136      -/* tiny */               1.0e-300,
 137      -/* o_threshold */        7.09782712893383973096e+02,    /* 40862E42 FEFA39EF */
 138      -/* ln2_hi */             6.93147180369123816490e-01,    /* 3FE62E42 FEE00000 */
 139      -/* ln2_lo */             1.90821492927058770002e-10,    /* 3DEA39EF 35793C76 */
 140      -/* invln2 */             1.44269504088896338700e+00,    /* 3FF71547 652B82FE */
 141      -/* scaled coefficients related to expm1 */
 142      -/* Q1 */                -3.33333333333331316428e-02,    /* BFA11111 111110F4 */
 143      -/* Q2 */                 1.58730158725481460165e-03,    /* 3F5A01A0 19FE5585 */
 144      -/* Q3 */                -7.93650757867487942473e-05,    /* BF14CE19 9EAADBB7 */
 145      -/* Q4 */                 4.00821782732936239552e-06,    /* 3ED0CFCA 86E65239 */
 146      -/* Q5 */                -2.01099218183624371326e-07     /* BE8AFDB7 6E09C32D */
      134 +/* one */
      135 +        1.0,
      136 +/* huge */ 1.0e+300,
      137 +/* tiny */ 1.0e-300,
      138 +/* o_threshold */ 7.09782712893383973096e+02,   /* 40862E42 FEFA39EF */
      139 +/* ln2_hi */ 6.93147180369123816490e-01,        /* 3FE62E42 FEE00000 */
      140 +/* ln2_lo */ 1.90821492927058770002e-10,        /* 3DEA39EF 35793C76 */
      141 +/* invln2 */ 1.44269504088896338700e+00,        /* 3FF71547 652B82FE */
      142 +
      143 +/*
      144 + * scaled coefficients related to expm1
      145 + * Q1
      146 + */
      147 +        -3.33333333333331316428e-02,            /* BFA11111 111110F4 */
      148 +/* Q2 */ 1.58730158725481460165e-03,            /* 3F5A01A0 19FE5585 */
      149 +/* Q3 */ -7.93650757867487942473e-05,           /* BF14CE19 9EAADBB7 */
      150 +/* Q4 */ 4.00821782732936239552e-06,            /* 3ED0CFCA 86E65239 */
      151 +/* Q5 */ -2.01099218183624371326e-07            /* BE8AFDB7 6E09C32D */
 147  152  };
 148      -#define one             xxx[0]
 149      -#define huge            xxx[1]
 150      -#define tiny            xxx[2]
 151      -#define o_threshold     xxx[3]
 152      -#define ln2_hi          xxx[4]
 153      -#define ln2_lo          xxx[5]
 154      -#define invln2          xxx[6]
 155      -#define Q1              xxx[7]
 156      -#define Q2              xxx[8]
 157      -#define Q3              xxx[9]
 158      -#define Q4              xxx[10]
 159      -#define Q5              xxx[11]
      153 +
      154 +#define one                     xxx[0]
      155 +#define huge                    xxx[1]
      156 +#define tiny                    xxx[2]
      157 +#define o_threshold             xxx[3]
      158 +#define ln2_hi                  xxx[4]
      159 +#define ln2_lo                  xxx[5]
      160 +#define invln2                  xxx[6]
      161 +#define Q1                      xxx[7]
      162 +#define Q2                      xxx[8]
      163 +#define Q3                      xxx[9]
      164 +#define Q4                      xxx[10]
      165 +#define Q5                      xxx[11]
 160  166  
 161  167  double
 162      -expm1(double x) {
      168 +expm1(double x)
      169 +{
 163  170          double y, hi, lo, c = 0.0L, t, e, hxs, hfx, r1;
 164  171          int k, xsb;
 165  172          unsigned hx;
 166  173  
 167      -        hx = ((unsigned *) &x)[HIWORD];         /* high word of x */
 168      -        xsb = hx & 0x80000000;                  /* sign bit of x */
      174 +        hx = ((unsigned *)&x)[HIWORD];  /* high word of x */
      175 +        xsb = hx & 0x80000000;          /* sign bit of x */
      176 +
 169  177          if (xsb == 0)
 170  178                  y = x;
 171  179          else
 172      -                y = -x;                         /* y = |x| */
 173      -        hx &= 0x7fffffff;                       /* high word of |x| */
      180 +                y = -x;                 /* y = |x| */
      181 +
      182 +        hx &= 0x7fffffff;               /* high word of |x| */
 174  183  
 175      -        /* filter out huge and non-finite argument */
 176      -        /* for example exp(38)-1 is approximately 3.1855932e+16 */
      184 +        /*
      185 +         * filter out huge and non-finite argument
      186 +         * for example exp(38)-1 is approximately 3.1855932e+16
      187 +         */
 177  188          if (hx >= 0x4043687A) {
 178  189                  /* if |x|>=56*ln2 (~38.8162...) */
 179      -                if (hx >= 0x40862E42) {         /* if |x|>=709.78... -> inf */
      190 +                if (hx >= 0x40862E42) { /* if |x|>=709.78... -> inf */
 180  191                          if (hx >= 0x7ff00000) {
 181      -                                if (((hx & 0xfffff) | ((int *) &x)[LOWORD])
 182      -                                        != 0)
      192 +                                if (((hx & 0xfffff) | ((int *)&x)[LOWORD]) != 0)
 183  193                                          return (x * x); /* + -> * for Cheetah */
 184  194                                  else
 185  195                                          /* exp(+-inf)={inf,-1} */
 186  196                                          return (xsb == 0 ? x : -1.0);
 187  197                          }
      198 +
 188  199                          if (x > o_threshold)
 189  200                                  return (huge * huge);   /* overflow */
 190  201                  }
 191      -                if (xsb != 0) {         /* x < -56*ln2, return -1.0 w/inexact */
      202 +
      203 +                if (xsb != 0) { /* x < -56*ln2, return -1.0 w/inexact */
 192  204                          if (x + tiny < 0.0)             /* raise inexact */
 193  205                                  return (tiny - one);    /* return -1 */
 194  206                  }
 195  207          }
 196  208  
 197  209          /* argument reduction */
 198      -        if (hx > 0x3fd62e42) {                  /* if  |x| > 0.5 ln2 */
 199      -                if (hx < 0x3FF0A2B2) {          /* and |x| < 1.5 ln2 */
 200      -                        if (xsb == 0) {         /* positive number */
      210 +        if (hx > 0x3fd62e42) {          /* if  |x| > 0.5 ln2 */
      211 +                if (hx < 0x3FF0A2B2) {  /* and |x| < 1.5 ln2 */
      212 +                        if (xsb == 0) { /* positive number */
 201  213                                  hi = x - ln2_hi;
 202  214                                  lo = ln2_lo;
 203  215                                  k = 1;
 204  216                          } else {
 205  217                                  /* negative number */
 206  218                                  hi = x + ln2_hi;
 207  219                                  lo = -ln2_lo;
 208  220                                  k = -1;
 209  221                          }
 210  222                  } else {
 211  223                          /* |x| > 1.5 ln2 */
 212      -                        k = (int) (invln2 * x + (xsb == 0 ? 0.5 : -0.5));
      224 +                        k = (int)(invln2 * x + (xsb == 0 ? 0.5 : -0.5));
 213  225                          t = k;
 214  226                          hi = x - t * ln2_hi;    /* t*ln2_hi is exact here */
 215  227                          lo = t * ln2_lo;
 216  228                  }
      229 +
 217  230                  x = hi - lo;
 218      -                c = (hi - x) - lo; /* still at |x| > 0.5 ln2 */
      231 +                c = (hi - x) - lo;      /* still at |x| > 0.5 ln2 */
 219  232          } else if (hx < 0x3c900000) {
 220  233                  /* when |x|<2**-54, return x */
 221  234                  t = huge + x;           /* return x w/inexact when x != 0 */
 222  235                  return (x - (t - (huge + x)));
 223      -        } else
      236 +        } else {
 224  237                  /* |x| <= 0.5 ln2 */
 225  238                  k = 0;
      239 +        }
 226  240  
 227  241          /* x is now in primary range */
 228  242          hfx = 0.5 * x;
 229  243          hxs = x * hfx;
 230  244          r1 = one + hxs * (Q1 + hxs * (Q2 + hxs * (Q3 + hxs * (Q4 + hxs * Q5))));
 231  245          t = 3.0 - r1 * hfx;
 232  246          e = hxs * ((r1 - t) / (6.0 - x * t));
 233      -        if (k == 0) /* |x| <= 0.5 ln2 */
      247 +
      248 +        if (k == 0) {                   /* |x| <= 0.5 ln2 */
 234  249                  return (x - (x * e - hxs));
 235      -        else {          /* |x| > 0.5 ln2 */
      250 +        } else {                        /* |x| > 0.5 ln2 */
 236  251                  e = (x * (e - c) - c);
 237  252                  e -= hxs;
      253 +
 238  254                  if (k == -1)
 239  255                          return (0.5 * (x - e) - 0.5);
      256 +
 240  257                  if (k == 1) {
 241  258                          if (x < -0.25)
 242  259                                  return (-2.0 * (e - (x + 0.5)));
 243  260                          else
 244  261                                  return (one + 2.0 * (x - e));
 245  262                  }
      263 +
 246  264                  if (k <= -2 || k > 56) {        /* suffice to return exp(x)-1 */
 247  265                          y = one - (e - x);
 248      -                        ((int *) &y)[HIWORD] += k << 20;
      266 +                        ((int *)&y)[HIWORD] += k << 20;
 249  267                          return (y - one);
 250  268                  }
      269 +
 251  270                  t = one;
      271 +
 252  272                  if (k < 20) {
 253      -                        ((int *) &t)[HIWORD] = 0x3ff00000 - (0x200000 >> k);
 254      -                                                        /* t = 1 - 2^-k */
      273 +                        ((int *)&t)[HIWORD] = 0x3ff00000 - (0x200000 >> k);
      274 +                        /* t = 1 - 2^-k */
 255  275                          y = t - (e - x);
 256      -                        ((int *) &y)[HIWORD] += k << 20;
      276 +                        ((int *)&y)[HIWORD] += k << 20;
 257  277                  } else {
 258      -                        ((int *) &t)[HIWORD] = (0x3ff - k) << 20; /* 2^-k */
      278 +                        ((int *)&t)[HIWORD] = (0x3ff - k) << 20; /* 2^-k */
 259  279                          y = x - (e + t);
 260  280                          y += one;
 261      -                        ((int *) &y)[HIWORD] += k << 20;
      281 +                        ((int *)&y)[HIWORD] += k << 20;
 262  282                  }
 263  283          }
      284 +
 264  285          return (y);
 265  286  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX