il_11210 New usr/src/lib/libm/common/C/log2.c

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #pragma weak __log2 = log2
  32 
  33 
  34 /*
  35  * log2(x) = log(x)/log2
  36  *
  37  * Based on a table look-up algorithm with product polynomial
  38  * approximation for log(x).
  39  *
  40  * By K.C. Ng, Nov 29, 2004
  41  *
  42  * (a). For x in [1-0.125, 1+0.125], from log.c we have
  43  *      log(x) =  f + ((a1*f^2) *
  44  *                 ((a2 + (a3*f)*(a4+f)) + (f^3)*(a5+f))) *
  45  *                 (((a6 + f*(a7+f)) + (f^3)*(a8+f)) *
  46  *                 ((a9 + (a10*f)*(a11+f)) + (f^3)*(a12+f)))
  47  *      where f = x - 1.
  48  *      (i) modify a1 <- a1 / log2
  49  *      (ii) 1/log2 = 1.4426950408889634...
  50  *                  = 1.5 - 0.057304959... (4 bit shift)
  51  *           Let lv = 1.5 - 1/log2, then
  52  *           lv = 0.057304959111036592640075318998107956665325,
  53  *      (iii) f*1.5 is exact because f has 3 trailing zeros.
  54  *      (iv) Thus, log2(x) = f*1.5 - (lv*f - PPoly)
  55  *
  56  * (b). For 0.09375 <= x < 24
  57  *      Let j = (ix - 0x3fb80000) >> 15. Look up Y[j], 1/Y[j], and log(Y[j])
  58  *      from _TBL_log.c. Then
  59  *              log2(x)  = log2(Y[j]) + log2(1 + (x-Y[j])*(1/Y[j]))
  60  *                        = log(Y[j])(1/log2) + log2(1 + s)
  61  *      where
  62  *              s = (x-Y[j])*(1/Y[j])
  63  *      From log.c, we have log(1+s) =
  64  *                                2              2                     2
  65  *              (b s) (b + b s + s ) [b + b s + s (b + s)] (b + b s + s )
  66  *                1     2   3          4   5        6        7   8
  67  *
  68  *      By setting b1 <- b1/log2, we have
  69  *              log2(x) = 1.5 * T - (lv * T - POLY(s)), where T = log(Y[j])
  70  *
  71  * (c). Otherwise, get "n", the exponent of x, and then normalize x to
  72  *      z in [1,2). Then similar to (b) find a Y[i] that matches z to 5.5
  73  *      significant bits. Then
  74  *          log2(x) = n + log2(z).
  75  *
  76  * Special cases:
  77  *      log2(x) is NaN with signal if x < 0 (including -INF) ;
  78  *      log2(+INF) is +INF; log2(0) is -INF with signal;
  79  *      log2(NaN) is that NaN with no signal.
  80  *
  81  * Maximum error observed: less than 0.84 ulp
  82  *
  83  * Constants:
  84  * The hexadecimal values are the intended ones for the following constants.
  85  * The decimal values may be used, provided that the compiler will convert
  86  * from decimal to binary accurately enough to produce the hexadecimal values
  87  * shown.
  88  */
  89 
  90 #include "libm.h"
  91 #include "libm_protos.h"
  92 
  93 extern const double _TBL_log[];
  94 
  95 static const double P[] = {
  96 /* ONE   */
  97         1.0,
  98 /* TWO52 */ 4503599627370496.0,
  99 /* LN10V */ 1.4426950408889634073599246810018920433347, /* 1/log10 */
 100 /* ZERO  */ 0.0,
 101 /* A1    */ -9.6809362455249638217841932228967194640116e-02,
 102 /* A2    */ 1.99628461483039965074226529395673424005508422852e+0000,
 103 /* A3    */ 2.26812367662950720159642514772713184356689453125e+0000,
 104 /* A4    */ -9.05030639084976384900471657601883634924888610840e-0001,
 105 /* A5    */ -1.48275767132434044270894446526654064655303955078e+0000,
 106 /* A6    */ 1.88158320939722756293122074566781520843505859375e+0000,
 107 /* A7    */ 1.83309386046986411145098827546462416648864746094e+0000,
 108 /* A8    */ 1.24847063988317086291601754055591300129890441895e+0000,
 109 /* A9    */ 1.98372421445537705508854742220137268304824829102e+0000,
 110 /* A10   */ -3.94711735767898475035764249696512706577777862549e-0001,
 111 /* A11   */ 3.07890395362954372160402272129431366920471191406e+0000,
 112 /* A12   */ -9.60099585275022149311041630426188930869102478027e-0001,
 113 /* B1    */ -1.8039695622547469514898963204616532885451e-01,
 114 /* B2    */ 1.87161713283355151891381127914642725337613123482e+0000,
 115 /* B3    */ -1.89082956295731507978530316904652863740921020508e+0000,
 116 /* B4    */ -2.50562891673640253387134180229622870683670043945e+0000,
 117 /* B5    */ 1.64822828085258366037635369139024987816810607910e+0000,
 118 /* B6    */ -1.24409107065868340669112512841820716857910156250e+0000,
 119 /* B7    */ 1.70534231658220414296067701798165217041969299316e+0000,
 120 /* B8    */ 1.99196833784655646937267192697618156671524047852e+0000,
 121 /* LGH   */ 1.5,
 122 /* LGL   */ 0.057304959111036592640075318998107956665325,
 123 };
 124 
 125 #define ONE             P[0]
 126 #define TWO52           P[1]
 127 #define LN10V           P[2]
 128 #define ZERO            P[3]
 129 #define A1              P[4]
 130 #define A2              P[5]
 131 #define A3              P[6]
 132 #define A4              P[7]
 133 #define A5              P[8]
 134 #define A6              P[9]
 135 #define A7              P[10]
 136 #define A8              P[11]
 137 #define A9              P[12]
 138 #define A10             P[13]
 139 #define A11             P[14]
 140 #define A12             P[15]
 141 #define B1              P[16]
 142 #define B2              P[17]
 143 #define B3              P[18]
 144 #define B4              P[19]
 145 #define B5              P[20]
 146 #define B6              P[21]
 147 #define B7              P[22]
 148 #define B8              P[23]
 149 #define LGH             P[24]
 150 #define LGL             P[25]
 151 
 152 double
 153 log2(double x)
 154 {
 155         int i, hx, ix, n, lx;
 156 
 157         n = 0;
 158         hx = ((int *)&x)[HIWORD];
 159         ix = hx & 0x7fffffff;
 160         lx = ((int *)&x)[LOWORD];
 161 
 162         /* subnormal,0,negative,inf,nan */
 163         if ((hx + 0x100000) < 0x200000) {
 164 #if defined(FPADD_TRAPS_INCOMPLETE_ON_NAN)
 165                 if (ix >= 0x7ff80000) /* assumes sparc-like QNaN */
 166                         return (x);   /* for Cheetah when x is QNaN */
 167 #endif
 168 
 169                 if (((hx << 1) | lx) == 0)                /* log(0.0) = -inf */
 170                         return (A5 / fabs(x));
 171 
 172                 if (hx < 0) {                                /* x < 0 */
 173                         if (ix >= 0x7ff00000)
 174                                 return (x - x);         /* x is -inf or NaN */
 175                         else
 176                                 return (ZERO / (x - x));
 177                 }
 178 
 179                 if (((hx - 0x7ff00000) | lx) == 0)      /* log(inf) = inf */
 180                         return (x);
 181 
 182                 if (ix >= 0x7ff00000)                        /* log(NaN) = NaN */
 183                         return (x - x);
 184 
 185                 x *= TWO52;
 186                 n = -52;
 187                 hx = ((int *)&x)[HIWORD];
 188                 ix = hx & 0x7fffffff;
 189                 lx = ((int *)&x)[LOWORD];
 190         }
 191 
 192         /* 0.09375 (0x3fb80000) <= x < 24 (0x40380000) */
 193         i = ix >> 19;
 194 
 195         if (i >= 0x7f7 && i <= 0x806) {
 196                 /* 0.875 <= x < 1.125 */
 197                 if (ix >= 0x3fec0000 && ix < 0x3ff20000) {
 198                         double s, z, r, w;
 199 
 200                         s = x - ONE;
 201                         z = s * s;
 202                         r = (A10 * s) * (A11 + s);
 203                         w = z * s;
 204 
 205                         if (((ix << 12) | lx) == 0) {
 206                                 return (z);
 207                         } else {
 208                                 return (LGH * s - (LGL * s - ((A1 * z) * ((A2 +
 209                                     (A3 * s) * (A4 + s)) + w * (A5 + s))) *
 210                                     (((A6 + s * (A7 + s)) + w * (A8 + s)) *
 211                                     ((A9 + r) + w * (A12 + s)))));
 212                         }
 213                 } else {
 214                         double *tb, s;
 215 
 216                         i = (ix - 0x3fb80000) >> 15;
 217                         tb = (double *)_TBL_log + (i + i + i);
 218 
 219                         if (((ix << 12) | lx) == 0)       /* 2's power */
 220                                 return ((double)((ix >> 20) - 0x3ff));
 221 
 222                         s = (x - tb[0]) * tb[1];
 223                         return (LGH * tb[2] - (LGL * tb[2] - ((B1 * s) *
 224                             (B2 + s * (B3 + s))) * (((B4 + s * B5) + (s * s) *
 225                             (B6 + s)) * (B7 + s * (B8 + s)))));
 226                 }
 227         } else {
 228                 double *tb, dn, s;
 229 
 230                 dn = (double)(n + ((ix >> 20) - 0x3ff));
 231                 ix <<= 12;
 232 
 233                 if ((ix | lx) == 0)
 234                         return (dn);
 235 
 236                 i = ((unsigned)ix >> 12) | 0x3ff00000;    /* scale x to [1,2) */
 237                 ((int *)&x)[HIWORD] = i;
 238                 i = (i - 0x3fb80000) >> 15;
 239                 tb = (double *)_TBL_log + (i + i + i);
 240                 s = (x - tb[0]) * tb[1];
 241                 return (dn + (tb[2] * LN10V + ((B1 * s) * (B2 + s * (B3 + s))) *
 242                     (((B4 + s * B5) + (s * s) * (B6 + s)) *
 243                     (B7 + s * (B8 + s)))));
 244         }
 245 }