illumos-gate New usr/src/lib/libmvec/common/_

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #ifdef __RESTRICT
  31 #define restrict _Restrict
  32 #else
  33 #define restrict
  34 #endif
  35 
  36 /* float logf(float x)
  37  *
  38  * Method :
  39  *      1. Special cases:
  40  *              for x is negative, -Inf => QNaN + invalid;
  41  *              for x = 0               => -Inf + divide-by-zero;
  42  *              for x = +Inf            => Inf;
  43  *              for x = NaN             => QNaN.
  44  *      2. Computes logarithm from:
  45  *              x = m * 2**n => log(x) = n * log(2) + log(m),
  46  *              m = [1, 2).
  47  *      Let m = m0 + dm, where m0 = 1 + k / 32,
  48  *              k = [0, 32],
  49  *              dm = [-1/64, 1/64].
  50  *      Then log(m) = log(m0 + dm) = log(m0) + log(1+y),
  51  *              where y = dm*(1/m0), y = [-1/66, 1/64].
  52  *      Then
  53  *              1/m0 is looked up in a table of 1, 1/(1+1/32), ..., 1/(1+32/32);
  54  *              log(m0) is looked up in a table of log(1), log(1+1/32),
  55  *              ..., log(1+32/32).
  56  *              log(1+y) is computed using approximation:
  57  *              log(1+y) = ((a3*y + a2)*y + a1)*y*y + y.
  58  * Accuracy:
  59  *      The maximum relative error for the approximating
  60  *      polynomial is 2**(-28.41).  All calculations are of
  61  *      double precision.
  62  *      Maximum error observed: less than 0.545 ulp for the
  63  *      whole float type range.
  64  */
  65 
  66 static const double __TBL_logf[] = {
  67         /* __TBL_logf[2*i] = log(1+i/32), i = [0, 32] */
  68         /* __TBL_logf[2*i+1] = 2**(-23)/(1+i/32), i = [0, 32] */
  69 0.000000000000000000e+00, 1.192092895507812500e-07, 3.077165866675368733e-02,
  70 1.155968868371212153e-07, 6.062462181643483994e-02, 1.121969784007352926e-07,
  71 8.961215868968713805e-02, 1.089913504464285680e-07, 1.177830356563834557e-01,
  72 1.059638129340277719e-07, 1.451820098444978890e-01, 1.030999260979729787e-07,
  73 1.718502569266592284e-01, 1.003867701480263102e-07, 1.978257433299198675e-01,
  74 9.781275040064102225e-08, 2.231435513142097649e-01, 9.536743164062500529e-08,
  75 2.478361639045812692e-01, 9.304139672256097884e-08, 2.719337154836417580e-01,
  76 9.082612537202380448e-08, 2.954642128938358980e-01, 8.871388989825581272e-08,
  77 3.184537311185345887e-01, 8.669766512784091150e-08, 3.409265869705931928e-01,
  78 8.477105034722222546e-08, 3.629054936893684746e-01, 8.292820142663043248e-08,
  79 3.844116989103320559e-01, 8.116377160904255122e-08, 4.054651081081643849e-01,
  80 7.947285970052082892e-08, 4.260843953109000881e-01, 7.785096460459183052e-08,
  81 4.462871026284195297e-01, 7.629394531250000159e-08, 4.660897299245992387e-01,
  82 7.479798560049019504e-08, 4.855078157817008244e-01, 7.335956280048077330e-08,
  83 5.045560107523953119e-01, 7.197542010613207272e-08, 5.232481437645478684e-01,
  84 7.064254195601851460e-08, 5.415972824327444091e-01, 6.935813210227272390e-08,
  85 5.596157879354226594e-01, 6.811959402901785336e-08, 5.773153650348236132e-01,
  86 6.692451343201754014e-08, 5.947071077466927758e-01, 6.577064251077586116e-08,
  87 6.118015411059929409e-01, 6.465588585805084723e-08, 6.286086594223740942e-01,
  88 6.357828776041666578e-08, 6.451379613735847007e-01, 6.253602074795082293e-08,
  89 6.613984822453650159e-01, 6.152737525201612732e-08, 6.773988235918061429e-01,
  90 6.055075024801586965e-08, 6.931471805599452862e-01, 5.960464477539062500e-08
  91 };
  92 
  93 static const double
  94         K3 = -2.49887584306188944706e-01,
  95         K2 =  3.33368809981254554946e-01,
  96         K1 = -5.00000008402474976565e-01;
  97 
  98 static const union {
  99         int     i;
 100         float   f;
 101 } inf = { 0x7f800000 };
 102 
 103 #define INF     inf.f
 104 
 105 #define PROCESS(N)                                                              \
 106         iy##N = ival##N & 0x007fffff;                                               \
 107         ival##N = (iy##N + 0x20000) & 0xfffc0000;                           \
 108         i##N  = ival##N >> 17;                                                    \
 109         iy##N = iy##N - ival##N;                                                \
 110         ty##N = LN2 * (double) exp##N + __TBL_logf[i##N];                       \
 111         yy##N = (double) iy##N * __TBL_logf[i##N + 1];                          \
 112         yy##N = ((K3 * yy##N + K2) * yy##N + K1) * yy##N * yy##N + yy##N;       \
 113         y[0] = (float)(yy##N + ty##N);                                          \
 114         y += stridey;
 115 
 116 #define PREPROCESS(N, index, label)                                             \
 117         ival##N = *(int*)x;                                                     \
 118         value = x[0];                                                           \
 119         x += stridex;                                                           \
 120         exp##N = (ival##N >> 23) - 127;                                           \
 121         if ( (ival##N & 0x7fffffff) >= 0x7f800000 ) /* X = NaN or Inf */ \
 122         {                                                                       \
 123                 y[index] = value + INF;                                         \
 124                 goto label;                                                     \
 125         }                                                                       \
 126         if ( ival##N < 0x00800000 )                                          \
 127         {                                                                       \
 128                 if ( ival##N > 0 )   /* X = denormal */                      \
 129                 {                                                               \
 130                         value = (float) ival##N;                                \
 131                         ival##N = *(int*) &value;                           \
 132                         exp##N = (ival##N >> 23) - (127 + 149);                   \
 133                 }                                                               \
 134                 else                                                            \
 135                 {                                                               \
 136                         value = 0.0f;                                           \
 137                         y[index] = ((ival##N & 0x7fffffff) == 0) ?          \
 138                                 -1.0f / value : value / value;                  \
 139                         goto label;                                             \
 140                 }                                                               \
 141         }
 142 
 143 void
 144 __vlogf( int n, float * restrict x, int stridex, float * restrict y,
 145         int stridey )
 146 {
 147         double  LN2 = __TBL_logf[64];           /* log(2) = 0.6931471805599453094       */
 148         double  yy0, yy1, yy2, yy3, yy4;
 149         double  ty0, ty1, ty2, ty3, ty4;
 150         float   value;
 151         int     i0, i1, i2, i3, i4;
 152         int     ival0, ival1, ival2, ival3, ival4;
 153         int     exp0, exp1, exp2, exp3, exp4;
 154         int     iy0, iy1, iy2, iy3, iy4;
 155 
 156         y -= stridey;
 157 
 158         for ( ; ; )
 159         {
 160 begin:
 161                 y += stridey;
 162 
 163                 if ( --n < 0 )
 164                         break;
 165 
 166                 PREPROCESS(0, 0, begin)
 167 
 168                 if ( --n < 0 )
 169                         goto process1;
 170 
 171                 PREPROCESS(1, stridey, process1)
 172 
 173                 if ( --n < 0 )
 174                         goto process2;
 175 
 176                 PREPROCESS(2, (stridey << 1), process2)
 177 
 178                 if ( --n < 0 )
 179                         goto process3;
 180 
 181                 PREPROCESS(3, (stridey << 1) + stridey, process3)
 182 
 183                 if ( --n < 0 )
 184                         goto process4;
 185 
 186                 PREPROCESS(4, (stridey << 2), process4)
 187 
 188                 iy0 = ival0 & 0x007fffff;
 189                 iy1 = ival1 & 0x007fffff;
 190                 iy2 = ival2 & 0x007fffff;
 191                 iy3 = ival3 & 0x007fffff;
 192                 iy4 = ival4 & 0x007fffff;
 193 
 194                 ival0 = (iy0 + 0x20000) & 0xfffc0000;
 195                 ival1 = (iy1 + 0x20000) & 0xfffc0000;
 196                 ival2 = (iy2 + 0x20000) & 0xfffc0000;
 197                 ival3 = (iy3 + 0x20000) & 0xfffc0000;
 198                 ival4 = (iy4 + 0x20000) & 0xfffc0000;
 199 
 200                 i0 = ival0 >> 17;
 201                 i1 = ival1 >> 17;
 202                 i2 = ival2 >> 17;
 203                 i3 = ival3 >> 17;
 204                 i4 = ival4 >> 17;
 205 
 206                 iy0 = iy0 - ival0;
 207                 iy1 = iy1 - ival1;
 208                 iy2 = iy2 - ival2;
 209                 iy3 = iy3 - ival3;
 210                 iy4 = iy4 - ival4;
 211 
 212                 ty0 = LN2 * (double) exp0 + __TBL_logf[i0];
 213                 ty1 = LN2 * (double) exp1 + __TBL_logf[i1];
 214                 ty2 = LN2 * (double) exp2 + __TBL_logf[i2];
 215                 ty3 = LN2 * (double) exp3 + __TBL_logf[i3];
 216                 ty4 = LN2 * (double) exp4 + __TBL_logf[i4];
 217 
 218                 yy0 = (double) iy0 * __TBL_logf[i0 + 1];
 219                 yy1 = (double) iy1 * __TBL_logf[i1 + 1];
 220                 yy2 = (double) iy2 * __TBL_logf[i2 + 1];
 221                 yy3 = (double) iy3 * __TBL_logf[i3 + 1];
 222                 yy4 = (double) iy4 * __TBL_logf[i4 + 1];
 223 
 224                 yy0 = ((K3 * yy0 + K2) * yy0 + K1) * yy0 * yy0 + yy0;
 225                 yy1 = ((K3 * yy1 + K2) * yy1 + K1) * yy1 * yy1 + yy1;
 226                 yy2 = ((K3 * yy2 + K2) * yy2 + K1) * yy2 * yy2 + yy2;
 227                 yy3 = ((K3 * yy3 + K2) * yy3 + K1) * yy3 * yy3 + yy3;
 228                 yy4 = ((K3 * yy4 + K2) * yy4 + K1) * yy4 * yy4 + yy4;
 229 
 230                 y[0] = (float)(yy0 + ty0);
 231                 y += stridey;
 232                 y[0] = (float)(yy1 + ty1);
 233                 y += stridey;
 234                 y[0] = (float)(yy2 + ty2);
 235                 y += stridey;
 236                 y[0] = (float)(yy3 + ty3);
 237                 y += stridey;
 238                 y[0] = (float)(yy4 + ty4);
 239                 continue;
 240 
 241 process1:
 242                 PROCESS(0)
 243                 continue;
 244 
 245 process2:
 246                 PROCESS(0)
 247                 PROCESS(1)
 248                 continue;
 249 
 250 process3:
 251                 PROCESS(0)
 252                 PROCESS(1)
 253                 PROCESS(2)
 254                 continue;
 255 
 256 process4:
 257                 PROCESS(0)
 258                 PROCESS(1)
 259                 PROCESS(2)
 260                 PROCESS(3)
 261         }
 262 }