Print this page
5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>


  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include "libm_synonyms.h"
  31 #include "libm_inlines.h"
  32 
  33 #ifdef __RESTRICT
  34 #define restrict _Restrict
  35 #else
  36 #define restrict
  37 #endif
  38 
  39 /* float rsqrtf(float x)
  40  *
  41  * Method :
  42  *      1. Special cases:
  43  *              for x = NaN                             => QNaN;
  44  *              for x = +Inf                            => 0;
  45  *              for x is negative, -Inf                 => QNaN + invalid;
  46  *              for x = +0                              => +Inf + divide-by-zero;
  47  *              for x = -0                              => -Inf + divide-by-zero.
  48  *      2. Computes reciprocal square root from:
  49  *              x = m * 2**n
  50  *      Where:


  54  *              rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
  55  *      2. Computes 1/sqrt(m) from:
  56  *              1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
  57  *      Where:
  58  *              m = m0 + dm,
  59  *              m0 = 0.5 * (1 + k/64) for m = [0.5,         0.5+127/256), k = [0, 63];
  60  *              m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
  61  *      Then:
  62  *              1/sqrt(m0), 1/m0 are looked up in a table,
  63  *              1/sqrt(1 + (1/m0)*dm) is computed using approximation:
  64  *                      1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
  65  *                      where z = [-1/64, 1/64].
  66  *
  67  * Accuracy:
  68  *      The maximum relative error for the approximating
  69  *      polynomial is 2**(-27.87).
  70  *      Maximum error observed: less than 0.534 ulp for the
  71  *      whole float type range.
  72  */
  73 
  74 #define sqrtf __sqrtf
  75 
  76 extern float sqrtf(float);
  77 
  78 static const double __TBL_rsqrtf[] = {
  79 /*
  80 i = [0,63]
  81  TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
  82  TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
  83 i = [64,127]
  84  TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
  85  TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
  86 */
  87  1.1920928955078125000e-07, 1.4142135623730951455e+00,
  88  1.1737530048076923728e-07, 1.4032928308912466786e+00,
  89  1.1559688683712121533e-07, 1.3926212476455828160e+00,
  90  1.1387156016791044559e-07, 1.3821894809301762397e+00,
  91  1.1219697840073529256e-07, 1.3719886811400707760e+00,
  92  1.1057093523550724772e-07, 1.3620104492139977204e+00,
  93  1.0899135044642856803e-07, 1.3522468075656264297e+00,
  94  1.0745626100352112918e-07, 1.3426901732747025253e+00,
  95  1.0596381293402777190e-07, 1.3333333333333332593e+00,


 486 
 487                 iexp0 = ax0 >> 24;
 488                 iexp0 = 0x3f - iexp0;
 489                 iexp0 = iexp0 << 23;
 490 
 491                 si0 = (ax0 >> 13) & 0x7f0;
 492 
 493                 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
 494                 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
 495                 iax0 = ax0 & 0x7ffe0000;
 496                 iax0 = ax0 - iax0;
 497                 xx0 = iax0 * tbl_div0;
 498                 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
 499 
 500                 fres0 = res0;
 501                 iexp0 += *(int*)&fres0;
 502                 *(int*)py = iexp0;
 503                 py += stridey;
 504         }
 505 }
 506 


  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 

  30 #include "libm_inlines.h"
  31 
  32 #ifdef __RESTRICT
  33 #define restrict _Restrict
  34 #else
  35 #define restrict
  36 #endif
  37 
  38 /* float rsqrtf(float x)
  39  *
  40  * Method :
  41  *      1. Special cases:
  42  *              for x = NaN                             => QNaN;
  43  *              for x = +Inf                            => 0;
  44  *              for x is negative, -Inf                 => QNaN + invalid;
  45  *              for x = +0                              => +Inf + divide-by-zero;
  46  *              for x = -0                              => -Inf + divide-by-zero.
  47  *      2. Computes reciprocal square root from:
  48  *              x = m * 2**n
  49  *      Where:


  53  *              rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
  54  *      2. Computes 1/sqrt(m) from:
  55  *              1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
  56  *      Where:
  57  *              m = m0 + dm,
  58  *              m0 = 0.5 * (1 + k/64) for m = [0.5,         0.5+127/256), k = [0, 63];
  59  *              m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
  60  *      Then:
  61  *              1/sqrt(m0), 1/m0 are looked up in a table,
  62  *              1/sqrt(1 + (1/m0)*dm) is computed using approximation:
  63  *                      1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
  64  *                      where z = [-1/64, 1/64].
  65  *
  66  * Accuracy:
  67  *      The maximum relative error for the approximating
  68  *      polynomial is 2**(-27.87).
  69  *      Maximum error observed: less than 0.534 ulp for the
  70  *      whole float type range.
  71  */
  72 


  73 extern float sqrtf(float);
  74 
  75 static const double __TBL_rsqrtf[] = {
  76 /*
  77 i = [0,63]
  78  TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
  79  TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
  80 i = [64,127]
  81  TBL[2*i  ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
  82  TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
  83 */
  84  1.1920928955078125000e-07, 1.4142135623730951455e+00,
  85  1.1737530048076923728e-07, 1.4032928308912466786e+00,
  86  1.1559688683712121533e-07, 1.3926212476455828160e+00,
  87  1.1387156016791044559e-07, 1.3821894809301762397e+00,
  88  1.1219697840073529256e-07, 1.3719886811400707760e+00,
  89  1.1057093523550724772e-07, 1.3620104492139977204e+00,
  90  1.0899135044642856803e-07, 1.3522468075656264297e+00,
  91  1.0745626100352112918e-07, 1.3426901732747025253e+00,
  92  1.0596381293402777190e-07, 1.3333333333333332593e+00,


 483 
 484                 iexp0 = ax0 >> 24;
 485                 iexp0 = 0x3f - iexp0;
 486                 iexp0 = iexp0 << 23;
 487 
 488                 si0 = (ax0 >> 13) & 0x7f0;
 489 
 490                 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
 491                 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
 492                 iax0 = ax0 & 0x7ffe0000;
 493                 iax0 = ax0 - iax0;
 494                 xx0 = iax0 * tbl_div0;
 495                 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
 496 
 497                 fres0 = res0;
 498                 iexp0 += *(int*)&fres0;
 499                 *(int*)py = iexp0;
 500                 py += stridey;
 501         }
 502 }