Print this page
5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>


  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>
  31 #include "libm_synonyms.h"
  32 #include "libm_inlines.h"
  33 
  34 #ifdef _LITTLE_ENDIAN
  35 #define HI(x)   *(1+(int*)x)
  36 #define LO(x)   *(unsigned*)x
  37 #else
  38 #define HI(x)   *(int*)x
  39 #define LO(x)   *(1+(unsigned*)x)
  40 #endif
  41 
  42 #ifdef __RESTRICT
  43 #define restrict _Restrict
  44 #else
  45 #define restrict
  46 #endif
  47 
  48 /* double rsqrt(double x)
  49  *
  50  * Method :
  51  *      1. Special cases:


  66  *      Where:
  67  *              m = m0 + dm,
  68  *              m0 = 0.5 * (1 + k/64) for m = [0.5,         0.5+127/256), k = [0, 63];
  69  *              m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
  70  *              m0 = 2.0              for m = [1.0+127/128, 2.0),         k = 128.
  71  *      Then:
  72  *              1/sqrt(m0) is looked up in a table,
  73  *              1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)).
  74  *              1/sqrt(1 + (1/m0)*dm) is computed using approximation:
  75  *                      1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3)
  76  *                                              * z + a2) * z + a1) * z + a0
  77  *                      where z = [-1/128, 1/128].
  78  *
  79  * Accuracy:
  80  *      The maximum relative error for the approximating
  81  *      polynomial is 2**(-56.26).
  82  *      Maximum error observed: less than 0.563 ulp after 1.500.000.000
  83  *      results.
  84  */
  85 
  86 #define sqrt __sqrt
  87 
  88 extern double sqrt (double);
  89 extern const double __vlibm_TBL_rsqrt[];
  90 
  91 static void
  92 __vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey);
  93 
  94 #pragma no_inline(__vrsqrt_n)
  95 
  96 #define RETURN(ret)                                             \
  97 {                                                               \
  98         *py = (ret);                                            \
  99         py += stridey;                                          \
 100         if (n_n == 0)                                           \
 101         {                                                       \
 102                 spx = px; spy = py;                             \
 103                 hx = HI(px);                                    \
 104                 continue;                                       \
 105         }                                                       \
 106         n--;                                                    \
 107         break;                                                  \


 395                 LO(&res_c0) = 0;
 396 
 397                 px += stridex;
 398 
 399                 dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
 400                 dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
 401                 xx0 = dexp_hi0 * dexp_hi0;
 402                 xx0 = (res0 - res_c0) * xx0;
 403                 res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
 404 
 405                 res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
 406 
 407                 HI(&dsqrt_exp0) = sqrt_exp0;
 408                 LO(&dsqrt_exp0) = 0;
 409                 res0 *= dsqrt_exp0;
 410 
 411                 *py = res0;
 412                 py += stridey;
 413         }
 414 }
 415 


  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <sys/isa_defs.h>

  31 #include "libm_inlines.h"
  32 
  33 #ifdef _LITTLE_ENDIAN
  34 #define HI(x)   *(1+(int*)x)
  35 #define LO(x)   *(unsigned*)x
  36 #else
  37 #define HI(x)   *(int*)x
  38 #define LO(x)   *(1+(unsigned*)x)
  39 #endif
  40 
  41 #ifdef __RESTRICT
  42 #define restrict _Restrict
  43 #else
  44 #define restrict
  45 #endif
  46 
  47 /* double rsqrt(double x)
  48  *
  49  * Method :
  50  *      1. Special cases:


  65  *      Where:
  66  *              m = m0 + dm,
  67  *              m0 = 0.5 * (1 + k/64) for m = [0.5,         0.5+127/256), k = [0, 63];
  68  *              m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
  69  *              m0 = 2.0              for m = [1.0+127/128, 2.0),         k = 128.
  70  *      Then:
  71  *              1/sqrt(m0) is looked up in a table,
  72  *              1/m0 is computed as (1/sqrt(m0)) * (1/sqrt(m0)).
  73  *              1/sqrt(1 + (1/m0)*dm) is computed using approximation:
  74  *                      1/sqrt(1 + z) = (((((a6 * z + a5) * z + a4) * z + a3)
  75  *                                              * z + a2) * z + a1) * z + a0
  76  *                      where z = [-1/128, 1/128].
  77  *
  78  * Accuracy:
  79  *      The maximum relative error for the approximating
  80  *      polynomial is 2**(-56.26).
  81  *      Maximum error observed: less than 0.563 ulp after 1.500.000.000
  82  *      results.
  83  */
  84 


  85 extern double sqrt (double);
  86 extern const double __vlibm_TBL_rsqrt[];
  87 
  88 static void
  89 __vrsqrt_n(int n, double * restrict px, int stridex, double * restrict py, int stridey);
  90 
  91 #pragma no_inline(__vrsqrt_n)
  92 
  93 #define RETURN(ret)                                             \
  94 {                                                               \
  95         *py = (ret);                                            \
  96         py += stridey;                                          \
  97         if (n_n == 0)                                           \
  98         {                                                       \
  99                 spx = px; spy = py;                             \
 100                 hx = HI(px);                                    \
 101                 continue;                                       \
 102         }                                                       \
 103         n--;                                                    \
 104         break;                                                  \


 392                 LO(&res_c0) = 0;
 393 
 394                 px += stridex;
 395 
 396                 dexp_hi0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[0];
 397                 dexp_lo0 = ((double*)((char*)__vlibm_TBL_rsqrt + ind0))[1];
 398                 xx0 = dexp_hi0 * dexp_hi0;
 399                 xx0 = (res0 - res_c0) * xx0;
 400                 res0 = (((((K6 * xx0 + K5) * xx0 + K4) * xx0 + K3) * xx0 + K2) * xx0 + K1) * xx0;
 401 
 402                 res0 = dexp_hi0 * res0 + dexp_lo0 + dexp_hi0;
 403 
 404                 HI(&dsqrt_exp0) = sqrt_exp0;
 405                 LO(&dsqrt_exp0) = 0;
 406                 res0 *= dsqrt_exp0;
 407 
 408                 *py = res0;
 409                 py += stridey;
 410         }
 411 }