il_11210 Wdiff usr/src/lib/libm/common/C/__rem_pio2m.c

Print this page

11210 libm should be cstyle(1ONBLD) clean

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/common/C/__rem_pio2m.c
          +++ new/usr/src/lib/libm/common/C/__rem_pio2m.c

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions

↓ open down ↓

10 lines elided

↑ open up ↑

  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
       21 +
  21   22  /*
  22   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23   24   */
       25 +
  24   26  /*
  25   27   * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26   28   * Use is subject to license terms.
  27   29   */
  28   30  
  29   31  /*
  30   32   * int __rem_pio2m(x,y,e0,nx,prec,ipio2)
  31   33   * double x[],y[]; int e0,nx,prec; const int ipio2[];
  32   34   *
  33   35   * __rem_pio2m return the last three digits of N with

  34   36   *              y = x - N*pi/2
  35   37   * so that |y| < pi/4.
  36   38   *

↓ open down ↓

3 lines elided

↑ open up ↑

  37   39   * The method is to compute the integer (mod 8) and fraction parts of
  38   40   * (2/pi)*x without doing the full multiplication. In general we
  39   41   * skip the part of the product that are known to be a huge integer (
  40   42   * more accurately, = 0 mod 8 ). Thus the number of operations are
  41   43   * independent of the exponent of the input.
  42   44   *
  43   45   * (2/PI) is represented by an array of 24-bit integers in ipio2[].
  44   46   * Here PI could as well be a machine value pi.
  45   47   *
  46   48   * Input parameters:
  47      - *      x[]     The input value (must be positive) is broken into nx
       49 + *      x[]     The input value (must be positive) is broken into nx
  48   50   *              pieces of 24-bit integers in double precision format.
  49   51   *              x[i] will be the i-th 24 bit of x. The scaled exponent
  50   52   *              of x[0] is given in input parameter e0 (i.e., x[0]*2^e0
  51   53   *              match x's up to 24 bits.
  52   54   *
  53   55   *              Example of breaking a double z into x[0]+x[1]+x[2]:
  54   56   *                      e0 = ilogb(z)-23
  55   57   *                      z  = scalbn(z,-e0)
  56   58   *              for i = 0,1,2
  57   59   *                      x[i] =  floor(z)

  58   60   *                      z    = (z-x[i])*2**24
  59   61   *
  60   62   *
  61   63   *      y[]     ouput result in an array of double precision numbers.
  62   64   *              The dimension of y[] is:
  63   65   *                      24-bit  precision       1
  64   66   *                      53-bit  precision       2
  65   67   *                      64-bit  precision       2
  66   68   *                      113-bit precision       3
  67   69   *              The actual value is the sum of them. Thus for 113-bit
  68   70   *              precsion, one may have to do something like:
  69   71   *

↓ open down ↓

12 lines elided

↑ open up ↑

  70   72   *              long double t,w,r_head, r_tail;
  71   73   *              t = (long double)y[2] + (long double)y[1];
  72   74   *              w = (long double)y[0];
  73   75   *              r_head = t+w;
  74   76   *              r_tail = w - (r_head - t);
  75   77   *
  76   78   *      e0      The exponent of x[0]
  77   79   *
  78   80   *      nx      dimension of x[]
  79   81   *
  80      - *      prec    an interger indicating the precision:
       82 + *      prec    an interger indicating the precision:
  81   83   *                      0       24  bits (single)
  82   84   *                      1       53  bits (double)
  83   85   *                      2       64  bits (extended)
  84   86   *                      3       113 bits (quad)
  85   87   *
  86   88   *      ipio2[]
  87   89   *              integer array, contains the (24*i)-th to (24*i+23)-th
  88   90   *              bit of 2/pi or 2/PI after binary point. The corresponding
  89   91   *              floating value is
  90   92   *
  91   93   *                      ipio2[i] * 2^(-24(i+1)).
  92   94   *
  93   95   * External function:
  94   96   *      double scalbn( ), floor( );
  95   97   *
  96   98   *
  97   99   * Here is the description of some local variables:
  98  100   *
  99      - *      jk      jk+1 is the initial number of terms of ipio2[] needed
      101 + *      jk      jk+1 is the initial number of terms of ipio2[] needed
 100  102   *              in the computation. The recommended value is 3,4,4,
 101  103   *              6 for single, double, extended,and quad.
 102  104   *
 103      - *      jz      local integer variable indicating the number of
      105 + *      jz      local integer variable indicating the number of
 104  106   *              terms of ipio2[] used.
 105  107   *
 106  108   *      jx      nx - 1
 107  109   *
 108  110   *      jv      index for pointing to the suitable ipio2[] for the
 109  111   *              computation. In general, we want
 110  112   *                      ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8
 111  113   *              is an integer. Thus
 112  114   *                      e0-3-24*jv >= 0 or (e0-3)/24 >= jv
 113  115   *              Hence jv = max(0,(e0-3)/24).
 114  116   *
 115  117   *      jp      jp+1 is the number of terms in pio2[] needed, jp = jk.
 116  118   *
 117      - *      q[]     double array with integral value, representing the
      119 + *      q[]     double array with integral value, representing the
 118  120   *              24-bits chunk of the product of x and 2/pi.
 119  121   *
 120  122   *      q0      the corresponding exponent of q[0]. Note that the
 121  123   *              exponent for q[i] would be q0-24*i.
 122  124   *
 123  125   *      pio2[]  double precision array, obtained by cutting pi/2
 124  126   *              into 24 bits chunks.
 125  127   *
 126  128   *      f[]     ipio2[] in floating point
 127  129   *

 128  130   *      iq[]    integer array by breaking up q[] in 24-bits chunk.
 129  131   *
 130  132   *      fq[]    final product of x*(2/pi) in fq[0],..,fq[jk]
 131  133   *
 132  134   *      ih      integer. If >0 it indicats q[] is >= 0.5, hence

↓ open down ↓

5 lines elided

↑ open up ↑

 133  135   *              it also indicates the *sign* of the result.
 134  136   *
 135  137   */
 136  138  
 137  139  #include "libm.h"
 138  140  
 139  141  #if defined(__i386) && !defined(__amd64)
 140  142  extern int __swapRP(int);
 141  143  #endif
 142  144  
 143      -static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */
 144      -
      145 +static const int init_jk[] = { 3, 4, 4, 6 };    /* initial value for jk */
 145  146  static const double pio2[] = {
 146      -        1.57079625129699707031e+00,
 147      -        7.54978941586159635335e-08,
 148      -        5.39030252995776476554e-15,
 149      -        3.28200341580791294123e-22,
 150      -        1.27065575308067607349e-29,
 151      -        1.22933308981111328932e-36,
 152      -        2.73370053816464559624e-44,
 153      -        2.16741683877804819444e-51,
      147 +        1.57079625129699707031e+00, 7.54978941586159635335e-08,
      148 +        5.39030252995776476554e-15, 3.28200341580791294123e-22,
      149 +        1.27065575308067607349e-29, 1.22933308981111328932e-36,
      150 +        2.73370053816464559624e-44, 2.16741683877804819444e-51,
 154  151  };
 155  152  
 156      -static const double
 157      -        zero    = 0.0,
 158      -        one     = 1.0,
 159      -        half    = 0.5,
 160      -        eight   = 8.0,
 161      -        eighth  = 0.125,
 162      -        two24   = 16777216.0,
 163      -        twon24  = 5.960464477539062500E-8;
      153 +static const double zero = 0.0,
      154 +        one = 1.0,
      155 +        half = 0.5,
      156 +        eight = 8.0,
      157 +        eighth = 0.125,
      158 +        two24 = 16777216.0,
      159 +        twon24 = 5.960464477539062500E-8;
 164  160  
 165  161  int
 166  162  __rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2)
 167  163  {
 168      -        int     jz, jx, jv, jp, jk, carry, n, iq[20];
 169      -        int     i, j, k, m, q0, ih;
 170      -        double  z, fw, f[20], fq[20], q[20];
      164 +        int jz, jx, jv, jp, jk, carry, n, iq[20];
      165 +        int i, j, k, m, q0, ih;
      166 +        double z, fw, f[20], fq[20], q[20];
      167 +
 171  168  #if defined(__i386) && !defined(__amd64)
 172      -        int     rp;
      169 +        int rp;
 173  170  
 174  171          rp = __swapRP(fp_extended);
 175  172  #endif
 176  173  
 177  174          /* initialize jk */
 178  175          jp = jk = init_jk[prec];
 179  176  
 180  177          /* determine jx,jv,q0, note that 3>q0 */
 181  178          jx = nx - 1;
 182  179          jv = (e0 - 3) / 24;
      180 +
 183  181          if (jv < 0)
 184  182                  jv = 0;
      183 +
 185  184          q0 = e0 - 24 * (jv + 1);
 186  185  
 187  186          /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */
 188  187          j = jv - jx;
 189  188          m = jx + jk;
      189 +
 190  190          for (i = 0; i <= m; i++, j++)
 191      -                f[i] = (j < 0)? zero : (double)ipio2[j];
      191 +                f[i] = (j < 0) ? zero : (double)ipio2[j];
 192  192  
 193  193          /* compute q[0],q[1],...q[jk] */
 194  194          for (i = 0; i <= jk; i++) {
 195  195                  for (j = 0, fw = zero; j <= jx; j++)
 196      -                        fw += x[j] * f[jx+i-j];
      196 +                        fw += x[j] * f[jx + i - j];
      197 +
 197  198                  q[i] = fw;
 198  199          }
 199  200  
 200  201          jz = jk;
      202 +
 201  203  recompute:
 202  204          /* distill q[] into iq[] reversingly */
 203  205          for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) {
 204  206                  fw = (double)((int)(twon24 * z));
 205  207                  iq[i] = (int)(z - two24 * fw);
 206      -                z = q[j-1] + fw;
      208 +                z = q[j - 1] + fw;
 207  209          }
 208  210  
 209  211          /* compute n */
 210  212          z = scalbn(z, q0);              /* actual value of z */
 211  213          z -= eight * floor(z * eighth); /* trim off integer >= 8 */
 212  214          n = (int)z;
 213  215          z -= (double)n;
 214  216          ih = 0;
      217 +
 215  218          if (q0 > 0) {                   /* need iq[jz-1] to determine n */
 216      -                i = (iq[jz-1] >> (24 - q0));
      219 +                i = (iq[jz - 1] >> (24 - q0));
 217  220                  n += i;
 218      -                iq[jz-1] -= i << (24 - q0);
 219      -                ih = iq[jz-1] >> (23 - q0);
      221 +                iq[jz - 1] -= i << (24 - q0);
      222 +                ih = iq[jz - 1] >> (23 - q0);
 220  223          } else if (q0 == 0) {
 221      -                ih = iq[jz-1] >> 23;
      224 +                ih = iq[jz - 1] >> 23;
 222  225          } else if (z >= half) {
 223  226                  ih = 2;
 224  227          }
 225  228  
 226      -        if (ih > 0) {   /* q > 0.5 */
      229 +        if (ih > 0) {                   /* q > 0.5 */
 227  230                  n += 1;
 228  231                  carry = 0;
      232 +
 229  233                  for (i = 0; i < jz; i++) {      /* compute 1-q */
 230  234                          j = iq[i];
      235 +
 231  236                          if (carry == 0) {
 232  237                                  if (j != 0) {
 233  238                                          carry = 1;
 234  239                                          iq[i] = 0x1000000 - j;
 235  240                                  }
 236  241                          } else {
 237  242                                  iq[i] = 0xffffff - j;
 238  243                          }
 239  244                  }
      245 +
 240  246                  if (q0 > 0) {           /* rare case: chance is 1 in 12 */
 241  247                          switch (q0) {
 242  248                          case 1:
 243      -                                iq[jz-1] &= 0x7fffff;
      249 +                                iq[jz - 1] &= 0x7fffff;
 244  250                                  break;
 245  251                          case 2:
 246      -                                iq[jz-1] &= 0x3fffff;
      252 +                                iq[jz - 1] &= 0x3fffff;
 247  253                                  break;
 248  254                          }
 249  255                  }
      256 +
 250  257                  if (ih == 2) {
 251  258                          z = one - z;
      259 +
 252  260                          if (carry != 0)
 253  261                                  z -= scalbn(one, q0);
 254  262                  }
 255  263          }
 256  264  
 257  265          /* check if recomputation is needed */
 258  266          if (z == zero) {
 259  267                  j = 0;
      268 +
 260  269                  for (i = jz - 1; i >= jk; i--)
 261  270                          j |= iq[i];
 262      -                if (j == 0) {   /* need recomputation */
      271 +
      272 +                if (j == 0) {           /* need recomputation */
 263  273                          /* set k to no. of terms needed */
 264      -                        for (k = 1; iq[jk-k] == 0; k++)
      274 +                        for (k = 1; iq[jk - k] == 0; k++)
 265  275                                  ;
 266  276  
 267  277                          /* add q[jz+1] to q[jz+k] */
 268  278                          for (i = jz + 1; i <= jz + k; i++) {
 269      -                                f[jx+i] = (double)ipio2[jv+i];
      279 +                                f[jx + i] = (double)ipio2[jv + i];
      280 +
 270  281                                  for (j = 0, fw = zero; j <= jx; j++)
 271      -                                        fw += x[j] * f[jx+i-j];
      282 +                                        fw += x[j] * f[jx + i - j];
      283 +
 272  284                                  q[i] = fw;
 273  285                          }
      286 +
 274  287                          jz += k;
 275  288                          goto recompute;
 276  289                  }
 277  290          }
 278  291  
 279  292          /* cut out zero terms */
 280  293          if (z == zero) {
 281  294                  jz -= 1;
 282  295                  q0 -= 24;
      296 +
 283  297                  while (iq[jz] == 0) {
 284  298                          jz--;
 285  299                          q0 -= 24;
 286  300                  }
 287      -        } else {                /* break z into 24-bit if neccessary */
      301 +        } else {                        /* break z into 24-bit if neccessary */
 288  302                  z = scalbn(z, -q0);
      303 +
 289  304                  if (z >= two24) {
 290  305                          fw = (double)((int)(twon24 * z));
 291  306                          iq[jz] = (int)(z - two24 * fw);
 292  307                          jz += 1;
 293  308                          q0 += 24;
 294  309                          iq[jz] = (int)fw;
 295  310                  } else {
 296  311                          iq[jz] = (int)z;
 297  312                  }
 298  313          }
 299  314  
 300  315          /* convert integer "bit" chunk to floating-point value */
 301  316          fw = scalbn(one, q0);
      317 +
 302  318          for (i = jz; i >= 0; i--) {
 303  319                  q[i] = fw * (double)iq[i];
 304  320                  fw *= twon24;
 305  321          }
 306  322  
 307  323          /* compute pio2[0,...,jp]*q[jz,...,0] */
 308  324          for (i = jz; i >= 0; i--) {
 309  325                  for (fw = zero, k = 0; k <= jp && k <= jz - i; k++)
 310      -                        fw += pio2[k] * q[i+k];
 311      -                fq[jz-i] = fw;
      326 +                        fw += pio2[k] * q[i + k];
      327 +
      328 +                fq[jz - i] = fw;
 312  329          }
 313  330  
 314  331          /* compress fq[] into y[] */
 315  332          switch (prec) {
 316  333          case 0:
 317  334                  fw = zero;
      335 +
 318  336                  for (i = jz; i >= 0; i--)
 319  337                          fw += fq[i];
 320      -                y[0] = (ih == 0)? fw : -fw;
      338 +
      339 +                y[0] = (ih == 0) ? fw : -fw;
 321  340                  break;
 322  341  
 323  342          case 1:
 324  343          case 2:
 325  344                  fw = zero;
      345 +
 326  346                  for (i = jz; i >= 0; i--)
 327  347                          fw += fq[i];
 328      -                y[0] = (ih == 0)? fw : -fw;
      348 +
      349 +                y[0] = (ih == 0) ? fw : -fw;
 329  350                  fw = fq[0] - fw;
      351 +
 330  352                  for (i = 1; i <= jz; i++)
 331  353                          fw += fq[i];
 332      -                y[1] = (ih == 0)? fw : -fw;
      354 +
      355 +                y[1] = (ih == 0) ? fw : -fw;
 333  356                  break;
 334  357  
 335  358          default:
      359 +
 336  360                  for (i = jz; i > 0; i--) {
 337      -                        fw = fq[i-1] + fq[i];
 338      -                        fq[i] += fq[i-1] - fw;
 339      -                        fq[i-1] = fw;
      361 +                        fw = fq[i - 1] + fq[i];
      362 +                        fq[i] += fq[i - 1] - fw;
      363 +                        fq[i - 1] = fw;
 340  364                  }
      365 +
 341  366                  for (i = jz; i > 1; i--) {
 342      -                        fw = fq[i-1] + fq[i];
 343      -                        fq[i] += fq[i-1] - fw;
 344      -                        fq[i-1] = fw;
      367 +                        fw = fq[i - 1] + fq[i];
      368 +                        fq[i] += fq[i - 1] - fw;
      369 +                        fq[i - 1] = fw;
 345  370                  }
      371 +
 346  372                  for (fw = zero, i = jz; i >= 2; i--)
 347  373                          fw += fq[i];
      374 +
 348  375                  if (ih == 0) {
 349  376                          y[0] = fq[0];
 350  377                          y[1] = fq[1];
 351  378                          y[2] = fw;
 352  379                  } else {
 353  380                          y[0] = -fq[0];
 354  381                          y[1] = -fq[1];
 355  382                          y[2] = -fw;
 356  383                  }
 357  384          }
 358  385  
 359  386  #if defined(__i386) && !defined(__amd64)
 360  387          (void) __swapRP(rp);
 361  388  #endif
 362  389          return (n & 7);
 363  390  }

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX