1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 /* 32 * int __rem_pio2m(x,y,e0,nx,prec,ipio2) 33 * double x[],y[]; int e0,nx,prec; const int ipio2[]; 34 * 35 * __rem_pio2m return the last three digits of N with 36 * y = x - N*pi/2 37 * so that |y| < pi/4. 38 * 39 * The method is to compute the integer (mod 8) and fraction parts of 40 * (2/pi)*x without doing the full multiplication. In general we 41 * skip the part of the product that are known to be a huge integer ( 42 * more accurately, = 0 mod 8 ). Thus the number of operations are 43 * independent of the exponent of the input. 44 * 45 * (2/PI) is represented by an array of 24-bit integers in ipio2[]. 46 * Here PI could as well be a machine value pi. 47 * 48 * Input parameters: 49 * x[] The input value (must be positive) is broken into nx 50 * pieces of 24-bit integers in double precision format. 51 * x[i] will be the i-th 24 bit of x. The scaled exponent 52 * of x[0] is given in input parameter e0 (i.e., x[0]*2^e0 53 * match x's up to 24 bits. 54 * 55 * Example of breaking a double z into x[0]+x[1]+x[2]: 56 * e0 = ilogb(z)-23 57 * z = scalbn(z,-e0) 58 * for i = 0,1,2 59 * x[i] = floor(z) 60 * z = (z-x[i])*2**24 61 * 62 * 63 * y[] ouput result in an array of double precision numbers. 64 * The dimension of y[] is: 65 * 24-bit precision 1 66 * 53-bit precision 2 67 * 64-bit precision 2 68 * 113-bit precision 3 69 * The actual value is the sum of them. Thus for 113-bit 70 * precsion, one may have to do something like: 71 * 72 * long double t,w,r_head, r_tail; 73 * t = (long double)y[2] + (long double)y[1]; 74 * w = (long double)y[0]; 75 * r_head = t+w; 76 * r_tail = w - (r_head - t); 77 * 78 * e0 The exponent of x[0] 79 * 80 * nx dimension of x[] 81 * 82 * prec an interger indicating the precision: 83 * 0 24 bits (single) 84 * 1 53 bits (double) 85 * 2 64 bits (extended) 86 * 3 113 bits (quad) 87 * 88 * ipio2[] 89 * integer array, contains the (24*i)-th to (24*i+23)-th 90 * bit of 2/pi or 2/PI after binary point. The corresponding 91 * floating value is 92 * 93 * ipio2[i] * 2^(-24(i+1)). 94 * 95 * External function: 96 * double scalbn( ), floor( ); 97 * 98 * 99 * Here is the description of some local variables: 100 * 101 * jk jk+1 is the initial number of terms of ipio2[] needed 102 * in the computation. The recommended value is 3,4,4, 103 * 6 for single, double, extended,and quad. 104 * 105 * jz local integer variable indicating the number of 106 * terms of ipio2[] used. 107 * 108 * jx nx - 1 109 * 110 * jv index for pointing to the suitable ipio2[] for the 111 * computation. In general, we want 112 * ( 2^e0*x[0] * ipio2[jv-1]*2^(-24jv) )/8 113 * is an integer. Thus 114 * e0-3-24*jv >= 0 or (e0-3)/24 >= jv 115 * Hence jv = max(0,(e0-3)/24). 116 * 117 * jp jp+1 is the number of terms in pio2[] needed, jp = jk. 118 * 119 * q[] double array with integral value, representing the 120 * 24-bits chunk of the product of x and 2/pi. 121 * 122 * q0 the corresponding exponent of q[0]. Note that the 123 * exponent for q[i] would be q0-24*i. 124 * 125 * pio2[] double precision array, obtained by cutting pi/2 126 * into 24 bits chunks. 127 * 128 * f[] ipio2[] in floating point 129 * 130 * iq[] integer array by breaking up q[] in 24-bits chunk. 131 * 132 * fq[] final product of x*(2/pi) in fq[0],..,fq[jk] 133 * 134 * ih integer. If >0 it indicats q[] is >= 0.5, hence 135 * it also indicates the *sign* of the result. 136 * 137 */ 138 139 #include "libm.h" 140 141 #if defined(__i386) && !defined(__amd64) 142 extern int __swapRP(int); 143 #endif 144 145 static const int init_jk[] = { 3, 4, 4, 6 }; /* initial value for jk */ 146 static const double pio2[] = { 147 1.57079625129699707031e+00, 7.54978941586159635335e-08, 148 5.39030252995776476554e-15, 3.28200341580791294123e-22, 149 1.27065575308067607349e-29, 1.22933308981111328932e-36, 150 2.73370053816464559624e-44, 2.16741683877804819444e-51, 151 }; 152 153 static const double zero = 0.0, 154 one = 1.0, 155 half = 0.5, 156 eight = 8.0, 157 eighth = 0.125, 158 two24 = 16777216.0, 159 twon24 = 5.960464477539062500E-8; 160 161 int 162 __rem_pio2m(double *x, double *y, int e0, int nx, int prec, const int *ipio2) 163 { 164 int jz, jx, jv, jp, jk, carry, n, iq[20]; 165 int i, j, k, m, q0, ih; 166 double z, fw, f[20], fq[20], q[20]; 167 168 #if defined(__i386) && !defined(__amd64) 169 int rp; 170 171 rp = __swapRP(fp_extended); 172 #endif 173 174 /* initialize jk */ 175 jp = jk = init_jk[prec]; 176 177 /* determine jx,jv,q0, note that 3>q0 */ 178 jx = nx - 1; 179 jv = (e0 - 3) / 24; 180 181 if (jv < 0) 182 jv = 0; 183 184 q0 = e0 - 24 * (jv + 1); 185 186 /* set up f[0] to f[jx+jk] where f[jx+jk] = ipio2[jv+jk] */ 187 j = jv - jx; 188 m = jx + jk; 189 190 for (i = 0; i <= m; i++, j++) 191 f[i] = (j < 0) ? zero : (double)ipio2[j]; 192 193 /* compute q[0],q[1],...q[jk] */ 194 for (i = 0; i <= jk; i++) { 195 for (j = 0, fw = zero; j <= jx; j++) 196 fw += x[j] * f[jx + i - j]; 197 198 q[i] = fw; 199 } 200 201 jz = jk; 202 203 recompute: 204 /* distill q[] into iq[] reversingly */ 205 for (i = 0, j = jz, z = q[jz]; j > 0; i++, j--) { 206 fw = (double)((int)(twon24 * z)); 207 iq[i] = (int)(z - two24 * fw); 208 z = q[j - 1] + fw; 209 } 210 211 /* compute n */ 212 z = scalbn(z, q0); /* actual value of z */ 213 z -= eight * floor(z * eighth); /* trim off integer >= 8 */ 214 n = (int)z; 215 z -= (double)n; 216 ih = 0; 217 218 if (q0 > 0) { /* need iq[jz-1] to determine n */ 219 i = (iq[jz - 1] >> (24 - q0)); 220 n += i; 221 iq[jz - 1] -= i << (24 - q0); 222 ih = iq[jz - 1] >> (23 - q0); 223 } else if (q0 == 0) { 224 ih = iq[jz - 1] >> 23; 225 } else if (z >= half) { 226 ih = 2; 227 } 228 229 if (ih > 0) { /* q > 0.5 */ 230 n += 1; 231 carry = 0; 232 233 for (i = 0; i < jz; i++) { /* compute 1-q */ 234 j = iq[i]; 235 236 if (carry == 0) { 237 if (j != 0) { 238 carry = 1; 239 iq[i] = 0x1000000 - j; 240 } 241 } else { 242 iq[i] = 0xffffff - j; 243 } 244 } 245 246 if (q0 > 0) { /* rare case: chance is 1 in 12 */ 247 switch (q0) { 248 case 1: 249 iq[jz - 1] &= 0x7fffff; 250 break; 251 case 2: 252 iq[jz - 1] &= 0x3fffff; 253 break; 254 } 255 } 256 257 if (ih == 2) { 258 z = one - z; 259 260 if (carry != 0) 261 z -= scalbn(one, q0); 262 } 263 } 264 265 /* check if recomputation is needed */ 266 if (z == zero) { 267 j = 0; 268 269 for (i = jz - 1; i >= jk; i--) 270 j |= iq[i]; 271 272 if (j == 0) { /* need recomputation */ 273 /* set k to no. of terms needed */ 274 for (k = 1; iq[jk - k] == 0; k++) 275 ; 276 277 /* add q[jz+1] to q[jz+k] */ 278 for (i = jz + 1; i <= jz + k; i++) { 279 f[jx + i] = (double)ipio2[jv + i]; 280 281 for (j = 0, fw = zero; j <= jx; j++) 282 fw += x[j] * f[jx + i - j]; 283 284 q[i] = fw; 285 } 286 287 jz += k; 288 goto recompute; 289 } 290 } 291 292 /* cut out zero terms */ 293 if (z == zero) { 294 jz -= 1; 295 q0 -= 24; 296 297 while (iq[jz] == 0) { 298 jz--; 299 q0 -= 24; 300 } 301 } else { /* break z into 24-bit if neccessary */ 302 z = scalbn(z, -q0); 303 304 if (z >= two24) { 305 fw = (double)((int)(twon24 * z)); 306 iq[jz] = (int)(z - two24 * fw); 307 jz += 1; 308 q0 += 24; 309 iq[jz] = (int)fw; 310 } else { 311 iq[jz] = (int)z; 312 } 313 } 314 315 /* convert integer "bit" chunk to floating-point value */ 316 fw = scalbn(one, q0); 317 318 for (i = jz; i >= 0; i--) { 319 q[i] = fw * (double)iq[i]; 320 fw *= twon24; 321 } 322 323 /* compute pio2[0,...,jp]*q[jz,...,0] */ 324 for (i = jz; i >= 0; i--) { 325 for (fw = zero, k = 0; k <= jp && k <= jz - i; k++) 326 fw += pio2[k] * q[i + k]; 327 328 fq[jz - i] = fw; 329 } 330 331 /* compress fq[] into y[] */ 332 switch (prec) { 333 case 0: 334 fw = zero; 335 336 for (i = jz; i >= 0; i--) 337 fw += fq[i]; 338 339 y[0] = (ih == 0) ? fw : -fw; 340 break; 341 342 case 1: 343 case 2: 344 fw = zero; 345 346 for (i = jz; i >= 0; i--) 347 fw += fq[i]; 348 349 y[0] = (ih == 0) ? fw : -fw; 350 fw = fq[0] - fw; 351 352 for (i = 1; i <= jz; i++) 353 fw += fq[i]; 354 355 y[1] = (ih == 0) ? fw : -fw; 356 break; 357 358 default: 359 360 for (i = jz; i > 0; i--) { 361 fw = fq[i - 1] + fq[i]; 362 fq[i] += fq[i - 1] - fw; 363 fq[i - 1] = fw; 364 } 365 366 for (i = jz; i > 1; i--) { 367 fw = fq[i - 1] + fq[i]; 368 fq[i] += fq[i - 1] - fw; 369 fq[i - 1] = fw; 370 } 371 372 for (fw = zero, i = jz; i >= 2; i--) 373 fw += fq[i]; 374 375 if (ih == 0) { 376 y[0] = fq[0]; 377 y[1] = fq[1]; 378 y[2] = fw; 379 } else { 380 y[0] = -fq[0]; 381 y[1] = -fq[1]; 382 y[2] = -fw; 383 } 384 } 385 386 #if defined(__i386) && !defined(__amd64) 387 (void) __swapRP(rp); 388 #endif 389 return (n & 7); 390 }