1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #ifdef __RESTRICT 31 #define restrict _Restrict 32 #else 33 #define restrict 34 #endif 35 36 extern const double __vlibm_TBL_atan1[]; 37 38 static const double 39 pio4 = 7.8539816339744827900e-01, 40 pio2 = 1.5707963267948965580e+00, 41 pi = 3.1415926535897931160e+00; 42 43 static const float 44 zero = 0.0f, 45 one = 1.0f, 46 q1 = -3.3333333333296428046e-01f, 47 q2 = 1.9999999186853752618e-01f, 48 twop24 = 16777216.0f; 49 50 void 51 __vatan2f( int n, float * restrict y, int stridey, float * restrict x, 52 int stridex, float * restrict z, int stridez ) 53 { 54 float x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2; 55 double ah0, ah1, ah2; 56 double t0, t1, t2; 57 double sx0, sx1, sx2; 58 double sign0, sign1, sign2; 59 int i, k0, k1, k2, hx, sx, sy; 60 int hy0, hy1, hy2; 61 float base0, base1, base2; 62 double num0, num1, num2; 63 double den0, den1, den2; 64 double dx0, dx1, dx2; 65 double dy0, dy1, dy2; 66 double db0, db1, db2; 67 68 do 69 { 70 loop0: 71 hy0 = *(int*)y; 72 hx = *(int*)x; 73 sign0 = one; 74 sy = hy0 & 0x80000000; 75 hy0 &= ~0x80000000; 76 77 sx = hx & 0x80000000; 78 hx &= ~0x80000000; 79 80 if ( hy0 > hx ) 81 { 82 x0 = *y; 83 y0 = *x; 84 i = hx; 85 hx = hy0; 86 hy0 = i; 87 if ( sy ) 88 { 89 x0 = -x0; 90 sign0 = -sign0; 91 } 92 if ( sx ) 93 { 94 y0 = -y0; 95 ah0 = pio2; 96 } 97 else 98 { 99 ah0 = -pio2; 100 sign0 = -sign0; 101 } 102 } 103 else 104 { 105 y0 = *y; 106 x0 = *x; 107 if ( sy ) 108 { 109 y0 = -y0; 110 sign0 = -sign0; 111 } 112 if ( sx ) 113 { 114 x0 = -x0; 115 ah0 = -pi; 116 sign0 = -sign0; 117 } 118 else 119 ah0 = zero; 120 } 121 122 if ( hx >= 0x7f800000 || hx - hy0 >= 0x0c800000 ) 123 { 124 if ( hx >= 0x7f800000 ) 125 { 126 if ( hx ^ 0x7f800000 ) /* nan */ 127 ah0 = x0 + y0; 128 else if ( hy0 >= 0x7f800000 ) 129 ah0 += pio4; 130 } 131 else if ( (int) ah0 == 0 ) 132 ah0 = y0 / x0; 133 *z = (sign0 == one) ? ah0 : -ah0; 134 /* sign0*ah0 would change nan behavior relative to previous release */ 135 x += stridex; 136 y += stridey; 137 z += stridez; 138 i = 0; 139 if ( --n <= 0 ) 140 break; 141 goto loop0; 142 } 143 if (hy0 < 0x00800000) { 144 if ( hy0 == 0 ) 145 { 146 *z = sign0 * (float) ah0; 147 x += stridex; 148 y += stridey; 149 z += stridez; 150 i = 0; 151 if ( --n <= 0 ) 152 break; 153 goto loop0; 154 } 155 y0 *= twop24; /* scale subnormal y */ 156 x0 *= twop24; /* scale possibly subnormal x */ 157 hy0 = *(int*)&y0; 158 hx = *(int*)&x0; 159 } 160 pz0 = z; 161 162 k0 = ( hy0 - hx + 0x3f800000 ) & 0xfff80000; 163 if( k0 >= 0x3C800000 ) /* if |x| >= (1/64)... */ 164 { 165 *(int*)&base0 = k0; 166 k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 167 k0 += 4; 168 /* skip over 0,0,pi/2,pi/2 */ 169 } 170 else /* |x| < 1/64 */ 171 { 172 k0 = 0; 173 base0 = zero; 174 } 175 176 x += stridex; 177 y += stridey; 178 z += stridez; 179 i = 1; 180 if ( --n <= 0 ) 181 break; 182 183 184 loop1: 185 hy1 = *(int*)y; 186 hx = *(int*)x; 187 sign1 = one; 188 sy = hy1 & 0x80000000; 189 hy1 &= ~0x80000000; 190 191 sx = hx & 0x80000000; 192 hx &= ~0x80000000; 193 194 if ( hy1 > hx ) 195 { 196 x1 = *y; 197 y1 = *x; 198 i = hx; 199 hx = hy1; 200 hy1 = i; 201 if ( sy ) 202 { 203 x1 = -x1; 204 sign1 = -sign1; 205 } 206 if ( sx ) 207 { 208 y1 = -y1; 209 ah1 = pio2; 210 } 211 else 212 { 213 ah1 = -pio2; 214 sign1 = -sign1; 215 } 216 } 217 else 218 { 219 y1 = *y; 220 x1 = *x; 221 if ( sy ) 222 { 223 y1 = -y1; 224 sign1 = -sign1; 225 } 226 if ( sx ) 227 { 228 x1 = -x1; 229 ah1 = -pi; 230 sign1 = -sign1; 231 } 232 else 233 ah1 = zero; 234 } 235 236 if ( hx >= 0x7f800000 || hx - hy1 >= 0x0c800000 ) 237 { 238 if ( hx >= 0x7f800000 ) 239 { 240 if ( hx ^ 0x7f800000 ) /* nan */ 241 ah1 = x1 + y1; 242 else if ( hy1 >= 0x7f800000 ) 243 ah1 += pio4; 244 } 245 else if ( (int) ah1 == 0 ) 246 ah1 = y1 / x1; 247 *z = (sign1 == one)? ah1 : -ah1; 248 x += stridex; 249 y += stridey; 250 z += stridez; 251 i = 1; 252 if ( --n <= 0 ) 253 break; 254 goto loop1; 255 } 256 if (hy1 < 0x00800000) { 257 if ( hy1 == 0 ) 258 { 259 *z = sign1 * (float) ah1; 260 x += stridex; 261 y += stridey; 262 z += stridez; 263 i = 1; 264 if ( --n <= 0 ) 265 break; 266 goto loop1; 267 } 268 y1 *= twop24; /* scale subnormal y */ 269 x1 *= twop24; /* scale possibly subnormal x */ 270 hy1 = *(int*)&y1; 271 hx = *(int*)&x1; 272 } 273 pz1 = z; 274 275 k1 = ( hy1 - hx + 0x3f800000 ) & 0xfff80000; 276 if( k1 >= 0x3C800000 ) /* if |x| >= (1/64)... */ 277 { 278 *(int*)&base1 = k1; 279 k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 280 k1 += 4; 281 /* skip over 0,0,pi/2,pi/2 */ 282 } 283 else /* |x| < 1/64 */ 284 { 285 k1 = 0; 286 base1 = zero; 287 } 288 289 x += stridex; 290 y += stridey; 291 z += stridez; 292 i = 2; 293 if ( --n <= 0 ) 294 break; 295 296 loop2: 297 hy2 = *(int*)y; 298 hx = *(int*)x; 299 sign2 = one; 300 sy = hy2 & 0x80000000; 301 hy2 &= ~0x80000000; 302 303 sx = hx & 0x80000000; 304 hx &= ~0x80000000; 305 306 if ( hy2 > hx ) 307 { 308 x2 = *y; 309 y2 = *x; 310 i = hx; 311 hx = hy2; 312 hy2 = i; 313 if ( sy ) 314 { 315 x2 = -x2; 316 sign2 = -sign2; 317 } 318 if ( sx ) 319 { 320 y2 = -y2; 321 ah2 = pio2; 322 } 323 else 324 { 325 ah2 = -pio2; 326 sign2 = -sign2; 327 } 328 } 329 else 330 { 331 y2 = *y; 332 x2 = *x; 333 if ( sy ) 334 { 335 y2 = -y2; 336 sign2 = -sign2; 337 } 338 if ( sx ) 339 { 340 x2 = -x2; 341 ah2 = -pi; 342 sign2 = -sign2; 343 } 344 else 345 ah2 = zero; 346 } 347 348 if ( hx >= 0x7f800000 || hx - hy2 >= 0x0c800000 ) 349 { 350 if ( hx >= 0x7f800000 ) 351 { 352 if ( hx ^ 0x7f800000 ) /* nan */ 353 ah2 = x2 + y2; 354 else if ( hy2 >= 0x7f800000 ) 355 ah2 += pio4; 356 } 357 else if ( (int) ah2 == 0 ) 358 ah2 = y2 / x2; 359 *z = (sign2 == one)? ah2 : -ah2; 360 x += stridex; 361 y += stridey; 362 z += stridez; 363 i = 2; 364 if ( --n <= 0 ) 365 break; 366 goto loop2; 367 } 368 if (hy2 < 0x00800000) { 369 if ( hy2 == 0 ) 370 { 371 *z = sign2 * (float) ah2; 372 x += stridex; 373 y += stridey; 374 z += stridez; 375 i = 2; 376 if ( --n <= 0 ) 377 break; 378 goto loop2; 379 } 380 y2 *= twop24; /* scale subnormal y */ 381 x2 *= twop24; /* scale possibly subnormal x */ 382 hy2 = *(int*)&y2; 383 hx = *(int*)&x2; 384 } 385 386 pz2 = z; 387 388 k2 = ( hy2 - hx + 0x3f800000 ) & 0xfff80000; 389 if( k2 >= 0x3C800000 ) /* if |x| >= (1/64)... */ 390 { 391 *(int*)&base2 = k2; 392 k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */ 393 k2 += 4; 394 /* skip over 0,0,pi/2,pi/2 */ 395 } 396 else /* |x| < 1/64 */ 397 { 398 k2 = 0; 399 base2 = zero; 400 } 401 402 goto endloop; 403 404 endloop: 405 406 ah2 += __vlibm_TBL_atan1[k2]; 407 ah1 += __vlibm_TBL_atan1[k1]; 408 ah0 += __vlibm_TBL_atan1[k0]; 409 410 db2 = base2; 411 db1 = base1; 412 db0 = base0; 413 dy2 = y2; 414 dy1 = y1; 415 dy0 = y0; 416 dx2 = x2; 417 dx1 = x1; 418 dx0 = x0; 419 420 num2 = dy2 - dx2 * db2; 421 den2 = dx2 + dy2 * db2; 422 423 num1 = dy1 - dx1 * db1; 424 den1 = dx1 + dy1 * db1; 425 426 num0 = dy0 - dx0 * db0; 427 den0 = dx0 + dy0 * db0; 428 429 t2 = num2 / den2; 430 t1 = num1 / den1; 431 t0 = num0 / den0; 432 433 sx2 = t2 * t2; 434 sx1 = t1 * t1; 435 sx0 = t0 * t0; 436 437 t2 += t2 * sx2 * ( q1 + sx2 * q2 ); 438 t1 += t1 * sx1 * ( q1 + sx1 * q2 ); 439 t0 += t0 * sx0 * ( q1 + sx0 * q2 ); 440 441 t2 += ah2; 442 t1 += ah1; 443 t0 += ah0; 444 445 *pz2 = sign2 * t2; 446 *pz1 = sign1 * t1; 447 *pz0 = sign0 * t0; 448 449 x += stridex; 450 y += stridey; 451 z += stridez; 452 i = 0; 453 } while ( --n > 0 ); 454 455 if ( i > 1 ) 456 { 457 ah1 += __vlibm_TBL_atan1[k1]; 458 t1 = ( y1 - x1 * (double)base1 ) / 459 ( x1 + y1 * (double)base1 ); 460 sx1 = t1 * t1; 461 t1 += t1 * sx1 * ( q1 + sx1 * q2 ); 462 t1 += ah1; 463 *pz1 = sign1 * t1; 464 } 465 466 if ( i > 0 ) 467 { 468 ah0 += __vlibm_TBL_atan1[k0]; 469 t0 = ( y0 - x0 * (double)base0 ) / 470 ( x0 + y0 * (double)base0 ); 471 sx0 = t0 * t0; 472 t0 += t0 * sx0 * ( q1 + sx0 * q2 ); 473 t0 += ah0; 474 *pz0 = sign0 * t0; 475 } 476 }