Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vatan2f.c
+++ new/usr/src/lib/libmvec/common/__vatan2f.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #ifdef __RESTRICT
31 31 #define restrict _Restrict
32 32 #else
33 33 #define restrict
34 34 #endif
35 35
36 36 extern const double __vlibm_TBL_atan1[];
37 37
38 38 static const double
39 39 pio4 = 7.8539816339744827900e-01,
40 40 pio2 = 1.5707963267948965580e+00,
41 41 pi = 3.1415926535897931160e+00;
42 42
43 43 static const float
↓ open down ↓ |
43 lines elided |
↑ open up ↑ |
44 44 zero = 0.0f,
45 45 one = 1.0f,
46 46 q1 = -3.3333333333296428046e-01f,
47 47 q2 = 1.9999999186853752618e-01f,
48 48 twop24 = 16777216.0f;
49 49
50 50 void
51 51 __vatan2f( int n, float * restrict y, int stridey, float * restrict x,
52 52 int stridex, float * restrict z, int stridez )
53 53 {
54 - float x0, x1, x2, y0, y1, y2, *pz0, *pz1, *pz2;
54 + float x0, x1, x2, y0, y1, y2, *pz0 = 0, *pz1, *pz2;
55 55 double ah0, ah1, ah2;
56 56 double t0, t1, t2;
57 57 double sx0, sx1, sx2;
58 58 double sign0, sign1, sign2;
59 - int i, k0, k1, k2, hx, sx, sy;
59 + int i, k0 = 0, k1, k2, hx, sx, sy;
60 60 int hy0, hy1, hy2;
61 - float base0, base1, base2;
61 + float base0 = 0.0, base1, base2;
62 62 double num0, num1, num2;
63 63 double den0, den1, den2;
64 64 double dx0, dx1, dx2;
65 65 double dy0, dy1, dy2;
66 - double db0, db1, db2;
66 + double db0, db1, db2;
67 67
68 68 do
69 69 {
70 70 loop0:
71 71 hy0 = *(int*)y;
72 72 hx = *(int*)x;
73 73 sign0 = one;
74 74 sy = hy0 & 0x80000000;
75 75 hy0 &= ~0x80000000;
76 76
77 77 sx = hx & 0x80000000;
78 78 hx &= ~0x80000000;
79 79
80 80 if ( hy0 > hx )
81 81 {
82 82 x0 = *y;
83 83 y0 = *x;
84 84 i = hx;
85 85 hx = hy0;
86 86 hy0 = i;
87 87 if ( sy )
88 88 {
89 89 x0 = -x0;
90 90 sign0 = -sign0;
91 91 }
92 92 if ( sx )
93 93 {
94 94 y0 = -y0;
95 95 ah0 = pio2;
96 96 }
97 97 else
98 98 {
99 99 ah0 = -pio2;
100 100 sign0 = -sign0;
101 101 }
102 102 }
103 103 else
104 104 {
105 105 y0 = *y;
106 106 x0 = *x;
107 107 if ( sy )
108 108 {
109 109 y0 = -y0;
110 110 sign0 = -sign0;
111 111 }
112 112 if ( sx )
113 113 {
114 114 x0 = -x0;
115 115 ah0 = -pi;
116 116 sign0 = -sign0;
117 117 }
118 118 else
119 119 ah0 = zero;
120 120 }
121 121
122 122 if ( hx >= 0x7f800000 || hx - hy0 >= 0x0c800000 )
123 123 {
124 124 if ( hx >= 0x7f800000 )
125 125 {
126 126 if ( hx ^ 0x7f800000 ) /* nan */
127 127 ah0 = x0 + y0;
128 128 else if ( hy0 >= 0x7f800000 )
129 129 ah0 += pio4;
130 130 }
131 131 else if ( (int) ah0 == 0 )
132 132 ah0 = y0 / x0;
133 133 *z = (sign0 == one) ? ah0 : -ah0;
134 134 /* sign0*ah0 would change nan behavior relative to previous release */
135 135 x += stridex;
136 136 y += stridey;
137 137 z += stridez;
138 138 i = 0;
139 139 if ( --n <= 0 )
140 140 break;
141 141 goto loop0;
142 142 }
143 143 if (hy0 < 0x00800000) {
144 144 if ( hy0 == 0 )
145 145 {
146 146 *z = sign0 * (float) ah0;
147 147 x += stridex;
148 148 y += stridey;
149 149 z += stridez;
150 150 i = 0;
151 151 if ( --n <= 0 )
152 152 break;
153 153 goto loop0;
154 154 }
155 155 y0 *= twop24; /* scale subnormal y */
156 156 x0 *= twop24; /* scale possibly subnormal x */
157 157 hy0 = *(int*)&y0;
158 158 hx = *(int*)&x0;
159 159 }
160 160 pz0 = z;
161 161
162 162 k0 = ( hy0 - hx + 0x3f800000 ) & 0xfff80000;
163 163 if( k0 >= 0x3C800000 ) /* if |x| >= (1/64)... */
164 164 {
165 165 *(int*)&base0 = k0;
166 166 k0 = (k0 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
167 167 k0 += 4;
168 168 /* skip over 0,0,pi/2,pi/2 */
169 169 }
170 170 else /* |x| < 1/64 */
171 171 {
172 172 k0 = 0;
173 173 base0 = zero;
174 174 }
175 175
176 176 x += stridex;
177 177 y += stridey;
178 178 z += stridez;
179 179 i = 1;
180 180 if ( --n <= 0 )
181 181 break;
182 182
183 183
184 184 loop1:
185 185 hy1 = *(int*)y;
186 186 hx = *(int*)x;
187 187 sign1 = one;
188 188 sy = hy1 & 0x80000000;
189 189 hy1 &= ~0x80000000;
190 190
191 191 sx = hx & 0x80000000;
192 192 hx &= ~0x80000000;
193 193
194 194 if ( hy1 > hx )
195 195 {
196 196 x1 = *y;
197 197 y1 = *x;
198 198 i = hx;
199 199 hx = hy1;
200 200 hy1 = i;
201 201 if ( sy )
202 202 {
203 203 x1 = -x1;
204 204 sign1 = -sign1;
205 205 }
206 206 if ( sx )
207 207 {
208 208 y1 = -y1;
209 209 ah1 = pio2;
210 210 }
211 211 else
212 212 {
213 213 ah1 = -pio2;
214 214 sign1 = -sign1;
215 215 }
216 216 }
217 217 else
218 218 {
219 219 y1 = *y;
220 220 x1 = *x;
221 221 if ( sy )
222 222 {
223 223 y1 = -y1;
224 224 sign1 = -sign1;
225 225 }
226 226 if ( sx )
227 227 {
228 228 x1 = -x1;
229 229 ah1 = -pi;
230 230 sign1 = -sign1;
231 231 }
232 232 else
233 233 ah1 = zero;
234 234 }
235 235
236 236 if ( hx >= 0x7f800000 || hx - hy1 >= 0x0c800000 )
237 237 {
238 238 if ( hx >= 0x7f800000 )
239 239 {
240 240 if ( hx ^ 0x7f800000 ) /* nan */
241 241 ah1 = x1 + y1;
242 242 else if ( hy1 >= 0x7f800000 )
243 243 ah1 += pio4;
244 244 }
245 245 else if ( (int) ah1 == 0 )
246 246 ah1 = y1 / x1;
247 247 *z = (sign1 == one)? ah1 : -ah1;
248 248 x += stridex;
249 249 y += stridey;
250 250 z += stridez;
251 251 i = 1;
252 252 if ( --n <= 0 )
253 253 break;
254 254 goto loop1;
255 255 }
256 256 if (hy1 < 0x00800000) {
257 257 if ( hy1 == 0 )
258 258 {
259 259 *z = sign1 * (float) ah1;
260 260 x += stridex;
261 261 y += stridey;
262 262 z += stridez;
263 263 i = 1;
264 264 if ( --n <= 0 )
265 265 break;
266 266 goto loop1;
267 267 }
268 268 y1 *= twop24; /* scale subnormal y */
269 269 x1 *= twop24; /* scale possibly subnormal x */
270 270 hy1 = *(int*)&y1;
271 271 hx = *(int*)&x1;
272 272 }
273 273 pz1 = z;
274 274
275 275 k1 = ( hy1 - hx + 0x3f800000 ) & 0xfff80000;
276 276 if( k1 >= 0x3C800000 ) /* if |x| >= (1/64)... */
277 277 {
278 278 *(int*)&base1 = k1;
279 279 k1 = (k1 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
280 280 k1 += 4;
281 281 /* skip over 0,0,pi/2,pi/2 */
282 282 }
283 283 else /* |x| < 1/64 */
284 284 {
285 285 k1 = 0;
286 286 base1 = zero;
287 287 }
288 288
289 289 x += stridex;
290 290 y += stridey;
291 291 z += stridez;
292 292 i = 2;
293 293 if ( --n <= 0 )
294 294 break;
295 295
296 296 loop2:
297 297 hy2 = *(int*)y;
298 298 hx = *(int*)x;
299 299 sign2 = one;
300 300 sy = hy2 & 0x80000000;
301 301 hy2 &= ~0x80000000;
302 302
303 303 sx = hx & 0x80000000;
304 304 hx &= ~0x80000000;
305 305
306 306 if ( hy2 > hx )
307 307 {
308 308 x2 = *y;
309 309 y2 = *x;
310 310 i = hx;
311 311 hx = hy2;
312 312 hy2 = i;
313 313 if ( sy )
314 314 {
315 315 x2 = -x2;
316 316 sign2 = -sign2;
317 317 }
318 318 if ( sx )
319 319 {
320 320 y2 = -y2;
321 321 ah2 = pio2;
322 322 }
323 323 else
324 324 {
325 325 ah2 = -pio2;
326 326 sign2 = -sign2;
327 327 }
328 328 }
329 329 else
330 330 {
331 331 y2 = *y;
332 332 x2 = *x;
333 333 if ( sy )
334 334 {
335 335 y2 = -y2;
336 336 sign2 = -sign2;
337 337 }
338 338 if ( sx )
339 339 {
340 340 x2 = -x2;
341 341 ah2 = -pi;
342 342 sign2 = -sign2;
343 343 }
344 344 else
345 345 ah2 = zero;
346 346 }
347 347
348 348 if ( hx >= 0x7f800000 || hx - hy2 >= 0x0c800000 )
349 349 {
350 350 if ( hx >= 0x7f800000 )
351 351 {
352 352 if ( hx ^ 0x7f800000 ) /* nan */
353 353 ah2 = x2 + y2;
354 354 else if ( hy2 >= 0x7f800000 )
355 355 ah2 += pio4;
356 356 }
357 357 else if ( (int) ah2 == 0 )
358 358 ah2 = y2 / x2;
359 359 *z = (sign2 == one)? ah2 : -ah2;
360 360 x += stridex;
361 361 y += stridey;
362 362 z += stridez;
363 363 i = 2;
364 364 if ( --n <= 0 )
365 365 break;
366 366 goto loop2;
367 367 }
368 368 if (hy2 < 0x00800000) {
369 369 if ( hy2 == 0 )
370 370 {
371 371 *z = sign2 * (float) ah2;
372 372 x += stridex;
373 373 y += stridey;
374 374 z += stridez;
375 375 i = 2;
376 376 if ( --n <= 0 )
377 377 break;
378 378 goto loop2;
379 379 }
380 380 y2 *= twop24; /* scale subnormal y */
381 381 x2 *= twop24; /* scale possibly subnormal x */
382 382 hy2 = *(int*)&y2;
383 383 hx = *(int*)&x2;
384 384 }
385 385
386 386 pz2 = z;
387 387
388 388 k2 = ( hy2 - hx + 0x3f800000 ) & 0xfff80000;
389 389 if( k2 >= 0x3C800000 ) /* if |x| >= (1/64)... */
390 390 {
391 391 *(int*)&base2 = k2;
392 392 k2 = (k2 - 0x3C800000) >> 18; /* (index >> 19) << 1) */
393 393 k2 += 4;
394 394 /* skip over 0,0,pi/2,pi/2 */
395 395 }
396 396 else /* |x| < 1/64 */
397 397 {
398 398 k2 = 0;
399 399 base2 = zero;
400 400 }
401 401
402 402 goto endloop;
403 403
404 404 endloop:
405 405
406 406 ah2 += __vlibm_TBL_atan1[k2];
407 407 ah1 += __vlibm_TBL_atan1[k1];
408 408 ah0 += __vlibm_TBL_atan1[k0];
409 409
410 410 db2 = base2;
411 411 db1 = base1;
412 412 db0 = base0;
413 413 dy2 = y2;
414 414 dy1 = y1;
415 415 dy0 = y0;
416 416 dx2 = x2;
417 417 dx1 = x1;
418 418 dx0 = x0;
419 419
420 420 num2 = dy2 - dx2 * db2;
421 421 den2 = dx2 + dy2 * db2;
422 422
423 423 num1 = dy1 - dx1 * db1;
424 424 den1 = dx1 + dy1 * db1;
425 425
426 426 num0 = dy0 - dx0 * db0;
427 427 den0 = dx0 + dy0 * db0;
428 428
429 429 t2 = num2 / den2;
430 430 t1 = num1 / den1;
431 431 t0 = num0 / den0;
432 432
433 433 sx2 = t2 * t2;
434 434 sx1 = t1 * t1;
435 435 sx0 = t0 * t0;
436 436
437 437 t2 += t2 * sx2 * ( q1 + sx2 * q2 );
438 438 t1 += t1 * sx1 * ( q1 + sx1 * q2 );
439 439 t0 += t0 * sx0 * ( q1 + sx0 * q2 );
440 440
441 441 t2 += ah2;
442 442 t1 += ah1;
443 443 t0 += ah0;
444 444
445 445 *pz2 = sign2 * t2;
446 446 *pz1 = sign1 * t1;
447 447 *pz0 = sign0 * t0;
448 448
449 449 x += stridex;
450 450 y += stridey;
451 451 z += stridez;
452 452 i = 0;
453 453 } while ( --n > 0 );
454 454
455 455 if ( i > 1 )
456 456 {
457 457 ah1 += __vlibm_TBL_atan1[k1];
458 458 t1 = ( y1 - x1 * (double)base1 ) /
459 459 ( x1 + y1 * (double)base1 );
460 460 sx1 = t1 * t1;
461 461 t1 += t1 * sx1 * ( q1 + sx1 * q2 );
462 462 t1 += ah1;
463 463 *pz1 = sign1 * t1;
464 464 }
465 465
466 466 if ( i > 0 )
467 467 {
468 468 ah0 += __vlibm_TBL_atan1[k0];
469 469 t0 = ( y0 - x0 * (double)base0 ) /
470 470 ( x0 + y0 * (double)base0 );
471 471 sx0 = t0 * t0;
472 472 t0 += t0 * sx0 * ( q1 + sx0 * q2 );
473 473 t0 += ah0;
474 474 *pz0 = sign0 * t0;
475 475 }
476 476 }
↓ open down ↓ |
400 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX