1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 #include "libm.h" /* __k_atan2 */
30 #include "complex_wrapper.h"
31
32 /*
33 * double __k_atan2(double y, double x, double *e)
34 *
35 * Compute atan2 with error terms.
36 *
37 * Important formula:
38 * 3 5
39 * x x
40 * atan(x) = x - ----- + ----- - ... (for x <= 1)
41 * 3 5
42 *
43 * pi 1 1
58 */
59
60 /*
61 * (void) mx_poly (double *z, double *a, double *e, int n)
62 * return
63 * e = a + z*(a + z*(a + ... z*(a + e)...))
64 * 0 2 4 2n
65 * Note:
66 * 1. e and coefficient ai are represented by two double numbers.
67 * For e, the first one contain the leading 24 bits rounded, and the
68 * second one contain the remaining 53 bits (total 77 bits accuracy).
69 * For ai, the first one contian the leading 53 bits rounded, and the
70 * second is the remaining 53 bits (total 106 bits accuracy).
71 * 2. z is an array of three doubles.
72 * z[0] : the rounded value of Z (the intended value of z)
73 * z[1] : the leading 24 bits of Z rounded
74 * z[2] : the remaining 53 bits of Z
75 * Note that z[0] = z[1]+z[2] rounded.
76 *
77 */
78
79 static void
80 mx_poly(const double *z, const double *a, double *e, int n) {
81 double r, s, t, p_h, p_l, z_h, z_l, p;
82 int i;
83
84 n = n + n;
85 p = e[0] + a[n];
86 p_l = a[n + 1];
87 p_h = (double) ((float) p);
88 p = a[n - 2] + z[0] * p;
89 z_h = z[1]; z_l = z[2];
90 p_l += e[0] - (p_h - a[n]);
91
92 for (i = n - 2; i >= 2; i -= 2) {
93 /* compute p = ai + z * p */
94 t = z_h * p_h;
95 s = z[0] * p_l + p_h * z_l;
96 p_h = (double) ((float) p);
97 s += a[i + 1];
98 r = t - (p_h - a[i]);
99 p = a[i - 2] + z[0] * p;
100 p_l = r + s;
101 }
102 e[0] = (double)((float) p);
103 t = z_h * p_h;
104 s = z[0] * p_l + p_h * z_l;
105 r = t - (e[0] - a[0]);
106 e[1] = r + s;
107 }
108
109 /*
110 * Table of constants for atan from 0.125 to 8
111 * 0.125 -- 0x3fc00000 --- (increment at bit 16)
112 * 0x3fc10000
113 * 0x3fc20000
114 * ... ...
115 * 0x401f0000
116 * 8.000 -- 0x40200000 (total: 97)
117 * By K.C. Ng, March 9, 1989
118 */
119
120 static const double TBL_atan_hi[] = {
121 1.243549945467614382e-01, 1.320397616146387620e-01, 1.397088742891636204e-01,
122 1.473614810886516302e-01, 1.549967419239409727e-01, 1.626138285979485676e-01,
123 1.702119252854744080e-01, 1.777902289926760471e-01, 1.853479499956947607e-01,
124 1.928843122579746439e-01, 2.003985538258785115e-01, 2.078899272022629863e-01,
125 2.153576996977380476e-01, 2.228011537593945213e-01, 2.302195872768437179e-01,
126 2.376123138654712419e-01, 2.449786631268641435e-01, 2.596296294082575118e-01,
127 2.741674511196587893e-01, 2.885873618940774099e-01, 3.028848683749714166e-01,
128 3.170557532091470287e-01, 3.310960767041321029e-01, 3.450021772071051318e-01,
129 3.587706702705721895e-01, 3.723984466767542023e-01, 3.858826693980737521e-01,
130 3.992207695752525431e-01, 4.124104415973872673e-01, 4.254496373700422662e-01,
131 4.383365598579578304e-01, 4.510696559885234436e-01, 4.636476090008060935e-01,
132 4.883339510564055352e-01, 5.123894603107377321e-01, 5.358112379604637043e-01,
133 5.585993153435624414e-01, 5.807563535676704136e-01, 6.022873461349641522e-01,
134 6.231993299340659043e-01, 6.435011087932843710e-01, 6.632029927060932861e-01,
135 6.823165548747480713e-01, 7.008544078844501923e-01, 7.188299996216245269e-01,
136 7.362574289814280970e-01, 7.531512809621944138e-01, 7.695264804056582975e-01,
137 7.853981633974482790e-01, 8.156919233162234217e-01, 8.441539861131710509e-01,
138 8.709034570756529758e-01, 8.960553845713439269e-01, 9.197196053504168578e-01,
139 9.420000403794636101e-01, 9.629943306809362058e-01, 9.827937232473290541e-01,
140 1.001483135694234639e+00, 1.019141344266349725e+00, 1.035841253008800145e+00,
141 1.051650212548373764e+00, 1.066630365315743623e+00, 1.080839000541168327e+00,
142 1.094328907321189925e+00, 1.107148717794090409e+00, 1.130953743979160375e+00,
143 1.152571997215667610e+00, 1.172273881128476303e+00, 1.190289949682531656e+00,
144 1.206817370285252489e+00, 1.222025323210989667e+00, 1.236059489478081863e+00,
145 1.249045772398254428e+00, 1.261093382252440387e+00, 1.272297395208717319e+00,
146 1.282740879744270757e+00, 1.292496667789785336e+00, 1.301628834009196156e+00,
147 1.310193935047555547e+00, 1.318242051016837113e+00, 1.325817663668032553e+00,
148 1.339705659598999565e+00, 1.352127380920954636e+00, 1.363300100359693845e+00,
149 1.373400766945015894e+00, 1.382574821490125894e+00, 1.390942827002418447e+00,
150 1.398605512271957618e+00, 1.405647649380269870e+00, 1.412141064608495311e+00,
151 1.418146998399631542e+00, 1.423717971406494032e+00, 1.428899272190732761e+00,
152 1.433730152484709031e+00, 1.438244794498222623e+00, 1.442473099109101931e+00,
153 1.446441332248135092e+00,
154 };
155
156 static const double TBL_atan_lo[] = {
157 -3.125324142453938311e-18, -1.276925400709959526e-17, 2.479758919089733066e-17,
158 5.409599147666297957e-18, 9.585415594114323829e-18, 7.784470643106252464e-18,
159 -3.541164079802125137e-18, 2.372599351477449041e-17, 4.180692268843078977e-18,
160 2.034098543938166622e-17, 3.139954287184449286e-18, 7.333160666520898500e-18,
161 4.738160130078732886e-19, -5.498822172446843173e-18, 1.231340452914270316e-17,
162 1.058231431371112987e-17, 1.069875561873445139e-17, 1.923875492461530410e-17,
163 8.261353575163771936e-18, -1.428369957377257085e-17, -1.101082790300136900e-17,
164 -1.893928924292642146e-17, -7.952610375793798701e-18, -2.293880475557830393e-17,
165 3.088733564861919217e-17, 1.961231150484565340e-17, 2.378822732491940868e-17,
166 2.246598105617042065e-17, 3.963462895355093301e-17, 2.331553074189288466e-17,
167 -2.494277030626540909e-17, 3.280735600183735558e-17, 2.269877745296168709e-17,
168 -1.137323618932958456e-17, -2.546278147285580353e-17, -4.063795683482557497e-18,
169 -5.455630548591626394e-18, -1.441464378193066908e-17, 2.950430737228402307e-17,
170 2.672403885140095079e-17, 1.583478505144428617e-17, -3.076054864429649001e-17,
171 6.943223671560007740e-18, -1.987626234335816123e-17, -2.147838844445698302e-17,
172 3.473937648299456719e-17, -2.425693465918206812e-17, -3.704991905602721293e-17,
173 3.061616997868383018e-17, -1.071456562778743077e-17, -4.841337011934916763e-17,
174 -2.269823590747287052e-17, 2.923876285774304890e-17, -4.057439412852767923e-17,
175 5.460837485846687627e-17, -3.986660595210752445e-18, 1.390331103123099845e-17,
176 9.438308023545392000e-17, 1.000401886936679889e-17, 3.194313981784503706e-17,
177 -9.650564731467513515e-17, -5.956589637160374564e-17, -1.567632251135907253e-17,
178 -5.490676155022364226e-18, 9.404471373566379412e-17, 7.123833804538446299e-17,
179 -9.159738508900378819e-17, 8.385188614028674371e-17, 7.683333629842068806e-17,
180 4.172467638861439118e-17, -2.979162864892849274e-17, 7.879752739459421280e-17,
181 -2.196203799612310905e-18, 3.242139621534960503e-17, 2.245875015034507026e-17,
182 -9.283188754266129476e-18, -6.830804768926660334e-17, -1.236918499824626670e-17,
183 8.745413734780278834e-17, -6.319394031144676258e-17, -8.824429373951136321e-17,
184 -2.599011860304134377e-17, 2.147674250751150961e-17, 1.093246171526936217e-16,
185 -3.307710355769516504e-17, -3.561490438648230100e-17, -9.843712133488842595e-17,
186 -2.324061182591627982e-17, -8.922630138234492386e-17, -9.573807110557223276e-17,
187 -8.263883782511013632e-17, 8.721870922223967507e-17, -6.457134743238754385e-17,
188 -4.396204466767636187e-17, -2.493019910264565554e-17, -1.105119435430315713e-16,
189 9.211323971545051565e-17,
190 };
191
192 /*
193 * mx_atan(x,err)
194 * Table look-up algorithm
195 * By K.C. Ng, March 9, 1989
196 *
197 * Algorithm.
198 *
199 * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)).
200 * We use poly1(x) to approximate atan(x) for x in [0,1/8] with
201 * error (relative)
202 * |(atan(x)-poly1(x))/x|<= 2^-83.41
203 *
204 * and use poly2(x) to approximate atan(x) for x in [0,1/65] with
205 * error
206 * |atan(x)-poly2(x)|<= 2^-86.8
207 *
208 * Here poly1 and poly2 are odd polynomial with the following form:
209 * x + x^3*(a1+x^2*(a2+...))
225 * If iy is the high word of y, then
226 * single : j = (iy - 0x3e000000) >> 19
227 * double : j = (iy - 0x3fc00000) >> 16
228 * quad : j = (iy - 0x3ffc0000) >> 12
229 *
230 * Let s = (x-y)/(1+x*y). Then
231 * atan(x) = atan(y) + poly1(s)
232 * = _TBL_atan_hi[j] + (_TBL_atan_lo[j] + poly2(s) )
233 *
234 * Note. |s| <= 1.5384615385e-02 = 1/65. Maxium occurs at x = 1.03125
235 *
236 */
237
238 #define P1 p[2]
239 #define P4 p[8]
240 #define P5 p[9]
241 #define P6 p[10]
242 #define P7 p[11]
243 #define P8 p[12]
244 #define P9 p[13]
245 static const double p[] = {
246 1.0,
247 0.0,
248 -3.33333333333333314830e-01, /* p1 = BFD55555 55555555 */
249 -1.85030852238476921863e-17, /* p1_l = BC755525 9783A49C */
250 2.00000000000000011102e-01, /* p2 = 3FC99999 9999999A */
251 -1.27263196576150347368e-17, /* p2_l = BC6D584B 0D874007 */
252 -1.42857142857141405923e-01, /* p3 = BFC24924 9249245E */
253 -1.34258204847170493327e-17, /* p3_l = BC6EF534 A112500D */
254 1.11111111110486909803e-01, /* p4 = 3FBC71C7 1C71176A */
255 -9.09090907557387889470e-02, /* p5 = BFB745D1 73B47A7D */
256 7.69230541541713053189e-02, /* p6 = 3FB3B13A B1E68DE6 */
257 -6.66645815401964159097e-02, /* p7 = BFB110EE 1584446A */
258 5.87081768778560317279e-02, /* p8 = 3FAE0EFF 87657733 */
259 -4.90818147456113240690e-02, /* p9 = BFA92140 6A524B5C */
260 };
261 #define Q1 q[2]
262 #define Q3 q[6]
263 #define Q4 q[7]
264 #define Q5 q[8]
265 static const double q[] = {
266 1.0,
267 0.0,
268 -3.33333333333333314830e-01, /* q1 = BFD55555 55555555 */
269 -1.85022941571278638733e-17, /* q1_l = BC7554E9 D20EFA66 */
270 1.99999999999999927836e-01, /* q2 = 3FC99999 99999997 */
271 -1.28782564407438833398e-17, /* q2_l = BC6DB1FB 17217417 */
272 -1.42857142855492280642e-01, /* q3 = BFC24924 92483C46 */
273 1.11111097130183356096e-01, /* q4 = 3FBC71C6 E06595CC */
274 -9.08553303569109294013e-02, /* q5 = BFB7424B 808CDA76 */
275 };
276 static const double
277 one = 1.0,
278 pio2hi = 1.570796326794896558e+00,
279 pio2lo = 6.123233995736765886e-17;
280
281 static double
282 mx_atan(double x, double *err) {
283 double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h,
284 z_l, r_h, r_l, u, v;
285 int ix, iy, sign, j;
286
287 ix = ((int *) &x)[HIWORD];
288 sign = ix & 0x80000000;
289 ix ^= sign;
290
291 /* for |x| < 1/8 */
292 if (ix < 0x3fc00000) {
293 if (ix < 0x3f300000) { /* when |x| < 2**-12 */
294 if (ix < 0x3d800000) { /* if |x| < 2**-39 */
295 *err = (double) ((int) x);
296 return (x);
297 }
298 z = x * x;
299 t = x * z * (q[2] + z * (q[4] + z * q[6]));
300 r = x + t;
301 *err = t - (r - x);
302 return (r);
303 }
304 z = x * x;
305
306 /* use double precision at p4 and on */
307 ee[0] = z *
308 (P4 + z *
309 (P5 + z * (P6 + z * (P7 + z * (P8 + z * P9)))));
310
311 x_h = (double) ((float) x);
312 z_h = (double) ((float) z);
313 x_l = x - x_h;
314 z_l = (x_h * x_h - z_h);
315 zz[0] = z;
316 zz[1] = z_h;
317 zz[2] = z_l + x_l * (x + x_h);
318
319 /*
320 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
321 * mx_poly
322 */
323
324 mx_poly(zz, p, ee, 3);
325
326 /* finally x*(1+z*(p1+...)) */
327 r = x_h * ee[0];
328 t = x * ee[1] + x_l * ee[0];
329 s = t + r;
330 *err = t - (s - r);
331 return (s);
332 }
333 /* for |x| >= 8.0 */
334 if (ix >= 0x40200000) { /* x >= 8 */
335 x = fabs(x);
336 if (ix >= 0x42600000) { /* x >= 2**39 */
337 if (ix >= 0x44c00000) { /* x >= 2**77 */
338 y = -pio2lo;
339 } else
340 y = one / x - pio2lo;
341 if (sign == 0) {
342 t = pio2hi - y;
343 *err = -(y - (pio2hi - t));
344 } else {
345 t = y - pio2hi;
346 *err = y - (pio2hi + t);
347 }
348 return (t);
349 } else {
350 /* compute r = 1/x */
351 r = one / x;
352 z = r * r;
353 if (ix < 0x40504000) { /* 8 < x < 65 */
354
355 /* use double precision at p4 and on */
356 ee[0] = z *
357 (P4 + z *
358 (P5 + z *
359 (P6 + z * (P7 + z * (P8 + z * P9)))));
360 x_h = (double) ((float) x);
361 r_h = (double) ((float) r);
362 z_h = (double) ((float) z);
363 r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
364 z_l = (r_h * r_h - z_h);
365 zz[0] = z;
366 zz[1] = z_h;
367 zz[2] = z_l + r_l * (r + r_h);
368 /*
369 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
370 * mx_poly
371 */
372 mx_poly(zz, p, ee, 3);
373 } else { /* x < 65 < 2**39 */
374 /* use double precision at q3 and on */
375 ee[0] = z * (Q3 + z * (Q4 + z * Q5));
376 x_h = (double) ((float) x);
377 r_h = (double) ((float) r);
378 z_h = (double) ((float) z);
379 r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
380 z_l = (r_h * r_h - z_h);
381 zz[0] = z;
382 zz[1] = z_h;
383 zz[2] = z_l + r_l * (r + r_h);
384 /*
385 * compute (1+z*(q1+z*(q2+e))) by call
386 * mx_poly
387 */
388 mx_poly(zz, q, ee, 2);
389 }
390 /* pio2 - r*(1+...) */
391 v = r_h * ee[0];
392 t = pio2lo - (r * ee[1] + r_l * ee[0]);
393 if (sign == 0) {
394 s = pio2hi - v;
395 t -= (v - (pio2hi - s));
396 } else {
397 s = v - pio2hi;
398 t = -(t - (v - (s + pio2hi)));
399 }
400 w = s + t;
401 *err = t - (w - s);
402 return (w);
403 }
404 }
405 /* now x is between 1/8 and 8 */
406 ((int *) &x)[HIWORD] = ix;
407 iy = (ix + 0x00008000) & 0x7fff0000;
408 ((int *) &y)[HIWORD] = iy;
409 ((int *) &y)[LOWORD] = 0;
410 j = (iy - 0x3fc00000) >> 16;
411
412 w = (x - y);
413 v = 1 / (one + x * y);
414 s = w * v;
415 z = s * s;
416 /* use double precision at q3 and on */
417 ee[0] = z * (Q3 + z * (Q4 + z * Q5));
418 s_h = (double) ((float) s);
419 z_h = (double) ((float) z);
420 x_h = (double) ((float) x);
421 t = (double) ((float) (one + x * y));
422 r = -((x_h - x) * y - (x_h * y - (t - one)));
423 s_l = -v * (s_h * r - (w - s_h * t));
424 z_l = (s_h * s_h - z_h);
425 zz[0] = z;
426 zz[1] = z_h;
427 zz[2] = z_l + s_l * (s + s_h);
428 /* compute (1+z*(q1+z*(q2+e))) by call mx_poly */
429 mx_poly(zz, q, ee, 2);
430 v = s_h * ee[0];
431 t = TBL_atan_lo[j] + (s * ee[1] + s_l * ee[0]);
432 u = TBL_atan_hi[j];
433 s = u + v;
434 t += (v - (s - u));
435 w = s + t;
436 *err = t - (w - s);
437 if (sign != 0) {
438 w = -w;
439 *err = -*err;
440 }
441 return (w);
442 }
443
444 static const double
445 twom768 = 6.441148769597133308e-232, /* 2^-768 */
446 two768 = 1.552518092300708935e+231, /* 2^768 */
447 pi = 3.1415926535897931159979634685,
448 pi_lo = 1.224646799147353177e-16,
449 pio2 = 1.570796326794896558e+00,
450 pio2_lo = 6.123233995736765886e-17,
451 pio4 = 0.78539816339744827899949,
452 pio4_lo = 3.061616997868382943e-17,
453 pi3o4 = 2.356194490192344836998,
454 pi3o4_lo = 9.184850993605148829195e-17;
455
456 double
457 __k_atan2(double y, double x, double *w) {
458 double t, xh, th, t1, t2, w1, w2;
459 int ix, iy, hx, hy, lx, ly;
460
461 hy = ((int *) &y)[HIWORD];
462 ly = ((int *) &y)[LOWORD];
463 iy = hy & ~0x80000000;
464
465 hx = ((int *) &x)[HIWORD];
466 lx = ((int *) &x)[LOWORD];
467 ix = hx & ~0x80000000;
468
469 *w = 0.0;
470 if (ix >= 0x7ff00000 || iy >= 0x7ff00000) { /* ignore inexact */
471 if (isnan(x) || isnan(y))
472 return (x * y);
473 else if (iy < 0x7ff00000) {
474 if (hx >= 0) { /* ATAN2(+-finite, +inf) is +-0 */
475 *w *= y;
476 return (*w);
477 } else { /* ATAN2(+-finite, -inf) is +-pi */
478 *w = copysign(pi_lo, y);
479 return (copysign(pi, y));
480 }
481 } else if (ix < 0x7ff00000) {
482 /* ATAN2(+-inf, finite) is +-pi/2 */
483 *w = (hy >= 0)? pio2_lo : -pio2_lo;
484 return ((hy >= 0)? pio2 : -pio2);
485 } else if (hx > 0) { /* ATAN2(+-INF,+INF) = +-pi/4 */
486 *w = (hy >= 0)? pio4_lo : -pio4_lo;
487 return ((hy >= 0)? pio4 : -pio4);
488 } else { /* ATAN2(+-INF,-INF) = +-3pi/4 */
489 *w = (hy >= 0)? pi3o4_lo : -pi3o4_lo;
490 return ((hy >= 0)? pi3o4 : -pi3o4);
491 }
492 } else if ((ix | lx) == 0 || (iy | ly) == 0) {
493 if ((iy | ly) == 0) {
494 if (hx >= 0) /* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */
495 return (y);
496 else { /* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */
497 *w = (hy >= 0)? pi_lo : -pi_lo;
498 return ((hy >= 0)? pi : -pi);
499 }
500 } else { /* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */
501 *w = (hy >= 0)? pio2_lo : -pio2_lo;
502 return ((hy >= 0)? pio2 : -pio2);
503 }
504 } else if (iy - ix > 0x06400000) { /* |x/y| < 2 ** -100 */
505 *w = (hy >= 0)? pio2_lo : -pio2_lo;
506 return ((hy >= 0)? pio2 : -pio2);
507 } else if (ix - iy > 0x06400000) { /* |y/x| < 2 ** -100 */
508 if (hx < 0) {
509 *w = (hy >= 0)? pi_lo : -pi_lo;
510 return ((hy >= 0)? pi : -pi);
511 } else {
512 t = y / x;
513 th = t;
514 ((int *) &th)[LOWORD] &= 0xf8000000;
515 xh = x;
516 ((int *) &xh)[LOWORD] &= 0xf8000000;
517 t1 = (x - xh) * t + xh * (t - th);
518 t2 = y - xh * th;
519 *w = (t2 - t1) / x;
520 return (t);
521 }
522 } else {
523 if (ix >= 0x5f300000) {
524 x *= twom768;
525 y *= twom768;
526 } else if (ix < 0x23d00000) {
527 x *= two768;
528 y *= two768;
529 }
530 y = fabs(y);
531 x = fabs(x);
532 t = y / x;
533 th = t;
534 ((int *) &th)[LOWORD] &= 0xf8000000;
535 xh = x;
536 ((int *) &xh)[LOWORD] &= 0xf8000000;
537 t1 = (x - xh) * t + xh * (t - th);
538 t2 = y - xh * th;
539 w1 = mx_atan(t, &w2);
540 w2 += (t2 - t1) / (x + y * t);
541 if (hx < 0) {
542 t1 = pi - w1;
543 t2 = pi - t1;
544 w2 = (pi_lo - w2) - (w1 - t2);
545 w1 = t1;
546 }
547 *w = (hy >= 0)? w2 : -w2;
548 return ((hy >= 0)? w1 : -w1);
549 }
550 }
|
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #include "libm.h" /* __k_atan2 */
32 #include "complex_wrapper.h"
33
34 /*
35 * double __k_atan2(double y, double x, double *e)
36 *
37 * Compute atan2 with error terms.
38 *
39 * Important formula:
40 * 3 5
41 * x x
42 * atan(x) = x - ----- + ----- - ... (for x <= 1)
43 * 3 5
44 *
45 * pi 1 1
60 */
61
62 /*
63 * (void) mx_poly (double *z, double *a, double *e, int n)
64 * return
65 * e = a + z*(a + z*(a + ... z*(a + e)...))
66 * 0 2 4 2n
67 * Note:
68 * 1. e and coefficient ai are represented by two double numbers.
69 * For e, the first one contain the leading 24 bits rounded, and the
70 * second one contain the remaining 53 bits (total 77 bits accuracy).
71 * For ai, the first one contian the leading 53 bits rounded, and the
72 * second is the remaining 53 bits (total 106 bits accuracy).
73 * 2. z is an array of three doubles.
74 * z[0] : the rounded value of Z (the intended value of z)
75 * z[1] : the leading 24 bits of Z rounded
76 * z[2] : the remaining 53 bits of Z
77 * Note that z[0] = z[1]+z[2] rounded.
78 *
79 */
80 static void
81 mx_poly(const double *z, const double *a, double *e, int n)
82 {
83 double r, s, t, p_h, p_l, z_h, z_l, p;
84 int i;
85
86 n = n + n;
87 p = e[0] + a[n];
88 p_l = a[n + 1];
89 p_h = (double)((float)p);
90 p = a[n - 2] + z[0] * p;
91 z_h = z[1];
92 z_l = z[2];
93 p_l += e[0] - (p_h - a[n]);
94
95 for (i = n - 2; i >= 2; i -= 2) {
96 /* compute p = ai + z * p */
97 t = z_h * p_h;
98 s = z[0] * p_l + p_h * z_l;
99 p_h = (double)((float)p);
100 s += a[i + 1];
101 r = t - (p_h - a[i]);
102 p = a[i - 2] + z[0] * p;
103 p_l = r + s;
104 }
105
106 e[0] = (double)((float)p);
107 t = z_h * p_h;
108 s = z[0] * p_l + p_h * z_l;
109 r = t - (e[0] - a[0]);
110 e[1] = r + s;
111 }
112
113 /*
114 * Table of constants for atan from 0.125 to 8
115 * 0.125 -- 0x3fc00000 --- (increment at bit 16)
116 * 0x3fc10000
117 * 0x3fc20000
118 * ... ...
119 * 0x401f0000
120 * 8.000 -- 0x40200000 (total: 97)
121 * By K.C. Ng, March 9, 1989
122 */
123
124 static const double TBL_atan_hi[] = {
125 1.243549945467614382e-01, 1.320397616146387620e-01,
126 1.397088742891636204e-01, 1.473614810886516302e-01,
127 1.549967419239409727e-01, 1.626138285979485676e-01,
128 1.702119252854744080e-01, 1.777902289926760471e-01,
129 1.853479499956947607e-01, 1.928843122579746439e-01,
130 2.003985538258785115e-01, 2.078899272022629863e-01,
131 2.153576996977380476e-01, 2.228011537593945213e-01,
132 2.302195872768437179e-01, 2.376123138654712419e-01,
133 2.449786631268641435e-01, 2.596296294082575118e-01,
134 2.741674511196587893e-01, 2.885873618940774099e-01,
135 3.028848683749714166e-01, 3.170557532091470287e-01,
136 3.310960767041321029e-01, 3.450021772071051318e-01,
137 3.587706702705721895e-01, 3.723984466767542023e-01,
138 3.858826693980737521e-01, 3.992207695752525431e-01,
139 4.124104415973872673e-01, 4.254496373700422662e-01,
140 4.383365598579578304e-01, 4.510696559885234436e-01,
141 4.636476090008060935e-01, 4.883339510564055352e-01,
142 5.123894603107377321e-01, 5.358112379604637043e-01,
143 5.585993153435624414e-01, 5.807563535676704136e-01,
144 6.022873461349641522e-01, 6.231993299340659043e-01,
145 6.435011087932843710e-01, 6.632029927060932861e-01,
146 6.823165548747480713e-01, 7.008544078844501923e-01,
147 7.188299996216245269e-01, 7.362574289814280970e-01,
148 7.531512809621944138e-01, 7.695264804056582975e-01,
149 7.853981633974482790e-01, 8.156919233162234217e-01,
150 8.441539861131710509e-01, 8.709034570756529758e-01,
151 8.960553845713439269e-01, 9.197196053504168578e-01,
152 9.420000403794636101e-01, 9.629943306809362058e-01,
153 9.827937232473290541e-01, 1.001483135694234639e+00,
154 1.019141344266349725e+00, 1.035841253008800145e+00,
155 1.051650212548373764e+00, 1.066630365315743623e+00,
156 1.080839000541168327e+00, 1.094328907321189925e+00,
157 1.107148717794090409e+00, 1.130953743979160375e+00,
158 1.152571997215667610e+00, 1.172273881128476303e+00,
159 1.190289949682531656e+00, 1.206817370285252489e+00,
160 1.222025323210989667e+00, 1.236059489478081863e+00,
161 1.249045772398254428e+00, 1.261093382252440387e+00,
162 1.272297395208717319e+00, 1.282740879744270757e+00,
163 1.292496667789785336e+00, 1.301628834009196156e+00,
164 1.310193935047555547e+00, 1.318242051016837113e+00,
165 1.325817663668032553e+00, 1.339705659598999565e+00,
166 1.352127380920954636e+00, 1.363300100359693845e+00,
167 1.373400766945015894e+00, 1.382574821490125894e+00,
168 1.390942827002418447e+00, 1.398605512271957618e+00,
169 1.405647649380269870e+00, 1.412141064608495311e+00,
170 1.418146998399631542e+00, 1.423717971406494032e+00,
171 1.428899272190732761e+00, 1.433730152484709031e+00,
172 1.438244794498222623e+00, 1.442473099109101931e+00,
173 1.446441332248135092e+00,
174 };
175
176 static const double TBL_atan_lo[] = {
177 -3.125324142453938311e-18, -1.276925400709959526e-17,
178 2.479758919089733066e-17, 5.409599147666297957e-18,
179 9.585415594114323829e-18, 7.784470643106252464e-18,
180 -3.541164079802125137e-18, 2.372599351477449041e-17,
181 4.180692268843078977e-18, 2.034098543938166622e-17,
182 3.139954287184449286e-18, 7.333160666520898500e-18,
183 4.738160130078732886e-19, -5.498822172446843173e-18,
184 1.231340452914270316e-17, 1.058231431371112987e-17,
185 1.069875561873445139e-17, 1.923875492461530410e-17,
186 8.261353575163771936e-18, -1.428369957377257085e-17,
187 -1.101082790300136900e-17, -1.893928924292642146e-17,
188 -7.952610375793798701e-18, -2.293880475557830393e-17,
189 3.088733564861919217e-17, 1.961231150484565340e-17,
190 2.378822732491940868e-17, 2.246598105617042065e-17,
191 3.963462895355093301e-17, 2.331553074189288466e-17,
192 -2.494277030626540909e-17, 3.280735600183735558e-17,
193 2.269877745296168709e-17, -1.137323618932958456e-17,
194 -2.546278147285580353e-17, -4.063795683482557497e-18,
195 -5.455630548591626394e-18, -1.441464378193066908e-17,
196 2.950430737228402307e-17, 2.672403885140095079e-17,
197 1.583478505144428617e-17, -3.076054864429649001e-17,
198 6.943223671560007740e-18, -1.987626234335816123e-17,
199 -2.147838844445698302e-17, 3.473937648299456719e-17,
200 -2.425693465918206812e-17, -3.704991905602721293e-17,
201 3.061616997868383018e-17, -1.071456562778743077e-17,
202 -4.841337011934916763e-17, -2.269823590747287052e-17,
203 2.923876285774304890e-17, -4.057439412852767923e-17,
204 5.460837485846687627e-17, -3.986660595210752445e-18,
205 1.390331103123099845e-17, 9.438308023545392000e-17,
206 1.000401886936679889e-17, 3.194313981784503706e-17,
207 -9.650564731467513515e-17, -5.956589637160374564e-17,
208 -1.567632251135907253e-17, -5.490676155022364226e-18,
209 9.404471373566379412e-17, 7.123833804538446299e-17,
210 -9.159738508900378819e-17, 8.385188614028674371e-17,
211 7.683333629842068806e-17, 4.172467638861439118e-17,
212 -2.979162864892849274e-17, 7.879752739459421280e-17,
213 -2.196203799612310905e-18, 3.242139621534960503e-17,
214 2.245875015034507026e-17, -9.283188754266129476e-18,
215 -6.830804768926660334e-17, -1.236918499824626670e-17,
216 8.745413734780278834e-17, -6.319394031144676258e-17,
217 -8.824429373951136321e-17, -2.599011860304134377e-17,
218 2.147674250751150961e-17, 1.093246171526936217e-16,
219 -3.307710355769516504e-17, -3.561490438648230100e-17,
220 -9.843712133488842595e-17, -2.324061182591627982e-17,
221 -8.922630138234492386e-17, -9.573807110557223276e-17,
222 -8.263883782511013632e-17, 8.721870922223967507e-17,
223 -6.457134743238754385e-17, -4.396204466767636187e-17,
224 -2.493019910264565554e-17, -1.105119435430315713e-16,
225 9.211323971545051565e-17,
226 };
227
228 /*
229 * mx_atan(x,err)
230 * Table look-up algorithm
231 * By K.C. Ng, March 9, 1989
232 *
233 * Algorithm.
234 *
235 * The algorithm is based on atan(x)=atan(y)+atan((x-y)/(1+x*y)).
236 * We use poly1(x) to approximate atan(x) for x in [0,1/8] with
237 * error (relative)
238 * |(atan(x)-poly1(x))/x|<= 2^-83.41
239 *
240 * and use poly2(x) to approximate atan(x) for x in [0,1/65] with
241 * error
242 * |atan(x)-poly2(x)|<= 2^-86.8
243 *
244 * Here poly1 and poly2 are odd polynomial with the following form:
245 * x + x^3*(a1+x^2*(a2+...))
261 * If iy is the high word of y, then
262 * single : j = (iy - 0x3e000000) >> 19
263 * double : j = (iy - 0x3fc00000) >> 16
264 * quad : j = (iy - 0x3ffc0000) >> 12
265 *
266 * Let s = (x-y)/(1+x*y). Then
267 * atan(x) = atan(y) + poly1(s)
268 * = _TBL_atan_hi[j] + (_TBL_atan_lo[j] + poly2(s) )
269 *
270 * Note. |s| <= 1.5384615385e-02 = 1/65. Maxium occurs at x = 1.03125
271 *
272 */
273
274 #define P1 p[2]
275 #define P4 p[8]
276 #define P5 p[9]
277 #define P6 p[10]
278 #define P7 p[11]
279 #define P8 p[12]
280 #define P9 p[13]
281
282 static const double p[] = {
283 1.0,
284 0.0,
285 -3.33333333333333314830e-01, /* p1 = BFD55555 55555555 */
286 -1.85030852238476921863e-17, /* p1_l = BC755525 9783A49C */
287 2.00000000000000011102e-01, /* p2 = 3FC99999 9999999A */
288 -1.27263196576150347368e-17, /* p2_l = BC6D584B 0D874007 */
289 -1.42857142857141405923e-01, /* p3 = BFC24924 9249245E */
290 -1.34258204847170493327e-17, /* p3_l = BC6EF534 A112500D */
291 1.11111111110486909803e-01, /* p4 = 3FBC71C7 1C71176A */
292 -9.09090907557387889470e-02, /* p5 = BFB745D1 73B47A7D */
293 7.69230541541713053189e-02, /* p6 = 3FB3B13A B1E68DE6 */
294 -6.66645815401964159097e-02, /* p7 = BFB110EE 1584446A */
295 5.87081768778560317279e-02, /* p8 = 3FAE0EFF 87657733 */
296 -4.90818147456113240690e-02, /* p9 = BFA92140 6A524B5C */
297 };
298
299 #define Q1 q[2]
300 #define Q3 q[6]
301 #define Q4 q[7]
302 #define Q5 q[8]
303
304 static const double q[] = {
305 1.0,
306 0.0,
307 -3.33333333333333314830e-01, /* q1 = BFD55555 55555555 */
308 -1.85022941571278638733e-17, /* q1_l = BC7554E9 D20EFA66 */
309 1.99999999999999927836e-01, /* q2 = 3FC99999 99999997 */
310 -1.28782564407438833398e-17, /* q2_l = BC6DB1FB 17217417 */
311 -1.42857142855492280642e-01, /* q3 = BFC24924 92483C46 */
312 1.11111097130183356096e-01, /* q4 = 3FBC71C6 E06595CC */
313 -9.08553303569109294013e-02, /* q5 = BFB7424B 808CDA76 */
314 };
315
316 static const double one = 1.0,
317 pio2hi = 1.570796326794896558e+00,
318 pio2lo = 6.123233995736765886e-17;
319
320 static double
321 mx_atan(double x, double *err)
322 {
323 double y, z, r, s, t, w, s_h, s_l, x_h, x_l, zz[3], ee[2], z_h, z_l,
324 r_h, r_l, u, v;
325 int ix, iy, sign, j;
326
327 ix = ((int *)&x)[HIWORD];
328 sign = ix & 0x80000000;
329 ix ^= sign;
330
331 /* for |x| < 1/8 */
332 if (ix < 0x3fc00000) {
333 if (ix < 0x3f300000) { /* when |x| < 2**-12 */
334 if (ix < 0x3d800000) { /* if |x| < 2**-39 */
335 *err = (double)((int)x);
336 return (x);
337 }
338
339 z = x * x;
340 t = x * z * (q[2] + z * (q[4] + z * q[6]));
341 r = x + t;
342 *err = t - (r - x);
343 return (r);
344 }
345
346 z = x * x;
347
348 /* use double precision at p4 and on */
349 ee[0] = z * (P4 + z * (P5 + z * (P6 + z * (P7 + z * (P8 + z *
350 P9)))));
351
352 x_h = (double)((float)x);
353 z_h = (double)((float)z);
354 x_l = x - x_h;
355 z_l = (x_h * x_h - z_h);
356 zz[0] = z;
357 zz[1] = z_h;
358 zz[2] = z_l + x_l * (x + x_h);
359
360 /*
361 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
362 * mx_poly
363 */
364
365 mx_poly(zz, p, ee, 3);
366
367 /* finally x*(1+z*(p1+...)) */
368 r = x_h * ee[0];
369 t = x * ee[1] + x_l * ee[0];
370 s = t + r;
371 *err = t - (s - r);
372 return (s);
373 }
374
375 /* for |x| >= 8.0 */
376 if (ix >= 0x40200000) { /* x >= 8 */
377 x = fabs(x);
378
379 if (ix >= 0x42600000) { /* x >= 2**39 */
380 if (ix >= 0x44c00000) /* x >= 2**77 */
381 y = -pio2lo;
382 else
383 y = one / x - pio2lo;
384
385 if (sign == 0) {
386 t = pio2hi - y;
387 *err = -(y - (pio2hi - t));
388 } else {
389 t = y - pio2hi;
390 *err = y - (pio2hi + t);
391 }
392
393 return (t);
394 } else {
395 /* compute r = 1/x */
396 r = one / x;
397 z = r * r;
398
399 if (ix < 0x40504000) { /* 8 < x < 65 */
400 /* use double precision at p4 and on */
401 ee[0] = z * (P4 + z * (P5 + z * (P6 + z * (P7 +
402 z * (P8 + z * P9)))));
403 x_h = (double)((float)x);
404 r_h = (double)((float)r);
405 z_h = (double)((float)z);
406 r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
407 z_l = (r_h * r_h - z_h);
408 zz[0] = z;
409 zz[1] = z_h;
410 zz[2] = z_l + r_l * (r + r_h);
411
412 /*
413 * compute (1+z*(p1+z*(p2+z*(p3+e)))) by call
414 * mx_poly
415 */
416 mx_poly(zz, p, ee, 3);
417 } else { /* x < 65 < 2**39 */
418 /* use double precision at q3 and on */
419 ee[0] = z * (Q3 + z * (Q4 + z * Q5));
420 x_h = (double)((float)x);
421 r_h = (double)((float)r);
422 z_h = (double)((float)z);
423 r_l = r * ((x_h - x) * r_h - (x_h * r_h - one));
424 z_l = (r_h * r_h - z_h);
425 zz[0] = z;
426 zz[1] = z_h;
427 zz[2] = z_l + r_l * (r + r_h);
428
429 /*
430 * compute (1+z*(q1+z*(q2+e))) by call
431 * mx_poly
432 */
433 mx_poly(zz, q, ee, 2);
434 }
435
436 /* pio2 - r*(1+...) */
437 v = r_h * ee[0];
438 t = pio2lo - (r * ee[1] + r_l * ee[0]);
439
440 if (sign == 0) {
441 s = pio2hi - v;
442 t -= (v - (pio2hi - s));
443 } else {
444 s = v - pio2hi;
445 t = -(t - (v - (s + pio2hi)));
446 }
447
448 w = s + t;
449 *err = t - (w - s);
450 return (w);
451 }
452 }
453
454 /* now x is between 1/8 and 8 */
455 ((int *)&x)[HIWORD] = ix;
456 iy = (ix + 0x00008000) & 0x7fff0000;
457 ((int *)&y)[HIWORD] = iy;
458 ((int *)&y)[LOWORD] = 0;
459 j = (iy - 0x3fc00000) >> 16;
460
461 w = (x - y);
462 v = 1 / (one + x * y);
463 s = w * v;
464 z = s * s;
465 /* use double precision at q3 and on */
466 ee[0] = z * (Q3 + z * (Q4 + z * Q5));
467 s_h = (double)((float)s);
468 z_h = (double)((float)z);
469 x_h = (double)((float)x);
470 t = (double)((float)(one + x * y));
471 r = -((x_h - x) * y - (x_h * y - (t - one)));
472 s_l = -v * (s_h * r - (w - s_h * t));
473 z_l = (s_h * s_h - z_h);
474 zz[0] = z;
475 zz[1] = z_h;
476 zz[2] = z_l + s_l * (s + s_h);
477 /* compute (1+z*(q1+z*(q2+e))) by call mx_poly */
478 mx_poly(zz, q, ee, 2);
479 v = s_h * ee[0];
480 t = TBL_atan_lo[j] + (s * ee[1] + s_l * ee[0]);
481 u = TBL_atan_hi[j];
482 s = u + v;
483 t += (v - (s - u));
484 w = s + t;
485 *err = t - (w - s);
486
487 if (sign != 0) {
488 w = -w;
489 *err = -*err;
490 }
491
492 return (w);
493 }
494
495 static const double twom768 = 6.441148769597133308e-232, /* 2^-768 */
496 two768 = 1.552518092300708935e+231, /* 2^768 */
497 pi = 3.1415926535897931159979634685,
498 pi_lo = 1.224646799147353177e-16,
499 pio2 = 1.570796326794896558e+00,
500 pio2_lo = 6.123233995736765886e-17,
501 pio4 = 0.78539816339744827899949,
502 pio4_lo = 3.061616997868382943e-17,
503 pi3o4 = 2.356194490192344836998,
504 pi3o4_lo = 9.184850993605148829195e-17;
505
506 double
507 __k_atan2(double y, double x, double *w)
508 {
509 double t, xh, th, t1, t2, w1, w2;
510 int ix, iy, hx, hy, lx, ly;
511
512 hy = ((int *)&y)[HIWORD];
513 ly = ((int *)&y)[LOWORD];
514 iy = hy & ~0x80000000;
515
516 hx = ((int *)&x)[HIWORD];
517 lx = ((int *)&x)[LOWORD];
518 ix = hx & ~0x80000000;
519
520 *w = 0.0;
521
522 if (ix >= 0x7ff00000 || iy >= 0x7ff00000) { /* ignore inexact */
523 if (isnan(x) || isnan(y)) {
524 return (x * y);
525 } else if (iy < 0x7ff00000) {
526 if (hx >= 0) { /* ATAN2(+-finite, +inf) is +-0 */
527 *w *= y;
528 return (*w);
529 } else { /* ATAN2(+-finite, -inf) is +-pi */
530 *w = copysign(pi_lo, y);
531 return (copysign(pi, y));
532 }
533 } else if (ix < 0x7ff00000) {
534 /* ATAN2(+-inf, finite) is +-pi/2 */
535 *w = (hy >= 0) ? pio2_lo : -pio2_lo;
536 return ((hy >= 0) ? pio2 : -pio2);
537 } else if (hx > 0) { /* ATAN2(+-INF,+INF) = +-pi/4 */
538 *w = (hy >= 0) ? pio4_lo : -pio4_lo;
539 return ((hy >= 0) ? pio4 : -pio4);
540 } else { /* ATAN2(+-INF,-INF) = +-3pi/4 */
541 *w = (hy >= 0) ? pi3o4_lo : -pi3o4_lo;
542 return ((hy >= 0) ? pi3o4 : -pi3o4);
543 }
544 } else if ((ix | lx) == 0 || (iy | ly) == 0) {
545 if ((iy | ly) == 0) {
546 if (hx >= 0) { /* ATAN2(+-0, +(0 <= x <= inf)) is +-0 */
547 return (y);
548 } else { /* ATAN2(+-0, -(0 <= x <= inf)) is +-pi */
549 *w = (hy >= 0) ? pi_lo : -pi_lo;
550 return ((hy >= 0) ? pi : -pi);
551 }
552 } else { /* ATAN2(+-(anything but 0 and NaN), 0) is +-pi/2 */
553 *w = (hy >= 0) ? pio2_lo : -pio2_lo;
554 return ((hy >= 0) ? pio2 : -pio2);
555 }
556 } else if (iy - ix > 0x06400000) { /* |x/y| < 2 ** -100 */
557 *w = (hy >= 0) ? pio2_lo : -pio2_lo;
558 return ((hy >= 0) ? pio2 : -pio2);
559 } else if (ix - iy > 0x06400000) { /* |y/x| < 2 ** -100 */
560 if (hx < 0) {
561 *w = (hy >= 0) ? pi_lo : -pi_lo;
562 return ((hy >= 0) ? pi : -pi);
563 } else {
564 t = y / x;
565 th = t;
566 ((int *)&th)[LOWORD] &= 0xf8000000;
567 xh = x;
568 ((int *)&xh)[LOWORD] &= 0xf8000000;
569 t1 = (x - xh) * t + xh * (t - th);
570 t2 = y - xh * th;
571 *w = (t2 - t1) / x;
572 return (t);
573 }
574 } else {
575 if (ix >= 0x5f300000) {
576 x *= twom768;
577 y *= twom768;
578 } else if (ix < 0x23d00000) {
579 x *= two768;
580 y *= two768;
581 }
582
583 y = fabs(y);
584 x = fabs(x);
585 t = y / x;
586 th = t;
587 ((int *)&th)[LOWORD] &= 0xf8000000;
588 xh = x;
589 ((int *)&xh)[LOWORD] &= 0xf8000000;
590 t1 = (x - xh) * t + xh * (t - th);
591 t2 = y - xh * th;
592 w1 = mx_atan(t, &w2);
593 w2 += (t2 - t1) / (x + y * t);
594
595 if (hx < 0) {
596 t1 = pi - w1;
597 t2 = pi - t1;
598 w2 = (pi_lo - w2) - (w1 - t2);
599 w1 = t1;
600 }
601
602 *w = (hy >= 0) ? w2 : -w2;
603 return ((hy >= 0) ? w1 : -w1);
604 }
605 }
|