Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vatan.c
+++ new/usr/src/lib/libmvec/common/__vatan.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include <sys/isa_defs.h>
31 31 #include "libm_inlines.h"
32 32
33 33 #ifdef _LITTLE_ENDIAN
34 34 #define HI(x) *(1+(int*)x)
35 35 #define LO(x) *(unsigned*)x
36 36 #else
37 37 #define HI(x) *(int*)x
38 38 #define LO(x) *(1+(unsigned*)x)
39 39 #endif
↓ open down ↓ |
39 lines elided |
↑ open up ↑ |
40 40
41 41 #ifdef __RESTRICT
42 42 #define restrict _Restrict
43 43 #else
44 44 #define restrict
45 45 #endif
46 46
47 47 void
48 48 __vatan( int n, double * restrict x, int stridex, double * restrict y, int stridey )
49 49 {
50 - double f , z, ans, ansu , ansl , tmp , poly , conup , conlo , dummy;
50 + double f , z, ans = 0.0L, ansu , ansl , tmp , poly , conup , conlo , dummy;
51 51 double f1, ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
52 52 double f2, ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
53 53 int index, sign, intf, intflo, intz, argcount;
54 - int index1, sign1 ;
55 - int index2, sign2 ;
56 - double *yaddr,*yaddr1,*yaddr2;
54 + int index1, sign1 = 0;
55 + int index2, sign2 = 0;
56 + double *yaddr,*yaddr1 = 0,*yaddr2 = 0;
57 57 extern const double __vlibm_TBL_atan1[];
58 58 extern double fabs( double );
59 59
60 60 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
61 61 * Error = -3.08254E-18 On the interval |x| < 1/64 */
62 62
63 63 /* define dummy names for readability. Use parray to help compiler optimize loads */
64 64 #define p3 parray[0]
65 65 #define p2 parray[1]
66 66 #define p1 parray[2]
67 67
68 68 static const double parray[] = {
69 69 -1.428029046844299722E-01, /* p[3] */
70 70 1.999999917247000615E-01, /* p[2] */
71 71 -3.333333333329292858E-01, /* p[1] */
72 72 1.0, /* not used for p[0], though */
73 73 -1.0, /* used to flip sign of answer */
74 74 };
75 75
76 76 if( n <= 0 ) return; /* if no. of elements is 0 or neg, do nothing */
77 77 do
78 78 {
79 79 LOOP0:
80 80
81 81 f = fabs(*x); /* fetch argument */
82 82 intf = HI(x); /* upper half of x, as integer */
83 83 intflo = LO(x); /* lower half of x, as integer */
84 84 sign = intf & 0x80000000; /* sign of argument */
85 85 intf = intf & ~0x80000000; /* abs(upper argument) */
86 86
87 87 if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
88 88 {
89 89 if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
90 90 {
91 91 ans = f - f; /* return NaN if x=NaN*/
92 92 }
93 93 else if( intf < 0x3e300000 ) /* avoid underflow for small arg */
94 94 {
95 95 dummy = 1.0e37 + f;
96 96 dummy = dummy;
97 97 ans = f;
98 98 }
99 99 else if( intf > 0x43600000 ) /* avoid underflow for big arg */
100 100 {
101 101 index = 2;
102 102 ans = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
103 103 }
104 104 *y = (sign) ? -ans: ans; /* store answer, with sign bit */
105 105 x += stridex;
106 106 y += stridey;
107 107 argcount = 0; /* initialize argcount */
108 108 if ( --n <=0 ) break; /* we are done */
109 109 goto LOOP0; /* otherwise, examine next arg */
110 110 }
111 111
112 112 index = 0; /* points to 0,0 in table */
113 113 if (intf > 0x40500000) /* if(|x| > 64 */
114 114 { f = -1.0/f;
115 115 index = 2; /* point to pi/2 upper, lower */
116 116 }
117 117 else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
118 118 {
119 119 intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
120 120 HI(&z) = intz; /* store as a double (z) */
121 121 LO(&z) = 0; /* ...lower */
122 122 f = (f - z)/(1.0 + f*z); /* get reduced argument */
123 123 index = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */
124 124 index = index + 4; /* skip over 0,0,pi/2,pi/2 */
125 125 }
126 126 yaddr = y; /* address to store this answer */
127 127 x += stridex; /* point to next arg */
128 128 y += stridey; /* point to next result */
129 129 argcount = 1; /* we now have 1 good argument */
130 130 if ( --n <=0 )
131 131 {
132 132 f1 = 0.0; /* put dummy values in args 1,2 */
133 133 f2 = 0.0;
134 134 index1 = 0;
135 135 index2 = 0;
136 136 goto UNROLL3; /* finish up with 1 good arg */
137 137 }
138 138
139 139 /*--------------------------------------------------------------------------*/
140 140 /*--------------------------------------------------------------------------*/
141 141 /*--------------------------------------------------------------------------*/
142 142
143 143 LOOP1:
144 144
145 145 f1 = fabs(*x); /* fetch argument */
146 146 intf = HI(x); /* upper half of x, as integer */
147 147 intflo = LO(x); /* lower half of x, as integer */
148 148 sign1 = intf & 0x80000000; /* sign of argument */
149 149 intf = intf & ~0x80000000; /* abs(upper argument) */
150 150
151 151 if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
152 152 {
153 153 if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
154 154 {
155 155 ans = f1 - f1; /* return NaN if x=NaN*/
156 156 }
157 157 else if( intf < 0x3e300000 ) /* avoid underflow for small arg */
158 158 {
159 159 dummy = 1.0e37 + f1;
160 160 dummy = dummy;
161 161 ans = f1;
162 162 }
163 163 else if( intf > 0x43600000 ) /* avoid underflow for big arg */
164 164 {
165 165 index1 = 2;
166 166 ans = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low */
167 167 }
168 168 *y = (sign1) ? -ans: ans; /* store answer, with sign bit */
169 169 x += stridex;
170 170 y += stridey;
171 171 argcount = 1; /* we still have 1 good arg */
172 172 if ( --n <=0 )
173 173 {
174 174 f1 = 0.0; /* put dummy values in args 1,2 */
175 175 f2 = 0.0;
176 176 index1 = 0;
177 177 index2 = 0;
178 178 goto UNROLL3; /* finish up with 1 good arg */
179 179 }
180 180 goto LOOP1; /* otherwise, examine next arg */
181 181 }
182 182
183 183 index1 = 0; /* points to 0,0 in table */
184 184 if (intf > 0x40500000) /* if(|x| > 64 */
185 185 { f1 = -1.0/f1;
186 186 index1 = 2; /* point to pi/2 upper, lower */
187 187 }
188 188 else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
189 189 {
190 190 intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
191 191 HI(&z) = intz; /* store as a double (z) */
192 192 LO(&z) = 0; /* ...lower */
193 193 f1 = (f1 - z)/(1.0 + f1*z); /* get reduced argument */
194 194 index1 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */
195 195 index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */
196 196 }
197 197 yaddr1 = y; /* address to store this answer */
198 198 x += stridex; /* point to next arg */
199 199 y += stridey; /* point to next result */
200 200 argcount = 2; /* we now have 2 good arguments */
201 201 if ( --n <=0 )
202 202 {
203 203 f2 = 0.0; /* put dummy value in arg 2 */
204 204 index2 = 0;
205 205 goto UNROLL3; /* finish up with 2 good args */
206 206 }
207 207
208 208 /*--------------------------------------------------------------------------*/
209 209 /*--------------------------------------------------------------------------*/
210 210 /*--------------------------------------------------------------------------*/
211 211
212 212 LOOP2:
213 213
214 214 f2 = fabs(*x); /* fetch argument */
215 215 intf = HI(x); /* upper half of x, as integer */
216 216 intflo = LO(x); /* lower half of x, as integer */
217 217 sign2 = intf & 0x80000000; /* sign of argument */
218 218 intf = intf & ~0x80000000; /* abs(upper argument) */
219 219
220 220 if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
221 221 {
222 222 if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
223 223 {
224 224 ans = f2 - f2; /* return NaN if x=NaN*/
225 225 }
226 226 else if( intf < 0x3e300000 ) /* avoid underflow for small arg */
227 227 {
228 228 dummy = 1.0e37 + f2;
229 229 dummy = dummy;
230 230 ans = f2;
231 231 }
232 232 else if( intf > 0x43600000 ) /* avoid underflow for big arg */
233 233 {
234 234 index2 = 2;
235 235 ans = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low */
236 236 }
237 237 *y = (sign2) ? -ans: ans; /* store answer, with sign bit */
238 238 x += stridex;
239 239 y += stridey;
240 240 argcount = 2; /* we still have 2 good args */
241 241 if ( --n <=0 )
242 242 {
243 243 f2 = 0.0; /* put dummy value in arg 2 */
244 244 index2 = 0;
245 245 goto UNROLL3; /* finish up with 2 good args */
246 246 }
247 247 goto LOOP2; /* otherwise, examine next arg */
248 248 }
249 249
250 250 index2 = 0; /* points to 0,0 in table */
251 251 if (intf > 0x40500000) /* if(|x| > 64 */
252 252 { f2 = -1.0/f2;
253 253 index2 = 2; /* point to pi/2 upper, lower */
254 254 }
255 255 else if( intf >= 0x3f900000 ) /* if |x| >= (1/64)... */
256 256 {
257 257 intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
258 258 HI(&z) = intz; /* store as a double (z) */
259 259 LO(&z) = 0; /* ...lower */
260 260 f2 = (f2 - z)/(1.0 + f2*z); /* get reduced argument */
261 261 index2 = (intz - 0x3f900000) >> 15; /* (index >> 16) << 1) */
262 262 index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */
263 263 }
264 264 yaddr2 = y; /* address to store this answer */
265 265 x += stridex; /* point to next arg */
266 266 y += stridey; /* point to next result */
267 267 argcount = 3; /* we now have 3 good arguments */
268 268
269 269
270 270 /* here is the 3 way unrolled section,
271 271 note, we may actually only have
272 272 1,2, or 3 'real' arguments at this point
273 273 */
274 274
275 275 UNROLL3:
276 276
277 277 conup = __vlibm_TBL_atan1[index ]; /* upper table */
278 278 conup1 = __vlibm_TBL_atan1[index1]; /* upper table */
279 279 conup2 = __vlibm_TBL_atan1[index2]; /* upper table */
280 280
281 281 conlo = __vlibm_TBL_atan1[index +1]; /* lower table */
282 282 conlo1 = __vlibm_TBL_atan1[index1+1]; /* lower table */
283 283 conlo2 = __vlibm_TBL_atan1[index2+1]; /* lower table */
284 284
285 285 tmp = f *f ;
286 286 tmp1 = f1*f1;
287 287 tmp2 = f2*f2;
288 288
289 289 poly = f *((p3*tmp + p2)*tmp + p1)*tmp ;
290 290 poly1 = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
291 291 poly2 = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;
292 292
293 293 ansu = conup + f ; /* compute atan(f) upper */
294 294 ansu1 = conup1 + f1; /* compute atan(f) upper */
295 295 ansu2 = conup2 + f2; /* compute atan(f) upper */
296 296
297 297 ansl = (((conup - ansu ) + f ) + poly ) + conlo ;
298 298 ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1;
299 299 ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2;
300 300
301 301 ans = ansu + ansl ;
302 302 ans1 = ansu1 + ansl1;
303 303 ans2 = ansu2 + ansl2;
304 304
305 305 /* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */
306 306
307 307 *yaddr = sign ? -ans: ans; /* this one is always good */
308 308 if(argcount < 3) break; /* end loop and finish up */
309 309 *yaddr1 = sign1 ? -ans1: ans1;
310 310 *yaddr2 = sign2 ? -ans2: ans2;
311 311
312 312 } while (--n > 0);
313 313
314 314 if(argcount == 2)
315 315 { *yaddr1 = sign1 ? -ans1: ans1;
316 316 }
317 317 }
↓ open down ↓ |
251 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX