Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vatanf.c
+++ new/usr/src/lib/libmvec/common/__vatanf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #ifdef __RESTRICT
↓ open down ↓ |
30 lines elided |
↑ open up ↑ |
31 31 #define restrict _Restrict
32 32 #else
33 33 #define restrict
34 34 #endif
35 35
36 36 void
37 37 __vatanf( int n, float * restrict x, int stridex, float * restrict y, int stridey )
38 38 {
39 39 extern const double __vlibm_TBL_atan1[];
40 40 double conup0, conup1, conup2;
41 - float dummy, ansf;
41 + float dummy, ansf = 0.0;
42 42 float f0, f1, f2;
43 43 float ans0, ans1, ans2;
44 44 float poly0, poly1, poly2;
45 45 float sign0, sign1, sign2;
46 46 int intf, intz, argcount;
47 47 int index0, index1, index2;
48 48 float z,*yaddr0,*yaddr1,*yaddr2;
49 49 int *pz = (int *) &z;
50 50 #ifdef UNROLL4
51 51 double conup3;
52 52 int index3;
53 53 float f3, ans3, poly3, sign3, *yaddr3;
54 54 #endif
55 55
56 56 /* Power series atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
57 57 * Error = -3.08254E-18 On the interval |x| < 1/64 */
58 58
59 59 static const float p1 = -0.33329644f /* -3.333333333329292858E-01f */ ;
60 60 static const float pone = 1.0f;
61 61
62 62 if( n <= 0 ) return; /* if no. of elements is 0 or neg, do nothing */
63 63 do
64 64 {
65 65 LOOP0:
66 66
67 67 intf = *(int *) x; /* upper half of x, as integer */
68 68 f0 = *x;
69 69 sign0 = pone;
70 70 if (intf < 0) {
71 71 intf = intf & ~0x80000000; /* abs(upper argument) */
72 72 f0 = -f0;
73 73 sign0 = -sign0;
74 74 }
75 75
76 76 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
77 77 {
78 78 if( intf > 0x7f800000 )
79 79 {
80 80 ansf = f0- f0; /* return NaN if x=NaN*/
81 81 }
82 82 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
83 83 {
84 84 dummy = 1.0e37 + f0;
85 85 dummy = dummy;
86 86 ansf = f0;
87 87 }
88 88 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
89 89 {
90 90 index0= 2;
91 91 ansf = __vlibm_TBL_atan1[index0];/* pi/2 up */
92 92 }
93 93 *y = sign0*ansf; /* store answer, with sign bit */
94 94 x += stridex;
95 95 y += stridey;
96 96 argcount = 0; /* initialize argcount */
97 97 if ( --n <=0 ) break; /* we are done */
98 98 goto LOOP0; /* otherwise, examine next arg */
99 99 }
100 100
101 101 if (intf > 0x42800000) /* if(|x| > 64 */
102 102 {
103 103 f0 = -pone/f0;
104 104 index0 = 2; /* point to pi/2 upper, lower */
105 105 }
106 106 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
107 107 {
108 108 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
109 109 pz[0] = intz; /* store as a float (z) */
110 110 f0 = (f0 - z)/(pone + f0*z);
111 111 index0 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
112 112 index0 = index0+ 4; /* skip over 0,0,pi/2,pi/2 */
113 113 }
114 114 else /* |x| < 1/64 */
115 115 {
116 116 index0 = 0; /* points to 0,0 in table */
117 117 }
118 118 yaddr0 = y; /* address to store this answer */
119 119 x += stridex; /* point to next arg */
120 120 y += stridey; /* point to next result */
121 121 argcount = 1; /* we now have 1 good argument */
122 122 if ( --n <=0 )
123 123 {
124 124 goto UNROLL; /* finish up with 1 good arg */
125 125 }
126 126
127 127 /*--------------------------------------------------------------------------*/
128 128 /*--------------------------------------------------------------------------*/
129 129 /*--------------------------------------------------------------------------*/
130 130
131 131 LOOP1:
132 132
133 133 intf = *(int *) x; /* upper half of x, as integer */
134 134 f1 = *x;
135 135 sign1 = pone;
136 136 if (intf < 0) {
137 137 intf = intf & ~0x80000000; /* abs(upper argument) */
138 138 f1 = -f1;
139 139 sign1 = -sign1;
140 140 }
141 141
142 142 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
143 143 {
144 144 if( intf > 0x7f800000 )
145 145 {
146 146 ansf = f1 - f1; /* return NaN if x=NaN*/
147 147 }
148 148 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
149 149 {
150 150 dummy = 1.0e37 + f1;
151 151 dummy = dummy;
152 152 ansf = f1;
153 153 }
154 154 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
155 155 {
156 156 index1 = 2;
157 157 ansf = __vlibm_TBL_atan1[index1] ;/* pi/2 up */
158 158 }
159 159 *y = sign1 * ansf; /* store answer, with sign bit */
160 160 x += stridex;
161 161 y += stridey;
162 162 argcount = 1; /* we still have 1 good arg */
163 163 if ( --n <=0 )
164 164 {
165 165 goto UNROLL; /* finish up with 1 good arg */
166 166 }
167 167 goto LOOP1; /* otherwise, examine next arg */
168 168 }
169 169
170 170 if (intf > 0x42800000) /* if(|x| > 64 */
171 171 {
172 172 f1 = -pone/f1;
173 173 index1 = 2; /* point to pi/2 upper, lower */
174 174 }
175 175 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
176 176 {
177 177 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
178 178 pz[0] = intz; /* store as a float (z) */
179 179 f1 = (f1 - z)/(pone + f1*z);
180 180 index1 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
181 181 index1 = index1 + 4; /* skip over 0,0,pi/2,pi/2 */
182 182 }
183 183 else
184 184 {
185 185 index1 = 0; /* points to 0,0 in table */
186 186 }
187 187
188 188 yaddr1 = y; /* address to store this answer */
189 189 x += stridex; /* point to next arg */
190 190 y += stridey; /* point to next result */
191 191 argcount = 2; /* we now have 2 good arguments */
192 192 if ( --n <=0 )
193 193 {
194 194 goto UNROLL; /* finish up with 2 good args */
195 195 }
196 196
197 197 /*--------------------------------------------------------------------------*/
198 198 /*--------------------------------------------------------------------------*/
199 199 /*--------------------------------------------------------------------------*/
200 200
201 201 LOOP2:
202 202
203 203 intf = *(int *) x; /* upper half of x, as integer */
204 204 f2 = *x;
205 205 sign2 = pone;
206 206 if (intf < 0) {
207 207 intf = intf & ~0x80000000; /* abs(upper argument) */
208 208 f2 = -f2;
209 209 sign2 = -sign2;
210 210 }
211 211
212 212 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
213 213 {
214 214 if( intf > 0x7f800000 )
215 215 {
216 216 ansf = f2 - f2; /* return NaN if x=NaN*/
217 217 }
218 218 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
219 219 {
220 220 dummy = 1.0e37 + f2;
221 221 dummy = dummy;
222 222 ansf = f2;
223 223 }
224 224 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
225 225 {
226 226 index2 = 2;
227 227 ansf = __vlibm_TBL_atan1[index2] ;/* pi/2 up */
228 228 }
229 229 *y = sign2 * ansf; /* store answer, with sign bit */
230 230 x += stridex;
231 231 y += stridey;
232 232 argcount = 2; /* we still have 2 good args */
233 233 if ( --n <=0 )
234 234 {
235 235 goto UNROLL; /* finish up with 2 good args */
236 236 }
237 237 goto LOOP2; /* otherwise, examine next arg */
238 238 }
239 239
240 240 if (intf > 0x42800000) /* if(|x| > 64 */
241 241 {
242 242 f2 = -pone/f2;
243 243 index2 = 2; /* point to pi/2 upper, lower */
244 244 }
245 245 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
246 246 {
247 247 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
248 248 pz[0] = intz; /* store as a float (z) */
249 249 f2 = (f2 - z)/(pone + f2*z);
250 250 index2 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
251 251 index2 = index2 + 4; /* skip over 0,0,pi/2,pi/2 */
252 252 }
253 253 else
254 254 {
255 255 index2 = 0; /* points to 0,0 in table */
256 256 }
257 257 yaddr2 = y; /* address to store this answer */
258 258 x += stridex; /* point to next arg */
259 259 y += stridey; /* point to next result */
260 260 argcount = 3; /* we now have 3 good arguments */
261 261 if ( --n <=0 )
262 262 {
263 263 goto UNROLL; /* finish up with 2 good args */
264 264 }
265 265
266 266
267 267 /*--------------------------------------------------------------------------*/
268 268 /*--------------------------------------------------------------------------*/
269 269 /*--------------------------------------------------------------------------*/
270 270
271 271 #ifdef UNROLL4
272 272 LOOP3:
273 273
274 274 intf = *(int *) x; /* upper half of x, as integer */
275 275 f3 = *x;
276 276 sign3 = pone;
277 277 if (intf < 0) {
278 278 intf = intf & ~0x80000000; /* abs(upper argument) */
279 279 f3 = -f3;
280 280 sign3 = -sign3;
281 281 }
282 282
283 283 if( (intf > 0x5B000000) || (intf < 0x31800000) ) /* filter out special cases */
284 284 {
285 285 if( intf > 0x7f800000 )
286 286 {
287 287 ansf = f3 - f3; /* return NaN if x=NaN*/
288 288 }
289 289 else if( intf < 0x31800000 ) /* avoid underflow for small arg */
290 290 {
291 291 dummy = 1.0e37 + f3;
292 292 dummy = dummy;
293 293 ansf = f3;
294 294 }
295 295 else if( intf > 0x5B000000 ) /* avoid underflow for big arg */
296 296 {
297 297 index3 = 2;
298 298 ansf = __vlibm_TBL_atan1[index3] ;/* pi/2 up */
299 299 }
300 300 *y = sign3 * ansf; /* store answer, with sign bit */
301 301 x += stridex;
302 302 y += stridey;
303 303 argcount = 3; /* we still have 3 good args */
304 304 if ( --n <=0 )
305 305 {
306 306 goto UNROLL; /* finish up with 3 good args */
307 307 }
308 308 goto LOOP3; /* otherwise, examine next arg */
309 309 }
310 310
311 311 if (intf > 0x42800000) /* if(|x| > 64 */
312 312 {
313 313 n3 = -pone;
314 314 d3 = f3;
315 315 f3 = n3/d3;
316 316 index3 = 2; /* point to pi/2 upper, lower */
317 317 }
318 318 else if( intf >= 0x3C800000 ) /* if |x| >= (1/64)... */
319 319 {
320 320 intz = (intf + 0x00040000) & 0x7ff80000;/* round arg, keep upper */
321 321 pz[0] = intz; /* store as a float (z) */
322 322 n3 = (f3 - z);
323 323 d3 = (pone + f3*z); /* get reduced argument */
324 324 f3 = n3/d3;
325 325 index3 = (intz - 0x3C800000) >> 18; /* (index >> 19) << 1) */
326 326 index3 = index3 + 4; /* skip over 0,0,pi/2,pi/2 */
327 327 }
328 328 else
329 329 {
330 330 n3 = f3;
331 331 d3 = pone;
332 332 index3 = 0; /* points to 0,0 in table */
333 333 }
334 334 yaddr3 = y; /* address to store this answer */
335 335 x += stridex; /* point to next arg */
336 336 y += stridey; /* point to next result */
337 337 argcount = 4; /* we now have 4 good arguments */
338 338 if ( --n <=0 )
339 339 {
340 340 goto UNROLL; /* finish up with 3 good args */
341 341 }
342 342 #endif /* UNROLL4 */
343 343
344 344 /* here is the n-way unrolled section,
345 345 but we may actually have less than n
346 346 arguments at this point
347 347 */
348 348
349 349 UNROLL:
350 350
351 351 #ifdef UNROLL4
352 352 if (argcount == 4)
353 353 {
354 354 conup0 = __vlibm_TBL_atan1[index0];
355 355 conup1 = __vlibm_TBL_atan1[index1];
356 356 conup2 = __vlibm_TBL_atan1[index2];
357 357 conup3 = __vlibm_TBL_atan1[index3];
358 358 poly0 = p1*f0*f0*f0 + f0;
359 359 ans0 = sign0 * (float)(conup0 + poly0);
360 360 poly1 = p1*f1*f1*f1 + f1;
361 361 ans1 = sign1 * (float)(conup1 + poly1);
362 362 poly2 = p1*f2*f2*f2 + f2;
363 363 ans2 = sign2 * (float)(conup2 + poly2);
364 364 poly3 = p1*f3*f3*f3 + f3;
365 365 ans3 = sign3 * (float)(conup3 + poly3);
366 366 *yaddr0 = ans0;
367 367 *yaddr1 = ans1;
368 368 *yaddr2 = ans2;
369 369 *yaddr3 = ans3;
370 370 }
371 371 else
372 372 #endif
373 373 if (argcount == 3)
374 374 {
375 375 conup0 = __vlibm_TBL_atan1[index0];
376 376 conup1 = __vlibm_TBL_atan1[index1];
377 377 conup2 = __vlibm_TBL_atan1[index2];
378 378 poly0 = p1*f0*f0*f0 + f0;
379 379 poly1 = p1*f1*f1*f1 + f1;
380 380 poly2 = p1*f2*f2*f2 + f2;
381 381 ans0 = sign0 * (float)(conup0 + poly0);
382 382 ans1 = sign1 * (float)(conup1 + poly1);
383 383 ans2 = sign2 * (float)(conup2 + poly2);
384 384 *yaddr0 = ans0;
385 385 *yaddr1 = ans1;
386 386 *yaddr2 = ans2;
387 387 }
388 388 else
389 389 if (argcount == 2)
390 390 {
391 391 conup0 = __vlibm_TBL_atan1[index0];
392 392 conup1 = __vlibm_TBL_atan1[index1];
393 393 poly0 = p1*f0*f0*f0 + f0;
394 394 poly1 = p1*f1*f1*f1 + f1;
395 395 ans0 = sign0 * (float)(conup0 + poly0);
396 396 ans1 = sign1 * (float)(conup1 + poly1);
397 397 *yaddr0 = ans0;
398 398 *yaddr1 = ans1;
399 399 }
400 400 else
401 401 if (argcount == 1)
402 402 {
403 403 conup0 = __vlibm_TBL_atan1[index0];
404 404 poly0 = p1*f0*f0*f0 + f0;
405 405 ans0 = sign0 * (float)(conup0 + poly0);
406 406 *yaddr0 = ans0;
407 407 }
408 408
409 409 } while (n > 0);
410 410
411 411 }
↓ open down ↓ |
360 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX