/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/isa_defs.h>
#include "libm_inlines.h"

/*
 * HI(x) / LO(x): given a pointer x to a double, yield an lvalue for one of
 * the two int-sized words that make it up.  HI is the word tested below for
 * sign, exponent and NaN/Inf (0x80000000, 0x7ff00000); LO is the other word.
 * Which word sits first in memory is selected by _LITTLE_ENDIAN.
 *
 * NOTE(review): neither the argument nor the expansion is parenthesized, so
 * only simple pointer expressions (x, &z) are safe arguments.
 * NOTE(review): the access goes through int/unsigned pointers to a double
 * object; this assumes 32-bit int and a compiler that tolerates this kind of
 * type punning -- confirm against the build flags.
 */
#ifdef _LITTLE_ENDIAN
#define HI(x)	*(1+(int*)x)
#define LO(x)	*(unsigned*)x
#else
#define HI(x)	*(int*)x
#define LO(x)	*(1+(unsigned*)x)
#endif

/*
 * Map "restrict" onto the compiler's _Restrict when __RESTRICT is defined;
 * otherwise make it vanish.
 */
#ifdef __RESTRICT
#define restrict _Restrict
#else
#define restrict
#endif

/*
 * __vatan: vector arctangent.
 *
 *	n	 number of elements to process; n <= 0 is a no-op
 *	x	 first input element
 *	stridex	 distance between consecutive inputs, in doubles
 *	y	 first output element
 *	stridey	 distance between consecutive outputs, in doubles
 *
 * Each input is classified from the high word of its bit pattern:
 *
 *   - NaN, high word below 0x3e300000 (the high word of 2**-28), or high
 *     word above 0x43600000 (the high word of 2**55): handled immediately
 *     and stored, without entering the polynomial path.
 *   - everything else is a "good" argument: it is reduced to a small value
 *     f plus an index into __vlibm_TBL_atan1[], and queued.
 *
 * Good arguments are collected three at a time (stages LOOP0, LOOP1, LOOP2)
 * and then evaluated together in UNROLL3, so that three independent
 * dependency chains are available to the compiler/CPU.  If the input runs
 * out before three are collected, the unused slots are filled with dummy
 * values (f = 0, index = 0) and argcount records how many results are real.
 *
 * The table is defined elsewhere; according to the comments in this
 * function its layout is pairs of (upper, lower) doubles: entries 0,1 are
 * 0,0; entries 2,3 are pi/2 upper, lower; entries from 4 on are indexed by
 * the rounded argument z (presumably holding atan(z) -- TODO confirm
 * against the table source).
 */
void
__vatan( int n, double * restrict x, int stridex, double * restrict y, int stridey )
{
	double f , z, ans, ansu , ansl , tmp , poly , conup , conlo , dummy;
	double f1, ans1, ansu1, ansl1, tmp1, poly1, conup1, conlo1;
	double f2, ans2, ansu2, ansl2, tmp2, poly2, conup2, conlo2;
	int index, sign, intf, intflo, intz, argcount;
	int index1, sign1 ;
	int index2, sign2 ;
	double *yaddr,*yaddr1,*yaddr2;
	extern const double __vlibm_TBL_atan1[];
	extern double fabs( double );

/*    Power series  atan(x) = x + p1*x**3 + p2*x**5 + p3*x**7
 *    Error =  -3.08254E-18   On the interval  |x| < 1/64 */

/* define dummy names for readability.  Use parray to help compiler optimize loads */
#define p3    parray[0]
#define p2    parray[1]
#define p1    parray[2]

	/* Only entries 0..2 (p3, p2, p1) are referenced by the code in this function. */
	static const double parray[] = {
		-1.428029046844299722E-01,	/* p[3] */
		 1.999999917247000615E-01,	/* p[2] */
		-3.333333333329292858E-01,	/* p[1] */
		 1.0,				/* not used for p[0], though */
		-1.0,				/* used to flip sign of answer */
	};

	if( n <= 0 ) return;		/* if no. of elements is 0 or neg, do nothing */
	do
	{
	/*
	 * Stage 0: look for the first good argument of the next triple.
	 * Special cases are stored right away and the search restarts here.
	 */
	LOOP0:

		f = fabs(*x);			/* fetch argument */
		intf = HI(x);			/* upper half of x, as integer */
		intflo = LO(x);			/* lower half of x, as integer */
		sign = intf & 0x80000000;	/* sign of argument */
		intf = intf & ~0x80000000;	/* abs(upper argument) */

		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
		{
			/* exponent all ones with a nonzero mantissa => NaN */
			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
			{
				ans = f - f;		/* return NaN if x=NaN*/
			}
			else if( intf < 0x3e300000 )	/* tiny arg: result is the argument itself */
			{
				/*
				 * The sum is discarded; presumably it is evaluated
				 * only for its floating-point side effect (inexact)
				 * -- TODO confirm.
				 * NOTE(review): an optimizing compiler may elide it.
				 */
				dummy = 1.0e37 + f;
				dummy = dummy;
				ans = f;
			}
			else if( intf > 0x43600000 )	/* big arg (incl. Inf): result is pi/2 */
			{
				index = 2;
				ans = __vlibm_TBL_atan1[index] + __vlibm_TBL_atan1[index+1];/* pi/2 up + pi/2 low */
			}
			*y = (sign) ? -ans: ans;	/* store answer, with sign bit */
			x += stridex;
			y += stridey;
			argcount = 0;			/* initialize argcount */
			if ( --n <=0 ) break;		/* we are done */
			goto LOOP0;			/* otherwise, examine next arg */
		}

		/*
		 * Argument reduction.  Three ranges, selected on the high word:
		 *   below 1/64:            f unchanged, table entry 0 (0,0)
		 *   high word above 64's:  f = -1/f, table entry 2 (pi/2)
		 *   otherwise:             f = (f - z)/(1 + f*z) with z = f
		 *                          rounded to the top 16 bits of its
		 *                          high word; table entry chosen by z
		 */
		index = 0;				/* points to 0,0 in table */
		if (intf > 0x40500000)			/* if(|x| > 64               */
		{ f = -1.0/f;
			index = 2;			/* point to pi/2 upper, lower */
		}
		else if( intf >= 0x3f900000 )		/* if |x| >= (1/64)... */
		{
			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
			HI(&z) = intz;			/* store as a double (z) */
			LO(&z) = 0;			/* ...lower */
			f = (f - z)/(1.0 + f*z);	/* get reduced argument */
			/*
			 * Low 16 bits of intz are zero, so >> 15 equals
			 * (>> 16) << 1: two table slots (upper, lower) per z.
			 */
			index = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1) */
			index = index + 4;		/* skip over 0,0,pi/2,pi/2 */
		}
		yaddr = y;				/* address to store this answer */
		x += stridex;				/* point to next arg */
		y += stridey;				/* point to next result */
		argcount = 1;				/* we now have 1 good argument */
		if ( --n <=0 )
		{
			f1 = 0.0;			/* put dummy values in args 1,2 */
			f2 = 0.0;
			index1 = 0;
			index2 = 0;
			goto UNROLL3;			/* finish up with 1 good arg */
		}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

	/*
	 * Stage 1: look for the second good argument.  Same classification
	 * as stage 0.  'ans' is used as scratch for special cases here; that
	 * is safe because UNROLL3 recomputes 'ans' before storing *yaddr.
	 */
	LOOP1:

		f1 = fabs(*x);			/* fetch argument */
		intf = HI(x);			/* upper half of x, as integer */
		intflo = LO(x);			/* lower half of x, as integer */
		sign1 = intf & 0x80000000;	/* sign of argument */
		intf = intf & ~0x80000000;	/* abs(upper argument) */

		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
		{
			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
			{
				ans = f1 - f1;		/* return NaN if x=NaN*/
			}
			else if( intf < 0x3e300000 )	/* tiny arg: result is the argument itself */
			{
				/* discarded sum; see the note in stage 0 */
				dummy = 1.0e37 + f1;
				dummy = dummy;
				ans = f1;
			}
			else if( intf > 0x43600000 )	/* big arg (incl. Inf): result is pi/2 */
			{
				index1 = 2;
				ans = __vlibm_TBL_atan1[index1] + __vlibm_TBL_atan1[index1+1];/* pi/2 up + pi/2 low */
			}
			*y = (sign1) ? -ans: ans;	/* store answer, with sign bit */
			x += stridex;
			y += stridey;
			argcount = 1;			/* we still have 1 good arg */
			if ( --n <=0 )
			{
				f1 = 0.0;		/* put dummy values in args 1,2 */
				f2 = 0.0;
				index1 = 0;
				index2 = 0;
				goto UNROLL3;		/* finish up with 1 good arg */
			}
			goto LOOP1;			/* otherwise, examine next arg */
		}

		/* argument reduction, identical to stage 0 */
		index1 = 0;				/* points to 0,0 in table */
		if (intf > 0x40500000)			/* if(|x| > 64               */
		{ f1 = -1.0/f1;
			index1 = 2;			/* point to pi/2 upper, lower */
		}
		else if( intf >= 0x3f900000 )		/* if |x| >= (1/64)... */
		{
			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
			HI(&z) = intz;			/* store as a double (z) */
			LO(&z) = 0;			/* ...lower */
			f1 = (f1 - z)/(1.0 + f1*z);	/* get reduced argument */
			index1 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1) */
			index1 = index1 + 4;		/* skip over 0,0,pi/2,pi/2 */
		}
		yaddr1 = y;				/* address to store this answer */
		x += stridex;				/* point to next arg */
		y += stridey;				/* point to next result */
		argcount = 2;				/* we now have 2 good arguments */
		if ( --n <=0 )
		{
			f2 = 0.0;			/* put dummy value in arg 2 */
			index2 = 0;
			goto UNROLL3;			/* finish up with 2 good args */
		}

/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/
/*--------------------------------------------------------------------------*/

	/*
	 * Stage 2: look for the third good argument.  Same classification
	 * as stages 0 and 1; 'ans' is again only scratch for special cases.
	 */
	LOOP2:

		f2 = fabs(*x);			/* fetch argument */
		intf = HI(x);			/* upper half of x, as integer */
		intflo = LO(x);			/* lower half of x, as integer */
		sign2 = intf & 0x80000000;	/* sign of argument */
		intf = intf & ~0x80000000;	/* abs(upper argument) */

		if( (intf > 0x43600000) || (intf < 0x3e300000) ) /* filter out special cases */
		{
			if( (intf > 0x7ff00000) || ((intf == 0x7ff00000) && (intflo !=0) ) )
			{
				ans = f2 - f2;		/* return NaN if x=NaN*/
			}
			else if( intf < 0x3e300000 )	/* tiny arg: result is the argument itself */
			{
				/* discarded sum; see the note in stage 0 */
				dummy = 1.0e37 + f2;
				dummy = dummy;
				ans = f2;
			}
			else if( intf > 0x43600000 )	/* big arg (incl. Inf): result is pi/2 */
			{
				index2 = 2;
				ans = __vlibm_TBL_atan1[index2] + __vlibm_TBL_atan1[index2+1];/* pi/2 up + pi/2 low */
			}
			*y = (sign2) ? -ans: ans;	/* store answer, with sign bit */
			x += stridex;
			y += stridey;
			argcount = 2;			/* we still have 2 good args */
			if ( --n <=0 )
			{
				f2 = 0.0;		/* put dummy value in arg 2 */
				index2 = 0;
				goto UNROLL3;		/* finish up with 2 good args */
			}
			goto LOOP2;			/* otherwise, examine next arg */
		}

		/* argument reduction, identical to stage 0 */
		index2 = 0;				/* points to 0,0 in table */
		if (intf > 0x40500000)			/* if(|x| > 64               */
		{ f2 = -1.0/f2;
			index2 = 2;			/* point to pi/2 upper, lower */
		}
		else if( intf >= 0x3f900000 )		/* if |x| >= (1/64)... */
		{
			intz = (intf + 0x00008000) & 0x7fff0000;/* round arg, keep upper */
			HI(&z) = intz;			/* store as a double (z) */
			LO(&z) = 0;			/* ...lower */
			f2 = (f2 - z)/(1.0 + f2*z);	/* get reduced argument */
			index2 = (intz - 0x3f900000) >> 15;	/* (index >> 16) << 1) */
			index2 = index2 + 4;		/* skip over 0,0,pi/2,pi/2 */
		}
		yaddr2 = y;				/* address to store this answer */
		x += stridex;				/* point to next arg */
		y += stridey;				/* point to next result */
		/*
		 * n is NOT decremented here for the third argument; the
		 * "while (--n > 0)" at the bottom of the loop accounts for it.
		 */
		argcount = 3;				/* we now have 3 good arguments */


/* here is the 3 way unrolled section,
   note, we may actually only have
   1,2, or 3 'real' arguments at this point
*/

	UNROLL3:

		conup  = __vlibm_TBL_atan1[index ];	/* upper table */
		conup1 = __vlibm_TBL_atan1[index1];	/* upper table */
		conup2 = __vlibm_TBL_atan1[index2];	/* upper table */

		conlo  = __vlibm_TBL_atan1[index +1];	/* lower table */
		conlo1 = __vlibm_TBL_atan1[index1+1];	/* lower table */
		conlo2 = __vlibm_TBL_atan1[index2+1];	/* lower table */

		tmp  = f *f ;
		tmp1 = f1*f1;
		tmp2 = f2*f2;

		/* odd-power tail of the series: p1*f**3 + p2*f**5 + p3*f**7 */
		poly  = f *((p3*tmp  + p2)*tmp  + p1)*tmp ;
		poly1 = f1*((p3*tmp1 + p2)*tmp1 + p1)*tmp1;
		poly2 = f2*((p3*tmp2 + p2)*tmp2 + p1)*tmp2;

		ansu  = conup  + f ;			/* compute atan(f) upper */
		ansu1 = conup1 + f1;			/* compute atan(f) upper */
		ansu2 = conup2 + f2;			/* compute atan(f) upper */

		/*
		 * Lower part: (conup - ansu) + f recovers what was rounded off
		 * when forming ansu; the polynomial tail and the table's lower
		 * word are then added.  The parenthesization fixes the
		 * evaluation order and must not be rearranged.
		 */
		ansl  = (((conup  - ansu ) + f ) + poly ) + conlo ;
		ansl1 = (((conup1 - ansu1) + f1) + poly1) + conlo1;
		ansl2 = (((conup2 - ansu2) + f2) + poly2) + conlo2;

		ans  = ansu  + ansl ;
		ans1 = ansu1 + ansl1;
		ans2 = ansu2 + ansl2;

/* now check to see if these are 'real' or 'dummy' arguments BEFORE storing */

		*yaddr  = sign ? -ans: ans;		/* this one is always good */
		if(argcount < 3) break;			/* end loop and finish up */
		*yaddr1 = sign1 ? -ans1: ans1;
		*yaddr2 = sign2 ? -ans2: ans2;

	} while (--n > 0);

	/*
	 * The loop was left with exactly two good arguments computed but only
	 * the first one stored; store the second.  (With argcount 0, 1 or 3
	 * there is nothing left to store.)
	 */
	if(argcount == 2)
	{  *yaddr1  = sign1 ? -ans1: ans1;
	}
}