Print this page
5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/__vrsqrtf.c
+++ new/usr/src/lib/libmvec/common/__vrsqrtf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 -#include "libm_synonyms.h"
31 30 #include "libm_inlines.h"
32 31
33 32 #ifdef __RESTRICT
34 33 #define restrict _Restrict
35 34 #else
36 35 #define restrict
37 36 #endif
38 37
39 38 /* float rsqrtf(float x)
40 39 *
41 40 * Method :
42 41 * 1. Special cases:
43 42 * for x = NaN => QNaN;
44 43 * for x = +Inf => 0;
45 44 * for x is negative, -Inf => QNaN + invalid;
46 45 * for x = +0 => +Inf + divide-by-zero;
47 46 * for x = -0 => -Inf + divide-by-zero.
48 47 * 2. Computes reciprocal square root from:
49 48 * x = m * 2**n
50 49 * Where:
51 50 * m = [0.5, 2),
52 51 * n = ((exponent + 1) & ~1).
53 52 * Then:
54 53 * rsqrtf(x) = 1/sqrt( m * 2**n ) = (2 ** (-n/2)) * (1/sqrt(m))
55 54 * 2. Computes 1/sqrt(m) from:
56 55 * 1/sqrt(m) = (1/sqrt(m0)) * (1/sqrt(1 + (1/m0)*dm))
57 56 * Where:
58 57 * m = m0 + dm,
59 58 * m0 = 0.5 * (1 + k/64) for m = [0.5, 0.5+127/256), k = [0, 63];
60 59 * m0 = 1.0 * (0 + k/64) for m = [0.5+127/256, 1.0+127/128), k = [64, 127];
61 60 * Then:
62 61 * 1/sqrt(m0), 1/m0 are looked up in a table,
63 62 * 1/sqrt(1 + (1/m0)*dm) is computed using approximation:
↓ open down ↓ |
23 lines elided |
↑ open up ↑ |
64 63 * 1/sqrt(1 + z) = ((a3 * z + a2) * z + a1) * z + a0
65 64 * where z = [-1/64, 1/64].
66 65 *
67 66 * Accuracy:
68 67 * The maximum relative error for the approximating
69 68 * polynomial is 2**(-27.87).
70 69 * Maximum error observed: less than 0.534 ulp for the
71 70 * whole float type range.
72 71 */
73 72
74 -#define sqrtf __sqrtf
75 -
76 73 extern float sqrtf(float);
77 74
78 75 static const double __TBL_rsqrtf[] = {
79 76 /*
80 77 i = [0,63]
81 78 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-24;
82 79 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
83 80 i = [64,127]
84 81 TBL[2*i ] = 1 / (*(double*)&(0x3fe0000000000000ULL + (i << 46))) * 2**-23;
85 82 TBL[2*i+1] = 1 / sqrtl(*(double*)&(0x3fe0000000000000ULL + (i << 46)));
86 83 */
87 84 1.1920928955078125000e-07, 1.4142135623730951455e+00,
88 85 1.1737530048076923728e-07, 1.4032928308912466786e+00,
89 86 1.1559688683712121533e-07, 1.3926212476455828160e+00,
90 87 1.1387156016791044559e-07, 1.3821894809301762397e+00,
91 88 1.1219697840073529256e-07, 1.3719886811400707760e+00,
92 89 1.1057093523550724772e-07, 1.3620104492139977204e+00,
93 90 1.0899135044642856803e-07, 1.3522468075656264297e+00,
94 91 1.0745626100352112918e-07, 1.3426901732747025253e+00,
95 92 1.0596381293402777190e-07, 1.3333333333333332593e+00,
96 93 1.0451225385273972023e-07, 1.3241694217637887121e+00,
97 94 1.0309992609797297870e-07, 1.3151918984428583315e+00,
98 95 1.0172526041666667320e-07, 1.3063945294843617440e+00,
99 96 1.0038677014802631022e-07, 1.2977713690461003537e+00,
100 97 9.9083045860389616921e-08, 1.2893167424406084542e+00,
101 98 9.7812750400641022247e-08, 1.2810252304406970492e+00,
102 99 9.6574614319620251657e-08, 1.2728916546811681609e+00,
103 100 9.5367431640625005294e-08, 1.2649110640673517647e+00,
104 101 9.4190055941358019463e-08, 1.2570787221094177344e+00,
105 102 9.3041396722560978838e-08, 1.2493900951088485751e+00,
106 103 9.1920416039156631290e-08, 1.2418408411301324890e+00,
107 104 9.0826125372023804482e-08, 1.2344267996967352996e+00,
108 105 8.9757582720588234048e-08, 1.2271439821557927896e+00,
109 106 8.8713889898255812722e-08, 1.2199885626608373279e+00,
110 107 8.7694190014367814875e-08, 1.2129568697262453902e+00,
111 108 8.6697665127840911497e-08, 1.2060453783110545167e+00,
112 109 8.5723534058988761666e-08, 1.1992507023933782762e+00,
113 110 8.4771050347222225457e-08, 1.1925695879998878812e+00,
114 111 8.3839500343406599951e-08, 1.1859989066577618644e+00,
115 112 8.2928201426630432481e-08, 1.1795356492391770864e+00,
116 113 8.2036500336021511923e-08, 1.1731769201708264205e+00,
117 114 8.1163771609042551220e-08, 1.1669199319831564665e+00,
118 115 8.0309416118421050820e-08, 1.1607620001760186046e+00,
119 116 7.9472859700520828922e-08, 1.1547005383792514621e+00,
120 117 7.8653551868556699530e-08, 1.1487330537883810866e+00,
121 118 7.7850964604591830522e-08, 1.1428571428571427937e+00,
122 119 7.7064591224747481298e-08, 1.1370704872299222110e+00,
123 120 7.6293945312500001588e-08, 1.1313708498984760276e+00,
124 121 7.5538559715346535571e-08, 1.1257560715684669095e+00,
125 122 7.4797985600490195040e-08, 1.1202240672224077489e+00,
126 123 7.4071791565533974158e-08, 1.1147728228665882977e+00,
127 124 7.3359562800480773303e-08, 1.1094003924504582947e+00,
128 125 7.2660900297619054173e-08, 1.1041048949477667573e+00,
129 126 7.1975420106132072725e-08, 1.0988845115895122806e+00,
130 127 7.1302752628504667579e-08, 1.0937374832394612945e+00,
131 128 7.0642541956018514597e-08, 1.0886621079036347126e+00,
132 129 6.9994445240825691959e-08, 1.0836567383657542685e+00,
133 130 6.9358132102272723904e-08, 1.0787197799411873955e+00,
134 131 6.8733284065315314719e-08, 1.0738496883424388795e+00,
135 132 6.8119594029017853361e-08, 1.0690449676496975862e+00,
136 133 6.7516765763274335346e-08, 1.0643041683803828867e+00,
137 134 6.6924513432017540145e-08, 1.0596258856520350822e+00,
138 135 6.6342561141304348632e-08, 1.0550087574332591700e+00,
139 136 6.5770642510775861156e-08, 1.0504514628777803509e+00,
140 137 6.5208500267094023655e-08, 1.0459527207369814228e+00,
141 138 6.4655885858050847233e-08, 1.0415112878465908608e+00,
142 139 6.4112559086134451001e-08, 1.0371259576834630511e+00,
143 140 6.3578287760416665784e-08, 1.0327955589886446131e+00,
144 141 6.3052847365702481089e-08, 1.0285189544531601058e+00,
145 142 6.2536020747950822927e-08, 1.0242950394631678002e+00,
146 143 6.2027597815040656970e-08, 1.0201227409013413627e+00,
147 144 6.1527375252016127325e-08, 1.0160010160015240377e+00,
148 145 6.1035156250000001271e-08, 1.0119288512538813229e+00,
149 146 6.0550750248015869655e-08, 1.0079052613579393416e+00,
150 147 6.0073972687007873182e-08, 1.0039292882210537616e+00,
151 148 1.1920928955078125000e-07, 1.0000000000000000000e+00,
152 149 1.1737530048076923728e-07, 9.9227787671366762812e-01,
153 150 1.1559688683712121533e-07, 9.8473192783466190203e-01,
154 151 1.1387156016791044559e-07, 9.7735555485044178781e-01,
155 152 1.1219697840073529256e-07, 9.7014250014533187638e-01,
156 153 1.1057093523550724772e-07, 9.6308682468615358641e-01,
157 154 1.0899135044642856803e-07, 9.5618288746751489704e-01,
158 155 1.0745626100352112918e-07, 9.4942532655508271588e-01,
159 156 1.0596381293402777190e-07, 9.4280904158206335630e-01,
160 157 1.0451225385273972023e-07, 9.3632917756904454620e-01,
161 158 1.0309992609797297870e-07, 9.2998110995055427441e-01,
162 159 1.0172526041666667320e-07, 9.2376043070340119190e-01,
163 160 1.0038677014802631022e-07, 9.1766293548224708854e-01,
164 161 9.9083045860389616921e-08, 9.1168461167710357351e-01,
165 162 9.7812750400641022247e-08, 9.0582162731567661407e-01,
166 163 9.6574614319620251657e-08, 9.0007032074081916306e-01,
167 164 9.5367431640625005294e-08, 8.9442719099991585541e-01,
168 165 9.4190055941358019463e-08, 8.8888888888888883955e-01,
169 166 9.3041396722560978838e-08, 8.8345220859877238162e-01,
170 167 9.1920416039156631290e-08, 8.7811407991752277180e-01,
171 168 9.0826125372023804482e-08, 8.7287156094396955996e-01,
172 169 8.9757582720588234048e-08, 8.6772183127462465535e-01,
173 170 8.8713889898255812722e-08, 8.6266218562750729415e-01,
174 171 8.7694190014367814875e-08, 8.5769002787023584933e-01,
175 172 8.6697665127840911497e-08, 8.5280286542244176928e-01,
176 173 8.5723534058988761666e-08, 8.4799830400508802164e-01,
177 174 8.4771050347222225457e-08, 8.4327404271156780613e-01,
178 175 8.3839500343406599951e-08, 8.3862786937753464045e-01,
179 176 8.2928201426630432481e-08, 8.3405765622829908246e-01,
180 177 8.2036500336021511923e-08, 8.2956135578434020417e-01,
181 178 8.1163771609042551220e-08, 8.2513699700703468931e-01,
182 179 8.0309416118421050820e-08, 8.2078268166812329287e-01,
183 180 7.9472859700520828922e-08, 8.1649658092772603446e-01,
184 181 7.8653551868556699530e-08, 8.1227693210689522196e-01,
185 182 7.7850964604591830522e-08, 8.0812203564176865456e-01,
186 183 7.7064591224747481298e-08, 8.0403025220736967782e-01,
187 184 7.6293945312500001588e-08, 8.0000000000000004441e-01,
188 185 7.5538559715346535571e-08, 7.9602975216799132241e-01,
189 186 7.4797985600490195040e-08, 7.9211803438133943089e-01,
190 187 7.4071791565533974158e-08, 7.8826342253143455441e-01,
191 188 7.3359562800480773303e-08, 7.8446454055273617811e-01,
192 189 7.2660900297619054173e-08, 7.8072005835882651859e-01,
193 190 7.1975420106132072725e-08, 7.7702868988581130782e-01,
194 191 7.1302752628504667579e-08, 7.7338919123653082632e-01,
195 192 7.0642541956018514597e-08, 7.6980035891950104876e-01,
196 193 6.9994445240825691959e-08, 7.6626102817692109959e-01,
197 194 6.9358132102272723904e-08, 7.6277007139647390321e-01,
198 195 6.8733284065315314719e-08, 7.5932639660199918730e-01,
199 196 6.8119594029017853361e-08, 7.5592894601845450619e-01,
200 197 6.7516765763274335346e-08, 7.5257669470687782454e-01,
201 198 6.6924513432017540145e-08, 7.4926864926535519107e-01,
202 199 6.6342561141304348632e-08, 7.4600384659225105199e-01,
203 200 6.5770642510775861156e-08, 7.4278135270820744296e-01,
204 201 6.5208500267094023655e-08, 7.3960026163363878915e-01,
205 202 6.4655885858050847233e-08, 7.3645969431865865307e-01,
206 203 6.4112559086134451001e-08, 7.3335879762256905856e-01,
207 204 6.3578287760416665784e-08, 7.3029674334022143256e-01,
208 205 6.3052847365702481089e-08, 7.2727272727272729291e-01,
209 206 6.2536020747950822927e-08, 7.2428596834014824513e-01,
210 207 6.2027597815040656970e-08, 7.2133570773394584119e-01,
211 208 6.1527375252016127325e-08, 7.1842120810709964029e-01,
212 209 6.1035156250000001271e-08, 7.1554175279993270653e-01,
213 210 6.0550750248015869655e-08, 7.1269664509979835376e-01,
214 211 6.0073972687007873182e-08, 7.0988520753289097165e-01,
215 212 };
216 213
217 214 static const unsigned long long LCONST[] = {
218 215 0x3feffffffee7f18fULL, /* A0 = 9.99999997962321453275e-01 */
219 216 0xbfdffffffe07e52fULL, /* A1 =-4.99999998166077580600e-01 */
220 217 0x3fd801180ca296d9ULL, /* A2 = 3.75066768969515586277e-01 */
221 218 0xbfd400fc0bbb8e78ULL, /* A3 =-3.12560092408808548438e-01 */
222 219 };
223 220
224 221 static void
225 222 __vrsqrtf_n(int n, float * restrict px, int stridex, float * restrict py, int stridey);
226 223
227 224 #pragma no_inline(__vrsqrtf_n)
228 225
229 226 #define RETURN(ret) \
230 227 { \
231 228 *py = (ret); \
232 229 py += stridey; \
233 230 if (n_n == 0) \
234 231 { \
235 232 spx = px; spy = py; \
236 233 ax0 = *(int*)px; \
237 234 continue; \
238 235 } \
239 236 n--; \
240 237 break; \
241 238 }
242 239
243 240 void
244 241 __vrsqrtf(int n, float * restrict px, int stridex, float * restrict py, int stridey)
245 242 {
246 243 float *spx, *spy;
247 244 int ax0, n_n;
248 245 float res;
249 246 float FONE = 1.0f, FTWO = 2.0f;
250 247
251 248 while (n > 1)
252 249 {
253 250 n_n = 0;
254 251 spx = px;
255 252 spy = py;
256 253 ax0 = *(int*)px;
257 254 for (; n > 1 ; n--)
258 255 {
259 256 px += stridex;
260 257 if (ax0 >= 0x7f800000) /* X = NaN or Inf */
261 258 {
262 259 res = *(px - stridex);
263 260 RETURN (FONE / res)
264 261 }
265 262
266 263 py += stridey;
267 264
268 265 if (ax0 < 0x00800000) /* X = denormal, zero or negative */
269 266 {
270 267 py -= stridey;
271 268 res = *(px - stridex);
272 269
273 270 if ((ax0 & 0x7fffffff) == 0) /* |X| = zero */
274 271 {
275 272 RETURN (FONE / res)
276 273 }
277 274 else if (ax0 >= 0) /* X = denormal */
278 275 {
279 276 double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */
280 277 double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */
281 278 double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */
282 279 double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */
283 280
284 281 double res0, xx0, tbl_div0, tbl_sqrt0;
285 282 float fres0;
286 283 int iax0, si0, iexp0;
287 284
288 285 res = *(int*)&res;
289 286 res *= FTWO;
290 287 ax0 = *(int*)&res;
291 288 iexp0 = ax0 >> 24;
292 289 iexp0 = 0x3f + 0x4b - iexp0;
293 290 iexp0 = iexp0 << 23;
294 291
295 292 si0 = (ax0 >> 13) & 0x7f0;
296 293
297 294 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
298 295 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
299 296 iax0 = ax0 & 0x7ffe0000;
300 297 iax0 = ax0 - iax0;
301 298 xx0 = iax0 * tbl_div0;
302 299 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
303 300
304 301 fres0 = res0;
305 302 iexp0 += *(int*)&fres0;
306 303 RETURN(*(float*)&iexp0)
307 304 }
308 305 else /* X = negative */
309 306 {
310 307 RETURN (sqrtf(res))
311 308 }
312 309 }
313 310 n_n++;
314 311 ax0 = *(int*)px;
315 312 }
316 313 if (n_n > 0)
317 314 __vrsqrtf_n(n_n, spx, stridex, spy, stridey);
318 315 }
319 316
320 317 if (n > 0)
321 318 {
322 319 ax0 = *(int*)px;
323 320
324 321 if (ax0 >= 0x7f800000) /* X = NaN or Inf */
325 322 {
326 323 res = *px;
327 324 *py = FONE / res;
328 325 }
329 326 else if (ax0 < 0x00800000) /* X = denormal, zero or negative */
330 327 {
331 328 res = *px;
332 329
333 330 if ((ax0 & 0x7fffffff) == 0) /* |X| = zero */
334 331 {
335 332 *py = FONE / res;
336 333 }
337 334 else if (ax0 >= 0) /* X = denormal */
338 335 {
339 336 double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */
340 337 double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */
341 338 double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */
342 339 double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */
343 340 double res0, xx0, tbl_div0, tbl_sqrt0;
344 341 float fres0;
345 342 int iax0, si0, iexp0;
346 343
347 344 res = *(int*)&res;
348 345 res *= FTWO;
349 346 ax0 = *(int*)&res;
350 347 iexp0 = ax0 >> 24;
351 348 iexp0 = 0x3f + 0x4b - iexp0;
352 349 iexp0 = iexp0 << 23;
353 350
354 351 si0 = (ax0 >> 13) & 0x7f0;
355 352
356 353 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
357 354 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
358 355 iax0 = ax0 & 0x7ffe0000;
359 356 iax0 = ax0 - iax0;
360 357 xx0 = iax0 * tbl_div0;
361 358 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
362 359
363 360 fres0 = res0;
364 361 iexp0 += *(int*)&fres0;
365 362
366 363 *(int*)py = iexp0;
367 364 }
368 365 else /* X = negative */
369 366 {
370 367 *py = sqrtf(res);
371 368 }
372 369 }
373 370 else
374 371 {
375 372 double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */
376 373 double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */
377 374 double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */
378 375 double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */
379 376 double res0, xx0, tbl_div0, tbl_sqrt0;
380 377 float fres0;
381 378 int iax0, si0, iexp0;
382 379
383 380 iexp0 = ax0 >> 24;
384 381 iexp0 = 0x3f - iexp0;
385 382 iexp0 = iexp0 << 23;
386 383
387 384 si0 = (ax0 >> 13) & 0x7f0;
388 385
389 386 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
390 387 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
391 388 iax0 = ax0 & 0x7ffe0000;
392 389 iax0 = ax0 - iax0;
393 390 xx0 = iax0 * tbl_div0;
394 391 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
395 392
396 393 fres0 = res0;
397 394 iexp0 += *(int*)&fres0;
398 395
399 396 *(int*)py = iexp0;
400 397 }
401 398 }
402 399 }
403 400
404 401 void
405 402 __vrsqrtf_n(int n, float * restrict px, int stridex, float * restrict py, int stridey)
406 403 {
407 404 double A0 = ((double*)LCONST)[0]; /* 9.99999997962321453275e-01 */
408 405 double A1 = ((double*)LCONST)[1]; /* -4.99999998166077580600e-01 */
409 406 double A2 = ((double*)LCONST)[2]; /* 3.75066768969515586277e-01 */
410 407 double A3 = ((double*)LCONST)[3]; /* -3.12560092408808548438e-01 */
411 408 double res0, xx0, tbl_div0, tbl_sqrt0;
412 409 float fres0;
413 410 int iax0, ax0, si0, iexp0;
414 411
415 412 #if defined(ARCH_v7) || defined(ARCH_v8)
416 413 double res1, xx1, tbl_div1, tbl_sqrt1;
417 414 double res2, xx2, tbl_div2, tbl_sqrt2;
418 415 float fres1, fres2;
419 416 int iax1, ax1, si1, iexp1;
420 417 int iax2, ax2, si2, iexp2;
421 418
422 419 for(; n > 2 ; n -= 3)
423 420 {
424 421 ax0 = *(int*)px;
425 422 px += stridex;
426 423
427 424 ax1 = *(int*)px;
428 425 px += stridex;
429 426
430 427 ax2 = *(int*)px;
431 428 px += stridex;
432 429
433 430 iexp0 = ax0 >> 24;
434 431 iexp1 = ax1 >> 24;
435 432 iexp2 = ax2 >> 24;
436 433 iexp0 = 0x3f - iexp0;
437 434 iexp1 = 0x3f - iexp1;
438 435 iexp2 = 0x3f - iexp2;
439 436
440 437 iexp0 = iexp0 << 23;
441 438 iexp1 = iexp1 << 23;
442 439 iexp2 = iexp2 << 23;
443 440
444 441 si0 = (ax0 >> 13) & 0x7f0;
445 442 si1 = (ax1 >> 13) & 0x7f0;
446 443 si2 = (ax2 >> 13) & 0x7f0;
447 444
448 445 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
449 446 tbl_div1 = ((double*)((char*)__TBL_rsqrtf + si1))[0];
450 447 tbl_div2 = ((double*)((char*)__TBL_rsqrtf + si2))[0];
451 448 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
452 449 tbl_sqrt1 = ((double*)((char*)__TBL_rsqrtf + si1))[1];
453 450 tbl_sqrt2 = ((double*)((char*)__TBL_rsqrtf + si2))[1];
454 451 iax0 = ax0 & 0x7ffe0000;
455 452 iax1 = ax1 & 0x7ffe0000;
456 453 iax2 = ax2 & 0x7ffe0000;
457 454 iax0 = ax0 - iax0;
458 455 iax1 = ax1 - iax1;
459 456 iax2 = ax2 - iax2;
460 457 xx0 = iax0 * tbl_div0;
461 458 xx1 = iax1 * tbl_div1;
462 459 xx2 = iax2 * tbl_div2;
463 460 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
464 461 res1 = tbl_sqrt1 * (((A3 * xx1 + A2) * xx1 + A1) * xx1 + A0);
465 462 res2 = tbl_sqrt2 * (((A3 * xx2 + A2) * xx2 + A1) * xx2 + A0);
466 463
467 464 fres0 = res0;
468 465 fres1 = res1;
469 466 fres2 = res2;
470 467
471 468 iexp0 += *(int*)&fres0;
472 469 iexp1 += *(int*)&fres1;
473 470 iexp2 += *(int*)&fres2;
474 471 *(int*)py = iexp0;
475 472 py += stridey;
476 473 *(int*)py = iexp1;
477 474 py += stridey;
478 475 *(int*)py = iexp2;
479 476 py += stridey;
480 477 }
481 478 #endif
482 479 for(; n > 0 ; n--)
483 480 {
484 481 ax0 = *(int*)px;
485 482 px += stridex;
486 483
487 484 iexp0 = ax0 >> 24;
488 485 iexp0 = 0x3f - iexp0;
489 486 iexp0 = iexp0 << 23;
490 487
491 488 si0 = (ax0 >> 13) & 0x7f0;
492 489
493 490 tbl_div0 = ((double*)((char*)__TBL_rsqrtf + si0))[0];
494 491 tbl_sqrt0 = ((double*)((char*)__TBL_rsqrtf + si0))[1];
495 492 iax0 = ax0 & 0x7ffe0000;
↓ open down ↓ |
410 lines elided |
↑ open up ↑ |
496 493 iax0 = ax0 - iax0;
497 494 xx0 = iax0 * tbl_div0;
498 495 res0 = tbl_sqrt0 * (((A3 * xx0 + A2) * xx0 + A1) * xx0 + A0);
499 496
500 497 fres0 = res0;
501 498 iexp0 += *(int*)&fres0;
502 499 *(int*)py = iexp0;
503 500 py += stridey;
504 501 }
505 502 }
506 -
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX