1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vrhypotf.S"
30
31 #include "libm.h"
32
	RO_DATA
	.align	64
.CONST_TBL:
! Reciprocal / reciprocal-square-root lookup table, indexed by the high
! bits of the double-precision sum hyp0 = x*x + y*y (see the algorithm
! comment below: si0 = (ibase0 >> 10) & 0x7f0 selects one 16-byte entry).
!
! For i = [0,63], let m(i) = *(double*)&(0x3ff0000000000000LL + (i << 46)),
! i.e. mantissa step through [1,4).  Then:
! TBL[2*i+0]     = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[2*i+1]     = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
!
! First half: entries for odd exponents (extra 1/sqrt(2) factor folded in).
	.word	0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd,
	.word	0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03,
	.word	0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2,
	.word	0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671,
	.word	0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911,
	.word	0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342,
	.word	0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a,
	.word	0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9,
	.word	0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555,
	.word	0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54,
	.word	0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70,
	.word	0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032,
	.word	0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74,
	.word	0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92,
	.word	0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f,
	.word	0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3,
	.word	0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f,
	.word	0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199,
	.word	0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577,
	.word	0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58,
	.word	0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03,
	.word	0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37,
	.word	0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e,
	.word	0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92,
	.word	0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826,
	.word	0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0,
	.word	0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91,
	.word	0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50,
	.word	0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e,
	.word	0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428,
	.word	0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4,
	.word	0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5,
	.word	0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c,
	.word	0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55,
	.word	0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492,
	.word	0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a,
	.word	0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a,
	.word	0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d,
	.word	0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9,
	.word	0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3,
	.word	0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896,
	.word	0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f,
	.word	0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9,
	.word	0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee,
	.word	0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4,
	.word	0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62,
	.word	0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db,
	.word	0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253,
	.word	0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a,
	.word	0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26,
	.word	0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad,
	.word	0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c,
	.word	0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc,
	.word	0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412,
	.word	0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488,
	.word	0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499,
	.word	0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db,
	.word	0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438,
	.word	0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a,
	.word	0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa,
	.word	0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d,
	.word	0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72,
	.word	0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a,
	.word	0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9,
! Second half (byte offset 1024 = TBL[128]): entries for even exponents
! (plain 0.25/sqrt(m) scaling, no sqrt(2) factor).
	.word	0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000,
	.word	0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9,
	.word	0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b,
	.word	0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc,
	.word	0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c,
	.word	0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957,
	.word	0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2,
	.word	0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc,
	.word	0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66,
	.word	0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350,
	.word	0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549,
	.word	0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d,
	.word	0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937,
	.word	0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86,
	.word	0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213,
	.word	0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358,
	.word	0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9,
	.word	0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c,
	.word	0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2,
	.word	0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b,
	.word	0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39,
	.word	0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118,
	.word	0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347,
	.word	0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11,
	.word	0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550,
	.word	0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e,
	.word	0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169,
	.word	0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394,
	.word	0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a,
	.word	0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c,
	.word	0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7,
	.word	0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899,
	.word	0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e,
	.word	0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee,
	.word	0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458,
	.word	0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588,
	.word	0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a,
	.word	0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54,
	.word	0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44,
	.word	0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31,
	.word	0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c,
	.word	0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96,
	.word	0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009,
	.word	0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3,
	.word	0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426,
	.word	0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6,
	.word	0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d,
	.word	0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2,
	.word	0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7,
	.word	0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d,
	.word	0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1,
	.word	0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5,
	.word	0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88,
	.word	0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72,
	.word	0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729,
	.word	0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea,
	.word	0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098,
	.word	0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746,
	.word	0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5,
	.word	0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f,
	.word	0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467,
	.word	0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1,
	.word	0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d,
	.word	0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6

! Constants loaded from TBL+TBL_SHIFT in the prologue (see the ldd
! sequence after PIC_SET); offsets below are from TBL+2048.
	.word	0x000fffff, 0xffffffff	! DC0: double mantissa mask
	.word	0x3ff00000, 0		! DC1: double exponent bits of 1.0
	.word	0x7fffc000, 0		! DC2: sign+exp+high-mantissa mask
	.word	0x7fe00000, 0		! DA0: double exponent-field mask
	.word	0x60000000, 0		! DA1: bias used by the fpsub32 rescale
	.word	0x80808080, 0x3f800000	! SCALE , FONE = 1.0f
	.word	0x3fefffff, 0xfee7f18f	! KA0 = 9.99999997962321453275e-01
	.word	0xbfdfffff, 0xfe07e52f	! KA1 = -4.99999998166077580600e-01
	.word	0x3fd80118, 0x0ca296d9	! KA2 = 3.75066768969515586277e-01
	.word	0xbfd400fc, 0x0bbb8e78	! KA3 = -3.12560092408808548438e-01
181
! Integer-register aliases: bit patterns compared against the raw
! single-precision inputs.  _0x7f800000 = exponent mask / +Inf encoding,
! _0x7fffffff = absolute-value (sign-strip) mask.  Both are materialized
! in the prologue (sethi/add) and kept live for the whole routine.
#define _0x7f800000 %o0
#define _0x7fffffff %o7

! Base pointer to .CONST_TBL, set up via PIC_SET in the prologue.
#define TBL %l2

! Byte offset from TBL to the DC0/DC1/... constant block that follows
! the 2048-byte lookup table (2 halves * 64 entries * 16 bytes).
#define TBL_SHIFT 2048

! Byte strides between consecutive elements of the x, y and z vectors
! (the element strides passed in are scaled by 4 in the prologue).
#define stridex %l3
#define stridey %l4
#define stridez %l5
! Remaining element count.
#define counter %i0

! FP-register aliases for the constants preloaded from TBL+TBL_SHIFT.
#define DA0 %f52
#define DA1 %f44
#define SCALE %f6

#define DC0 %f46
#define DC1 %f8
#define FZERO %f9
#define DC2 %f50

! Coefficients of the degree-3 polynomial correction (see algorithm).
#define KA3 %f56
#define KA2 %f58
#define KA1 %f60
#define KA0 %f54

! Stack-frame scratch slots, addressed relative to %fp.
#define tmp_counter STACK_BIAS-0x04
#define tmp_px STACK_BIAS-0x20
#define tmp_py STACK_BIAS-0x18

! Slots used to pass hyp0 bits from the FP pipe to the integer pipe.
! NOTE(review): ftmp0/ftmp2 and ftmp1/ftmp3 alias the same offsets —
! this appears intentional (the pipelined stages reuse the slots on
! alternating iterations), so the distinct names are documentation only.
#define ftmp0 STACK_BIAS-0x10
#define ftmp1 STACK_BIAS-0x0c
#define ftmp2 STACK_BIAS-0x10
#define ftmp3 STACK_BIAS-0x0c
#define ftmp4 STACK_BIAS-0x08

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x20
219
220 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
221 ! !!!!! algorithm !!!!!
222 ! x0 = *px;
223 ! ax = *(int*)px;
224 !
225 ! y0 = *py;
226 ! ay = *(int*)py;
227 !
228 ! ax &= 0x7fffffff;
229 ! ay &= 0x7fffffff;
230 !
231 ! px += stridex;
232 ! py += stridey;
233 !
234 ! if ( ax >= 0x7f800000 || ay >= 0x7f800000 )
235 ! {
236 ! *pz = fabsf(x0) * fabsf(y0);
237 ! if( ax == 0x7f800000 ) *pz = 0.0f;
238 ! else if( ay == 0x7f800000 ) *pz = 0.0f;
239 ! pz += stridez;
240 ! continue;
241 ! }
242 !
243 ! if ( ay == 0 )
244 ! {
245 ! if ( ax == 0 )
246 ! {
247 ! *pz = 1.0f / 0.0f;
248 ! pz += stridez;
249 ! continue;
250 ! }
251 ! }
252 !
253 ! hyp0 = x0 * (double)x0;
254 ! dtmp0 = y0 * (double)y0;
255 ! hyp0 += dtmp0;
256 !
257 ! ibase0 = ((int*)&hyp0)[0];
258 !
259 ! dbase0 = vis_fand(hyp0,DA0);
260 ! dbase0 = vis_fmul8x16(SCALE, dbase0);
261 ! dbase0 = vis_fpsub32(DA1,dbase0);
262 !
263 ! hyp0 = vis_fand(hyp0,DC0);
264 ! hyp0 = vis_for(hyp0,DC1);
265 ! h_hi0 = vis_fand(hyp0,DC2);
266 !
267 ! ibase0 >>= 10;
268 ! si0 = ibase0 & 0x7f0;
269 ! xx0 = ((double*)((char*)TBL + si0))[0];
270 !
271 ! dtmp1 = hyp0 - h_hi0;
272 ! xx0 = dtmp1 * xx0;
! res0 = ((double*)((char*)TBL + si0))[1];
274 ! dtmp2 = KA3 * xx0;
275 ! dtmp2 += KA2;
276 ! dtmp2 *= xx0;
277 ! dtmp2 += KA1;
278 ! dtmp2 *= xx0;
279 ! dtmp2 += KA0;
280 ! res0 *= dtmp2;
281 ! res0 *= dbase0;
282 ! ftmp0 = (float)res0;
283 ! *pz = ftmp0;
284 ! pz += stridez;
285 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
286
287 ENTRY(__vrhypotf)
288 save %sp,-SA(MINFRAME)-tmps,%sp
289 PIC_SETUP(l7)
290 PIC_SET(l7,.CONST_TBL,l2)
291 wr %g0,0x82,%asi
292
293 #ifdef __sparcv9
294 ldx [%fp+STACK_BIAS+176],stridez
295 #else
296 ld [%fp+STACK_BIAS+92],stridez
297 #endif
298
299 stx %i1,[%fp+tmp_px]
300 sll %i2,2,stridex
301
302 stx %i3,[%fp+tmp_py]
303 sll %i4,2,stridey
304
305 st %i0,[%fp+tmp_counter]
306 sll stridez,2,stridez
307 mov %i5,%o1
308
309 ldd [TBL+TBL_SHIFT],DC0
310 ldd [TBL+TBL_SHIFT+8],DC1
311 ldd [TBL+TBL_SHIFT+16],DC2
312 ldd [TBL+TBL_SHIFT+24],DA0
313 ldd [TBL+TBL_SHIFT+32],DA1
314 ldd [TBL+TBL_SHIFT+40],SCALE
315 ldd [TBL+TBL_SHIFT+48],KA0
316
317 ldd [TBL+TBL_SHIFT+56],KA1
318 sethi %hi(0x7f800000),%o0
319
320 ldd [TBL+TBL_SHIFT+64],KA2
321 sethi %hi(0x7ffffc00),%o7
322
323 ldd [TBL+TBL_SHIFT+72],KA3
324 add %o7,1023,%o7
325
326 .begin:
327 ld [%fp+tmp_counter],counter
328 ldx [%fp+tmp_px],%o4
329 ldx [%fp+tmp_py],%i2
330 st %g0,[%fp+tmp_counter]
331 .begin1:
332 cmp counter,0
333 ble,pn %icc,.exit
334 nop
335
336 lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py;
337
338 lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px;
339
340 lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
341 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
342
343 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
344 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
345 bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 )
346 lda [%o4]0x82,%f4 ! (3_0) x0 = *px;
347
348 cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000
349 bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 )
350 nop
351
352 cmp %l6,0 ! (3_0)
353 be,pn %icc,.spec1 ! (3_0) if ( ay == 0 )
354 fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0;
355 .cont_spec1:
356 lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py;
357
358 fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0;
359 lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px;
360
361 add %o4,stridex,%l0 ! px += stridex
362
363 add %i2,stridey,%i2 ! py += stridey
364 and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff;
365
366 and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff;
367 lda [%i2]0x82,%f2 ! (4_0) y0 = *py;
368
369 faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0;
370 cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000
371
372 bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 )
373 lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px;
374 .cont0:
375 cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000
376 bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 )
377 st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0];
378 .cont1:
379 cmp %l6,0 ! (4_1) ay ? 0
380 be,pn %icc,.update2 ! (4_1) if ( ay == 0 )
381 fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0;
382 .cont2:
383 lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py;
384
385 fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0;
386 lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px;
387
388 add %l0,stridex,%i1 ! px += stridex
389
390 add %i2,stridey,%i2 ! py += stridey
391 and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff;
392
393 and %i5,_0x7fffffff,%i5 ! (0_0) ax &= 0x7fffffff;
394 lda [%i2]0x82,%f2 ! (0_0) y0 = *py;
395
396 cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000
397 bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 )
398 faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0;
399 .cont3:
400 lda [%i1]0x82,%f4 ! (0_0) x0 = *px;
401
402 cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000
403 bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 )
404 st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0];
405 .cont4:
406 cmp %l6,0 ! (0_0) ay ? 0
407 be,pn %icc,.update5 ! (0_0) if ( ay == 0 )
408 fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0;
409 .cont5:
410 lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py;
411
412 fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0;
413 lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px;
414
415 add %i1,stridex,%g5 ! px += stridex
416
417 add %i2,stridey,%o3 ! py += stridey
418 and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff;
419 fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0);
420
421 and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff;
422 lda [%o3]0x82,%f2 ! (1_0) y0 = *py;
423
424 faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0;
425 cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000
426
427 lda [%g5]0x82,%f4 ! (1_0) x0 = *px;
428 bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 )
429 for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1);
430 .cont6:
431 cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000
432 bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 )
433 ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0];
434 .cont7:
435 st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0];
436
437 cmp %l6,0 ! (1_0) ay ? 0
438 be,pn %icc,.update8 ! (1_0) if ( ay == 0 )
439 fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
440 .cont8:
441 fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0;
442 sra %l1,10,%o5 ! (3_1) ibase0 >>= 10;
443
444 and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0;
445 lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py;
446
447 fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0;
448 add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0
449 lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px;
450 fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0;
451
452 add %g5,stridex,%i4 ! px += stridex
453 ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
454
455 and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff;
456 add %o3,stridey,%i2 ! py += stridey
457 fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0);
458
459 and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff;
460 lda [%i2]0x82,%f2 ! (2_0) y0 = *py;
461
462 faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0;
463 cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000
464 fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0;
465
466 lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px;
467 bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000
468 for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1);
469 .cont9:
470 cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000
471 bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 )
472 ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0];
473 .cont10:
474 st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0];
475
476 fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0;
477 cmp %l6,0 ! (2_0) ay ? 0
478 be,pn %icc,.update11 ! (2_0) if ( ay == 0 )
479 fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
480 .cont11:
481 fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0;
482 sra %i3,10,%i3 ! (4_1) ibase0 >>= 10;
483
484 and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0;
485 lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py;
486
487 fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0;
488 add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0
489 lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px;
490 fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0;
491
492 add %i4,stridex,%o4 ! px += stridex
493 ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
494 faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2;
495
496 add %i2,stridey,%i2 ! py += stridey
497 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
498 fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0);
499
500 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
501 lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
502
503 faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0;
504 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
505 fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0;
506
507 fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0;
508 lda [%o4]0x82,%f4 ! (3_0) x0 = *px;
509 bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 )
510 for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1);
511 .cont12:
512 cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000
513 bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 )
514 ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0];
515 .cont13:
516 st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0];
517
518 fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0;
519 cmp %l6,0 ! (3_0)
520 be,pn %icc,.update14 ! (3_0) if ( ay == 0 )
521 fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2);
522 .cont14:
523 fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0;
524 sra %i1,10,%l1 ! (0_0) ibase0 >>= 10;
525 faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1;
526
527 and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0;
528 lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py;
529
530 fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0;
531 add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0
532 lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px;
533 fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0;
534
535 add %o4,stridex,%l0 ! px += stridex
536 ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
537 faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2;
538
539 fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0;
540 add %i2,stridey,%i2 ! py += stridey
541 and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff;
542 fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0);
543
544 and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff;
545 lda [%i2]0x82,%f2 ! (4_0) y0 = *py;
546 fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0);
547
548 faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0;
549 cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000
550 ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
551 fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0;
552
553 fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0;
554 lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px;
555 bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 )
556 for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1);
557 .cont15:
558 fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
559 cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000
560 ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0];
561 faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0;
562
563 bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 )
564 st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0];
565 .cont16:
566 fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0;
567 fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2);
568
569 mov %o1,%i4
570 cmp counter,5
571 bl,pn %icc,.tail
572 nop
573
574 ba .main_loop
575 sub counter,5,counter
576
577 .align 16
578 .main_loop:
579 fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0;
580 sra %i1,10,%o2 ! (1_1) ibase0 >>= 10;
581 cmp %l6,0 ! (4_1) ay ? 0
582 faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1;
583
584 fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2;
585 and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0;
586 lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py;
587 fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
588
589 fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0;
590 add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0
591 lda [%l0+stridex]0x82,%o1 ! (0_0) ax = *(int*)px;
592 fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0;
593
594 add %l0,stridex,%i1 ! px += stridex
595 ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
596 be,pn %icc,.update17 ! (4_1) if ( ay == 0 )
597 faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2;
598 .cont17:
599 fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0;
600 add %i2,stridey,%i2 ! py += stridey
601 and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff;
602 fand %f18,DC0,%f30 ! (2_1) hyp0 = vis_fand(hyp0,DC0);
603
604 fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0;
605 and %o1,_0x7fffffff,%o1 ! (0_0) ax &= 0x7fffffff;
606 lda [%i2]0x82,%f2 ! (0_0) y0 = *py;
607 fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0);
608
609 faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0;
610 cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000
611 ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
612 fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0;
613
614 fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0;
615 lda [%i1]0x82,%f4 ! (0_0) x0 = *px;
616 bge,pn %icc,.update18 ! (0_0) if ( ay >= 0x7f800000 )
617 for %f30,DC1,%f28 ! (2_1) hyp0 = vis_for(hyp0,DC1);
618 .cont18:
619 fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
620 cmp %o1,_0x7f800000 ! (0_0) ax ? 0x7f800000
621 ld [%fp+ftmp3],%l0 ! (2_1) ibase0 = ((int*)&hyp0)[0];
622 faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0;
623
624 add %i4,stridez,%i3 ! pz += stridez
625 st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0];
626 bge,pn %icc,.update19 ! (0_0) if ( ax >= 0x7f800000 )
627 fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0;
628 .cont19:
629 fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0;
630 cmp %l6,0 ! (0_0) ay ? 0
631 st %f1,[%i4] ! (3_2) *pz = ftmp0;
632 fand %f28,DC2,%f30 ! (2_1) h_hi0 = vis_fand(hyp0,DC2);
633
634 fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0;
635 sra %l0,10,%i4 ! (2_1) ibase0 >>= 10;
636 be,pn %icc,.update20 ! (0_0) if ( ay == 0 )
637 faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1;
638 .cont20:
639 fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2;
640 and %i4,2032,%g1 ! (2_1) si0 = ibase0 & 0x7f0;
641 lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py;
642 fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);
643
644 fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0;
645 add %g1,TBL,%l0 ! (2_1) (char*)TBL + si0
646 lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px;
647 fsubd %f28,%f30,%f28 ! (2_1) dtmp1 = hyp0 - h_hi0;
648
649 nop
650 add %i1,stridex,%g5 ! px += stridex
651 ldd [TBL+%g1],%f42 ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
652 faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2;
653
654 fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0;
655 add %i2,stridey,%o3 ! py += stridey
656 and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff;
657 fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0);
658
659 fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0;
660 and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff;
661 lda [%o3]0x82,%f2 ! (1_0) y0 = *py;
662 fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0);
663
664 faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0;
665 cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000
666 ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1];
667 fmuld %f28,%f42,%f32 ! (2_1) xx0 = dtmp1 * xx0;
668
669 fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0;
670 lda [%g5]0x82,%f4 ! (1_0) x0 = *px;
671 bge,pn %icc,.update21 ! (1_0) if ( ay >= 0x7f800000 )
672 for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1);
673 .cont21:
674 fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
675 cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000
676 ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0];
677 faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0
678
679 add %i3,stridez,%o1 ! pz += stridez
680 st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0];
681 bge,pn %icc,.update22 ! (1_0) if ( ax >= 0x7f800000 )
682 fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0;
683 .cont22:
684 fmuld KA3,%f32,%f34 ! (2_1) dtmp2 = KA3 * xx0;
685 cmp %l6,0 ! (1_0) ay ? 0
686 st %f1,[%i3] ! (4_2) *pz = ftmp0;
687 fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
688
689 fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0;
690 sra %l1,10,%o5 ! (3_1) ibase0 >>= 10;
691 be,pn %icc,.update23 ! (1_0) if ( ay == 0 )
692 faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1;
693 .cont23:
694 fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2;
695 and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0;
696 lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py;
697 fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);
698
699 fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0;
700 add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0
701 lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px;
702 fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0;
703
704 nop
705 add %g5,stridex,%i4 ! px += stridex
706 ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
707 faddd %f34,KA2,%f10 ! (2_1) dtmp2 += KA2;
708
709 fmuld %f40,%f36,%f40 ! (1_1) dtmp2 *= xx0;
710 and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff;
711 add %o3,stridey,%i2 ! py += stridey
712 fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0);
713
714 fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0;
715 and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff;
716 lda [%i2]0x82,%f2 ! (2_0) y0 = *py;
717 fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0);
718
719 faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0;
720 cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000
721 ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1];
722 fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0;
723
724 fmuld %f10,%f32,%f10 ! (2_1) dtmp2 *= xx0;
725 lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px;
726 bge,pn %icc,.update24 ! (2_0) if ( ay >= 0x7f800000
727 for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1);
728 .cont24:
729 fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
730 cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000
731 ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0];
732 faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0;
733
734 add %o1,stridez,%g1 ! pz += stridez
735 st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0];
736 bge,pn %icc,.update25 ! (2_0) if ( ax >= 0x7f800000 )
737 fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0;
738 .cont25:
739 fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0;
740 cmp %l6,0 ! (2_0) ay ? 0
741 st %f1,[%o1] ! (0_1) *pz = ftmp0;
742 fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
743
744 fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0;
745 sra %i3,10,%i3 ! (4_1) ibase0 >>= 10;
746 be,pn %icc,.update26 ! (2_0) if ( ay == 0 )
747 faddd %f10,KA1,%f40 ! (2_1) dtmp2 += KA1;
748 .cont26:
749 fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2;
750 and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0;
751 lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py;
752 fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);
753
754 fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0;
755 add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0
756 lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px;
757 fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0;
758
759 nop
760 add %i4,stridex,%o4 ! px += stridex
761 ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
762 faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2;
763
764 fmuld %f40,%f32,%f40 ! (2_1) dtmp2 *= xx0;
765 add %i2,stridey,%i2 ! py += stridey
766 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff;
767 fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0);
768
769 fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0;
770 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff;
771 lda [%i2]0x82,%f2 ! (3_0) y0 = *py;
772 fand %f18,DA0,%f24 ! (2_1) dbase0 = vis_fand(hyp0,DA0);
773
774 faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0;
775 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000
776 ldd [%l0+8],%f62 ! (2_1) res0 = ((double*)((char*)arr + si0))[1];
777 fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0;
778
!--------------------------------------------------------------------
! Tail of the software-pipelined setup for the 5-element main loop.
! The "(i_j)" tags on each line name the vector element i (0..4) and
! the pipeline copy j it belongs to; instructions from several
! elements are interleaved to hide FP and load latencies.  Branches
! to .updateNN divert out-of-range arguments (Inf/NaN/zero) detected
! for an element still in flight; the matching .contNN label resumes
! the pipeline.  Branch delay slots are used throughout.
!--------------------------------------------------------------------
	fmuld	%f10,%f26,%f10		! (3_1) dtmp2 *= xx0;
	lda	[%o4]0x82,%f4		! (3_0) x0 = *px;
	bge,pn	%icc,.update27		! (3_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28		! (0_0) hyp0 = vis_for(hyp0,DC1);
.cont27:
	fmul8x16	SCALE,%f24,%f24	! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000		! (3_0) ax ? 0x7f800000
	ld	[%fp+ftmp1],%i1		! (0_0) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f42		! (2_1) dtmp2 += KA0;

	add	%g1,stridez,%o3		! pz += stridez
	st	%f18,[%fp+ftmp3]	! (2_0) ibase0 = ((int*)&hyp0)[0];
	bge,pn	%icc,.update28		! (3_0) if ( ax >= 0x7f800000 )
	fdtos	%f38,%f1		! (1_1) ftmp0 = (float)res0;
.cont28:
	fmuld	KA3,%f32,%f34		! (4_1) dtmp2 = KA3 * xx0;
	cmp	%l6,0			! (3_0) ay ? 0
	st	%f1,[%g1]		! (1_1) *pz = ftmp0;
	fand	%f28,DC2,%f30		! (0_0) h_hi0 = vis_fand(hyp0,DC2);

	fsmuld	%f4,%f4,%f36		! (3_0) hyp0 = x0 * (double)x0;
	sra	%i1,10,%l1		! (0_0) ibase0 >>= 10;
	be,pn	%icc,.update29		! (3_0) if ( ay == 0 )
	faddd	%f10,KA1,%f40		! (3_1) dtmp2 += KA1;
.cont29:
	fmuld	%f62,%f42,%f38		! (2_1) res0 *= dtmp2;
	and	%l1,2032,%o5		! (0_0) si0 = ibase0 & 0x7f0;
	lda	[%i2+stridey]0x82,%l6	! (4_0) ay = *(int*)py;
	fpsub32	DA1,%f24,%f24		! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fsmuld	%f2,%f2,%f62		! (3_0) dtmp0 = y0 * (double)y0;
	add	%o5,TBL,%l1		! (0_0) (char*)TBL + si0
	lda	[stridex+%o4]0x82,%i5	! (4_0) ax = *(int*)px;
	fsubd	%f28,%f30,%f28		! (0_0) dtmp1 = hyp0 - h_hi0;

	add	%o3,stridez,%i4		! pz += stridez
	add	%o4,stridex,%l0		! px += stridex
	ldd	[TBL+%o5],%f42		! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10		! (4_1) dtmp2 += KA2;

	fmuld	%f40,%f26,%f40		! (3_1) dtmp2 *= xx0;
	add	%i2,stridey,%i2		! py += stridey
	and	%l6,_0x7fffffff,%l6	! (4_0) ay &= 0x7fffffff;
	fand	%f16,DC0,%f30		! (1_0) hyp0 = vis_fand(hyp0,DC0);

	fmuld	%f38,%f24,%f38		! (2_1) res0 *= dbase0;
	and	%i5,_0x7fffffff,%i5	! (4_0) ax &= 0x7fffffff;
	lda	[%i2]0x82,%f2		! (4_0) y0 = *py;
	fand	%f20,DA0,%f24		! (3_1) dbase0 = vis_fand(hyp0,DA0);

	faddd	%f36,%f62,%f20		! (3_0) hyp0 += dtmp0;
	cmp	%l6,_0x7f800000		! (4_0) ay ? 0x7f800000
	ldd	[%l7+8],%f36		! (3_1) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f26		! (0_0) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f32,%f10		! (4_1) dtmp2 *= xx0;
	lda	[stridex+%o4]0x82,%f4	! (4_0) x0 = *px;
	bge,pn	%icc,.update30		! (4_0) if ( ay >= 0x7f800000 )
	for	%f30,DC1,%f28		! (1_0) hyp0 = vis_for(hyp0,DC1);
.cont30:
	fmul8x16	SCALE,%f24,%f24	! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	cmp	%i5,_0x7f800000		! (4_0) ax ? 0x7f800000
	ld	[%fp+ftmp2],%i1		! (1_0) ibase0 = ((int*)&hyp0)[0];
	faddd	%f40,KA0,%f62		! (3_1) dtmp2 += KA0;

	bge,pn	%icc,.update31		! (4_0) if ( ax >= 0x7f800000 )
	st	%f20,[%fp+ftmp4]	! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont31:
	subcc	counter,5,counter	! counter -= 5;
	fdtos	%f38,%f1		! (2_1) ftmp0 = (float)res0;

	fmuld	KA3,%f26,%f34		! (0_0) dtmp2 = KA3 * xx0;
	st	%f1,[%o3]		! (2_1) *pz = ftmp0;
	bpos,pt	%icc,.main_loop		! loop while counter >= 0 (5 more elements)
	fand	%f28,DC2,%f30		! (1_0) h_hi0 = vis_fand(hyp0,DC2);

	add	counter,5,counter	! undo the bias: 0..4 elements left in flight
856
!--------------------------------------------------------------------
! .tail: drain the software pipeline.  0..4 elements are still in
! flight; finish and store them one at a time, decrementing counter
! before each store and bailing out to .begin (outside this chunk)
! as soon as it goes negative.  Instruction order and delay-slot
! usage must not change: the FP register rotation from the main loop
! is hard-wired into each stage below.
!--------------------------------------------------------------------
.tail:
	subcc	counter,1,counter	! counter--;
	bneg	.begin			! if ( counter < 0 ) all results stored
	mov	%i4,%o1			! delay slot: pz for .begin

	sra	%i1,10,%o2		! (1_1) ibase0 >>= 10;
	faddd	%f10,KA1,%f40		! (4_2) dtmp2 += KA1;

	fmuld	%f36,%f62,%f36		! (3_2) res0 *= dtmp2;
	and	%o2,2032,%o2		! (1_1) si0 = ibase0 & 0x7f0;
	fpsub32	DA1,%f24,%f24		! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

	add	%o2,TBL,%o2		! (1_1) (char*)TBL + si0
	fsubd	%f28,%f30,%f28		! (1_1) dtmp1 = hyp0 - h_hi0;

	ldd	[%o2],%f42		! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
	faddd	%f34,KA2,%f10		! (0_1) dtmp2 += KA2;

	fmuld	%f40,%f32,%f40		! (4_2) dtmp2 *= xx0;

	fmuld	%f36,%f24,%f32		! (3_2) res0 *= dbase0;
	fand	%f12,DA0,%f24		! (4_2) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%i3+8],%f62		! (4_2) res0 = ((double*)((char*)arr + si0))[1];
	fmuld	%f28,%f42,%f36		! (1_1) xx0 = dtmp1 * xx0;

	fmuld	%f10,%f26,%f10		! (0_1) dtmp2 *= xx0;

	fmul8x16	SCALE,%f24,%f24	! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f42		! (4_2) dtmp2 += KA0;

	add	%i4,stridez,%i3		! pz += stridez
	fdtos	%f32,%f1		! (3_2) ftmp0 = (float)res0;

	fmuld	KA3,%f36,%f34		! (1_1) dtmp2 = KA3 * xx0;
	st	%f1,[%i4]		! (3_2) *pz = ftmp0;

	subcc	counter,1,counter	! counter--;
	bneg	.begin			! if ( counter < 0 ) all results stored
	mov	%i3,%o1			! delay slot: pz for .begin

	faddd	%f10,KA1,%f40		! (0_1) dtmp2 += KA1;

	fmuld	%f62,%f42,%f32		! (4_2) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24		! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);


	faddd	%f34,KA2,%f10		! (1_1) dtmp2 += KA2;

	fmuld	%f40,%f26,%f40		! (0_1) dtmp2 *= xx0;

	fmuld	%f32,%f24,%f26		! (4_2) res0 *= dbase0;
	fand	%f14,DA0,%f24		! (0_1) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%l1+8],%f62		! (0_1) res0 = ((double*)((char*)arr + si0))[1];

	fmuld	%f10,%f36,%f10		! (1_1) dtmp2 *= xx0;

	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f42		! (0_1) dtmp2 += KA0;

	add	%i3,stridez,%o1		! pz += stridez
	fdtos	%f26,%f1		! (4_2) ftmp0 = (float)res0;

	st	%f1,[%i3]		! (4_2) *pz = ftmp0;

	subcc	counter,1,counter	! counter--;
	bneg	.begin			! if ( counter < 0 ) all results stored
	nop				! delay slot: %o1 already holds pz

	faddd	%f10,KA1,%f40		! (1_1) dtmp2 += KA1;

	fmuld	%f62,%f42,%f26		! (0_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24		! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f40,%f36,%f40		! (1_1) dtmp2 *= xx0;

	fmuld	%f26,%f24,%f36		! (0_1) res0 *= dbase0;
	fand	%f16,DA0,%f24		! (1_1) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%o2+8],%f38		! (1_1) res0 = ((double*)((char*)arr + si0))[1];

	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f62		! (1_1) dtmp2 += KA0;

	add	%o1,stridez,%g1		! pz += stridez
	fdtos	%f36,%f1		! (0_1) ftmp0 = (float)res0;

	st	%f1,[%o1]		! (0_1) *pz = ftmp0;

	subcc	counter,1,counter	! counter--;
	bneg	.begin			! if ( counter < 0 ) all results stored
	mov	%g1,%o1			! delay slot: pz for .begin

	fmuld	%f38,%f62,%f38		! (1_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24		! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f38,%f24,%f38		! (1_1) res0 *= dbase0;

	fdtos	%f38,%f1		! (1_1) ftmp0 = (float)res0;
	st	%f1,[%g1]		! (1_1) *pz = ftmp0;

	ba	.begin			! pipeline fully drained
	add	%g1,stridez,%o1		! pz += stridez
	.align	16
!--------------------------------------------------------------------
! .spec0: scalar special path, taken when ax >= 0x7f800000 or
! ay >= 0x7f800000 (x or y is Inf or NaN).
!   - either operand exactly Inf  -> rhypotf = 0.0f
!   - otherwise (a NaN operand)   -> store |x|*|y|, which propagates
!     the NaN into the result
! Advances px/py/pz by one stride and returns to the scalar loop.
!--------------------------------------------------------------------
.spec0:
	fabss	%f2,%f2			! fabsf(y0);

	fabss	%f4,%f4			! fabsf(x0);

	fcmps	%f2,%f4			! result unused; presumably raises the
					! invalid exception for SNaN operands
					! — NOTE(review): confirm intent

	cmp	%l6,_0x7f800000		! ay ? 0x7f800000
	be,a	1f			! if( ay == 0x7f800000 )
	st	%g0,[%o1]		! *pz = 0.0f;

	cmp	%i5,_0x7f800000		! ax ? 0x7f800000
	be,a	1f			! if( ax == 0x7f800000 )
	st	%g0,[%o1]		! *pz = 0.0f;

	fmuls	%f2,%f4,%f2		! fabsf(x0) * fabsf(y0);
	st	%f2,[%o1]		! *pz = fabsf(x0) * fabsf(y0);
1:
	add	%o4,stridex,%o4		! px += stridex;
	add	%i2,stridey,%i2		! py += stridey;

	add	%o1,stridez,%o1		! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--;
987
	.align	16
!--------------------------------------------------------------------
! .spec1: scalar special path, taken when ay == 0 (y is +-0).
! If ax != 0 the element is an ordinary 1/|x| case and the normal
! scalar path handles it (.cont_spec1, defined above this chunk).
! If ax == 0 too, rhypotf(0,0) = +Inf: computed as 1.0f/0.0f so the
! division-by-zero exception is raised.  %f7 and %f9 are loaded
! outside this chunk — presumably 1.0f and 0.0f; verify at the setup.
!--------------------------------------------------------------------
.spec1:
	cmp	%i5,0			! ax ? 0
	bne,pt	%icc,.cont_spec1	! if ( ax != 0 )
	nop

	add	%o4,stridex,%o4		! px += stridex;
	add	%i2,stridey,%i2		! py += stridey;

	fdivs	%f7,%f9,%f2		! 1.0f / 0.0f
	st	%f2,[%o1]		! *pz = 1.0f / 0.0f;

	add	%o1,stridez,%o1		! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--;
1003
!--------------------------------------------------------------------
! .update0 - .update8: pipeline abort handlers for a special-case
! argument (Inf/NaN, or y == 0 with x == 0 to check) detected for an
! element K iterations ahead in the pipeline (K = 1 for update0-2,
! 2 for update3-5, 3 for update6-8).  The pattern, shown expanded on
! .update0, is the same throughout:
!   - if counter <= K the offending element is beyond this vector
!     pass anyway: substitute a harmless table value and continue;
!   - otherwise truncate the pass to K elements: stash the remaining
!     count and the current px/py in the frame so .begin reprocesses
!     the interrupted elements through the scalar special paths.
! Handlers ending in 2,5,8 first re-test ax: ay == 0 with ax != 0 is
! an ordinary 1/|x| element and needs no truncation.
!--------------------------------------------------------------------
	.align	16
.update0:
	cmp	counter,1		! offending element inside this pass?
	ble	.cont0			! no: keep going with a dummy operand
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot: dummy y0 from the table

	sub	counter,1,counter	! elements left to redo later
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]	! resume point for px

	stx	%i2,[%fp+tmp_py]	! resume point for py
	ba	.cont0
	mov	1,counter		! finish only the elements in flight

	.align	16
.update1:
	cmp	counter,1
	ble	.cont1
	ld	[TBL+TBL_SHIFT+44],%f4	! dummy x0

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont1
	mov	1,counter

	.align	16
.update2:
	cmp	%i5,0			! ay == 0: special only if ax == 0 too
	bne	.cont2

	cmp	counter,1
	ble	.cont2
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont2
	mov	1,counter

	.align	16
.update3:
	cmp	counter,2
	ble	.cont3
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont3
	mov	2,counter

	.align	16
.update4:
	cmp	counter,2
	ble	.cont4
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont4
	mov	2,counter

	.align	16
.update5:
	cmp	%i5,0
	bne	.cont5

	cmp	counter,2
	ble	.cont5
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont5
	mov	2,counter

	.align	16
.update6:
	cmp	counter,3
	ble	.cont6
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont6
	mov	3,counter

	.align	16
.update7:
	cmp	counter,3
	ble	.cont7
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont7
	mov	3,counter

	.align	16
.update8:
	cmp	%i5,0
	bne	.cont8

	cmp	counter,3
	ble	.cont8
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont8
	mov	3,counter
1147
!--------------------------------------------------------------------
! .update9 - .update16: same abort pattern as .update0-.update8, for
! elements 4, 5 and 6 iterations ahead (thresholds 4, 5, 6): if the
! offending element lies beyond this pass, substitute a dummy table
! operand (%f2 = y0, %f4 = x0) and continue at .contNN; otherwise
! save the remaining counter and px/py for reprocessing and truncate
! this pass.  Handlers 11 and 14 first re-test ax for the ay == 0
! case (ax != 0 needs no truncation).
!--------------------------------------------------------------------
	.align	16
.update9:
	cmp	counter,4
	ble	.cont9
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont9
	mov	4,counter

	.align	16
.update10:
	cmp	counter,4
	ble	.cont10
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont10
	mov	4,counter

	.align	16
.update11:
	cmp	%i5,0
	bne	.cont11

	cmp	counter,4
	ble	.cont11
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont11
	mov	4,counter

	.align	16
.update12:
	cmp	counter,5
	ble	.cont12
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont12
	mov	5,counter

	.align	16
.update13:
	cmp	counter,5
	ble	.cont13
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont13
	mov	5,counter

	.align	16
.update14:
	cmp	%i5,0
	bne	.cont14

	cmp	counter,5
	ble	.cont14
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont14
	mov	5,counter

	.align	16
.update15:
	cmp	counter,6
	ble	.cont15
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont15
	mov	6,counter

	.align	16
.update16:
	cmp	counter,6
	ble	.cont16
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont16
	mov	6,counter
1273
!--------------------------------------------------------------------
! .update17 - .update23: abort handlers reached from inside
! .main_loop (branch sites above this chunk), following the same
! truncate-or-substitute pattern as the pre-loop handlers: compare
! counter against the element's pipeline distance, substitute a dummy
! operand if the element is beyond this pass, otherwise save the
! remaining counter and px/py and truncate.
!--------------------------------------------------------------------
	.align	16
.update17:
	! NOTE(review): unlike its siblings this handler substitutes
	! DC1 (not a TBL entry) and uses threshold 1 — the in-loop
	! pipeline distances restart; confirm against the .main_loop
	! branch site, which is outside this chunk.
	cmp	%i5,0
	bne	.cont17

	cmp	counter,1
	ble	.cont17
	fmovd	DC1,%f62		! dummy operand

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont17
	mov	1,counter

	.align	16
.update18:
	cmp	counter,2
	ble	.cont18
	ld	[TBL+TBL_SHIFT+44],%f2	! dummy y0

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont18
	mov	2,counter

	.align	16
.update19:
	cmp	counter,2
	ble	.cont19
	ld	[TBL+TBL_SHIFT+44],%f4	! dummy x0

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont19
	mov	2,counter

	.align	16
.update20:
	! NOTE(review): every other ay == 0 handler tests %i5 (ax)
	! here; this one tests %o1.  The branch site is outside this
	! chunk — confirm %o1 holds ax at that point in .main_loop.
	cmp	%o1,0
	bne	.cont20

	cmp	counter,2
	ble	.cont20
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont20
	mov	2,counter

	.align	16
.update21:
	cmp	counter,3
	ble	.cont21
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont21
	mov	3,counter

	.align	16
.update22:
	cmp	counter,3
	ble	.cont22
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont22
	mov	3,counter

	.align	16
.update23:
	cmp	%i5,0			! ay == 0: special only if ax == 0 too
	bne	.cont23

	cmp	counter,3
	ble	.cont23
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont23
	mov	3,counter
1387
!--------------------------------------------------------------------
! .update24 - .update31: abort handlers for .main_loop elements 4, 5
! and 6 iterations ahead (thresholds 4, 5, 6).  Same pattern: if the
! offending element is beyond this pass, load a dummy table operand
! (%f2 = y0, %f4 = x0) and continue at .contNN; otherwise stash the
! remaining counter and the px/py resume points in the frame and
! truncate this pass so the element is redone through the scalar
! special paths.  Handlers 26 and 29 first re-test ax for the
! ay == 0 case.
!--------------------------------------------------------------------
	.align	16
.update24:
	cmp	counter,4
	ble	.cont24
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont24
	mov	4,counter

	.align	16
.update25:
	cmp	counter,4
	ble	.cont25
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont25
	mov	4,counter

	.align	16
.update26:
	cmp	%i5,0
	bne	.cont26

	cmp	counter,4
	ble	.cont26
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont26
	mov	4,counter

	.align	16
.update27:
	cmp	counter,5
	ble	.cont27
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont27
	mov	5,counter

	.align	16
.update28:
	cmp	counter,5
	ble	.cont28
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont28
	mov	5,counter

	.align	16
.update29:
	cmp	%i5,0
	bne	.cont29

	cmp	counter,5
	ble	.cont29
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont29
	mov	5,counter

	.align	16
.update30:
	cmp	counter,6
	ble	.cont30
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont30
	mov	6,counter

	.align	16
.update31:
	cmp	counter,6
	ble	.cont31
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont31
	mov	6,counter
	.align	16
.exit:
	ret				! return to caller
	restore				! delay slot: pop the register window
	SET_SIZE(__vrhypotf)
1519