1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vatanf.S"
30
31 #include "libm.h"
32
	RO_DATA
	.align	64

! Constant pool for the vectorized single-precision atan kernel.
! Loaded PC-relatively via PIC_SET; layout (byte offsets from .CONST_TBL)
! must match the ldd offsets and the %i4/%l0/%l7 base-pointer setup in
! __vatanf below.
.CONST_TBL:
! Minimax polynomial coefficients: atan(t) ~= t*(K0 + K1*t^2 + K2*t^4).
	.word	0x3fefffff, 0xfffccbbc	! K0 = 9.99999999976686608841e-01
	.word	0xbfd55554, 0x51c6b90f	! K1 = -3.33333091601972730504e-01
	.word	0x3fc98d6d, 0x926596cc	! K2 = 1.99628540499523379702e-01
! Bit-pattern constants operated on with VIS fpadd32/fand (integer ops on
! double bit images, see the algorithm comment below): DC1 is the rounding
! addend, DC2 keeps the leading sign/exponent/mantissa bits of y, DC3
! masks the exponent field of div.
	.word	0x00020000, 0x00000000	! DC1
	.word	0xfffc0000, 0x00000000	! DC2
	.word	0x7ff00000, 0x00000000	! DC3
	.word	0x3ff00000, 0x00000000	! DONE = 1.0
	.word	0x40000000, 0x00000000	! DTWO = 2.0

! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
! (table base kept in %i4; reciprocal seeds for the Newton iteration
! that replaces the divide in atan(x) = atan(y) + atan((x-y)/(1+x*y)))

	.word	0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
	.word	0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
	.word	0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
	.word	0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
	.word	0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
	.word	0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
	.word	0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
	.word	0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
	.word	0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
	.word	0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
	.word	0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
	.word	0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
	.word	0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
	.word	0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
	.word	0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
	.word	0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
	.word	0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
	.word	0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
	.word	0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
	.word	0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
	.word	0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
	.word	0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
	.word	0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
	.word	0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
	.word	0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
	.word	0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
	.word	0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
	.word	0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
	.word	0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
	.word	0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
	.word	0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
	.word	0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804

! sign_arr (base kept in %l0): indexed by (ux >> 28) & -8, i.e. by the
! sign bit of the input, to give the final result the sign of x.
	.word	0x3ff00000, 0x00000000	! 1.0
	.word	0xbff00000, 0x00000000	! -1.0

! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
! (table of atan(y) at the reduction points y; %l7 is pre-biased so that
! the scaled-exponent index computed in the loop lands inside this table.
! The entries converge to pi/2 = 0x3ff921fb54442d18 for large arguments.)

	.word	0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
	.word	0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
	.word	0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
	.word	0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
	.word	0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
	.word	0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
	.word	0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
	.word	0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
	.word	0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
	.word	0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
	.word	0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
	.word	0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
	.word	0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
	.word	0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
	.word	0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
	.word	0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
	.word	0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
	.word	0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
	.word	0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
	.word	0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
	.word	0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
	.word	0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
	.word	0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
	.word	0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
	.word	0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
	.word	0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
	.word	0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
	.word	0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
	.word	0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
	.word	0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
	.word	0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
	.word	0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
	.word	0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
	.word	0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
	.word	0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
	.word	0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
	.word	0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
	.word	0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
	.word	0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
	.word	0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
	.word	0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
	.word	0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
	.word	0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
	.word	0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
	.word	0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
	.word	0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
	.word	0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
	.word	0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
	.word	0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
	.word	0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
	.word	0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
	.word	0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
	.word	0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
	.word	0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
	.word	0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
	.word	0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
	.word	0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
	.word	0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
	.word	0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
	.word	0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
	.word	0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
	.word	0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
	.word	0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
	.word	0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
	.word	0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
	.word	0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
	.word	0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
	.word	0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
	.word	0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
	.word	0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
	.word	0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
	.word	0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
	.word	0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
	.word	0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
	.word	0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
	.word	0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
	.word	0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
	.word	0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886
164
/*
 * Register aliases for the pipelined kernel.  Comments are kept on
 * separate lines: a trailing "!" comment on a #define would become part
 * of the macro expansion.
 *
 * Double-precision constants, loaded once from .CONST_TBL in the
 * function prologue and kept live in fixed FP registers for the whole
 * main loop.
 */
#define DC2 %f2
#define DTWO %f6
#define DONE %f52
#define K0 %f54
#define K1 %f56
#define K2 %f58
#define DC1 %f60
#define DC3 %f62

/*
 * Byte strides between consecutive input/output floats (the prologue
 * scales the caller's element strides by 4 via sllx ...,2), and integer
 * bit-mask constants materialized once with sethi/add.
 */
#define stridex %o2
#define stridey %o3
#define MASK_0x7fffffff %i1
#define MASK_0x100000 %i5

/*
 * Scratch slots in the register-save frame, addressed off %fp.
 * tmp_px/tmp_counter checkpoint loop state for the special-case
 * restart path; tmp0/tmp1 bounce doubles between FP and integer
 * registers (std then ldx).
 */
#define tmp_px STACK_BIAS-32
#define tmp_counter STACK_BIAS-24
#define tmp0 STACK_BIAS-16
#define tmp1 STACK_BIAS-8

/* Remaining-element count for the main loop. */
#define counter %l1

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x20
188
189 !--------------------------------------------------------------------
190 ! !!!!! vatanf algorithm !!!!!
191 ! ux = ((int*)px)[0];
192 ! ax = ux & 0x7fffffff;
193 !
194 ! if ( ax < 0x39b89c55 )
195 ! {
196 ! *(int*)py = ux;
197 ! goto next;
198 ! }
199 !
200 ! if ( ax > 0x4c700518 )
201 ! {
202 ! if ( ax > 0x7f800000 )
203 ! {
204 ! float fpx = fabsf(*px);
205 ! fpx *= fpx;
206 ! *py = fpx;
207 ! goto next;
208 ! }
209 !
210 ! sign = ux & 0x80000000;
211 ! sign |= pi_2;
212 ! *(int*)py = sign;
213 ! goto next;
214 ! }
215 !
216 ! ftmp0 = *px;
217 ! x = (double)ftmp0;
218 ! px += stridex;
219 ! y = vis_fpadd32(x,DC1);
220 ! y = vis_fand(y,DC2);
221 ! div = x * y;
222 ! xx = x - y;
223 ! div += DONE;
224 ! i = ((unsigned long long*)&div)[0];
225 ! y0 = vis_fand(div,DC3);
226 ! i >>= 43;
227 ! i &= 508;
228 ! *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
229 ! y0 = vis_fpsub32(dtmp0, y0);
! dtmp0 = div * y0;
! dtmp0 = DTWO - dtmp0;
! y0 *= dtmp0;
! dtmp1 = div * y0;
! dtmp1 = DTWO - dtmp1;
235 ! y0 *= dtmp1;
236 ! ax = ux & 0x7fffffff;
237 ! ax += 0x00100000;
238 ! ax >>= 18;
239 ! ax &= -8;
240 ! res = *(double*)((char*)parr1 + ax);
241 ! ux >>= 28;
242 ! ux &= -8;
243 ! dtmp0 = *(double*)((char*)sign_arr + ux);
244 ! res *= dtmp0;
245 ! xx *= y0;
246 ! x2 = xx * xx;
247 ! dtmp0 = K2 * x2;
248 ! dtmp0 += K1;
249 ! dtmp0 *= x2;
250 ! dtmp0 += K0;
251 ! dtmp0 *= xx;
252 ! res += dtmp0;
253 ! ftmp0 = (float)res;
254 ! py[0] = ftmp0;
255 ! py += stridey;
256 !--------------------------------------------------------------------
257
258 ENTRY(__vatanf)
259 save %sp,-SA(MINFRAME)-tmps,%sp
260 PIC_SETUP(l7)
261 PIC_SET(l7,.CONST_TBL,l2)
262
263 st %i0,[%fp+tmp_counter]
264
265 sllx %i2,2,stridex
266 sllx %i4,2,stridey
267
268 or %g0,%i3,%o1
269 stx %i1,[%fp+tmp_px]
270
271 ldd [%l2],K0
272 ldd [%l2+8],K1
273 ldd [%l2+16],K2
274 ldd [%l2+24],DC1
275 ldd [%l2+32],DC2
276 ldd [%l2+40],DC3
277 ldd [%l2+48],DONE
278 ldd [%l2+56],DTWO
279
280 add %l2,64,%i4
281 add %l2,64+512,%l0
282 add %l2,64+512+16-0x1cc*8,%l7
283
284 sethi %hi(0x100000),MASK_0x100000
285 sethi %hi(0x7ffffc00),MASK_0x7fffffff
286 add MASK_0x7fffffff,1023,MASK_0x7fffffff
287
288 sethi %hi(0x39b89c00),%o4
289 add %o4,0x55,%o4
290 sethi %hi(0x4c700400),%o5
291 add %o5,0x118,%o5
292
293 .begin:
294 ld [%fp+tmp_counter],counter
295 ldx [%fp+tmp_px],%i3
296 st %g0,[%fp+tmp_counter]
297 .begin1:
298 cmp counter,0
299 ble,pn %icc,.exit
300 nop
301
302 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
303
304 and %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff;
305 lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
306
307 cmp %l5,%o4 ! (0_0) ax ? 0x39b89c55
308 bl,pn %icc,.spec0 ! (0_0) if ( ax < 0x39b89c55 )
309 nop
310
311 cmp %l5,%o5 ! (0_0) ax ? 0x4c700518
312 bg,pn %icc,.spec1 ! (0_0) if ( ax > 0x4c700518 )
313 nop
314
315 add %i3,stridex,%l5 ! px += stridex;
316 fstod %f0,%f22 ! (0_0) ftmp0 = *px;
317 mov %l6,%i3
318
319 lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
320
321 and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
322 lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
323 add %l5,stridex,%l4 ! px += stridex;
324 fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
325
326 cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
327 bl,pn %icc,.update0 ! (1_0) if ( ax < 0x39b89c55 )
328 nop
329 .cont0:
330 cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
331 bg,pn %icc,.update1 ! (1_0) if ( ax > 0x4c700518 )
332 nop
333 .cont1:
334 fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
335 mov %l6,%l5
336
337 fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
338
339 fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
340
341 lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
342 fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
343
344 and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
345 lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
346 add %l4,stridex,%l3 ! px += stridex;
347 fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
348
349 cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
350 bl,pn %icc,.update2 ! (2_0) if ( ax < 0x39b89c55 )
351 faddd DONE,%f32,%f32 ! (0_0) div += done;
352 .cont2:
353 cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
354 bg,pn %icc,.update3 ! (2_0) if ( ax > 0x4c700518 )
355 nop
356 .cont3:
357 std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
358 mov %l6,%l4
359 fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
360
361 fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
362
363 fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
364
365 lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
366 fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
367
368 and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
369 lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
370 add %l3,stridex,%i0 ! px += stridex;
371 fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
372
373 cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
374 bl,pn %icc,.update4 ! (3_0) if ( ax < 0x39b89c55 )
375 faddd DONE,%f30,%f30 ! (1_0) div += done;
376 .cont4:
377 cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
378 bg,pn %icc,.update5 ! (3_0) if ( ax > 0x4c700518 )
379 nop
380 .cont5:
381 std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
382 mov %l6,%l3
383 fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
384
385 ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
386 fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
387
388 fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);
389
390 srlx %o0,43,%o0 ! (0_0) i >>= 43;
391
392 and %o0,508,%l6 ! (0_0) i &= 508;
393
394 ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
395
396 fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
397
398 lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
399 fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;
400
401 fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
402
403 and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
404 lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
405 add %i0,stridex,%i2 ! px += stridex;
406 fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);
407
408 cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
409 bl,pn %icc,.update6 ! (4_0) if ( ax < 0x39b89c55 )
410 faddd DONE,%f28,%f28 ! (2_0) div += done;
411 .cont6:
412 fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
413 cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
414 bg,pn %icc,.update7 ! (4_0) if ( ax > 0x4c700518 )
415 nop
416 .cont7:
417 std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
418 mov %l6,%i0
419 fstod %f0,%f14 ! (4_0) x = (double)ftmp0;
420
421 ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
422 fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);
423
424 fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);
425
426 fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;
427 srlx %g1,43,%g1 ! (1_0) i >>= 43;
428
429 and %g1,508,%l6 ! (1_0) i &= 508;
430
431 ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
432
433 fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
434
435 lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
436 fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;
437
438 fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
439 add %i2,stridex,%l2 ! px += stridex;
440
441 fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
442 and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
443 lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
444 fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);
445
446 cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
447 bl,pn %icc,.update8 ! (5_0) if ( ax < 0x39b89c55 )
448 faddd DONE,%f34,%f34 ! (3_0) div += done;
449 .cont8:
450 fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
451 cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
452 bg,pn %icc,.update9 ! (5_0) if ( ax > 0x4c700518 )
453 nop
454 .cont9:
455 std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
456 mov %l6,%i2
457 fstod %f0,%f36 ! (5_0) x = (double)ftmp0;
458
459 fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
460 ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
461 fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);
462
463 fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);
464
465 fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;
466 srlx %o0,43,%o0 ! (2_0) i >>= 43;
467
468 and %o0,508,%l6 ! (2_0) i &= 508;
469 fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;
470
471 ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
472
473 fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
474
475 lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
476 fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;
477
478 fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
479 add %l2,stridex,%g5 ! px += stridex;
480 fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
481
482 fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
483 and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
484 lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
485 fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);
486
487 cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
488 bl,pn %icc,.update10 ! (6_0) if ( ax < 0x39b89c55 )
489 faddd DONE,%f32,%f32 ! (4_0) div += done;
490 .cont10:
491 fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
492 cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
493 bg,pn %icc,.update11 ! (6_0) if ( ax > 0x4c700518 )
494 nop
495 .cont11:
496 fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
497 mov %l6,%l2
498 std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
499 fstod %f0,%f10 ! (6_0) x = (double)ftmp0;
500
501 fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
502 ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
503 fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);
504
505 fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);
506
507 fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
508 srlx %g1,43,%g1 ! (3_0) i >>= 43;
509 fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;
510
511 and %g1,508,%l6 ! (3_0) i &= 508;
512 mov %i3,%o7
513 fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;
514
515 ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
516
517 fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
518 srl %o7,28,%g1 ! (0_0) ux >>= 28;
519 add %g5,stridex,%i3 ! px += stridex;
520
521 fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
522 and %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff;
523 lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
524 fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;
525
526 fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
527 add %o0,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
528 and %g1,-8,%g1 ! (0_0) ux &= -8;
529 fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
530
531 fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
532 and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
533 lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
534 fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);
535
536 cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
537 bl,pn %icc,.update12 ! (7_0) if ( ax < 0x39b89c55 )
538 faddd DONE,%f30,%f30 ! (5_0) div += done;
539 .cont12:
540 fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
541 cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
542 bg,pn %icc,.update13 ! (7_0) if ( ax > 0x4c700518 )
543 faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
544 .cont13:
545 fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
546 srl %o0,18,%o7 ! (0_0) ax >>= 18;
547 std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
548 fstod %f0,%f8 ! (7_0) x = (double)ftmp0;
549
550 fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
551 and %o7,-8,%o7 ! (0_0) ux &= -8;
552 ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
553 fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);
554
555 add %o7,%l7,%o7 ! (0_0) (char*)parr1 + ax;
556 mov %l6,%g5
557 ldd [%l0+%g1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
558
559 fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
560 srlx %o0,43,%o0 ! (4_0) i >>= 43;
561 ldd [%o7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
562 fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);
563
564 fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
565 and %o0,508,%l6 ! (4_0) i &= 508;
566 mov %l5,%o7
567 fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;
568
569 fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;
570
571 fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
572 srl %o7,28,%l5 ! (1_0) ux >>= 28;
573 ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
574
575 fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
576 faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;
577
578 subcc counter,8,counter
579 bneg,pn %icc,.tail
580 or %g0,%o1,%o0
581
582 add %fp,tmp0,%g1
583 lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
584
585 ba .main_loop
586 add %i3,stridex,%l5 ! px += stridex;
587
588 .align 16
589 .main_loop:
590 fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
591 and %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff;
592 st %f12,[%g1] ! (7_1) py[0] = ftmp0;
593 fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;
594
595 fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
596 srl %o7,28,%o7 ! (1_0) ux >>= 28;
597 add %o1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
598 fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
599
600 fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;
601 and %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff;
602 lda [%i3]0x82,%f0 ! (0_0) ftmp0 = *px;
603 fpadd32 %f8,DC1,%f24 ! (7_1) y = vis_fpadd32(x,dconst1);
604
605 fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
606 cmp %o1,%o4 ! (0_0) ax ? 0x39b89c55
607 bl,pn %icc,.update14 ! (0_0) if ( ax < 0x39b89c55 )
608 faddd DONE,%f28,%f28 ! (6_1) div += done;
609 .cont14:
610 fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
611 cmp %o1,%o5 ! (0_0) ax ? 0x4c700518
612 bg,pn %icc,.update15 ! (0_0) if ( ax > 0x4c700518 )
613 faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;
614 .cont15:
615 fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
616 srl %g1,18,%o1 ! (1_1) ax >>= 18;
617 std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];
618 fstod %f0,%f22 ! (0_0) ftmp0 = *px;
619
620 fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
621 and %o1,-8,%o1 ! (1_1) ax &= -8;
622 ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];
623 fand %f24,DC2,%f26 ! (7_1) y = vis_fand(y,dconst2);
624
625 ldd [%o1+%l7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);
626 and %o7,-8,%o7 ! (1_1) ux &= -8;
627 mov %l6,%i3
628 faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
629
630 fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
631 nop
632 ldd [%l0+%o7],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
633 fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
634
635 fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
636 srlx %g1,43,%g1 ! (5_1) i >>= 43;
637 mov %l4,%o7
638 fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
639
640 and %g1,508,%l6 ! (5_1) i &= 508;
641 nop
642 bn,pn %icc,.exit
643 fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;
644
645 fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
646 add %o0,stridey,%g1 ! py += stridey;
647 ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
648 fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;
649
650 fmuld %f8,%f26,%f34 ! (7_1) div = x * y;
651 srl %o7,28,%o1 ! (2_1) ux >>= 28;
652 lda [%l5]0x82,%l6 ! (1_0) ux = ((int*)px)[0];
653 faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;
654
655 fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
656 and %o7,MASK_0x7fffffff,%o7 ! (2_1) ax = ux & 0x7fffffff;
657 st %f12,[%o0] ! (0_1) py[0] = ftmp0;
658 fsubd %f8,%f26,%f8 ! (7_1) xx = x - y;
659
660 fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
661 add %l5,stridex,%l4 ! px += stridex;
662 add %o7,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
663 fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);
664
665 fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;
666 and %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
667 lda [%l5]0x82,%f0 ! (1_0) ftmp0 = *px;
668 fpadd32 %f22,DC1,%f24 ! (0_0) y = vis_fpadd32(x,dconst1);
669
670 fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;
671 cmp %o7,%o4 ! (1_0) ax ? 0x39b89c55
672 bl,pn %icc,.update16 ! (1_0) if ( ax < 0x39b89c55 )
673 faddd DONE,%f34,%f34 ! (7_1) div += done;
674 .cont16:
675 fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
676 cmp %o7,%o5 ! (1_0) ax ? 0x4c700518
677 bg,pn %icc,.update17 ! (1_0) if ( ax > 0x4c700518 )
678 faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;
679 .cont17:
680 fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
681 srl %o0,18,%o7 ! (2_1) ax >>= 18;
682 std %f34,[%fp+tmp1] ! (7_1) i = ((unsigned long long*)&div)[0];
683 fstod %f0,%f20 ! (1_0) x = (double)ftmp0;
684
685 fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
686 ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];
687 and %o1,-8,%o1 ! (2_1) ux &= -8;
688 fand %f24,DC2,%f26 ! (0_0) y = vis_fand(y,dconst2);
689
690 faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
691 and %o7,-8,%o7 ! (2_1) ax &= -8;
692 ldd [%l0+%o1],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
693 bn,pn %icc,.exit
694
695 ldd [%o7+%l7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);
696 mov %l6,%l5
697 fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
698 fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
699
700 fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
701 srlx %o0,43,%o0 ! (6_1) i >>= 43;
702 mov %l3,%o7
703 fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
704
705 and %o0,508,%l6 ! (6_1) i &= 508;
706 add %l4,stridex,%l3 ! px += stridex;
707 bn,pn %icc,.exit
708 fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;
709
710 fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
711 add %g1,stridey,%o0 ! py += stridey;
712 ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
713 fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;
714
715 fmuld %f22,%f26,%f32 ! (0_0) div = x * y;
716 srl %o7,28,%o1 ! (3_1) ux >>= 28;
717 lda [%l4]0x82,%l6 ! (2_0) ux = ((int*)px)[0];
718 faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;
719
720 fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
721 and %o7,MASK_0x7fffffff,%o7 ! (3_1) ax = ux & 0x7fffffff;
722 st %f12,[%g1] ! (1_1) py[0] = ftmp0;
723 fsubd %f22,%f26,%f22 ! (0_0) xx = x - y;
724
725 fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
726 add %o7,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
727 and %o1,-8,%o1 ! (3_1) ux &= -8;
728 fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);
729
730 fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;
731 and %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
732 lda [%l4]0x82,%f0 ! (2_0) ftmp0 = *px;
733 fpadd32 %f20,DC1,%f24 ! (1_0) y = vis_fpadd32(x,dconst1);
734
735 fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;
736 cmp %o7,%o4 ! (2_0) ax ? 0x39b89c55
737 bl,pn %icc,.update18 ! (2_0) if ( ax < 0x39b89c55 )
738 faddd DONE,%f32,%f32 ! (0_0) div += done;
739 .cont18:
740 fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
741 cmp %o7,%o5 ! (2_0) ax ? 0x4c700518
742 bg,pn %icc,.update19 ! (2_0) if ( ax > 0x4c700518 )
743 faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;
744 .cont19:
745 fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
746 srl %g1,18,%o7 ! (3_1) ax >>= 18;
747 std %f32,[%fp+tmp0] ! (0_0) i = ((unsigned long long*)&div)[0];
748 fstod %f0,%f18 ! (2_0) x = (double)ftmp0;
749
750 fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
751 and %o7,-8,%o7 ! (3_1) ax &= -8;
752 ldx [%fp+tmp1],%g1 ! (7_1) i = ((unsigned long long*)&div)[0];
753 fand %f24,DC2,%f26 ! (1_0) y = vis_fand(y,dconst2);
754
755 faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
756 mov %l6,%l4
757 ldd [%l0+%o1],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
758 bn,pn %icc,.exit
759
760 fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
761 ldd [%o7+%l7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax)
762 nop
763 fand %f34,DC3,%f24 ! (7_1) y0 = vis_fand(div,dconst3);
764
765 fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
766 srlx %g1,43,%g1 ! (7_1) i >>= 43;
767 mov %i0,%o7
768 fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;
769
770 and %g1,508,%l6 ! (7_1) i &= 508;
771 add %l3,stridex,%i0 ! px += stridex;
772 bn,pn %icc,.exit
773 fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;
774
775 fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
776 add %o0,stridey,%g1 ! py += stridey;
777 ld [%i4+%l6],%f0 ! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
778 fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;
779
780 fmuld %f20,%f26,%f30 ! (1_0) div = x * y;
781 srl %o7,28,%o1 ! (4_1) ux >>= 28;
782 lda [%l3]0x82,%l6 ! (3_0) ux = ((int*)px)[0];
783 faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;
784
785 fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
786 and %o7,MASK_0x7fffffff,%o7 ! (4_1) ax = ux & 0x7fffffff;
787 st %f12,[%o0] ! (2_1) py[0] = ftmp0;
788 fsubd %f20,%f26,%f20 ! (1_0) xx = x - y;
789
790 fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
791 add %o7,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
792 and %o1,-8,%o1 ! (4_1) ux &= -8;
793 fpsub32 %f0,%f24,%f38 ! (7_1) y0 = vis_fpsub32(dtmp0, y0);
794
795 fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;
796 and %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
797 lda [%l3]0x82,%f0 ! (3_0) ftmp0 = *px;
798 fpadd32 %f18,DC1,%f24 ! (2_0) y = vis_fpadd32(x,dconst1);
799
800 fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;
801 cmp %o7,%o4 ! (3_0) ax ? 0x39b89c55
802 bl,pn %icc,.update20 ! (3_0) if ( ax < 0x39b89c55 )
803 faddd DONE,%f30,%f30 ! (1_0) div += done;
804 .cont20:
805 fmuld %f34,%f38,%f42 ! (7_1) dtmp0 = div0 * y0;
806 cmp %o7,%o5 ! (3_0) ax ? 0x4c700518
807 bg,pn %icc,.update21 ! (3_0) if ( ax > 0x4c700518 )
808 faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;
809 .cont21:
810 fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
811 srl %o0,18,%o7 ! (4_1) ax >>= 18;
812 std %f30,[%fp+tmp1] ! (1_0) i = ((unsigned long long*)&div)[0];
813 fstod %f0,%f16 ! (3_0) x = (double)ftmp0;
814
815 fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
816 and %o7,-8,%o7 ! (4_1) ax &= -8;
817 ldx [%fp+tmp0],%o0 ! (0_0) i = ((unsigned long long*)&div)[0];
818 fand %f24,DC2,%f26 ! (2_0) y = vis_fand(y,dconst2);
819
820 faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
821 nop
! -------------------------------------------------------------------
! Continuation of the 8-way software-pipelined main loop of __vatanf.
! Tags such as "(4_1)" in the existing comments name the loop element
! (0..7) and the pipeline generation the instruction belongs to; the
! C pseudo-code after each tag is the operation being performed for
! that element.  Per element the visible steps are:
!   y    = table point near x            (fpadd32 DC1, fand DC2)
!   div  = 1 + x*y;  xx = x - y
!   y0   ~= 1/div via lookup (parr0) plus two Newton-Raphson steps
!          (dtmp = dtwo - div*y0; y0 *= dtmp), then xx *= y0
!   res  = sign * parr1[ax] + xx*(K0 + x2*(K1 + x2*K2)),  x2 = xx*xx
! The "bn,pn %icc,.exit" instructions are branch-never ops: they are
! never taken and presumably serve only as instruction-group fillers
! for scheduling -- NOTE(review): confirm against SPARC pipe docs.
! Arguments outside [0x39b89c55, 0x4c700518] (absolute value of the
! raw float bits) divert to .updateNN fixup stubs below.
! -------------------------------------------------------------------
        ldd [%l0+%o1],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
        bn,pn %icc,.exit ! branch never: group filler (see header note)

        ldd [%o7+%l7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);
        mov %l6,%l3
        fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
        fand %f32,DC3,%f24 ! (0_0) y0 = vis_fand(div,dconst3);

        fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;
        srlx %o0,43,%o0 ! (0_0) i >>= 43;
        mov %i2,%o7
        fsubd DTWO,%f42,%f44 ! (7_1) dtmp0 = dtwo - dtmp0;

        and %o0,508,%l6 ! (0_0) i &= 508;
        add %i0,stridex,%i2 ! px += stridex;
        bn,pn %icc,.exit
        fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
        add %g1,stridey,%o0 ! py += stridey;
        ld [%i4+%l6],%f0 ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;

        fmuld %f18,%f26,%f28 ! (2_0) div = x * y;
        srl %o7,28,%o1 ! (5_1) ux >>= 28;
        lda [%i0]0x82,%l6 ! (4_0) ux = ((int*)px)[0];
        faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;

        fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o7 ! (5_1) ax = ux & 0x7fffffff;
        st %f12,[%g1] ! (3_1) py[0] = ftmp0;
        fsubd %f18,%f26,%f18 ! (2_0) xx = x - y;

        fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
        add %o7,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
        and %o1,-8,%o1 ! (5_1) ux &= -8;
        fpsub32 %f0,%f24,%f40 ! (0_0) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f38,%f44,%f38 ! (7_1) y0 *= dtmp0;
        and %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
        lda [%i0]0x82,%f0 ! (4_0) ftmp0 = *px;
        fpadd32 %f16,DC1,%f24 ! (3_0) y = vis_fpadd32(x,dconst1);

        fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;
        cmp %o7,%o4 ! (4_0) ax ? 0x39b89c55
        bl,pn %icc,.update22 ! (4_0) if ( ax < 0x39b89c55 )
        faddd DONE,%f28,%f28 ! (2_0) div += done;
.cont22:
        fmuld %f32,%f40,%f42 ! (0_0) dtmp0 = div0 * y0;
        cmp %o7,%o5 ! (4_0) ax ? 0x4c700518
        bg,pn %icc,.update23 ! (4_0) if ( ax > 0x4c700518 )
        faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;
.cont23:
        fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
        srl %g1,18,%o7 ! (5_1) ax >>= 18;
        std %f28,[%fp+tmp0] ! (2_0) i = ((unsigned long long*)&div)[0];
        fstod %f0,%f14 ! (4_0) x = (double)ftmp0;

        fmuld %f34,%f38,%f34 ! (7_1) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (5_1) ax &= -8;
        ldx [%fp+tmp1],%g1 ! (1_0) i = ((unsigned long long*)&div)[0];
        fand %f24,DC2,%f26 ! (3_0) y = vis_fand(y,dconst2);

        faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
        mov %l6,%i0
        ldd [%l0+%o1],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
        bn,pn %icc,.exit

        ldd [%o7+%l7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);
        nop
        fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
        fand %f30,DC3,%f24 ! (1_0) y0 = vis_fand(div,dconst3);

        fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;
        srlx %g1,43,%g1 ! (1_0) i >>= 43;
        mov %l2,%o7
        fsubd DTWO,%f42,%f44 ! (0_0) dtmp0 = dtwo - dtmp0;

        and %g1,508,%l6 ! (1_0) i &= 508;
        add %i2,stridex,%l2 ! px += stridex;
        bn,pn %icc,.exit
        fsubd DTWO,%f34,%f46 ! (7_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
        add %o0,stridey,%g1 ! py += stridey;
        ld [%i4+%l6],%f0 ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;

        fmuld %f16,%f26,%f34 ! (3_0) div = x * y;
        srl %o7,28,%o1 ! (6_1) ux >>= 28;
        lda [%i2]0x82,%l6 ! (5_0) ux = ((int*)px)[0];
        faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;

        fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o7 ! (6_1) ax = ux & 0x7fffffff;
        st %f12,[%o0] ! (4_1) py[0] = ftmp0;
        fsubd %f16,%f26,%f16 ! (3_0) xx = x - y;

        fmuld %f38,%f46,%f26 ! (7_1) y0 *= dtmp1;
        add %o7,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
        and %o1,-8,%o1 ! (6_1) ux &= -8;
        fpsub32 %f0,%f24,%f38 ! (1_0) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f40,%f44,%f40 ! (0_0) y0 *= dtmp0;
        and %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
        lda [%i2]0x82,%f0 ! (5_0) ftmp0 = *px;
        fpadd32 %f14,DC1,%f24 ! (4_0) y = vis_fpadd32(x,dconst1);

        fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;
        cmp %o7,%o4 ! (5_0) ax ? 0x39b89c55
        bl,pn %icc,.update24 ! (5_0) if ( ax < 0x39b89c55 )
        faddd DONE,%f34,%f34 ! (3_0) div += done;
.cont24:
        fmuld %f30,%f38,%f42 ! (1_0) dtmp0 = div0 * y0;
        cmp %o7,%o5 ! (5_0) ax ? 0x4c700518
        bg,pn %icc,.update25 ! (5_0) if ( ax > 0x4c700518 )
        faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;
.cont25:
        fmuld %f8,%f26,%f8 ! (7_1) xx *= y0;
        srl %o0,18,%o7 ! (6_1) ax >>= 18;
        std %f34,[%fp+tmp1] ! (3_0) i = ((unsigned long long*)&div)[0];
        fstod %f0,%f36 ! (5_0) x = (double)ftmp0;

        fmuld %f32,%f40,%f32 ! (0_0) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (6_1) ax &= -8;
        ldx [%fp+tmp0],%o0 ! (2_0) i = ((unsigned long long*)&div)[0];
        fand %f24,DC2,%f26 ! (4_0) y = vis_fand(y,dconst2);

        faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
        mov %l6,%i2
        ldd [%l0+%o1],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
        bn,pn %icc,.exit

        ldd [%o7+%l7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);
        nop
        fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
        fand %f28,DC3,%f24 ! (2_0) y0 = vis_fand(div,dconst3);

        fmuld %f8,%f8,%f50 ! (7_1) x2 = xx * xx;
        srlx %o0,43,%o0 ! (2_0) i >>= 43;
        mov %g5,%o7
        fsubd DTWO,%f42,%f44 ! (1_0) dtmp0 = dtwo - dtmp0;

        and %o0,508,%l6 ! (2_0) i &= 508;
        add %l2,stridex,%g5 ! px += stridex;
        bn,pn %icc,.exit
        fsubd DTWO,%f32,%f46 ! (0_0) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
        add %g1,stridey,%o0 ! py += stridey;
        ld [%i4+%l6],%f0 ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;

        fmuld %f14,%f26,%f32 ! (4_0) div = x * y;
        srl %o7,28,%o1 ! (7_1) ux >>= 28;
        lda [%l2]0x82,%l6 ! (6_0) ux = ((int*)px)[0];
        faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;

        fmuld K2,%f50,%f4 ! (7_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o7 ! (7_1) ax = ux & 0x7fffffff;
        st %f12,[%g1] ! (5_1) py[0] = ftmp0;
        fsubd %f14,%f26,%f14 ! (4_0) xx = x - y;

        fmuld %f40,%f46,%f26 ! (0_0) y0 *= dtmp1;
        add %o7,MASK_0x100000,%g1 ! (7_1) ax += 0x00100000;
        and %o1,-8,%o1 ! (7_1) ux &= -8;
        fpsub32 %f0,%f24,%f40 ! (2_0) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f38,%f44,%f38 ! (1_0) y0 *= dtmp0;
        and %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
        lda [%l2]0x82,%f0 ! (6_0) ftmp0 = *px;
        fpadd32 %f36,DC1,%f24 ! (5_0) y = vis_fpadd32(x,dconst1);

        fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;
        cmp %o7,%o4 ! (6_0) ax ? 0x39b89c55
        bl,pn %icc,.update26 ! (6_0) if ( ax < 0x39b89c55 )
        faddd DONE,%f32,%f32 ! (4_0) div += done;
.cont26:
        fmuld %f28,%f40,%f42 ! (2_0) dtmp0 = div0 * y0;
        cmp %o7,%o5 ! (6_0) ax ? 0x4c700518
        bg,pn %icc,.update27 ! (6_0) if ( ax > 0x4c700518 )
        faddd %f4,K1,%f4 ! (7_1) dtmp0 += K1;
.cont27:
        fmuld %f22,%f26,%f22 ! (0_0) xx *= y0;
        srl %g1,18,%o7 ! (7_1) ax >>= 18;
        std %f32,[%fp+tmp0] ! (4_0) i = ((unsigned long long*)&div)[0];
        fstod %f0,%f10 ! (6_0) x = (double)ftmp0;

        fmuld %f30,%f38,%f30 ! (1_0) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (7_1) ax &= -8;
        ldx [%fp+tmp1],%g1 ! (3_0) i = ((unsigned long long*)&div)[0];
        fand %f24,DC2,%f26 ! (5_0) y = vis_fand(y,dconst2);

        faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;
        mov %l6,%l2
        ldd [%l0+%o1],%f48 ! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
        bn,pn %icc,.exit

        ldd [%o7+%l7],%f0 ! (7_1) res = *(double*)((char*)parr1 + ax);
        nop
        fmuld %f4,%f50,%f4 ! (7_1) dtmp0 *= x2;
        fand %f34,DC3,%f24 ! (3_0) y0 = vis_fand(div,dconst3);

        fmuld %f22,%f22,%f50 ! (0_0) x2 = xx * xx;
        srlx %g1,43,%g1 ! (3_0) i >>= 43;
        mov %i3,%o7
        fsubd DTWO,%f42,%f44 ! (2_0) dtmp0 = dtwo - dtmp0;

        and %g1,508,%l6 ! (3_0) i &= 508;
        add %g5,stridex,%i3 ! px += stridex;
        bn,pn %icc,.exit
        fsubd DTWO,%f30,%f46 ! (1_0) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (7_1) res *= dtmp0;
        add %o0,stridey,%g1 ! py += stridey;
        ld [%i4+%l6],%f0 ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;

        fmuld %f36,%f26,%f30 ! (5_0) div = x * y;
        srl %o7,28,%o1 ! (0_0) ux >>= 28;
        lda [%g5]0x82,%l6 ! (7_0) ux = ((int*)px)[0];
        faddd %f4,K0,%f42 ! (7_1) dtmp0 += K0;

        fmuld K2,%f50,%f4 ! (0_0) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o7 ! (0_0) ax = ux & 0x7fffffff;
        st %f12,[%o0] ! (6_1) py[0] = ftmp0;
        fsubd %f36,%f26,%f36 ! (5_0) xx = x - y;

        fmuld %f38,%f46,%f26 ! (1_0) y0 *= dtmp1;
        add %o7,MASK_0x100000,%o0 ! (0_0) ax += 0x00100000;
        and %o1,-8,%o1 ! (0_0) ux &= -8;
        fpsub32 %f0,%f24,%f38 ! (3_0) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f40,%f44,%f40 ! (2_0) y0 *= dtmp0;
        and %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
        lda [%g5]0x82,%f0 ! (7_0) ftmp0 = *px;
        fpadd32 %f10,DC1,%f24 ! (6_0) y = vis_fpadd32(x,dconst1);

        fmuld %f42,%f8,%f44 ! (7_1) dtmp0 *= xx;
        cmp %o7,%o4 ! (7_0) ax ? 0x39b89c55
        bl,pn %icc,.update28 ! (7_0) if ( ax < 0x39b89c55 )
        faddd DONE,%f30,%f30 ! (5_0) div += done;
.cont28:
        fmuld %f34,%f38,%f42 ! (3_0) dtmp0 = div0 * y0;
        cmp %o7,%o5 ! (7_0) ax ? 0x4c700518
        bg,pn %icc,.update29 ! (7_0) if ( ax > 0x4c700518 )
        faddd %f4,K1,%f4 ! (0_0) dtmp0 += K1;
.cont29:
        fmuld %f20,%f26,%f20 ! (1_0) xx *= y0;
        srl %o0,18,%o7 ! (0_0) ax >>= 18;
        std %f30,[%fp+tmp1] ! (5_0) i = ((unsigned long long*)&div)[0];
        fstod %f0,%f8 ! (7_0) x = (double)ftmp0;

        fmuld %f28,%f40,%f28 ! (2_0) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (0_0) ux &= -8;
        ldx [%fp+tmp0],%o0 ! (4_0) i = ((unsigned long long*)&div)[0];
        fand %f24,DC2,%f26 ! (6_0) y = vis_fand(y,dconst2);

        faddd %f48,%f44,%f12 ! (7_1) res += dtmp0;
        subcc counter,8,counter ! eight elements retired per trip
        ldd [%l0+%o1],%f48 ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
        bn,pn %icc,.exit

        fmuld %f4,%f50,%f4 ! (0_0) dtmp0 *= x2;
        mov %l6,%g5
        ldd [%o7+%l7],%f0 ! (0_0) res = *(double*)((char*)parr1 + ax);
        fand %f32,DC3,%f24 ! (4_0) y0 = vis_fand(div,dconst3);

        fmuld %f20,%f20,%f50 ! (1_0) x2 = xx * xx;
        srlx %o0,43,%l6 ! (4_0) i >>= 43;
        mov %l5,%o7
        fsubd DTWO,%f42,%f44 ! (3_0) dtmp0 = dtwo - dtmp0;

        add %g1,stridey,%o0 ! py += stridey;
        and %l6,508,%l6 ! (4_0) i &= 508;
        bn,pn %icc,.exit
        fsubd DTWO,%f28,%f46 ! (2_0) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (0_0) res *= dtmp0;
        ld [%i4+%l6],%f0 ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        add %i3,stridex,%l5 ! px += stridex;
        fdtos %f12,%f12 ! (7_1) ftmp0 = (float)res;

        lda [%i3]0x82,%l6 ! (0_0) ux = ((int*)px)[0];
        fmuld %f10,%f26,%f28 ! (6_0) div = x * y;
        bpos,pt %icc,.main_loop ! loop while >= 8 elements remain
        faddd %f4,K0,%f42 ! (0_0) dtmp0 += K0;

        srl %o7,28,%l5 ! (1_0) ux >>= 28;
        st %f12,[%g1] ! (7_1) py[0] = ftmp0;
1112
.tail:
! -------------------------------------------------------------------
! Pipeline drain: the main loop over-decremented counter by 8, so
! restore up to 7 leftover in-flight elements here.  Each section
! below finishes one pipeline generation, stores one result, then
! decrements counter and bails out to .begin once nothing remains.
! No new elements are loaded; only already-started ones complete.
! -------------------------------------------------------------------
        addcc counter,7,counter ! undo over-subtraction; N left in flight
        bneg,pn %icc,.begin ! nothing pending -> rejoin driver
        or %g0,%o0,%o1

        fsubd %f10,%f26,%f10 ! (6_1) xx = x - y;
        and %o7,MASK_0x7fffffff,%g1 ! (1_1) ax = ux & 0x7fffffff;
        fmuld K2,%f50,%f4 ! (1_1) dtmp0 = K2 * x2;

        fmuld %f40,%f46,%f26 ! (2_1) y0 *= dtmp1;
        add %g1,MASK_0x100000,%g1 ! (1_1) ax += 0x00100000;
        and %l5,-8,%l5 ! (1_1) ux &= -8;
        fpsub32 %f0,%f24,%f40 ! (4_1) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f38,%f44,%f38 ! (3_1) y0 *= dtmp0;

        fmuld %f42,%f22,%f44 ! (0_1) dtmp0 *= xx;
        faddd DONE,%f28,%f28 ! (6_1) div += done;

        fmuld %f32,%f40,%f42 ! (4_1) dtmp0 = div0 * y0;
        faddd %f4,K1,%f4 ! (1_1) dtmp0 += K1;

        fmuld %f18,%f26,%f18 ! (2_1) xx *= y0;
        srl %g1,18,%o7 ! (1_1) ax >>= 18;
        std %f28,[%fp+tmp0] ! (6_1) i = ((unsigned long long*)&div)[0];

        fmuld %f34,%f38,%f34 ! (3_1) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (1_1) ax &= -8;
        ldx [%fp+tmp1],%g1 ! (5_1) i = ((unsigned long long*)&div)[0];

        faddd %f48,%f44,%f12 ! (0_1) res += dtmp0;
        add %o7,%l7,%o7 ! (1_1) (char*)parr1 + ax;
        ldd [%l0+%l5],%f48 ! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (1_1) dtmp0 *= x2;
        fand %f30,DC3,%f24 ! (5_1) y0 = vis_fand(div,dconst3);
        ldd [%o7],%f0 ! (1_1) res = *(double*)((char*)parr1 + ax);

        fmuld %f18,%f18,%f50 ! (2_1) x2 = xx * xx;
        fsubd DTWO,%f42,%f44 ! (4_1) dtmp0 = dtwo - dtmp0;
        srlx %g1,43,%g1 ! (5_1) i >>= 43;

        and %g1,508,%l6 ! (5_1) i &= 508;
        mov %l4,%o7
        fsubd DTWO,%f34,%f46 ! (3_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (1_1) res *= dtmp0;
        add %o0,stridey,%g1 ! py += stridey;
        ld [%i4+%l6],%f0 ! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (0_1) ftmp0 = (float)res;

        srl %o7,28,%l4 ! (2_1) ux >>= 28;
        st %f12,[%o0] ! (0_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (1_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%g1,%o1

        fmuld K2,%f50,%f4 ! (2_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o0 ! (2_1) ax = ux & 0x7fffffff;

        fmuld %f38,%f46,%f26 ! (3_1) y0 *= dtmp1;
        add %o0,MASK_0x100000,%o0 ! (2_1) ax += 0x00100000;
        and %l4,-8,%l4 ! (2_1) ux &= -8;
        fpsub32 %f0,%f24,%f38 ! (5_1) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f40,%f44,%f40 ! (4_1) y0 *= dtmp0;

        fmuld %f42,%f20,%f44 ! (1_1) dtmp0 *= xx;

        fmuld %f30,%f38,%f42 ! (5_1) dtmp0 = div0 * y0;
        faddd %f4,K1,%f4 ! (2_1) dtmp0 += K1;

        fmuld %f16,%f26,%f16 ! (3_1) xx *= y0;
        srl %o0,18,%o7 ! (2_1) ax >>= 18;

        fmuld %f32,%f40,%f32 ! (4_1) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (2_1) ax &= -8;
        ldx [%fp+tmp0],%o0 ! (6_1) i = ((unsigned long long*)&div)[0];

        faddd %f48,%f44,%f12 ! (1_1) res += dtmp0;
        add %o7,%l7,%o7 ! (2_1) (char*)parr1 + ax;
        ldd [%l0+%l4],%f48 ! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (2_1) dtmp0 *= x2;
        fand %f28,DC3,%f24 ! (6_1) y0 = vis_fand(div,dconst3);
        ldd [%o7],%f0 ! (2_1) res = *(double*)((char*)parr1 + ax);

        fmuld %f16,%f16,%f50 ! (3_1) x2 = xx * xx;
        fsubd DTWO,%f42,%f44 ! (5_1) dtmp0 = dtwo - dtmp0;
        srlx %o0,43,%o0 ! (6_1) i >>= 43;

        and %o0,508,%l6 ! (6_1) i &= 508;
        mov %l3,%o7
        fsubd DTWO,%f32,%f46 ! (4_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (2_1) res *= dtmp0;
        add %g1,stridey,%o0 ! py += stridey;
        ld [%i4+%l6],%f0 ! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
        fdtos %f12,%f12 ! (1_1) ftmp0 = (float)res;

        srl %o7,28,%l3 ! (3_1) ux >>= 28;
        st %f12,[%g1] ! (1_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (2_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%o0,%o1

        fmuld K2,%f50,%f4 ! (3_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%g1 ! (3_1) ax = ux & 0x7fffffff;

        fmuld %f40,%f46,%f26 ! (4_1) y0 *= dtmp1;
        add %g1,MASK_0x100000,%g1 ! (3_1) ax += 0x00100000;
        and %l3,-8,%l3 ! (3_1) ux &= -8;
        fpsub32 %f0,%f24,%f40 ! (6_1) y0 = vis_fpsub32(dtmp0, y0);

        fmuld %f38,%f44,%f38 ! (5_1) y0 *= dtmp0;

        fmuld %f42,%f18,%f44 ! (2_1) dtmp0 *= xx;

        fmuld %f28,%f40,%f42 ! (6_1) dtmp0 = div0 * y0;
        faddd %f4,K1,%f4 ! (3_1) dtmp0 += K1;

        fmuld %f14,%f26,%f14 ! (4_1) xx *= y0;
        srl %g1,18,%o7 ! (3_1) ax >>= 18;

        fmuld %f30,%f38,%f30 ! (5_1) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (3_1) ax &= -8;

        faddd %f48,%f44,%f12 ! (2_1) res += dtmp0;
        add %o7,%l7,%o7 ! (3_1) (char*)parr1 + ax;
        ldd [%l0+%l3],%f48 ! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (3_1) dtmp0 *= x2;
        ldd [%o7],%f0 ! (3_1) res = *(double*)((char*)parr1 + ax)

        fmuld %f14,%f14,%f50 ! (4_1) x2 = xx * xx;
        fsubd DTWO,%f42,%f44 ! (6_1) dtmp0 = dtwo - dtmp0;

        mov %i0,%o7
        fsubd DTWO,%f30,%f46 ! (5_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (3_1) res *= dtmp0;
        add %o0,stridey,%g1 ! py += stridey;
        fdtos %f12,%f12 ! (2_1) ftmp0 = (float)res;

        srl %o7,28,%i0 ! (4_1) ux >>= 28;
        st %f12,[%o0] ! (2_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (3_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%g1,%o1

        fmuld K2,%f50,%f4 ! (4_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o0 ! (4_1) ax = ux & 0x7fffffff;

        fmuld %f38,%f46,%f26 ! (5_1) y0 *= dtmp1;
        add %o0,MASK_0x100000,%o0 ! (4_1) ax += 0x00100000;
        and %i0,-8,%i0 ! (4_1) ux &= -8;

        fmuld %f40,%f44,%f40 ! (6_1) y0 *= dtmp0;

        fmuld %f42,%f16,%f44 ! (3_1) dtmp0 *= xx;

        faddd %f4,K1,%f4 ! (4_1) dtmp0 += K1;

        fmuld %f36,%f26,%f36 ! (5_1) xx *= y0;
        srl %o0,18,%o7 ! (4_1) ax >>= 18;

        fmuld %f28,%f40,%f28 ! (6_1) dtmp1 = div0 * y0;
        and %o7,-8,%o7 ! (4_1) ax &= -8;

        faddd %f48,%f44,%f12 ! (3_1) res += dtmp0;
        add %o7,%l7,%o7 ! (4_1) (char*)parr1 + ax;
        ldd [%l0+%i0],%f48 ! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (4_1) dtmp0 *= x2;
        ldd [%o7],%f0 ! (4_1) res = *(double*)((char*)parr1 + ax);

        fmuld %f36,%f36,%f50 ! (5_1) x2 = xx * xx;

        mov %i2,%o7
        fsubd DTWO,%f28,%f46 ! (6_1) dtmp1 = dtwo - dtmp1;

        fmuld %f0,%f48,%f48 ! (4_1) res *= dtmp0;
        add %g1,stridey,%o0 ! py += stridey;
        fdtos %f12,%f12 ! (3_1) ftmp0 = (float)res;

        srl %o7,28,%i2 ! (5_1) ux >>= 28;
        st %f12,[%g1] ! (3_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (4_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%o0,%o1

        fmuld K2,%f50,%f4 ! (5_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff;

        fmuld %f40,%f46,%f26 ! (6_1) y0 *= dtmp1;
        add %g1,MASK_0x100000,%g1 ! (5_1) ax += 0x00100000;
        and %i2,-8,%i2 ! (5_1) ux &= -8;

        fmuld %f42,%f14,%f44 ! (4_1) dtmp0 *= xx;

        faddd %f4,K1,%f4 ! (5_1) dtmp0 += K1;

        fmuld %f10,%f26,%f10 ! (6_1) xx *= y0;
        srl %g1,18,%o7 ! (5_1) ax >>= 18;

        and %o7,-8,%o7 ! (5_1) ax &= -8;

        faddd %f48,%f44,%f12 ! (4_1) res += dtmp0;
        add %o7,%l7,%o7 ! (5_1) (char*)parr1 + ax;
        ldd [%l0+%i2],%f48 ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (5_1) dtmp0 *= x2;
        ldd [%o7],%f0 ! (5_1) res = *(double*)((char*)parr1 + ax);

        fmuld %f10,%f10,%f50 ! (6_1) x2 = xx * xx;

        mov %l2,%o7

        fmuld %f0,%f48,%f48 ! (5_1) res *= dtmp0;
        add %o0,stridey,%g1 ! py += stridey;
        fdtos %f12,%f12 ! (4_1) ftmp0 = (float)res;

        srl %o7,28,%l2 ! (6_1) ux >>= 28;
        st %f12,[%o0] ! (4_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (5_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%g1,%o1

        fmuld K2,%f50,%f4 ! (6_1) dtmp0 = K2 * x2;
        and %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff;

        add %o0,MASK_0x100000,%o0 ! (6_1) ax += 0x00100000;
        and %l2,-8,%l2 ! (6_1) ux &= -8;

        fmuld %f42,%f36,%f44 ! (5_1) dtmp0 *= xx;

        faddd %f4,K1,%f4 ! (6_1) dtmp0 += K1;

        srl %o0,18,%o7 ! (6_1) ax >>= 18;

        and %o7,-8,%o7 ! (6_1) ax &= -8;

        faddd %f48,%f44,%f12 ! (5_1) res += dtmp0;
        add %o7,%l7,%o7 ! (6_1) (char*)parr1 + ax;
        ldd [%l0+%l2],%f48 ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);

        fmuld %f4,%f50,%f4 ! (6_1) dtmp0 *= x2;
        ldd [%o7],%f0 ! (6_1) res = *(double*)((char*)parr1 + ax);

        fmuld %f0,%f48,%f48 ! (6_1) res *= dtmp0;
        add %g1,stridey,%o0 ! py += stridey;
        fdtos %f12,%f12 ! (5_1) ftmp0 = (float)res;

        st %f12,[%g1] ! (5_1) py[0] = ftmp0;
        faddd %f4,K0,%f42 ! (6_1) dtmp0 += K0;

        subcc counter,1,counter ! one result stored; any left?
        bneg,pn %icc,.begin
        or %g0,%o0,%o1

        fmuld %f42,%f10,%f44 ! (6_1) dtmp0 *= xx;

        faddd %f48,%f44,%f12 ! (6_1) res += dtmp0;

        add %o0,stridey,%g1 ! py += stridey;
        fdtos %f12,%f12 ! (6_1) ftmp0 = (float)res;

        st %f12,[%o0] ! (6_1) py[0] = ftmp0;

        ba .begin ! last in-flight element done; rejoin driver
        or %g0,%g1,%o1 ! py += stridey;
1394
.exit:
! Function epilogue: unwind the register window and return.
        ret
        restore %g0,%g0,%g0 ! delay slot: restore caller's window
1398
        .align 16
.spec0:
! Special-case path: store the raw argument bits (ux) unchanged as
! the result, then resume the scalar pre-loop at .begin1.
! Presumably the tiny-argument case where atanf(x) == x to float
! precision -- NOTE(review): confirm against the dispatch before .begin1.
        add %i3,stridex,%i3 ! px += stridex;
        sub counter,1,counter
        st %l6,[%o1] ! *(int*)py = ux;

        ba .begin1
        add %o1,stridey,%o1 ! py += stridey;
1407
        .align 16
.spec1:
! Special-case path for huge/non-finite arguments.
!   ax > 0x7f800000 (NaN input)  -> result = |x| * |x|  (quiet NaN out)
!   otherwise (|x| huge or Inf)  -> result = sign(x) | pi/2 bits
! 0x3fc90c00 + 0x3db = 0x3fc90fdb, the single-precision bit pattern
! of pi/2.  Resumes the scalar pre-loop at .begin1.
        sethi %hi(0x7f800000),%l3 ! %l3 = Inf bit pattern
        sethi %hi(0x3fc90c00),%l4 ! pi_2

        sethi %hi(0x80000000),%o0 ! sign-bit mask
        add %l4,0x3db,%l4 ! pi_2 = 0x3fc90fdb

        cmp %l5,%l3 ! if ( ax > 0x7f800000 )
        bg,a,pn %icc,1f
        fabss %f0,%f0 ! fpx = fabsf(*px);

        and %l6,%o0,%l6 ! sign = ux & 0x80000000;

        or %l6,%l4,%l6 ! sign |= pi_2;

        add %i3,stridex,%i3 ! px += stridex;
        sub counter,1,counter
        st %l6,[%o1] ! *(int*)py = sign;

        ba .begin1
        add %o1,stridey,%o1 ! py += stridey;

1:
! NaN input: propagate a quiet NaN via |x|*|x|.
        fmuls %f0,%f0,%f0 ! fpx *= fpx;

        add %i3,stridex,%i3 ! px += stridex
        sub counter,1,counter
        st %f0,[%o1] ! *py = fpx;

        ba .begin1
        add %o1,stridey,%o1 ! py += stridey;
1440
! -------------------------------------------------------------------
! Out-of-range fixup stubs .update0 .. .update29, reached from the
! main loop when an element's |bits| fall outside
! [0x39b89c55, 0x4c700518].  Every stub follows one template
! (K = number of elements that entered the pipeline before the
! offending one; REG = register holding the offending px):
!   - substitute 0.0 for the bad lane (fzeros %f0) and a safe
!     ux = hi(0x3fffffff) so downstream table indexing stays in range;
!   - if counter <= K the bad element would never be stored anyway:
!     just continue at .contN;
!   - otherwise save the not-yet-started work (remaining counter to
!     tmp_counter, faulting px to tmp_px), clamp counter to K so the
!     loop only finishes the in-flight elements, and continue; the
!     driver presumably restarts from the saved position afterwards
!     (restart code is outside this view).
! -------------------------------------------------------------------
        .align 16
.update0:
        cmp counter,1 ! K elements already in flight ahead of this one
        fzeros %f0 ! substitute 0.0 for the offending lane
        ble,a .cont0 ! bad lane never stored: just patch ux and go on
        sethi %hi(0x3fffffff),%l6 ! safe ux keeps table indices in range

        sub counter,1,counter ! defer the rest of the vector
        st counter,[%fp+tmp_counter]

        stx %l5,[%fp+tmp_px] ! remember where to restart
        sethi %hi(0x3fffffff),%l6
        ba .cont0
        or %g0,1,counter ! finish only the in-flight elements

        .align 16
.update1:
        cmp counter,1
        fzeros %f0
        ble,a .cont1
        sethi %hi(0x3fffffff),%l6

        sub counter,1,counter
        st counter,[%fp+tmp_counter]

        stx %l5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont1
        or %g0,1,counter

        .align 16
.update2:
        cmp counter,2
        fzeros %f0
        ble,a .cont2
        sethi %hi(0x3fffffff),%l6

        sub counter,2,counter
        st counter,[%fp+tmp_counter]

        stx %l4,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont2
        or %g0,2,counter

        .align 16
.update3:
        cmp counter,2
        fzeros %f0
        ble,a .cont3
        sethi %hi(0x3fffffff),%l6

        sub counter,2,counter
        st counter,[%fp+tmp_counter]

        stx %l4,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont3
        or %g0,2,counter

        .align 16
.update4:
        cmp counter,3
        fzeros %f0
        ble,a .cont4
        sethi %hi(0x3fffffff),%l6

        sub counter,3,counter
        st counter,[%fp+tmp_counter]

        stx %l3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont4
        or %g0,3,counter

        .align 16
.update5:
        cmp counter,3
        fzeros %f0
        ble,a .cont5
        sethi %hi(0x3fffffff),%l6

        sub counter,3,counter
        st counter,[%fp+tmp_counter]

        stx %l3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont5
        or %g0,3,counter

        .align 16
.update6:
        cmp counter,4
        fzeros %f0
        ble,a .cont6
        sethi %hi(0x3fffffff),%l6

        sub counter,4,counter
        st counter,[%fp+tmp_counter]

        stx %i0,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont6
        or %g0,4,counter

        .align 16
.update7:
        cmp counter,4
        fzeros %f0
        ble,a .cont7
        sethi %hi(0x3fffffff),%l6

        sub counter,4,counter
        st counter,[%fp+tmp_counter]

        stx %i0,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont7
        or %g0,4,counter

        .align 16
.update8:
        cmp counter,5
        fzeros %f0
        ble,a .cont8
        sethi %hi(0x3fffffff),%l6

        sub counter,5,counter
        st counter,[%fp+tmp_counter]

        stx %i2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont8
        or %g0,5,counter

        .align 16
.update9:
        cmp counter,5
        fzeros %f0
        ble,a .cont9
        sethi %hi(0x3fffffff),%l6

        sub counter,5,counter
        st counter,[%fp+tmp_counter]

        stx %i2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont9
        or %g0,5,counter

        .align 16
.update10:
        cmp counter,6
        fzeros %f0
        ble,a .cont10
        sethi %hi(0x3fffffff),%l6

        sub counter,6,counter
        st counter,[%fp+tmp_counter]

        stx %l2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont10
        or %g0,6,counter

        .align 16
.update11:
        cmp counter,6
        fzeros %f0
        ble,a .cont11
        sethi %hi(0x3fffffff),%l6

        sub counter,6,counter
        st counter,[%fp+tmp_counter]

        stx %l2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont11
        or %g0,6,counter

        .align 16
.update12:
        cmp counter,7
        fzeros %f0
        ble,a .cont12
        sethi %hi(0x3fffffff),%l6

        sub counter,7,counter
        st counter,[%fp+tmp_counter]

        stx %g5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont12
        or %g0,7,counter

        .align 16
.update13:
        cmp counter,7
        fzeros %f0
        ble,a .cont13
        sethi %hi(0x3fffffff),%l6

        sub counter,7,counter
        st counter,[%fp+tmp_counter]

        stx %g5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont13
        or %g0,7,counter

        .align 16
.update14:
! K = 0 here: the "sub counter,0" / "or %g0,0,counter" forms keep the
! shared template shape even though they are arithmetic no-ops.
        cmp counter,0
        fzeros %f0
        ble,a .cont14
        sethi %hi(0x3fffffff),%l6

        sub counter,0,counter
        st counter,[%fp+tmp_counter]

        stx %i3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont14
        or %g0,0,counter

        .align 16
.update15:
        cmp counter,0
        fzeros %f0
        ble,a .cont15
        sethi %hi(0x3fffffff),%l6

        sub counter,0,counter
        st counter,[%fp+tmp_counter]

        stx %i3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont15
        or %g0,0,counter

        .align 16
.update16:
        cmp counter,1
        fzeros %f0
        ble,a .cont16
        sethi %hi(0x3fffffff),%l6

        sub counter,1,counter
        st counter,[%fp+tmp_counter]

        stx %l5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont16
        or %g0,1,counter

        .align 16
.update17:
        cmp counter,1
        fzeros %f0
        ble,a .cont17
        sethi %hi(0x3fffffff),%l6

        sub counter,1,counter
        st counter,[%fp+tmp_counter]

        stx %l5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont17
        or %g0,1,counter

        .align 16
.update18:
        cmp counter,2
        fzeros %f0
        ble,a .cont18
        sethi %hi(0x3fffffff),%l6

        sub counter,2,counter
        st counter,[%fp+tmp_counter]

        stx %l4,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont18
        or %g0,2,counter

        .align 16
.update19:
        cmp counter,2
        fzeros %f0
        ble,a .cont19
        sethi %hi(0x3fffffff),%l6

        sub counter,2,counter
        st counter,[%fp+tmp_counter]

        stx %l4,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont19
        or %g0,2,counter

        .align 16
.update20:
        cmp counter,3
        fzeros %f0
        ble,a .cont20
        sethi %hi(0x3fffffff),%l6

        sub counter,3,counter
        st counter,[%fp+tmp_counter]

        stx %l3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont20
        or %g0,3,counter

        .align 16
.update21:
        cmp counter,3
        fzeros %f0
        ble,a .cont21
        sethi %hi(0x3fffffff),%l6

        sub counter,3,counter
        st counter,[%fp+tmp_counter]

        stx %l3,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont21
        or %g0,3,counter

        .align 16
.update22:
        cmp counter,4
        fzeros %f0
        ble,a .cont22
        sethi %hi(0x3fffffff),%l6

        sub counter,4,counter
        st counter,[%fp+tmp_counter]

        stx %i0,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont22
        or %g0,4,counter

        .align 16
.update23:
        cmp counter,4
        fzeros %f0
        ble,a .cont23
        sethi %hi(0x3fffffff),%l6

        sub counter,4,counter
        st counter,[%fp+tmp_counter]

        stx %i0,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont23
        or %g0,4,counter

        .align 16
.update24:
        cmp counter,5
        fzeros %f0
        ble,a .cont24
        sethi %hi(0x3fffffff),%l6

        sub counter,5,counter
        st counter,[%fp+tmp_counter]

        stx %i2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont24
        or %g0,5,counter

        .align 16
.update25:
        cmp counter,5
        fzeros %f0
        ble,a .cont25
        sethi %hi(0x3fffffff),%l6

        sub counter,5,counter
        st counter,[%fp+tmp_counter]

        stx %i2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont25
        or %g0,5,counter

        .align 16
.update26:
        cmp counter,6
        fzeros %f0
        ble,a .cont26
        sethi %hi(0x3fffffff),%l6

        sub counter,6,counter
        st counter,[%fp+tmp_counter]

        stx %l2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont26
        or %g0,6,counter

        .align 16
.update27:
        cmp counter,6
        fzeros %f0
        ble,a .cont27
        sethi %hi(0x3fffffff),%l6

        sub counter,6,counter
        st counter,[%fp+tmp_counter]

        stx %l2,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont27
        or %g0,6,counter

        .align 16
.update28:
        cmp counter,7
        fzeros %f0
        ble,a .cont28
        sethi %hi(0x3fffffff),%l6

        sub counter,7,counter
        st counter,[%fp+tmp_counter]

        stx %g5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont28
        or %g0,7,counter

        .align 16
.update29:
        cmp counter,7
        fzeros %f0
        ble,a .cont29
        sethi %hi(0x3fffffff),%l6

        sub counter,7,counter
        st counter,[%fp+tmp_counter]

        stx %g5,[%fp+tmp_px]
        sethi %hi(0x3fffffff),%l6
        ba .cont29
        or %g0,7,counter

        SET_SIZE(__vatanf)
1892