1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vhypotf.S"
30
31 #include "libm.h"
32
! Read-only constant pool for __vhypotf.
! The prologue loads every entry by a fixed byte offset from .CONST_TBL
! (0, 8, 16, 24, 32, 40, 48, 56, 60 and 64), so the order and size of the
! entries below must stay in sync with those loads. The entry at offset 56
! packs two independent 32-bit values: FMAX at offset 56 and SCALE at
! offset 60.
33 RO_DATA
34 .align 64
35
36 .CONST_TBL:
37 .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01
38 .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01
39 .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
40 .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000
41 .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000
42 .word 0x7fe00000, 0x00000000 ! DA0 = 0x7fe0000000000000
43 .word 0x47efffff, 0xe0000000 ! DFMAX = 3.402823e+38
44 .word 0x7f7fffff, 0x80808080 ! FMAX = 3.402823e+38 , SCALE = 0x80808080
45 .word 0x20000000, 0x00000000 ! DA1 = 0x2000000000000000
46
! Floating-point registers that hold the constants loaded from .CONST_TBL
! by the prologue. They are only read after that point.
47 #define DC0 %f12
48 #define DC1 %f10
49 #define DC2 %f42
50 #define DA0 %f6
51 #define DA1 %f4
52 #define K2 %f26
53 #define K1 %f28
54 #define SCALE %f3
55 #define FMAX %f2
56 #define DFMAX %f50
57
! Integer registers with a fixed role. The three stride registers hold the
! strides after the prologue has shifted them left by 2 (byte strides for
! 4-byte elements). TBL is the base address of __vlibm_TBL_sqrtf.
58 #define stridex %l6
59 #define stridey %i4
60 #define stridez %l5
61 #define _0x7fffffff %o1
62 #define _0x7f3504f3 %o2
63 #define _0x1ff0 %l2
64 #define TBL %l1
65
! Number of elements still to be handled in the current pipeline run.
66 #define counter %l0
67
! Scratch slots below %fp. tmp_px, tmp_py and tmp_counter park the restart
! position and the remaining element count between pipeline runs (written
! by the prologue and the .updateN stubs, read back at .begin). tmp0..tmp4
! are 4-byte slots used to copy the upper word of a double from an FP
! register into an integer register (st of the even FP register, then ld).
68 #define tmp_px STACK_BIAS-0x30
69 #define tmp_py STACK_BIAS-0x28
70 #define tmp_counter STACK_BIAS-0x20
71 #define tmp0 STACK_BIAS-0x18
72 #define tmp1 STACK_BIAS-0x10
73 #define tmp2 STACK_BIAS-0x0c
74 #define tmp3 STACK_BIAS-0x08
75 #define tmp4 STACK_BIAS-0x04
76
77 ! sizeof temp storage - must be a multiple of 16 for V9
78 #define tmps 0x30
79
80 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
81 ! !!!!! algorithm !!!!!
82 ! hx0 = *(int*)px;
83 ! x0 = *px;
84 ! px += stridex;
85 !
86 ! hy0 = *(int*)py;
87 ! y0 = *py;
88 ! py += stridey;
89 !
90 ! hx0 &= 0x7fffffff;
91 ! hy0 &= 0x7fffffff;
92 !
93 ! if ( hx0 >= 0x7f3504f3 || hy0 >= 0x7f3504f3 )
94 ! {
95 ! if ( hx0 >= 0x7f800000 || hy0 >= 0x7f800000 )
96 ! {
97 ! if ( hx0 == 0x7f800000 || hy0 == 0x7f800000 )
98 ! *(int*)pz = 0x7f800000;
99 ! else *pz = x0 * y0;
100 ! }
101 ! else
102 ! {
103 ! hyp = sqrt(x0 * (double)x0 + y0 * (double)y0);
104 ! if ( hyp <= DFMAX ) ftmp0 = (float)hyp;
105 ! else ftmp0 = FMAX * FMAX;
106 ! *pz = ftmp0;
107 ! }
108 ! pz += stridez;
109 ! continue;
110 ! }
111 ! if ( (hx0 | hy0) == 0 )
112 ! {
113 ! *pz = 0;
114 ! pz += stridez;
115 ! continue;
116 ! }
117 ! dx0 = x0 * (double)x0;
118 ! dy0 = y0 * (double)y0;
119 ! db0 = dx0 + dy0;
120 !
121 ! iexp0 = ((int*)&db0)[0];
122 !
123 ! h0 = vis_fand(db0,DC0);
124 ! h0 = vis_for(h0,DC1);
125 ! h_hi0 = vis_fand(h0,DC2);
126 !
127 ! db0 = vis_fand(db0,DA0);
128 ! db0 = vis_fmul8x16(SCALE, db0);
129 ! db0 = vis_fpadd32(db0,DA1);
130 !
131 ! iexp0 >>= 8;
132 ! di0 = iexp0 & 0x1ff0;
133 ! si0 = (char*)sqrt_arr + di0;
134 !
135 ! dtmp0 = ((double*)((char*)div_arr + di0))[0];
136 ! xx0 = h0 - h_hi0;
137 ! xx0 *= dtmp0;
138 !
139 ! dtmp0 = ((double*)si0)[1];
140 ! res0 = K2 * xx0;
141 ! res0 += K1;
142 ! res0 *= xx0;
143 ! res0 += DC1;
144 ! res0 = dtmp0 * res0;
145 ! res0 *= db0;
146 ! ftmp0 = (float)res0;
147 ! *pz = ftmp0;
148 ! pz += stridez;
149 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
150
! __vhypotf entry and one-time setup.
! Incoming arguments as consumed below (the C-level names are taken from
! the algorithm sketch; confirm against the prototype):
!   %i0 = element count       -> parked in tmp_counter
!   %i1 = px                  -> parked in tmp_px
!   %i2 = stride of x         -> stridex, scaled by 4 to a byte stride
!   %i3 = py                  -> parked in tmp_py
!   %i4 = stride of y         -> scaled by 4 in place
!   %i5 = pz                  -> copied to %l7
!   7th argument, stride of z -> loaded from the caller frame, scaled by 4
! The prologue also loads every constant from .CONST_TBL into its FP
! register and builds the integer masks 0x7fffffff, 0x7f3504f3 and 0x1ff0.
! Execution then falls through into .begin.
151 ENTRY(__vhypotf)
152 save %sp,-SA(MINFRAME)-tmps,%sp
153 PIC_SETUP(l7)
154 PIC_SET(l7,.CONST_TBL,o3)
155 PIC_SET(l7,__vlibm_TBL_sqrtf,l1)
156
! The 7th argument does not fit in %i0-%i5 and is read from the stack.
157 #ifdef __sparcv9
158 ldx [%fp+STACK_BIAS+176],stridez
159 #else
160 ld [%fp+STACK_BIAS+92],stridez
161 #endif
162 st %i0,[%fp+tmp_counter] ! park the element count; .begin reloads it
163
164 stx %i1,[%fp+tmp_px] ! park px; .begin reloads it
165
166 stx %i3,[%fp+tmp_py] ! park py; .begin reloads it
167
168 ldd [%o3],K1
169 sethi %hi(0x7ffffc00),%o1 ! upper 22 bits of 0x7fffffff
170
171 ldd [%o3+8],K2
172 sethi %hi(0x7f350400),%o2 ! upper 22 bits of 0x7f3504f3
173
174 ldd [%o3+16],DC0
175 add %o1,1023,_0x7fffffff ! 0x7ffffc00 + 0x3ff = 0x7fffffff
176 add %o2,0xf3,_0x7f3504f3 ! 0x7f350400 + 0xf3 = 0x7f3504f3
177
178 ldd [%o3+24],DC1
179 sll %i2,2,stridex ! x stride in bytes
180
181 ld [%o3+56],FMAX
182
183 ldd [%o3+32],DC2
184 sll %i4,2,stridey ! y stride in bytes
185
186 ldd [%o3+40],DA0
187 sll stridez,2,stridez ! z stride in bytes
188
189 ldd [%o3+48],DFMAX
190
191 ld [%o3+60],SCALE
192 or %g0,0xff8,%l2
193
194 ldd [%o3+64],DA1
195 sll %l2,1,_0x1ff0 ! 0xff8 << 1 = 0x1ff0
196 or %g0,%i5,%l7 ! %l7 = pz (output pointer)
197
! .begin: (re)start point for a run of elements. Reloads the element count
! and the x/y pointers from tmp_counter/tmp_px/tmp_py, then clears
! tmp_counter, so the next pass through .begin exits unless an .updateN
! stub has parked a new remainder in the meantime.
!
! .begin1: checks the first element of the run. It is sent to .spec when
! the sign-stripped bit pattern of x or y is >= 0x7f3504f3 and to .spec1
! when both patterns are zero; both paths come back to .begin1. Otherwise
! the code primes the software pipeline: it loads and squares the following
! elements ahead of time, each load guarded by a branch to an .updateN stub,
! and finally enters .main_loop when counter >= 5 or .tail when it is less.
!
! All element loads use lda with ASI 0x82 (ASI_PRIMARY_NOFAULT in the
! SPARC V9 ASI map), presumably so that loads issued ahead of the bounds
! check cannot trap. Many instructions below sit in branch delay slots and
! execute whether or not the branch is taken; do not reorder them.
198 .begin:
199 ld [%fp+tmp_counter],counter
200 ldx [%fp+tmp_px],%i1
201 ldx [%fp+tmp_py],%i2
202 st %g0,[%fp+tmp_counter] ! nothing parked unless a stub stores a remainder
203 .begin1:
204 cmp counter,0
205 ble,pn %icc,.exit
206 lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
207
208 lda [%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
209
210 lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
211 and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
212
213 cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
214 bge,pn %icc,.spec ! (3_0) if ( hx >= 0x7f3504f3 )
215 and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
216
217 cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
218 bge,pn %icc,.spec ! (3_0) if ( hy >= 0x7f3504f3 )
219 or %g0,%i2,%o7 ! delay slot: %o7 = py
220
221 orcc %l3,%l4,%g0
222 bz,pn %icc,.spec1
223
224 add %i1,stridex,%i1 ! px += stridex (delay slot: also done on the way to .spec1)
225 fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
226 lda [%i2]0x82,%f17 ! (3_0) y0 = *py;
227
228 lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
229
230 lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
231
232 and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
233
234 fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
235 cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
236 bge,pn %icc,.update0 ! (4_0) if ( hx >= 0x7f3504f3 )
237 and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
238
239 orcc %l3,%l4,%g0
240 bz,pn %icc,.update0
241 lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
242 .cont0:
243 faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
244
245 fsmuld %f17,%f17,%f40 ! (4_1) dx0 = x0 * (double)x0;
246 cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
247 lda [stridey+%o7]0x82,%f17 ! (4_1) y0 = *py;
248
249 add %o7,stridey,%i5 ! py += stridey
250 lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
251
252 bge,pn %icc,.update1 ! (4_1) if ( hy >= 0x7f3504f3 )
253 st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
254 .cont1:
255 and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
256
257 fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
258 lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
259
260 add %i1,stridex,%i1 ! px += stridex
261
262 lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
263 cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
264 bge,pn %icc,.update2 ! (0_0) if ( hx >= 0x7f3504f3 )
265 add %i5,stridey,%o4 ! py += stridey
266 .cont2:
267 faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
268
269 fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
270 and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
271 lda [%i5+stridey]0x82,%f17 ! (0_0) y0 = *py;
272
273 cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
274 bge,pn %icc,.update3 ! (0_0) if ( hy >= 0x7f3504f3 )
275 st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
276
277 orcc %l3,%l4,%g0
278 bz,pn %icc,.update3
279 .cont3:
280 lda [%i1+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
281
282 fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
283
284 and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
285
286 fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
287 cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
288 lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
289
290 add %i1,stridex,%i1 ! px += stridex
291
292 lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
293 bge,pn %icc,.update4 ! (1_0) if ( hx >= 0x7f3504f3 )
294 add %o4,stridey,%i5 ! py += stridey
295 .cont4:
296 and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
297 for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
298
299 cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
300 ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
301 faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
302
303 fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
304 add %i1,stridex,%i1 ! px += stridex
305 lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
306
307 srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
308 bge,pn %icc,.update5 ! (1_0) if ( hy >= 0x7f3504f3 )
309 fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
310
311 orcc %l3,%l4,%g0
312 bz,pn %icc,.update5
313 .cont5:
314 lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
315
316 and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
317 st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
318 fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
319
320 ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
321 fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
322
323 fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
324 add %i5,stridey,%i2 ! py += stridey
325 lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
326
327 and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
328
329 lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
330 cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
331
332 fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dtmp0;
333 and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
334 for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
335
336 bge,pn %icc,.update6 ! (2_0) if ( hx >= 0x7f3504f3 )
337 ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
338 .cont6:
339 faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0;
340
341 fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
342 cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
343 lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
344
345 add %i1,stridex,%i1 ! px += stridex
346 bge,pn %icc,.update7 ! (2_0) if ( hy >= 0x7f3504f3 )
347 fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
348
349 orcc %l3,%l4,%g0
350 bz,pn %icc,.update7
351 nop
352 .cont7:
353 fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
354 srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
355 lda [%i1]0x82,%l3 ! (3_0) hx0 = *(int*)px;
356
357 and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
358 st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
359 fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
360
361 ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
362 add %i2,stridey,%o7 ! py += stridey
363 fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
364
365 fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
366 lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
367 and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
368
369 faddd %f56,K1,%f54 ! (3_1) res0 += K1;
370 cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
371
372 lda [%i1]0x82,%f17 ! (3_0) x0 = *px;
373 add %i1,stridex,%i1 ! px += stridex
374 bge,pn %icc,.update8 ! (3_0) if ( hx >= 0x7f3504f3 )
375
376 fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dtmp0;
377 .cont8:
378 and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
379 for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
380
381 cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
382 ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
383 faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
384
385 fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
386 bge,pn %icc,.update9 ! (3_0) if ( hy >= 0x7f3504f3 )
387 lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
388
389 orcc %l3,%l4,%g0
390 bz,pn %icc,.update9
391 nop
392 .cont9:
393 fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
394 lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
395 fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
396
397 fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
398 srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
399 lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
400 fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
401
402 and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
403 st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
404 fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
405
406 ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
407 add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
408 and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
409 fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
410
411 fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
412 cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
413 bge,pn %icc,.update10 ! (4_0) if ( hx >= 0x7f3504f3 )
414 faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
415
! The next five instructions are skipped when .update10 is taken; the stub
! re-issues the first four and replaces the x0 load with a zero.
416 fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
417 and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
418 ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
419 faddd %f54,K1,%f54 ! (4_1) res0 += K1;
420
421 lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
422 .cont10:
423 fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dtmp0;
424 cmp counter,5
425 for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
426
427 ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
428 fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
429 faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
430
431 bl,pn %icc,.tail ! fewer than 5 elements in this run: drain only
432 nop
433
434 ba .main_loop
435 sub counter,5,counter ! delay slot: counter is kept biased by -5 inside the loop
436
! .main_loop: steady state of the software pipeline. Each pass stores five
! results (the five st %f14 instructions below) and starts loading and
! squaring five further elements. counter is decremented by 5 per pass
! (subcc before the closing bpos) and the loop repeats while the result is
! not negative; on exit 5 is added back and control falls into .tail.
!
! Every element loaded ahead is checked against 0x7f3504f3 and against
! x == y == 0; a failing check diverts to an .updateN stub which rejoins at
! the matching .contN label. Instructions that directly follow a branch are
! delay slots and execute on both paths.
!
! The (i_j) tags in the comments come from the original author; they
! appear to name the pipeline slot and its age - TODO confirm.
!
! Several integer registers change role inside one pass:
!   %l7  output pointer at loop top, px temporary from before .cont17 to .cont20
!   %o7  py at loop top, output pointer after .cont12, py again after .cont17
!   %i0  px temporary after .cont11, output pointer after .cont13
437 .align 16
438 .main_loop:
439 fsmuld %f17,%f17,%f40 ! (4_1) dx0 = x0 * (double)x0;
440 cmp %l4,_0x7f3504f3 ! (4_1) hy ? 0x7f3504f3
441 lda [stridey+%o7]0x82,%f17 ! (4_1) y0 = *py;
442 fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
443
444 fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
445 add %o7,stridey,%i5 ! py += stridey
446 st %f24,[%fp+tmp0] ! (3_1) iexp0 = ((int*)&db0)[0];
447 fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
448
449 fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
450 srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
451 bge,pn %icc,.update11 ! (4_1) if ( hy >= 0x7f3504f3 )
452 fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
453
454 orcc %l3,%l4,%g0
455 nop
456 bz,pn %icc,.update11
457 fzero %f52 ! delay-slot filler; %f52 is not read anywhere in this file
458 .cont11:
459 fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
460 and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
461 lda [%i1+stridex]0x82,%l3 ! (0_0) hx0 = *(int*)px;
462 fand %f30,DC0,%f60 ! (2_1) h0 = vis_fand(db0,DC0);
463
464 ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
465 add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
466 add %i1,stridex,%i0 ! px += stridex (%i0 holds px until .cont13)
467 fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
468
469 fsmuld %f17,%f17,%f48 ! (4_1) dy0 = y0 * (double)y0;
470 nop
471 lda [%i1+stridex]0x82,%f8 ! (0_0) x0 = *px;
472 faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
473
474 faddd %f56,K1,%f58 ! (0_1) res0 += K1;
475 and %l3,_0x7fffffff,%l3 ! (0_0) hx0 &= 0x7fffffff;
476 ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
477 fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
478
479 lda [%i5+stridey]0x82,%l4 ! (0_0) hy0 = *(int*)py;
480 cmp %l3,_0x7f3504f3 ! (0_0) hx ? 0x7f3504f3
481 bge,pn %icc,.update12 ! (0_0) if ( hx >= 0x7f3504f3 )
482 fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
483 .cont12:
484 fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dtmp0;
485 add %l7,stridez,%o7 ! pz += stridez (%o7 switches from py to the output pointer)
486 st %f14,[%l7] ! (3_2) *pz = ftmp0;
487 for %f60,DC1,%f46 ! (2_1) h0 = vis_for(h0,DC1);
488
489 fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
490 add %i5,stridey,%o4 ! py += stridey
491 ld [%fp+tmp4],%g1 ! (2_1) iexp0 = ((int*)&db0)[0];
492 faddd %f40,%f48,%f20 ! (4_1) db0 = dx0 + dy0;
493
494 fsmuld %f8,%f8,%f40 ! (0_0) dx0 = x0 * (double)x0;
495 and %l4,_0x7fffffff,%l4 ! (0_0) hy0 &= 0x7fffffff;
496 lda [%i5+stridey]0x82,%f17 ! (0_0) y0 = *py;
497 fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
498
499 fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
500 cmp %l4,_0x7f3504f3 ! (0_0) hy ? 0x7f3504f3
501 st %f20,[%fp+tmp1] ! (4_1) iexp0 = ((int*)&db0)[0];
502 fand %f46,DC2,%f58 ! (2_1) h_hi0 = vis_fand(h0,DC2);
503
504 fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
505 srax %g1,8,%g1 ! (2_1) iexp0 >>= 8;
506 bge,pn %icc,.update13 ! (0_0) if ( hy >= 0x7f3504f3 )
507 fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
508
509 orcc %l3,%l4,%g0
510 nop
511 bz,pn %icc,.update13
512 fzero %f52 ! delay-slot filler; %f52 is not read anywhere in this file
513 .cont13:
514 fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
515 and %g1,_0x1ff0,%g1 ! (2_1) di0 = iexp0 & 0x1ff0;
516 lda [%i0+stridex]0x82,%l3 ! (1_0) hx0 = *(int*)px;
517 fand %f24,DC0,%f60 ! (3_1) h0 = vis_fand(db0,DC0);
518
519 ldd [TBL+%g1],%f22 ! (2_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
520 add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
521 add %i0,stridex,%i1 ! px += stridex
522 fsubd %f46,%f58,%f58 ! (2_1) xx0 = h0 - h_hi0;
523
524 fsmuld %f17,%f17,%f34 ! (0_0) dy0 = y0 * (double)y0;
525 add %o7,stridez,%i0 ! pz += stridez (%i0 switches from px to the output pointer)
526 lda [%o4+stridey]0x82,%l4 ! (1_0) hy0 = *(int*)py;
527 faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
528
529 faddd %f56,K1,%f38 ! (1_1) res0 += K1;
530 and %l3,_0x7fffffff,%l3 ! (1_0) hx0 &= 0x7fffffff;
531 ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
532 fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
533
534 lda [%i1]0x82,%f17 ! (1_0) x0 = *px;
535 cmp %l3,_0x7f3504f3 ! (1_0) hx ? 0x7f3504f3
536 bge,pn %icc,.update14 ! (1_0) if ( hx >= 0x7f3504f3 )
537 fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
538 .cont14:
539 fmuld %f58,%f22,%f58 ! (2_1) xx0 *= dtmp0;
540 and %l4,_0x7fffffff,%l4 ! (1_0) hy0 &= 0x7fffffff;
541 add %o4,stridey,%i5 ! py += stridey
542 for %f60,DC1,%f46 ! (3_1) h0 = vis_for(h0,DC1);
543
544 fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
545 cmp %l4,_0x7f3504f3 ! (1_0) hy ? 0x7f3504f3
546 ld [%fp+tmp0],%o0 ! (3_1) iexp0 = ((int*)&db0)[0];
547 faddd %f40,%f34,%f0 ! (0_0) db0 = dx0 + dy0;
548
549 fsmuld %f17,%f17,%f40 ! (1_0) dx0 = x0 * (double)x0;
550 add %i1,stridex,%i1 ! px += stridex
551 lda [%o4+stridey]0x82,%f17 ! (1_0) y0 = *py;
552 fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
553
554 fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
555 st %f14,[%o7] ! (4_2) *pz = ftmp0;
556 bge,pn %icc,.update15 ! (1_0) if ( hy >= 0x7f3504f3 )
557 fand %f46,DC2,%f38 ! (3_1) h_hi0 = vis_fand(h0,DC2);
558
559 orcc %l3,%l4,%g0
560 bz,pn %icc,.update15
561 nop
562 .cont15:
563 fmuld K2,%f58,%f54 ! (2_1) res0 = K2 * xx0;
564 srax %o0,8,%o0 ! (3_1) iexp0 >>= 8;
565 st %f0,[%fp+tmp2] ! (0_0) iexp0 = ((int*)&db0)[0];
566 fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
567
568 fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
569 and %o0,_0x1ff0,%o0 ! (3_1) di0 = iexp0 & 0x1ff0;
570 lda [%i1]0x82,%l3 ! (2_0) hx0 = *(int*)px;
571 fand %f20,DC0,%f60 ! (4_1) h0 = vis_fand(db0,DC0);
572
573 ldd [TBL+%o0],%f22 ! (3_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
574 add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
575 add %i0,stridez,%i3 ! pz += stridez
576 fsubd %f46,%f38,%f38 ! (3_1) xx0 = h0 - h_hi0;
577
578 fsmuld %f17,%f17,%f32 ! (1_0) dy0 = y0 * (double)y0;
579 add %i5,stridey,%i2 ! py += stridey
580 lda [stridey+%i5]0x82,%l4 ! (2_0) hy0 = *(int*)py;
581 faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
582
583 fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
584 and %l3,_0x7fffffff,%l3 ! (2_0) hx0 &= 0x7fffffff;
585 ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
586 faddd %f54,K1,%f54 ! (2_1) res0 += K1;
587
588 lda [%i1]0x82,%f17 ! (2_0) x0 = *px;
589 cmp %l3,_0x7f3504f3 ! (2_0) hx ? 0x7f3504f3
590 add %i3,stridez,%o4 ! pz += stridez
591 fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
592
593 fmuld %f38,%f22,%f38 ! (3_1) xx0 *= dtmp0;
594 and %l4,_0x7fffffff,%l4 ! (2_0) hy0 &= 0x7fffffff;
595 st %f14,[%i0] ! (0_1) *pz = ftmp0;
596 for %f60,DC1,%f46 ! (4_1) h0 = vis_for(h0,DC1);
597
598 fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
599 bge,pn %icc,.update16 ! (2_0) if ( hx >= 0x7f3504f3 )
600 ld [%fp+tmp1],%o3 ! (4_1) iexp0 = ((int*)&db0)[0];
601 faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; (skipped when .update16 is taken; the stub re-issues it)
602 .cont16:
603 fsmuld %f17,%f17,%f44 ! (2_0) dx0 = x0 * (double)x0;
604 cmp %l4,_0x7f3504f3 ! (2_0) hy ? 0x7f3504f3
605 lda [stridey+%i5]0x82,%f17 ! (2_0) y0 = *py;
606 fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
607
608 fmuld %f54,%f58,%f54 ! (2_1) res0 *= xx0;
609 add %i1,stridex,%l7 ! px += stridex (%l7 holds px until .cont20)
610 bge,pn %icc,.update17 ! (2_0) if ( hy >= 0x7f3504f3 )
611 fand %f46,DC2,%f58 ! (4_1) h_hi0 = vis_fand(h0,DC2);
612
613 orcc %l3,%l4,%g0
614 nop
615 bz,pn %icc,.update17
616 fzero %f52 ! delay-slot filler; %f52 is not read anywhere in this file
617 .cont17:
618 fmuld K2,%f38,%f56 ! (3_1) res0 = K2 * xx0;
619 srax %o3,8,%o3 ! (4_1) iexp0 >>= 8;
620 st %f18,[%fp+tmp3] ! (1_0) iexp0 = ((int*)&db0)[0];
621 fand %f30,DA0,%f40 ! (2_1) db0 = vis_fand(db0,DA0);
622
623 fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
624 and %o3,_0x1ff0,%o3 ! (4_1) di0 = iexp0 & 0x1ff0;
625 lda [%l7]0x82,%l3 ! (3_0) hx0 = *(int*)px;
626 fand %f0,DC0,%f60 ! (0_0) h0 = vis_fand(db0,DC0);
627
628 ldd [TBL+%o3],%f22 ! (4_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
629 add %g1,TBL,%g1 ! (2_1) si0 = (char*)sqrt_arr + di0;
630 add %i2,stridey,%o7 ! py += stridey (%o7 switches back from the output pointer to py)
631 fsubd %f46,%f58,%f58 ! (4_1) xx0 = h0 - h_hi0;
632
633 fsmuld %f17,%f17,%f30 ! (2_0) dy0 = y0 * (double)y0;
634 lda [stridey+%i2]0x82,%l4 ! (3_0) hy0 = *(int*)py;
635 add %l7,stridex,%i1 ! px += stridex
636 faddd %f54,DC1,%f36 ! (2_1) res0 += DC1;
637
638 faddd %f56,K1,%f54 ! (3_1) res0 += K1;
639 and %l3,_0x7fffffff,%l3 ! (3_0) hx0 &= 0x7fffffff;
640 ldd [%g1+8],%f56 ! (2_1) dtmp0 = ((double*)si0)[1];
641 fmul8x16 SCALE,%f40,%f40 ! (2_1) db0 = vis_fmul8x16(SCALE, db0);
642
643 lda [%l7]0x82,%f17 ! (3_0) x0 = *px;
644 cmp %l3,_0x7f3504f3 ! (3_0) hx ? 0x7f3504f3
645 bge,pn %icc,.update18 ! (3_0) if ( hx >= 0x7f3504f3 )
646 fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
647 .cont18:
648 fmuld %f58,%f22,%f58 ! (4_1) xx0 *= dtmp0;
649 and %l4,_0x7fffffff,%l4 ! (3_0) hy0 &= 0x7fffffff;
650 st %f14,[%i3] ! (1_1) *pz = ftmp0;
651 for %f60,DC1,%f46 ! (0_0) h0 = vis_for(h0,DC1);
652
653 fmuld %f56,%f36,%f36 ! (2_1) res0 = dtmp0 * res0;
654 cmp %l4,_0x7f3504f3 ! (3_0) hy ? 0x7f3504f3
655 ld [%fp+tmp2],%g1 ! (0_0) iexp0 = ((int*)&db0)[0];
656 faddd %f44,%f30,%f30 ! (2_0) db0 = dx0 + dy0;
657
658 fsmuld %f17,%f17,%f44 ! (3_0) dx0 = x0 * (double)x0;
659 bge,pn %icc,.update19 ! (3_0) if ( hy >= 0x7f3504f3 )
660 lda [stridey+%i2]0x82,%f17 ! (3_0) y0 = *py;
661 fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); (skipped when .update19 is taken; the stub re-issues it)
662
663 .cont19:
664 fmuld %f54,%f38,%f40 ! (3_1) res0 *= xx0;
665 orcc %l3,%l4,%g0 ! flags are consumed by the bz to .update19a further down
666 st %f30,[%fp+tmp4] ! (2_0) iexp0 = ((int*)&db0)[0];
667 fand %f46,DC2,%f38 ! (0_0) h_hi0 = vis_fand(h0,DC2);
668
669 fmuld K2,%f58,%f54 ! (4_1) res0 = K2 * xx0;
670 srax %g1,8,%o5 ! (0_0) iexp0 >>= 8;
671 lda [%i1]0x82,%l3 ! (4_0) hx0 = *(int*)px;
672 fand %f24,DA0,%f56 ! (3_1) db0 = vis_fand(db0,DA0);
673
674 fmuld %f36,%f62,%f62 ! (2_1) res0 *= db0;
675 and %o5,_0x1ff0,%o5 ! (0_0) di0 = iexp0 & 0x1ff0;
676 bz,pn %icc,.update19a
677 fand %f18,DC0,%f60 ! (1_0) h0 = vis_fand(db0,DC0);
678 .cont19a:
679 ldd [TBL+%o5],%f22 ! (0_0) dtmp0 = ((double*)((char*)div_arr + di0))[0];
680 add %o0,TBL,%g1 ! (3_1) si0 = (char*)sqrt_arr + di0;
681 and %l3,_0x7fffffff,%l3 ! (4_0) hx0 &= 0x7fffffff;
682 fsubd %f46,%f38,%f38 ! (0_0) xx0 = h0 - h_hi0;
683
684 fsmuld %f17,%f17,%f24 ! (3_0) dy0 = y0 * (double)y0;
685 cmp %l3,_0x7f3504f3 ! (4_0) hx ? 0x7f3504f3
686 lda [stridey+%o7]0x82,%l4 ! (4_0) hy0 = *(int*)py;
687 faddd %f40,DC1,%f40 ! (3_1) res0 += DC1;
688
689 fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
690 bge,pn %icc,.update20 ! (4_0) if ( hx >= 0x7f3504f3 )
691 ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
692 faddd %f54,K1,%f54 ! (4_1) res0 += K1; (skipped when .update20 is taken; the stub re-issues it)
693
694 lda [%i1]0x82,%f17 ! (4_0) x0 = *px;
695 .cont20:
696 subcc counter,5,counter ! counter -= 5
697 add %o4,stridez,%l7 ! pz += stridez (%l7 is the output pointer again)
698 fdtos %f62,%f14 ! (2_1) ftmp0 = (float)res0;
699
700 fmuld %f38,%f22,%f38 ! (0_0) xx0 *= dtmp0;
701 and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
702 st %f14,[%o4] ! (2_1) *pz = ftmp0;
703 for %f60,DC1,%f46 ! (1_0) h0 = vis_for(h0,DC1);
704
705 ld [%fp+tmp3],%g1 ! (1_0) iexp0 = ((int*)&db0)[0];
706 fmuld %f56,%f40,%f62 ! (3_1) res0 = dtmp0 * res0;
707 bpos,pt %icc,.main_loop
708 faddd %f44,%f24,%f24 ! (3_0) db0 = dx0 + dy0;
709
710 add counter,5,counter ! undo the -5 bias before draining in .tail
711
! .tail: drains the software pipeline. It finishes and stores up to four
! results whose inputs are already in flight and loads no new elements.
! Before each result counter is decremented and tested; once it goes
! negative control returns to .begin, with the delay slot of that branch
! leaving %l7 at the next unwritten output element. After the fourth result
! the code returns to .begin unconditionally. .begin then either resumes at
! a position parked by an .updateN stub or exits.
712 .tail:
713 subcc counter,1,counter
714 bneg .begin
715 nop
716
717 fpadd32 %f36,DA1,%f36 ! (3_2) db0 = vis_fpadd32(db0,DA1);
718
719 fmuld %f54,%f58,%f58 ! (4_2) res0 *= xx0;
720 fand %f46,DC2,%f44 ! (1_1) h_hi0 = vis_fand(h0,DC2);
721
722 fmuld K2,%f38,%f56 ! (0_1) res0 = K2 * xx0;
723 srax %g1,8,%g5 ! (1_1) iexp0 >>= 8;
724 fand %f20,DA0,%f54 ! (4_2) db0 = vis_fand(db0,DA0);
725
726 fmuld %f62,%f36,%f62 ! (3_2) res0 *= db0;
727 and %g5,_0x1ff0,%g5 ! (1_1) di0 = iexp0 & 0x1ff0;
728
729 ldd [%g5+TBL],%f22 ! (1_1) dtmp0 = ((double*)((char*)div_arr + di0))[0];
730 add %o3,TBL,%g1 ! (4_2) si0 = (char*)sqrt_arr + di0;
731 fsubd %f46,%f44,%f44 ! (1_1) xx0 = h0 - h_hi0;
732
733 faddd %f58,DC1,%f36 ! (4_2) res0 += DC1;
734
735 faddd %f56,K1,%f58 ! (0_1) res0 += K1;
736 ldd [%g1+8],%f56 ! (4_2) dtmp0 = ((double*)si0)[1];
737 fmul8x16 SCALE,%f54,%f54 ! (4_2) db0 = vis_fmul8x16(SCALE, db0);
738
739 fdtos %f62,%f14 ! (3_2) ftmp0 = (float)res0;
740
741 fmuld %f44,%f22,%f44 ! (1_1) xx0 *= dtmp0;
742 add %l7,stridez,%o7 ! pz += stridez
743 st %f14,[%l7] ! (3_2) *pz = ftmp0;
744
745 subcc counter,1,counter
746 bneg .begin
747 or %g0,%o7,%l7 ! delay slot: %l7 = next output element
748
749 fmuld %f56,%f36,%f36 ! (4_2) res0 = dtmp0 * res0;
750
751 fpadd32 %f54,DA1,%f62 ! (4_2) db0 = vis_fpadd32(db0,DA1);
752
753 fmuld %f58,%f38,%f38 ! (0_1) res0 *= xx0;
754
755 fmuld K2,%f44,%f56 ! (1_1) res0 = K2 * xx0;
756 fand %f0,DA0,%f54 ! (0_1) db0 = vis_fand(db0,DA0);
757
758 fmuld %f36,%f62,%f62 ! (4_2) res0 *= db0;
759
760 add %o5,TBL,%o0 ! (0_1) si0 = (char*)sqrt_arr + di0;
761
762 faddd %f38,DC1,%f36 ! (0_1) res0 += DC1;
763
764 faddd %f56,K1,%f38 ! (1_1) res0 += K1;
765 ldd [%o0+8],%f56 ! (0_1) dtmp0 = ((double*)si0)[1];
766 fmul8x16 SCALE,%f54,%f54 ! (0_1) db0 = vis_fmul8x16(SCALE, db0);
767
768 add %o7,stridez,%i0 ! pz += stridez
769 fdtos %f62,%f14 ! (4_2) ftmp0 = (float)res0;
770
771 fmuld %f56,%f36,%f36 ! (0_1) res0 = dtmp0 * res0;
772
773 fpadd32 %f54,DA1,%f62 ! (0_1) db0 = vis_fpadd32(db0,DA1);
774
775 fmuld %f38,%f44,%f44 ! (1_1) res0 *= xx0;
776 add %i0,stridez,%i3 ! pz += stridez
777 st %f14,[%o7] ! (4_2) *pz = ftmp0;
778
779 subcc counter,1,counter
780 bneg .begin
781 or %g0,%i0,%l7 ! delay slot: %l7 = next output element
782
783 fand %f18,DA0,%f56 ! (1_1) db0 = vis_fand(db0,DA0);
784
785 fmuld %f36,%f62,%f62 ! (0_1) res0 *= db0;
786
787 add %g5,TBL,%o3 ! (1_1) si0 = (char*)sqrt_arr + di0;
788
789 faddd %f44,DC1,%f44 ! (1_1) res0 += DC1;
790
791 fmul8x16 SCALE,%f56,%f36 ! (1_1) db0 = vis_fmul8x16(SCALE, db0);
792 ldd [%o3+8],%f56 ! (1_1) dtmp0 = ((double*)si0)[1];
793
794 add %i3,stridez,%o4 ! pz += stridez
795 fdtos %f62,%f14 ! (0_1) ftmp0 = (float)res0;
796
797 st %f14,[%i0] ! (0_1) *pz = ftmp0;
798
799 subcc counter,1,counter
800 bneg .begin
801 or %g0,%i3,%l7 ! delay slot: %l7 = next output element
802
803 fmuld %f56,%f44,%f62 ! (1_1) res0 = dtmp0 * res0;
804
805 fpadd32 %f36,DA1,%f36 ! (1_1) db0 = vis_fpadd32(db0,DA1);
806
807 fmuld %f62,%f36,%f62 ! (1_1) res0 *= db0;
808
809 fdtos %f62,%f14 ! (1_1) ftmp0 = (float)res0;
810
811 st %f14,[%i3] ! (1_1) *pz = ftmp0;
812
813 ba .begin
814 or %g0,%o4,%l7 ! delay slot: %l7 = next output element
815
! .spec1: first element of a run has x and y both zero once the sign bit
! is masked off (hx0 | hy0 == 0). Stores an all-zero word (+0.0f) to *pz,
! advances pz and py, consumes one element and resumes at .begin1.
! px is not advanced here: the add in the delay slot of the branch that
! jumps to .spec1 has already done it.
816 .align 16
817 .spec1:
818 st %g0,[%l7] ! *pz = 0;
819 add %l7,stridez,%l7 ! pz += stridez
820
821 add %i2,stridey,%i2 ! py += stridey
822 ba .begin1
823 sub counter,1,counter ! counter--
824
! .spec: slow path for the first element of a run when the sign-stripped
! bit pattern of x or y is >= 0x7f3504f3.
! On entry: %l3/%l4 = hx0/hy0 (sign bit already cleared), %f17 = x,
! %i1/%i2 = current px/py, %l7 = pz. y is loaded into %f8 here.
!
!   hx0 or hy0 >= 0x7f800000 (Inf or NaN), local label 2:
!     fcmps is issued first and its result is never tested; presumably it
!     is there to raise the invalid exception for a signaling NaN.
!     If either pattern is exactly 0x7f800000 the result is +Inf,
!     otherwise x * y is stored.
!   otherwise (large finite operands):
!     hyp = sqrt(x*x + y*y) is evaluated in double precision. If hyp is
!     greater than DFMAX or the compare is unordered, FMAX * FMAX is stored
!     (presumably to produce +Inf and raise overflow), else (float)hyp.
!
! Every path advances pz, px and py by one element, decrements counter and
! resumes at .begin1. The fbug,a and be,a branches use the annul bit, so
! their delay-slot instruction executes only when the branch is taken.
825 .align 16
826 .spec:
827 sethi %hi(0x7f800000),%i0
828 cmp %l3,%i0 ! hx ? 0x7f800000
829 bge,pt %icc,2f ! if ( hx >= 0x7f800000 )
830 ld [%i2],%f8 ! delay slot (not annulled): y = *py on both paths
831
832 cmp %l4,%i0 ! hy ? 0x7f800000
833 bge,pt %icc,2f ! if ( hy >= 0x7f800000 )
834 nop
835
836 fsmuld %f17,%f17,%f44 ! x * (double)x
837 fsmuld %f8,%f8,%f24 ! y * (double)y
838 faddd %f44,%f24,%f24 ! x * (double)x + y * (double)y
839 fsqrtd %f24,%f24 ! hyp = sqrt(x * (double)x + y * (double)y);
840 fcmped %f24,DFMAX ! hyp ? DFMAX
841 fbug,a 1f ! if ( hyp > DFMAX )
842 fmuls FMAX,FMAX,%f20 ! ftmp0 = FMAX * FMAX; (annulled unless the branch is taken)
843
844 fdtos %f24,%f20 ! ftmp0 = (float)hyp;
845 1:
846 st %f20,[%l7] ! *pz = ftmp0;
847 add %l7,stridez,%l7 ! pz += stridez
848 add %i1,stridex,%i1 ! px += stridex
849
850 add %i2,stridey,%i2 ! py += stridey
851 ba .begin1
852 sub counter,1,counter ! counter--
853 2:
854 fcmps %f17,%f8 ! exceptions
855 cmp %l3,%i0 ! hx ? 0x7f800000
856 be,a %icc,1f ! if ( hx == 0x7f800000 )
857 st %i0,[%l7] ! *(int*)pz = 0x7f800000; (annulled unless the branch is taken)
858
859 cmp %l4,%i0 ! hy ? 0x7f800000
860 be,a %icc,1f ! if ( hy == 0x7f800000 )
861 st %i0,[%l7] ! *(int*)pz = 0x7f800000; (annulled unless the branch is taken)
862
863 fmuls %f17,%f8,%f8 ! x * y
864 st %f8,[%l7] ! *pz = x * y;
865
866 1:
867 add %l7,stridez,%l7 ! pz += stridez
868 add %i1,stridex,%i1 ! px += stridex
869
870 add %i2,stridey,%i2 ! py += stridey
871 ba .begin1
872 sub counter,1,counter ! counter--
873
! .update0 - .update10: out-of-line stubs for the pipeline-priming code
! that follows .begin1. A stub is entered when an element loaded ahead of
! time fails the fast-path test (sign-stripped x or y pattern >= 0x7f3504f3,
! or both patterns zero). All stubs follow one pattern, with k being the
! number of elements that precede the offending one in the current run:
!   cmp counter,k / ble .contN   offending element lies beyond the run:
!                                nothing to park, just rejoin
!   fzeros (ble delay slot)      always executed; clears the x0 or y0
!                                register the pipeline squares next
!   stx ... tmp_px / tmp_py      park the address of the offending element
!   st counter-k -> tmp_counter  park the number of elements left from there
!   ba .contN ; counter = k      truncate this run to the k good elements
! After the pipeline has drained, .begin reloads the parked state and the
! offending element is handled by .spec or .spec1 via .begin1.
! Where px has already been stepped past the offending element the stub
! steps back one stride first; where py has not been stepped yet it adds
! one stride.
874 .align 16
875 .update0:
876 cmp counter,1
877 ble .cont0
878 fzeros %f17
879
880 stx %i1,[%fp+tmp_px]
881
882 add %o7,stridey,%i5 ! py of the offending element
883 stx %i5,[%fp+tmp_py]
884
885 sub counter,1,counter
886 st counter,[%fp+tmp_counter]
887
888 ba .cont0
889 or %g0,1,counter
890
891 .align 16
892 .update1:
893 cmp counter,1
894 ble .cont1
895 fzeros %f17
896
897 stx %i1,[%fp+tmp_px]
898 stx %i5,[%fp+tmp_py]
899
900 sub counter,1,counter
901 st counter,[%fp+tmp_counter]
902
903 ba .cont1
904 or %g0,1,counter
905
906 .align 16
907 .update2:
908 cmp counter,2
909 ble .cont2
910 fzeros %f8
911
912 stx %i1,[%fp+tmp_px]
913 stx %o4,[%fp+tmp_py]
914
915 sub counter,2,counter
916 st counter,[%fp+tmp_counter]
917
918 ba .cont2
919 or %g0,2,counter
920
921 .align 16
922 .update3:
923 cmp counter,2
924 ble .cont3
925 fzeros %f17
926
927 stx %i1,[%fp+tmp_px]
928 stx %o4,[%fp+tmp_py]
929
930 sub counter,2,counter
931 st counter,[%fp+tmp_counter]
932
933 ba .cont3
934 or %g0,2,counter
935
936 .align 16
937 .update4:
938 cmp counter,3
939 ble .cont4
940 fzeros %f17
941
942 stx %i1,[%fp+tmp_px]
943 stx %i5,[%fp+tmp_py]
944
945 sub counter,3,counter
946 st counter,[%fp+tmp_counter]
947
948 ba .cont4
949 or %g0,3,counter
950
951 .align 16
952 .update5:
953 cmp counter,3
954 ble .cont5
955 fzeros %f17
956
957 sub %i1,stridex,%i2 ! px was already advanced past this element; step back
958 stx %i2,[%fp+tmp_px]
959 stx %i5,[%fp+tmp_py]
960
961 sub counter,3,counter
962 st counter,[%fp+tmp_counter]
963
964 ba .cont5
965 or %g0,3,counter
966
967 .align 16
968 .update6:
969 cmp counter,4
970 ble .cont6
971 fzeros %f17
972
973 stx %i1,[%fp+tmp_px]
974 stx %i2,[%fp+tmp_py]
975
976 sub counter,4,counter
977 st counter,[%fp+tmp_counter]
978
979 ba .cont6
980 or %g0,4,counter
981
982 .align 16
983 .update7:
984 cmp counter,4
985 ble .cont7
986 fzeros %f17
987
988 sub %i1,stridex,%o7 ! px was already advanced past this element; step back
989 stx %o7,[%fp+tmp_px]
990 stx %i2,[%fp+tmp_py]
991
992 sub counter,4,counter
993 st counter,[%fp+tmp_counter]
994
995 ba .cont7
996 or %g0,4,counter
997
998 .align 16
999 .update8:
1000 cmp counter,5
1001 ble .cont8
1002 fzeros %f17
1003
1004 sub %i1,stridex,%o5 ! px was already advanced past this element; step back
1005 stx %o5,[%fp+tmp_px]
1006 stx %o7,[%fp+tmp_py]
1007
1008 sub counter,5,counter
1009 st counter,[%fp+tmp_counter]
1010
1011 ba .cont8
1012 or %g0,5,counter
1013
1014 .align 16
1015 .update9:
1016 cmp counter,5
1017 ble .cont9
1018 fzeros %f17
1019
1020 sub %i1,stridex,%o5 ! px was already advanced past this element; step back
1021 stx %o5,[%fp+tmp_px]
1022 stx %o7,[%fp+tmp_py]
1023
1024 sub counter,5,counter
1025 st counter,[%fp+tmp_counter]
1026
1027 ba .cont9
1028 or %g0,5,counter
1029
1030 .align 16
1031 .update10:
! The branch to this stub skips four pipeline instructions and the x0 load
! that sit between it and .cont10; the four instructions are re-issued here
! and the load is replaced by the fzeros below.
1032 fmul8x16 SCALE,%f56,%f36 ! (3_1) db0 = vis_fmul8x16(SCALE, db0);
1033 and %l4,_0x7fffffff,%l4 ! (4_0) hy0 &= 0x7fffffff;
1034 ldd [%g1+8],%f56 ! (3_1) dtmp0 = ((double*)si0)[1];
1035 faddd %f54,K1,%f54 ! (4_1) res0 += K1;
1036
1037 cmp counter,6
1038 ble .cont10
1039 fzeros %f17
1040
1041 stx %i1,[%fp+tmp_px]
1042 add %o7,stridey,%i5 ! py of the offending element
1043 stx %i5,[%fp+tmp_py]
1044
1045 sub counter,6,counter
1046 st counter,[%fp+tmp_counter]
1047
1048 ba .cont10
1049 or %g0,6,counter
1050
! .update11 - .update20: out-of-line stubs for .main_loop. A stub is
! entered when an element loaded ahead of time fails the fast-path test
! (sign-stripped x or y pattern >= 0x7f3504f3, or both patterns zero).
! Inside the loop counter is biased by -5, so the constants k below are
! relative to that biased value. All stubs follow one pattern:
!   cmp counter,k / ble .contN   offending element lies beyond the run:
!                                nothing to park, just rejoin
!   fzeros (ble delay slot)      always executed; clears the x0 or y0
!                                register the pipeline squares next
!   stx ... tmp_px / tmp_py      park the address of the offending element
!   st counter-k -> tmp_counter  park the number of elements left from there
!   ba .contN ; counter = k      truncate this run so the pipeline drains
! After the loop and .tail have drained, .begin reloads the parked state
! and the offending element is handled by .spec or .spec1 via .begin1.
! Stubs 16, 19 and 20 first re-issue the one pipeline instruction that the
! taken branch skipped. In stubs 18, 19 and 19a the px of the offending
! element is held in %l7, which the loop uses as a px temporary there.
1051 .align 16
1052 .update11:
1053 cmp counter,1
1054 ble .cont11
1055 fzeros %f17
1056
1057 stx %i1,[%fp+tmp_px]
1058 stx %i5,[%fp+tmp_py]
1059
1060 sub counter,1,counter
1061 st counter,[%fp+tmp_counter]
1062
1063 ba .cont11
1064 or %g0,1,counter
1065
1066 .align 16
1067 .update12:
1068 cmp counter,2
1069 ble .cont12
1070 fzeros %f8
1071
1072 stx %i0,[%fp+tmp_px]
1073 add %i5,stridey,%o4 ! py of the offending element
1074 stx %o4,[%fp+tmp_py]
1075
1076 sub counter,2,counter
1077 st counter,[%fp+tmp_counter]
1078
1079 ba .cont12
1080 or %g0,2,counter
1081
1082 .align 16
1083 .update13:
1084 cmp counter,2
1085 ble .cont13
1086 fzeros %f17
1087
1088 stx %i0,[%fp+tmp_px]
1089 stx %o4,[%fp+tmp_py]
1090
1091 sub counter,2,counter
1092 st counter,[%fp+tmp_counter]
1093
1094 ba .cont13
1095 or %g0,2,counter
1096
1097 .align 16
1098 .update14:
1099 cmp counter,3
1100 ble .cont14
1101 fzeros %f17
1102
1103 stx %i1,[%fp+tmp_px]
1104 add %o4,stridey,%i5 ! py of the offending element
1105 stx %i5,[%fp+tmp_py]
1106
1107 sub counter,3,counter
1108 st counter,[%fp+tmp_counter]
1109
1110 ba .cont14
1111 or %g0,3,counter
1112
1113 .align 16
1114 .update15:
1115 cmp counter,3
1116 ble .cont15
1117 fzeros %f17
1118
1119 sub %i1,stridex,%i2 ! px was already advanced past this element; step back
1120 stx %i2,[%fp+tmp_px]
1121 stx %i5,[%fp+tmp_py]
1122
1123 sub counter,3,counter
1124 st counter,[%fp+tmp_counter]
1125
1126 ba .cont15
1127 or %g0,3,counter
1128
1129 .align 16
1130 .update16:
1131 faddd %f40,%f32,%f18 ! (1_0) db0 = dx0 + dy0; (re-issued: skipped by the taken branch)
1132 cmp counter,4
1133 ble .cont16
1134 fzeros %f17
1135
1136 stx %i1,[%fp+tmp_px]
1137 stx %i2,[%fp+tmp_py]
1138
1139 sub counter,4,counter
1140 st counter,[%fp+tmp_counter]
1141
1142 ba .cont16
1143 or %g0,4,counter
1144
1145 .align 16
1146 .update17:
1147 cmp counter,4
1148 ble .cont17
1149 fzeros %f17
1150
1151 stx %i1,[%fp+tmp_px]
1152 stx %i2,[%fp+tmp_py]
1153
1154 sub counter,4,counter
1155 st counter,[%fp+tmp_counter]
1156
1157 ba .cont17
1158 or %g0,4,counter
1159
1160 .align 16
1161 .update18:
1162 cmp counter,5
1163 ble .cont18
1164 fzeros %f17
1165
1166 stx %l7,[%fp+tmp_px]
1167 stx %o7,[%fp+tmp_py]
1168
1169 sub counter,5,counter
1170 st counter,[%fp+tmp_counter]
1171
1172 ba .cont18
1173 or %g0,5,counter
1174
1175 .align 16
1176 .update19:
1177 fpadd32 %f40,DA1,%f62 ! (2_1) db0 = vis_fpadd32(db0,DA1); (re-issued: skipped by the taken branch)
1178 cmp counter,5
1179 ble .cont19
1180 fzeros %f17
1181
1182 stx %l7,[%fp+tmp_px]
1183 stx %o7,[%fp+tmp_py]
1184
1185 sub counter,5,counter
1186 st counter,[%fp+tmp_counter]
1187
1188 ba .cont19
1189 or %g0,5,counter
1190
1191 .align 16
1192 .update19a:
1193 cmp counter,5
1194 ble .cont19a
1195 fzeros %f17
1196
1197 stx %l7,[%fp+tmp_px]
1198 stx %o7,[%fp+tmp_py]
1199
1200 sub counter,5,counter
1201 st counter,[%fp+tmp_counter]
1202
1203 ba .cont19a
1204 or %g0,5,counter
1205
1206 .align 16
1207 .update20:
1208 faddd %f54,K1,%f54 ! (4_1) res0 += K1; (re-issued: skipped by the taken branch)
1209 cmp counter,6
1210 ble .cont20
1211 fzeros %f17
1212
1213 stx %i1,[%fp+tmp_px]
1214 add %o7,stridey,%g1 ! py of the offending element; %g1 is reloaded after .cont20
1215 stx %g1,[%fp+tmp_py]
1216
1217 sub counter,6,counter
1218 st counter,[%fp+tmp_counter]
1219
1220 ba .cont20
1221 or %g0,6,counter
1222
! .exit: reached from .begin1 once counter <= 0, that is when a pass
! through .begin found no parked remainder. Pops the register window and
! returns to the caller; no value is placed in a return register here.
1223 .exit:
1224 ret
1225 restore
1226 SET_SIZE(__vhypotf)
1227