1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vrhypot.S"
30
31 #include "libm.h"
32
	RO_DATA
	.align	64

.CONST_TBL:
! 128-entry table of 32-bit words used as initial reciprocal
! approximations for the Newton iteration below.  Entry i is the high
! word of a double near 2**1023/(1 + i/128); the exponent part is
! rebiased at run time with fpsub32 against dexp0 (see the algorithm
! description below).  Indexed by iarr = ((hi(dres) >> 11) & 0x1fc),
! i.e. 4 bytes per entry keyed on the top 7 mantissa bits of dres.
! NOTE(review): the 2**1023/(1+i/128) scaling is inferred from the
! leading 0x7fe00000 (= 2**1023) entry -- confirm against the
! table generator if regenerating.
	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,

! Double-precision constants, addressed as TBL+TBL_SHIFT+n.  TBL_SHIFT
! (512) is the byte size of the 128 x 4-byte table above, so these
! immediately follow it.
	.word	0x42300000, 0		! D2ON36 = 2**36 (hi/lo split constant)
	.word	0xffffff00, 0		! DA0: mask 0xffffff00_00000000 (sign, exponent, top 12 mantissa bits)
	.word	0xfff00000, 0		! DA1: mask 0xfff00000_00000000 (sign + exponent field)
	.word	0x3ff00000, 0		! DONE = 1.0
	.word	0x40000000, 0		! DTWO = 2.0
	.word	0x7fd00000, 0		! D2ON1022 = 2**1022 (scale for subnormal path)
	.word	0x3cb00000, 0		! D2ONM52 = 2**-52
	.word	0x43200000, 0		! D2ON51 = 2**51
	.word	0x0007ffff, 0xffffffff	! 0x0007ffffffffffff (mantissa-minus-top-bit mask)
79
/*
 * Element strides for the x, y and z vectors.  The caller passes them
 * in units of doubles; the prologue scales them to bytes (sll by 3).
 * Trailing comments on #define lines must be C-style: this file is run
 * through cpp, and a "!" comment would become part of the macro body.
 */
#define stridex %l2
#define stridey %l3
#define stridez %l5

/*
 * Byte offset from TBL to the double constants: the reciprocal table
 * above is 128 entries * 4 bytes = 512 bytes.
 */
#define TBL_SHIFT 512

#define TBL %l1		/* base address of .CONST_TBL */
#define counter %l4	/* number of elements left to process */

/* Frequently compared integer constants, kept in registers. */
#define _0x7ff00000 %l0
#define _0x00100000 %o5
#define _0x7fffffff %l6

/* Floating-point constants, loaded once from .CONST_TBL in the prologue. */
#define D2ON36 %f4
#define DTWO %f6
#define DONE %f8
#define DA0 %f58
#define DA1 %f56

/*
 * Sixteen 8-byte stack slots used to spill the per-element scale
 * factor scl0 (one slot per stage of the software pipeline).
 */
#define dtmp0 STACK_BIAS-0x80
#define dtmp1 STACK_BIAS-0x78
#define dtmp2 STACK_BIAS-0x70
#define dtmp3 STACK_BIAS-0x68
#define dtmp4 STACK_BIAS-0x60
#define dtmp5 STACK_BIAS-0x58
#define dtmp6 STACK_BIAS-0x50
#define dtmp7 STACK_BIAS-0x48
#define dtmp8 STACK_BIAS-0x40
#define dtmp9 STACK_BIAS-0x38
#define dtmp10 STACK_BIAS-0x30
#define dtmp11 STACK_BIAS-0x28
#define dtmp12 STACK_BIAS-0x20
#define dtmp13 STACK_BIAS-0x18
#define dtmp14 STACK_BIAS-0x10
#define dtmp15 STACK_BIAS-0x08

/* Scratch slot for moving the high word of dres between FP and integer regs. */
#define ftmp0 STACK_BIAS-0x100
/* Saved pointer/counter state so the loop can restart after special cases. */
#define tmp_px STACK_BIAS-0x98
#define tmp_py STACK_BIAS-0x90
#define tmp_counter STACK_BIAS-0x88

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x100
123
124 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
125 ! !!!!! algorithm !!!!!
126 ! hx0 = *(int*)px;
127 ! hy0 = *(int*)py;
128 !
129 ! ((float*)&x0)[0] = ((float*)px)[0];
130 ! ((float*)&x0)[1] = ((float*)px)[1];
131 ! ((float*)&y0)[0] = ((float*)py)[0];
132 ! ((float*)&y0)[1] = ((float*)py)[1];
133 !
134 ! hx0 &= 0x7fffffff;
135 ! hy0 &= 0x7fffffff;
136 !
137 ! diff0 = hy0 - hx0;
138 ! j0 = diff0 >> 31;
139 ! j0 &= diff0;
140 ! j0 = hy0 - j0;
141 ! j0 &= 0x7ff00000;
142 !
143 ! j0 = 0x7ff00000 - j0;
144 ! ll = (long long)j0 << 32;
145 ! *(long long*)&scl0 = ll;
146 !
147 ! if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
148 ! {
149 ! lx = ((int*)px)[1];
150 ! ly = ((int*)py)[1];
151 !
152 ! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
153 ! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
154 ! else res0 = fabs(x0) * fabs(y0);
155 !
156 ! ((float*)pz)[0] = ((float*)&res0)[0];
157 ! ((float*)pz)[1] = ((float*)&res0)[1];
158 !
159 ! px += stridex;
160 ! py += stridey;
161 ! pz += stridez;
162 ! continue;
163 ! }
164 ! if ( hx0 < 0x00100000 && hy0 < 0x00100000 )
165 ! {
166 ! lx = ((int*)px)[1];
167 ! ly = ((int*)py)[1];
168 ! ii = hx0 | hy0;
169 ! ii |= lx;
170 ! ii |= ly;
171 ! if ( ii == 0 )
172 ! {
173 ! res0 = 1.0 / 0.0;
174 ! ((float*)pz)[0] = ((float*)&res0)[0];
175 ! ((float*)pz)[1] = ((float*)&res0)[1];
176 !
177 ! px += stridex;
178 ! py += stridey;
179 ! pz += stridez;
180 ! continue;
181 ! }
182 ! x0 = fabs(x0);
183 ! y0 = fabs(y0);
184 ! if ( hx0 < 0x00080000 )
185 ! {
186 ! x0 = *(long long*)&x0;
187 ! }
188 ! else
189 ! {
190 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
191 ! x0 = vis_fand(x0, dtmp0);
192 ! x0 = *(long long*)&x0;
193 ! x0 += D2ON51;
194 ! }
195 ! x0 *= D2ONM52;
196 ! if ( hy0 < 0x00080000 )
197 ! {
198 ! y0 = *(long long*)&y0;
199 ! }
200 ! else
201 ! {
202 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
203 ! y0 = vis_fand(y0, dtmp0);
204 ! y0 = *(long long*)&y0;
205 ! y0 += D2ON51;
206 ! }
207 ! y0 *= D2ONM52;
208 ! *(long long*)&scl0 = 0x7fd0000000000000ULL;
209 ! }
210 ! else
211 ! {
212 ! x0 *= scl0;
213 ! y0 *= scl0;
214 ! }
215 !
216 ! x_hi0 = x0 + D2ON36;
217 ! y_hi0 = y0 + D2ON36;
218 ! x_hi0 -= D2ON36;
219 ! y_hi0 -= D2ON36;
220 ! x_lo0 = x0 - x_hi0;
221 ! y_lo0 = y0 - y_hi0;
222 ! res0_hi = x_hi0 * x_hi0;
223 ! dtmp0 = y_hi0 * y_hi0;
224 ! res0_hi += dtmp0;
225 ! res0_lo = x0 + x_hi0;
226 ! res0_lo *= x_lo0;
227 ! dtmp1 = y0 + y_hi0;
228 ! dtmp1 *= y_lo0;
229 ! res0_lo += dtmp1;
230 !
231 ! dres = res0_hi + res0_lo;
232 ! dexp0 = vis_fand(dres,DA1);
233 ! iarr = ((int*)&dres)[0];
234 !
235 ! iarr >>= 11;
236 ! iarr &= 0x1fc;
237 ! dtmp0 = ((double*)((char*)dll1 + iarr))[0];
238 ! dd = vis_fpsub32(dtmp0, dexp0);
239 !
240 ! dtmp0 = dd * dres;
241 ! dtmp0 = DTWO - dtmp0;
242 ! dd *= dtmp0;
243 ! dtmp1 = dd * dres;
244 ! dtmp1 = DTWO - dtmp1;
245 ! dd *= dtmp1;
246 ! dtmp2 = dd * dres;
247 ! dtmp2 = DTWO - dtmp2;
248 ! dres = dd * dtmp2;
249 !
250 ! res0 = vis_fand(dres,DA0);
251 !
252 ! dtmp0 = res0_hi * res0;
253 ! dtmp0 = DONE - dtmp0;
254 ! dtmp1 = res0_lo * res0;
255 ! dtmp0 -= dtmp1;
256 ! dtmp0 *= dres;
257 ! res0 += dtmp0;
258 !
259 ! res0 = sqrt ( res0 );
260 !
261 ! res0 = scl0 * res0;
262 !
263 ! ((float*)pz)[0] = ((float*)&res0)[0];
264 ! ((float*)pz)[1] = ((float*)&res0)[1];
265 !
266 ! px += stridex;
267 ! py += stridey;
268 ! pz += stridez;
269 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
270
271 ENTRY(__vrhypot)
272 save %sp,-SA(MINFRAME)-tmps,%sp
273 PIC_SETUP(l7)
274 PIC_SET(l7,.CONST_TBL,l1)
275 wr %g0,0x82,%asi
276
277 #ifdef __sparcv9
278 ldx [%fp+STACK_BIAS+176],stridez
279 #else
280 ld [%fp+STACK_BIAS+92],stridez
281 #endif
282
283 sll %i2,3,stridex
284 sethi %hi(0x7ff00000),_0x7ff00000
285 st %i0,[%fp+tmp_counter]
286
287 sll %i4,3,stridey
288 sethi %hi(0x00100000),_0x00100000
289 stx %i1,[%fp+tmp_px]
290
291 sll stridez,3,stridez
292 sethi %hi(0x7ffffc00),_0x7fffffff
293 stx %i3,[%fp+tmp_py]
294
295 ldd [TBL+TBL_SHIFT],D2ON36
296 add _0x7fffffff,1023,_0x7fffffff
297
298 ldd [TBL+TBL_SHIFT+8],DA0
299
300 ldd [TBL+TBL_SHIFT+16],DA1
301
302 ldd [TBL+TBL_SHIFT+24],DONE
303
304 ldd [TBL+TBL_SHIFT+32],DTWO
305
306 .begin:
307 ld [%fp+tmp_counter],counter
308 ldx [%fp+tmp_px],%i4
309 ldx [%fp+tmp_py],%i3
310 st %g0,[%fp+tmp_counter]
311 .begin1:
312 cmp counter,0
313 ble,pn %icc,.exit
314
315 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
316 add %i4,stridex,%i1
317
318 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
319 add %i3,stridey,%i0 ! py += stridey
320
321 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
322
323 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
324 bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 )
325 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
326
327 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
328 bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 )
329 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
330
331 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
332 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
333 bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 )
334
335 and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
336 .cont_spec0:
337 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
338
339 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
340
341 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
342
343 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
344
345 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
346
347 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
348 .cont_spec1:
349 lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
350 mov %i1,%i2
351
352 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
353
354 and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
355 mov %i0,%o0
356
357 cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
358 bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 )
359 and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
360
361 cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
362 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
363 bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 )
364 sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
365
366 cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
367
368 and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
369 bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 )
370 sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
371 .cont0:
372 and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
373
374 sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
375 .cont1:
376 sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
377 stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
378
379 ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
380
381 lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
382
383 lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
384
385 lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
386
387 add %i1,stridex,%i4 ! px += stridex
388 lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
389
390 fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
391 add %i4,stridex,%i1 ! px += stridex
392
393 fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
394
395 lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
396
397 add %i0,stridey,%i3 ! py += stridey
398 faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
399
400 lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
401 add %i3,stridey,%i0 ! py += stridey
402 faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
403
404 and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
405
406 cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
407 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
408
409 and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
410 bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 )
411 fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
412
413 cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
414 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
415 bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 )
416 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
417
418 sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
419
420 and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
421
422 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
423 sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
424 cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
425 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
426
427 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
428 and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
429 bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 )
430 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
431
432 sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
433 .cont4:
434 sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
435 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
436 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
437
438 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
439
440 fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
441 ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
442 faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
443
444 lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
445
446 lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
447
448 fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
449 lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
450
451 lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
452
453 fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
454
455 fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
456 faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
457
458 lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
459 mov %i1,%i2
460
461 faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
462
463 lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
464 mov %i0,%o0
465 faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
466
467 faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
468 and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
469
470 cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
471 bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 )
472 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
473
474 and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff;
475 st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
476 fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
477
478 sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
479 cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
480 bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 )
481 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
482
483 sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
484
485 and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
486
487 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
488 cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
489 sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
490 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
491
492 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
493 and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
494 bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 )
495 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
496 .cont7:
497 sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
498
499 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
500 .cont8:
501 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
502 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
503
504 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
505
506 fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
507 ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
508 faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
509
510 lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
511
512 lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
513
514 fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
515 lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
516
517 add %i1,stridex,%i4 ! px += stridex
518 lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
519
520 ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
521 add %i4,stridex,%i1 ! px += stridex
522 fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
523
524 fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
525
526 fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
527 sra %o2,11,%i3 ! (7_1) iarr >>= 11;
528 faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
529
530 and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
531
532 add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
533 lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
534
535 add %i0,stridey,%i3 ! py += stridey
536 ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
537 faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
538
539 lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
540 add %i3,stridey,%i0 ! py += stridey
541 faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
542
543 faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
544 and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
545
546 cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
547 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
548 bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 )
549 fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
550
551 and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
552 st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
553 fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
554
555 sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
556 cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
557 bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 )
558 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
559
560 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
561 sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
562
563 and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
564
565 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
566 cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
567 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
568 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
569
570 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
571 and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
572 bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 )
573 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
574 .cont11:
575 sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
576 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
577 .cont12:
578 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
579 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
580 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
581
582 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
583
584 fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
585 ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
586 faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
587
588 lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
589 fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
590
591 lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
592
593 fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
594 lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
595
596 lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
597
598 fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
599 ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
600 fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
601
602 fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
603
604 fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
605 sra %o2,11,%o4 ! (0_0) iarr >>= 11;
606 faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
607
608 and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
609
610 add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
611 mov %i1,%i2
612 lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
613 fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
614
615 ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
616 faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
617
618 lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
619 mov %i0,%o0
620 faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
621
622 and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
623 faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
624
625 fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
626 cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
627 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
628 fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
629
630 and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
631 bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 )
632 st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
633 fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
634
635 sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
636 cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
637 bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 )
638 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
639
640 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
641 sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
642
643 and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
644
645 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
646 sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
647 cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
648 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
649
650 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
651 and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
652 bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 )
653 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
654 .cont15:
655 sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
656 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
657 .cont16:
658 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
659 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
660 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
661 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
662
663 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
664
665 fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
666 ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
667 faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
668
669 lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
670 fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
671
672 lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
673
674 fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
675 lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
676 fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
677
678 lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
679 add %i1,stridex,%i4 ! px += stridex
680
681 fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
682 ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
683 add %i4,stridex,%i1 ! px += stridex
684 fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
685
686 fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
687
688 fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
689 sra %o2,11,%i3 ! (1_0) iarr >>= 11;
690 faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
691
692 and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
693 fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
694
695 add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
696 lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
697 fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
698
699 add %i0,stridey,%i3 ! py += stridey
700 ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
701 faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
702
703 lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
704 add %i3,stridey,%i0 ! py += stridey
705 faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
706
707 and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
708 faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
709
710 fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
711 cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
712 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
713 fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
714
715 and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
716 st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
717 bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 )
718 fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
719
720 sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
721 cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
722 bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 )
723 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
724
725 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
726 sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
727
728 and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
729 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
730
731 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
732 sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
733 cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
734 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
735
736 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
737 and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
738 bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 )
739 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
740 .cont19a:
741 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
742 sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
743 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
744 .cont19b:
745 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
746 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
747 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
748 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
749
750 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
751 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
752 .cont20:
753 fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
754 ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
755 faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
756
757 fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
758 lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
759 fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
760
761 lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
762
763 fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
764 lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
765 fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
766
767 lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
768
769 fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
770 ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
771 fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
772
773 fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
774 fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
775
776 fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
777 sra %o2,11,%o4 ! (2_0) iarr >>= 11;
778 faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
779
780 and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
781 fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
782
783 add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
784 mov %i1,%i2
785 lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
786 fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
787
788 fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
789 ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
790 faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
791
792 lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
793 mov %i0,%o0
794 faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
795
796 and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
797 faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
798
799 fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
800 cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
801 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
802 fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
803
804 and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
805 st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
806 bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 )
807 fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
808
809 sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
810 cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
811 bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 )
812 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
813
814 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
815 sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
816 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
817
818 and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
819 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
820
821 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
822 sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
823 cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
824 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
825
826 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
827 and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
828 bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 )
829 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
830 .cont23a:
831 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
832 sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
833 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
834 .cont23b:
835 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
836 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
837 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
838 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
839
840 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
841 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
842 .cont24:
843 fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
844 ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
845 faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
846
847 lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
848 fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
849
850 lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
851 fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
852
853 fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
854 lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
855 fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
856
857 add %i1,stridex,%i4 ! px += stridex
858 lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
859
860 fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
861 add %i4,stridex,%i1 ! px += stridex
862 ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
863 fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
864
865 fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
866 fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
867
868 fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
869 sra %o2,11,%i3 ! (3_0) iarr >>= 11;
870 faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
871
872 and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
873 fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
874
/*
 * Wind-up of the 8-slot software pipeline.  The "(k_n)" tags in the
 * inline comments identify pipeline slot k at loop-trip offset n; the
 * instructions below advance the iterations already in flight before
 * steady state is reached.  The ".contNNx" labels are re-entry points
 * (presumably from the corresponding ".updateNN" special-case code,
 * which is outside this excerpt — TODO confirm).  At the end, if fewer
 * than 8 elements remain control falls through to .tail; otherwise
 * counter is pre-decremented by 8 and .main_loop is entered (the
 * "sub counter,8,counter" executes in the branch delay slot).
 */
875 fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
876 add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
877 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
878 fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
879
880 fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
881 add %i0,stridey,%i3 ! py += stridey
882 ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
883 faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
884
885 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
886 add %i3,stridey,%i0 ! py += stridey
887 faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
888
889 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
890 faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
891
892 fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
893 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
894 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
895 fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
896
897 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
898 st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
899 bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 )
900 fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
901
902 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
903 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
904 bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 )
905 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
906
907 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
908 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
909 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
910
911 and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
912 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
913
914 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
915 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
916 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
917 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
918
919 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
920 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
921 bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 )
922 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
923 .cont27a:
924 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
925 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
926 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
927 .cont27b:
928 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
929 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
930 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
931 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
932
933 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
934 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
935 .cont28:
936 fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
937 ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
938 faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
939
940 fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
941 lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
942
943 lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
944 fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
945
946 fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
947 lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
948 fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
949
950 lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
951
952 fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
953 ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
954 fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
955
956 fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
957 ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
958 fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
959
960 fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
961 sra %o2,11,%o4 ! (4_0) iarr >>= 11;
962 faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
963
964 and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
965 fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
966
! Fewer than 8 elements left?  Then drain via .tail; else take a full
! 8-element trip (counter -= 8 executes in the "ba" delay slot).
967 cmp counter,8
968 bl,pn %icc,.tail
969 nop
970
971 ba .main_loop
972 sub counter,8,counter
973
/*
 * Steady-state main loop: 8-way software-pipelined, one instruction
 * group per pipeline slot.  Each slot group below starts a new
 * double-precision sqrt, stores a completed result pair to pz, loads
 * and pre-scales the next x/y operands, and screens their high words,
 * branching to the out-of-line ".updateNN" paths when an operand is
 * huge (hi >= 0x7ff00000: Inf/NaN) or tiny (hi < 0x00100000:
 * zero/subnormal).  The reciprocal of dres = x_hi^2 + y_hi^2 (+ low
 * corrections) is refined with Newton-style steps of the form
 * dtmp = 2 - dd*dres; dd *= dtmp, seeded from the lookup table at TBL.
 *
 * Slot 0 group: sqrt for (0_1), store of the (7_2) result, argument
 * screen for trip (0_0).  "bn,pn %icc,..." is a branch-never filler
 * used to shape instruction groups — it is never taken, but its delay
 * slot still executes.
 */
974 .align 16
975 .main_loop:
976 fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
977 add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
978 lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px;
979 fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
980
981 fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
982 mov %i1,%i2
983 ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
984 faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36;
985
986 nop
987 mov %i0,%o0
988 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py;
989 faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36;
990
991 faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
992 and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff;
993 st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
994 fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
995
996 fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
997 cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000
998 st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
999 fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1000
1001 and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff;
1002 st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1003 bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 )
1004 fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36;
1005
1006 cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000
1007 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0;
1008 bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 )
1009 fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
1010
1011 fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
1012 sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31;
1013 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
1014 faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
1015
1016 and %o1,%o3,%o1 ! (0_0) j0 &= diff0;
1017 cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000
1018 bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 )
1019 fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
1020 .cont31:
1021 fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
1022 sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0;
1023 nop
1024 fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
1025
1026 fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
1027 add %i5,stridez,%i5 ! pz += stridez
1028 and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000;
1029 faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
1030
1031 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
1032 sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0;
1033 nop
1034 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
1035 .cont32:
1036 fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
1037 sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32;
1038 stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll;
1039 faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0;
1040
1041 fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
1042 nop
1043 bn,pn %icc,.exit
1044 fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0;
1045
1046 fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0;
1047 nop
1048 ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll;
1049 faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0;
1050
1051 nop
1052 nop
1053 lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
1054 fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
1055
1056 nop
1057 nop
1058 lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
1059 fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
1060
1061 fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0;
1062 nop
1063 lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
1064 fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
1065
1066 add %i1,stridex,%i4 ! px += stridex
1067 nop
1068 lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
1069 bn,pn %icc,.exit
1070
1071 fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
1072 add %i4,stridex,%i1 ! px += stridex
1073 ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
1074 fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
1075
1076 fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0;
1077 nop
1078 ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
1079 fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
1080
1081 fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0;
1082 sra %o2,11,%i3 ! (5_1) iarr >>= 11;
1083 nop
1084 faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1;
1085
1086 and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
1087 nop
1088 bn,pn %icc,.exit
1089 fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
1090
/*
 * Slot 1 group: sqrt for (1_1), store of the (0_1) result, argument
 * screen for trip (1_0); exception paths .update33/.update34/.update35.
 */
1091 fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
1092 lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px;
1093 add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
1094 fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
1095
1096 fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
1097 add %i0,stridey,%i3 ! py += stridey
1098 ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1099 faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36;
1100
1101 nop
1102 add %i3,stridey,%i0 ! py += stridey
1103 lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py;
1104 faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36;
1105
1106 faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo;
1107 and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff;
1108 st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0];
1109 fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
1110
1111 fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
1112 cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000
1113 st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1114 fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1115
1116 and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff;
1117 nop
1118 bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 )
1119 fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36;
1120
1121 cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000
1122 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0;
1123 st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1124 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
1125
1126 fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
1127 sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31;
1128 bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 )
1129 faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
1130
1131 and %o1,%o3,%o1 ! (1_0) j0 &= diff0;
1132 add %i5,stridez,%i5 ! pz += stridez
1133 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
1134 fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
1135
1136 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
1137 sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0;
1138 cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000
1139 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
1140
1141 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
1142 and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000;
1143 bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 )
1144 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
1145 .cont35a:
1146 fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
1147 nop
1148 sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0;
1149 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
1150 .cont35b:
1151 fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
1152 sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
1153 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
1154 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
1155
1156 fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
1157 nop
1158 nop
1159 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
1160 .cont36:
1161 fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0;
1162 nop
1163 ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll;
1164 faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0;
1165
1166 fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
1167 nop
1168 lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
1169 fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
1170
1171 nop
1172 nop
1173 lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
1174 bn,pn %icc,.exit
1175
1176 fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0;
1177 nop
1178 lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
1179 fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
1180
1181 nop
1182 nop
1183 lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
1184 bn,pn %icc,.exit
1185
1186 fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
1187 nop
1188 ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0];
1189 fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1);
1190
1191 fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0;
1192 nop
1193 ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
1194 fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
1195
1196 fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0;
1197 sra %o2,11,%g1 ! (6_1) iarr >>= 11;
1198 nop
1199 faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1;
1200
1201 nop
1202 and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc;
1203 bn,pn %icc,.exit
1204 fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
1205
/*
 * Slot 2 group: sqrt for (2_1), store of the (1_1) result, argument
 * screen for trip (2_0); exception paths .update37/.update38/.update39.
 */
1206 fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
1207 lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px;
1208 add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr
1209 fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
1210
1211 fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
1212 mov %i1,%i2
1213 ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1214 faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36;
1215
1216 nop
1217 mov %i0,%o0
1218 lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py;
1219 faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36;
1220
1221 faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo;
1222 and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff;
1223 st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
1224 fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
1225
1226 fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
1227 cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000
1228 st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
1229 fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
1230
1231 and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff;
1232 nop
1233 bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 )
1234 fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
1235
1236 sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0;
1237 cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000
1238 st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
1239 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
1240
1241 fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
1242 sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31;
1243 bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 )
1244 faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
1245
1246 and %o1,%o3,%o1 ! (2_0) j0 &= diff0;
1247 add %i5,stridez,%i5 ! pz += stridez
1248 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
1249 fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
1250
1251 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
1252 cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000
1253 sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0;
1254 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
1255
1256 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
1257 and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000;
1258 bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 )
1259 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
1260 .cont39a:
1261 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
1262 sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0;
1263 nop
1264 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
1265 .cont39b:
1266 fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
1267 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
1268 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
1269 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
1270
1271 fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
1272 nop
1273 nop
1274 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
1275 .cont40:
1276 fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0;
1277 nop
1278 ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll;
1279 faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0;
1280
1281 fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
1282 nop
1283 lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
1284 fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0;
1285
1286 nop
1287 nop
1288 lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
1289 bn,pn %icc,.exit
1290
1291 fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0;
1292 nop
1293 lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
1294 fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
1295
1296 add %i1,stridex,%i4 ! px += stridex
1297 nop
1298 lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
1299 bn,pn %icc,.exit
1300
1301 fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres;
1302 add %i4,stridex,%i1 ! px += stridex
1303 ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0];
1304 fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1);
1305
1306 fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0;
1307 nop
1308 ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
1309 fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
1310
1311 fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0;
1312 sra %o2,11,%i3 ! (7_1) iarr >>= 11;
1313 nop
1314 faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1;
1315
1316 and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc;
1317 nop
1318 bn,pn %icc,.exit
1319 fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
1320
/*
 * Slot 3 group: sqrt for (3_1), store of the (2_1) result, argument
 * screen for trip (3_0); exception paths .update41/.update42/.update43.
 */
1321 fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
1322 add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr
1323 lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px;
1324 fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1;
1325
1326 fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
1327 add %i0,stridey,%i3 ! py += stridey
1328 ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1329 faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36;
1330
1331 nop
1332 add %i3,stridey,%i0 ! py += stridey
1333 lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py;
1334 faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36;
1335
1336 faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo;
1337 and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff;
1338 st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
1339 fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
1340
1341 fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1;
1342 cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000
1343 st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
1344 fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
1345
1346 and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff;
1347 nop
1348 bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 )
1349 fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
1350
1351 sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0;
1352 cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000
1353 st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
1354 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
1355
1356 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
1357 sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31;
1358 bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 )
1359 faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
1360
1361 and %o1,%o3,%o1 ! (3_0) j0 &= diff0;
1362 add %i5,stridez,%i5 ! pz += stridez
1363 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
1364 fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
1365
1366 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
1367 cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000
1368 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0;
1369 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
1370
1371 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
1372 and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000;
1373 bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 )
1374 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
1375 .cont43a:
1376 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
1377 nop
1378 sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0;
1379 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
1380 .cont43b:
1381 fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
1382 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
1383 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
1384 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
1385
1386 fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
1387 nop
1388 nop
1389 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0;
1390 .cont44:
1391 fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0;
1392 nop
1393 ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll;
1394 faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0;
1395
1396 fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
1397 nop
1398 lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
1399 fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0;
1400
1401 nop
1402 nop
1403 lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
1404 bn,pn %icc,.exit
1405
1406 fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0;
1407 nop
1408 lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
1409 fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2;
1410
1411 nop
1412 nop
1413 lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
1414 bn,pn %icc,.exit
1415
1416 fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres;
1417 nop
1418 ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0];
1419 fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1);
1420
1421 fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0;
1422 nop
1423 ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
1424 fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
1425
1426 fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0;
1427 sra %o2,11,%o4 ! (0_0) iarr >>= 11;
1428 nop
1429 faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1;
1430
1431 and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc;
1432 nop
1433 bn,pn %icc,.exit
1434 fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2;
1435
/*
 * Slot 4 group: sqrt for (4_1), store of the (3_1) result, argument
 * screen for trip (4_0); exception paths .update45/.update46/.update47.
 */
1436 fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
1437 add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr
1438 lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px;
1439 fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1;
1440
1441 fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres;
1442 mov %i1,%i2
1443 ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1444 faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36;
1445
1446 nop
1447 mov %i0,%o0
1448 lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py;
1449 faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36;
1450
1451 fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
1452 nop
1453 and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff;
1454 faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo;
1455
1456 fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1;
1457 cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000
1458 st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0];
1459 fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
1460
1461 and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff;
1462 st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
1463 bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 )
1464 fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
1465
1466 sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0;
1467 cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000
1468 bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 )
1469 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
1470
1471 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
1472 sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31;
1473 st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
1474 faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
1475
1476 and %o1,%o3,%o1 ! (4_0) j0 &= diff0;
1477 cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000
1478 bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 )
1479 fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
1480 .cont47a:
1481 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
1482 sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0;
1483 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
1484 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
1485
1486 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
1487 and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000;
1488 add %i5,stridez,%i5 ! pz += stridez
1489 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
1490
1491 fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
1492 nop
1493 sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0;
1494 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
1495 .cont47b:
1496 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
1497 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
1498 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
1499 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
1500
1501 fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
1502 nop
1503 nop
1504 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
1505 .cont48:
1506 fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0;
1507 nop
1508 ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll;
1509 faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0;
1510
1511 fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0;
1512 nop
1513 lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
1514 fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0;
1515
1516 nop
1517 nop
1518 lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
1519 bn,pn %icc,.exit
1520
1521 fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0;
1522 nop
1523 lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
1524 fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2;
1525
1526 lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
1527 add %i1,stridex,%i4 ! px += stridex
1528 nop
1529 bn,pn %icc,.exit
1530
1531 fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres;
1532 add %i4,stridex,%i1 ! px += stridex
1533 ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0];
1534 fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1);
1535
1536 fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0;
1537 nop
1538 ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
1539 fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1;
1540
1541 fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0;
1542 sra %o2,11,%i3 ! (1_0) iarr >>= 11;
1543 nop
1544 faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1;
1545
1546 and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc;
1547 nop
1548 bn,pn %icc,.exit
1549 fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2;
1550
/*
 * Slot 5 group: sqrt for (5_1), store of the (4_1) result, argument
 * screen for trip (5_0); exception paths .update49/.update50/.update51.
 */
1551 fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
1552 add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr
1553 lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px;
1554 fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1;
1555
1556 fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres;
1557 add %i0,stridey,%i3 ! py += stridey
1558 ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1559 faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36;
1560
1561 nop
1562 add %i3,stridey,%i0 ! py += stridey
1563 lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py;
1564 faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36;
1565
1566 fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
1567 and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff;
1568 nop
1569 faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo;
1570
1571 fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1;
1572 cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000
1573 st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0];
1574 fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
1575
1576 and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff;
1577 st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
1578 bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 )
1579 fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36;
1580
1581 sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0;
1582 cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000
1583 bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 )
1584 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
1585
1586 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
1587 sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31;
1588 st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
1589 faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
1590
1591 and %o1,%o3,%o1 ! (5_0) j0 &= diff0;
1592 cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000
1593 bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 )
1594 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
1595 .cont51a:
1596 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
1597 sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0;
1598 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
1599 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
1600
1601 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
1602 and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000;
1603 add %i5,stridez,%i5 ! pz += stridez
1604 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
1605
1606 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
1607 sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0;
1608 nop
1609 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
1610 .cont51b:
1611 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
1612 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
1613 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
1614 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
1615
1616 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
1617 nop
1618 nop
1619 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
1620 .cont52:
1621 fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0;
1622 nop
1623 ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll;
1624 faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0;
1625
1626 fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0;
1627 nop
1628 lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
1629 fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0;
1630
1631 nop
1632 nop
1633 lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
1634 bn,pn %icc,.exit
1635
1636 fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0;
1637 nop
1638 lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
1639 fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2;
1640
1641 nop
1642 nop
1643 lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
1644 bn,pn %icc,.exit
1645
1646 fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres;
1647 nop
1648 ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0];
1649 fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1);
1650
1651 fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0;
1652 nop
1653 ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
1654 fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1;
1655
1656 fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0;
1657 sra %o2,11,%o4 ! (2_0) iarr >>= 11;
1658 nop
1659 faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1;
1660
1661 and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc;
1662 nop
1663 bn,pn %icc,.exit
1664 fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2;
1665
/*
 * Slot 6 group: sqrt for (6_1), store of the (5_1) result, argument
 * screen for trip (6_0); exception paths .update53/.update54/.update55.
 */
1666 fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 );
1667 add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr
1668 lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px;
1669 fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1;
1670
1671 fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres;
1672 mov %i1,%i2
1673 ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1674 faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36;
1675
1676 nop
1677 mov %i0,%o0
1678 lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py;
1679 faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36;
1680
1681 fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
1682 and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff;
1683 nop
1684 faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo;
1685
1686 fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1;
1687 cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000
1688 st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0];
1689 fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
1690
1691 and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff;
1692 st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
1693 bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 )
1694 fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36;
1695
1696 sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0;
1697 cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000
1698 bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 )
1699 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
1700
1701 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
1702 sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31;
1703 st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
1704 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
1705
1706 and %o1,%o3,%o1 ! (6_0) j0 &= diff0;
1707 cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000
1708 bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 )
1709 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
1710 .cont55a:
1711 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
1712 sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0;
1713 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
1714 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
1715
1716 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
1717 and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000;
1718 add %i5,stridez,%i5 ! pz += stridez
1719 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
1720
1721 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
1722 sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0;
1723 nop
1724 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
1725 .cont55b:
1726 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
1727 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
1728 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
1729 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
1730
1731 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
1732 nop
1733 nop
1734 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
1735 .cont56:
1736 fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0;
1737 nop
1738 ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll;
1739 faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0;
1740
1741 lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
1742 nop
1743 nop
1744 fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0;
1745
1746 lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
1747 nop
1748 nop
1749 fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0;
1750
1751 fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0;
1752 nop
1753 lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
1754 fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2;
1755
1756 nop
1757 add %i1,stridex,%i4 ! px += stridex
1758 lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
1759 bn,pn %icc,.exit
1760
1761 fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres;
1762 add %i4,stridex,%i1 ! px += stridex
1763 ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0];
1764 fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1);
1765
1766 fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0;
1767 nop
1768 ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll;
1769 fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1;
1770
1771 fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0;
1772 sra %o2,11,%i3 ! (3_0) iarr >>= 11;
1773 nop
1774 faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1;
1775
1776 and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc;
1777 nop
1778 bn,pn %icc,.exit
1779 fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2;
1780
/*
 * Slot 7 group: sqrt for (7_1), store of the (6_1) result, argument
 * screen for trip (7_0); exception paths .update57/.update58/.update59.
 * This group continues past the end of this excerpt (the loop's
 * back-branch is out of view).
 */
1781 fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 );
1782 add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr
1783 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px;
1784 fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1;
1785
1786 fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres;
1787 add %i0,stridey,%i3 ! py += stridey
1788 ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1789 faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36;
1790
1791 nop
1792 add %i3,stridey,%i0 ! py += stridey
1793 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py;
1794 faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36;
1795
1796 fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0;
1797 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff;
1798 nop
1799 faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo;
1800
1801 fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1;
1802 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000
1803 st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0];
1804 fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
1805
1806 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff;
1807 st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
1808 bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 )
1809 fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36;
1810
1811 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0;
1812 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000
1813 bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 )
1814 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
1815
1816 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
1817 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31;
1818 st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
1819 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
1820
1821 and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
1822 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000
1823 bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 )
1824 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
1825 .cont59a:
1826 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
1827 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0;
1828 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
1829 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
1830
1831 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
1832 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000;
1833 add %i5,stridez,%i5 ! pz += stridez
1834 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
1835
1836 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
1837 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0;
1838 nop
1839 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
1840 .cont59b:
1841 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
1842 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
1843 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
1844 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
1845
1846 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
1847 nop
1848 nop
1849 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
1850 .cont60:
1851 fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0;
1852 nop
1853 ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll;
1854 faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0;
1855
1856 fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0;
1857 nop
1858 lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
1859 bn,pn %icc,.exit
1860
1861 lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
1862 nop
1863 nop
1864 fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0;
1865
1866 fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0;
1867 nop
1868 lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
1869 fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2;
1870
1871 nop
1872 nop
1873 lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
1874 bn,pn %icc,.exit
1875
1876 fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres;
1877 nop
1878 ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0];
1879 fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1);
1880
1881 fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0;
1882 nop
1883 ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll;
1884 fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1;
1885
1886 fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0;
1887 sra %o2,11,%o4 ! (4_0) iarr >>= 11;
1888 nop
1889 faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1;
1890
1891 and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc;
1892 subcc counter,8,counter ! counter -= 8;
1893 bpos,pt %icc,.main_loop
1894 fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2;
1895
1896 add counter,8,counter
1897
! -------------------------------------------------------------------
! .tail: drain the software pipeline when fewer than 8 elements
! remain.  Each group below retires one in-flight iteration -
! finishing its Newton-Raphson refinement, taking the sqrt and
! applying the saved scale factor scl0 from its dtmpN slot - then
! stores one result and decrements counter, bailing out to .begin
! once counter goes negative.  Instruction order is load/branch
! latency scheduling; do not reorder.
! -------------------------------------------------------------------
1898 .tail:
1899 subcc counter,1,counter
1900 bneg .begin
1901 nop
1902
1903 fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 );
1904 add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr
1905 fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1;
1906
1907 fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres;
1908 ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1909
1910 fmuld %f0,%f18,%f0 ! (7_2) res0 = scl0 * res0;
1911 st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
1912 faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo;
1913
1914 subcc counter,1,counter
1915 st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1916 bneg .begin
1917 add %i5,stridez,%i5 ! pz += stridez
1918
1919 fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1;
1920 st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0];
1921 fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1922
1923 fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
1924 faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
1925
1926
1927 fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
1928
1929 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
1930 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
1931
1932 fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0;
1933
1934 fmuld %f40,%f48,%f40 ! (2_1) dtmp1 = res0_lo * res0;
1935
1936 fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0;
1937
1938 fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0;
1939
1940 fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2;
1941
1942 fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres;
1943 ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0];
1944 fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1);
1945
1946 ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll;
1947 fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1;
1948
1949 sra %o2,11,%i3 ! (5_1) iarr >>= 11;
1950
1951 and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc;
1952 fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2;
1953
1954 fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 );
1955 add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr
1956 fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1;
1957
1958 fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres;
1959 ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1960
1961 fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0;
1962
1963 fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1;
1964 fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1965
1966 st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1967
1968 fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
1969 st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1970 faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
1971
1972 subcc counter,1,counter
1973 bneg .begin
1974 add %i5,stridez,%i5 ! pz += stridez
1975
1976 fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
1977
1978 fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
1979 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
1980
1981 fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
1982
1983 fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
1984
1985 fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0;
1986 fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0;
1987
1988 fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2;
1989
1990 fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres;
1991
1992 ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll;
1993 fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1;
1994
1995 fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2;
1996
1997 fsqrtd %f52,%f24 ! (2_1) res0 = sqrt ( res0 );
1998 fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1;
1999
2000 fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres;
2001
2002 fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0;
2003
2004 fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1;
2005
2006 st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
2007
2008 subcc counter,1,counter
2009 st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
2010 bneg .begin
2011 add %i5,stridez,%i5 ! pz += stridez
2012
2013 faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
2014
2015 fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
2016
2017 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
2018
2019 fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
2020
2021 fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
2022
2023 fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0;
2024
2025 fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2;
2026
2027 ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll;
2028 fsubd %f60,%f36,%f20 ! (4_1) dtmp0 -= dtmp1;
2029
2030 fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2;
2031
2032 fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 );
2033
2034 fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres;
2035
2036 fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0;
2037
2038 st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
2039
2040 st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
2041 faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
2042
2043 subcc counter,1,counter
2044 bneg .begin
2045 add %i5,stridez,%i5 ! pz += stridez
2046
2047 fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
2048
2049 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
2050
2051 fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
2052
2053 fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0;
2054
2055 ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll;
2056 fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1;
2057
2058 fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 );
2059
2060 fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; (comment was wrong: this is the multiply step, cf. (3_1)/(4_1) above)
2061
2062 fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0;
2063 st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
2064 st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
2065 faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
2066
2067 subcc counter,1,counter
2068 bneg .begin
2069 add %i5,stridez,%i5 ! pz += stridez
2070
2071 ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll;
2072
2073 fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 );
2074
2075 fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0;
2076 st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
2077 st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
2078
2079 subcc counter,1,counter
2080 bneg .begin
2081 add %i5,stridez,%i5 ! pz += stridez
2082
2083 ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll;
2084
2085 fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0;
2086 st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
2087 st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
2088
2089 ba .begin
2090 add %i5,stridez,%i5
2091
2092 .align 16
2093 .spec0:
! Scalar fixup for Inf/NaN arguments (hx0 or hy0 >= 0x7ff00000).
! If either argument is +-Inf (high word == 0x7ff00000, low word 0)
! the reciprocal-hypot result is exactly +0 (path 3:).  Otherwise at
! least one argument is a NaN and storing |x0| * |y0| propagates it.
! In: %i4 = px, %i3 = py, %i5 = pz, %o7 = hx0, %l7 = hy0.
2094 cmp %o7,_0x7ff00000 ! hx0 ? 0x7ff00000
2095 bne 1f ! if ( hx0 != 0x7ff00000 )
2096 ld [%i4+4],%i2 ! lx = ((int*)px)[1];
2097
2098 cmp %i2,0 ! lx ? 0
2099 be 3f ! if ( lx == 0 ) x is +-Inf
2100 nop
2101 1:
2102 cmp %l7,_0x7ff00000 ! hy0 ? 0x7ff00000
2103 bne 2f ! if ( hy0 != 0x7ff00000 )
2104 ld [%i3+4],%o2 ! ly = ((int*)py)[1];
2105
2106 cmp %o2,0 ! ly ? 0
2107 be 3f ! if ( ly == 0 ) y is +-Inf
2108 2:
! NaN path: res0 = |x0| * |y0| propagates the NaN to the result.
2109 ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
2110 ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
2111
2112 ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
2113 add %i4,stridex,%i4 ! px += stridex
2114 ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
2115
2116 fabsd %f0,%f0
2117
2118 fabsd %f2,%f2
2119
2120 fmuld %f0,%f2,%f0 ! res0 = fabs(x0) * fabs(y0);
2121 add %i3,stridey,%i3 ! py += stridey;
2122 st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
2123
2124 st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
2125 add %i5,stridez,%i5 ! pz += stridez
2126 ba .begin1
2127 sub counter,1,counter
2128 3:
! Inf path: result is +0.  BUGFIX: write BOTH words of the current
! result element before advancing pz.  The previous code advanced pz
! between the two stores, leaving ((int*)pz)[1] of this element
! unwritten and clobbering word 1 of the NEXT element instead.
2129 add %i4,stridex,%i4 ! px += stridex
2130 add %i3,stridey,%i3 ! py += stridey
2131 st %g0,[%i5] ! ((int*)pz)[0] = 0;
2132
2133 st %g0,[%i5+4] ! ((int*)pz)[1] = 0;
2134 add %i5,stridez,%i5 ! pz += stridez;
2135 ba .begin1
2136 sub counter,1,counter
2137
2138 .align 16
2139 .spec1:
! First-iteration fixup entered when hx0 < 0x00100000 (x is zero or
! subnormal).  If y is still normal-sized the main path can process
! the pair; if x == y == 0 the result is 1/0 = +Inf; otherwise both
! operands are rescaled into the normal range before re-entering at
! .cont_spec1.
2140 and %o1,%o3,%o1 ! (7_0) j0 &= diff0;
2141
2142 cmp %l7,_0x00100000 ! (7_0) hy0 ? 0x00100000
2143 bge,pn %icc,.cont_spec0 ! (7_0) if ( hy0 >= 0x00100000 ) main path handles it
2144
2145 ld [%i4+4],%i2 ! lx = ((int*)px)[1];
2146 or %o7,%l7,%g5 ! ii = hx0 | hy0;
2147 fzero %f0
2148
2149 ld [%i3+4],%o2 ! ly = ((int*)py)[1];
2150 or %i2,%g5,%g5 ! ii |= lx;
2151
2152 orcc %o2,%g5,%g5 ! ii |= ly;
2153 bnz,a,pn %icc,1f ! if ( ii != 0 ) some bits set: rescale path
2154 sethi %hi(0x00080000),%i2 ! (annulled: executed only when branch taken)
2155
! x == 0 and y == 0: divide to raise the expected flags and get +Inf.
2156 fdivd DONE,%f0,%f0 ! res0 = 1.0 / 0.0;
2157
2158 st %f0,[%i5] ! ((float*)pz)[0] = ((float*)&res0)[0];
2159
2160 add %i4,stridex,%i4 ! px += stridex;
2161 add %i3,stridey,%i3 ! py += stridey;
2162 st %f1,[%i5+4] ! ((float*)pz)[1] = ((float*)&res0)[1];
2163
2164 add %i5,stridez,%i5 ! pz += stridez;
2165 ba .begin1
2166 sub counter,1,counter
2167 1:
! Tiny but nonzero operands: convert each to a normal-range double.
! Subnormals with high word < 0x00080000 are reinterpreted as 64-bit
! integers and converted with fxtod; larger ones keep only mantissa
! bits (fand with 0x0007ffffffffffff) and get D2ON51 added.  The
! compensating scale factors (D2ONM52, D2ON1022 per the table
! comments) are parked in dtmp15/dtmp0 for .cont_spec1 to apply.
2168 ld [%i4],%f0 ! ((float*)&x0)[0] = ((float*)px)[0];
2169
2170 ld [%i4+4],%f1 ! ((float*)&x0)[1] = ((float*)px)[1];
2171
2172 ld [%i3],%f2 ! ((float*)&y0)[0] = ((float*)py)[0];
2173
2174 fabsd %f0,%f0 ! x0 = fabs(x0);
2175 ld [%i3+4],%f3 ! ((float*)&y0)[1] = ((float*)py)[1];
2176
2177 ldd [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
2178 add %fp,dtmp2,%i4
2179 add %fp,dtmp3,%i3
2180
2181 fabsd %f2,%f2 ! y0 = fabs(y0);
2182 ldd [TBL+TBL_SHIFT+56],%f10 ! D2ON51
2183
2184 ldx [TBL+TBL_SHIFT+48],%g5 ! D2ONM52
2185 cmp %o7,%i2 ! hx0 ? 0x00080000
2186 bl,a 1f ! if ( hx0 < 0x00080000 )
2187 fxtod %f0,%f0 ! x0 = *(long long*)&x0; (annulled: only when taken)
2188
2189 fand %f0,%f12,%f0 ! x0 = vis_fand(x0, dtmp0);
2190 fxtod %f0,%f0 ! x0 = *(long long*)&x0;
2191 faddd %f0,%f10,%f0 ! x0 += D2ON51;
2192 1:
2193 std %f0,[%i4]
2194
2195 ldx [TBL+TBL_SHIFT+40],%g1 ! D2ON1022
2196 cmp %l7,%i2 ! hy0 ? 0x00080000
2197 bl,a 1f ! if ( hy0 < 0x00080000 )
2198 fxtod %f2,%f2 ! y0 = *(long long*)&y0; (annulled: only when taken)
2199
2200 fand %f2,%f12,%f2 ! y0 = vis_fand(y0, dtmp0);
2201 fxtod %f2,%f2 ! y0 = *(long long*)&y0;
2202 faddd %f2,%f10,%f2 ! y0 += D2ON51;
2203 1:
2204 std %f2,[%i3]
2205
2206 stx %g5,[%fp+dtmp15] ! D2ONM52
2207
2208 ba .cont_spec1
2209 stx %g1,[%fp+dtmp0] ! D2ON1022
2210
2211 .align 16
2212 .update0:
! Deferral stub (stage 0): if more than 1 element remains, save the
! unprocessed count and current px/py to tmp_counter/tmp_px/tmp_py
! (reprocessed from .begin later) and clamp counter to 1 so only the
! in-flight pipeline work drains; substitute the dummy TBL operand
! for this element and rejoin at .cont1.
2213 cmp counter,1
2214 ble 1f
2215 nop
2216
2217 sub counter,1,counter
2218 st counter,[%fp+tmp_counter]
2219
2220 stx %i2,[%fp+tmp_px]
2221
2222 stx %o0,[%fp+tmp_py]
2223
2224 mov 1,counter
2225 1:
2226 sethi %hi(0x3ff00000),%o4
2227 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2228 ba .cont1
2229 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2230
2231 .align 16
2232 .update1:
! Fixup for x below the normal range (stage 0): if y is normal-sized
! the main path still computes the pair correctly, so resume there;
! otherwise defer the remaining elements as in .update0.
2233 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2234 bge,pn %icc,.cont0 ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2235
2236 cmp counter,1
2237 ble,a 1f
2238 nop
2239
2240 sub counter,1,counter
2241 st counter,[%fp+tmp_counter]
2242
2243 stx %i2,[%fp+tmp_px]
2244
2245 mov 1,counter
2246 stx %o0,[%fp+tmp_py]
2247 1:
2248 sethi %hi(0x3ff00000),%o4
2249 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2250 ba .cont1
2251 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2252
2253 .align 16
2254 .update2:
! Deferral stub (stage 2): save count/px/py of unprocessed elements
! to tmp_* and clamp counter to 2; then re-issue the (7_1) FP work
! skipped in the shadow of the branch here and rejoin at .cont4 with
! dummy TBL operands for this element.
2255 cmp counter,2
2256 ble 1f
2257 nop
2258
2259 sub counter,2,counter
2260 st counter,[%fp+tmp_counter]
2261
2262 stx %i4,[%fp+tmp_px]
2263
2264 stx %i3,[%fp+tmp_py]
2265
2266 mov 2,counter
2267 1:
2268 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
2269
2270 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
2271 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
2272
2273 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
2274 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
2275
2276 sethi %hi(0x3ff00000),%o4
2277 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2278 ba .cont4
2279 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2280
2281 .align 16
2282 .update3:
! Deferral stub (stage 2, later branch site than .update2): same
! deferral bookkeeping, re-issuing slightly less (7_1) FP shadow
! work before rejoining at .cont4.
2283 cmp counter,2
2284 ble 1f
2285 nop
2286
2287 sub counter,2,counter
2288 st counter,[%fp+tmp_counter]
2289
2290 stx %i4,[%fp+tmp_px]
2291
2292 stx %i3,[%fp+tmp_py]
2293
2294 mov 2,counter
2295 1:
2296 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
2297 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
2298
2299 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
2300 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
2301
2302 sethi %hi(0x3ff00000),%o4
2303 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2304 ba .cont4
2305 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2306
2307 .align 16
2308 .update4:
! Fixup for x below the normal range (stage 1): if y is normal-sized
! resume the main path at .cont4, otherwise defer as in .update2.
2309 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2310 bge,a,pn %icc,.cont4 ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2311 sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; (annulled: only when taken)
2312
2313 cmp counter,2
2314 ble,a 1f
2315 nop
2316
2317 sub counter,2,counter
2318 st counter,[%fp+tmp_counter]
2319
2320 stx %i4,[%fp+tmp_px]
2321
2322 mov 2,counter
2323 stx %i3,[%fp+tmp_py]
2324 1:
2325 sethi %hi(0x3ff00000),%o4
2326 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2327 ba .cont4
2328 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2329
2330 .align 16
2331 .update5:
! Deferral stub (stage 3): defer remaining elements beyond the 3 in
! flight, re-issue the (7_1)/(0_0) FP shadow work, rejoin at .cont8.
2332 cmp counter,3
2333 ble 1f
2334 nop
2335
2336 sub counter,3,counter
2337 st counter,[%fp+tmp_counter]
2338
2339 stx %i2,[%fp+tmp_px]
2340
2341 stx %o0,[%fp+tmp_py]
2342
2343 mov 3,counter
2344 1:
2345 st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0];
2346 fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36;
2347
2348 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
2349
2350 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
2351 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
2352
2353 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
2354 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
2355
2356 sethi %hi(0x3ff00000),%g1
2357 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2358
2359 sllx %g1,32,%g1
2360 ba .cont8
2361 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2362
2363 .align 16
2364 .update6:
! Deferral stub (stage 3, later branch site): same as .update5 with
! less shadow work to re-issue; rejoins at .cont8.
2365 cmp counter,3
2366 ble 1f
2367 nop
2368
2369 sub counter,3,counter
2370 st counter,[%fp+tmp_counter]
2371
2372 stx %i2,[%fp+tmp_px]
2373
2374 stx %o0,[%fp+tmp_py]
2375
2376 mov 3,counter
2377 1:
2378 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
2379 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
2380
2381 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
2382 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
2383
2384 sethi %hi(0x3ff00000),%g1
2385 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2386
2387 sllx %g1,32,%g1
2388 ba .cont8
2389 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2390
2391 .align 16
2392 .update7:
! Fixup for x below the normal range (stage 2): if y is normal-sized
! resume the main path at .cont7, otherwise defer as in .update5.
2393 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2394 bge,pn %icc,.cont7 ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2395
2396 cmp counter,3
2397 ble,a 1f
2398 nop
2399
2400 sub counter,3,counter
2401 st counter,[%fp+tmp_counter]
2402
2403 stx %i2,[%fp+tmp_px]
2404
2405 mov 3,counter
2406 stx %o0,[%fp+tmp_py]
2407 1:
2408 sethi %hi(0x3ff00000),%g1
2409 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2410
2411 sllx %g1,32,%g1
2412 ba .cont8
2413 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2414
2415 .align 16
2416 .update9:
! Deferral stub (stage 4): defer remaining elements beyond the 4 in
! flight, re-issue the (0_0)/(1_0)/(7_1) FP shadow work, rejoin at
! .cont12.
2417 cmp counter,4
2418 ble 1f
2419 nop
2420
2421 sub counter,4,counter
2422 st counter,[%fp+tmp_counter]
2423
2424 stx %i4,[%fp+tmp_px]
2425
2426 stx %i3,[%fp+tmp_py]
2427
2428 mov 4,counter
2429 1:
2430 st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0];
2431 fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36;
2432
2433 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
2434
2435 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
2436
2437
2438 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
2439 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
2440
2441 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
2442 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
2443
2444 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
2445
2446 sethi %hi(0x3ff00000),%g1
2447 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2448 ba .cont12
2449 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2450
2451 .align 16
2452 .update10:
! Deferral stub (stage 4, later branch site): same as .update9 with
! less shadow work to re-issue; rejoins at .cont12.
2453 cmp counter,4
2454 ble 1f
2455 nop
2456
2457 sub counter,4,counter
2458 st counter,[%fp+tmp_counter]
2459
2460 stx %i4,[%fp+tmp_px]
2461
2462 stx %i3,[%fp+tmp_py]
2463
2464 mov 4,counter
2465 1:
2466 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
2467
2468
2469 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
2470 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
2471
2472 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
2473 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
2474
2475 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
2476
2477 sethi %hi(0x3ff00000),%g1
2478 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2479 ba .cont12
2480 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2481
2482 .align 16
2483 .update11:
! Fixup for x below the normal range (stage 3): if y is normal-sized
! resume the main path at .cont11, otherwise defer as in .update9.
2484 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2485 bge,pn %icc,.cont11 ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2486
2487 cmp counter,4
2488 ble,a 1f
2489 nop
2490
2491 sub counter,4,counter
2492 st counter,[%fp+tmp_counter]
2493
2494 stx %i4,[%fp+tmp_px]
2495
2496 mov 4,counter
2497 stx %i3,[%fp+tmp_py]
2498 1:
2499 sethi %hi(0x3ff00000),%g1
2500 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2501
2502 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
2503 ba .cont12
2504 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2505
2506 .align 16
2507 .update13:
! Deferral stub (stage 5): defer remaining elements beyond the 5 in
! flight, re-issue the (2_0)/(0_0) FP shadow work, rejoin at .cont16.
2508 cmp counter,5
2509 ble 1f
2510 nop
2511
2512 sub counter,5,counter
2513 st counter,[%fp+tmp_counter]
2514
2515 stx %i2,[%fp+tmp_px]
2516
2517 stx %o0,[%fp+tmp_py]
2518
2519 mov 5,counter
2520 1:
2521 fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36;
2522
2523 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
2524
2525 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
2526
2527 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
2528 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
2529
2530 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
2531 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
2532
2533 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
2534
2535 sethi %hi(0x3ff00000),%g1
2536 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2537 ba .cont16
2538 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2539
2540 .align 16
2541 .update14:
! Deferral stub (stage 5, later branch site): same as .update13 with
! less shadow work to re-issue; rejoins at .cont16.
2542 cmp counter,5
2543 ble 1f
2544 nop
2545
2546 sub counter,5,counter
2547 st counter,[%fp+tmp_counter]
2548
2549 stx %i2,[%fp+tmp_px]
2550
2551 stx %o0,[%fp+tmp_py]
2552
2553 mov 5,counter
2554 1:
2555 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
2556
2557 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
2558 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
2559
2560 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
2561 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
2562
2563 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
2564
2565 sethi %hi(0x3ff00000),%g1
2566 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2567 ba .cont16
2568 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2569
2570 .align 16
2571 .update15:
! Fixup for x below the normal range (stage 4): if y is normal-sized
! resume the main path at .cont15, otherwise defer as in .update13.
2572 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2573 bge,pn %icc,.cont15 ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2574
2575 cmp counter,5
2576 ble,a 1f
2577 nop
2578
2579 sub counter,5,counter
2580 st counter,[%fp+tmp_counter]
2581
2582 stx %i2,[%fp+tmp_px]
2583
2584 mov 5,counter
2585 stx %o0,[%fp+tmp_py]
2586 1:
2587 sethi %hi(0x3ff00000),%g1
2588 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2589
2590 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
2591 ba .cont16
2592 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2593
2594 .align 16
2595 .update17:
! Deferral stub (stage 6): defer remaining elements beyond the 6 in
! flight, re-issue the (3_0)/(1_0)/(7_1)/(0_0) FP shadow work and the
! (5_0) scale-store, then rejoin at .cont20.
2596 cmp counter,6
2597 ble 1f
2598 nop
2599
2600 sub counter,6,counter
2601 st counter,[%fp+tmp_counter]
2602
2603 stx %i4,[%fp+tmp_px]
2604
2605 stx %i3,[%fp+tmp_py]
2606
2607 mov 6,counter
2608 1:
2609 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
2610
2611 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
2612
2613 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
2614
2615 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
2616 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
2617
2618 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
2619 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
2620
2621 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
2622 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
2623
2624 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
2625 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
2626
2627 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
2628 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
2629
2630 sethi %hi(0x3ff00000),%g1
2631 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2632
2633 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
2634 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
2635 ba .cont20
2636 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2637
2638 .align 16
2639 .update18:
! Deferral stub (stage 6, later branch site): same as .update17 with
! less shadow work to re-issue; rejoins at .cont20.
2640 cmp counter,6
2641 ble 1f
2642 nop
2643
2644 sub counter,6,counter
2645 st counter,[%fp+tmp_counter]
2646
2647 stx %i4,[%fp+tmp_px]
2648
2649 stx %i3,[%fp+tmp_py]
2650
2651 mov 6,counter
2652 1:
2653 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
2654
2655 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
2656
2657 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
2658 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
2659
2660 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
2661 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
2662
2663 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
2664 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
2665
2666 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
2667 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
2668
2669 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
2670 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
2671
2672 sethi %hi(0x3ff00000),%g1
2673 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2674
2675 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
2676 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
2677 ba .cont20
2678 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2679
2680 .align 16
2681 .update19:
! Fixup for x below the normal range (stage 5): if y is normal-sized
! resume the main path at .cont19a, otherwise defer as in .update17.
2682 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2683 bge,pn %icc,.cont19a ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2684
2685 cmp counter,6
2686 ble,a 1f
2687 nop
2688
2689 sub counter,6,counter
2690 st counter,[%fp+tmp_counter]
2691
2692 stx %i4,[%fp+tmp_px]
2693
2694 mov 6,counter
2695 stx %i3,[%fp+tmp_py]
2696 1:
2697 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
2698 sethi %hi(0x3ff00000),%g1
2699 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2700 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
2701
2702 ba .cont19b
2703 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2704
2705 .align 16
2706 .update21:
! Deferral stub (stage 7): defer remaining elements beyond the 7 in
! flight, re-issue the (4_0)/(2_0)/(7_1)/(0_0)/(1_0) FP shadow work
! and the (6_0) scale-store, then rejoin at .cont24.
2707 cmp counter,7
2708 ble 1f
2709 nop
2710
2711 sub counter,7,counter
2712 st counter,[%fp+tmp_counter]
2713
2714 stx %i2,[%fp+tmp_px]
2715
2716 stx %o0,[%fp+tmp_py]
2717
2718 mov 7,counter
2719 1:
2720 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
2721
2722 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
2723 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
2724
2725 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
2726
2727 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
2728 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
2729
2730 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
2731 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
2732
2733 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
2734 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
2735
2736 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
2737 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
2738
2739 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
2740 sethi %hi(0x3ff00000),%g1
2741 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2742 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
2743
2744 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
2745 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
2746 ba .cont24
2747 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2748
2749 .align 16
2750 .update22:
! Deferral stub (stage 7, later branch site): same as .update21 with
! less shadow work to re-issue; rejoins at .cont24.
2751 cmp counter,7
2752 ble 1f
2753 nop
2754
2755 sub counter,7,counter
2756 st counter,[%fp+tmp_counter]
2757
2758 stx %i2,[%fp+tmp_px]
2759
2760 stx %o0,[%fp+tmp_py]
2761
2762 mov 7,counter
2763 1:
2764 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
2765 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
2766
2767 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
2768
2769 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
2770 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
2771
2772 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
2773 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
2774
2775 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
2776 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
2777
2778 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
2779 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
2780
2781 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
2782 sethi %hi(0x3ff00000),%g1
2783 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2784 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
2785
2786 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
2787 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
2788 ba .cont24
2789 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2790
2791 .align 16
2792 .update23:
! Fixup for x below the normal range (stage 6): if y is normal-sized
! resume the main path at .cont23a, otherwise defer as in .update21.
2793 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2794 bge,pn %icc,.cont23a ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2795
2796 cmp counter,7
2797 ble,a 1f
2798 nop
2799
2800 sub counter,7,counter
2801 st counter,[%fp+tmp_counter]
2802
2803 stx %i2,[%fp+tmp_px]
2804
2805 mov 7,counter
2806 stx %o0,[%fp+tmp_py]
2807 1:
2808 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
2809 sethi %hi(0x3ff00000),%g1
2810 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2811 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
2812
2813 ba .cont23b
2814 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2815
2816 .align 16
2817 .update25:
! Deferral stub (stage 8): defer remaining elements beyond the 8 in
! flight, re-issue the (5_0)/(3_0)/(0_0)/(1_0)/(2_0) FP shadow work
! and the (7_0) scale-store, then rejoin at .cont28.
2818 cmp counter,8
2819 ble 1f
2820 nop
2821
2822 sub counter,8,counter
2823 st counter,[%fp+tmp_counter]
2824
2825 stx %i4,[%fp+tmp_px]
2826
2827 stx %i3,[%fp+tmp_py]
2828
2829 mov 8,counter
2830 1:
2831 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
2832
2833 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
2834 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
2835
2836 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
2837
2838 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
2839 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
2840
2841 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
2842 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
2843
2844 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
2845 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
2846
2847 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
2848 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
2849
2850 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
2851 sethi %hi(0x3ff00000),%g1
2852 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2853 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
2854
2855 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
2856 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
2857 ba .cont28
2858 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2859
2860 .align 16
2861 .update26:
! Deferral stub (stage 8, later branch site): same as .update25 with
! less shadow work to re-issue; rejoins at .cont28.
2862 cmp counter,8
2863 ble 1f
2864 nop
2865
2866 sub counter,8,counter
2867 st counter,[%fp+tmp_counter]
2868
2869 stx %i4,[%fp+tmp_px]
2870
2871 stx %i3,[%fp+tmp_py]
2872
2873 mov 8,counter
2874 1:
2875 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
2876 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
2877
2878 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
2879
2880 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
2881 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
2882
2883 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
2884 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
2885
2886 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
2887 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
2888
2889 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
2890 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
2891
2892 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
2893 sethi %hi(0x3ff00000),%g1
2894 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2895 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
2896
2897 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
2898 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
2899 ba .cont28
2900 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2901
2902 .align 16
2903 .update27:
! Fixup for x below the normal range (stage 7): if y is normal-sized
! resume the main path at .cont27a, otherwise defer as in .update25.
2904 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
2905 bge,pn %icc,.cont27a ! (0_0) if ( hy0 >= 0x00100000 ) main path handles it
2906
2907 cmp counter,8
2908 ble,a 1f
2909 nop
2910
2911 sub counter,8,counter
2912 st counter,[%fp+tmp_counter]
2913
2914 stx %i4,[%fp+tmp_px]
2915
2916 mov 8,counter
2917 stx %i3,[%fp+tmp_py]
2918 1:
2919 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
2920 sethi %hi(0x3ff00000),%g1
2921 add TBL,TBL_SHIFT+24,%i4 ! point px at dummy table operand
2922 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
2923
2924 ba .cont27b
2925 add TBL,TBL_SHIFT+24,%i3 ! point py at dummy table operand
2926
2927 .align 16
2928 .update29:
! Deferral stub (wrapped stage, counter threshold back to 1): defer
! all but one remaining element, re-issue the (6_1)/(4_1)/(1_1)/
! (2_1)/(3_1) FP shadow work plus the (7_1) scale-store, advance pz,
! and rejoin at .cont32.
2929 cmp counter,1
2930 ble 1f
2931 nop
2932
2933 sub counter,1,counter
2934 st counter,[%fp+tmp_counter]
2935
2936 stx %i2,[%fp+tmp_px]
2937
2938 stx %o0,[%fp+tmp_py]
2939
2940 mov 1,counter
2941 1:
2942 fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36;
2943
2944 fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
2945 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
2946 faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
2947
2948 fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
2949
2950 fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
2951 fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
2952
2953 fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
2954 add %i5,stridez,%i5 ! pz += stridez
2955 faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
2956
2957 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
2958 sethi %hi(0x3ff00000),%o4
2959 add TBL,TBL_SHIFT+24,%i2 ! point px at dummy table operand
2960 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
2961
2962 ba .cont32
2963 add TBL,TBL_SHIFT+24,%o0 ! point py at dummy table operand
2964
2965 .align 16
2966 .update30:
2967 cmp counter,1
2968 ble 1f
2969 nop
2970
2971 sub counter,1,counter
2972 st counter,[%fp+tmp_counter]
2973
2974 stx %i2,[%fp+tmp_px]
2975
2976 stx %o0,[%fp+tmp_py]
2977
2978 mov 1,counter
2979 1:
2980 fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres;
2981 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll;
2982 faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0;
2983
2984 fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0);
2985
2986 fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
2987 fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
2988
2989 fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
2990 add %i5,stridez,%i5 ! pz += stridez
2991 faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
2992
2993 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
2994 sethi %hi(0x3ff00000),%o4
2995 add TBL,TBL_SHIFT+24,%i2
2996 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
2997
2998 ba .cont32
2999 add TBL,TBL_SHIFT+24,%o0
3000
3001 .align 16
3002 .update31:
3003 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3004 bge,pn %icc,.cont31 ! (0_0) if ( hy0 < 0x00100000 )
3005
3006 cmp counter,1
3007 ble,a 1f
3008 nop
3009
3010 sub counter,1,counter
3011 st counter,[%fp+tmp_counter]
3012
3013 stx %i2,[%fp+tmp_px]
3014
3015 mov 1,counter
3016 stx %o0,[%fp+tmp_py]
3017 1:
3018 fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0;
3019 fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0;
3020
3021 fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0;
3022 add %i5,stridez,%i5 ! pz += stridez
3023 faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0;
3024
3025 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres;
3026 sethi %hi(0x3ff00000),%o4
3027 add TBL,TBL_SHIFT+24,%i2
3028 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0;
3029
3030 ba .cont32
3031 add TBL,TBL_SHIFT+24,%o0
3032
! NOTE(review): fixup stubs for pipeline stage 2 (K = 2).  Same scheme as the
! earlier .updateNN stubs: spill excess counter plus %i4/%i3 element pointers
! to tmp_counter/tmp_px/tmp_py when counter > 2, cap counter at 2, redirect
! the pointers to the dummy table slot (TBL+TBL_SHIFT+24), replay the FP ops
! skipped by the branch, and rejoin at .cont36 (.update35 rejoins at
! .cont35b, or bails straight to .cont35a when hy0 >= 0x00100000).
3033 .align 16
3034 .update33:
3035 cmp counter,2
3036 ble 1f
3037 nop
3038
3039 sub counter,2,counter
3040 st counter,[%fp+tmp_counter]
3041
3042 stx %i4,[%fp+tmp_px]
3043
3044 stx %i3,[%fp+tmp_py]
3045
3046 mov 2,counter
3047 1:
3048 st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
3049 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36;
3050
3051 fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres;
3052 faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0;
3053
3054 add %i5,stridez,%i5 ! pz += stridez
3055 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
3056 fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
3057
3058 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
3059 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
3060
3061 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
3062 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
3063
3064 fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
3065 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
3066
3067 fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
3068 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
3069
3070 fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
3071 sethi %hi(0x3ff00000),%o4
3072 add TBL,TBL_SHIFT+24,%i4
3073 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
3074
3075 sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
3076 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
3077 ba .cont36
3078 add TBL,TBL_SHIFT+24,%i3
3079
3080 .align 16
3081 .update34:
3082 cmp counter,2
3083 ble 1f
3084 nop
3085
3086 sub counter,2,counter
3087 st counter,[%fp+tmp_counter]
3088
3089 stx %i4,[%fp+tmp_px]
3090
3091 stx %i3,[%fp+tmp_py]
3092
3093 mov 2,counter
3094 1:
3095 add %i5,stridez,%i5 ! pz += stridez
3096 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll;
3097 fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0);
3098
3099 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0;
3100 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0;
3101
3102 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0;
3103 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0;
3104
3105 fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
3106 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
3107
3108 fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres;
3109 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0;
3110
3111 fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0;
3112 sethi %hi(0x3ff00000),%o4
3113 add TBL,TBL_SHIFT+24,%i4
3114 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0;
3115
3116 sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32;
3117 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll;
3118 ba .cont36
3119 add TBL,TBL_SHIFT+24,%i3
3120
3121 .align 16
3122 .update35:
! hy0 >= 0x00100000: no fixup needed, resume at .cont35a.
3123 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3124 bge,pn %icc,.cont35a ! (0_0) if ( hy0 < 0x00100000 )
3125
3126 cmp counter,2
3127 ble,a 1f
3128 nop
3129
3130 sub counter,2,counter
3131 st counter,[%fp+tmp_counter]
3132
3133 stx %i4,[%fp+tmp_px]
3134
3135 mov 2,counter
3136 stx %i3,[%fp+tmp_py]
3137 1:
3138 fmuld %f44,%f48,%f10 ! (3_1) dtmp0 = res0_hi * res0;
3139 sethi %hi(0x3ff00000),%o4
3140 add TBL,TBL_SHIFT+24,%i4
3141 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0;
3142
3143 ba .cont35b
3144 add TBL,TBL_SHIFT+24,%i3
3145
! NOTE(review): fixup stubs for pipeline stage 3 (K = 3), pointers in
! %i2/%o0.  Same spill/cap/dummy-pointer/replay pattern as the earlier
! .updateNN stubs; rejoin at .cont40 (.update39: .cont39b, or .cont39a
! directly when hy0 >= 0x00100000).
3146 .align 16
3147 .update37:
3148 cmp counter,3
3149 ble 1f
3150 nop
3151
3152 sub counter,3,counter
3153 st counter,[%fp+tmp_counter]
3154
3155 stx %i2,[%fp+tmp_px]
3156
3157 stx %o0,[%fp+tmp_py]
3158
3159 mov 3,counter
3160 1:
3161 st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
3162 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36;
3163
3164 fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres;
3165 faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0;
3166
3167 add %i5,stridez,%i5 ! pz += stridez
3168 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
3169 fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
3170
3171 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
3172 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
3173
3174 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
3175 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
3176
3177 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
3178 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
3179
3180 fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
3181 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
3182
3183 fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
3184 sethi %hi(0x3ff00000),%g1
3185 add TBL,TBL_SHIFT+24,%i2
3186 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
3187
3188 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
3189 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
3190 ba .cont40
3191 add TBL,TBL_SHIFT+24,%o0
3192
3193 .align 16
3194 .update38:
3195 cmp counter,3
3196 ble 1f
3197 nop
3198
3199 sub counter,3,counter
3200 st counter,[%fp+tmp_counter]
3201
3202 stx %i2,[%fp+tmp_px]
3203
3204 stx %o0,[%fp+tmp_py]
3205
3206 mov 3,counter
3207 1:
3208 add %i5,stridez,%i5 ! pz += stridez
3209 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll;
3210 fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0);
3211
3212 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0;
3213 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0;
3214
3215 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0;
3216 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0;
3217
3218 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
3219 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
3220
3221 fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres;
3222 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0;
3223
3224 fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0;
3225 sethi %hi(0x3ff00000),%g1
3226 add TBL,TBL_SHIFT+24,%i2
3227 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0;
3228
3229 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32;
3230 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll;
3231 ba .cont40
3232 add TBL,TBL_SHIFT+24,%o0
3233
3234 .align 16
3235 .update39:
! hy0 >= 0x00100000: no fixup needed, resume at .cont39a.
3236 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3237 bge,pn %icc,.cont39a ! (0_0) if ( hy0 < 0x00100000 )
3238
3239 cmp counter,3
3240 ble,a 1f
3241 nop
3242
3243 sub counter,3,counter
3244 st counter,[%fp+tmp_counter]
3245
3246 stx %i2,[%fp+tmp_px]
3247
3248 mov 3,counter
3249 stx %o0,[%fp+tmp_py]
3250 1:
3251 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0;
3252 sethi %hi(0x3ff00000),%g1
3253 add TBL,TBL_SHIFT+24,%i2
3254 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0;
3255
3256 ba .cont39b
3257 add TBL,TBL_SHIFT+24,%o0
3258
! NOTE(review): fixup stubs for pipeline stage 4 (K = 4), pointers in
! %i4/%i3.  Same spill/cap/dummy-pointer/replay pattern; rejoin at .cont44
! (.update43: .cont43b, or .cont43a directly when hy0 >= 0x00100000).
3259 .align 16
3260 .update41:
3261 cmp counter,4
3262 ble 1f
3263 nop
3264
3265 sub counter,4,counter
3266 st counter,[%fp+tmp_counter]
3267
3268 stx %i4,[%fp+tmp_px]
3269
3270 stx %i3,[%fp+tmp_py]
3271
3272 mov 4,counter
3273 1:
3274 st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
3275 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36;
3276
3277 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres;
3278 faddd %f48,%f52,%f52 ! (4_1) res0 += dtmp0;
3279
3280 add %i5,stridez,%i5 ! pz += stridez
3281 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
3282 fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
3283
3284 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
3285 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
3286
3287 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
3288 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
3289
3290 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
3291 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
3292
3293 fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
3294 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
3295
3296 fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
3297 sethi %hi(0x3ff00000),%g1
3298 add TBL,TBL_SHIFT+24,%i4
3299 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
3300
3301 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
3302 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
3303 ba .cont44
3304 add TBL,TBL_SHIFT+24,%i3
3305
3306 .align 16
3307 .update42:
3308 cmp counter,4
3309 ble 1f
3310 nop
3311
3312 sub counter,4,counter
3313 st counter,[%fp+tmp_counter]
3314
3315 stx %i4,[%fp+tmp_px]
3316
3317 stx %i3,[%fp+tmp_py]
3318
3319 mov 4,counter
3320 1:
3321 add %i5,stridez,%i5 ! pz += stridez
3322 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll;
3323 fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0);
3324
3325 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0;
3326 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0;
3327
3328 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0;
3329 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0;
3330
3331 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
3332 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
3333
3334 fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres;
3335 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0;
3336
3337 fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0;
3338 sethi %hi(0x3ff00000),%g1
3339 add TBL,TBL_SHIFT+24,%i4
3340 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0
3341
3342 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32;
3343 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll;
3344 ba .cont44
3345 add TBL,TBL_SHIFT+24,%i3
3346
3347 .align 16
3348 .update43:
! hy0 >= 0x00100000: no fixup needed, resume at .cont43a.
3349 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3350 bge,pn %icc,.cont43a ! (0_0) if ( hy0 < 0x00100000 )
3351
3352 cmp counter,4
3353 ble,a 1f
3354 nop
3355
3356 sub counter,4,counter
3357 st counter,[%fp+tmp_counter]
3358
3359 stx %i4,[%fp+tmp_px]
3360
3361 mov 4,counter
3362 stx %i3,[%fp+tmp_py]
3363 1:
3364 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0;
3365 sethi %hi(0x3ff00000),%g1
3366 add TBL,TBL_SHIFT+24,%i4
3367 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0;
3368
3369 ba .cont43b
3370 add TBL,TBL_SHIFT+24,%i3
3371
! NOTE(review): fixup stubs for pipeline stage 5 (K = 5), pointers in
! %i2/%o0.  Same spill/cap/dummy-pointer/replay pattern; rejoin at .cont48
! (.update47: .cont47b, or .cont47a directly when hy0 >= 0x00100000).
3372 .align 16
3373 .update45:
3374 cmp counter,5
3375 ble 1f
3376 nop
3377
3378 sub counter,5,counter
3379 st counter,[%fp+tmp_counter]
3380
3381 stx %i2,[%fp+tmp_px]
3382
3383 stx %o0,[%fp+tmp_py]
3384
3385 mov 5,counter
3386 1:
3387 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36;
3388
3389 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
3390 st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3391 faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
3392
3393 fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
3394
3395 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
3396 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
3397 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
3398
3399 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
3400 add %i5,stridez,%i5 ! pz += stridez
3401 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
3402
3403 fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
3404 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
3405
3406 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
3407 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
3408
3409 fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
3410 sethi %hi(0x3ff00000),%g1
3411 add TBL,TBL_SHIFT+24,%i2
3412 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
3413
3414 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
3415 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
3416 ba .cont48
3417 add TBL,TBL_SHIFT+24,%o0
3418
3419 .align 16
3420 .update46:
3421 cmp counter,5
3422 ble 1f
3423 nop
3424
3425 sub counter,5,counter
3426 st counter,[%fp+tmp_counter]
3427
3428 stx %i2,[%fp+tmp_px]
3429
3430 stx %o0,[%fp+tmp_py]
3431
3432 mov 5,counter
3433 1:
3434 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres;
3435 st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3436 faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0;
3437
3438 fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0);
3439
3440 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
3441 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
3442 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
3443
3444 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
3445 add %i5,stridez,%i5 ! pz += stridez
3446 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
3447
3448 fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
3449 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
3450
3451 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres;
3452 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0;
3453
3454 fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0;
3455 sethi %hi(0x3ff00000),%g1
3456 add TBL,TBL_SHIFT+24,%i2
3457 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0;
3458
3459 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32;
3460 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll;
3461 ba .cont48
3462 add TBL,TBL_SHIFT+24,%o0
3463
3464 .align 16
3465 .update47:
! hy0 >= 0x00100000: no fixup needed, resume at .cont47a.
3466 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3467 bge,pn %icc,.cont47a ! (0_0) if ( hy0 < 0x00100000 )
3468
3469 cmp counter,5
3470 ble,a 1f
3471 nop
3472
3473 sub counter,5,counter
3474 st counter,[%fp+tmp_counter]
3475
3476 stx %i2,[%fp+tmp_px]
3477
3478 mov 5,counter
3479 stx %o0,[%fp+tmp_py]
3480 1:
3481 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0;
3482 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll;
3483 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0;
3484
3485 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0;
3486 add %i5,stridez,%i5 ! pz += stridez
3487 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0;
3488
3489 fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0;
3490 sethi %hi(0x3ff00000),%g1
3491 add TBL,TBL_SHIFT+24,%i2
3492 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0;
3493
3494 ba .cont47b
3495 add TBL,TBL_SHIFT+24,%o0
3496
! NOTE(review): fixup stubs for pipeline stage 6 (K = 6), pointers in
! %i4/%i3.  Same spill/cap/dummy-pointer/replay pattern; rejoin at .cont52
! (.update51: .cont51b, or .cont51a directly when hy0 >= 0x00100000).
3497 .align 16
3498 .update49:
3499 cmp counter,6
3500 ble 1f
3501 nop
3502
3503 sub counter,6,counter
3504 st counter,[%fp+tmp_counter]
3505
3506 stx %i4,[%fp+tmp_px]
3507
3508 stx %i3,[%fp+tmp_py]
3509
3510 mov 6,counter
3511 1:
3512 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36;
3513
3514 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
3515 st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3516 faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
3517
3518 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
3519
3520 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
3521 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
3522 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
3523
3524 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
3525 add %i5,stridez,%i5 ! pz += stridez
3526 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
3527
3528 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
3529 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
3530
3531 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
3532 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
3533
3534 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
3535 sethi %hi(0x3ff00000),%g1
3536 add TBL,TBL_SHIFT+24,%i4
3537 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
3538
3539 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
3540 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
3541 ba .cont52
3542 add TBL,TBL_SHIFT+24,%i3
3543
3544 .align 16
3545 .update50:
3546 cmp counter,6
3547 ble 1f
3548 nop
3549
3550 sub counter,6,counter
3551 st counter,[%fp+tmp_counter]
3552
3553 stx %i4,[%fp+tmp_px]
3554
3555 stx %i3,[%fp+tmp_py]
3556
3557 mov 6,counter
3558 1:
3559 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres;
3560 st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3561 faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0;
3562
3563 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0);
3564
3565 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
3566 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
3567 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
3568
3569 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
3570 add %i5,stridez,%i5 ! pz += stridez
3571 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
3572
3573 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
3574 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
3575
3576 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres;
3577 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0;
3578
3579 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0;
3580 sethi %hi(0x3ff00000),%g1
3581 add TBL,TBL_SHIFT+24,%i4
3582 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0;
3583
3584 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32;
3585 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll;
3586 ba .cont52
3587 add TBL,TBL_SHIFT+24,%i3
3588
3589 .align 16
3590 .update51:
! hy0 >= 0x00100000: no fixup needed, resume at .cont51a.
3591 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3592 bge,pn %icc,.cont51a ! (0_0) if ( hy0 < 0x00100000 )
3593
3594 cmp counter,6
3595 ble,a 1f
3596 nop
3597
3598 sub counter,6,counter
3599 st counter,[%fp+tmp_counter]
3600
3601 stx %i4,[%fp+tmp_px]
3602
3603 mov 6,counter
3604 stx %i3,[%fp+tmp_py]
3605 1:
3606 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0;
3607 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll;
3608 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0;
3609
3610 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0;
3611 add %i5,stridez,%i5 ! pz += stridez
3612 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0;
3613
3614 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0;
3615 sethi %hi(0x3ff00000),%g1
3616 add TBL,TBL_SHIFT+24,%i4
3617 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0;
3618
3619 ba .cont51b
3620 add TBL,TBL_SHIFT+24,%i3
3621
! NOTE(review): fixup stubs for pipeline stage 7 (K = 7), pointers in
! %i2/%o0.  Same spill/cap/dummy-pointer/replay pattern; rejoin at .cont56
! (.update55: .cont55b, or .cont55a directly when hy0 >= 0x00100000).
! Note the stage-4 temporaries here live in %f46/%f20/%f10 rather than the
! %f20/%f0/%f2 set used by earlier stages — register roles rotate with the
! software pipeline.
3622 .align 16
3623 .update53:
3624 cmp counter,7
3625 ble 1f
3626 nop
3627
3628 sub counter,7,counter
3629 st counter,[%fp+tmp_counter]
3630
3631 stx %i2,[%fp+tmp_px]
3632
3633 stx %o0,[%fp+tmp_py]
3634
3635 mov 7,counter
3636 1:
3637 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36;
3638
3639 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
3640 st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3641 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
3642
3643 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
3644
3645 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
3646 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
3647 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
3648
3649 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
3650 add %i5,stridez,%i5 ! pz += stridez
3651 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
3652
3653 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
3654 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
3655
3656 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
3657 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
3658
3659 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
3660 sethi %hi(0x3ff00000),%g1
3661 add TBL,TBL_SHIFT+24,%i2
3662 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
3663
3664 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
3665 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
3666 ba .cont56
3667 add TBL,TBL_SHIFT+24,%o0
3668
3669 .align 16
3670 .update54:
3671 cmp counter,7
3672 ble 1f
3673 nop
3674
3675 sub counter,7,counter
3676 st counter,[%fp+tmp_counter]
3677
3678 stx %i2,[%fp+tmp_px]
3679
3680 stx %o0,[%fp+tmp_py]
3681
3682 mov 7,counter
3683 1:
3684 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres;
3685 st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3686 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0;
3687
3688 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0);
3689
3690 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
3691 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
3692 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
3693
3694 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
3695 add %i5,stridez,%i5 ! pz += stridez
3696 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
3697
3698 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
3699 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
3700
3701 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0;
3702 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0;
3703
3704 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0;
3705 sethi %hi(0x3ff00000),%g1
3706 add TBL,TBL_SHIFT+24,%i2
3707 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0;
3708
3709 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32;
3710 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll;
3711 ba .cont56
3712 add TBL,TBL_SHIFT+24,%o0
3713
3714 .align 16
3715 .update55:
! hy0 >= 0x00100000: no fixup needed, resume at .cont55a.
3716 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3717 bge,pn %icc,.cont55a ! (0_0) if ( hy0 < 0x00100000 )
3718
3719 cmp counter,7
3720 ble,a 1f
3721 nop
3722
3723 sub counter,7,counter
3724 st counter,[%fp+tmp_counter]
3725
3726 stx %i2,[%fp+tmp_px]
3727
3728 mov 7,counter
3729 stx %o0,[%fp+tmp_py]
3730 1:
3731 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0;
3732 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll;
3733 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0;
3734
3735 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0;
3736 add %i5,stridez,%i5 ! pz += stridez
3737 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0;
3738
3739 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres;
3740 sethi %hi(0x3ff00000),%g1
3741 add TBL,TBL_SHIFT+24,%i2
3742 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0;
3743
3744 ba .cont55b
3745 add TBL,TBL_SHIFT+24,%o0
3746
! NOTE(review): fixup stubs for pipeline stage 0 of the next iteration
! (K = 8, a full unroll), pointers in %i4/%i3.  Same
! spill/cap/dummy-pointer/replay pattern; rejoin at .cont60 (.update59:
! .cont59b, or .cont59a directly when hy0 >= 0x00100000).  Stage (6_1)
! results are stored from %f3 here, not %f1 as in earlier stages.
3747 .align 16
3748 .update57:
3749 cmp counter,8
3750 ble 1f
3751 nop
3752
3753 sub counter,8,counter
3754 st counter,[%fp+tmp_counter]
3755
3756 stx %i4,[%fp+tmp_px]
3757
3758 stx %i3,[%fp+tmp_py]
3759
3760 mov 8,counter
3761 1:
3762 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36;
3763
3764 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
3765 st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3766 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
3767
3768 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
3769
3770 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
3771 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
3772 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
3773
3774 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
3775 add %i5,stridez,%i5 ! pz += stridez
3776 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
3777
3778 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
3779 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
3780
3781 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
3782 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
3783
3784 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
3785 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
3786
3787 sethi %hi(0x3ff00000),%g1
3788 add TBL,TBL_SHIFT+24,%i4
3789
3790 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
3791 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
3792 ba .cont60
3793 add TBL,TBL_SHIFT+24,%i3
3794
3795 .align 16
3796 .update58:
3797 cmp counter,8
3798 ble 1f
3799 nop
3800
3801 sub counter,8,counter
3802 st counter,[%fp+tmp_counter]
3803
3804 stx %i4,[%fp+tmp_px]
3805
3806 stx %i3,[%fp+tmp_py]
3807
3808 mov 8,counter
3809 1:
3810 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres;
3811 st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
3812 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0;
3813
3814 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0);
3815
3816 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
3817 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
3818 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
3819
3820 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
3821 add %i5,stridez,%i5 ! pz += stridez
3822 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
3823
3824 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
3825 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
3826
3827 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0;
3828 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0;
3829
3830 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0;
3831 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0;
3832
3833 sethi %hi(0x3ff00000),%g1
3834 add TBL,TBL_SHIFT+24,%i4
3835
3836 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32;
3837 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll;
3838 ba .cont60
3839 add TBL,TBL_SHIFT+24,%i3
3840
3841 .align 16
3842 .update59:
! hy0 >= 0x00100000: no fixup needed, resume at .cont59a.
3843 cmp %l7,_0x00100000 ! (0_0) hy0 ? 0x00100000
3844 bge,pn %icc,.cont59a ! (0_0) if ( hy0 < 0x00100000 )
3845
3846 cmp counter,8
3847 ble,a 1f
3848 nop
3849
3850 sub counter,8,counter
3851 st counter,[%fp+tmp_counter]
3852
3853 stx %i4,[%fp+tmp_px]
3854
3855 mov 8,counter
3856 stx %i3,[%fp+tmp_py]
3857 1:
3858 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0;
3859 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll;
3860 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0;
3861
3862 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0;
3863 add %i5,stridez,%i5 ! pz += stridez
3864 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0;
3865
3866 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres;
3867 sethi %hi(0x3ff00000),%g1
3868 add TBL,TBL_SHIFT+24,%i4
3869 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0;
3870
3871 ba .cont59b
3872 add TBL,TBL_SHIFT+24,%i3
3873
! Common function epilogue: return to the caller; the restore in the delay
! slot pops the register window allocated by the entry save.
3874 .align 16
3875 .exit:
3876 ret
3877 restore
3878 SET_SIZE(__vrhypot)
3879