1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 */
24 /*
25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 * Use is subject to license terms.
27 */
28
29 .file "__vrsqrt.S"
30
31 #include "libm.h"
32
	RO_DATA
	.align	64

/*
 * Polynomial coefficients and bit-pattern constants, addressed via %o3.
 * The "+0xNN" notes below give each entry's byte offset from .CONST_TBL;
 * they must match the ldd offsets used when the constants are loaded.
 */
.CONST_TBL:
	.word	0xbfe00000, 0x0000002f	! +0x00 K1 =-5.00000000000005209867e-01;
	.word	0x3fd80000, 0x00000058	! +0x08 K2 = 3.75000000000004884257e-01;
	.word	0xbfd3ffff, 0xff444bc8	! +0x10 K3 =-3.12499999317136886551e-01;
	.word	0x3fd17fff, 0xff5006fe	! +0x18 K4 = 2.73437499359815081532e-01;
	.word	0xbfcf80bb, 0xb33ef574	! +0x20 K5 =-2.46116125605037803130e-01;
	.word	0x3fcce0af, 0xf8156949	! +0x28 K6 = 2.25606914648617522896e-01;

	.word	0x001fffff, 0xffffffff	! +0x30 DC0: mantissa mask (clears sign and exponent)
	.word	0x3fe00000, 0x00000000	! +0x38 DC1: exponent field of 0.5; or'ed in to map the mantissa into [0.5,1)
	.word	0x00002000, 0x00000000	! +0x40 DC2: rounding increment below the table-index bits (added with fpadd32)
	.word	0x7fffc000, 0x00000000	! +0x48 DC3: keeps exponent + top 6 mantissa bits (res_c, the table breakpoint)
	.word	0x0007ffff, 0xffffffff	! +0x50 DC4: keeps the low 51 mantissa bits (subnormal scaling path)

	.word	0x43200000, 0x00000000	! +0x58 D2ON51 = pow(2,51)
	.word	0x3ff00000, 0x00000000	! +0x60 DONE = 1.0
52
/*
 * Fixed integer-register assignments, live across the whole routine.
 * Comments use C syntax so cpp strips them; a trailing "!" comment
 * inside a #define would leak into every macro expansion.
 */
#define stridex %l5	/* input stride in bytes (element stride * 8) */
#define stridey %l7	/* output stride in bytes (element stride * 8) */
#define counter %l0	/* elements remaining in the current pass */
#define TBL %l3	/* base address of the __vlibm_TBL_rsqrt table */
#define _0x7ff00000 %o0	/* smallest Inf/NaN high word */
#define _0x00100000 %o1	/* smallest normal-number high word */

/*
 * FP registers pinned to constants loaded from .CONST_TBL.
 */
#define DC0 %f56	/* mantissa mask */
#define DC1 %f54	/* exponent field of 0.5 */
#define DC2 %f48	/* table-index rounding increment */
#define DC3 %f46	/* exponent + top-mantissa-bits mask */
#define K6 %f42	/* polynomial coefficients K1..K6 */
#define K5 %f20
#define K4 %f52
#define K3 %f50
#define K2 %f14
#define K1 %f12
#define DONE %f4	/* 1.0 */

/* Saved count/pointer used to restart the main pass at .begin */
#define tmp_counter %g5
#define tmp_px %o5

/*
 * Stack scratch slots (offsets from %fp).  A 64-bit integer 2^lexp
 * pattern is stored with stx and reloaded with ldd as the double
 * scale factor dlexp.
 */
#define tmp0 STACK_BIAS-0x40
#define tmp1 STACK_BIAS-0x38
#define tmp2 STACK_BIAS-0x30
#define tmp3 STACK_BIAS-0x28
#define tmp4 STACK_BIAS-0x20
#define tmp5 STACK_BIAS-0x18
#define tmp6 STACK_BIAS-0x10
#define tmp7 STACK_BIAS-0x08

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps 0x40
86
87 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
88 ! !!!!! algorithm !!!!!
89 ! ((float*)&res)[0] = ((float*)px)[0];
90 ! ((float*)&res)[1] = ((float*)px)[1];
91 ! hx = *(int*)px;
92 ! if ( hx >= 0x7ff00000 )
93 ! {
94 ! res = DONE / res;
95 ! ((float*)py)[0] = ((float*)&res)[0];
96 ! ((float*)py)[1] = ((float*)&res)[1];
97 ! px += stridex;
98 ! py += stridey;
99 ! continue;
100 ! }
101 ! if ( hx < 0x00100000 )
102 ! {
103 ! ax = hx & 0x7fffffff;
104 ! lx = ((int*)px)[1];
105 !
106 ! if ( (ax | lx) == 0 )
107 ! {
108 ! res = DONE / res;
109 ! ((float*)py)[0] = ((float*)&res)[0];
110 ! ((float*)py)[1] = ((float*)&res)[1];
111 ! px += stridex;
112 ! py += stridey;
113 ! continue;
114 ! }
115 ! else if ( hx >= 0 )
116 ! {
!			if ( hx < 0x00080000 )
!			{
!				res = (double) *(long long*)&res;
!				hx = *(int*)&res - (537 << 21);
!			}
!			else
!			{
!				res = vis_fand(res,DC4);
!				res = (double) *(long long*)&res;
!				res += D2ON51;
!				hx = *(int*)&res - (537 << 21);
!			}
129 ! }
130 ! else
131 ! {
132 ! res = sqrt(res);
133 ! ((float*)py)[0] = ((float*)&res)[0];
134 ! ((float*)py)[1] = ((float*)&res)[1];
135 ! px += stridex;
136 ! py += stridey;
137 ! continue;
138 ! }
139 ! }
140 !
141 ! iexp = hx >> 21;
142 ! iexp = -iexp;
143 ! iexp += 0x5fe;
144 ! lexp = iexp << 52;
145 ! dlexp = *(double*)&lexp;
146 ! hx >>= 10;
147 ! hx &= 0x7f8;
148 ! hx += 8;
149 ! hx &= -16;
150 !
151 ! res = vis_fand(res,DC0);
152 ! res = vis_for(res,DC1);
153 ! res_c = vis_fpadd32(res,DC2);
154 ! res_c = vis_fand(res_c,DC3);
155 !
156 ! addr = (char*)arr + hx;
157 ! dexp_hi = ((double*)addr)[0];
158 ! dexp_lo = ((double*)addr)[1];
159 ! dtmp0 = dexp_hi * dexp_hi;
160 ! xx = res - res_c;
161 ! xx *= dtmp0;
162 ! res = K6 * xx;
163 ! res += K5;
164 ! res *= xx;
165 ! res += K4;
166 ! res *= xx;
167 ! res += K3;
168 ! res *= xx;
169 ! res += K2;
170 ! res *= xx;
171 ! res += K1;
172 ! res *= xx;
173 ! res = dexp_hi * res;
174 ! res += dexp_lo;
175 ! res += dexp_hi;
176 !
177 ! res *= dlexp;
178 !
179 ! ((float*)py)[0] = ((float*)&res)[0];
180 ! ((float*)py)[1] = ((float*)&res)[1];
181 !
182 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
183
184 ENTRY(__vrsqrt)
185 save %sp,-SA(MINFRAME)-tmps,%sp
186 PIC_SETUP(l7)
187 PIC_SET(l7,.CONST_TBL,o3)
188 PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
189 wr %g0,0x82,%asi
190
191 ldd [%o3],K1
192 sethi %hi(0x7ff00000),%o0
193 mov %i3,%o4
194
195 ldd [%o3+0x08],K2
196 sethi %hi(0x00100000),%o1
197 mov %i1,tmp_px
198
199 ldd [%o3+0x10],K3
200 sll %i2,3,stridex
201 mov %i0,tmp_counter
202
203 ldd [%o3+0x18],K4
204 sll %i4,3,stridey
205
206 ldd [%o3+0x20],K5
207 ldd [%o3+0x28],K6
208 ldd [%o3+0x30],DC0
209 ldd [%o3+0x38],DC1
210 ldd [%o3+0x40],DC2
211 ldd [%o3+0x48],DC3
212
213 .begin:
214 mov tmp_counter,counter
215 mov tmp_px,%i1
216 clr tmp_counter
217 .begin1:
218 cmp counter,0
219 ble,pn %icc,.exit
220 ldd [%o3+0x60],DONE
221
222 lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
223 sethi %hi(0x7ffffc00),%i0
224
225 lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
226 add %i0,1023,%i0
227
228 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
229
230 lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
231 sethi %hi(0x00080000),%i4
232
233 lda [%i1+4]%asi,%l4
234 add %i1,stridex,%l6 ! px += stridex
235
236 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
237 lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
238 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
239
240 lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
241 sra %g1,10,%o2 ! (6_1) hx >>= 10;
242 and %g1,%i0,%i2
243
244 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
245 bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 )
246 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
247
248 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
249 bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 )
250 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
251 .cont_spec:
252 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
253
254 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
255
256 add %o2,8,%l4 ! (6_1) hx += 8;
257
258 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
259
260 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
261 sllx %o7,52,%o7 ! (6_1) iexp << 52;
262 and %l4,-16,%l4 ! (6_1) hx = -16;
263
264 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
265 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
266
267 add %l6,stridex,%l6 ! px += stridex
268 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
269
270 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
271 lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
272 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
273
274 sra %g1,10,%o2 ! (0_0) hx >>= 10;
275 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
276 lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
277
278 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
279 bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 )
280 fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3);
281 .cont0:
282 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
283 fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
284
285 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
286 bl,pn %icc,.update1 ! (0_0) if ( hx < 0x00100000 )
287 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
288 .cont1:
289 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
290
291 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
292
293 add %o2,8,%l2 ! (0_0) hx += 8;
294 fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c;
295
296 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
297 sllx %o7,52,%o7 ! (0_0) iexp << 52;
298 and %l2,-16,%l2 ! (0_0) hx = -16;
299
300 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
301 add %l6,stridex,%l6 ! px += stridex
302 stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
303
304 fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0;
305 ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0];
306
307 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
308 lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
309 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
310
311 sra %g1,10,%o2 ! (1_0) hx >>= 10;
312 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
313 bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 )
314 lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
315 .cont2:
316 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
317
318 fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
319 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
320 bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 )
321 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
322 .cont3:
323 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
324 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
325
326 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
327 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
328
329 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
330 add %o2,8,%i2 ! (1_0) hx += 8;
331 fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c;
332
333 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
334 sllx %o7,52,%o7 ! (1_0) iexp << 52;
335 and %i2,-16,%i2 ! (1_0) hx = -16;
336
337 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
338 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
339
340 fmuld %f32,%f10,%f32 ! (0_0) xx *= dtmp0;
341 add %l6,stridex,%l6 ! px += stridex
342 ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0];
343 faddd %f62,K5,%f62 ! (6_1) res += K5;
344
345 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
346 lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
347 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
348
349 sra %g1,10,%o2 ! (2_0) hx >>= 10;
350 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
351 bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 )
352 lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
353 .cont4:
354 fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
355 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
356
357 fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
358 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
359 bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 )
360 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
361 .cont5:
362 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
363 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
364
365 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
366 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
367
368 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
369 add %o2,8,%i4 ! (2_0) hx += 8;
370 fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c;
371
372 faddd %f40,K4,%f40 ! (6_1) res += K4;
373
374 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
375 sllx %o7,52,%o7 ! (2_0) iexp << 52;
376 and %i4,-16,%i4 ! (2_0) hx = -16;
377
378 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
379 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
380
381 fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0;
382 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
383 faddd %f62,K5,%f62 ! (0_0) res += K5;
384
385 fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
386 add %l6,stridex,%l6 ! px += stridex
387
388 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
389 lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
390 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
391
392 sra %g1,10,%o2 ! (3_0) hx >>= 10;
393 cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
394 bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 )
395 lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
396 .cont6:
397 fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
398 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
399 fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3);
400
401 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
402 bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 )
403 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
404 faddd %f34,K3,%f6 ! (6_1) res += K3;
405 .cont7:
406 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
407 fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
408
409 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
410 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
411
412 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
413 add %o2,8,%i5 ! (3_0) hx += 8;
414 fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c;
415
416 fmuld %f6,%f26,%f22 ! (6_1) res *= xx;
417 faddd %f60,K4,%f60 ! (0_0) res += K4;
418
419 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
420 sllx %o7,52,%o7 ! (3_0) iexp << 52;
421 and %i5,-16,%i5 ! (3_0) hx = -16;
422
423 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
424 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
425
426 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
427 add %l6,stridex,%i0 ! px += stridex
428 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
429 faddd %f62,K5,%f62 ! (1_0) res += K5;
430
431 faddd %f22,K2,%f10 ! (6_1) res += K2;
432 fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
433
434 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
435 lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
436 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
437
438 sra %g1,10,%o2 ! (4_0) hx >>= 10;
439 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
440 bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 )
441 lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
442 .cont8:
443 fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3);
444 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
445
446 fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
447 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
448 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
449 faddd %f34,K3,%f60 ! (0_0) res += K3;
450
451 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
452 bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 )
453 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
454 fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
455 .cont9:
456 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
457 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
458
459 fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx;
460 add %o2,8,%l1 ! (4_0) hx += 8;
461 fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c;
462
463 fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
464 faddd %f62,K4,%f6 ! (1_0) res += K4;
465
466 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
467 sllx %o7,52,%o7 ! (4_0) iexp << 52;
468 and %l1,-16,%l1 ! (4_0) hx = -16;
469 faddd %f58,K1,%f58 ! (6_1) res += K1;
470
471 add %i0,stridex,%i1 ! px += stridex
472 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
473 stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp;
474
475 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
476 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
477 faddd %f10,K5,%f62 ! (2_0) res += K5;
478
479 fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
480 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
481 nop
482 faddd %f60,K2,%f60 ! (0_0) res += K2;
483
484 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
485 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
486 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
487 fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
488
489 sra %g1,10,%o2 ! (5_0) hx >>= 10;
490 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
491 bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 )
492 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
493 .cont10:
494 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
495 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
496
497 fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
498 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
499 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
500 faddd %f34,K3,%f34 ! (1_0) res += K3;
501
502 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
503 bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 )
504 nop
505 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
506 .cont11:
507 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
508 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi;
509 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
510
511 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
512 add %o2,8,%i3 ! (5_0) hx += 8;
513 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
514
515 fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
516 or %g0,%o4,%i0
517
518 cmp counter,7
519 bl,pn %icc,.tail
520 faddd %f62,K4,%f34 ! (2_0) res += K4;
521
522 ba .main_loop
523 sub counter,7,counter ! counter
524
525 .align 16
526 .main_loop:
527 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
528 and %i3,-16,%i3 ! (5_1) hx = -16;
529 lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px;
530 faddd %f58,K1,%f58 ! (0_1) res += K1;
531
532 add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx;
533 sllx %o7,52,%o7 ! (5_1) iexp << 52;
534 stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp;
535 faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
536
537 faddd %f22,K5,%f62 ! (3_1) res += K5;
538 add %i1,stridex,%l6 ! px += stridex
539 ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0];
540 fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
541
542 faddd %f24,K2,%f26 ! (1_1) res += K2;
543 add %i0,stridey,%i1 ! px += stridey
544 ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
545 fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
546
547 fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
548 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
549 lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0];
550 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
551
552 lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1];
553 sra %g1,10,%o2 ! (6_1) hx >>= 10;
554 fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi;
555 faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
556
557 fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
558 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000
559 ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
560 fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3);
561
562 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
563 bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 )
564 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
565 faddd %f34,K3,%f34 ! (2_1) res += K3;
566 .cont12:
567 fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
568 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000
569 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
570 fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
571
572 fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
573 bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 )
574 ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
575 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);
576 .cont13:
577 fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
578 add %o2,8,%l4 ! (6_1) hx += 8;
579 st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
580 fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c;
581
582 fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
583 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe;
584 st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
585 faddd %f32,K4,%f32 ! (3_1) res += K4;
586
587 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px;
588 sllx %o7,52,%o7 ! (6_1) iexp << 52;
589 and %l4,-16,%l4 ! (6_1) hx = -16;
590 faddd %f26,K1,%f26 ! (1_1) res += K1;
591
592 add %i1,stridey,%i0 ! px += stridey
593 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx;
594 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp;
595 faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
596
597 fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0;
598 add %l6,stridex,%l6 ! px += stridex
599 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0];
600 faddd %f62,K5,%f62 ! (4_1) res += K5;
601
602 fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
603 sra %g1,10,%o2 ! (0_0) hx >>= 10;
604 ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0];
605 faddd %f28,K2,%f32 ! (2_1) res += K2;
606
607 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
608 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
609 lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0];
610 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
611
612 fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi;
613 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
614 lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1];
615 faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
616
617 fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
618 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000
619 ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
620 fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3);
621
622 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
623 bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 )
624 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
625 faddd %f34,K3,%f34 ! (3_1) res += K3;
626 .cont14:
627 fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res;
628 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000
629 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
630 fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
631
632 fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
633 bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 )
634 ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
635 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);
636 .cont15:
637 fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx;
638 add %o2,8,%l2 ! (0_0) hx += 8;
639 st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
640 fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c;
641
642 fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
643 nop
644 st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
645 faddd %f38,K4,%f38 ! (4_1) res += K4;
646
647 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px;
648 sllx %o7,52,%o7 ! (0_0) iexp << 52;
649 and %l2,-16,%l2 ! (0_0) hx = -16;
650 faddd %f32,K1,%f32 ! (2_1) res += K1;
651
652 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx;
653 add %l6,stridex,%l6 ! px += stridex
654 stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp;
655 faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
656
657 fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0;
658 add %i0,stridey,%i1 ! px += stridey
659 ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0];
660 faddd %f62,K5,%f62 ! (5_1) res += K5;
661
662 fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
663 sra %g1,10,%o2 ! (1_0) hx >>= 10;
664 ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
665 faddd %f44,K2,%f38 ! (3_1) res += K2;
666
667 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
668 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
669 lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0];
670 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
671
672 fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi;
673 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000
674 lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1];
675 faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi;
676
677 fmuld %f62,%f58,%f36 ! (5_1) res *= xx;
678 bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 )
679 ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
680 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);
681 .cont16:
682 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
683 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000
684 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
685 faddd %f34,K3,%f34 ! (4_1) res += K3;
686
687 fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
688 bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 )
689 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
690 fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
691 .cont17:
692 fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp;
693 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe;
694 ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1];
695 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2);
696
697 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx;
698 add %o2,8,%i2 ! (1_0) hx += 8;
699 st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
700 fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c;
701
702 fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
703 nop
704 st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
705 faddd %f36,K4,%f36 ! (5_1) res += K4;
706
707 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px;
708 sllx %o7,52,%o7 ! (1_0) iexp << 52;
709 and %i2,-16,%i2 ! (1_0) hx = -16;
710 faddd %f38,K1,%f38 ! (3_1) res += K1;
711
712 add %i1,stridey,%i0 ! px += stridey
713 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx;
714 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp;
715 faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo;
716
717 fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0;
718 add %l6,stridex,%l6 ! px += stridex
719 ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0];
720 faddd %f62,K5,%f62 ! (6_1) res += K5;
721
722 fmuld %f36,%f58,%f34 ! (5_1) res *= xx;
723 sra %g1,10,%o2 ! (2_0) hx >>= 10;
724 ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0];
725 faddd %f28,K2,%f36 ! (4_1) res += K2;
726
727 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
728 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
729 lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0];
730 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);
731
732 fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi;
733 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000
734 lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1];
735 faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
736
737 fmuld %f62,%f26,%f40 ! (6_1) res *= xx;
738 bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 )
739 ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
740 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);
741 .cont18:
742 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
743 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000
744 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
745 faddd %f34,K3,%f34 ! (5_1) res += K3;
746
747 fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res;
748 bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 )
749 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
750 fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
751 .cont19:
752 fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
753 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe;
754 ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
755 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2);
756
757 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx;
758 add %o2,8,%i4 ! (2_0) hx += 8;
759 st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
760 fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c;
761
762 fmuld %f34,%f58,%f44 ! (5_1) res *= xx;
763 nop
764 st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
765 faddd %f40,K4,%f40 ! (6_1) res += K4;
766
767 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px;
768 sllx %o7,52,%o7 ! (2_0) iexp << 52;
769 and %i4,-16,%i4 ! (2_0) hx = -16;
770 faddd %f36,K1,%f36 ! (4_1) res += K1;
771
772 add %l6,stridex,%l6 ! px += stridex
773 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx;
774 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp;
775 faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
776
777 fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0;
778 add %i0,stridey,%i1 ! px += stridey
779 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0];
780 faddd %f62,K5,%f62 ! (0_0) res += K5;
781
782 fmuld %f40,%f26,%f34 ! (6_1) res *= xx;
783 sra %g1,10,%o2 ! (3_0) hx >>= 10;
784 ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
785 faddd %f44,K2,%f40 ! (5_1) res += K2;
786
787 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
788 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
789 lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0];
790 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);
791
792 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi;
793 cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000
794 lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1];
795 faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi;
796
797 fmuld %f62,%f32,%f60 ! (0_0) res *= xx;
798 bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 )
799 ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
800 fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);
801 .cont20:
802 fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
803 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000
804 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
805 faddd %f34,K3,%f10 ! (6_1) res += K3;
806
807 fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
808 bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 )
809 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
810 fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);
811 .cont21:
812 fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
813 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe;
814 ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
815 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2);
816
817 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx;
818 add %o2,8,%i5 ! (3_0) hx += 8;
819 st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
820 fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c;
821
822 fmuld %f10,%f26,%f4 ! (6_1) res *= xx;
823 nop
824 st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
825 faddd %f60,K4,%f60 ! (0_0) res += K4;
826
827 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px;
828 sllx %o7,52,%o7 ! (3_0) iexp << 52;
829 and %i5,-16,%i5 ! (3_0) hx = -16;
830 faddd %f40,K1,%f40 ! (5_1) res += K1;
831
832 add %l6,stridex,%i0 ! px += stridex
833 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx;
834 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp;
835 faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
836
837 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0;
838 add %i1,stridey,%l6 ! px += stridey
839 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0];
840 faddd %f62,K5,%f62 ! (1_0) res += K5;
841
842 faddd %f4,K2,%f10 ! (6_1) res += K2;
843 sra %g1,10,%o2 ! (4_0) hx >>= 10;
844 nop
845 fmuld %f60,%f32,%f34 ! (0_0) res *= xx;
846
847 fmuld %f40,%f58,%f40 ! (5_1) res *= xx;
848 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
849 lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0];
850 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1);
851
852 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi;
853 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000
854 lda [%i0+4]%asi,%f7 ! (5_0) ((float*)res)[1] = ((float*)px)[1];
855 faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
856
857 fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3);
858 bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 )
859 ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
860 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;
861 .cont22:
862 fmuld %f10,%f26,%f58 ! (6_1) res *= xx;
863 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000
864 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
865 faddd %f34,K3,%f60 ! (0_0) res += K3;
866
! -- Continuation of the software-pipelined main loop.  Seven loop
! -- iterations are in flight at once; each instruction's comment tag
! -- (i_j) names its (stage index _ iteration age) in the pipeline.
! -- Instruction order and delay-slot placement are load-bearing:
! -- do not reorder.
867 fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res;
868 bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 )
869 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;  (delay slot)
870 fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);
871 .cont23:
872 fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
873 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe;
874 ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1];
875 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2);
876
877 fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx;
878 add %o2,8,%l1 ! (4_0) hx += 8;
879 st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
880 fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c;
881
882 fmuld %f60,%f32,%f60 ! (0_0) res *= xx;
883 sllx %o7,52,%o7 ! (4_0) iexp << 52;  position exponent bits
884 st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
885 faddd %f62,K4,%f6 ! (1_0) res += K4;
886
887 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px;
888 add %i0,stridex,%i1 ! px += stridex
889 and %l1,-16,%l1 ! (4_0) hx &= -16;  align table offset
890 faddd %f58,K1,%f58 ! (6_1) res += K1;
891
892 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx;
893 add %l6,stridey,%i0 ! px += stridey
894 stx %o7,[%fp+tmp6] ! (4_0) spill iexp bits; reloaded later as dlexp
895 faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo;
896
897 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0;
898 nop
899 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0];
900 faddd %f30,K5,%f62 ! (2_0) res += K5;
901
902 fmuld %f6,%f38,%f34 ! (1_0) res *= xx;
903 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
904 ldd [%l4],%f30 ! (6_1) dexp_hi = ((double*)addr)[0];
905 faddd %f60,K2,%f60 ! (0_0) res += K2;
906
907 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1);
908 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
909 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0];
910 fmuld %f58,%f26,%f26 ! (6_1) res *= xx;
911
912 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dtmp0 * dtmp0;
913 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000
914 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1];
915 faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi;
916
917 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3);
918 bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 )  Inf/NaN
919 ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp;  (delay slot)
920 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;
921 .cont24:
922 fmuld %f60,%f32,%f58 ! (0_0) res *= xx;
923 sra %g1,10,%o2 ! (5_0) hx >>= 10;
924 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000
925 faddd %f34,K3,%f34 ! (1_0) res += K3;
926
927 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res;
928 bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 )  zero/subnormal/neg
929 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;  (delay slot)
930 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
931 .cont25:
932 fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp;
933 subcc counter,7,counter ! counter -= 7;
934 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1];
935 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2);
936
937 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx;
938 add %o2,8,%i3 ! (5_0) hx += 8;
939 st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0];
940 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c;
941
942 fmuld %f34,%f38,%f24 ! (1_0) res *= xx;
943 st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1];
944 bpos,pt %icc,.main_loop ! loop while >= 7 elements remain
945 faddd %f62,K4,%f34 ! (2_0) res += K4;  (delay slot)
946
947 add counter,7,counter ! undo the speculative counter -= 7
! -- Pipeline drain: fewer than 7 elements remain.  Up to six results
! -- are still in flight from the main loop; each section below
! -- completes one of them (polynomial tail, table scaling, store),
! -- decrementing counter and bailing to .begin when done.
948 .tail:
949 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe;
950 subcc counter,1,counter
951 bneg,a .begin ! no results left: restart/exit path
952 mov %i0,%o4 ! (annulled delay slot) py for .begin
953
954 faddd %f58,K1,%f58 ! (0_1) res += K1;
955
956 faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo;
957
958 faddd %f22,K5,%f62 ! (3_1) res += K5;
959 fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0;
960
961 faddd %f24,K2,%f26 ! (1_1) res += K2;
962 add %i1,stridex,%l6 ! px += stridex
963 ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0];
964 fmuld %f34,%f36,%f34 ! (2_1) res *= xx;
965
966 fmuld %f58,%f32,%f58 ! (0_1) res *= xx;
967
968 add %i0,stridey,%i1 ! px += stridey
969 faddd %f8,%f30,%f30 ! (6_2) res += dexp_hi;
970
971 fmuld %f62,%f40,%f32 ! (3_1) res *= xx;
972 ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp;
973
974 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
975 faddd %f34,K3,%f34 ! (2_1) res += K3;
976
977 fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res;
978
979 fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp;
980 ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1];
981
! store result for oldest in-flight element (6_2)
982 fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx;
983 st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0];
984
985 fmuld %f34,%f36,%f28 ! (2_1) res *= xx;
986 st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1];
987 faddd %f32,K4,%f32 ! (3_1) res += K4;
988
989 subcc counter,1,counter
990 bneg,a .begin
991 mov %i1,%o4
992
993 faddd %f26,K1,%f26 ! (1_1) res += K1;
994
995 faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo;
996
997 add %l6,stridex,%l6 ! px += stridex
998 faddd %f62,K5,%f62 ! (4_1) res += K5;
999
1000 fmuld %f32,%f40,%f34 ! (3_1) res *= xx;
1001 add %i1,stridey,%i0 ! px += stridey
1002 ldd [%i2],%f22 ! (1_1) dexp_hi = ((double*)addr)[0];
1003 faddd %f28,K2,%f32 ! (2_1) res += K2;
1004
1005 fmuld %f26,%f38,%f26 ! (1_1) res *= xx;
1006
1007 faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi;
1008
1009 fmuld %f62,%f60,%f38 ! (4_1) res *= xx;
1010 ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp;
1011
1012 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
1013 faddd %f34,K3,%f34 ! (3_1) res += K3;
1014
1015 fmuld %f22,%f26,%f26 ! (1_1) res = dexp_hi * res;
1016
1017 fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp;
1018 ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1];
1019
! store result for element (0_1)
1020 st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0];
1021
1022 fmuld %f34,%f40,%f44 ! (3_1) res *= xx;
1023 st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1];
1024 faddd %f38,K4,%f38 ! (4_1) res += K4;
1025
1026 subcc counter,1,counter
1027 bneg,a .begin
1028 mov %i0,%o4
1029
1030 faddd %f32,K1,%f32 ! (2_1) res += K1;
1031
1032 add %l6,stridex,%l6 ! px += stridex
1033 faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo;
1034
1035 add %i0,stridey,%i1 ! px += stridey
1036
1037 fmuld %f38,%f60,%f34 ! (4_1) res *= xx;
1038 ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0];
1039 faddd %f44,K2,%f38 ! (3_1) res += K2;
1040
1041 fmuld %f32,%f36,%f32 ! (2_1) res *= xx;
1042
1043 faddd %f8,%f22,%f22 ! (1_1) res += dexp_hi;
1044
1045 ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp;
1046
1047 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
1048 faddd %f34,K3,%f34 ! (4_1) res += K3;
1049
1050 fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res;
1051
1052 fmuld %f22,%f62,%f2 ! (1_1) res *= dlexp;
1053 ldd [%i4+8],%f22 ! (2_1) dexp_lo = ((double*)addr)[1];
1054
! store result for element (1_1)
1055 st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0];
1056
1057 fmuld %f34,%f60,%f28 ! (4_1) res *= xx;
1058 st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1];
1059
1060 subcc counter,1,counter
1061 bneg,a .begin
1062 mov %i1,%o4
1063
1064 faddd %f38,K1,%f38 ! (3_1) res += K1;
1065
1066 faddd %f32,%f22,%f8 ! (2_1) res += dexp_lo;
1067
1068 add %l6,stridex,%l6 ! px += stridex
1069
1070 add %i1,stridey,%i0 ! px += stridey
1071 ldd [%i5],%f22 ! (3_1) dexp_hi = ((double*)addr)[0];
1072 faddd %f28,K2,%f36 ! (4_1) res += K2;
1073
1074 fmuld %f38,%f40,%f38 ! (3_1) res *= xx;
1075
1076 faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi;
1077
1078 ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp;
1079
1080 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
1081
1082 fmuld %f22,%f38,%f38 ! (3_1) res = dexp_hi * res;
1083
1084 fmuld %f24,%f62,%f2 ! (2_1) res *= dlexp;
1085 ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1];
1086
! store result for element (2_1)
1087 st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0];
1088
1089 st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1];
1090
1091 subcc counter,1,counter
1092 bneg,a .begin
1093 mov %i0,%o4
1094
1095 faddd %f36,K1,%f36 ! (4_1) res += K1;
1096
1097 faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo;
1098
1099 add %i0,stridey,%i1 ! px += stridey
1100
1101 add %l6,stridex,%l6 ! px += stridex
1102 ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0];
1103
1104 fmuld %f36,%f60,%f36 ! (4_1) res *= xx;
1105
1106 faddd %f8,%f22,%f8 ! (3_1) res += dexp_hi;
1107
1108 ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp;
1109
1110 fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res;
1111
1112 fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp;
1113 ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1];
1114
! store result for element (3_1)
1115 st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0];
1116
1117 st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1];
1118
1119 subcc counter,1,counter
1120 bneg,a .begin
1121 mov %i1,%o4
1122
1123 faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo;
1124
1125 add %l6,stridex,%i0 ! px += stridex
1126
1127 add %i1,stridey,%l6 ! px += stridey
1128
1129 faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi;
1130
1131 ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp;
1132
1133 fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp;
1134
! store the last in-flight result (4_1), then restart
1135 st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0];
1136
1137 st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1];
1138
1139 ba .begin
1140 add %i1,stridey,%o4 ! (delay slot) py for .begin
1141
! -- Scalar special-case paths, one element at a time.
1142 .align 16
! .spec0: operand handled by plain division: DONE/x gives 0 for +Inf
! and a quiet NaN for NaN input — both are correct rsqrt results.
! NOTE(review): exact entry conditions are set up before this chunk —
! confirm against the loop-entry classification code (not visible here).
1143 .spec0:
1144 fdivd DONE,%f0,%f0 ! res = DONE / res;
1145 add %i1,stridex,%i1 ! px += stridex
1146 st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
1147 st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
1148 add %o4,stridey,%o4 ! py += stridey
1149 ba .begin1
1150 sub counter,1,counter ! (delay slot) one element consumed
1151
1152 .align 16
! .spec1: small/zero/negative operand.
!   x == +-0 : 1/0  -> +-Inf (raises divide-by-zero)     (branch to 2f)
!   x <  0   : sqrt(x) -> NaN (raises invalid)           (branch to 2f)
!   otherwise: subnormal — renormalize via fxtod (plus a 2^51 offset
!              for the very smallest values), recompute hx/iexp, and
!              rejoin the pipelined loop at .cont_spec.
1153 .spec1:
1154 orcc %i2,%l4,%g0 ! low word | high-mantissa bits == 0 ?
1155 bz,a 2f ! yes: x is +-0
1156 fdivd DONE,%f0,%f0 ! res = DONE / res;  (annulled if not taken)
1157
1158 cmp %g1,0
1159 bl,a 2f ! x < 0: let sqrt produce the NaN
1160 fsqrtd %f0,%f0 ! res = sqrt(res);  (annulled if not taken)
1161
1162 cmp %g1,%i4 ! threshold set up by caller (not visible here)
1163 bge,a 1f ! larger subnormals take the DC4/D2ON51 path
1164 ldd [%o3+0x50],%f18 ! (annulled delay slot) load DC4 mask
1165
! tiny subnormal: treat the raw bits as an integer and convert
1166 fxtod %f0,%f0 ! res = *(long long*)&res;
1167 st %f0,[%fp+tmp0]
1168
1169 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
1170 ld [%fp+tmp0],%g1 ! re-read high word of renormalized value
1171
1172 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1173 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1174
1175 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1176 sub %o7,537,%o7 ! bias correction for the fxtod rescale
1177
1178 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1179 ba .cont_spec
1180 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;  (delay slot)
1181
1182 1:
! larger subnormal: mask mantissa, convert, and add 2^51 offset
1183 fand %f0,%f18,%f0 ! res = vis_fand(res,DC4);
1184
1185 ldd [%o3+0x58],%f28 ! D2ON51 = 2^51
1186 fxtod %f0,%f0 ! res = *(long long*)&res;
1187
1188 faddd %f0,%f28,%f0 ! res += D2ON51;
1189 st %f0,[%fp+tmp0]
1190
1191 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);
1192 ld [%fp+tmp0],%g1
1193
1194 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1195 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1196
1197 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1198 sub %o7,537,%o7
1199
1200 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1201 ba .cont_spec
1202 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;  (delay slot)
1203
1204 2:
! zero or negative input: result already computed above; store it
1205 add %i1,stridex,%i1 ! px += stridex
1206 st %f0,[%o4] ! ((float*)py)[0] = ((float*)&res)[0];
1207 st %f1,[%o4+4] ! ((float*)py)[1] = ((float*)&res)[1];
1208 add %o4,stridey,%o4 ! py += stridey
1209 ba .begin1
1210 sub counter,1,counter ! (delay slot) one element consumed
1211
! -- Out-of-line fixup handlers, branched to from the pipeline-fill
! -- code when an operand is special.  Even-numbered handlers catch
! -- hx >= 0x7ff00000 (Inf/NaN): they clamp counter so the loop stops
! -- early and the element is redone by the scalar .spec paths.
! -- Odd-numbered handlers catch hx < 0x00100000: negative/zero also
! -- clamp counter for scalar reprocessing, while true subnormals are
! -- renormalized inline (fxtod, plus a 2^51 offset for the smallest)
! -- and fall back into the pipeline at the matching .contN label.
1212 .align 16
! .update0: Inf/NaN in the stage-0 fill — truncate loop to this element
1213 .update0:
1214 cmp counter,1
1215 ble .cont0
1216 nop
1217
1218 sub %l6,stridex,tmp_px ! remember where to resume
1219 sub counter,1,tmp_counter ! elements left after the clamp
1220
1221 ba .cont0
1222 mov 1,counter ! (delay slot) process just this one
1223
1224 .align 16
! .update1: small operand in stage 0
1225 .update1:
1226 cmp counter,1
1227 ble .cont1
1228 sub %l6,stridex,%i1 ! (delay slot) back up to the operand
1229
1230 ld [%i1+4],%i2 ! low word of x
1231 cmp %g1,0
1232 bl 1f ! negative: scalar path
1233
1234 orcc %g1,%i2,%g0
1235 bz 1f ! +-0: scalar path
1236 sethi %hi(0x00080000),%i3 ! (delay slot) subnormal split point
1237
1238 cmp %g1,%i3
1239 bge,a 2f ! larger subnormal: DC4/D2ON51 path
1240 ldd [%o3+0x50],%f18 ! (annulled delay slot) DC4 mask
1241
! tiny subnormal: renormalize by integer->double conversion
1242 fxtod %f8,%f8 ! res = *(long long*)&res;
1243 st %f8,[%fp+tmp7]
1244
1245 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
1246 ld [%fp+tmp7],%g1
1247
1248 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1249 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1250 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1251
1252 sub %o7,537,%o7 ! bias correction for the rescale
1253
1254 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1255
1256 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1257 ba .cont1
1258 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;  (delay slot)
1259 2:
! larger subnormal: mask mantissa, convert, add 2^51
1260 fand %f8,%f18,%f8
1261 fxtod %f8,%f8 ! res = *(long long*)&res;
1262 ldd [%o3+0x58],%f18 ! D2ON51
1263 faddd %f8,%f18,%f18
1264 st %f8,[%fp+tmp7]
1265
1266 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0);
1267 ld [%fp+tmp7],%g1
1268
1269 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1270 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1271 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1272
1273 sub %o7,537,%o7
1274
1275 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1276
1277 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1278 ba .cont1
1279 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;  (delay slot)
1280 1:
! negative or zero: clamp the loop for scalar reprocessing
1281 sub %l6,stridex,tmp_px
1282 sub counter,1,tmp_counter
1283
1284 ba .cont1
1285 mov 1,counter
1286
1287 .align 16
! .update2: Inf/NaN in stage 1
1288 .update2:
1289 cmp counter,2
1290 ble .cont2
1291 nop
1292
1293 sub %l6,stridex,tmp_px
1294 sub counter,2,tmp_counter
1295
1296 ba .cont2
1297 mov 2,counter
1298
1299 .align 16
! .update3: small operand in stage 1
1300 .update3:
1301 cmp counter,2
1302 ble .cont3
1303 sub %l6,stridex,%i1
1304
1305 ld [%i1+4],%i2
1306 cmp %g1,0
1307 bl 1f
1308
1309 orcc %g1,%i2,%g0
1310 bz 1f
1311 sethi %hi(0x00080000),%i3
1312
1313 cmp %g1,%i3
1314 bge,a 2f
1315 ldd [%o3+0x50],%f18
1316
1317 fxtod %f0,%f0 ! res = *(long long*)&res;
1318 st %f0,[%fp+tmp7]
1319
1320 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
1321 ld [%fp+tmp7],%g1
1322
1323 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1324 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1325
1326 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1327 sub %o7,537,%o7
1328 ba .cont3
1329 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;  (delay slot)
1330 2:
1331 fand %f0,%f18,%f0
1332 fxtod %f0,%f0 ! res = *(long long*)&res;
1333 ldd [%o3+0x58],%f18
1334 faddd %f0,%f18,%f0
1335 st %f0,[%fp+tmp7]
1336
1337 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0);
1338 ld [%fp+tmp7],%g1
1339
1340 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1341 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1);
1342
1343 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1344 sub %o7,537,%o7
1345 ba .cont3
1346 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;  (delay slot)
1347 1:
1348 sub %l6,stridex,tmp_px
1349 sub counter,2,tmp_counter
1350
1351 ba .cont3
1352 mov 2,counter
1353
1354 .align 16
! .update4: Inf/NaN in stage 2
1355 .update4:
1356 cmp counter,3
1357 ble .cont4
1358 nop
1359
1360 sub %l6,stridex,tmp_px
1361 sub counter,3,tmp_counter
1362
1363 ba .cont4
1364 mov 3,counter
1365
1366 .align 16
! .update5: small operand in stage 2
1367 .update5:
1368 cmp counter,3
1369 ble .cont5
1370 sub %l6,stridex,%i1
1371
1372 ld [%i1+4],%i3
1373 cmp %g1,0
1374 bl 1f
1375
1376 orcc %g1,%i3,%g0
1377 bz 1f
1378 sethi %hi(0x00080000),%i4
1379
1380 cmp %g1,%i4
1381 bge,a 2f
1382 ldd [%o3+0x50],%f18
1383
1384 fxtod %f6,%f6 ! res = *(long long*)&res;
1385 st %f6,[%fp+tmp7]
1386
1387 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
1388 ld [%fp+tmp7],%g1
1389
1390 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1391 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1392
1393 sub %o7,537,%o7
1394 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1395 ba .cont5
1396 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);  (delay slot)
1397 2:
1398 fand %f6,%f18,%f6
1399 fxtod %f6,%f6 ! res = *(long long*)&res;
1400 ldd [%o3+0x58],%f18
1401 faddd %f6,%f18,%f6
1402 st %f6,[%fp+tmp7]
1403
1404 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);
1405 ld [%fp+tmp7],%g1
1406
1407 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1408 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1409
1410 sub %o7,537,%o7
1411 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1412 ba .cont5
1413 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1);  (delay slot)
1414 1:
1415 sub %l6,stridex,tmp_px
1416 sub counter,3,tmp_counter
1417
1418 ba .cont5
1419 mov 3,counter
1420
1421 .align 16
! .update6: Inf/NaN in stage 3
1422 .update6:
1423 cmp counter,4
1424 ble .cont6
1425 nop
1426
1427 sub %l6,stridex,tmp_px
1428 sub counter,4,tmp_counter
1429
1430 ba .cont6
1431 mov 4,counter
1432
1433 .align 16
! .update7: small operand in stage 3 (delay-slot faddd kept live)
1434 .update7:
1435 sub %l6,stridex,%i1
1436 cmp counter,4
1437 ble .cont7
1438 faddd %f34,K3,%f6 ! (6_1) res += K3;  (delay slot)
1439
1440 ld [%i1+4],%i3
1441 cmp %g1,0
1442 bl 1f
1443
1444 orcc %g1,%i3,%g0
1445 bz 1f
1446 sethi %hi(0x00080000),%i5
1447
1448 cmp %g1,%i5
1449 bge,a 2f
1450 ldd [%o3+0x50],%f18
1451
1452 fxtod %f0,%f0 ! res = *(long long*)&res;
1453 st %f0,[%fp+tmp7]
1454
1455 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
1456 ld [%fp+tmp7],%g1
1457
1458 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1459 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1460
1461 sub %o7,537,%o7
1462 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1463 ba .cont7
1464 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);  (delay slot)
1465 2:
1466 fand %f0,%f18,%f0
1467 fxtod %f0,%f0 ! res = *(long long*)&res;
1468 ldd [%o3+0x58],%f18
1469 faddd %f0,%f18,%f0
1470 st %f0,[%fp+tmp7]
1471
1472 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);
1473 ld [%fp+tmp7],%g1
1474
1475 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1476 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1477
1478 sub %o7,537,%o7
1479 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1480 ba .cont7
1481 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1);  (delay slot)
1482 1:
1483 sub %l6,stridex,tmp_px
1484 sub counter,4,tmp_counter
1485
1486 ba .cont7
1487 mov 4,counter
1488
1489 .align 16
! .update8: Inf/NaN in stage 4
1490 .update8:
1491 cmp counter,5
1492 ble .cont8
1493 nop
1494
1495 mov %l6,tmp_px
1496 sub counter,5,tmp_counter
1497
1498 ba .cont8
1499 mov 5,counter
1500
! -- Fixup handlers continued (pipeline fill, stages 4-6, and the
! -- first main-loop stages).  Same even/odd pattern as .update0-8.
1501 .align 16
! .update9: small operand in stage 4
1502 .update9:
1503 ld [%l6+4],%i3 ! low word of x
1504 cmp counter,5
1505 ble .cont9
1506 fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);  (delay slot)
1507
1508 cmp %g1,0
1509 bl 1f ! negative: scalar path
1510
1511 orcc %g1,%i3,%g0
1512 bz 1f ! +-0: scalar path
1513 sethi %hi(0x00080000),%i1
1514
1515 cmp %g1,%i1
1516 bge,a 2f
1517 ldd [%o3+0x50],%f18 ! (annulled delay slot) DC4 mask
1518
1519 fxtod %f8,%f8 ! res = *(long long*)&res;
1520 st %f8,[%fp+tmp7]
1521
1522 fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
1523 ld [%fp+tmp7],%g1
1524
1525 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
1526 sra %g1,10,%o2 ! (4_0) hx >>= 10;
1527
1528 sub %o7,537,%o7 ! bias correction for the rescale
1529
1530 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
1531 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
1532 ba .cont9
1533 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);  (delay slot)
1534 2:
1535 fand %f8,%f18,%f8
1536 fxtod %f8,%f8 ! res = *(long long*)&res;
1537 ldd [%o3+0x58],%f18 ! D2ON51
1538 faddd %f8,%f18,%f8
1539 st %f8,[%fp+tmp7]
1540
1541 fand %f8,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
1542 ld [%fp+tmp7],%g1
1543
1544 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
1545 sra %g1,10,%o2 ! (4_0) hx >>= 10;
1546
1547 sub %o7,537,%o7
1548
1549 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
1550 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
1551 ba .cont9
1552 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);  (delay slot)
1553 1:
1554 mov %l6,tmp_px
1555 sub counter,5,tmp_counter
1556
1557 ba .cont9
1558 mov 5,counter
1559
1560 .align 16
! .update10: Inf/NaN in stage 5
1561 .update10:
1562 cmp counter,6
1563 ble .cont10
1564 nop
1565
1566 mov %i0,tmp_px
1567 sub counter,6,tmp_counter
1568
1569 ba .cont10
1570 mov 6,counter
1571
1572 .align 16
! .update11: small operand in stage 5
1573 .update11:
1574 ld [%i0+4],%i3
1575 cmp counter,6
1576 ble .cont11
1577 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);  (delay slot)
1578
1579 cmp %g1,0
1580 bl 1f
1581
1582 orcc %g1,%i3,%g0
1583 bz 1f
1584 sethi %hi(0x00080000),%i3
1585
1586 cmp %g1,%i3
1587 bge,a 2f
1588 ldd [%o3+0x50],%f18
1589
1590 fxtod %f0,%f0 ! res = *(long long*)&res;
1591 st %f0,[%fp+tmp7]
1592
1593 fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
1594 ld [%fp+tmp7],%g1
1595
1596 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
1597 sra %g1,10,%o2 ! (5_0) hx >>= 10;
1598
1599 sub %o7,537,%o7
1600
1601 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
1602
1603 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
1604 ba .cont11
1605 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);  (delay slot)
1606 2:
1607 fand %f0,%f18,%f0
1608 fxtod %f0,%f0 ! res = *(long long*)&res;
1609 ldd [%o3+0x58],%f18
1610 faddd %f0,%f18,%f0
1611 st %f0,[%fp+tmp7]
1612
1613 fand %f0,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
1614 ld [%fp+tmp7],%g1
1615
1616 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
1617 sra %g1,10,%o2 ! (5_0) hx >>= 10;
1618
1619 sub %o7,537,%o7
1620
1621 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
1622
1623 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
1624 ba .cont11
1625 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);  (delay slot)
1626 1:
1627 mov %i0,tmp_px
1628 sub counter,6,tmp_counter
1629
1630 ba .cont11
1631 mov 6,counter
1632
1633 .align 16
! .update12: Inf/NaN in stage 6 (counter clamps to 0: all in-flight
! results drain, then the element is redone by the scalar path)
1634 .update12:
1635 cmp counter,0
1636 ble .cont12
1637 faddd %f34,K3,%f34 ! (2_1) res += K3;  (delay slot)
1638
1639 sub %l6,stridex,tmp_px
1640 sub counter,0,tmp_counter
1641
1642 ba .cont12
1643 mov 0,counter
1644
1645 .align 16
! .update13: small operand in stage 6
1646 .update13:
1647 sub %l6,stridex,%l4
1648 cmp counter,0
1649 ble .cont13
1650 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);  (delay slot)
1651
1652 ld [%l4+4],%l4
1653 cmp %g1,0
1654 bl 1f
1655
1656 orcc %g1,%l4,%g0
1657 bz 1f
1658 sethi %hi(0x00080000),%l4
1659
1660 cmp %g1,%l4
1661 bge,a 2f
1662 ldd [%o3+0x50],%f62
1663
1664 fxtod %f6,%f6 ! res = *(long long*)&res;
1665 st %f6,[%fp+tmp7]
1666
1667 fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
1668 ld [%fp+tmp7],%g1
1669
1670 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1671 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1672
1673 sub %o7,537,%o7
1674 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1675 for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1676
1677 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1678 ba .cont13
1679 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);  (delay slot)
1680 2:
1681 fand %f6,%f62,%f6
1682 fxtod %f6,%f6 ! res = *(long long*)&res;
1683 ldd [%o3+0x58],%f62
1684 faddd %f6,%f62,%f6
1685 st %f6,[%fp+tmp7]
1686
1687 fand %f6,DC0,%f44 ! (6_0) res = vis_fand(res,DC0);
1688 ld [%fp+tmp7],%g1
1689
1690 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21;
1691 sra %g1,10,%o2 ! (6_1) hx >>= 10;
1692 for %f44,DC1,%f44 ! (6_1) res = vis_for(res,DC1);
1693
1694 sub %o7,537,%o7
1695
1696 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8;
1697 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp;
1698 ba .cont13
1699 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2);  (delay slot)
1700 1:
1701 sub %l6,stridex,tmp_px
1702 sub counter,0,tmp_counter
1703
1704 ba .cont13
1705 mov 0,counter
1706
1707 .align 16
! .update14: Inf/NaN, first main-loop stage (0)
1708 .update14:
1709 cmp counter,1
1710 ble .cont14
1711 faddd %f34,K3,%f34 ! (3_1) res += K3;  (delay slot)
1712
1713 sub %l6,stridex,tmp_px
1714 sub counter,1,tmp_counter
1715
1716 ba .cont14
1717 mov 1,counter
1718
1719 .align 16
! .update15: small operand, main-loop stage 0
1720 .update15:
1721 sub %l6,stridex,%l2
1722 cmp counter,1
1723 ble .cont15
1724 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);  (delay slot)
1725
1726 ld [%l2+4],%l2
1727 cmp %g1,0
1728 bl 1f
1729
1730 orcc %g1,%l2,%g0
1731 bz 1f
1732 sethi %hi(0x00080000),%l2
1733
1734 cmp %g1,%l2
1735 bge,a 2f
1736 ldd [%o3+0x50],%f62
1737
1738 fxtod %f0,%f0 ! res = *(long long*)&res;
1739 st %f0,[%fp+tmp7]
1740
1741 fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
1742 ld [%fp+tmp7],%g1
1743
1744 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1745 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1746
1747 sub %o7,537,%o7
1748 for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1749
1750 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1751
1752 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1753 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1754 ba .cont15
1755 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);  (delay slot)
1756 2:
1757 fand %f0,%f62,%f0
1758 fxtod %f0,%f0 ! res = *(long long*)&res;
1759 ldd [%o3+0x58],%f62
1760 faddd %f0,%f62,%f0
1761 st %f0,[%fp+tmp7]
1762
1763 fand %f0,DC0,%f18 ! (0_0) res = vis_fand(res,DC0);
1764 ld [%fp+tmp7],%g1
1765
1766 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21;
1767 sra %g1,10,%o2 ! (0_0) hx >>= 10;
1768 for %f18,DC1,%f28 ! (0_0) res = vis_for(res,DC1);
1769
1770 sub %o7,537,%o7
1771
1772 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp;
1773
1774 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8;
1775 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe;
1776 ba .cont15
1777 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2);  (delay slot)
1778 1:
1779 sub %l6,stridex,tmp_px
1780 sub counter,1,tmp_counter
1781
1782 ba .cont15
1783 mov 1,counter
1784
! -- Fixup handlers continued (main-loop stages 1-4).  Same even/odd
! -- Inf-NaN / subnormal pattern as the earlier .updateN handlers.
1785 .align 16
! .update16: Inf/NaN, main-loop stage 1
1786 .update16:
1787 cmp counter,2
1788 ble .cont16
1789 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3);  (delay slot)
1790
1791 sub %l6,stridex,tmp_px
1792 sub counter,2,tmp_counter
1793
1794 ba .cont16
1795 mov 2,counter
1796
1797 .align 16
! .update17: small operand, main-loop stage 1
1798 .update17:
1799 sub %l6,stridex,%i2
1800 cmp counter,2
1801 ble .cont17
1802 fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0);  (delay slot)
1803
1804 ld [%i2+4],%i2
1805 cmp %g1,0
1806 bl 1f
1807
1808 orcc %g1,%i2,%g0
1809 bz 1f
1810 sethi %hi(0x00080000),%i2
1811
1812 cmp %g1,%i2
1813 bge,a 2f
1814 ldd [%o3+0x50],%f2
1815
1816 fxtod %f6,%f6 ! res = *(long long*)&res;
1817 st %f6,[%fp+tmp7]
1818
1819 fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
1820 ld [%fp+tmp7],%g1
1821
1822 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1823 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1824
1825 sub %o7,537,%o7
1826
1827 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1828 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
1829 ba .cont17
1830 for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);  (delay slot)
1831 2:
1832 fand %f6,%f2,%f6
1833 fxtod %f6,%f6 ! res = *(long long*)&res;
1834 ldd [%o3+0x58],%f2
1835 faddd %f6,%f2,%f6
1836 st %f6,[%fp+tmp7]
1837
1838 fand %f6,DC0,%f44 ! (1_0) res = vis_fand(res,DC0);
1839 ld [%fp+tmp7],%g1
1840
1841 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21;
1842 sra %g1,10,%o2 ! (1_0) hx >>= 10;
1843
1844 sub %o7,537,%o7
1845
1846 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8;
1847 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp;
1848 ba .cont17
1849 for %f44,DC1,%f44 ! (1_0) res = vis_for(res,DC1);  (delay slot)
1850 1:
1851 sub %l6,stridex,tmp_px
1852 sub counter,2,tmp_counter
1853
1854 ba .cont17
1855 mov 2,counter
1856
1857 .align 16
! .update18: Inf/NaN, main-loop stage 2
1858 .update18:
1859 cmp counter,3
1860 ble .cont18
1861 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3);  (delay slot)
1862
1863 sub %l6,stridex,tmp_px
1864 sub counter,3,tmp_counter
1865
1866 ba .cont18
1867 mov 3,counter
1868
1869 .align 16
! .update19: small operand, main-loop stage 2
1870 .update19:
1871 sub %l6,stridex,%i4
1872 cmp counter,3
1873 ble .cont19
1874 fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0);  (delay slot)
1875
1876 ld [%i4+4],%i4
1877 cmp %g1,0
1878 bl 1f
1879
1880 orcc %g1,%i4,%g0
1881 bz 1f
1882 sethi %hi(0x00080000),%i4
1883
1884 cmp %g1,%i4
1885 bge,a 2f
1886 ldd [%o3+0x50],%f2
1887
1888 fxtod %f0,%f0 ! res = *(long long*)&res;
1889 st %f0,[%fp+tmp7]
1890
1891 fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
1892 ld [%fp+tmp7],%g1
1893
1894 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1895
1896 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1897 sub %o7,537,%o7
1898
1899 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1900 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
1901 ba .cont19
1902 for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);  (delay slot)
1903 2:
1904 fand %f0,%f2,%f0
1905 fxtod %f0,%f0 ! res = *(long long*)&res;
1906 ldd [%o3+0x58],%f2
1907 faddd %f0,%f2,%f0
1908 st %f0,[%fp+tmp7]
1909
1910 fand %f0,DC0,%f28 ! (2_0) res = vis_fand(res,DC0);
1911 ld [%fp+tmp7],%g1
1912
1913 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21;
1914
1915 sra %g1,10,%o2 ! (2_0) hx >>= 10;
1916 sub %o7,537,%o7
1917
1918 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8;
1919 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp;
1920 ba .cont19
1921 for %f28,DC1,%f28 ! (2_0) res = vis_for(res,DC1);  (delay slot)
1922 1:
1923 sub %l6,stridex,tmp_px
1924 sub counter,3,tmp_counter
1925
1926 ba .cont19
1927 mov 3,counter
1928
1929 .align 16
! .update20: Inf/NaN, main-loop stage 3
1930 .update20:
1931 cmp counter,4
1932 ble .cont20
1933 fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3);  (delay slot)
1934
1935 sub %l6,stridex,tmp_px
1936 sub counter,4,tmp_counter
1937
1938 ba .cont20
1939 mov 4,counter
1940
1941 .align 16
! .update21: small operand, main-loop stage 3
1942 .update21:
1943 sub %l6,stridex,%i5
1944 cmp counter,4
1945 ble .cont21
1946 fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0);  (delay slot)
1947
1948 ld [%i5+4],%i5
1949 cmp %g1,0
1950 bl 1f
1951
1952 orcc %g1,%i5,%g0
1953 bz 1f
1954 sethi %hi(0x00080000),%i5
1955
1956 cmp %g1,%i5
1957 bge,a 2f
1958 ldd [%o3+0x50],%f34
1959
1960 fxtod %f6,%f6 ! res = *(long long*)&res;
1961 st %f6,[%fp+tmp7]
1962
1963 fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
1964 ld [%fp+tmp7],%g1
1965
1966 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1967 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1968
1969 sub %o7,537,%o7
1970 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1971
1972 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
1973 ba .cont21
1974 for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);  (delay slot)
1975 2:
1976 fand %f6,%f34,%f6
1977 fxtod %f6,%f6 ! res = *(long long*)&res;
1978 ldd [%o3+0x58],%f34
1979 faddd %f6,%f34,%f6
1980 st %f6,[%fp+tmp7]
1981
1982 fand %f6,DC0,%f44 ! (3_0) res = vis_fand(res,DC0);
1983 ld [%fp+tmp7],%g1
1984
1985 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21;
1986 sra %g1,10,%o2 ! (3_0) hx >>= 10;
1987
1988 sub %o7,537,%o7
1989 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8;
1990
1991 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp;
1992 ba .cont21
1993 for %f44,DC1,%f44 ! (3_0) res = vis_for(res,DC1);  (delay slot)
1994 1:
1995 sub %l6,stridex,tmp_px
1996 sub counter,4,tmp_counter
1997
1998 ba .cont21
1999 mov 4,counter
2000
2001 .align 16
! .update22: Inf/NaN, main-loop stage 4
2002 .update22:
2003 cmp counter,5
2004 ble .cont22
2005 fmuld %f62,%f38,%f62 ! (1_0) res *= xx;  (delay slot)
2006
2007 sub %i0,stridex,tmp_px
2008 sub counter,5,tmp_counter
2009
2010 ba .cont22
2011 mov 5,counter
2012
! -- Final fixup handlers (main-loop stages 4-5) and function exit.
2013 .align 16
! .update23: small operand, main-loop stage 4
2014 .update23:
2015 sub %i0,stridex,%l1
2016 cmp counter,5
2017 ble .cont23
2018 fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0);  (delay slot)
2019
2020 ld [%l1+4],%l1 ! low word of x
2021 cmp %g1,0
2022 bl 1f ! negative: scalar path
2023
2024 orcc %g1,%l1,%g0
2025 bz 1f ! +-0: scalar path
2026 sethi %hi(0x00080000),%l1
2027
2028 cmp %g1,%l1
2029 bge,a 2f ! larger subnormal: DC4/D2ON51 path
2030 ldd [%o3+0x50],%f34 ! (annulled delay slot) DC4 mask
2031
2032 fxtod %f0,%f0 ! res = *(long long*)&res;
2033 st %f0,[%fp+tmp7]
2034
2035 fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
2036 ld [%fp+tmp7],%g1
2037
2038 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
2039
2040 sra %g1,10,%o2 ! (4_0) hx >>= 10;
2041 sub %o7,537,%o7 ! bias correction for the rescale
2042
2043 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
2044 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
2045 ba .cont23
2046 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);  (delay slot)
2047 2:
2048 fand %f0,%f34,%f0
2049 fxtod %f0,%f0 ! res = *(long long*)&res;
2050 ldd [%o3+0x58],%f34 ! D2ON51
2051 faddd %f0,%f34,%f0
2052 st %f0,[%fp+tmp7]
2053
2054 fand %f0,DC0,%f24 ! (4_0) res = vis_fand(res,DC0);
2055 ld [%fp+tmp7],%g1
2056
2057 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21;
2058
2059 sra %g1,10,%o2 ! (4_0) hx >>= 10;
2060 sub %o7,537,%o7
2061
2062 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8;
2063 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp;
2064 ba .cont23
2065 for %f24,DC1,%f24 ! (4_0) res = vis_for(res,DC1);  (delay slot)
2066 1:
2067 sub %i0,stridex,tmp_px
2068 sub counter,5,tmp_counter
2069
2070 ba .cont23
2071 mov 5,counter
2072
2073 .align 16
! .update24: Inf/NaN, main-loop stage 5
2074 .update24:
2075 cmp counter,6
2076 ble .cont24
2077 fmuld %f62,%f36,%f62 ! (2_0) res *= xx;  (delay slot)
2078
2079 sub %i1,stridex,tmp_px
2080 sub counter,6,tmp_counter
2081
2082 ba .cont24
2083 mov 6,counter
2084
2085 .align 16
! .update25: small operand, main-loop stage 5.  Unlike the other odd
! handlers, the operand is reloaded from memory into %f10/%f11 here
! because the original register pair has been reused by the pipeline.
2086 .update25:
2087 sub %i1,stridex,%i3
2088 cmp counter,6
2089 ble .cont25
2090 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0);  (delay slot)
2091
2092 ld [%i3+4],%i3
2093 cmp %g1,0
2094 bl 1f
2095
2096 orcc %g1,%i3,%g0
2097 bz 1f
2098 nop
2099
2100 sub %i1,stridex,%i3 ! re-derive px; reload the operand
2101 ld [%i3],%f10
2102 ld [%i3+4],%f11
2103
2104 sethi %hi(0x00080000),%i3
2105
2106 cmp %g1,%i3
2107 bge,a 2f
2108 ldd [%o3+0x50],%f60
2109
2110 fxtod %f10,%f10 ! res = *(long long*)&res;
2111 st %f10,[%fp+tmp7]
2112
2113 fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
2114 ld [%fp+tmp7],%g1
2115
2116 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
2117
2118 sra %g1,10,%o2 ! (5_0) hx >>= 10;
2119 sub %o7,537,%o7
2120
2121 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
2122 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
2123
2124 ba .cont25
2125 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);  (delay slot)
2126 2:
2127 fand %f10,%f60,%f10
2128 fxtod %f10,%f10 ! res = *(long long*)&res;
2129 ldd [%o3+0x58],%f60
2130 faddd %f10,%f60,%f10
2131 st %f10,[%fp+tmp7]
2132
2133 fand %f10,DC0,%f28 ! (5_0) res = vis_fand(res,DC0);
2134 ld [%fp+tmp7],%g1
2135
2136 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21;
2137
2138 sra %g1,10,%o2 ! (5_0) hx >>= 10;
2139 sub %o7,537,%o7
2140
2141 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8;
2142 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp;
2143
2144 ba .cont25
2145 for %f28,DC1,%f28 ! (5_0) res = vis_for(res,DC1);  (delay slot)
2146 1:
2147 sub %i1,stridex,tmp_px
2148 sub counter,6,tmp_counter
2149
2150 ba .cont25
2151 mov 6,counter
2152
! function epilogue: restore register window and return
2153 .exit:
2154 ret
2155 restore ! (delay slot)
2156 SET_SIZE(__vrsqrt)
2157