Print this page
libmvec: further unifdef'ing (LIBMVEC_SO_BUILD)
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libmvec/common/vis/__vsqrtf_ultra3.S
+++ new/usr/src/lib/libmvec/common/vis/__vsqrtf_ultra3.S
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
22 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
23 23 */
24 24 /*
25 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
26 26 * Use is subject to license terms.
27 27 */
28 28
29 29 .file "__vsqrtf_ultra3.S"
30 30
31 31 #include "libm.h"
32 -#if defined(LIBMVEC_SO_BUILD)
33 32 .weak __vsqrtf
34 33 .type __vsqrtf,#function
35 34 __vsqrtf = __vsqrtf_ultra3
36 -#endif
37 35
38 36 RO_DATA
39 37 .align 64
40 38
41 39 .CONST_TBL:
42 40 .word 0x3fe00001, 0x80007e00 ! K1 = 5.00000715259318464227e-01
43 41 .word 0xbfc00003, 0xc0017a01 ! K2 = -1.25000447037521686593e-01
44 42 .word 0x000fffff, 0xffffffff ! DC0 = 0x000fffffffffffff
45 43 .word 0x3ff00000, 0x00000000 ! DC1 = 0x3ff0000000000000
46 44 .word 0x7ffff000, 0x00000000 ! DC2 = 0x7ffff00000000000
47 45
48 46 #define DC0 %f6
49 47 #define DC1 %f4
50 48 #define DC2 %f2
51 49 #define K2 %f38
52 50 #define K1 %f36
53 51 #define TBL %l2
54 52 #define stridex %l3
55 53 #define stridey %l4
56 54 #define _0x1ff0 %l5
57 55 #define counter %l6
58 56 #define _0x00800000 %l7
59 57 #define _0x7f800000 %o0
60 58
61 59 #define tmp_px STACK_BIAS-0x40
62 60 #define tmp_counter STACK_BIAS-0x38
63 61 #define tmp0 STACK_BIAS-0x30
64 62 #define tmp1 STACK_BIAS-0x28
65 63 #define tmp2 STACK_BIAS-0x20
66 64 #define tmp3 STACK_BIAS-0x18
67 65 #define tmp4 STACK_BIAS-0x10
68 66
69 67 ! sizeof temp storage - must be a multiple of 16 for V9
70 68 #define tmps 0x40
71 69
72 70 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
73 71 ! !!!!! algorithm !!!!!
74 72 !
75 73 ! x0 = *px;
76 74 ! ax = *(int*)px;
77 75 ! px += stridex;
78 76 !
79 77 ! if( ax >= 0x7f800000 )
80 78 ! {
81 79 ! *py = sqrtf(x0);
82 80 ! py += stridey;
83 81 ! continue;
84 82 ! }
85 83 ! if( ax < 0x00800000 )
86 84 ! {
87 85 ! *py = sqrtf(x0);
88 86 ! py += stridey;
89 87 ! continue;
90 88 ! }
91 89 !
92 90 ! db0 = (double)x0;
93 91 ! iexp0 = ax >> 24;
94 92 ! iexp0 += 0x3c0;
95 93 ! lexp0 = (long long)iexp0 << 52;
96 94 !
97 95 ! db0 = vis_fand(db0,DC0);
98 96 ! db0 = vis_for(db0,DC1);
99 97 ! hi0 = vis_fand(db0,DC2);
100 98 !
101 99 ! ax >>= 11;
102 100 ! si0 = ax & 0x1ff0;
103 101 ! dtmp0 = ((double*)((char*)TBL + si0))[0];
104 102 ! xx0 = (db0 - hi0);
105 103 ! xx0 *= dtmp0;
106 104 ! dtmp0 = ((double*)((char*)TBL + si0))[1]
107 105 ! res0 = K2 * xx0;
108 106 ! res0 += K1;
109 107 ! res0 *= xx0;
110 108 ! res0 += DC1;
111 109 ! res0 = dtmp0 * res0;
112 110 ! dtmp1 = *((double*)&lexp0);
113 111 ! res0 *= dtmp1;
114 112 ! fres0 = (float)res0;
115 113 ! *py = fres0;
116 114 ! py += stridey;
117 115 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
118 116
119 117 ENTRY(__vsqrtf_ultra3)
120 118 save %sp,-SA(MINFRAME)-tmps,%sp
121 119 PIC_SETUP(l7)
122 120 PIC_SET(l7,.CONST_TBL,o2)
123 121 PIC_SET(l7,__vlibm_TBL_sqrtf,l2)
124 122
125 123 st %i0,[%fp+tmp_counter]
126 124 sll %i2,2,stridex
127 125 or %g0,0xff8,%l5
128 126
129 127 stx %i1,[%fp+tmp_px]
130 128 sll %l5,1,_0x1ff0
131 129
132 130 ldd [%o2],K1
133 131 sll %i4,2,stridey
134 132
135 133 ldd [%o2+8],K2
136 134 or %g0,%i3,%g5
137 135
138 136 ldd [%o2+16],DC0
139 137 sethi %hi(0x7f800000),%o0
140 138
141 139 ldd [%o2+24],DC1
142 140 sethi %hi(0x00800000),%l7
143 141
144 142 ldd [%o2+32],DC2
145 143
146 144 .begin:
147 145 ld [%fp+tmp_counter],counter
148 146 ldx [%fp+tmp_px],%i1
149 147 st %g0,[%fp+tmp_counter]
150 148 .begin1:
151 149 cmp counter,0
152 150 ble,pn %icc,.exit
153 151
154 152 lda [%i1]0x82,%o2 ! (2_0) ax = *(int*)px;
155 153
156 154 or %g0,%i1,%o7
157 155 lda [%i1]0x82,%f25 ! (2_0) x0 = *px;
158 156
159 157 cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
160 158 bge,pn %icc,.spec ! (2_0) if( ax >= 0x7f800000 )
161 159 nop
162 160
163 161 cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
164 162 bl,pn %icc,.spec ! (2_0) if( ax < 0x00800000 )
165 163 nop
166 164
167 165 fstod %f25,%f56 ! (2_0) db0 = (double)x0;
168 166
169 167 lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
170 168
171 169 sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
172 170
173 171 add %o7,stridex,%i1 ! px += stridex
174 172 add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
175 173 lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
176 174 fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
177 175
178 176 cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
179 177 bge,pn %icc,.update0 ! (3_0) if( ax >= 0x7f800000 )
180 178 nop
181 179 .cont0:
182 180 sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
183 181
184 182 sra %o2,11,%i2 ! (2_0) ax >>= 11;
185 183 stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
186 184 for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
187 185
188 186 cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
189 187 bl,pn %icc,.update1 ! (3_0) if( ax < 0x00800000 )
190 188 nop
191 189 .cont1:
192 190 fstod %f0,%f48 ! (3_0) db0 = (double)x0;
193 191
194 192 and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
195 193 lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
196 194
197 195 add %i1,stridex,%i1 ! px += stridex
198 196 add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
199 197 fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
200 198
201 199 sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
202 200
203 201 lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
204 202 fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
205 203
206 204 add %o4,960,%i0 ! (3_0) iexp0 += 0x3c0;
207 205
208 206 cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
209 207 bge,pn %icc,.update2 ! (4_1) if( ax >= 0x7f800000 )
210 208 nop
211 209 .cont2:
212 210 fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
213 211 sllx %i0,52,%g1 ! (3_1) lexp0 = (long long)iexp0 << 52;
214 212 ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
215 213
216 214 sra %o1,11,%l0 ! (3_1) ax >>= 11;
217 215 stx %g1,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
218 216 for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
219 217
220 218 cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
221 219 bl,pn %icc,.update3 ! (4_1) if( ax < 0x00800000 )
222 220 nop
223 221 .cont3:
224 222 fstod %f13,%f50 ! (4_1) db0 = (double)x0;
225 223
226 224 fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
227 225 and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
228 226 lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
229 227
230 228 add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
231 229 fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
232 230
233 231 sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
234 232
235 233 add %i1,stridex,%o4 ! px += stridex
236 234 add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
237 235 lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
238 236 fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
239 237
240 238 fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
241 239 cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
242 240 bge,pn %icc,.update4 ! (0_0) if( ax >= 0x7f800000 )
243 241 fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
244 242 .cont4:
245 243 sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
246 244 ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
247 245
248 246 sra %o2,11,%i5 ! (4_1) ax >>= 11;
249 247 stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
250 248 for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
251 249
252 250 cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
253 251 bl,pn %icc,.update5 ! (0_0) if( ax < 0x00800000 )
254 252 nop
255 253 .cont5:
256 254 fstod %f17,%f56 ! (0_0) db0 = (double)x0;
257 255
258 256 fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
259 257 lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
260 258 faddd %f52,K1,%f52 ! (2_1) res0 += K1;
261 259
262 260 sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
263 261 and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
264 262 fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
265 263
266 264 add %o4,stridex,%i1 ! px += stridex
267 265
268 266 add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
269 267 add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
270 268 lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
271 269 fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
272 270
273 271 fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
274 272 cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
275 273 bge,pn %icc,.update6 ! (1_0) if( ax >= 0x7f800000 )
276 274 fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
277 275 .cont6:
278 276 fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
279 277 sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
280 278 ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
281 279
282 280 sra %l1,11,%i4 ! (0_0) ax >>= 11;
283 281 stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
284 282 for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
285 283
286 284 cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
287 285 bl,pn %icc,.update7 ! (1_0) if( ax < 0x00800000 )
288 286 nop
289 287 .cont7:
290 288 fstod %f21,%f56 ! (1_0) db0 = (double)x0;
291 289
292 290 fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
293 291 and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
294 292 lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
295 293 faddd %f50,K1,%f62 ! (3_1) res0 += K1;
296 294
297 295 add %g1,TBL,%i5 ! (0_0) (char*)TBL + si0
298 296 fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
299 297
300 298 sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
301 299 ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
302 300 faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
303 301
304 302 add %i1,stridex,%o7 ! px += stridex
305 303 add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
306 304 lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
307 305 fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
308 306
309 307 fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
310 308 cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
311 309 bge,pn %icc,.update8 ! (2_0) if( ax >= 0x7f800000 )
312 310 fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
313 311 .cont8:
314 312 fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
315 313 sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
316 314 ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
317 315
318 316 fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
319 317 sra %i0,11,%g1 ! (1_0) ax >>= 11;
320 318 stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
321 319 for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
322 320
323 321 cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
324 322 bl,pn %icc,.update9 ! (2_0) if( ax < 0x00800000 )
325 323 ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
326 324 fstod %f25,%f56 ! (2_0) db0 = (double)x0;
327 325 .cont9:
328 326 fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
329 327 and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
330 328 lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
331 329 faddd %f50,K1,%f34 ! (4_1) res0 += K1;
332 330
333 331 add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
334 332 fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
335 333
336 334 fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
337 335 sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
338 336 ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
339 337 faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
340 338
341 339 add %o7,stridex,%i1 ! px += stridex
342 340 add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
343 341 lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
344 342 fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
345 343
346 344 fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
347 345 cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
348 346 bge,pn %icc,.update10 ! (3_0) if( ax >= 0x7f800000 )
349 347 fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
350 348 .cont10:
351 349 fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
352 350 sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
353 351 ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
354 352
355 353 fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
356 354 sra %o2,11,%i2 ! (2_0) ax >>= 11;
357 355 stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
358 356 for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
359 357
360 358 cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
361 359 bl,pn %icc,.update11 ! (3_0) if( ax < 0x00800000 )
362 360 ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
363 361 fstod %f0,%f48 ! (3_0) db0 = (double)x0;
364 362 .cont11:
365 363 fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
366 364 and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
367 365 lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
368 366 faddd %f50,K1,%f56 ! (0_0) res0 += K1;
369 367
370 368 add %i1,stridex,%i1 ! px += stridex
371 369 add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
372 370 fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
373 371
374 372 fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
375 373 sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
376 374 ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
377 375 faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
378 376
379 377 lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
380 378 fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
381 379
382 380 or %g0,%g5,%i3
383 381 cmp counter,5
384 382 bl,pn %icc,.tail
385 383 add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
386 384
387 385 ba .main_loop
388 386 sub counter,5,counter ! counter
389 387
390 388 .align 16
391 389 .main_loop:
392 390 fmuld K2,%f30,%f60 ! (1_1) res0 = K2 * xx0;
393 391 cmp %o2,_0x7f800000 ! (4_1) ax ? 0x7f800000
394 392 bge,pn %icc,.update12 ! (4_1) if( ax >= 0x7f800000 )
395 393 fsubd %f40,%f46,%f44 ! (2_1) xx0 = (db0 - hi0);
396 394 .cont12:
397 395 fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
398 396 sllx %g5,52,%g5 ! (3_1) lexp0 = (long long)iexp0 << 52;
399 397 ldd [%i2],%f40 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
400 398 fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
401 399
402 400 fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
403 401 sra %o1,11,%l0 ! (3_1) ax >>= 11;
404 402 stx %g5,[%fp+tmp1] ! (3_1) dtmp1 = *((double*)&lexp0);
405 403 for %f58,DC1,%f48 ! (3_1) db0 = vis_for(db0,DC1);
406 404
407 405 cmp %o2,_0x00800000 ! (4_1) ax ? 0x00800000
408 406 bl,pn %icc,.update13 ! (4_1) if( ax < 0x00800000 )
409 407 ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
410 408 fstod %f13,%f50 ! (4_1) db0 = (double)x0;
411 409 .cont13:
412 410 fmuld %f44,%f40,%f46 ! (2_1) xx0 *= dtmp0;
413 411 and %l0,_0x1ff0,%i0 ! (3_1) si0 = ax & 0x1ff0;
414 412 lda [%i1+stridex]0x82,%l1 ! (0_0) ax = *(int*)px;
415 413 faddd %f60,K1,%f32 ! (1_1) res0 += K1;
416 414
417 415 add %i0,TBL,%l0 ! (3_1) (char*)TBL + si0
418 416 add %i3,stridey,%o3 ! py += stridey
419 417 st %f15,[%i3] ! (2_2) *py = fres0;
420 418 fand %f48,DC2,%f62 ! (3_1) hi0 = vis_fand(db0,DC2);
421 419
422 420 fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
423 421 sra %o2,24,%o7 ! (4_1) iexp0 = ax >> 24;
424 422 ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
425 423 faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
426 424
427 425 add %i1,stridex,%o4 ! px += stridex
428 426 add %o7,960,%o7 ! (4_1) iexp0 += 0x3c0;
429 427 lda [%i1+stridex]0x82,%f17 ! (0_0) x0 = *px;
430 428 fand %f50,DC0,%f54 ! (4_1) db0 = vis_fand(db0,DC0);
431 429
432 430 fmuld K2,%f46,%f52 ! (2_1) res0 = K2 * xx0;
433 431 cmp %l1,_0x7f800000 ! (0_0) ax ? 0x7f800000
434 432 bge,pn %icc,.update14 ! (0_0) if( ax >= 0x7f800000 )
435 433 fsubd %f48,%f62,%f42 ! (3_1) xx0 = (db0 - hi0);
436 434 .cont14:
437 435 fmuld %f32,%f30,%f48 ! (1_1) res0 *= xx0;
438 436 sllx %o7,52,%o1 ! (4_1) lexp0 = (long long)iexp0 << 52;
439 437 ldd [%i0+TBL],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
440 438 fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
441 439
442 440 fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
443 441 sra %o2,11,%i5 ! (4_1) ax >>= 11;
444 442 stx %o1,[%fp+tmp2] ! (4_1) dtmp1 = *((double*)&lexp0);
445 443 for %f54,DC1,%f34 ! (4_1) db0 = vis_for(db0,DC1);
446 444
447 445 cmp %l1,_0x00800000 ! (0_0) ax ? 0x00800000
448 446 bl,pn %icc,.update15 ! (0_0) if( ax < 0x00800000 )
449 447 ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
450 448 fstod %f17,%f56 ! (0_0) db0 = (double)x0;
451 449 .cont15:
452 450 fmuld %f42,%f40,%f42 ! (3_1) xx0 *= dtmp0;
453 451 add %o3,stridey,%g5 ! py += stridey
454 452 lda [stridex+%o4]0x82,%i0 ! (1_0) ax = *(int*)px;
455 453 faddd %f52,K1,%f52 ! (2_1) res0 += K1;
456 454
457 455 sra %l1,24,%g1 ! (0_0) iexp0 = ax >> 24;
458 456 and %i5,_0x1ff0,%i5 ! (4_1) si0 = ax & 0x1ff0;
459 457 st %f19,[%o3] ! (3_2) *py = fres0;
460 458 fand %f34,DC2,%f62 ! (4_1) hi0 = vis_fand(db0,DC2);
461 459
462 460 fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
463 461 add %o4,stridex,%i1 ! px += stridex
464 462 ldd [%i4+8],%f60 ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
465 463 faddd %f48,DC1,%f58 ! (1_1) res0 += DC1;
466 464
467 465 add %g1,960,%o5 ! (0_0) iexp0 += 0x3c0;
468 466 add %i5,TBL,%i3 ! (4_1) (char*)TBL + si0
469 467 lda [stridex+%o4]0x82,%f21 ! (1_0) x0 = *px;
470 468 fand %f56,DC0,%f32 ! (0_0) db0 = vis_fand(db0,DC0);
471 469
472 470 fmuld K2,%f42,%f50 ! (3_1) res0 = K2 * xx0;
473 471 cmp %i0,_0x7f800000 ! (1_0) ax ? 0x7f800000
474 472 bge,pn %icc,.update16 ! (1_0) if( ax >= 0x7f800000 )
475 473 fsubd %f34,%f62,%f54 ! (4_1) xx0 = (db0 - hi0);
476 474 .cont16:
477 475 fmuld %f52,%f46,%f52 ! (2_1) res0 *= xx0;
478 476 sllx %o5,52,%o7 ! (0_0) lexp0 = (long long)iexp0 << 52;
479 477 ldd [TBL+%i5],%f62 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
480 478 fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
481 479
482 480 fmuld %f60,%f58,%f44 ! (1_1) res0 = dtmp0 * res0;
483 481 sra %l1,11,%i4 ! (0_0) ax >>= 11;
484 482 stx %o7,[%fp+tmp3] ! (0_0) dtmp1 = *((double*)&lexp0);
485 483 for %f32,DC1,%f48 ! (0_0) db0 = vis_for(db0,DC1);
486 484
487 485 cmp %i0,_0x00800000 ! (1_0) ax ? 0x00800000
488 486 bl,pn %icc,.update17 ! (1_0) if( ax < 0x00800000 )
489 487 ldd [%fp+tmp4],%f34 ! (1_1) dtmp1 = *((double*)&lexp0);
490 488 fstod %f21,%f56 ! (1_0) db0 = (double)x0;
491 489 .cont17:
492 490 fmuld %f54,%f62,%f46 ! (4_1) xx0 *= dtmp0;
493 491 and %i4,_0x1ff0,%g1 ! (0_0) si0 = ax & 0x1ff0;
494 492 lda [%i1+stridex]0x82,%o2 ! (2_0) ax = *(int*)px;
495 493 faddd %f50,K1,%f62 ! (3_1) res0 += K1;
496 494
497 495 add %g1,TBL,%i5 ! (0_0) (char*)TBL + si0
498 496 add %g5,stridey,%g5 ! py += stridey
499 497 st %f23,[stridey+%o3] ! (4_2) *py = fres0;
500 498 fand %f48,DC2,%f32 ! (0_0) hi0 = vis_fand(db0,DC2);
501 499
502 500 fmuld %f44,%f34,%f44 ! (1_1) res0 *= dtmp1;
503 501 sra %i0,24,%o4 ! (1_0) iexp0 = ax >> 24;
504 502 ldd [%i2+8],%f60 ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
505 503 faddd %f52,DC1,%f58 ! (2_1) res0 += DC1;
506 504
507 505 add %i1,stridex,%o7 ! px += stridex
508 506 add %o4,960,%i2 ! (1_0) iexp0 += 0x3c0;
509 507 lda [%i1+stridex]0x82,%f25 ! (2_0) x0 = *px;
510 508 fand %f56,DC0,%f34 ! (1_0) db0 = vis_fand(db0,DC0);
511 509
512 510 fmuld K2,%f46,%f50 ! (4_1) res0 = K2 * xx0;
513 511 cmp %o2,_0x7f800000 ! (2_0) ax ? 0x7f800000
514 512 bge,pn %icc,.update18 ! (2_0) if( ax >= 0x7f800000 )
515 513 fsubd %f48,%f32,%f52 ! (0_0) xx0 = (db0 - hi0);
516 514 .cont18:
517 515 fmuld %f62,%f42,%f54 ! (3_1) res0 *= xx0;
518 516 sllx %i2,52,%o4 ! (1_0) lexp0 = (long long)iexp0 << 52;
519 517 ldd [TBL+%g1],%f32 ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
520 518 fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
521 519
522 520 fmuld %f60,%f58,%f60 ! (2_1) res0 = dtmp0 * res0;
523 521 sra %i0,11,%g1 ! (1_0) ax >>= 11;
524 522 stx %o4,[%fp+tmp4] ! (1_0) dtmp1 = *((double*)&lexp0);
525 523 for %f34,DC1,%f48 ! (1_0) db0 = vis_for(db0,DC1);
526 524
527 525 cmp %o2,_0x00800000 ! (2_0) ax ? 0x00800000
528 526 bl,pn %icc,.update19 ! (2_0) if( ax < 0x00800000 )
529 527 ldd [%fp+tmp0],%f40 ! (2_1) dtmp1 = *((double*)&lexp0);
530 528 fstod %f25,%f56 ! (2_0) db0 = (double)x0;
531 529 .cont19:
532 530 fmuld %f52,%f32,%f42 ! (0_0) xx0 *= dtmp0;
533 531 and %g1,_0x1ff0,%o5 ! (1_0) si0 = ax & 0x1ff0;
534 532 lda [stridex+%o7]0x82,%o1 ! (3_0) ax = *(int*)px;
535 533 faddd %f50,K1,%f34 ! (4_1) res0 += K1;
536 534
537 535 add %o5,TBL,%i4 ! (1_0) (char*)TBL + si0
538 536 add %g5,stridey,%g1 ! py += stridey
539 537 st %f27,[%g5] ! (0_1) *py = fres0;
540 538 fand %f48,DC2,%f62 ! (1_0) hi0 = vis_fand(db0,DC2);
541 539
542 540 fmuld %f60,%f40,%f32 ! (2_1) res0 *= dtmp1;
543 541 sra %o2,24,%l1 ! (2_0) iexp0 = ax >> 24;
544 542 ldd [%l0+8],%f40 ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
545 543 faddd %f54,DC1,%f58 ! (3_1) res0 += DC1;
546 544
547 545 add %o7,stridex,%i1 ! px += stridex
548 546 add %l1,960,%l0 ! (2_0) iexp0 += 0x3c0;
549 547 lda [stridex+%o7]0x82,%f0 ! (3_0) x0 = *px;
550 548 fand %f56,DC0,%f60 ! (2_0) db0 = vis_fand(db0,DC0);
551 549
552 550 fmuld K2,%f42,%f50 ! (0_0) res0 = K2 * xx0;
553 551 cmp %o1,_0x7f800000 ! (3_0) ax ? 0x7f800000
554 552 bge,pn %icc,.update20 ! (3_0) if( ax >= 0x7f800000 )
555 553 fsubd %f48,%f62,%f54 ! (1_0) xx0 = (db0 - hi0);
556 554 .cont20:
557 555 fmuld %f34,%f46,%f52 ! (4_1) res0 *= xx0;
558 556 sllx %l0,52,%o3 ! (2_0) lexp0 = (long long)iexp0 << 52;
559 557 ldd [TBL+%o5],%f56 ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
560 558 fdtos %f44,%f8 ! (1_1) fres0 = (float)res0;
561 559
562 560 fmuld %f40,%f58,%f34 ! (3_1) res0 = dtmp0 * res0;
563 561 sra %o2,11,%i2 ! (2_0) ax >>= 11;
564 562 stx %o3,[%fp+tmp0] ! (2_0) dtmp1 = *((double*)&lexp0);
565 563 for %f60,DC1,%f40 ! (2_0) db0 = vis_for(db0,DC1);
566 564
567 565 cmp %o1,_0x00800000 ! (3_0) ax ? 0x00800000
568 566 bl,pn %icc,.update21 ! (3_0) if( ax < 0x00800000 )
569 567 ldd [%fp+tmp1],%f62 ! (3_1) dtmp1 = *((double*)&lexp0);
570 568 fstod %f0,%f48 ! (3_0) db0 = (double)x0;
571 569 .cont21:
572 570 fmuld %f54,%f56,%f30 ! (1_0) xx0 *= dtmp0;
573 571 and %i2,_0x1ff0,%o3 ! (2_0) si0 = ax & 0x1ff0;
574 572 lda [%i1+stridex]0x82,%o2 ! (4_0) ax = *(int*)px;
575 573 faddd %f50,K1,%f56 ! (0_0) res0 += K1;
576 574
577 575 add %i1,stridex,%i1 ! px += stridex
578 576 add %o3,TBL,%i2 ! (2_0) (char*)TBL + si0
579 577 st %f8,[stridey+%g5] ! (1_1) *py = fres0;
580 578 fand %f40,DC2,%f46 ! (2_0) hi0 = vis_fand(db0,DC2);
581 579
582 580 fmuld %f34,%f62,%f28 ! (3_1) res0 *= dtmp1;
583 581 sra %o1,24,%o4 ! (3_0) iexp0 = ax >> 24;
584 582 ldd [%i3+8],%f50 ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
585 583 faddd %f52,DC1,%f54 ! (4_1) res0 += DC1;
586 584
587 585 add %g1,stridey,%i3 ! py += stridey
588 586 subcc counter,5,counter ! counter
589 587 lda [%i1]0x82,%f13 ! (4_0) x0 = *px;
590 588 fand %f48,DC0,%f58 ! (3_0) db0 = vis_fand(db0,DC0);
591 589
592 590 bpos,pt %icc,.main_loop
593 591 add %o4,960,%g5 ! (3_0) iexp0 += 0x3c0;
594 592
595 593 add counter,5,counter
596 594 .tail:
597 595 subcc counter,1,counter
598 596 bneg,a .begin
599 597 or %g0,%i3,%g5
600 598
601 599 fmuld %f56,%f42,%f52 ! (0_1) res0 *= xx0;
602 600 fdtos %f32,%f15 ! (2_2) fres0 = (float)res0;
603 601
604 602 fmuld %f50,%f54,%f42 ! (4_2) res0 = dtmp0 * res0;
605 603
606 604 ldd [%fp+tmp2],%f56 ! (4_2) dtmp1 = *((double*)&lexp0);
607 605
608 606 add %i3,stridey,%o3 ! py += stridey
609 607 st %f15,[%i3] ! (2_2) *py = fres0;
610 608
611 609 subcc counter,1,counter
612 610 bneg,a .begin
613 611 or %g0,%o3,%g5
614 612
615 613 fmuld %f42,%f56,%f44 ! (4_2) res0 *= dtmp1;
616 614 ldd [%i5+8],%f58 ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
617 615 faddd %f52,DC1,%f34 ! (0_1) res0 += DC1;
618 616
619 617 fdtos %f28,%f19 ! (3_2) fres0 = (float)res0;
620 618
621 619 fmuld %f58,%f34,%f32 ! (0_1) res0 = dtmp0 * res0;
622 620
623 621 ldd [%fp+tmp3],%f60 ! (0_1) dtmp1 = *((double*)&lexp0);
624 622
625 623 add %o3,stridey,%g5 ! py += stridey
626 624
627 625 st %f19,[%o3] ! (3_2) *py = fres0;
628 626
629 627 subcc counter,1,counter
630 628 bneg,a .begin
631 629 nop
632 630
633 631 fmuld %f32,%f60,%f40 ! (0_1) res0 *= dtmp1;
634 632
635 633 fdtos %f44,%f23 ! (4_2) fres0 = (float)res0;
636 634
637 635 add %g5,stridey,%g5 ! py += stridey
638 636 st %f23,[stridey+%o3] ! (4_2) *py = fres0;
639 637
640 638 subcc counter,1,counter
641 639 bneg,a .begin
642 640 nop
643 641
644 642 fdtos %f40,%f27 ! (0_1) fres0 = (float)res0;
645 643
646 644 st %f27,[%g5] ! (0_1) *py = fres0;
647 645
648 646 ba .begin
649 647 add %g5,stridey,%g5
650 648
651 649 .align 16
652 650 .spec:
653 651 fsqrts %f25,%f25
654 652 sub counter,1,counter
655 653 add %i1,stridex,%i1
656 654 st %f25,[%g5]
657 655 ba .begin1
658 656 add %g5,stridey,%g5
659 657
660 658 .align 16
661 659 .update0:
662 660 cmp counter,1
663 661 ble .cont0
664 662 fzeros %f0
665 663
666 664 stx %i1,[%fp+tmp_px]
667 665 sethi %hi(0x7f800000),%o1
668 666
669 667 sub counter,1,counter
670 668 st counter,[%fp+tmp_counter]
671 669
672 670 ba .cont0
673 671 or %g0,1,counter
674 672
675 673 .align 16
676 674 .update1:
677 675 cmp counter,1
678 676 ble .cont1
679 677 fzeros %f0
680 678
681 679 stx %i1,[%fp+tmp_px]
682 680 clr %o1
683 681
684 682 sub counter,1,counter
685 683 st counter,[%fp+tmp_counter]
686 684
687 685 ba .cont1
688 686 or %g0,1,counter
689 687
690 688 .align 16
691 689 .update2:
692 690 cmp counter,2
693 691 ble .cont2
694 692 fzeros %f13
695 693
696 694 stx %i1,[%fp+tmp_px]
697 695 sethi %hi(0x7f800000),%o2
698 696
699 697 sub counter,2,counter
700 698 st counter,[%fp+tmp_counter]
701 699
702 700 ba .cont2
703 701 or %g0,2,counter
704 702
705 703 .align 16
706 704 .update3:
707 705 cmp counter,2
708 706 ble .cont3
709 707 fzeros %f13
710 708
711 709 stx %i1,[%fp+tmp_px]
712 710 clr %o2
713 711
714 712 sub counter,2,counter
715 713 st counter,[%fp+tmp_counter]
716 714
717 715 ba .cont3
718 716 or %g0,2,counter
719 717
720 718 .align 16
721 719 .update4:
722 720 cmp counter,3
723 721 ble .cont4
724 722 fzeros %f17
725 723
726 724 stx %o4,[%fp+tmp_px]
727 725 sethi %hi(0x7f800000),%l1
728 726
729 727 sub counter,3,counter
730 728 st counter,[%fp+tmp_counter]
731 729
732 730 ba .cont4
733 731 or %g0,3,counter
734 732
735 733 .align 16
736 734 .update5:
737 735 cmp counter,3
738 736 ble .cont5
739 737 fzeros %f17
740 738
741 739 stx %o4,[%fp+tmp_px]
742 740 clr %l1
743 741
744 742 sub counter,3,counter
745 743 st counter,[%fp+tmp_counter]
746 744
747 745 ba .cont5
748 746 or %g0,3,counter
749 747
750 748 .align 16
751 749 .update6:
752 750 cmp counter,4
753 751 ble .cont6
754 752 fzeros %f21
755 753
756 754 stx %i1,[%fp+tmp_px]
757 755 sethi %hi(0x7f800000),%i0
758 756
759 757 sub counter,4,counter
760 758 st counter,[%fp+tmp_counter]
761 759
762 760 ba .cont6
763 761 or %g0,4,counter
764 762
765 763 .align 16
766 764 .update7:
767 765 cmp counter,4
768 766 ble .cont7
769 767 fzeros %f21
770 768
771 769 stx %i1,[%fp+tmp_px]
772 770 clr %i0
773 771
774 772 sub counter,4,counter
775 773 st counter,[%fp+tmp_counter]
776 774
777 775 ba .cont7
778 776 or %g0,4,counter
779 777
780 778 .align 16
781 779 .update8:
782 780 cmp counter,5
783 781 ble .cont8
784 782 fzeros %f25
785 783
786 784 stx %o7,[%fp+tmp_px]
787 785 sethi %hi(0x7f800000),%o2
788 786
789 787 sub counter,5,counter
790 788 st counter,[%fp+tmp_counter]
791 789
792 790 ba .cont8
793 791 or %g0,5,counter
794 792
795 793 .align 16
796 794 .update9:
797 795 cmp counter,5
798 796 ble .cont9
799 797 fzeros %f25
800 798
801 799 stx %o7,[%fp+tmp_px]
802 800 clr %o2
803 801
804 802 sub counter,5,counter
805 803 st counter,[%fp+tmp_counter]
806 804
807 805 ba .cont9
808 806 or %g0,5,counter
809 807
810 808 .align 16
811 809 .update10:
812 810 cmp counter,6
813 811 ble .cont10
814 812 fzeros %f0
815 813
816 814 stx %i1,[%fp+tmp_px]
817 815 sethi %hi(0x7f800000),%o1
818 816
819 817 sub counter,6,counter
820 818 st counter,[%fp+tmp_counter]
821 819
822 820 ba .cont10
823 821 or %g0,6,counter
824 822
825 823 .align 16
826 824 .update11:
827 825 cmp counter,6
828 826 ble .cont11
829 827 fzeros %f0
830 828
831 829 stx %i1,[%fp+tmp_px]
832 830 clr %o1
833 831
834 832 sub counter,6,counter
835 833 st counter,[%fp+tmp_counter]
836 834
837 835 ba .cont11
838 836 or %g0,6,counter
839 837
840 838 .align 16
841 839 .update12:
842 840 cmp counter,2
843 841 ble .cont12
844 842 fzeros %f13
845 843
846 844 stx %i1,[%fp+tmp_px]
847 845 sethi %hi(0x7f800000),%o2
848 846
849 847 sub counter,2,counter
850 848 st counter,[%fp+tmp_counter]
851 849
852 850 ba .cont12
853 851 or %g0,2,counter
854 852
855 853 .align 16
856 854 .update13:
857 855 cmp counter,2
858 856 ble .cont13
859 857 fzeros %f13
860 858
861 859 stx %i1,[%fp+tmp_px]
862 860 clr %o2
863 861
864 862 sub counter,2,counter
865 863 st counter,[%fp+tmp_counter]
866 864
867 865 ba .cont13
868 866 or %g0,2,counter
869 867
870 868 .align 16
871 869 .update14:
872 870 cmp counter,3
873 871 ble .cont14
874 872 fzeros %f17
875 873
876 874 stx %o4,[%fp+tmp_px]
877 875 sethi %hi(0x7f800000),%l1
878 876
879 877 sub counter,3,counter
880 878 st counter,[%fp+tmp_counter]
881 879
882 880 ba .cont14
883 881 or %g0,3,counter
884 882
885 883 .align 16
886 884 .update15:
887 885 cmp counter,3
888 886 ble .cont15
889 887 fzeros %f17
890 888
891 889 stx %o4,[%fp+tmp_px]
892 890 clr %l1
893 891
894 892 sub counter,3,counter
895 893 st counter,[%fp+tmp_counter]
896 894
897 895 ba .cont15
898 896 or %g0,3,counter
899 897
900 898 .align 16
901 899 .update16:
902 900 cmp counter,4
903 901 ble .cont16
904 902 fzeros %f21
905 903
906 904 stx %i1,[%fp+tmp_px]
907 905 sethi %hi(0x7f800000),%i0
908 906
909 907 sub counter,4,counter
910 908 st counter,[%fp+tmp_counter]
911 909
912 910 ba .cont16
913 911 or %g0,4,counter
914 912
915 913 .align 16
916 914 .update17:
917 915 cmp counter,4
918 916 ble .cont17
919 917 fzeros %f21
920 918
921 919 stx %i1,[%fp+tmp_px]
922 920 clr %i0
923 921
924 922 sub counter,4,counter
925 923 st counter,[%fp+tmp_counter]
926 924
927 925 ba .cont17
928 926 or %g0,4,counter
929 927
930 928 .align 16
931 929 .update18:
932 930 cmp counter,5
933 931 ble .cont18
934 932 fzeros %f25
935 933
936 934 stx %o7,[%fp+tmp_px]
937 935 sethi %hi(0x7f800000),%o2
938 936
939 937 sub counter,5,counter
940 938 st counter,[%fp+tmp_counter]
941 939
942 940 ba .cont18
943 941 or %g0,5,counter
944 942
945 943 .align 16
946 944 .update19:
947 945 cmp counter,5
948 946 ble .cont19
949 947 fzeros %f25
950 948
951 949 stx %o7,[%fp+tmp_px]
952 950 clr %o2
953 951
954 952 sub counter,5,counter
955 953 st counter,[%fp+tmp_counter]
956 954
957 955 ba .cont19
958 956 or %g0,5,counter
959 957
960 958 .align 16
961 959 .update20:
962 960 cmp counter,6
963 961 ble .cont20
964 962 fzeros %f0
965 963
966 964 stx %i1,[%fp+tmp_px]
967 965 sethi %hi(0x7f800000),%o1
968 966
969 967 sub counter,6,counter
970 968 st counter,[%fp+tmp_counter]
971 969
972 970 ba .cont20
973 971 or %g0,6,counter
974 972
975 973 .align 16
976 974 .update21:
977 975 cmp counter,6
978 976 ble .cont21
979 977 fzeros %f0
980 978
981 979 stx %i1,[%fp+tmp_px]
982 980 clr %o1
983 981
984 982 sub counter,6,counter
985 983 st counter,[%fp+tmp_counter]
986 984
987 985 ba .cont21
988 986 or %g0,6,counter
989 987
990 988 .exit:
991 989 ret
992 990 restore
993 991 SET_SIZE(__vsqrtf_ultra3)
994 992
↓ open down ↓ |
948 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX