/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vrsqrt.S"

#include "libm.h"

	RO_DATA
	.align	64

! Constant table for __vrsqrt.
!
! Every entry is one double written as two 32-bit words, most significant
! word first, so entry number i sits at byte offset 8*i from .CONST_TBL.
! The routine fetches entries with hard-coded offsets from the table base
! (0x00 through 0x48 for K1..DC3 at entry, 0x60 for DONE at .begin1), so
! entries must not be reordered, inserted or removed.
!
!   offset 0x00..0x28	K1..K6	polynomial coefficients (see algorithm below)
!   offset 0x30..0x50	DC0..DC4	bit masks and bit patterns for VIS logical ops
!   offset 0x58		D2ON51
!   offset 0x60		DONE
.CONST_TBL:
	.word	0xbfe00000, 0x0000002f	! K1 =-5.00000000000005209867e-01;
	.word	0x3fd80000, 0x00000058	! K2 = 3.75000000000004884257e-01;
	.word	0xbfd3ffff, 0xff444bc8	! K3 =-3.12499999317136886551e-01;
	.word	0x3fd17fff, 0xff5006fe	! K4 = 2.73437499359815081532e-01;
	.word	0xbfcf80bb, 0xb33ef574	! K5 =-2.46116125605037803130e-01;
	.word	0x3fcce0af, 0xf8156949	! K6 = 2.25606914648617522896e-01;

	.word	0x001fffff, 0xffffffff	! DC0: mask, keeps 52 mantissa bits + lowest exponent bit
	.word	0x3fe00000, 0x00000000	! DC1: bit pattern of 0.5, OR-ed in after the DC0 mask
	.word	0x00002000, 0x00000000	! DC2: fpadd32 increment, half of the lowest bit kept by DC3
	.word	0x7fffc000, 0x00000000	! DC3: mask, clears sign, high-word bits 13..0 and the low word
	.word	0x0007ffff, 0xffffffff	! DC4: mask, keeps the low 51 bits (tiny-input path below)

	.word	0x43200000, 0x00000000	! D2ON51 = pow(2,51)
	.word	0x3ff00000, 0x00000000	! DONE = 1.0

! Integer register aliases.
! NOTE: keep comments off the #define lines themselves; anything after the
! register name would become part of the macro body.
!
!   stridex, stridey	strides in bytes; set at entry from %i2 and %i4
!			shifted left by 3
!   counter		element count for the current pass, loaded from
!			tmp_counter at .begin
!   TBL			address of __vlibm_TBL_rsqrt
!   _0x7ff00000,
!   _0x00100000		the two thresholds hx is compared against
#define stridex		%l5
#define stridey		%l7
#define counter		%l0
#define TBL		%l3
#define _0x7ff00000	%o0
#define _0x00100000	%o1

! Floating-point register aliases for the table constants.
! DONE is reloaded from the table at .begin1; %f4 is also written as a
! plain scratch register inside .main_loop.
#define DC0		%f56
#define DC1		%f54
#define DC2		%f48
#define DC3		%f46
#define K6		%f42
#define K5		%f20
#define K4		%f52
#define K3		%f50
#define K2		%f14
#define K1		%f12
#define DONE		%f4

! Count and input pointer consumed at .begin; initialised at entry from
! %i0 and %i1.  .begin clears tmp_counter after copying it to counter.
#define tmp_counter	%g5
#define tmp_px		%o5

! Eight 8-byte scratch slots addressed relative to %fp.  In the visible
! code the shifted exponent is stored to a slot with stx and read back
! with ldd as dlexp.
#define tmp0		STACK_BIAS-0x40
#define tmp1		STACK_BIAS-0x38
#define tmp2		STACK_BIAS-0x30
#define tmp3		STACK_BIAS-0x28
#define tmp4		STACK_BIAS-0x20
#define tmp5		STACK_BIAS-0x18
#define tmp6		STACK_BIAS-0x10
#define tmp7		STACK_BIAS-0x08

! sizeof temp storage - must be a multiple of 16 for V9
! (8 slots of 8 bytes; subtracted from the frame size in the save at entry)
#define tmps		0x40

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!      !!!!!   algorithm   !!!!!
!  ((float*)&res)[0] = ((float*)px)[0];
!  ((float*)&res)[1] = ((float*)px)[1];
!  hx = *(int*)px;
!  if ( hx >= 0x7ff00000 )
!  {
!    res = DONE / res;
!    ((float*)py)[0] = ((float*)&res)[0];
!    ((float*)py)[1] = ((float*)&res)[1];
!    px += stridex;
!    py += stridey;
!    continue;
!  }
!  if ( hx < 0x00100000 )
!  {
!    ax = hx & 0x7fffffff;
!    lx = ((int*)px)[1];
!
!    if ( (ax | lx) == 0 )
!    {
!      res = DONE / res;
!      ((float*)py)[0] = ((float*)&res)[0];
!      ((float*)py)[1] = ((float*)&res)[1];
!      px += stridex;
!      py += stridey;
!      continue;
!    }
!    else if ( hx >= 0 )
!    {
!      if ( hx < 0x00080000 )
!      {
!        res = *(long long*)&res;
!        hx = *(int*)&res - (537 << 21);
!      }
!      else
!      {
!        res = vis_fand(res,DC4);
!        res = *(long long*)&res;
!        res += D2ON51;
!        hx = *(int*)&res - (537 << 21);
!      }
!    }
!    else
!    {
!      res = sqrt(res);
!      ((float*)py)[0] = ((float*)&res)[0];
!      ((float*)py)[1] = ((float*)&res)[1];
!      px += stridex;
!      py += stridey;
!      continue;
!    }
!
} 140 ! 141 ! iexp = hx >> 21; 142 ! iexp = -iexp; 143 ! iexp += 0x5fe; 144 ! lexp = iexp << 52; 145 ! dlexp = *(double*)&lexp; 146 ! hx >>= 10; 147 ! hx &= 0x7f8; 148 ! hx += 8; 149 ! hx &= -16; 150 ! 151 ! res = vis_fand(res,DC0); 152 ! res = vis_for(res,DC1); 153 ! res_c = vis_fpadd32(res,DC2); 154 ! res_c = vis_fand(res_c,DC3); 155 ! 156 ! addr = (char*)arr + hx; 157 ! dexp_hi = ((double*)addr)[0]; 158 ! dexp_lo = ((double*)addr)[1]; 159 ! dtmp0 = dexp_hi * dexp_hi; 160 ! xx = res - res_c; 161 ! xx *= dtmp0; 162 ! res = K6 * xx; 163 ! res += K5; 164 ! res *= xx; 165 ! res += K4; 166 ! res *= xx; 167 ! res += K3; 168 ! res *= xx; 169 ! res += K2; 170 ! res *= xx; 171 ! res += K1; 172 ! res *= xx; 173 ! res = dexp_hi * res; 174 ! res += dexp_lo; 175 ! res += dexp_hi; 176 ! 177 ! res *= dlexp; 178 ! 179 ! ((float*)py)[0] = ((float*)&res)[0]; 180 ! ((float*)py)[1] = ((float*)&res)[1]; 181 ! 182 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 183 184 ENTRY(__vrsqrt) 185 save %sp,-SA(MINFRAME)-tmps,%sp 186 PIC_SETUP(l7) 187 PIC_SET(l7,.CONST_TBL,o3) 188 PIC_SET(l7,__vlibm_TBL_rsqrt,l3) 189 wr %g0,0x82,%asi 190 191 ldd [%o3],K1 192 sethi %hi(0x7ff00000),%o0 193 mov %i3,%o4 194 195 ldd [%o3+0x08],K2 196 sethi %hi(0x00100000),%o1 197 mov %i1,tmp_px 198 199 ldd [%o3+0x10],K3 200 sll %i2,3,stridex 201 mov %i0,tmp_counter 202 203 ldd [%o3+0x18],K4 204 sll %i4,3,stridey 205 206 ldd [%o3+0x20],K5 207 ldd [%o3+0x28],K6 208 ldd [%o3+0x30],DC0 209 ldd [%o3+0x38],DC1 210 ldd [%o3+0x40],DC2 211 ldd [%o3+0x48],DC3 212 213 .begin: 214 mov tmp_counter,counter 215 mov tmp_px,%i1 216 clr tmp_counter 217 .begin1: 218 cmp counter,0 219 ble,pn %icc,.exit 220 ldd [%o3+0x60],DONE 221 222 lda [%i1]%asi,%f0 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 223 sethi %hi(0x7ffffc00),%i0 224 225 lda [%i1+4]%asi,%f1 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; 226 add %i0,1023,%i0 227 228 fand %f0,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 229 230 lda [%i1]%asi,%g1 ! 
(6_1) hx = *(int*)px; 231 sethi %hi(0x00080000),%i4 232 233 lda [%i1+4]%asi,%l4 234 add %i1,stridex,%l6 ! px += stridex 235 236 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 237 lda [%l6]%asi,%f8 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; 238 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 239 240 lda [%l6+4]%asi,%f9 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; 241 sra %g1,10,%o2 ! (6_1) hx >>= 10; 242 and %g1,%i0,%i2 243 244 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 245 bge,pn %icc,.spec0 ! (6_1) if ( hx >= 0x7ff00000 ) 246 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 247 248 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 249 bl,pn %icc,.spec1 ! (6_1) if ( hx < 0x00100000 ) 250 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 251 .cont_spec: 252 fand %f8,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 253 254 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); 255 256 add %o2,8,%l4 ! (6_1) hx += 8; 257 258 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; 259 260 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; 261 sllx %o7,52,%o7 ! (6_1) iexp << 52; 262 and %l4,-16,%l4 ! (6_1) hx = -16; 263 264 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; 265 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; 266 267 add %l6,stridex,%l6 ! px += stridex 268 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; 269 270 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 271 lda [%l6]%asi,%f0 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; 272 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 273 274 sra %g1,10,%o2 ! (0_0) hx >>= 10; 275 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 276 lda [%l6+4]%asi,%f1 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; 277 278 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 279 bge,pn %icc,.update0 ! (0_0) if ( hx >= 0x7ff00000 ) 280 fand %f18,DC3,%f6 ! (6_1) res_c = vis_fand(res_c,DC3); 281 .cont0: 282 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 283 fmuld %f30,%f30,%f10 ! (6_1) dtmp0 = dexp_hi * dexp_hi; 284 285 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 286 bl,pn %icc,.update1 ! 
(0_0) if ( hx < 0x00100000 ) 287 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; 288 .cont1: 289 fand %f0,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 290 291 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 292 293 add %o2,8,%l2 ! (0_0) hx += 8; 294 fsubd %f44,%f6,%f6 ! (6_1) xx = res - res_c; 295 296 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; 297 sllx %o7,52,%o7 ! (0_0) iexp << 52; 298 and %l2,-16,%l2 ! (0_0) hx = -16; 299 300 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; 301 add %l6,stridex,%l6 ! px += stridex 302 stx %o7,[%fp+tmp2] ! (0_0) dlexp = *(double*)lexp; 303 304 fmuld %f6,%f10,%f26 ! (6_1) xx *= dtmp0; 305 ldd [%l2],%f10 ! (0_0) dtmp0 = ((double*)addr)[0]; 306 307 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 308 lda [%l6]%asi,%f6 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; 309 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 310 311 sra %g1,10,%o2 ! (1_0) hx >>= 10; 312 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 313 bge,pn %icc,.update2 ! (1_0) if ( hx >= 0x7ff00000 ) 314 lda [%l6+4]%asi,%f7 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; 315 .cont2: 316 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); 317 318 fmuld %f10,%f10,%f10 ! (0_0) dtmp0 = dexp_hi * dexp_hi; 319 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 320 bl,pn %icc,.update3 ! (1_0) if ( hx < 0x00100000 ) 321 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 322 .cont3: 323 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 324 fand %f6,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 325 326 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; 327 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); 328 329 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; 330 add %o2,8,%i2 ! (1_0) hx += 8; 331 fsubd %f28,%f8,%f32 ! (0_0) xx = res - res_c; 332 333 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; 334 sllx %o7,52,%o7 ! (1_0) iexp << 52; 335 and %i2,-16,%i2 ! (1_0) hx = -16; 336 337 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; 338 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; 339 340 fmuld %f32,%f10,%f32 ! 
(0_0) xx *= dtmp0; 341 add %l6,stridex,%l6 ! px += stridex 342 ldd [%i2],%f10 ! (1_0) dtmp0 = ((double*)addr)[0]; 343 faddd %f62,K5,%f62 ! (6_1) res += K5; 344 345 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 346 lda [%l6]%asi,%f0 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; 347 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); 348 349 sra %g1,10,%o2 ! (2_0) hx >>= 10; 350 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 351 bge,pn %icc,.update4 ! (2_0) if ( hx >= 0x7ff00000 ) 352 lda [%l6+4]%asi,%f1 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; 353 .cont4: 354 fmuld %f62,%f26,%f40 ! (6_1) res *= xx; 355 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); 356 357 fmuld %f10,%f10,%f10 ! (1_0) dtmp0 = dexp_hi * dexp_hi; 358 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 359 bl,pn %icc,.update5 ! (2_0) if ( hx < 0x00100000 ) 360 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 361 .cont5: 362 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 363 fand %f0,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 364 365 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; 366 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); 367 368 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; 369 add %o2,8,%i4 ! (2_0) hx += 8; 370 fsubd %f44,%f8,%f6 ! (1_0) xx = res - res_c; 371 372 faddd %f40,K4,%f40 ! (6_1) res += K4; 373 374 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; 375 sllx %o7,52,%o7 ! (2_0) iexp << 52; 376 and %i4,-16,%i4 ! (2_0) hx = -16; 377 378 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; 379 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; 380 381 fmuld %f6,%f10,%f38 ! (1_0) xx *= dtmp0; 382 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; 383 faddd %f62,K5,%f62 ! (0_0) res += K5; 384 385 fmuld %f40,%f26,%f34 ! (6_1) res *= xx; 386 add %l6,stridex,%l6 ! px += stridex 387 388 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 389 lda [%l6]%asi,%f8 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; 390 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); 391 392 sra %g1,10,%o2 ! (3_0) hx >>= 10; 393 cmp %g1,_0x7ff00000 ! (3_0) hx ? 
0x7ff00000 394 bge,pn %icc,.update6 ! (3_0) if ( hx >= 0x7ff00000 ) 395 lda [%l6+4]%asi,%f9 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; 396 .cont6: 397 fmuld %f62,%f32,%f60 ! (0_0) res *= xx; 398 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 399 fand %f18,DC3,%f22 ! (2_0) res_c = vis_fand(res_c,DC3); 400 401 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; 402 bl,pn %icc,.update7 ! (3_0) if ( hx < 0x00100000 ) 403 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 404 faddd %f34,K3,%f6 ! (6_1) res += K3; 405 .cont7: 406 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 407 fand %f8,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); 408 409 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; 410 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); 411 412 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; 413 add %o2,8,%i5 ! (3_0) hx += 8; 414 fsubd %f28,%f22,%f28 ! (2_0) xx = res - res_c; 415 416 fmuld %f6,%f26,%f22 ! (6_1) res *= xx; 417 faddd %f60,K4,%f60 ! (0_0) res += K4; 418 419 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; 420 sllx %o7,52,%o7 ! (3_0) iexp << 52; 421 and %i5,-16,%i5 ! (3_0) hx = -16; 422 423 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; 424 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; 425 426 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; 427 add %l6,stridex,%i0 ! px += stridex 428 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; 429 faddd %f62,K5,%f62 ! (1_0) res += K5; 430 431 faddd %f22,K2,%f10 ! (6_1) res += K2; 432 fmuld %f60,%f32,%f34 ! (0_0) res *= xx; 433 434 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 435 lda [%i0]%asi,%f0 ! (5_0) ((float*)res)[0] = ((float*)px)[0]; 436 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); 437 438 sra %g1,10,%o2 ! (4_0) hx >>= 10; 439 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 440 bge,pn %icc,.update8 ! (4_0) if ( hx >= 0x7ff00000 ) 441 lda [%i0+4]%asi,%f1 ! (5_0) ((float*)res)[1] = ((float*)px)[1]; 442 .cont8: 443 fand %f18,DC3,%f40 ! (3_0) res_c = vis_fand(res_c,DC3); 444 fmuld %f62,%f38,%f62 ! (1_0) res *= xx; 445 446 fmuld %f10,%f26,%f58 ! 
(6_1) res *= xx; 447 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 448 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 449 faddd %f34,K3,%f60 ! (0_0) res += K3; 450 451 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; 452 bl,pn %icc,.update9 ! (4_0) if ( hx < 0x00100000 ) 453 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 454 fand %f0,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 455 .cont9: 456 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; 457 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); 458 459 fmuld K6,%f36,%f10 ! (2_0) res = K6 * xx; 460 add %o2,8,%l1 ! (4_0) hx += 8; 461 fsubd %f44,%f40,%f44 ! (3_0) xx = res - res_c; 462 463 fmuld %f60,%f32,%f60 ! (0_0) res *= xx; 464 faddd %f62,K4,%f6 ! (1_0) res += K4; 465 466 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; 467 sllx %o7,52,%o7 ! (4_0) iexp << 52; 468 and %l1,-16,%l1 ! (4_0) hx = -16; 469 faddd %f58,K1,%f58 ! (6_1) res += K1; 470 471 add %i0,stridex,%i1 ! px += stridex 472 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; 473 stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; 474 475 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; 476 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; 477 faddd %f10,K5,%f62 ! (2_0) res += K5; 478 479 fmuld %f6,%f38,%f34 ! (1_0) res *= xx; 480 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 481 nop 482 faddd %f60,K2,%f60 ! (0_0) res += K2; 483 484 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 485 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 486 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 487 fmuld %f58,%f26,%f26 ! (6_1) res *= xx; 488 489 sra %g1,10,%o2 ! (5_0) hx >>= 10; 490 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 491 bge,pn %icc,.update10 ! (5_0) if ( hx >= 0x7ff00000 ) 492 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; 493 .cont10: 494 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); 495 fmuld %f62,%f36,%f62 ! (2_0) res *= xx; 496 497 fmuld %f60,%f32,%f58 ! (0_0) res *= xx; 498 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 499 and %o2,2040,%o2 ! 
(5_0) hx &= 0x7f8; 500 faddd %f34,K3,%f34 ! (1_0) res += K3; 501 502 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; 503 bl,pn %icc,.update11 ! (5_0) if ( hx < 0x00100000 ) 504 nop 505 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 506 .cont11: 507 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; 508 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; 509 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); 510 511 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; 512 add %o2,8,%i3 ! (5_0) hx += 8; 513 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; 514 515 fmuld %f34,%f38,%f24 ! (1_0) res *= xx; 516 or %g0,%o4,%i0 517 518 cmp counter,7 519 bl,pn %icc,.tail 520 faddd %f62,K4,%f34 ! (2_0) res += K4; 521 522 ba .main_loop 523 sub counter,7,counter ! counter 524 525 .align 16 526 .main_loop: 527 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; 528 and %i3,-16,%i3 ! (5_1) hx = -16; 529 lda [%i1]%asi,%g1 ! (6_1) hx = *(int*)px; 530 faddd %f58,K1,%f58 ! (0_1) res += K1; 531 532 add %i3,TBL,%i3 ! (5_1) addr = (char*)arr + hx; 533 sllx %o7,52,%o7 ! (5_1) iexp << 52; 534 stx %o7,[%fp+tmp0] ! (5_1) dlexp = *(double*)lexp; 535 faddd %f26,%f60,%f8 ! (6_2) res += dexp_lo; 536 537 faddd %f22,K5,%f62 ! (3_1) res += K5; 538 add %i1,stridex,%l6 ! px += stridex 539 ldd [%i3],%f22 ! (5_1) dtmp0 = ((double*)addr)[0]; 540 fmuld %f10,%f44,%f60 ! (4_1) xx *= dtmp0; 541 542 faddd %f24,K2,%f26 ! (1_1) res += K2; 543 add %i0,stridey,%i1 ! px += stridey 544 ldd [%l2],%f24 ! (0_1) dexp_hi = ((double*)addr)[0]; 545 fmuld %f34,%f36,%f34 ! (2_1) res *= xx; 546 547 fmuld %f58,%f32,%f58 ! (0_1) res *= xx; 548 sra %g1,21,%o7 ! (6_1) iexp = hx >> 21; 549 lda [%l6]%asi,%f0 ! (0_0) ((float*)res)[0] = ((float*)px)[0]; 550 for %f16,DC1,%f44 ! (6_1) res = vis_for(res,DC1); 551 552 lda [%l6+4]%asi,%f1 ! (0_0) ((float*)res)[1] = ((float*)px)[1]; 553 sra %g1,10,%o2 ! (6_1) hx >>= 10; 554 fmuld %f22,%f22,%f10 ! (5_1) dtmp0 = dexp_hi * dexp_hi; 555 faddd %f8,%f30,%f30 ! 
(6_2) res += dexp_hi; 556 557 fmuld %f62,%f40,%f32 ! (3_1) res *= xx; 558 cmp %g1,_0x7ff00000 ! (6_1) hx ? 0x7ff00000 559 ldd [%fp+tmp1],%f62 ! (6_2) dlexp = *(double*)lexp; 560 fand %f18,DC3,%f8 ! (5_1) res_c = vis_fand(res_c,DC3); 561 562 fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 563 bge,pn %icc,.update12 ! (6_1) if ( hx >= 0x7ff00000 ) 564 and %o2,2040,%o2 ! (6_1) hx &= 0x7f8; 565 faddd %f34,K3,%f34 ! (2_1) res += K3; 566 .cont12: 567 fmuld %f24,%f58,%f58 ! (0_1) res = dexp_hi * res; 568 cmp %g1,_0x00100000 ! (6_1) hx ? 0x00100000 569 sub %g0,%o7,%o7 ! (6_1) iexp = -iexp; 570 fand %f0,DC0,%f16 ! (0_0) res = vis_fand(res,DC0); 571 572 fmuld %f30,%f62,%f2 ! (6_2) res *= dlexp; 573 bl,pn %icc,.update13 ! (6_1) if ( hx < 0x00100000 ) 574 ldd [%l2+8],%f30 ! (0_1) dexp_lo = ((double*)addr)[1]; 575 fpadd32 %f44,DC2,%f18 ! (6_1) res_c = vis_fpadd32(res,DC2); 576 .cont13: 577 fmuld K6,%f60,%f62 ! (4_1) res = K6 * xx; 578 add %o2,8,%l4 ! (6_1) hx += 8; 579 st %f2,[%i0] ! (6_2) ((float*)py)[0] = ((float*)res)[0]; 580 fsubd %f28,%f8,%f6 ! (5_1) xx = res - res_c; 581 582 fmuld %f34,%f36,%f28 ! (2_1) res *= xx; 583 add %o7,1534,%o7 ! (6_1) iexp += 0x5fe; 584 st %f3,[%i0+4] ! (6_2) ((float*)py)[1] = ((float*)res)[1]; 585 faddd %f32,K4,%f32 ! (3_1) res += K4; 586 587 lda [%l6]%asi,%g1 ! (0_0) hx = *(int*)px; 588 sllx %o7,52,%o7 ! (6_1) iexp << 52; 589 and %l4,-16,%l4 ! (6_1) hx = -16; 590 faddd %f26,K1,%f26 ! (1_1) res += K1; 591 592 add %i1,stridey,%i0 ! px += stridey 593 add %l4,TBL,%l4 ! (6_1) addr = (char*)arr + hx; 594 stx %o7,[%fp+tmp1] ! (6_1) dlexp = *(double*)lexp; 595 faddd %f58,%f30,%f8 ! (0_1) res += dexp_lo; 596 597 fmuld %f6,%f10,%f58 ! (5_1) xx *= dtmp0; 598 add %l6,stridex,%l6 ! px += stridex 599 ldd [%l4],%f30 ! (6_1) dtmp0 = ((double*)addr)[0]; 600 faddd %f62,K5,%f62 ! (4_1) res += K5; 601 602 fmuld %f32,%f40,%f34 ! (3_1) res *= xx; 603 sra %g1,10,%o2 ! (0_0) hx >>= 10; 604 ldd [%i2],%f4 ! (1_1) dexp_hi = ((double*)addr)[0]; 605 faddd %f28,K2,%f32 ! 
(2_1) res += K2; 606 607 fmuld %f26,%f38,%f26 ! (1_1) res *= xx; 608 sra %g1,21,%o7 ! (0_0) iexp = hx >> 21; 609 lda [%l6]%asi,%f6 ! (1_0) ((float*)res)[0] = ((float*)px)[0]; 610 for %f16,DC1,%f28 ! (0_0) res = vis_for(res,DC1); 611 612 fmuld %f30,%f30,%f30 ! (6_1) dtmp0 = dexp_hi * dexp_hi; 613 sub %g0,%o7,%o7 ! (0_0) iexp = -iexp; 614 lda [%l6+4]%asi,%f7 ! (1_0) ((float*)res)[1] = ((float*)px)[1]; 615 faddd %f8,%f24,%f24 ! (0_1) res += dexp_hi; 616 617 fmuld %f62,%f60,%f38 ! (4_1) res *= xx; 618 cmp %g1,_0x7ff00000 ! (0_0) hx ? 0x7ff00000 619 ldd [%fp+tmp2],%f62 ! (0_1) dlexp = *(double*)lexp; 620 fand %f18,DC3,%f8 ! (6_1) res_c = vis_fand(res_c,DC3); 621 622 fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 623 bge,pn %icc,.update14 ! (0_0) if ( hx >= 0x7ff00000 ) 624 and %o2,2040,%o2 ! (0_0) hx &= 0x7f8; 625 faddd %f34,K3,%f34 ! (3_1) res += K3; 626 .cont14: 627 fmuld %f4,%f26,%f26 ! (1_1) res = dexp_hi * res; 628 cmp %g1,_0x00100000 ! (0_0) hx ? 0x00100000 629 add %o7,1534,%o7 ! (0_0) iexp += 0x5fe; 630 fand %f6,DC0,%f16 ! (1_0) res = vis_fand(res,DC0); 631 632 fmuld %f24,%f62,%f2 ! (0_1) res *= dlexp; 633 bl,pn %icc,.update15 ! (0_0) if ( hx < 0x00100000 ) 634 ldd [%i2+8],%f24 ! (1_1) dexp_lo = ((double*)addr)[1]; 635 fpadd32 %f28,DC2,%f18 ! (0_0) res_c = vis_fpadd32(res,DC2); 636 .cont15: 637 fmuld K6,%f58,%f62 ! (5_1) res = K6 * xx; 638 add %o2,8,%l2 ! (0_0) hx += 8; 639 st %f2,[%i1] ! (0_1) ((float*)py)[0] = ((float*)res)[0]; 640 fsubd %f44,%f8,%f10 ! (6_1) xx = res - res_c; 641 642 fmuld %f34,%f40,%f44 ! (3_1) res *= xx; 643 nop 644 st %f3,[%i1+4] ! (0_1) ((float*)py)[1] = ((float*)res)[1]; 645 faddd %f38,K4,%f38 ! (4_1) res += K4; 646 647 lda [%l6]%asi,%g1 ! (1_0) hx = *(int*)px; 648 sllx %o7,52,%o7 ! (0_0) iexp << 52; 649 and %l2,-16,%l2 ! (0_0) hx = -16; 650 faddd %f32,K1,%f32 ! (2_1) res += K1; 651 652 add %l2,TBL,%l2 ! (0_0) addr = (char*)arr + hx; 653 add %l6,stridex,%l6 ! px += stridex 654 stx %o7,[%fp+tmp2] ! 
(0_0) dlexp = *(double*)lexp; 655 faddd %f26,%f24,%f8 ! (1_1) res += dexp_lo; 656 657 fmuld %f10,%f30,%f26 ! (6_1) xx *= dtmp0; 658 add %i0,stridey,%i1 ! px += stridey 659 ldd [%l2],%f30 ! (0_0) dtmp0 = ((double*)addr)[0]; 660 faddd %f62,K5,%f62 ! (5_1) res += K5; 661 662 fmuld %f38,%f60,%f34 ! (4_1) res *= xx; 663 sra %g1,10,%o2 ! (1_0) hx >>= 10; 664 ldd [%i4],%f24 ! (2_1) dexp_hi = ((double*)addr)[0]; 665 faddd %f44,K2,%f38 ! (3_1) res += K2; 666 667 fmuld %f32,%f36,%f32 ! (2_1) res *= xx; 668 sra %g1,21,%o7 ! (1_0) iexp = hx >> 21; 669 lda [%l6]%asi,%f0 ! (2_0) ((float*)res)[0] = ((float*)px)[0]; 670 for %f16,DC1,%f44 ! (1_0) res = vis_for(res,DC1); 671 672 fmuld %f30,%f30,%f30 ! (0_0) dtmp0 = dexp_hi * dexp_hi; 673 cmp %g1,_0x7ff00000 ! (1_0) hx ? 0x7ff00000 674 lda [%l6+4]%asi,%f1 ! (2_0) ((float*)res)[1] = ((float*)px)[1]; 675 faddd %f8,%f4,%f4 ! (1_1) res += dexp_hi; 676 677 fmuld %f62,%f58,%f36 ! (5_1) res *= xx; 678 bge,pn %icc,.update16 ! (1_0) if ( hx >= 0x7ff00000 ) 679 ldd [%fp+tmp3],%f62 ! (1_1) dlexp = *(double*)lexp; 680 fand %f18,DC3,%f8 ! (0_0) res_c = vis_fand(res_c,DC3); 681 .cont16: 682 fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 683 cmp %g1,_0x00100000 ! (1_0) hx ? 0x00100000 684 and %o2,2040,%o2 ! (1_0) hx &= 0x7f8; 685 faddd %f34,K3,%f34 ! (4_1) res += K3; 686 687 fmuld %f24,%f32,%f32 ! (2_1) res = dexp_hi * res; 688 bl,pn %icc,.update17 ! (1_0) if ( hx < 0x00100000 ) 689 sub %g0,%o7,%o7 ! (1_0) iexp = -iexp; 690 fand %f0,DC0,%f16 ! (2_0) res = vis_fand(res,DC0); 691 .cont17: 692 fmuld %f4,%f62,%f2 ! (1_1) res *= dlexp; 693 add %o7,1534,%o7 ! (1_0) iexp += 0x5fe; 694 ldd [%i4+8],%f4 ! (2_1) dexp_lo = ((double*)addr)[1]; 695 fpadd32 %f44,DC2,%f18 ! (1_0) res_c = vis_fpadd32(res,DC2); 696 697 fmuld K6,%f26,%f62 ! (6_1) res = K6 * xx; 698 add %o2,8,%i2 ! (1_0) hx += 8; 699 st %f2,[%i0] ! (1_1) ((float*)py)[0] = ((float*)res)[0]; 700 fsubd %f28,%f8,%f6 ! (0_0) xx = res - res_c; 701 702 fmuld %f34,%f60,%f28 ! 
(4_1) res *= xx; 703 nop 704 st %f3,[%i0+4] ! (1_1) ((float*)py)[1] = ((float*)res)[1]; 705 faddd %f36,K4,%f36 ! (5_1) res += K4; 706 707 lda [%l6]%asi,%g1 ! (2_0) hx = *(int*)px; 708 sllx %o7,52,%o7 ! (1_0) iexp << 52; 709 and %i2,-16,%i2 ! (1_0) hx = -16; 710 faddd %f38,K1,%f38 ! (3_1) res += K1; 711 712 add %i1,stridey,%i0 ! px += stridey 713 add %i2,TBL,%i2 ! (1_0) addr = (char*)arr + hx; 714 stx %o7,[%fp+tmp3] ! (1_0) dlexp = *(double*)lexp; 715 faddd %f32,%f4,%f8 ! (2_1) res += dexp_lo; 716 717 fmuld %f6,%f30,%f32 ! (0_0) xx *= dtmp0; 718 add %l6,stridex,%l6 ! px += stridex 719 ldd [%i2],%f30 ! (1_0) dtmp0 = ((double*)addr)[0]; 720 faddd %f62,K5,%f62 ! (6_1) res += K5; 721 722 fmuld %f36,%f58,%f34 ! (5_1) res *= xx; 723 sra %g1,10,%o2 ! (2_0) hx >>= 10; 724 ldd [%i5],%f4 ! (3_1) dexp_hi = ((double*)addr)[0]; 725 faddd %f28,K2,%f36 ! (4_1) res += K2; 726 727 fmuld %f38,%f40,%f38 ! (3_1) res *= xx; 728 sra %g1,21,%o7 ! (2_0) iexp = hx >> 21; 729 lda [%l6]%asi,%f6 ! (3_0) ((float*)res)[0] = ((float*)px)[0]; 730 for %f16,DC1,%f28 ! (2_0) res = vis_for(res,DC1); 731 732 fmuld %f30,%f30,%f30 ! (1_0) dtmp0 = dexp_hi * dexp_hi; 733 cmp %g1,_0x7ff00000 ! (2_0) hx ? 0x7ff00000 734 lda [%l6+4]%asi,%f7 ! (3_0) ((float*)res)[1] = ((float*)px)[1]; 735 faddd %f8,%f24,%f24 ! (2_1) res += dexp_hi; 736 737 fmuld %f62,%f26,%f40 ! (6_1) res *= xx; 738 bge,pn %icc,.update18 ! (2_0) if ( hx >= 0x7ff00000 ) 739 ldd [%fp+tmp4],%f62 ! (2_1) dlexp = *(double*)lexp; 740 fand %f18,DC3,%f8 ! (1_0) res_c = vis_fand(res_c,DC3); 741 .cont18: 742 fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 743 cmp %g1,_0x00100000 ! (2_0) hx ? 0x00100000 744 and %o2,2040,%o2 ! (2_0) hx &= 0x7f8; 745 faddd %f34,K3,%f34 ! (5_1) res += K3; 746 747 fmuld %f4,%f38,%f38 ! (3_1) res = dexp_hi * res; 748 bl,pn %icc,.update19 ! (2_0) if ( hx < 0x00100000 ) 749 sub %g0,%o7,%o7 ! (2_0) iexp = -iexp; 750 fand %f6,DC0,%f16 ! (3_0) res = vis_fand(res,DC0); 751 .cont19: 752 fmuld %f24,%f62,%f2 ! 
(2_1) res *= dlexp; 753 add %o7,1534,%o7 ! (2_0) iexp += 0x5fe; 754 ldd [%i5+8],%f24 ! (3_1) dexp_lo = ((double*)addr)[1]; 755 fpadd32 %f28,DC2,%f18 ! (2_0) res_c = vis_fpadd32(res,DC2); 756 757 fmuld K6,%f32,%f62 ! (0_0) res = K6 * xx; 758 add %o2,8,%i4 ! (2_0) hx += 8; 759 st %f2,[%i1] ! (2_1) ((float*)py)[0] = ((float*)res)[0]; 760 fsubd %f44,%f8,%f10 ! (1_0) xx = res - res_c; 761 762 fmuld %f34,%f58,%f44 ! (5_1) res *= xx; 763 nop 764 st %f3,[%i1+4] ! (2_1) ((float*)py)[1] = ((float*)res)[1]; 765 faddd %f40,K4,%f40 ! (6_1) res += K4; 766 767 lda [%l6]%asi,%g1 ! (3_0) hx = *(int*)px; 768 sllx %o7,52,%o7 ! (2_0) iexp << 52; 769 and %i4,-16,%i4 ! (2_0) hx = -16; 770 faddd %f36,K1,%f36 ! (4_1) res += K1; 771 772 add %l6,stridex,%l6 ! px += stridex 773 add %i4,TBL,%i4 ! (2_0) addr = (char*)arr + hx; 774 stx %o7,[%fp+tmp4] ! (2_0) dlexp = *(double*)lexp; 775 faddd %f38,%f24,%f8 ! (3_1) res += dexp_lo; 776 777 fmuld %f10,%f30,%f38 ! (1_0) xx *= dtmp0; 778 add %i0,stridey,%i1 ! px += stridey 779 ldd [%i4],%f24 ! (2_0) dtmp0 = ((double*)addr)[0]; 780 faddd %f62,K5,%f62 ! (0_0) res += K5; 781 782 fmuld %f40,%f26,%f34 ! (6_1) res *= xx; 783 sra %g1,10,%o2 ! (3_0) hx >>= 10; 784 ldd [%l1],%f30 ! (4_1) dexp_hi = ((double*)addr)[0]; 785 faddd %f44,K2,%f40 ! (5_1) res += K2; 786 787 fmuld %f36,%f60,%f36 ! (4_1) res *= xx; 788 sra %g1,21,%o7 ! (3_0) iexp = hx >> 21; 789 lda [%l6]%asi,%f0 ! (4_0) ((float*)res)[0] = ((float*)px)[0]; 790 for %f16,DC1,%f44 ! (3_0) res = vis_for(res,DC1); 791 792 fmuld %f24,%f24,%f24 ! (2_0) dtmp0 = dexp_hi * dexp_hi; 793 cmp %g1,_0x7ff00000 ! (3_0) hx ? 0x7ff00000 794 lda [%l6+4]%asi,%f1 ! (4_0) ((float*)res)[1] = ((float*)px)[1]; 795 faddd %f8,%f4,%f8 ! (3_1) res += dexp_hi; 796 797 fmuld %f62,%f32,%f60 ! (0_0) res *= xx; 798 bge,pn %icc,.update20 ! (3_0) if ( hx >= 0x7ff00000 ) 799 ldd [%fp+tmp5],%f62 ! (3_1) dlexp = *(double*)lexp; 800 fand %f18,DC3,%f4 ! (2_0) res_c = vis_fand(res_c,DC3); 801 .cont20: 802 fmuld %f40,%f58,%f40 ! 
(5_1) res *= xx; 803 cmp %g1,_0x00100000 ! (3_0) hx ? 0x00100000 804 and %o2,2040,%o2 ! (3_0) hx &= 0x7f8; 805 faddd %f34,K3,%f10 ! (6_1) res += K3; 806 807 fmuld %f30,%f36,%f36 ! (4_1) res = dexp_hi * res; 808 bl,pn %icc,.update21 ! (3_0) if ( hx < 0x00100000 ) 809 sub %g0,%o7,%o7 ! (3_0) iexp = -iexp; 810 fand %f0,DC0,%f16 ! (4_0) res = vis_fand(res,DC0); 811 .cont21: 812 fmuld %f8,%f62,%f8 ! (3_1) res *= dlexp; 813 add %o7,1534,%o7 ! (3_0) iexp += 0x5fe; 814 ldd [%l1+8],%f34 ! (4_1) dexp_lo = ((double*)addr)[1]; 815 fpadd32 %f44,DC2,%f18 ! (3_0) res_c = vis_fpadd32(res,DC2); 816 817 fmuld K6,%f38,%f62 ! (1_0) res = K6 * xx; 818 add %o2,8,%i5 ! (3_0) hx += 8; 819 st %f8,[%i0] ! (3_1) ((float*)py)[0] = ((float*)res)[0]; 820 fsubd %f28,%f4,%f28 ! (2_0) xx = res - res_c; 821 822 fmuld %f10,%f26,%f4 ! (6_1) res *= xx; 823 nop 824 st %f9,[%i0+4] ! (3_1) ((float*)py)[1] = ((float*)res)[1]; 825 faddd %f60,K4,%f60 ! (0_0) res += K4; 826 827 lda [%l6]%asi,%g1 ! (4_0) hx = *(int*)px; 828 sllx %o7,52,%o7 ! (3_0) iexp << 52; 829 and %i5,-16,%i5 ! (3_0) hx = -16; 830 faddd %f40,K1,%f40 ! (5_1) res += K1; 831 832 add %l6,stridex,%i0 ! px += stridex 833 add %i5,TBL,%i5 ! (3_0) addr = (char*)arr + hx; 834 stx %o7,[%fp+tmp5] ! (3_0) dlexp = *(double*)lexp; 835 faddd %f36,%f34,%f8 ! (4_1) res += dexp_lo; 836 837 fmuld %f28,%f24,%f36 ! (2_0) xx *= dtmp0; 838 add %i1,stridey,%l6 ! px += stridey 839 ldd [%i5],%f28 ! (3_0) dtmp0 = ((double*)addr)[0]; 840 faddd %f62,K5,%f62 ! (1_0) res += K5; 841 842 faddd %f4,K2,%f10 ! (6_1) res += K2; 843 sra %g1,10,%o2 ! (4_0) hx >>= 10; 844 nop 845 fmuld %f60,%f32,%f34 ! (0_0) res *= xx; 846 847 fmuld %f40,%f58,%f40 ! (5_1) res *= xx; 848 sra %g1,21,%o7 ! (4_0) iexp = hx >> 21; 849 lda [%i0]%asi,%f6 ! (5_0) ((float*)res)[0] = ((float*)px)[0]; 850 for %f16,DC1,%f24 ! (4_0) res = vis_for(res,DC1); 851 852 fmuld %f28,%f28,%f28 ! (3_0) dtmp0 = dexp_hi * dexp_hi; 853 cmp %g1,_0x7ff00000 ! (4_0) hx ? 0x7ff00000 854 lda [%i0+4]%asi,%f7 ! 
(5_0) ((float*)res)[1] = ((float*)px)[1]; 855 faddd %f8,%f30,%f30 ! (4_1) res += dexp_hi; 856 857 fand %f18,DC3,%f8 ! (3_0) res_c = vis_fand(res_c,DC3); 858 bge,pn %icc,.update22 ! (4_0) if ( hx >= 0x7ff00000 ) 859 ldd [%fp+tmp6],%f18 ! (4_1) dlexp = *(double*)lexp; 860 fmuld %f62,%f38,%f62 ! (1_0) res *= xx; 861 .cont22: 862 fmuld %f10,%f26,%f58 ! (6_1) res *= xx; 863 cmp %g1,_0x00100000 ! (4_0) hx ? 0x00100000 864 and %o2,2040,%o2 ! (4_0) hx &= 0x7f8; 865 faddd %f34,K3,%f60 ! (0_0) res += K3; 866 867 fmuld %f22,%f40,%f40 ! (5_1) res = dexp_hi * res; 868 bl,pn %icc,.update23 ! (4_0) if ( hx < 0x00100000 ) 869 sub %g0,%o7,%o7 ! (4_0) iexp = -iexp; 870 fand %f6,DC0,%f16 ! (5_0) res = vis_fand(res,DC0); 871 .cont23: 872 fmuld %f30,%f18,%f6 ! (4_1) res *= dlexp; 873 add %o7,1534,%o7 ! (4_0) iexp += 0x5fe; 874 ldd [%i3+8],%f34 ! (5_1) dexp_lo = ((double*)addr)[1]; 875 fpadd32 %f24,DC2,%f18 ! (4_0) res_c = vis_fpadd32(res,DC2); 876 877 fmuld K6,%f36,%f30 ! (2_0) res = K6 * xx; 878 add %o2,8,%l1 ! (4_0) hx += 8; 879 st %f6,[%i1] ! (4_1) ((float*)py)[0] = ((float*)res)[0]; 880 fsubd %f44,%f8,%f44 ! (3_0) xx = res - res_c; 881 882 fmuld %f60,%f32,%f60 ! (0_0) res *= xx; 883 sllx %o7,52,%o7 ! (4_0) iexp << 52; 884 st %f7,[%i1+4] ! (4_1) ((float*)py)[1] = ((float*)res)[1]; 885 faddd %f62,K4,%f6 ! (1_0) res += K4; 886 887 lda [%i0]%asi,%g1 ! (5_0) hx = *(int*)px; 888 add %i0,stridex,%i1 ! px += stridex 889 and %l1,-16,%l1 ! (4_0) hx = -16; 890 faddd %f58,K1,%f58 ! (6_1) res += K1; 891 892 add %l1,TBL,%l1 ! (4_0) addr = (char*)arr + hx; 893 add %l6,stridey,%i0 ! px += stridey 894 stx %o7,[%fp+tmp6] ! (4_0) dlexp = *(double*)lexp; 895 faddd %f40,%f34,%f8 ! (5_1) res += dexp_lo; 896 897 fmuld %f44,%f28,%f40 ! (3_0) xx *= dtmp0; 898 nop 899 ldd [%l1],%f44 ! (4_0) dtmp0 = ((double*)addr)[0]; 900 faddd %f30,K5,%f62 ! (2_0) res += K5; 901 902 fmuld %f6,%f38,%f34 ! (1_0) res *= xx; 903 sra %g1,21,%o7 ! (5_0) iexp = hx >> 21; 904 ldd [%l4],%f30 ! 
(6_1) dexp_hi = ((double*)addr)[0]; 905 faddd %f60,K2,%f60 ! (0_0) res += K2; 906 907 for %f16,DC1,%f28 ! (5_0) res = vis_for(res,DC1); 908 sub %g0,%o7,%o7 ! (5_0) iexp = -iexp; 909 lda [%i1]%asi,%f6 ! (6_0) ((float*)res)[0] = ((float*)px)[0]; 910 fmuld %f58,%f26,%f26 ! (6_1) res *= xx; 911 912 fmuld %f44,%f44,%f44 ! (4_0) dtmp0 = dexp_hi * dexp_hi; 913 cmp %g1,_0x7ff00000 ! (5_0) hx ? 0x7ff00000 914 lda [%i1+4]%asi,%f7 ! (6_0) ((float*)res)[1] = ((float*)px)[1]; 915 faddd %f8,%f22,%f22 ! (5_1) res += dexp_hi; 916 917 fand %f18,DC3,%f8 ! (4_0) res_c = vis_fand(res_c,DC3); 918 bge,pn %icc,.update24 ! (5_0) if ( hx >= 0x7ff00000 ) 919 ldd [%fp+tmp0],%f18 ! (5_1) dlexp = *(double*)lexp; 920 fmuld %f62,%f36,%f62 ! (2_0) res *= xx; 921 .cont24: 922 fmuld %f60,%f32,%f58 ! (0_0) res *= xx; 923 sra %g1,10,%o2 ! (5_0) hx >>= 10; 924 cmp %g1,_0x00100000 ! (5_0) hx ? 0x00100000 925 faddd %f34,K3,%f34 ! (1_0) res += K3; 926 927 fmuld %f30,%f26,%f26 ! (6_1) res = dexp_hi * res; 928 bl,pn %icc,.update25 ! (5_0) if ( hx < 0x00100000 ) 929 and %o2,2040,%o2 ! (5_0) hx &= 0x7f8; 930 fand %f6,DC0,%f16 ! (6_0) res = vis_fand(res,DC0); 931 .cont25: 932 fmuld %f22,%f18,%f2 ! (5_1) res *= dlexp; 933 subcc counter,7,counter ! counter -= 7; 934 ldd [%l4+8],%f60 ! (6_1) dexp_lo = ((double*)addr)[1]; 935 fpadd32 %f28,DC2,%f18 ! (5_0) res_c = vis_fpadd32(res,DC2); 936 937 fmuld K6,%f40,%f22 ! (3_0) res = K6 * xx; 938 add %o2,8,%i3 ! (5_0) hx += 8; 939 st %f2,[%l6] ! (5_1) ((float*)py)[0] = ((float*)res)[0]; 940 fsubd %f24,%f8,%f10 ! (4_0) xx = res - res_c; 941 942 fmuld %f34,%f38,%f24 ! (1_0) res *= xx; 943 st %f3,[%l6+4] ! (5_1) ((float*)py)[1] = ((float*)res)[1]; 944 bpos,pt %icc,.main_loop 945 faddd %f62,K4,%f34 ! (2_0) res += K4; 946 947 add counter,7,counter 948 .tail: 949 add %o7,1534,%o7 ! (5_0) iexp += 0x5fe; 950 subcc counter,1,counter 951 bneg,a .begin 952 mov %i0,%o4 953 954 faddd %f58,K1,%f58 ! (0_1) res += K1; 955 956 faddd %f26,%f60,%f8 ! 
!   ^-- (6_2) res += dexp_lo;
!
! Continuation of .tail, the drain of the software pipeline.  Every stage
! has the same shape: finish the polynomial / table arithmetic for one
! in-flight element, multiply by its dlexp read back from the tmpN stack
! slot, store the two result words, then
!	subcc counter,1,counter ; bneg,a .begin ; mov <next py>,%o4
! so that control leaves for .begin as soon as counter goes negative.  The
! last stage leaves unconditionally.  Instruction order is part of the
! schedule - do not reorder.
!

	faddd	%f22,K5,%f62		! (3_1) res += K5;
	fmuld	%f10,%f44,%f60		! (4_1) xx *= dtmp0;

	faddd	%f24,K2,%f26		! (1_1) res += K2;
	add	%i1,stridex,%l6		! px += stridex
	ldd	[%l2],%f24		! (0_1) dexp_hi = ((double*)addr)[0];
	fmuld	%f34,%f36,%f34		! (2_1) res *= xx;

	fmuld	%f58,%f32,%f58		! (0_1) res *= xx;

	add	%i0,stridey,%i1		! py += stridey
	faddd	%f8,%f30,%f30		! (6_2) res += dexp_hi;

	fmuld	%f62,%f40,%f32		! (3_1) res *= xx;
	ldd	[%fp+tmp1],%f62		! (6_2) dlexp = *(double*)lexp;

	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;
	faddd	%f34,K3,%f34		! (2_1) res += K3;

	fmuld	%f24,%f58,%f58		! (0_1) res = dexp_hi * res;

	fmuld	%f30,%f62,%f2		! (6_2) res *= dlexp;
	ldd	[%l2+8],%f30		! (0_1) dexp_lo = ((double*)addr)[1];

	fmuld	K6,%f60,%f62		! (4_1) res = K6 * xx;
	st	%f2,[%i0]		! (6_2) ((float*)py)[0] = ((float*)res)[0];

	fmuld	%f34,%f36,%f28		! (2_1) res *= xx;
	st	%f3,[%i0+4]		! (6_2) ((float*)py)[1] = ((float*)res)[1];
	faddd	%f32,K4,%f32		! (3_1) res += K4;

	subcc	counter,1,counter
	bneg,a	.begin
	mov	%i1,%o4

	faddd	%f26,K1,%f26		! (1_1) res += K1;

	faddd	%f58,%f30,%f8		! (0_1) res += dexp_lo;

	add	%l6,stridex,%l6		! px += stridex
	faddd	%f62,K5,%f62		! (4_1) res += K5;

	fmuld	%f32,%f40,%f34		! (3_1) res *= xx;
	add	%i1,stridey,%i0		! py += stridey
	ldd	[%i2],%f22		! (1_1) dexp_hi = ((double*)addr)[0];
	faddd	%f28,K2,%f32		! (2_1) res += K2;

	fmuld	%f26,%f38,%f26		! (1_1) res *= xx;

	faddd	%f8,%f24,%f24		! (0_1) res += dexp_hi;

	fmuld	%f62,%f60,%f38		! (4_1) res *= xx;
	ldd	[%fp+tmp2],%f62		! (0_1) dlexp = *(double*)lexp;

	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;
	faddd	%f34,K3,%f34		! (3_1) res += K3;

	fmuld	%f22,%f26,%f26		! (1_1) res = dexp_hi * res;

	fmuld	%f24,%f62,%f2		! (0_1) res *= dlexp;
	ldd	[%i2+8],%f24		! (1_1) dexp_lo = ((double*)addr)[1];

	st	%f2,[%i1]		! (0_1) ((float*)py)[0] = ((float*)res)[0];

	fmuld	%f34,%f40,%f44		! (3_1) res *= xx;
	st	%f3,[%i1+4]		! (0_1) ((float*)py)[1] = ((float*)res)[1];
	faddd	%f38,K4,%f38		! (4_1) res += K4;

	subcc	counter,1,counter
	bneg,a	.begin
	mov	%i0,%o4

	faddd	%f32,K1,%f32		! (2_1) res += K1;

	add	%l6,stridex,%l6		! px += stridex
	faddd	%f26,%f24,%f8		! (1_1) res += dexp_lo;

	add	%i0,stridey,%i1		! py += stridey

	fmuld	%f38,%f60,%f34		! (4_1) res *= xx;
	ldd	[%i4],%f24		! (2_1) dexp_hi = ((double*)addr)[0];
	faddd	%f44,K2,%f38		! (3_1) res += K2;

	fmuld	%f32,%f36,%f32		! (2_1) res *= xx;

	faddd	%f8,%f22,%f22		! (1_1) res += dexp_hi;

	ldd	[%fp+tmp3],%f62		! (1_1) dlexp = *(double*)lexp;

	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;
	faddd	%f34,K3,%f34		! (4_1) res += K3;

	fmuld	%f24,%f32,%f32		! (2_1) res = dexp_hi * res;

	fmuld	%f22,%f62,%f2		! (1_1) res *= dlexp;
	ldd	[%i4+8],%f22		! (2_1) dexp_lo = ((double*)addr)[1];

	st	%f2,[%i0]		! (1_1) ((float*)py)[0] = ((float*)res)[0];

	fmuld	%f34,%f60,%f28		! (4_1) res *= xx;
	st	%f3,[%i0+4]		! (1_1) ((float*)py)[1] = ((float*)res)[1];

	subcc	counter,1,counter
	bneg,a	.begin
	mov	%i1,%o4

	faddd	%f38,K1,%f38		! (3_1) res += K1;

	faddd	%f32,%f22,%f8		! (2_1) res += dexp_lo;

	add	%l6,stridex,%l6		! px += stridex

	add	%i1,stridey,%i0		! py += stridey
	ldd	[%i5],%f22		! (3_1) dexp_hi = ((double*)addr)[0];
	faddd	%f28,K2,%f36		! (4_1) res += K2;

	fmuld	%f38,%f40,%f38		! (3_1) res *= xx;

	faddd	%f8,%f24,%f24		! (2_1) res += dexp_hi;

	ldd	[%fp+tmp4],%f62		! (2_1) dlexp = *(double*)lexp;

	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;

	fmuld	%f22,%f38,%f38		! (3_1) res = dexp_hi * res;

	fmuld	%f24,%f62,%f2		! (2_1) res *= dlexp;
	ldd	[%i5+8],%f24		! (3_1) dexp_lo = ((double*)addr)[1];

	st	%f2,[%i1]		! (2_1) ((float*)py)[0] = ((float*)res)[0];

	st	%f3,[%i1+4]		! (2_1) ((float*)py)[1] = ((float*)res)[1];

	subcc	counter,1,counter
	bneg,a	.begin
	mov	%i0,%o4

	faddd	%f36,K1,%f36		! (4_1) res += K1;

	faddd	%f38,%f24,%f8		! (3_1) res += dexp_lo;

	add	%i0,stridey,%i1		! py += stridey

	add	%l6,stridex,%l6		! px += stridex
	ldd	[%l1],%f30		! (4_1) dexp_hi = ((double*)addr)[0];

	fmuld	%f36,%f60,%f36		! (4_1) res *= xx;

	faddd	%f8,%f22,%f8		! (3_1) res += dexp_hi;

	ldd	[%fp+tmp5],%f62		! (3_1) dlexp = *(double*)lexp;

	fmuld	%f30,%f36,%f36		! (4_1) res = dexp_hi * res;

	fmuld	%f8,%f62,%f8		! (3_1) res *= dlexp;
	ldd	[%l1+8],%f34		! (4_1) dexp_lo = ((double*)addr)[1];

	st	%f8,[%i0]		! (3_1) ((float*)py)[0] = ((float*)res)[0];

	st	%f9,[%i0+4]		! (3_1) ((float*)py)[1] = ((float*)res)[1];

	subcc	counter,1,counter
	bneg,a	.begin
	mov	%i1,%o4

	faddd	%f36,%f34,%f8		! (4_1) res += dexp_lo;

	add	%l6,stridex,%i0		! px += stridex

	add	%i1,stridey,%l6		! py += stridey

	faddd	%f8,%f30,%f30		! (4_1) res += dexp_hi;

	ldd	[%fp+tmp6],%f18		! (4_1) dlexp = *(double*)lexp;

	fmuld	%f30,%f18,%f6		! (4_1) res *= dlexp;

	st	%f6,[%i1]		! (4_1) ((float*)py)[0] = ((float*)res)[0];

	st	%f7,[%i1+4]		! (4_1) ((float*)py)[1] = ((float*)res)[1];

	ba	.begin			! last drained element: always leave
	add	%i1,stridey,%o4		! (delay slot) %o4 = next output slot

!
! .spec0: stores DONE / res for the current element, steps px (%i1) and
! py (%o4) by their strides, decrements counter and continues at .begin1.
! Per the algorithm comment at the top of the file this is the
! hx >= 0x7ff00000 case; the branch that reaches it is outside this part
! of the file.
!
	.align	16
.spec0:
	fdivd	DONE,%f0,%f0		! res = DONE / res;
	add	%i1,stridex,%i1		! px += stridex
	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
	add	%o4,stridey,%o4		! py += stridey
	ba	.begin1
	sub	counter,1,counter	! (delay slot)

!
! .spec1: first test of the small-input path.  When %i2 | %l4 is zero
! (presumably ax | lx from the algorithm comment - confirm at the branch
! site) the result is DONE / res, computed in the annulled delay slot, and
! stored at local label 2.
!
	.align	16
.spec1:
	orcc	%i2,%l4,%g0
	bz,a	2f
	fdivd	DONE,%f0,%f0
!   ^-- res = DONE / res;
!
! Remainder of .spec1 (the zero test and its fdivd delay slot precede this
! point).
!   hx < 0           : res = sqrt(res), stored at local label 2.
!   hx <  %i4        : res = (double)(long long bits of res)
!   hx >= %i4        : res = (double)(bits of (res & DC4)) + D2ON51
!                      (%i4 is presumably 0x00080000 as in the .updateN
!                      handlers - it is set outside this part of the file)
! In the last two cases the new high word is read back through tmp0 into
! %g1, iexp = -((hx >> 21) - 537), hx = (hx >> 10) & 0x7f8, the mantissa is
! rebuilt with DC0/DC1, and the element continues at .cont_spec.
!

	cmp	%g1,0
	bl,a	2f
	fsqrtd	%f0,%f0			! res = sqrt(res);

	cmp	%g1,%i4
	bge,a	1f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) DC4 for the fand at 1:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp0]		! high word of the converted value ...

	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp0],%g1		! ... becomes the new hx

	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);

	sra	%g1,10,%o2		! (6_1) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
	ba	.cont_spec
	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;

1:
	fand	%f0,%f18,%f0		! res = vis_fand(res,DC4);

	ldd	[%o3+0x58],%f28
	fxtod	%f0,%f0			! res = *(long long*)&res;

	faddd	%f0,%f28,%f0		! res += D2ON51;
	st	%f0,[%fp+tmp0]

	fand	%f0,DC0,%f16		! (6_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp0],%g1

	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
	for	%f16,DC1,%f44		! (6_1) res = vis_for(res,DC1);

	sra	%g1,10,%o2		! (6_1) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
	ba	.cont_spec
	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;

2:					! store the fdivd / fsqrtd result, next element
	add	%i1,stridex,%i1		! px += stridex
	st	%f0,[%o4]		! ((float*)py)[0] = ((float*)&res)[0];
	st	%f1,[%o4+4]		! ((float*)py)[1] = ((float*)&res)[1];
	add	%o4,stridey,%o4		! py += stridey
	ba	.begin1
	sub	counter,1,counter	! (delay slot)

!
! .update0: if counter <= 1 the pipeline simply resumes at .cont0.
! Otherwise %l6 - stridex is saved in tmp_px, counter - 1 in tmp_counter,
! and counter is cut down to 1 before resuming.  (Presumably the
! hx >= 0x7ff00000 exit of the pipeline, like .update22/.update24 - the
! branch itself is outside this part of the file.)
!
	.align	16
.update0:
	cmp	counter,1
	ble	.cont0
	nop

	sub	%l6,stridex,tmp_px
	sub	counter,1,tmp_counter

	ba	.cont0
	mov	1,counter		! (delay slot)

!
! .update1: small-input handler for element (0_0).
!   counter <= 1        : resume at .cont1 unchanged.
!   hx < 0 or hx|lx == 0: defer the element (local label 1): save its
!                         address and the remaining count, cut counter to 1.
!   otherwise           : rescale the value in %f8 in place and resume.
!
	.align	16
.update1:
	cmp	counter,1
	ble	.cont1
	sub	%l6,stridex,%i1		! (delay slot) address of the element

	ld	[%i1+4],%i2		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i2,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i3

	cmp	%g1,%i3
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f8,%f8			! res = *(long long*)&res;
	st	%f8,[%fp+tmp7]

	fand	%f8,DC0,%f16
!   ^-- (0_0) res = vis_fand(res,DC0);
!
! Continuation of .update1 (hx < 0x00080000 path): the high word of the
! converted value is read back from tmp7 as the new hx.
!
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (0_0) hx >>= 10;
	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);

	sub	%o7,537,%o7		! iexp -= 537;

	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;

	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
	ba	.cont1
	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
2:					! .update1, hx >= 0x00080000
	fand	%f8,%f18,%f8		! res &= mask loaded from [%o3+0x50]
	fxtod	%f8,%f8			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f8,%f18,%f8		! res += constant at [%o3+0x58]
	st	%f8,[%fp+tmp7]

	fand	%f8,DC0,%f16		! (0_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (0_0) hx >>= 10;
	for	%f16,DC1,%f28		! (0_0) res = vis_for(res,DC1);

	sub	%o7,537,%o7		! iexp -= 537;

	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;

	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
	ba	.cont1
	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
1:					! .update1, hx < 0 or hx|lx == 0: defer
	sub	%l6,stridex,tmp_px
	sub	counter,1,tmp_counter

	ba	.cont1
	mov	1,counter		! (delay slot)

!
! .update2: if counter <= 2 resume at .cont2 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 2 in tmp_counter, and cut counter
! down to 2.
!
	.align	16
.update2:
	cmp	counter,2
	ble	.cont2
	nop

	sub	%l6,stridex,tmp_px
	sub	counter,2,tmp_counter

	ba	.cont2
	mov	2,counter		! (delay slot)

!
! .update3: small-input handler for element (1_0); value in %f0, result
! mantissa in %f44.
!   counter <= 2        : resume at .cont3 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 2.
!   otherwise           : rescale in place and resume at .cont3.
! Unlike .update1 this handler does not negate iexp itself.
!
	.align	16
.update3:
	cmp	counter,2
	ble	.cont3
	sub	%l6,stridex,%i1		! (delay slot) address of the element

	ld	[%i1+4],%i2		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i2,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i3

	cmp	%g1,%i3
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);

	sra	%g1,10,%o2		! (1_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;
	ba	.cont3
	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
2:
	fand	%f0,%f18,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f0,%f18,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f16		! (1_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
	for	%f16,DC1,%f44		! (1_0) res = vis_for(res,DC1);

	sra	%g1,10,%o2		! (1_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;
	ba	.cont3
	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
1:
	sub	%l6,stridex,tmp_px
	sub	counter,2,tmp_counter

	ba	.cont3
	mov	2,counter		! (delay slot)

!
! .update4: if counter <= 3 resume at .cont4 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 3 in tmp_counter, and cut counter
! down to 3.
!
	.align	16
.update4:
	cmp	counter,3
	ble	.cont4
	nop

	sub	%l6,stridex,tmp_px
	sub	counter,3,tmp_counter

	ba	.cont4
	mov	3,counter		! (delay slot)

!
! .update5: small-input handler for element (2_0); value in %f6, result
! mantissa in %f28.
!   counter <= 3        : resume at .cont5 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 3.
!   otherwise           : rescale in place and resume at .cont5.
!
	.align	16
.update5:
	cmp	counter,3
	ble	.cont5
	sub	%l6,stridex,%i1		! (delay slot) address of the element

	ld	[%i1+4],%i3		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i3,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i4

	cmp	%g1,%i4
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f6,%f6			! res = *(long long*)&res;
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (2_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
	ba	.cont5
	for	%f16,DC1,%f28		! (2_0) res = vis_for(res,DC1);
2:
	fand	%f6,%f18,%f6		! res &= mask loaded from [%o3+0x50]
	fxtod	%f6,%f6			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f6,%f18,%f6		! res += constant at [%o3+0x58]
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f16		! (2_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (2_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
	ba	.cont5
	for	%f16,DC1,%f28
!   ^-- (2_0) res = vis_for(res,DC1);
!
! Out-of-line handlers .update6 - .update12, preceded by the deferral exit
! of .update5 and followed by the head of .update13.
!
! Even-numbered handlers: if counter <= K the pipeline resumes unchanged at
! .contN; otherwise the element's address is saved in tmp_px, counter - K
! in tmp_counter, and counter is cut down to K.
!
! Odd-numbered handlers (element with hx < 0x00100000): same counter test
! first.  Then the element's low word lx is loaded; if hx < 0 or
! hx | lx == 0 the element is deferred exactly like in the even handlers
! (local label 1).  Otherwise the value is rescaled in place - fxtod of the
! raw bits when hx < 0x00080000, else fand with [%o3+0x50], fxtod and faddd
! of [%o3+0x58] (local label 2) - the new hx is read back through tmp7,
! iexp = (hx >> 21) - 537 and hx = (hx >> 10) & 0x7f8 are recomputed, and
! the pipeline resumes at .contN.
!
! In .update9 and .update11 the load of lx is issued only after the counter
! test, as in every other odd-numbered handler of this file.  It used to
! sit in front of the test and was therefore also executed for an element
! that the test then discards.  NOTE(review): the pipeline reads ahead with
! lda [..]%asi, presumably a non-faulting ASI set up at function entry -
! confirm; a plain ld of such a read-ahead address is not protected that
! way.  %i3 is consumed only by the orcc that follows the load.
!
1:					! .update5: hx < 0 or hx|lx == 0: defer
	sub	%l6,stridex,tmp_px
	sub	counter,3,tmp_counter

	ba	.cont5
	mov	3,counter		! (delay slot)

	.align	16
.update6:
	cmp	counter,4
	ble	.cont6
	nop

	sub	%l6,stridex,tmp_px
	sub	counter,4,tmp_counter

	ba	.cont6
	mov	4,counter		! (delay slot)

! .update7: element (3_0); value in %f0, result mantissa in %f44.
	.align	16
.update7:
	sub	%l6,stridex,%i1		! address of the element
	cmp	counter,4
	ble	.cont7
	faddd	%f34,K3,%f6		! (6_1) res += K3;  (delay slot, always executed)

	ld	[%i1+4],%i3		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i3,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i5

	cmp	%g1,%i5
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (3_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
	ba	.cont7
	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
2:
	fand	%f0,%f18,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f0,%f18,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f16		! (3_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (3_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;
	ba	.cont7
	for	%f16,DC1,%f44		! (3_0) res = vis_for(res,DC1);
1:
	sub	%l6,stridex,tmp_px
	sub	counter,4,tmp_counter

	ba	.cont7
	mov	4,counter		! (delay slot)

	.align	16
.update8:
	cmp	counter,5
	ble	.cont8
	nop

	mov	%l6,tmp_px		! here %l6 itself is the element's address
	sub	counter,5,tmp_counter

	ba	.cont8
	mov	5,counter		! (delay slot)

! .update9: element (4_0); value in %f8, result mantissa in %f24.
	.align	16
.update9:
	cmp	counter,5
	ble	.cont9
	fand	%f0,DC0,%f16		! (5_0) res = vis_fand(res,DC0);  (delay slot, always executed)

	ld	[%l6+4],%i3		! lx - loaded only once the counter test has passed
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i3,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i1

	cmp	%g1,%i1
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f8,%f8			! res = *(long long*)&res;
	st	%f8,[%fp+tmp7]

	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (4_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
	ba	.cont9
	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
2:
	fand	%f8,%f18,%f8		! res &= mask loaded from [%o3+0x50]
	fxtod	%f8,%f8			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f8,%f18,%f8		! res += constant at [%o3+0x58]
	st	%f8,[%fp+tmp7]

	fand	%f8,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (4_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
	ba	.cont9
	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
1:
	mov	%l6,tmp_px
	sub	counter,5,tmp_counter

	ba	.cont9
	mov	5,counter		! (delay slot)

	.align	16
.update10:
	cmp	counter,6
	ble	.cont10
	nop

	mov	%i0,tmp_px		! here %i0 is the element's address
	sub	counter,6,tmp_counter

	ba	.cont10
	mov	6,counter		! (delay slot)

! .update11: element (5_0); value in %f0, result mantissa in %f28.
	.align	16
.update11:
	cmp	counter,6
	ble	.cont11
	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);  (delay slot, always executed)

	ld	[%i0+4],%i3		! lx - loaded only once the counter test has passed
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i3,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i3

	cmp	%g1,%i3
	bge,a	2f
	ldd	[%o3+0x50],%f18		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (5_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;

	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
	ba	.cont11
	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
2:
	fand	%f0,%f18,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f18
	faddd	%f0,%f18,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (5_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;

	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
	ba	.cont11
	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
1:
	mov	%i0,tmp_px
	sub	counter,6,tmp_counter

	ba	.cont11
	mov	6,counter		! (delay slot)

	.align	16
.update12:
	cmp	counter,0
	ble	.cont12
	faddd	%f34,K3,%f34		! (2_1) res += K3;  (delay slot, always executed)

	sub	%l6,stridex,tmp_px
	sub	counter,0,tmp_counter

	ba	.cont12
	mov	0,counter		! (delay slot)

! .update13: element (6_x); value in %f6, result mantissa in %f44.
	.align	16
.update13:
	sub	%l6,stridex,%l4		! address of the element
	cmp	counter,0
	ble	.cont13
	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);  (delay slot)

	ld	[%l4+4],%l4		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%l4,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%l4

	cmp	%g1,%l4
	bge,a	2f
	ldd	[%o3+0x50],%f62		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f6,%f6			! res = *(long long*)&res;
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
	sra	%g1,10,%o2		! (6_1) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);

	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
	ba	.cont13
	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
2:
	fand	%f6,%f62,%f6		! res &= mask loaded from [%o3+0x50]
	fxtod	%f6,%f6
!   ^-- res = *(long long*)&res;
!
! Continuation of .update13 (local label 2, hx >= 0x00080000): add the
! constant at [%o3+0x58], read the new hx back through tmp7, rebuild iexp,
! hx and the mantissa, and resume at .cont13.
!
	ldd	[%o3+0x58],%f62
	faddd	%f6,%f62,%f6		! res += constant at [%o3+0x58]
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (6_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (6_1) iexp = hx >> 21;
	sra	%g1,10,%o2		! (6_1) hx >>= 10;
	for	%f44,DC1,%f44		! (6_1) res = vis_for(res,DC1);

	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (6_1) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (6_1) iexp = -iexp;
	ba	.cont13
	fpadd32	%f44,DC2,%f18		! (6_1) res_c = vis_fpadd32(res,DC2);
1:					! .update13: hx < 0 or hx|lx == 0: defer
	sub	%l6,stridex,tmp_px
	sub	counter,0,tmp_counter

	ba	.cont13
	mov	0,counter		! (delay slot)

!
! .update14: if counter <= 1 resume at .cont14 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 1 in tmp_counter, and cut counter
! down to 1.  The faddd in the ble delay slot executes on both paths.
!
	.align	16
.update14:
	cmp	counter,1
	ble	.cont14
	faddd	%f34,K3,%f34		! (3_1) res += K3;

	sub	%l6,stridex,tmp_px
	sub	counter,1,tmp_counter

	ba	.cont14
	mov	1,counter		! (delay slot)

!
! .update15: small-input handler for element (0_0); value in %f0, result
! mantissa in %f28, res_c in %f18.
!   counter <= 1        : resume at .cont15 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 1.
!   otherwise           : rescale in place, recompute iexp (negated and
!                         biased by 0x5fe), hx and res_c, resume at .cont15.
!
	.align	16
.update15:
	sub	%l6,stridex,%l2		! address of the element
	cmp	counter,1
	ble	.cont15
	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);

	ld	[%l2+4],%l2		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%l2,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%l2

	cmp	%g1,%l2
	bge,a	2f
	ldd	[%o3+0x50],%f62		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (0_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);

	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;

	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
	ba	.cont15
	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
2:
	fand	%f0,%f62,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f62
	faddd	%f0,%f62,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f18		! (0_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (0_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (0_0) hx >>= 10;
	for	%f18,DC1,%f28		! (0_0) res = vis_for(res,DC1);

	sub	%o7,537,%o7		! iexp -= 537;

	sub	%g0,%o7,%o7		! (0_0) iexp = -iexp;

	and	%o2,2040,%o2		! (0_0) hx &= 0x7f8;
	add	%o7,1534,%o7		! (0_0) iexp += 0x5fe;
	ba	.cont15
	fpadd32	%f28,DC2,%f18		! (0_0) res_c = vis_fpadd32(res,DC2);
1:
	sub	%l6,stridex,tmp_px
	sub	counter,1,tmp_counter

	ba	.cont15
	mov	1,counter		! (delay slot)

!
! .update16: if counter <= 2 resume at .cont16 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 2 in tmp_counter, and cut counter
! down to 2.  The fand in the ble delay slot executes on both paths.
!
	.align	16
.update16:
	cmp	counter,2
	ble	.cont16
	fand	%f18,DC3,%f8		! (0_0) res_c = vis_fand(res_c,DC3);

	sub	%l6,stridex,tmp_px
	sub	counter,2,tmp_counter

	ba	.cont16
	mov	2,counter		! (delay slot)

!
! .update17: small-input handler for element (1_0); value in %f6, result
! mantissa in %f44, %f2 used as scratch for the two constants.
!   counter <= 2        : resume at .cont17 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 2.
!   otherwise           : rescale in place and resume at .cont17.
!
	.align	16
.update17:
	sub	%l6,stridex,%i2		! address of the element
	cmp	counter,2
	ble	.cont17
	fand	%f0,DC0,%f16		! (2_0) res = vis_fand(res,DC0);

	ld	[%i2+4],%i2		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i2,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i2

	cmp	%g1,%i2
	bge,a	2f
	ldd	[%o3+0x50],%f2		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f6,%f6			! res = *(long long*)&res;
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (1_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (1_0) iexp = -iexp;
	ba	.cont17
	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
2:
	fand	%f6,%f2,%f6		! res &= mask loaded from [%o3+0x50]
	fxtod	%f6,%f6			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f2
	faddd	%f6,%f2,%f6		! res += constant at [%o3+0x58]
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (1_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (1_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (1_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (1_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7
!   ^-- (1_0) iexp = -iexp;
!
! End of .update17: resume the pipeline with the rebuilt mantissa, or
! (local label 1) defer the element.
!
	ba	.cont17
	for	%f44,DC1,%f44		! (1_0) res = vis_for(res,DC1);
1:					! .update17: hx < 0 or hx|lx == 0: defer
	sub	%l6,stridex,tmp_px
	sub	counter,2,tmp_counter

	ba	.cont17
	mov	2,counter		! (delay slot)

!
! .update18: if counter <= 3 resume at .cont18 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 3 in tmp_counter, and cut counter
! down to 3.  The fand in the ble delay slot executes on both paths.
!
	.align	16
.update18:
	cmp	counter,3
	ble	.cont18
	fand	%f18,DC3,%f8		! (1_0) res_c = vis_fand(res_c,DC3);

	sub	%l6,stridex,tmp_px
	sub	counter,3,tmp_counter

	ba	.cont18
	mov	3,counter		! (delay slot)

!
! .update19: small-input handler for element (2_0); value in %f0, result
! mantissa in %f28, %f2 used as scratch for the two constants.
!   counter <= 3        : resume at .cont19 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 3.
!   otherwise           : rescale in place and resume at .cont19.
!
	.align	16
.update19:
	sub	%l6,stridex,%i4		! address of the element
	cmp	counter,3
	ble	.cont19
	fand	%f6,DC0,%f16		! (3_0) res = vis_fand(res,DC0);

	ld	[%i4+4],%i4		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i4,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i4

	cmp	%g1,%i4
	bge,a	2f
	ldd	[%o3+0x50],%f2		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (2_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
	ba	.cont19
	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
2:
	fand	%f0,%f2,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f2
	faddd	%f0,%f2,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f28		! (2_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (2_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (2_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (2_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (2_0) iexp = -iexp;
	ba	.cont19
	for	%f28,DC1,%f28		! (2_0) res = vis_for(res,DC1);
1:
	sub	%l6,stridex,tmp_px
	sub	counter,3,tmp_counter

	ba	.cont19
	mov	3,counter		! (delay slot)

!
! .update20: if counter <= 4 resume at .cont20 unchanged; otherwise save
! %l6 - stridex in tmp_px and counter - 4 in tmp_counter, and cut counter
! down to 4.
! NOTE(review): the delay-slot fand writes %f4, which the #defines at the
! top of the file name DONE, whereas .update16/.update18 write %f8.  It
! has to mirror the instruction in front of .cont20, which is outside this
! part of the file - confirm there before changing anything.
!
	.align	16
.update20:
	cmp	counter,4
	ble	.cont20
	fand	%f18,DC3,%f4		! (2_0) res_c = vis_fand(res_c,DC3);

	sub	%l6,stridex,tmp_px
	sub	counter,4,tmp_counter

	ba	.cont20
	mov	4,counter		! (delay slot)

!
! .update21: small-input handler for element (3_0); value in %f6, result
! mantissa in %f44, %f34 used as scratch for the two constants.
!   counter <= 4        : resume at .cont21 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 4.
!   otherwise           : rescale in place and resume at .cont21.
!
	.align	16
.update21:
	sub	%l6,stridex,%i5		! address of the element
	cmp	counter,4
	ble	.cont21
	fand	%f0,DC0,%f16		! (4_0) res = vis_fand(res,DC0);

	ld	[%i5+4],%i5		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i5,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%i5

	cmp	%g1,%i5
	bge,a	2f
	ldd	[%o3+0x50],%f34		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f6,%f6			! res = *(long long*)&res;
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (3_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (3_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;

	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
	ba	.cont21
	for	%f44,DC1,%f44		! (3_0) res = vis_for(res,DC1);
2:
	fand	%f6,%f34,%f6		! res &= mask loaded from [%o3+0x50]
	fxtod	%f6,%f6			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f34
	faddd	%f6,%f34,%f6		! res += constant at [%o3+0x58]
	st	%f6,[%fp+tmp7]

	fand	%f6,DC0,%f44		! (3_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (3_0) iexp = hx >> 21;
	sra	%g1,10,%o2		! (3_0) hx >>= 10;

	sub	%o7,537,%o7		! iexp -= 537;
	and	%o2,2040,%o2		! (3_0) hx &= 0x7f8;

	sub	%g0,%o7,%o7		! (3_0) iexp = -iexp;
	ba	.cont21
	for	%f44,DC1,%f44		! (3_0) res = vis_for(res,DC1);
1:
	sub	%l6,stridex,tmp_px
	sub	counter,4,tmp_counter

	ba	.cont21
	mov	4,counter		! (delay slot)

!
! .update22: taken from the main loop when element (4_0) has
! hx >= 0x7ff00000.  If counter <= 5 resume at .cont22 unchanged;
! otherwise save %i0 - stridex in tmp_px and counter - 5 in tmp_counter,
! and cut counter down to 5.  The fmuld in the ble delay slot is the
! instruction that sits in front of .cont22 in the main loop.
!
	.align	16
.update22:
	cmp	counter,5
	ble	.cont22
	fmuld	%f62,%f38,%f62		! (1_0) res *= xx;

	sub	%i0,stridex,tmp_px
	sub	counter,5,tmp_counter

	ba	.cont22
	mov	5,counter		! (delay slot)

!
! .update23: taken from the main loop when element (4_0) has
! hx < 0x00100000.  The address of the element is formed first; if
! counter <= 5 the pipeline resumes at .cont23 after the delay-slot fand.
!
	.align	16
.update23:
	sub	%i0,stridex,%l1		! address of the element
	cmp	counter,5
	ble	.cont23
	fand	%f6,DC0,%f16
!   ^-- (5_0) res = vis_fand(res,DC0);
!
! Continuation of .update23 (element (4_0), counter test already passed):
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 5.
!   otherwise           : rescale the value in %f0 in place (result
!                         mantissa in %f24, %f34 as scratch) and resume at
!                         .cont23.
!

	ld	[%l1+4],%l1		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%l1,%g0		! (delay slot of bl) hx | lx
	bz	1f
	sethi	%hi(0x00080000),%l1

	cmp	%g1,%l1
	bge,a	2f
	ldd	[%o3+0x50],%f34		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f0,%f0			! res = *(long long*)&res;
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (4_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
	ba	.cont23
	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
2:
	fand	%f0,%f34,%f0		! res &= mask loaded from [%o3+0x50]
	fxtod	%f0,%f0			! res = *(long long*)&res;
	ldd	[%o3+0x58],%f34
	faddd	%f0,%f34,%f0		! res += constant at [%o3+0x58]
	st	%f0,[%fp+tmp7]

	fand	%f0,DC0,%f24		! (4_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (4_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (4_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (4_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (4_0) iexp = -iexp;
	ba	.cont23
	for	%f24,DC1,%f24		! (4_0) res = vis_for(res,DC1);
1:
	sub	%i0,stridex,tmp_px
	sub	counter,5,tmp_counter

	ba	.cont23
	mov	5,counter		! (delay slot)

!
! .update24: taken from the main loop when element (5_0) has
! hx >= 0x7ff00000.  If counter <= 6 resume at .cont24 unchanged;
! otherwise save %i1 - stridex in tmp_px and counter - 6 in tmp_counter,
! and cut counter down to 6.  The fmuld in the ble delay slot is the
! instruction that sits in front of .cont24 in the main loop.
!
	.align	16
.update24:
	cmp	counter,6
	ble	.cont24
	fmuld	%f62,%f36,%f62		! (2_0) res *= xx;

	sub	%i1,stridex,tmp_px
	sub	counter,6,tmp_counter

	ba	.cont24
	mov	6,counter		! (delay slot)

!
! .update25: taken from the main loop when element (5_0) has
! hx < 0x00100000.
!   counter <= 6        : resume at .cont25 unchanged.
!   hx < 0 or hx|lx == 0: defer (local label 1), counter cut to 6.
!   otherwise           : both words of the element are re-read from
!                         memory into %f10/%f11, rescaled there (result
!                         mantissa in %f28, %f60 as scratch), and the
!                         pipeline resumes at .cont25.
!
	.align	16
.update25:
	sub	%i1,stridex,%i3		! address of the element
	cmp	counter,6
	ble	.cont25
	fand	%f6,DC0,%f16		! (6_0) res = vis_fand(res,DC0);

	ld	[%i3+4],%i3		! lx = low word of the element
	cmp	%g1,0
	bl	1f

	orcc	%g1,%i3,%g0		! (delay slot of bl) hx | lx
	bz	1f
	nop

	sub	%i1,stridex,%i3		! %i3 was overwritten by lx: form the address again
	ld	[%i3],%f10
	ld	[%i3+4],%f11

	sethi	%hi(0x00080000),%i3

	cmp	%g1,%i3
	bge,a	2f
	ldd	[%o3+0x50],%f60		! (annulled delay slot) mask for the fand at 2:

	fxtod	%f10,%f10		! res = *(long long*)&res;
	st	%f10,[%fp+tmp7]

	fand	%f10,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1		! new hx

	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (5_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;

	ba	.cont25
	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
2:
	fand	%f10,%f60,%f10		! res &= mask loaded from [%o3+0x50]
	fxtod	%f10,%f10		! res = *(long long*)&res;
	ldd	[%o3+0x58],%f60
	faddd	%f10,%f60,%f10		! res += constant at [%o3+0x58]
	st	%f10,[%fp+tmp7]

	fand	%f10,DC0,%f28		! (5_0) res = vis_fand(res,DC0);
	ld	[%fp+tmp7],%g1

	sra	%g1,21,%o7		! (5_0) iexp = hx >> 21;

	sra	%g1,10,%o2		! (5_0) hx >>= 10;
	sub	%o7,537,%o7		! iexp -= 537;

	and	%o2,2040,%o2		! (5_0) hx &= 0x7f8;
	sub	%g0,%o7,%o7		! (5_0) iexp = -iexp;

	ba	.cont25
	for	%f28,DC1,%f28		! (5_0) res = vis_for(res,DC1);
1:
	sub	%i1,stridex,tmp_px
	sub	counter,6,tmp_counter

	ba	.cont25
	mov	6,counter		! (delay slot)

! .exit: return to the caller; restore in the delay slot pops the register window.
.exit:
	ret
	restore
	SET_SIZE(__vrsqrt)