1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 
*/

	.file	"__vrhypot.S"

#include "libm.h"

/*
 * Read-only constants, addressed through the TBL register.
 *
 * Bytes 0 .. TBL_SHIFT-1 (128 words):
 *	Seed table for the reciprocal approximation.  The algorithm indexes
 *	it with ((high word of dres) >> 11) & 0x1fc, which is the top seven
 *	mantissa bits of dres scaled to a word offset.  Entry i is the high
 *	word of a double close to 2**1023 / (1 + i/128); for example
 *	entry 0 is 0x7fe00000 (2**1023) and entry 64 is 0x7fd55555
 *	(2**1023 / 1.5).  The algorithm subtracts the exponent field of dres
 *	from the loaded word (vis_fpsub32) to obtain the first estimate of
 *	1/dres.
 *
 * Bytes TBL_SHIFT .. TBL_SHIFT+71 (nine doubles, high word first):
 *	Scalar constants.  The first five are loaded into floating point
 *	registers by the function prologue; the remaining four are the ones
 *	the algorithm comment uses on the subnormal-input path.
 */
	RO_DATA
	.align	64

.CONST_TBL:
	.word	0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
	.word	0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
	.word	0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
	.word	0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
	.word	0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
	.word	0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
	.word	0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
	.word	0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
	.word	0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
	.word	0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
	.word	0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
	.word	0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
	.word	0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
	.word	0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
	.word	0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
	.word	0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
	.word	0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
	.word	0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
	.word	0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
	.word	0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
	.word	0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
	.word	0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
	.word	0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
	.word	0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
	.word	0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
	.word	0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
	.word	0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
	.word	0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
	.word	0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
	.word	0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
	.word	0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
	.word	0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,

	.word	0x42300000, 0		! +0x00 D2ON36 = 2**36, splits x into x_hi + x_lo
	.word	0xffffff00, 0		! +0x08 DA0, mask: sign, exponent, top 12 mantissa bits
	.word	0xfff00000, 0		! +0x10 DA1, mask: sign and exponent only
	.word	0x3ff00000, 0		! +0x18 DONE = 1.0
	.word	0x40000000, 0		! +0x20 DTWO = 2.0
	.word	0x7fd00000, 0		! +0x28 D2ON1022 = 2**1022
	.word	0x3cb00000, 0		! +0x30 D2ONM52 = 2**-52
	.word	0x43200000, 0		! +0x38 D2ON51 = 2**51
	.word	0x0007ffff, 0xffffffff	! +0x40 0x0007ffffffffffff

/*
 * Byte strides of the x, y and z vectors.  The prologue shifts each
 * element stride left by 3 before it is used.
 */
#define stridex		%l2
#define stridey		%l3
#define stridez		%l5

/* Byte offset from TBL to the first double-word constant. */
#define TBL_SHIFT	512

/* TBL holds the address of .CONST_TBL; counter holds the element count. */
#define TBL		%l1
#define counter		%l4

/*
 * Integer constants kept in registers for the exponent tests.  Each
 * register holds the value spelled out in its name.
 */
#define _0x7ff00000	%l0
#define _0x00100000	%o5
#define _0x7fffffff	%l6

/* Floating point constants loaded from TBL+TBL_SHIFT by the prologue. */
#define D2ON36		%f4
#define DTWO		%f6
#define DONE		%f8
#define DA0		%f58
#define DA1		%f56

/*
 * Offsets from %fp of sixteen 8-byte scratch slots.  The visible code
 * stores the per-element scale factor scl0 in them with stx and reads
 * it back into a floating point register with ldd.
 */
#define dtmp0		STACK_BIAS-0x80
#define dtmp1		STACK_BIAS-0x78
#define dtmp2		STACK_BIAS-0x70
#define dtmp3		STACK_BIAS-0x68
#define dtmp4		STACK_BIAS-0x60
#define dtmp5		STACK_BIAS-0x58
#define dtmp6		STACK_BIAS-0x50
#define dtmp7		STACK_BIAS-0x48
#define dtmp8		STACK_BIAS-0x40
#define dtmp9		STACK_BIAS-0x38
#define dtmp10		STACK_BIAS-0x30
#define dtmp11		STACK_BIAS-0x28
#define dtmp12		STACK_BIAS-0x20
#define dtmp13		STACK_BIAS-0x18
#define dtmp14		STACK_BIAS-0x10
#define dtmp15		STACK_BIAS-0x08

/*
 * ftmp0 is a word slot used to move the high word of dres from a floating
 * point register to an integer register (st, then ld).  tmp_px, tmp_py and
 * tmp_counter hold the x pointer, the y pointer and the element count that
 * the prologue saves and the .begin label reloads.
 */
#define ftmp0		STACK_BIAS-0x100
#define tmp_px		STACK_BIAS-0x98
#define tmp_py		STACK_BIAS-0x90
#define tmp_counter	STACK_BIAS-0x88

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x100

!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
!                     !!!!!   algorithm   !!!!!
!
!  Pseudo-code for one element.  The instructions below interleave
!  several elements; the (k_n) tag in each instruction comment names
!  the element that the instruction belongs to.
!
!  hx0 = *(int*)px;
!  hy0 = *(int*)py;
!
!  ((float*)&x0)[0] = ((float*)px)[0];
!  ((float*)&x0)[1] = ((float*)px)[1];
!  ((float*)&y0)[0] = ((float*)py)[0];
!  ((float*)&y0)[1] = ((float*)py)[1];
!
!  hx0 &= 0x7fffffff;
!  hy0 &= 0x7fffffff;
!
!  The next four statements compute j0 = max(hx0, hy0) without a branch.
!
!  diff0 = hy0 - hx0;
!  j0 = diff0 >> 31;
!  j0 &= diff0;
!  j0 = hy0 - j0;
!  j0 &= 0x7ff00000;
!
!  scl0 is a power of two whose exponent field is 0x7ff minus the
!  exponent field of the larger operand.
!
!  j0 = 0x7ff00000 - j0;
!  ll = (long long)j0 << 32;
!  *(long long*)&scl0 = ll;
!
! 
if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 ) 148 ! { 149 ! lx = ((int*)px)[1]; 150 ! ly = ((int*)py)[1]; 151 ! 152 ! if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0; 153 ! else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0; 154 ! else res0 = fabs(x0) * fabs(y0); 155 ! 156 ! ((float*)pz)[0] = ((float*)&res0)[0]; 157 ! ((float*)pz)[1] = ((float*)&res0)[1]; 158 ! 159 ! px += stridex; 160 ! py += stridey; 161 ! pz += stridez; 162 ! continue; 163 ! } 164 ! if ( hx0 < 0x00100000 && hy0 < 0x00100000 ) 165 ! { 166 ! lx = ((int*)px)[1]; 167 ! ly = ((int*)py)[1]; 168 ! ii = hx0 | hy0; 169 ! ii |= lx; 170 ! ii |= ly; 171 ! if ( ii == 0 ) 172 ! { 173 ! res0 = 1.0 / 0.0; 174 ! ((float*)pz)[0] = ((float*)&res0)[0]; 175 ! ((float*)pz)[1] = ((float*)&res0)[1]; 176 ! 177 ! px += stridex; 178 ! py += stridey; 179 ! pz += stridez; 180 ! continue; 181 ! } 182 ! x0 = fabs(x0); 183 ! y0 = fabs(y0); 184 ! if ( hx0 < 0x00080000 ) 185 ! { 186 ! x0 = *(long long*)&x0; 187 ! } 188 ! else 189 ! { 190 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; 191 ! x0 = vis_fand(x0, dtmp0); 192 ! x0 = *(long long*)&x0; 193 ! x0 += D2ON51; 194 ! } 195 ! x0 *= D2ONM52; 196 ! if ( hy0 < 0x00080000 ) 197 ! { 198 ! y0 = *(long long*)&y0; 199 ! } 200 ! else 201 ! { 202 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL; 203 ! y0 = vis_fand(y0, dtmp0); 204 ! y0 = *(long long*)&y0; 205 ! y0 += D2ON51; 206 ! } 207 ! y0 *= D2ONM52; 208 ! *(long long*)&scl0 = 0x7fd0000000000000ULL; 209 ! } 210 ! else 211 ! { 212 ! x0 *= scl0; 213 ! y0 *= scl0; 214 ! } 215 ! 216 ! x_hi0 = x0 + D2ON36; 217 ! y_hi0 = y0 + D2ON36; 218 ! x_hi0 -= D2ON36; 219 ! y_hi0 -= D2ON36; 220 ! x_lo0 = x0 - x_hi0; 221 ! y_lo0 = y0 - y_hi0; 222 ! res0_hi = x_hi0 * x_hi0; 223 ! dtmp0 = y_hi0 * y_hi0; 224 ! res0_hi += dtmp0; 225 ! res0_lo = x0 + x_hi0; 226 ! res0_lo *= x_lo0; 227 ! dtmp1 = y0 + y_hi0; 228 ! dtmp1 *= y_lo0; 229 ! res0_lo += dtmp1; 230 ! 231 ! dres = res0_hi + res0_lo; 232 ! dexp0 = vis_fand(dres,DA1); 233 ! 
iarr = ((int*)&dres)[0]; 234 ! 235 ! iarr >>= 11; 236 ! iarr &= 0x1fc; 237 ! dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 238 ! dd = vis_fpsub32(dtmp0, dexp0); 239 ! 240 ! dtmp0 = dd * dres; 241 ! dtmp0 = DTWO - dtmp0; 242 ! dd *= dtmp0; 243 ! dtmp1 = dd * dres; 244 ! dtmp1 = DTWO - dtmp1; 245 ! dd *= dtmp1; 246 ! dtmp2 = dd * dres; 247 ! dtmp2 = DTWO - dtmp2; 248 ! dres = dd * dtmp2; 249 ! 250 ! res0 = vis_fand(dres,DA0); 251 ! 252 ! dtmp0 = res0_hi * res0; 253 ! dtmp0 = DONE - dtmp0; 254 ! dtmp1 = res0_lo * res0; 255 ! dtmp0 -= dtmp1; 256 ! dtmp0 *= dres; 257 ! res0 += dtmp0; 258 ! 259 ! res0 = sqrt ( res0 ); 260 ! 261 ! res0 = scl0 * res0; 262 ! 263 ! ((float*)pz)[0] = ((float*)&res0)[0]; 264 ! ((float*)pz)[1] = ((float*)&res0)[1]; 265 ! 266 ! px += stridex; 267 ! py += stridey; 268 ! pz += stridez; 269 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 270 271 ENTRY(__vrhypot) 272 save %sp,-SA(MINFRAME)-tmps,%sp 273 PIC_SETUP(l7) 274 PIC_SET(l7,.CONST_TBL,l1) 275 wr %g0,0x82,%asi 276 277 #ifdef __sparcv9 278 ldx [%fp+STACK_BIAS+176],stridez 279 #else 280 ld [%fp+STACK_BIAS+92],stridez 281 #endif 282 283 sll %i2,3,stridex 284 sethi %hi(0x7ff00000),_0x7ff00000 285 st %i0,[%fp+tmp_counter] 286 287 sll %i4,3,stridey 288 sethi %hi(0x00100000),_0x00100000 289 stx %i1,[%fp+tmp_px] 290 291 sll stridez,3,stridez 292 sethi %hi(0x7ffffc00),_0x7fffffff 293 stx %i3,[%fp+tmp_py] 294 295 ldd [TBL+TBL_SHIFT],D2ON36 296 add _0x7fffffff,1023,_0x7fffffff 297 298 ldd [TBL+TBL_SHIFT+8],DA0 299 300 ldd [TBL+TBL_SHIFT+16],DA1 301 302 ldd [TBL+TBL_SHIFT+24],DONE 303 304 ldd [TBL+TBL_SHIFT+32],DTWO 305 306 .begin: 307 ld [%fp+tmp_counter],counter 308 ldx [%fp+tmp_px],%i4 309 ldx [%fp+tmp_py],%i3 310 st %g0,[%fp+tmp_counter] 311 .begin1: 312 cmp counter,0 313 ble,pn %icc,.exit 314 315 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 316 add %i4,stridex,%i1 317 318 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 319 add %i3,stridey,%i0 ! 
py += stridey 320 321 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 322 323 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 324 bge,pn %icc,.spec0 ! (7_0) if ( hx0 >= 0x7ff00000 ) 325 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 326 327 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 328 bge,pn %icc,.spec0 ! (7_0) if ( hy0 >= 0x7ff00000 ) 329 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 330 331 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 332 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 333 bl,pn %icc,.spec1 ! (7_0) if ( hx0 < 0x00100000 ) 334 335 and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 336 .cont_spec0: 337 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 338 339 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 340 341 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 342 343 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 344 345 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 346 347 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 348 .cont_spec1: 349 lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 350 mov %i1,%i2 351 352 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 353 354 and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 355 mov %i0,%o0 356 357 cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 358 bge,pn %icc,.update0 ! (0_0) if ( hx0 >= 0x7ff00000 ) 359 and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 360 361 cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 362 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 363 bge,pn %icc,.update0 ! (0_0) if ( hy0 >= 0x7ff00000 ) 364 sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 365 366 cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 367 368 and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 369 bl,pn %icc,.update1 ! (0_0) if ( hx0 < 0x00100000 ) 370 sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 371 .cont0: 372 and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 373 374 sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 375 .cont1: 376 sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 377 stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; 378 379 ldd [%fp+dtmp15],%f62 ! 
(7_1) *(long long*)&scl0 = ll; 380 381 lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 382 383 lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; 384 385 lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 386 387 add %i1,stridex,%i4 ! px += stridex 388 lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 389 390 fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 391 add %i4,stridex,%i1 ! px += stridex 392 393 fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 394 395 lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 396 397 add %i0,stridey,%i3 ! py += stridey 398 faddd %f10,D2ON36,%f46 ! (7_1) x_hi0 = x0 + D2ON36; 399 400 lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 401 add %i3,stridey,%i0 ! py += stridey 402 faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 403 404 and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 405 406 cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 407 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 408 409 and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; 410 bge,pn %icc,.update2 ! (1_0) if ( hx0 >= 0x7ff00000 ) 411 fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 412 413 cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 414 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 415 bge,pn %icc,.update3 ! (1_0) if ( hy0 >= 0x7ff00000 ) 416 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 417 418 sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 419 420 and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 421 422 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 423 sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 424 cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 425 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 426 427 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 428 and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 429 bl,pn %icc,.update4 ! (1_0) if ( hx0 < 0x00100000 ) 430 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 431 432 sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 433 .cont4: 434 sllx %o4,32,%o4 ! 
(1_0) ll = (long long)j0 << 32; 435 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 436 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 437 438 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 439 440 fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 441 ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 442 faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 443 444 lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 445 446 lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 447 448 fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 449 lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; 450 451 lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 452 453 fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 454 455 fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 456 faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 457 458 lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 459 mov %i1,%i2 460 461 faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 462 463 lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 464 mov %i0,%o0 465 faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 466 467 faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 468 and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 469 470 cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 471 bge,pn %icc,.update5 ! (2_0) if ( hx0 >= 0x7ff00000 ) 472 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 473 474 and %g1,_0x7fffffff,%l7 ! (2_0) hx0 &= 0x7fffffff; 475 st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 476 fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 477 478 sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 479 cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 480 bge,pn %icc,.update6 ! (2_0) if ( hy0 >= 0x7ff00000 ) 481 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 482 483 sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 484 485 and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 486 487 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 488 cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 489 sub %l7,%o1,%o4 ! 
(2_0) j0 = hy0 - j0; 490 fsubd %f10,%f20,%f0 ! (0_0) x_lo0 = x0 - x_hi0; 491 492 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 493 and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 494 bl,pn %icc,.update7 ! (2_0) if ( hx0 < 0x00100000 ) 495 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 496 .cont7: 497 sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 498 499 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 500 .cont8: 501 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 502 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 503 504 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 505 506 fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 507 ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 508 faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 509 510 lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 511 512 lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 513 514 fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 515 lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 516 517 add %i1,stridex,%i4 ! px += stridex 518 lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 519 520 ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 521 add %i4,stridex,%i1 ! px += stridex 522 fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 523 524 fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 525 526 fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 527 sra %o2,11,%i3 ! (7_1) iarr >>= 11; 528 faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 529 530 and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; 531 532 add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 533 lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 534 535 add %i0,stridey,%i3 ! py += stridey 536 ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 537 faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 538 539 lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 540 add %i3,stridey,%i0 ! py += stridey 541 faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 542 543 faddd %f32,%f36,%f22 ! 
(0_0) dres = res0_hi + res0_lo; 544 and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 545 546 cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 547 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 548 bge,pn %icc,.update9 ! (3_0) if ( hx0 >= 0x7ff00000 ) 549 fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 550 551 and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 552 st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 553 fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 554 555 sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 556 cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 557 bge,pn %icc,.update10 ! (3_0) if ( hy0 >= 0x7ff00000 ) 558 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 559 560 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 561 sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 562 563 and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 564 565 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 566 cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 567 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 568 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 569 570 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 571 and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 572 bl,pn %icc,.update11 ! (3_0) if ( hx0 < 0x00100000 ) 573 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 574 .cont11: 575 sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 576 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 577 .cont12: 578 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 579 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 580 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 581 582 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 583 584 fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 585 ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 586 faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 587 588 lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; 589 fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 590 591 lda [%i2+4]%asi,%f11 ! 
(2_0) ((float*)&x0)[1] = ((float*)px)[1]; 592 593 fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 594 lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 595 596 lda [%o0+4]%asi,%f13 ! (2_0) ((float*)&y0)[1] = ((float*)py)[1]; 597 598 fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 599 ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 600 fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 601 602 fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 603 604 fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 605 sra %o2,11,%o4 ! (0_0) iarr >>= 11; 606 faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 607 608 and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 609 610 add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 611 mov %i1,%i2 612 lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 613 fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 614 615 ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 616 faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 617 618 lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; 619 mov %i0,%o0 620 faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 621 622 and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 623 faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; 624 625 fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 626 cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 627 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 628 fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 629 630 and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 631 bge,pn %icc,.update13 ! (4_0) if ( hx0 >= 0x7ff00000 ) 632 st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 633 fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 634 635 sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 636 cmp %l7,_0x7ff00000 ! (4_0) hy0 ? 0x7ff00000 637 bge,pn %icc,.update14 ! (4_0) if ( hy0 >= 0x7ff00000 ) 638 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 639 640 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 641 sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 642 643 and %o1,%o3,%o1 ! 
(4_0) j0 &= diff0; 644 645 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 646 sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; 647 cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 648 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 649 650 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 651 and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 652 bl,pn %icc,.update15 ! (4_0) if ( hx0 < 0x00100000 ) 653 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 654 .cont15: 655 sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 656 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 657 .cont16: 658 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 659 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 660 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 661 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 662 663 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 664 665 fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 666 ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 667 faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 668 669 lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 670 fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 671 672 lda [%i4+4]%asi,%f11 ! (3_0) ((float*)&x0)[1] = ((float*)px)[1]; 673 674 fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 675 lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 676 fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; 677 678 lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 679 add %i1,stridex,%i4 ! px += stridex 680 681 fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 682 ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 683 add %i4,stridex,%i1 ! px += stridex 684 fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 685 686 fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 687 688 fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 689 sra %o2,11,%i3 ! (1_0) iarr >>= 11; 690 faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 691 692 and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 693 fmuld %f16,%f20,%f28 ! 
(7_1) dres = dd * dtmp2; 694 695 add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 696 lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 697 fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 698 699 add %i0,stridey,%i3 ! py += stridey 700 ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 701 faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 702 703 lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 704 add %i3,stridey,%i0 ! py += stridey 705 faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 706 707 and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; 708 faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 709 710 fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 711 cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 712 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 713 fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 714 715 and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 716 st %f14,[%fp+ftmp0] ! (2_0) iarr = ((int*)&dres)[0]; 717 bge,pn %icc,.update17 ! (5_0) if ( hx0 >= 0x7ff00000 ) 718 fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 719 720 sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 721 cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 722 bge,pn %icc,.update18 ! (5_0) if ( hy0 >= 0x7ff00000 ) 723 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 724 725 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 726 sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 727 728 and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 729 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 730 731 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 732 sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 733 cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 734 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 735 736 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 737 and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 738 bl,pn %icc,.update19 ! (5_0) if ( hx0 < 0x00100000 ) 739 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 740 .cont19a: 741 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 742 sub %l0,%o4,%g1 ! 
(5_0) j0 = 0x7ff00000 - j0; 743 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 744 .cont19b: 745 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 746 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 747 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 748 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 749 750 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 751 fsubd %f60,%f54,%f12 ! (3_0) y_lo0 = y0 - y_hi0; 752 .cont20: 753 fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 754 ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 755 faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 756 757 fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 758 lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 759 fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; 760 761 lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 762 763 fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 764 lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; 765 fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 766 767 lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 768 769 fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 770 ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 771 fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 772 773 fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 774 fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 775 776 fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 777 sra %o2,11,%o4 ! (2_0) iarr >>= 11; 778 faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 779 780 and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 781 fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; 782 783 add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 784 mov %i1,%i2 785 lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 786 fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 787 788 fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 789 ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 790 faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 791 792 lda [%i0]0x82,%o4 ! 
(6_0) hy0 = *(int*)py; 793 mov %i0,%o0 794 faddd %f60,D2ON36,%f50 ! (4_0) y_hi0 = y0 + D2ON36; 795 796 and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 797 faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 798 799 fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 800 cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 801 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 802 fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 803 804 and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 805 st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 806 bge,pn %icc,.update21 ! (6_0) if ( hx0 >= 0x7ff00000 ) 807 fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 808 809 sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 810 cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 811 bge,pn %icc,.update22 ! (6_0) if ( hy0 >= 0x7ff00000 ) 812 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 813 814 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 815 sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 816 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 817 818 and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 819 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 820 821 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 822 sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 823 cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 824 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 825 826 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 827 and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 828 bl,pn %icc,.update23 ! (6_0) if ( hx0 < 0x00100000 ) 829 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 830 .cont23a: 831 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 832 sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 833 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 834 .cont23b: 835 fmuld %f32,%f28,%f50 ! (0_0) dtmp0 = res0_hi * res0; 836 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 837 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 838 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 839 840 fmuld %f36,%f28,%f36 ! 
(0_0) dtmp1 = res0_lo * res0; 841 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 842 .cont24: 843 fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 844 ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 845 faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 846 847 lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 848 fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 849 850 lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 851 fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; 852 853 fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 854 lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; 855 fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 856 857 add %i1,stridex,%i4 ! px += stridex 858 lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 859 860 fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 861 add %i4,stridex,%i1 ! px += stridex 862 ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 863 fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 864 865 fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 866 fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 867 868 fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 869 sra %o2,11,%i3 ! (3_0) iarr >>= 11; 870 faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 871 872 and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 873 fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 874 875 fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 876 add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 877 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 878 fsubd DTWO,%f50,%f46 ! (2_0) dtmp1 = DTWO - dtmp1; 879 880 fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 881 add %i0,stridey,%i3 ! py += stridey 882 ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 883 faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 884 885 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 886 add %i3,stridey,%i0 ! py += stridey 887 faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 888 889 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 890 faddd %f32,%f36,%f24 ! 
(4_0) dres = res0_hi + res0_lo; 891 892 fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 893 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 894 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 895 fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 896 897 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 898 st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 899 bge,pn %icc,.update25 ! (7_0) if ( hx0 >= 0x7ff00000 ) 900 fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 901 902 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 903 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 904 bge,pn %icc,.update26 ! (7_0) if ( hy0 >= 0x7ff00000 ) 905 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 906 907 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 908 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 909 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 910 911 and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 912 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 913 914 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 915 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 916 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 917 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 918 919 fmuld %f54,%f54,%f46 ! (5_0) dtmp0 = y_hi0 * y_hi0; 920 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 921 bl,pn %icc,.update27 ! (7_0) if ( hx0 < 0x00100000 ) 922 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 923 .cont27a: 924 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 925 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 926 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 927 .cont27b: 928 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 929 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 930 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 931 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 932 933 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 934 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 935 .cont28: 936 fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 937 ldd [%fp+dtmp13],%f62 ! 
(6_0) *(long long*)&scl0 = ll; 938 faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; 939 940 fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 941 lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 942 943 lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 944 fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 945 946 fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 947 lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 948 fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 949 950 lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; 951 952 fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; 953 ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; 954 fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); 955 956 fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; 957 ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; 958 fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; 959 960 fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; 961 sra %o2,11,%o4 ! (4_0) iarr >>= 11; 962 faddd %f2,%f46,%f34 ! (5_0) res0_lo += dtmp1; 963 964 and %o4,0x1fc,%o4 ! (4_0) iarr &= 0x1fc; 965 fmuld %f26,%f14,%f26 ! (2_0) dres = dd * dtmp2; 966 967 cmp counter,8 968 bl,pn %icc,.tail 969 nop 970 971 ba .main_loop 972 sub counter,8,counter 973 974 .align 16 975 .main_loop: 976 fsqrtd %f48,%f14 ! (0_1) res0 = sqrt ( res0 ); 977 add %o4,TBL,%o4 ! (4_1) (char*)dll1 + iarr 978 lda [%i1]0x82,%o1 ! (0_0) hx0 = *(int*)px; 979 fsubd DTWO,%f50,%f46 ! (3_1) dtmp1 = DTWO - dtmp1; 980 981 fmuld %f20,%f16,%f48 ! (1_1) dtmp0 *= dres; 982 mov %i1,%i2 983 ld [%o4],%f20 ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 984 faddd %f10,D2ON36,%f50 ! (6_1) x_hi0 = x0 + D2ON36; 985 986 nop 987 mov %i0,%o0 988 lda [%i0]0x82,%o4 ! (0_0) hy0 = *(int*)py; 989 faddd %f60,D2ON36,%f2 ! (6_1) y_hi0 = y0 + D2ON36; 990 991 faddd %f42,%f34,%f16 ! (5_1) dres = res0_hi + res0_lo; 992 and %o1,_0x7fffffff,%o7 ! (0_0) hx0 &= 0x7fffffff; 993 st %f16,[%fp+ftmp0] ! (5_1) iarr = ((int*)&dres)[0]; 994 fmuld %f0,%f18,%f0 ! 
(7_2) res0 = scl0 * res0; 995 996 fmuld %f52,%f46,%f18 ! (3_1) dd *= dtmp1; 997 cmp %o7,_0x7ff00000 ! (0_0) hx0 ? 0x7ff00000 998 st %f0,[%i5] ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0]; 999 fpsub32 %f20,%f54,%f54 ! (4_1) dd = vis_fpsub32(dtmp0, dexp0); 1000 1001 and %o4,_0x7fffffff,%l7 ! (0_0) hy0 &= 0x7fffffff; 1002 st %f1,[%i5+4] ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1]; 1003 bge,pn %icc,.update29 ! (0_0) if ( hx0 >= 0x7ff00000 ) 1004 fsubd %f50,D2ON36,%f20 ! (6_1) x_hi0 -= D2ON36; 1005 1006 cmp %l7,_0x7ff00000 ! (0_0) hy0 ? 0x7ff00000 1007 sub %l7,%o7,%o1 ! (0_0) diff0 = hy0 - hx0; 1008 bge,pn %icc,.update30 ! (0_0) if ( hy0 >= 0x7ff00000 ) 1009 fsubd %f2,D2ON36,%f2 ! (6_1) y_hi0 -= D2ON36; 1010 1011 fmuld %f54,%f24,%f50 ! (4_1) dtmp0 = dd * dres; 1012 sra %o1,31,%o3 ! (0_0) j0 = diff0 >> 31; 1013 stx %g1,[%fp+dtmp0] ! (7_1) *(long long*)&scl0 = ll; 1014 faddd %f28,%f48,%f52 ! (1_1) res0 += dtmp0; 1015 1016 and %o1,%o3,%o1 ! (0_0) j0 &= diff0; 1017 cmp %o7,_0x00100000 ! (0_0) hx0 ? 0x00100000 1018 bl,pn %icc,.update31 ! (0_0) if ( hx0 < 0x00100000 ) 1019 fand %f26,DA0,%f48 ! (2_1) res0 = vis_fand(dres,DA0); 1020 .cont31: 1021 fmuld %f20,%f20,%f0 ! (6_1) res0_hi = x_hi0 * x_hi0; 1022 sub %l7,%o1,%o4 ! (0_0) j0 = hy0 - j0; 1023 nop 1024 fsubd %f10,%f20,%f28 ! (6_1) x_lo0 = x0 - x_hi0; 1025 1026 fmuld %f2,%f2,%f46 ! (6_1) dtmp0 = y_hi0 * y_hi0; 1027 add %i5,stridez,%i5 ! pz += stridez 1028 and %o4,%l0,%o4 ! (0_0) j0 &= 0x7ff00000; 1029 faddd %f10,%f20,%f62 ! (6_1) res0_lo = x0 + x_hi0; 1030 1031 fmuld %f18,%f22,%f22 ! (3_1) dtmp2 = dd * dres; 1032 sub %l0,%o4,%o4 ! (0_0) j0 = 0x7ff00000 - j0; 1033 nop 1034 fsubd DTWO,%f50,%f20 ! (4_1) dtmp0 = DTWO - dtmp0; 1035 .cont32: 1036 fmuld %f30,%f48,%f12 ! (2_1) dtmp0 = res0_hi * res0; 1037 sllx %o4,32,%o4 ! (0_0) ll = (long long)j0 << 32; 1038 stx %o4,[%fp+dtmp1] ! (0_0) *(long long*)&scl0 = ll; 1039 faddd %f60,%f2,%f50 ! (6_1) dtmp1 = y0 + y_hi0; 1040 1041 fmuld %f40,%f48,%f40 ! 
(2_1) dtmp1 = res0_lo * res0; 1042 nop 1043 bn,pn %icc,.exit 1044 fsubd %f60,%f2,%f2 ! (6_1) y_lo0 = y0 - y_hi0; 1045 1046 fmuld %f62,%f28,%f28 ! (6_1) res0_lo *= x_lo0; 1047 nop 1048 ldd [%fp+dtmp15],%f62 ! (7_1) *(long long*)&scl0 = ll; 1049 faddd %f0,%f46,%f30 ! (6_1) res0_hi += dtmp0; 1050 1051 nop 1052 nop 1053 lda [%i4]%asi,%f10 ! (7_1) ((float*)&x0)[0] = ((float*)px)[0]; 1054 fmuld %f54,%f20,%f54 ! (4_1) dd *= dtmp0; 1055 1056 nop 1057 nop 1058 lda [%i4+4]%asi,%f11 ! (7_1) ((float*)&x0)[1] = ((float*)px)[1]; 1059 fsubd DONE,%f12,%f60 ! (2_1) dtmp0 = DONE - dtmp0; 1060 1061 fmuld %f50,%f2,%f46 ! (6_1) dtmp1 *= y_lo0; 1062 nop 1063 lda [%i3]%asi,%f12 ! (7_1) ((float*)&y0)[0] = ((float*)py)[0]; 1064 fsubd DTWO,%f22,%f22 ! (3_1) dtmp2 = DTWO - dtmp2; 1065 1066 add %i1,stridex,%i4 ! px += stridex 1067 nop 1068 lda [%i3+4]%asi,%f13 ! (7_1) ((float*)&y0)[1] = ((float*)py)[1]; 1069 bn,pn %icc,.exit 1070 1071 fmuld %f54,%f24,%f50 ! (4_1) dtmp1 = dd * dres; 1072 add %i4,stridex,%i1 ! px += stridex 1073 ld [%fp+ftmp0],%o2 ! (5_1) iarr = ((int*)&dres)[0]; 1074 fand %f16,DA1,%f2 ! (5_1) dexp0 = vis_fand(dres,DA1); 1075 1076 fmuld %f10,%f62,%f10 ! (7_1) x0 *= scl0; 1077 nop 1078 ldd [%fp+dtmp2],%f0 ! (0_1) *(long long*)&scl0 = ll; 1079 fsubd %f60,%f40,%f20 ! (2_1) dtmp0 -= dtmp1; 1080 1081 fmuld %f12,%f62,%f60 ! (7_1) y0 *= scl0; 1082 sra %o2,11,%i3 ! (5_1) iarr >>= 11; 1083 nop 1084 faddd %f28,%f46,%f40 ! (6_1) res0_lo += dtmp1; 1085 1086 and %i3,0x1fc,%i3 ! (5_1) iarr &= 0x1fc; 1087 nop 1088 bn,pn %icc,.exit 1089 fmuld %f18,%f22,%f28 ! (3_1) dres = dd * dtmp2; 1090 1091 fsqrtd %f52,%f22 ! (1_1) res0 = sqrt ( res0 ); 1092 lda [%i4]0x82,%o1 ! (1_0) hx0 = *(int*)px; 1093 add %i3,TBL,%g1 ! (5_1) (char*)dll1 + iarr 1094 fsubd DTWO,%f50,%f62 ! (4_1) dtmp1 = DTWO - dtmp1; 1095 1096 fmuld %f20,%f26,%f52 ! (2_1) dtmp0 *= dres; 1097 add %i0,stridey,%i3 ! py += stridey 1098 ld [%g1],%f26 ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1099 faddd %f10,D2ON36,%f46 ! 
(7_1) x_hi0 = x0 + D2ON36; 1100 1101 nop 1102 add %i3,stridey,%i0 ! py += stridey 1103 lda [%i3]0x82,%g1 ! (1_0) hy0 = *(int*)py; 1104 faddd %f60,D2ON36,%f50 ! (7_1) y_hi0 = y0 + D2ON36; 1105 1106 faddd %f30,%f40,%f18 ! (6_1) dres = res0_hi + res0_lo; 1107 and %o1,_0x7fffffff,%o7 ! (1_0) hx0 &= 0x7fffffff; 1108 st %f18,[%fp+ftmp0] ! (6_1) iarr = ((int*)&dres)[0]; 1109 fmuld %f0,%f14,%f0 ! (0_1) res0 = scl0 * res0; 1110 1111 fmuld %f54,%f62,%f14 ! (4_1) dd *= dtmp1; 1112 cmp %o7,_0x7ff00000 ! (1_0) hx0 ? 0x7ff00000 1113 st %f0,[%i5] ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1114 fpsub32 %f26,%f2,%f26 ! (5_1) dd = vis_fpsub32(dtmp0, dexp0); 1115 1116 and %g1,_0x7fffffff,%l7 ! (1_0) hy0 &= 0x7fffffff; 1117 nop 1118 bge,pn %icc,.update33 ! (1_0) if ( hx0 >= 0x7ff00000 ) 1119 fsubd %f46,D2ON36,%f20 ! (7_1) x_hi0 -= D2ON36; 1120 1121 cmp %l7,_0x7ff00000 ! (1_0) hy0 ? 0x7ff00000 1122 sub %l7,%o7,%o1 ! (1_0) diff0 = hy0 - hx0; 1123 st %f1,[%i5+4] ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1124 fsubd %f50,D2ON36,%f54 ! (7_1) y_hi0 -= D2ON36; 1125 1126 fmuld %f26,%f16,%f50 ! (5_1) dtmp0 = dd * dres; 1127 sra %o1,31,%o3 ! (1_0) j0 = diff0 >> 31; 1128 bge,pn %icc,.update34 ! (1_0) if ( hy0 >= 0x7ff00000 ) 1129 faddd %f48,%f52,%f52 ! (2_1) res0 += dtmp0; 1130 1131 and %o1,%o3,%o1 ! (1_0) j0 &= diff0; 1132 add %i5,stridez,%i5 ! pz += stridez 1133 stx %o4,[%fp+dtmp2] ! (0_0) *(long long*)&scl0 = ll; 1134 fand %f28,DA0,%f48 ! (3_1) res0 = vis_fand(dres,DA0); 1135 1136 fmuld %f20,%f20,%f2 ! (7_1) res0_hi = x_hi0 * x_hi0; 1137 sub %l7,%o1,%o4 ! (1_0) j0 = hy0 - j0; 1138 cmp %o7,_0x00100000 ! (1_0) hx0 ? 0x00100000 1139 fsubd %f10,%f20,%f0 ! (7_1) x_lo0 = x0 - x_hi0; 1140 1141 fmuld %f54,%f54,%f46 ! (7_1) dtmp0 = y_hi0 * y_hi0; 1142 and %o4,%l0,%o4 ! (1_0) j0 &= 0x7ff00000; 1143 bl,pn %icc,.update35 ! (1_0) if ( hx0 < 0x00100000 ) 1144 faddd %f10,%f20,%f62 ! (7_1) res0_lo = x0 + x_hi0; 1145 .cont35a: 1146 fmuld %f44,%f48,%f10 ! 
(3_1) dtmp0 = res0_hi * res0; 1147 nop 1148 sub %l0,%o4,%o4 ! (1_0) j0 = 0x7ff00000 - j0; 1149 fsubd DTWO,%f50,%f20 ! (5_1) dtmp0 = DTWO - dtmp0; 1150 .cont35b: 1151 fmuld %f14,%f24,%f24 ! (4_1) dtmp2 = dd * dres; 1152 sllx %o4,32,%o4 ! (1_0) ll = (long long)j0 << 32; 1153 stx %o4,[%fp+dtmp3] ! (1_0) *(long long*)&scl0 = ll; 1154 faddd %f60,%f54,%f50 ! (7_1) dtmp1 = y0 + y_hi0; 1155 1156 fmuld %f38,%f48,%f38 ! (3_1) dtmp1 = res0_lo * res0; 1157 nop 1158 nop 1159 fsubd %f60,%f54,%f12 ! (7_1) y_lo0 = y0 - y_hi0; 1160 .cont36: 1161 fmuld %f62,%f0,%f0 ! (7_1) res0_lo *= x_lo0; 1162 nop 1163 ldd [%fp+dtmp1],%f62 ! (0_0) *(long long*)&scl0 = ll; 1164 faddd %f2,%f46,%f44 ! (7_1) res0_hi += dtmp0; 1165 1166 fsubd DONE,%f10,%f60 ! (3_1) dtmp0 = DONE - dtmp0; 1167 nop 1168 lda [%i2]%asi,%f10 ! (0_0) ((float*)&x0)[0] = ((float*)px)[0]; 1169 fmuld %f26,%f20,%f54 ! (5_1) dd *= dtmp0; 1170 1171 nop 1172 nop 1173 lda [%i2+4]%asi,%f11 ! (0_0) ((float*)&x0)[1] = ((float*)px)[1]; 1174 bn,pn %icc,.exit 1175 1176 fmuld %f50,%f12,%f26 ! (7_1) dtmp1 *= y_lo0; 1177 nop 1178 lda [%o0]%asi,%f12 ! (0_0) ((float*)&y0)[0] = ((float*)py)[0]; 1179 fsubd DTWO,%f24,%f24 ! (4_1) dtmp2 = DTWO - dtmp2; 1180 1181 nop 1182 nop 1183 lda [%o0+4]%asi,%f13 ! (0_0) ((float*)&y0)[1] = ((float*)py)[1]; 1184 bn,pn %icc,.exit 1185 1186 fmuld %f54,%f16,%f46 ! (5_1) dtmp1 = dd * dres; 1187 nop 1188 ld [%fp+ftmp0],%o2 ! (6_1) iarr = ((int*)&dres)[0]; 1189 fand %f18,DA1,%f2 ! (6_1) dexp0 = vis_fand(dres,DA1); 1190 1191 fmuld %f10,%f62,%f10 ! (0_0) x0 *= scl0; 1192 nop 1193 ldd [%fp+dtmp4],%f50 ! (1_1) *(long long*)&scl0 = ll; 1194 fsubd %f60,%f38,%f20 ! (3_1) dtmp0 -= dtmp1; 1195 1196 fmuld %f12,%f62,%f60 ! (0_0) y0 *= scl0; 1197 sra %o2,11,%g1 ! (6_1) iarr >>= 11; 1198 nop 1199 faddd %f0,%f26,%f38 ! (7_1) res0_lo += dtmp1; 1200 1201 nop 1202 and %g1,0x1fc,%g1 ! (6_1) iarr &= 0x1fc; 1203 bn,pn %icc,.exit 1204 fmuld %f14,%f24,%f26 ! (4_1) dres = dd * dtmp2; 1205 1206 fsqrtd %f52,%f24 ! 
(2_1) res0 = sqrt ( res0 ); 1207 lda [%i1]0x82,%o1 ! (2_0) hx0 = *(int*)px; 1208 add %g1,TBL,%g1 ! (6_1) (char*)dll1 + iarr 1209 fsubd DTWO,%f46,%f62 ! (5_1) dtmp1 = DTWO - dtmp1; 1210 1211 fmuld %f20,%f28,%f52 ! (3_1) dtmp0 *= dres; 1212 mov %i1,%i2 1213 ld [%g1],%f28 ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1214 faddd %f10,D2ON36,%f46 ! (0_0) x_hi0 = x0 + D2ON36; 1215 1216 nop 1217 mov %i0,%o0 1218 lda [%i0]0x82,%g1 ! (2_0) hy0 = *(int*)py; 1219 faddd %f60,D2ON36,%f12 ! (0_0) y_hi0 = y0 + D2ON36; 1220 1221 faddd %f44,%f38,%f14 ! (7_1) dres = res0_hi + res0_lo; 1222 and %o1,_0x7fffffff,%o7 ! (2_0) hx0 &= 0x7fffffff; 1223 st %f14,[%fp+ftmp0] ! (7_1) iarr = ((int*)&dres)[0]; 1224 fmuld %f50,%f22,%f0 ! (1_1) res0 = scl0 * res0; 1225 1226 fmuld %f54,%f62,%f22 ! (5_1) dd *= dtmp1; 1227 cmp %o7,_0x7ff00000 ! (2_0) hx0 ? 0x7ff00000 1228 st %f0,[%i5] ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1229 fpsub32 %f28,%f2,%f28 ! (6_1) dd = vis_fpsub32(dtmp0, dexp0); 1230 1231 and %g1,_0x7fffffff,%l7 ! (2_0) hy0 &= 0x7fffffff; 1232 nop 1233 bge,pn %icc,.update37 ! (2_0) if ( hx0 >= 0x7ff00000 ) 1234 fsubd %f46,D2ON36,%f20 ! (0_0) x_hi0 -= D2ON36; 1235 1236 sub %l7,%o7,%o1 ! (2_0) diff0 = hy0 - hx0; 1237 cmp %l7,_0x7ff00000 ! (2_0) hy0 ? 0x7ff00000 1238 st %f1,[%i5+4] ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1239 fsubd %f12,D2ON36,%f54 ! (0_0) y_hi0 -= D2ON36; 1240 1241 fmuld %f28,%f18,%f50 ! (6_1) dtmp0 = dd * dres; 1242 sra %o1,31,%o3 ! (2_0) j0 = diff0 >> 31; 1243 bge,pn %icc,.update38 ! (2_0) if ( hy0 >= 0x7ff00000 ) 1244 faddd %f48,%f52,%f52 ! (3_1) res0 += dtmp0; 1245 1246 and %o1,%o3,%o1 ! (2_0) j0 &= diff0; 1247 add %i5,stridez,%i5 ! pz += stridez 1248 stx %o4,[%fp+dtmp4] ! (1_0) *(long long*)&scl0 = ll; 1249 fand %f26,DA0,%f48 ! (4_1) res0 = vis_fand(dres,DA0); 1250 1251 fmuld %f20,%f20,%f2 ! (0_0) res0_hi = x_hi0 * x_hi0; 1252 cmp %o7,_0x00100000 ! (2_0) hx0 ? 0x00100000 1253 sub %l7,%o1,%o4 ! (2_0) j0 = hy0 - j0; 1254 fsubd %f10,%f20,%f0 !
(0_0) x_lo0 = x0 - x_hi0; 1255 1256 fmuld %f54,%f54,%f46 ! (0_0) dtmp0 = y_hi0 * y_hi0; 1257 and %o4,%l0,%o4 ! (2_0) j0 &= 0x7ff00000; 1258 bl,pn %icc,.update39 ! (2_0) if ( hx0 < 0x00100000 ) 1259 faddd %f10,%f20,%f62 ! (0_0) res0_lo = x0 + x_hi0; 1260 .cont39a: 1261 fmuld %f32,%f48,%f10 ! (4_1) dtmp0 = res0_hi * res0; 1262 sub %l0,%o4,%g1 ! (2_0) j0 = 0x7ff00000 - j0; 1263 nop 1264 fsubd DTWO,%f50,%f20 ! (6_1) dtmp0 = DTWO - dtmp0; 1265 .cont39b: 1266 fmuld %f22,%f16,%f16 ! (5_1) dtmp2 = dd * dres; 1267 sllx %g1,32,%g1 ! (2_0) ll = (long long)j0 << 32; 1268 stx %g1,[%fp+dtmp5] ! (2_0) *(long long*)&scl0 = ll; 1269 faddd %f60,%f54,%f50 ! (0_0) dtmp1 = y0 + y_hi0; 1270 1271 fmuld %f36,%f48,%f36 ! (4_1) dtmp1 = res0_lo * res0; 1272 nop 1273 nop 1274 fsubd %f60,%f54,%f12 ! (0_0) y_lo0 = y0 - y_hi0; 1275 .cont40: 1276 fmuld %f62,%f0,%f0 ! (0_0) res0_lo *= x_lo0; 1277 nop 1278 ldd [%fp+dtmp3],%f62 ! (1_0) *(long long*)&scl0 = ll; 1279 faddd %f2,%f46,%f32 ! (0_0) res0_hi += dtmp0; 1280 1281 fsubd DONE,%f10,%f60 ! (4_1) dtmp0 = DONE - dtmp0; 1282 nop 1283 lda [%i4]%asi,%f10 ! (1_0) ((float*)&x0)[0] = ((float*)px)[0]; 1284 fmuld %f28,%f20,%f54 ! (6_1) dd *= dtmp0; 1285 1286 nop 1287 nop 1288 lda [%i4+4]%asi,%f11 ! (1_0) ((float*)&x0)[1] = ((float*)px)[1]; 1289 bn,pn %icc,.exit 1290 1291 fmuld %f50,%f12,%f28 ! (0_0) dtmp1 *= y_lo0; 1292 nop 1293 lda [%i3]%asi,%f12 ! (1_0) ((float*)&y0)[0] = ((float*)py)[0]; 1294 fsubd DTWO,%f16,%f16 ! (5_1) dtmp2 = DTWO - dtmp2; 1295 1296 add %i1,stridex,%i4 ! px += stridex 1297 nop 1298 lda [%i3+4]%asi,%f13 ! (1_0) ((float*)&y0)[1] = ((float*)py)[1]; 1299 bn,pn %icc,.exit 1300 1301 fmuld %f54,%f18,%f46 ! (6_1) dtmp1 = dd * dres; 1302 add %i4,stridex,%i1 ! px += stridex 1303 ld [%fp+ftmp0],%o2 ! (7_1) iarr = ((int*)&dres)[0]; 1304 fand %f14,DA1,%f2 ! (7_1) dexp0 = vis_fand(dres,DA1); 1305 1306 fmuld %f10,%f62,%f10 ! (1_0) x0 *= scl0; 1307 nop 1308 ldd [%fp+dtmp6],%f50 ! (2_1) *(long long*)&scl0 = ll; 1309 fsubd %f60,%f36,%f20 ! 
(4_1) dtmp0 -= dtmp1; 1310 1311 fmuld %f12,%f62,%f60 ! (1_0) y0 *= scl0; 1312 sra %o2,11,%i3 ! (7_1) iarr >>= 11; 1313 nop 1314 faddd %f0,%f28,%f36 ! (0_0) res0_lo += dtmp1; 1315 1316 and %i3,0x1fc,%i3 ! (7_1) iarr &= 0x1fc; 1317 nop 1318 bn,pn %icc,.exit 1319 fmuld %f22,%f16,%f28 ! (5_1) dres = dd * dtmp2; 1320 1321 fsqrtd %f52,%f16 ! (3_1) res0 = sqrt ( res0 ); 1322 add %i3,TBL,%o4 ! (7_1) (char*)dll1 + iarr 1323 lda [%i4]0x82,%o1 ! (3_0) hx0 = *(int*)px; 1324 fsubd DTWO,%f46,%f62 ! (6_1) dtmp1 = DTWO - dtmp1; 1325 1326 fmuld %f20,%f26,%f52 ! (4_1) dtmp0 *= dres; 1327 add %i0,stridey,%i3 ! py += stridey 1328 ld [%o4],%f26 ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1329 faddd %f10,D2ON36,%f46 ! (1_0) x_hi0 = x0 + D2ON36; 1330 1331 nop 1332 add %i3,stridey,%i0 ! py += stridey 1333 lda [%i3]0x82,%o4 ! (3_0) hy0 = *(int*)py; 1334 faddd %f60,D2ON36,%f12 ! (1_0) y_hi0 = y0 + D2ON36; 1335 1336 faddd %f32,%f36,%f22 ! (0_0) dres = res0_hi + res0_lo; 1337 and %o1,_0x7fffffff,%o7 ! (3_0) hx0 &= 0x7fffffff; 1338 st %f22,[%fp+ftmp0] ! (0_0) iarr = ((int*)&dres)[0]; 1339 fmuld %f50,%f24,%f0 ! (2_1) res0 = scl0 * res0; 1340 1341 fmuld %f54,%f62,%f24 ! (6_1) dd *= dtmp1; 1342 cmp %o7,_0x7ff00000 ! (3_0) hx0 ? 0x7ff00000 1343 st %f0,[%i5] ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1344 fpsub32 %f26,%f2,%f26 ! (7_1) dd = vis_fpsub32(dtmp0, dexp0); 1345 1346 and %o4,_0x7fffffff,%l7 ! (3_0) hy0 &= 0x7fffffff; 1347 nop 1348 bge,pn %icc,.update41 ! (3_0) if ( hx0 >= 0x7ff00000 ) 1349 fsubd %f46,D2ON36,%f20 ! (1_0) x_hi0 -= D2ON36; 1350 1351 sub %l7,%o7,%o1 ! (3_0) diff0 = hy0 - hx0; 1352 cmp %l7,_0x7ff00000 ! (3_0) hy0 ? 0x7ff00000 1353 st %f1,[%i5+4] ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1354 fsubd %f12,D2ON36,%f54 ! (1_0) y_hi0 -= D2ON36; 1355 1356 fmuld %f26,%f14,%f50 ! (7_1) dtmp0 = dd * dres; 1357 sra %o1,31,%o3 ! (3_0) j0 = diff0 >> 31; 1358 bge,pn %icc,.update42 ! (3_0) if ( hy0 >= 0x7ff00000 ) 1359 faddd %f48,%f52,%f52 ! 
(4_1) res0 += dtmp0; 1360 1361 and %o1,%o3,%o1 ! (3_0) j0 &= diff0; 1362 add %i5,stridez,%i5 ! pz += stridez 1363 stx %g1,[%fp+dtmp6] ! (2_0) *(long long*)&scl0 = ll; 1364 fand %f28,DA0,%f48 ! (5_1) res0 = vis_fand(dres,DA0); 1365 1366 fmuld %f20,%f20,%f2 ! (1_0) res0_hi = x_hi0 * x_hi0; 1367 cmp %o7,_0x00100000 ! (3_0) hx0 ? 0x00100000 1368 sub %l7,%o1,%o4 ! (3_0) j0 = hy0 - j0; 1369 fsubd %f10,%f20,%f0 ! (1_0) x_lo0 = x0 - x_hi0; 1370 1371 fmuld %f54,%f54,%f46 ! (1_0) dtmp0 = y_hi0 * y_hi0; 1372 and %o4,%l0,%o4 ! (3_0) j0 &= 0x7ff00000; 1373 bl,pn %icc,.update43 ! (3_0) if ( hx0 < 0x00100000 ) 1374 faddd %f10,%f20,%f62 ! (1_0) res0_lo = x0 + x_hi0; 1375 .cont43a: 1376 fmuld %f42,%f48,%f10 ! (5_1) dtmp0 = res0_hi * res0; 1377 nop 1378 sub %l0,%o4,%g1 ! (3_0) j0 = 0x7ff00000 - j0; 1379 fsubd DTWO,%f50,%f20 ! (7_1) dtmp0 = DTWO - dtmp0; 1380 .cont43b: 1381 fmuld %f24,%f18,%f18 ! (6_1) dtmp2 = dd * dres; 1382 sllx %g1,32,%g1 ! (3_0) ll = (long long)j0 << 32; 1383 stx %g1,[%fp+dtmp7] ! (3_0) *(long long*)&scl0 = ll; 1384 faddd %f60,%f54,%f50 ! (1_0) dtmp1 = y0 + y_hi0; 1385 1386 fmuld %f34,%f48,%f34 ! (5_1) dtmp1 = res0_lo * res0; 1387 nop 1388 nop 1389 fsubd %f60,%f54,%f12 ! (1_0) y_lo0 = y0 - y_hi0 1390 .cont44: 1391 fmuld %f62,%f0,%f0 ! (1_0) res0_lo *= x_lo0; 1392 nop 1393 ldd [%fp+dtmp5],%f62 ! (2_0) *(long long*)&scl0 = ll; 1394 faddd %f2,%f46,%f42 ! (1_0) res0_hi += dtmp0; 1395 1396 fsubd DONE,%f10,%f60 ! (5_1) dtmp0 = DONE - dtmp0; 1397 nop 1398 lda [%i2]%asi,%f10 ! (2_0) ((float*)&x0)[0] = ((float*)px)[0]; 1399 fmuld %f26,%f20,%f54 ! (7_1) dd *= dtmp0; 1400 1401 nop 1402 nop 1403 lda [%i2+4]%asi,%f11 ! (2_0) ((float*)&x0)[1] = ((float*)px)[1]; 1404 bn,pn %icc,.exit 1405 1406 fmuld %f50,%f12,%f26 ! (1_0) dtmp1 *= y_lo0; 1407 nop 1408 lda [%o0]%asi,%f12 ! (2_0) ((float*)&y0)[0] = ((float*)py)[0]; 1409 fsubd DTWO,%f18,%f20 ! (6_1) dtmp2 = DTWO - dtmp2; 1410 1411 nop 1412 nop 1413 lda [%o0+4]%asi,%f13 ! 
(2_0) ((float*)&y0)[1] = ((float*)py)[1]; 1414 bn,pn %icc,.exit 1415 1416 fmuld %f54,%f14,%f50 ! (7_1) dtmp1 = dd * dres; 1417 nop 1418 ld [%fp+ftmp0],%o2 ! (0_0) iarr = ((int*)&dres)[0]; 1419 fand %f22,DA1,%f2 ! (0_0) dexp0 = vis_fand(dres,DA1); 1420 1421 fmuld %f10,%f62,%f10 ! (2_0) x0 *= scl0; 1422 nop 1423 ldd [%fp+dtmp8],%f18 ! (3_1) *(long long*)&scl0 = ll; 1424 fsubd %f60,%f34,%f46 ! (5_1) dtmp0 -= dtmp1; 1425 1426 fmuld %f12,%f62,%f60 ! (2_0) y0 *= scl0; 1427 sra %o2,11,%o4 ! (0_0) iarr >>= 11; 1428 nop 1429 faddd %f0,%f26,%f34 ! (1_0) res0_lo += dtmp1; 1430 1431 and %o4,0x1fc,%o4 ! (0_0) iarr &= 0x1fc; 1432 nop 1433 bn,pn %icc,.exit 1434 fmuld %f24,%f20,%f26 ! (6_1) dres = dd * dtmp2; 1435 1436 fsqrtd %f52,%f24 ! (4_1) res0 = sqrt ( res0 ); 1437 add %o4,TBL,%o4 ! (0_0) (char*)dll1 + iarr 1438 lda [%i1]0x82,%o1 ! (4_0) hx0 = *(int*)px; 1439 fsubd DTWO,%f50,%f20 ! (7_1) dtmp1 = DTWO - dtmp1; 1440 1441 fmuld %f46,%f28,%f52 ! (5_1) dtmp0 *= dres; 1442 mov %i1,%i2 1443 ld [%o4],%f28 ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1444 faddd %f10,D2ON36,%f46 ! (2_0) x_hi0 = x0 + D2ON36; 1445 1446 nop 1447 mov %i0,%o0 1448 lda [%i0]0x82,%o4 ! (4_0) hy0 = *(int*)py; 1449 faddd %f60,D2ON36,%f50 ! (2_0) y_hi0 = y0 + D2ON36; 1450 1451 fmuld %f18,%f16,%f0 ! (3_1) res0 = scl0 * res0; 1452 nop 1453 and %o1,_0x7fffffff,%o7 ! (4_0) hx0 &= 0x7fffffff; 1454 faddd %f42,%f34,%f18 ! (1_0) dres = res0_hi + res0_lo; 1455 1456 fmuld %f54,%f20,%f16 ! (7_1) dd *= dtmp1; 1457 cmp %o7,_0x7ff00000 ! (4_0) hx0 ? 0x7ff00000 1458 st %f18,[%fp+ftmp0] ! (1_0) iarr = ((int*)&dres)[0]; 1459 fpsub32 %f28,%f2,%f28 ! (0_0) dd = vis_fpsub32(dtmp0, dexp0); 1460 1461 and %o4,_0x7fffffff,%l7 ! (4_0) hy0 &= 0x7fffffff; 1462 st %f0,[%i5] ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1463 bge,pn %icc,.update45 ! (4_0) if ( hx0 >= 0x7ff00000 ) 1464 fsubd %f46,D2ON36,%f20 ! (2_0) x_hi0 -= D2ON36; 1465 1466 sub %l7,%o7,%o1 ! (4_0) diff0 = hy0 - hx0; 1467 cmp %l7,_0x7ff00000 ! (4_0) hy0 ?
0x7ff00000 1468 bge,pn %icc,.update46 ! (4_0) if ( hy0 >= 0x7ff00000 ) 1469 fsubd %f50,D2ON36,%f54 ! (2_0) y_hi0 -= D2ON36; 1470 1471 fmuld %f28,%f22,%f50 ! (0_0) dtmp0 = dd * dres; 1472 sra %o1,31,%o3 ! (4_0) j0 = diff0 >> 31; 1473 st %f1,[%i5+4] ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1474 faddd %f48,%f52,%f52 ! (5_1) res0 += dtmp0; 1475 1476 and %o1,%o3,%o1 ! (4_0) j0 &= diff0; 1477 cmp %o7,_0x00100000 ! (4_0) hx0 ? 0x00100000 1478 bl,pn %icc,.update47 ! (4_0) if ( hx0 < 0x00100000 ) 1479 fand %f26,DA0,%f48 ! (6_1) res0 = vis_fand(dres,DA0); 1480 .cont47a: 1481 fmuld %f20,%f20,%f2 ! (2_0) res0_hi = x_hi0 * x_hi0; 1482 sub %l7,%o1,%o4 ! (4_0) j0 = hy0 - j0; 1483 stx %g1,[%fp+dtmp8] ! (3_0) *(long long*)&scl0 = ll; 1484 fsubd %f10,%f20,%f0 ! (2_0) x_lo0 = x0 - x_hi0; 1485 1486 fmuld %f54,%f54,%f46 ! (2_0) dtmp0 = y_hi0 * y_hi0; 1487 and %o4,%l0,%o4 ! (4_0) j0 &= 0x7ff00000; 1488 add %i5,stridez,%i5 ! pz += stridez 1489 faddd %f10,%f20,%f62 ! (2_0) res0_lo = x0 + x_hi0; 1490 1491 fmuld %f30,%f48,%f10 ! (6_1) dtmp0 = res0_hi * res0; 1492 nop 1493 sub %l0,%o4,%g1 ! (4_0) j0 = 0x7ff00000 - j0; 1494 fsubd DTWO,%f50,%f20 ! (0_0) dtmp0 = DTWO - dtmp0; 1495 .cont47b: 1496 fmuld %f16,%f14,%f14 ! (7_1) dtmp2 = dd * dres; 1497 sllx %g1,32,%g1 ! (4_0) ll = (long long)j0 << 32; 1498 stx %g1,[%fp+dtmp9] ! (4_0) *(long long*)&scl0 = ll; 1499 faddd %f60,%f54,%f50 ! (2_0) dtmp1 = y0 + y_hi0; 1500 1501 fmuld %f40,%f48,%f40 ! (6_1) dtmp1 = res0_lo * res0; 1502 nop 1503 nop 1504 fsubd %f60,%f54,%f12 ! (2_0) y_lo0 = y0 - y_hi0; 1505 .cont48: 1506 fmuld %f62,%f0,%f0 ! (2_0) res0_lo *= x_lo0; 1507 nop 1508 ldd [%fp+dtmp7],%f62 ! (3_0) *(long long*)&scl0 = ll; 1509 faddd %f2,%f46,%f30 ! (2_0) res0_hi += dtmp0; 1510 1511 fsubd DONE,%f10,%f60 ! (6_1) dtmp0 = DONE - dtmp0; 1512 nop 1513 lda [%i4]%asi,%f10 ! (3_0) ((float*)&x0)[0] = ((float*)px)[0]; 1514 fmuld %f28,%f20,%f54 ! (0_0) dd *= dtmp0; 1515 1516 nop 1517 nop 1518 lda [%i4+4]%asi,%f11 ! 
(3_0) ((float*)&x0)[1] = ((float*)px)[1]; 1519 bn,pn %icc,.exit 1520 1521 fmuld %f50,%f12,%f28 ! (2_0) dtmp1 *= y_lo0; 1522 nop 1523 lda [%i3]%asi,%f12 ! (3_0) ((float*)&y0)[0] = ((float*)py)[0]; 1524 fsubd DTWO,%f14,%f20 ! (7_1) dtmp2 = DTWO - dtmp2; 1525 1526 lda [%i3+4]%asi,%f13 ! (3_0) ((float*)&y0)[1] = ((float*)py)[1]; 1527 add %i1,stridex,%i4 ! px += stridex 1528 nop 1529 bn,pn %icc,.exit 1530 1531 fmuld %f54,%f22,%f50 ! (0_0) dtmp1 = dd * dres; 1532 add %i4,stridex,%i1 ! px += stridex 1533 ld [%fp+ftmp0],%o2 ! (1_0) iarr = ((int*)&dres)[0]; 1534 fand %f18,DA1,%f2 ! (1_0) dexp0 = vis_fand(dres,DA1); 1535 1536 fmuld %f10,%f62,%f10 ! (3_0) x0 *= scl0; 1537 nop 1538 ldd [%fp+dtmp10],%f14 ! (4_1) *(long long*)&scl0 = ll; 1539 fsubd %f60,%f40,%f46 ! (6_1) dtmp0 -= dtmp1; 1540 1541 fmuld %f12,%f62,%f60 ! (3_0) y0 *= scl0; 1542 sra %o2,11,%i3 ! (1_0) iarr >>= 11; 1543 nop 1544 faddd %f0,%f28,%f40 ! (2_0) res0_lo += dtmp1; 1545 1546 and %i3,0x1fc,%i3 ! (1_0) iarr &= 0x1fc; 1547 nop 1548 bn,pn %icc,.exit 1549 fmuld %f16,%f20,%f28 ! (7_1) dres = dd * dtmp2; 1550 1551 fsqrtd %f52,%f16 ! (5_1) res0 = sqrt ( res0 ); 1552 add %i3,TBL,%o4 ! (1_0) (char*)dll1 + iarr 1553 lda [%i4]0x82,%o1 ! (5_0) hx0 = *(int*)px; 1554 fsubd DTWO,%f50,%f20 ! (0_0) dtmp1 = DTWO - dtmp1; 1555 1556 fmuld %f46,%f26,%f52 ! (6_1) dtmp0 *= dres; 1557 add %i0,stridey,%i3 ! py += stridey 1558 ld [%o4],%f26 ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1559 faddd %f10,D2ON36,%f46 ! (3_0) x_hi0 = x0 + D2ON36; 1560 1561 nop 1562 add %i3,stridey,%i0 ! py += stridey 1563 lda [%i3]0x82,%o4 ! (5_0) hy0 = *(int*)py; 1564 faddd %f60,D2ON36,%f50 ! (3_0) y_hi0 = y0 + D2ON36; 1565 1566 fmuld %f14,%f24,%f0 ! (4_1) res0 = scl0 * res0; 1567 and %o1,_0x7fffffff,%o7 ! (5_0) hx0 &= 0x7fffffff; 1568 nop 1569 faddd %f30,%f40,%f14 ! (2_0) dres = res0_hi + res0_lo; 1570 1571 fmuld %f54,%f20,%f24 ! (0_0) dd *= dtmp1; 1572 cmp %o7,_0x7ff00000 ! (5_0) hx0 ? 0x7ff00000 1573 st %f14,[%fp+ftmp0] ! 
(2_0) iarr = ((int*)&dres)[0]; 1574 fpsub32 %f26,%f2,%f26 ! (1_0) dd = vis_fpsub32(dtmp0, dexp0); 1575 1576 and %o4,_0x7fffffff,%l7 ! (5_0) hy0 &= 0x7fffffff; 1577 st %f0,[%i5] ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1578 bge,pn %icc,.update49 ! (5_0) if ( hx0 >= 0x7ff00000 ) 1579 fsubd %f46,D2ON36,%f20 ! (3_0) x_hi0 -= D2ON36; 1580 1581 sub %l7,%o7,%o1 ! (5_0) diff0 = hy0 - hx0; 1582 cmp %l7,_0x7ff00000 ! (5_0) hy0 ? 0x7ff00000 1583 bge,pn %icc,.update50 ! (5_0) if ( hy0 >= 0x7ff00000 ) 1584 fsubd %f50,D2ON36,%f54 ! (3_0) y_hi0 -= D2ON36; 1585 1586 fmuld %f26,%f18,%f50 ! (1_0) dtmp0 = dd * dres; 1587 sra %o1,31,%o3 ! (5_0) j0 = diff0 >> 31; 1588 st %f1,[%i5+4] ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1589 faddd %f48,%f52,%f52 ! (6_1) res0 += dtmp0; 1590 1591 and %o1,%o3,%o1 ! (5_0) j0 &= diff0; 1592 cmp %o7,_0x00100000 ! (5_0) hx0 ? 0x00100000 1593 bl,pn %icc,.update51 ! (5_0) if ( hx0 < 0x00100000 ) 1594 fand %f28,DA0,%f48 ! (7_1) res0 = vis_fand(dres,DA0); 1595 .cont51a: 1596 fmuld %f20,%f20,%f2 ! (3_0) res0_hi = x_hi0 * x_hi0; 1597 sub %l7,%o1,%o4 ! (5_0) j0 = hy0 - j0; 1598 stx %g1,[%fp+dtmp10] ! (4_0) *(long long*)&scl0 = ll; 1599 fsubd %f10,%f20,%f0 ! (3_0) x_lo0 = x0 - x_hi0; 1600 1601 fmuld %f54,%f54,%f46 ! (3_0) dtmp0 = y_hi0 * y_hi0; 1602 and %o4,%l0,%o4 ! (5_0) j0 &= 0x7ff00000; 1603 add %i5,stridez,%i5 ! pz += stridez 1604 faddd %f10,%f20,%f62 ! (3_0) res0_lo = x0 + x_hi0; 1605 1606 fmuld %f44,%f48,%f10 ! (7_1) dtmp0 = res0_hi * res0; 1607 sub %l0,%o4,%g1 ! (5_0) j0 = 0x7ff00000 - j0; 1608 nop 1609 fsubd DTWO,%f50,%f20 ! (1_0) dtmp0 = DTWO - dtmp0; 1610 .cont51b: 1611 fmuld %f24,%f22,%f22 ! (0_0) dtmp2 = dd * dres; 1612 sllx %g1,32,%g1 ! (5_0) ll = (long long)j0 << 32; 1613 stx %g1,[%fp+dtmp11] ! (5_0) *(long long*)&scl0 = ll; 1614 faddd %f60,%f54,%f50 ! (3_0) dtmp1 = y0 + y_hi0; 1615 1616 fmuld %f38,%f48,%f38 ! (7_1) dtmp1 = res0_lo * res0; 1617 nop 1618 nop 1619 fsubd %f60,%f54,%f12 ! 
(3_0) y_lo0 = y0 - y_hi0; 1620 .cont52: 1621 fmuld %f62,%f0,%f0 ! (3_0) res0_lo *= x_lo0; 1622 nop 1623 ldd [%fp+dtmp9],%f62 ! (4_0) *(long long*)&scl0 = ll; 1624 faddd %f2,%f46,%f44 ! (3_0) res0_hi += dtmp0; 1625 1626 fsubd DONE,%f10,%f60 ! (7_1) dtmp0 = DONE - dtmp0; 1627 nop 1628 lda [%i2]%asi,%f10 ! (4_0) ((float*)&x0)[0] = ((float*)px)[0]; 1629 fmuld %f26,%f20,%f54 ! (1_0) dd *= dtmp0; 1630 1631 nop 1632 nop 1633 lda [%i2+4]%asi,%f11 ! (4_0) ((float*)&x0)[1] = ((float*)px)[1]; 1634 bn,pn %icc,.exit 1635 1636 fmuld %f50,%f12,%f26 ! (3_0) dtmp1 *= y_lo0; 1637 nop 1638 lda [%o0]%asi,%f12 ! (4_0) ((float*)&y0)[0] = ((float*)py)[0]; 1639 fsubd DTWO,%f22,%f20 ! (0_0) dtmp2 = DTWO - dtmp2; 1640 1641 nop 1642 nop 1643 lda [%o0+4]%asi,%f13 ! (4_0) ((float*)&y0)[1] = ((float*)py)[1]; 1644 bn,pn %icc,.exit 1645 1646 fmuld %f54,%f18,%f50 ! (1_0) dtmp1 = dd * dres; 1647 nop 1648 ld [%fp+ftmp0],%o2 ! (2_0) iarr = ((int*)&dres)[0]; 1649 fand %f14,DA1,%f2 ! (2_0) dexp0 = vis_fand(dres,DA1); 1650 1651 fmuld %f10,%f62,%f10 ! (4_0) x0 *= scl0; 1652 nop 1653 ldd [%fp+dtmp12],%f22 ! (5_1) *(long long*)&scl0 = ll; 1654 fsubd %f60,%f38,%f46 ! (7_1) dtmp0 -= dtmp1; 1655 1656 fmuld %f12,%f62,%f60 ! (4_0) y0 *= scl0; 1657 sra %o2,11,%o4 ! (2_0) iarr >>= 11; 1658 nop 1659 faddd %f0,%f26,%f38 ! (3_0) res0_lo += dtmp1; 1660 1661 and %o4,0x1fc,%o4 ! (2_0) iarr &= 0x1fc; 1662 nop 1663 bn,pn %icc,.exit 1664 fmuld %f24,%f20,%f26 ! (0_0) dres = dd * dtmp2; 1665 1666 fsqrtd %f52,%f24 ! (6_1) res0 = sqrt ( res0 ); 1667 add %o4,TBL,%o4 ! (2_0) (char*)dll1 + iarr 1668 lda [%i1]0x82,%o1 ! (6_0) hx0 = *(int*)px; 1669 fsubd DTWO,%f50,%f52 ! (1_0) dtmp1 = DTWO - dtmp1; 1670 1671 fmuld %f46,%f28,%f28 ! (7_1) dtmp0 *= dres; 1672 mov %i1,%i2 1673 ld [%o4],%f20 ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1674 faddd %f10,D2ON36,%f46 ! (4_0) x_hi0 = x0 + D2ON36; 1675 1676 nop 1677 mov %i0,%o0 1678 lda [%i0]0x82,%o4 ! (6_0) hy0 = *(int*)py; 1679 faddd %f60,D2ON36,%f50 ! 
(4_0) y_hi0 = y0 + D2ON36; 1680 1681 fmuld %f22,%f16,%f0 ! (5_1) res0 = scl0 * res0; 1682 and %o1,_0x7fffffff,%o7 ! (6_0) hx0 &= 0x7fffffff; 1683 nop 1684 faddd %f44,%f38,%f22 ! (3_0) dres = res0_hi + res0_lo; 1685 1686 fmuld %f54,%f52,%f16 ! (1_0) dd *= dtmp1; 1687 cmp %o7,_0x7ff00000 ! (6_0) hx0 ? 0x7ff00000 1688 st %f22,[%fp+ftmp0] ! (3_0) iarr = ((int*)&dres)[0]; 1689 fpsub32 %f20,%f2,%f52 ! (2_0) dd = vis_fpsub32(dtmp0, dexp0); 1690 1691 and %o4,_0x7fffffff,%l7 ! (6_0) hy0 &= 0x7fffffff; 1692 st %f0,[%i5] ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1693 bge,pn %icc,.update53 ! (6_0) if ( hx0 >= 0x7ff00000 ) 1694 fsubd %f46,D2ON36,%f46 ! (4_0) x_hi0 -= D2ON36; 1695 1696 sub %l7,%o7,%o1 ! (6_0) diff0 = hy0 - hx0; 1697 cmp %l7,_0x7ff00000 ! (6_0) hy0 ? 0x7ff00000 1698 bge,pn %icc,.update54 ! (6_0) if ( hy0 >= 0x7ff00000 ) 1699 fsubd %f50,D2ON36,%f54 ! (4_0) y_hi0 -= D2ON36; 1700 1701 fmuld %f52,%f14,%f50 ! (2_0) dtmp0 = dd * dres; 1702 sra %o1,31,%o3 ! (6_0) j0 = diff0 >> 31; 1703 st %f1,[%i5+4] ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1704 faddd %f48,%f28,%f48 ! (7_1) res0 += dtmp0; 1705 1706 and %o1,%o3,%o1 ! (6_0) j0 &= diff0; 1707 cmp %o7,_0x00100000 ! (6_0) hx0 ? 0x00100000 1708 bl,pn %icc,.update55 ! (6_0) if ( hx0 < 0x00100000 ) 1709 fand %f26,DA0,%f28 ! (0_0) res0 = vis_fand(dres,DA0); 1710 .cont55a: 1711 fmuld %f46,%f46,%f0 ! (4_0) res0_hi = x_hi0 * x_hi0; 1712 sub %l7,%o1,%o4 ! (6_0) j0 = hy0 - j0; 1713 stx %g1,[%fp+dtmp12] ! (5_0) *(long long*)&scl0 = ll; 1714 fsubd %f10,%f46,%f2 ! (4_0) x_lo0 = x0 - x_hi0; 1715 1716 fmuld %f54,%f54,%f20 ! (4_0) dtmp0 = y_hi0 * y_hi0; 1717 and %o4,%l0,%o4 ! (6_0) j0 &= 0x7ff00000; 1718 add %i5,stridez,%i5 ! pz += stridez 1719 faddd %f10,%f46,%f62 ! (4_0) res0_lo = x0 + x_hi0; 1720 1721 fmuld %f16,%f18,%f18 ! (1_0) dtmp2 = dd * dres; 1722 sub %l0,%o4,%g1 ! (6_0) j0 = 0x7ff00000 - j0; 1723 nop 1724 fsubd DTWO,%f50,%f10 ! (2_0) dtmp0 = DTWO - dtmp0; 1725 .cont55b: 1726 fmuld %f32,%f28,%f50 ! 
(0_0) dtmp0 = res0_hi * res0; 1727 sllx %g1,32,%g1 ! (6_0) ll = (long long)j0 << 32; 1728 stx %g1,[%fp+dtmp13] ! (6_0) *(long long*)&scl0 = ll; 1729 faddd %f60,%f54,%f46 ! (4_0) dtmp1 = y0 + y_hi0; 1730 1731 fmuld %f36,%f28,%f36 ! (0_0) dtmp1 = res0_lo * res0; 1732 nop 1733 nop 1734 fsubd %f60,%f54,%f60 ! (4_0) y_lo0 = y0 - y_hi0; 1735 .cont56: 1736 fmuld %f62,%f2,%f2 ! (4_0) res0_lo *= x_lo0; 1737 nop 1738 ldd [%fp+dtmp11],%f62 ! (5_0) *(long long*)&scl0 = ll; 1739 faddd %f0,%f20,%f32 ! (4_0) res0_hi += dtmp0; 1740 1741 lda [%i4]%asi,%f0 ! (5_0) ((float*)&x0)[0] = ((float*)px)[0]; 1742 nop 1743 nop 1744 fmuld %f52,%f10,%f10 ! (2_0) dd *= dtmp0; 1745 1746 lda [%i4+4]%asi,%f1 ! (5_0) ((float*)&x0)[1] = ((float*)px)[1]; 1747 nop 1748 nop 1749 fsubd DONE,%f50,%f52 ! (0_0) dtmp0 = DONE - dtmp0; 1750 1751 fmuld %f46,%f60,%f46 ! (4_0) dtmp1 *= y_lo0; 1752 nop 1753 lda [%i3]%asi,%f12 ! (5_0) ((float*)&y0)[0] = ((float*)py)[0]; 1754 fsubd DTWO,%f18,%f18 ! (1_0) dtmp2 = DTWO - dtmp2; 1755 1756 nop 1757 add %i1,stridex,%i4 ! px += stridex 1758 lda [%i3+4]%asi,%f13 ! (5_0) ((float*)&y0)[1] = ((float*)py)[1]; 1759 bn,pn %icc,.exit 1760 1761 fmuld %f10,%f14,%f50 ! (2_0) dtmp1 = dd * dres; 1762 add %i4,stridex,%i1 ! px += stridex 1763 ld [%fp+ftmp0],%o2 ! (3_0) iarr = ((int*)&dres)[0]; 1764 fand %f22,DA1,%f54 ! (3_0) dexp0 = vis_fand(dres,DA1); 1765 1766 fmuld %f0,%f62,%f60 ! (5_0) x0 *= scl0; 1767 nop 1768 ldd [%fp+dtmp14],%f0 ! (6_1) *(long long*)&scl0 = ll; 1769 fsubd %f52,%f36,%f20 ! (0_0) dtmp0 -= dtmp1; 1770 1771 fmuld %f12,%f62,%f52 ! (5_0) y0 *= scl0; 1772 sra %o2,11,%i3 ! (3_0) iarr >>= 11; 1773 nop 1774 faddd %f2,%f46,%f36 ! (4_0) res0_lo += dtmp1; 1775 1776 and %i3,0x1fc,%i3 ! (3_0) iarr &= 0x1fc; 1777 nop 1778 bn,pn %icc,.exit 1779 fmuld %f16,%f18,%f16 ! (1_0) dres = dd * dtmp2; 1780 1781 fsqrtd %f48,%f18 ! (7_1) res0 = sqrt ( res0 ); 1782 add %i3,TBL,%o4 ! (3_0) (char*)dll1 + iarr 1783 lda [%i4]0x82,%o1 ! (7_0) hx0 = *(int*)px; 1784 fsubd DTWO,%f50,%f46 ! 
(2_0) dtmp1 = DTWO - dtmp1; 1785 1786 fmuld %f20,%f26,%f48 ! (0_0) dtmp0 *= dres; 1787 add %i0,stridey,%i3 ! py += stridey 1788 ld [%o4],%f20 ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0]; 1789 faddd %f60,D2ON36,%f50 ! (5_0) x_hi0 = x0 + D2ON36; 1790 1791 nop 1792 add %i3,stridey,%i0 ! py += stridey 1793 lda [%i3]0x82,%o4 ! (7_0) hy0 = *(int*)py; 1794 faddd %f52,D2ON36,%f12 ! (5_0) y_hi0 = y0 + D2ON36; 1795 1796 fmuld %f0,%f24,%f2 ! (6_1) res0 = scl0 * res0; 1797 and %o1,_0x7fffffff,%o7 ! (7_0) hx0 &= 0x7fffffff; 1798 nop 1799 faddd %f32,%f36,%f24 ! (4_0) dres = res0_hi + res0_lo; 1800 1801 fmuld %f10,%f46,%f26 ! (2_0) dd *= dtmp1; 1802 cmp %o7,_0x7ff00000 ! (7_0) hx0 ? 0x7ff00000 1803 st %f24,[%fp+ftmp0] ! (4_0) iarr = ((int*)&dres)[0]; 1804 fpsub32 %f20,%f54,%f10 ! (3_0) dd = vis_fpsub32(dtmp0, dexp0); 1805 1806 and %o4,_0x7fffffff,%l7 ! (7_0) hy0 &= 0x7fffffff; 1807 st %f2,[%i5] ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0]; 1808 bge,pn %icc,.update57 ! (7_0) if ( hx0 >= 0x7ff00000 ) 1809 fsubd %f50,D2ON36,%f20 ! (5_0) x_hi0 -= D2ON36; 1810 1811 sub %l7,%o7,%o1 ! (7_0) diff0 = hy0 - hx0; 1812 cmp %l7,_0x7ff00000 ! (7_0) hy0 ? 0x7ff00000 1813 bge,pn %icc,.update58 ! (7_0) if ( hy0 >= 0x7ff00000 ) 1814 fsubd %f12,D2ON36,%f54 ! (5_0) y_hi0 -= D2ON36; 1815 1816 fmuld %f10,%f22,%f50 ! (3_0) dtmp0 = dd * dres; 1817 sra %o1,31,%o3 ! (7_0) j0 = diff0 >> 31; 1818 st %f3,[%i5+4] ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1]; 1819 faddd %f28,%f48,%f48 ! (0_0) res0 += dtmp0; 1820 1821 and %o1,%o3,%o1 ! (7_0) j0 &= diff0; 1822 cmp %o7,_0x00100000 ! (7_0) hx0 ? 0x00100000 1823 bl,pn %icc,.update59 ! (7_0) if ( hx0 < 0x00100000 ) 1824 fand %f16,DA0,%f28 ! (1_0) res0 = vis_fand(dres,DA0); 1825 .cont59a: 1826 fmuld %f20,%f20,%f0 ! (5_0) res0_hi = x_hi0 * x_hi0; 1827 sub %l7,%o1,%o4 ! (7_0) j0 = hy0 - j0; 1828 stx %g1,[%fp+dtmp14] ! (6_0) *(long long*)&scl0 = ll; 1829 fsubd %f60,%f20,%f2 ! (5_0) x_lo0 = x0 - x_hi0; 1830 1831 fmuld %f54,%f54,%f46 ! 
(5_0) dtmp0 = y_hi0 * y_hi0; 1832 and %o4,%l0,%o4 ! (7_0) j0 &= 0x7ff00000; 1833 add %i5,stridez,%i5 ! pz += stridez 1834 faddd %f60,%f20,%f62 ! (5_0) res0_lo = x0 + x_hi0; 1835 1836 fmuld %f26,%f14,%f14 ! (2_0) dtmp2 = dd * dres; 1837 sub %l0,%o4,%g1 ! (7_0) j0 = 0x7ff00000 - j0; 1838 nop 1839 fsubd DTWO,%f50,%f20 ! (3_0) dtmp0 = DTWO - dtmp0; 1840 .cont59b: 1841 fmuld %f42,%f28,%f60 ! (1_0) dtmp0 = res0_hi * res0; 1842 sllx %g1,32,%g1 ! (7_0) ll = (long long)j0 << 32; 1843 stx %g1,[%fp+dtmp15] ! (7_0) *(long long*)&scl0 = ll; 1844 faddd %f52,%f54,%f50 ! (5_0) dtmp1 = y0 + y_hi0; 1845 1846 fmuld %f34,%f28,%f34 ! (1_0) dtmp1 = res0_lo * res0; 1847 nop 1848 nop 1849 fsubd %f52,%f54,%f54 ! (5_0) y_lo0 = y0 - y_hi0; 1850 .cont60: 1851 fmuld %f62,%f2,%f2 ! (5_0) res0_lo *= x_lo0; 1852 nop 1853 ldd [%fp+dtmp13],%f62 ! (6_0) *(long long*)&scl0 = ll; 1854 faddd %f0,%f46,%f42 ! (5_0) res0_hi += dtmp0; 1855 1856 fmuld %f10,%f20,%f52 ! (3_0) dd *= dtmp0; 1857 nop 1858 lda [%i2]%asi,%f10 ! (6_0) ((float*)&x0)[0] = ((float*)px)[0]; 1859 bn,pn %icc,.exit 1860 1861 lda [%i2+4]%asi,%f11 ! (6_0) ((float*)&x0)[1] = ((float*)px)[1]; 1862 nop 1863 nop 1864 fsubd DONE,%f60,%f60 ! (1_0) dtmp0 = DONE - dtmp0; 1865 1866 fmuld %f50,%f54,%f46 ! (5_0) dtmp1 *= y_lo0; 1867 nop 1868 lda [%o0]%asi,%f12 ! (6_0) ((float*)&y0)[0] = ((float*)py)[0]; 1869 fsubd DTWO,%f14,%f14 ! (2_0) dtmp2 = DTWO - dtmp2; 1870 1871 nop 1872 nop 1873 lda [%o0+4]%asi,%f13 ! (6_0) ((float*)&y0)[1] = ((float*)py)[1]; 1874 bn,pn %icc,.exit 1875 1876 fmuld %f52,%f22,%f50 ! (3_0) dtmp1 = dd * dres; 1877 nop 1878 ld [%fp+ftmp0],%o2 ! (4_0) iarr = ((int*)&dres)[0]; 1879 fand %f24,DA1,%f54 ! (4_0) dexp0 = vis_fand(dres,DA1); 1880 1881 fmuld %f10,%f62,%f10 ! (6_0) x0 *= scl0; 1882 nop 1883 ldd [%fp+dtmp0],%f0 ! (7_1) *(long long*)&scl0 = ll; 1884 fsubd %f60,%f34,%f20 ! (1_0) dtmp0 -= dtmp1; 1885 1886 fmuld %f12,%f62,%f60 ! (6_0) y0 *= scl0; 1887 sra %o2,11,%o4 ! (4_0) iarr >>= 11; 1888 nop 1889 faddd %f2,%f46,%f34 ! 
! (5_0) res0_lo += dtmp1;

	and	%o4,0x1fc,%o4	! (4_0) iarr &= 0x1fc;
	subcc	counter,8,counter	! counter -= 8;
	bpos,pt	%icc,.main_loop	! keep looping while counter >= 0
	fmuld	%f26,%f14,%f26	! (2_0) dres = dd * dtmp2; (branch delay slot)

	add	counter,8,counter	! loop exit: undo the last "counter -= 8"

! .tail: drain the results that are still in flight after the main loop.
! Each stage below finishes one more result, stores it through pz (%i5)
! and advances pz by stridez.  counter is decremented ahead of every stage
! and control returns to .begin as soon as it goes negative.  Where
! "add %i5,stridez,%i5" follows a bneg it sits in the branch delay slot and
! therefore runs on both paths.
! NOTE(review): the (k_n) tags in the comments look like pipeline slot and
! iteration markers carried over from the main loop -- confirm there.
.tail:
	subcc	counter,1,counter
	bneg	.begin
	nop

	fsqrtd	%f48,%f14	! (0_1) res0 = sqrt ( res0 );
	add	%o4,TBL,%o4	! (4_1) (char*)dll1 + iarr
	fsubd	DTWO,%f50,%f46	! (3_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f16,%f48	! (1_1) dtmp0 *= dres;
	ld	[%o4],%f20	! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

	fmuld	%f0,%f18,%f0	! (7_2) res0 = scl0 * res0;
	st	%f0,[%i5]	! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
	faddd	%f42,%f34,%f16	! (5_1) dres = res0_hi + res0_lo;

	subcc	counter,1,counter
	st	%f1,[%i5+4]	! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	fmuld	%f52,%f46,%f18	! (3_1) dd *= dtmp1;
	st	%f16,[%fp+ftmp0]	! (5_1) iarr = ((int*)&dres)[0];
	fpsub32	%f20,%f54,%f54	! (4_1) dd = vis_fpsub32(dtmp0, dexp0);

	fmuld	%f54,%f24,%f50	! (4_1) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f52	! (1_1) res0 += dtmp0;


	fand	%f26,DA0,%f48	! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f30,%f48,%f12	! (2_1) dtmp0 = res0_hi * res0;

	fmuld	%f40,%f48,%f40	! (2_1) dtmp1 = res0_lo * res0;

	fmuld	%f54,%f20,%f54	! (4_1) dd *= dtmp0;

	fsubd	DONE,%f12,%f60	! (2_1) dtmp0 = DONE - dtmp0;

	fsubd	DTWO,%f22,%f22	! (3_1) dtmp2 = DTWO - dtmp2;

	fmuld	%f54,%f24,%f50	! (4_1) dtmp1 = dd * dres;
	ld	[%fp+ftmp0],%o2	! (5_1) iarr = ((int*)&dres)[0];
	fand	%f16,DA1,%f2	! (5_1) dexp0 = vis_fand(dres,DA1);

	ldd	[%fp+dtmp2],%f0	! (0_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f40,%f20	! (2_1) dtmp0 -= dtmp1;

	sra	%o2,11,%i3	! (5_1) iarr >>= 11;

	and	%i3,0x1fc,%i3	! (5_1) iarr &= 0x1fc;
	fmuld	%f18,%f22,%f28	! (3_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f22	! (1_1) res0 = sqrt ( res0 );
	add	%i3,TBL,%g1	! (5_1) (char*)dll1 + iarr
	fsubd	DTWO,%f50,%f62	! (4_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f26,%f52	! (2_1) dtmp0 *= dres;
	ld	[%g1],%f26	! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

	fmuld	%f0,%f14,%f0	! (0_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f14	! (4_1) dd *= dtmp1;
	fpsub32	%f26,%f2,%f26	! (5_1) dd = vis_fpsub32(dtmp0, dexp0);

	st	%f0,[%i5]	! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];

	fmuld	%f26,%f16,%f50	! (5_1) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (2_1) res0 += dtmp0;

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	fand	%f28,DA0,%f48	! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24	! (4_1) dtmp2 = dd * dres;

	fmuld	%f38,%f48,%f38	! (3_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60	! (3_1) dtmp0 = DONE - dtmp0;
	fmuld	%f26,%f20,%f54	! (5_1) dd *= dtmp0;

	fsubd	DTWO,%f24,%f24	! (4_1) dtmp2 = DTWO - dtmp2;

	fmuld	%f54,%f16,%f46	! (5_1) dtmp1 = dd * dres;

	ldd	[%fp+dtmp4],%f50	! (1_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f38,%f20	! (3_1) dtmp0 -= dtmp1;

	fmuld	%f14,%f24,%f26	! (4_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f24	! (2_1) res0 = sqrt ( res0 );
	fsubd	DTWO,%f46,%f62	! (5_1) dtmp1 = DTWO - dtmp1;

	fmuld	%f20,%f28,%f52	! (3_1) dtmp0 *= dres;

	fmuld	%f50,%f22,%f0	! (1_1) res0 = scl0 * res0;

	fmuld	%f54,%f62,%f22	! (5_1) dd *= dtmp1;

	st	%f0,[%i5]	! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];

	subcc	counter,1,counter
	st	%f1,[%i5+4]	! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	faddd	%f48,%f52,%f52	! (3_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;

	fmuld	%f22,%f16,%f16	! (5_1) dtmp2 = dd * dres;

	fmuld	%f36,%f48,%f36	! (4_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60	! (4_1) dtmp0 = DONE - dtmp0;

	fsubd	DTWO,%f16,%f16	! (5_1) dtmp2 = DTWO - dtmp2;

	ldd	[%fp+dtmp6],%f50	! (2_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f36,%f20	! (4_1) dtmp0 -= dtmp1;

	fmuld	%f22,%f16,%f28	! (5_1) dres = dd * dtmp2;

	fsqrtd	%f52,%f16	! (3_1) res0 = sqrt ( res0 );

	fmuld	%f20,%f26,%f52	! (4_1) dtmp0 *= dres;

	fmuld	%f50,%f24,%f0	! (2_1) res0 = scl0 * res0;

	st	%f0,[%i5]	! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];

	st	%f1,[%i5+4]	! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (4_1) res0 += dtmp0;

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	fand	%f28,DA0,%f48	! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;

	fmuld	%f34,%f48,%f34	! (5_1) dtmp1 = res0_lo * res0;

	fsubd	DONE,%f10,%f60	! (5_1) dtmp0 = DONE - dtmp0;

	ldd	[%fp+dtmp8],%f18	! (3_1) *(long long*)&scl0 = ll;
	fsubd	%f60,%f34,%f46	! (5_1) dtmp0 -= dtmp1;

	fsqrtd	%f52,%f24	! (4_1) res0 = sqrt ( res0 );

	fmuld	%f46,%f28,%f52	! (5_1) dtmp0 *= dres;

	fmuld	%f18,%f16,%f0	! (3_1) res0 = scl0 * res0;
	st	%f0,[%i5]	! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
	st	%f1,[%i5+4]	! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	!
! (5_1) res0 += dtmp0;

! Last two drain stages of .tail: each one decrements counter, returns to
! .begin if it went negative (pz is advanced in the branch delay slot),
! otherwise scales and stores one more result.
	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	ldd	[%fp+dtmp10],%f14	! (4_1) *(long long*)&scl0 = ll;

	fsqrtd	%f52,%f16	! (5_1) res0 = sqrt ( res0 );

	fmuld	%f14,%f24,%f0	! (4_1) res0 = scl0 * res0;
	st	%f0,[%i5]	! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
	st	%f1,[%i5+4]	! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];

	subcc	counter,1,counter
	bneg	.begin
	add	%i5,stridez,%i5	! pz += stridez

	ldd	[%fp+dtmp12],%f22	! (5_1) *(long long*)&scl0 = ll;

	fmuld	%f22,%f16,%f0	! (5_1) res0 = scl0 * res0;
	st	%f0,[%i5]	! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
	st	%f1,[%i5+4]	! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];

	ba	.begin
	add	%i5,stridez,%i5	! pz += stridez (delay slot)

! .spec0: one element whose x or y is Inf or NaN.
!   In:  %o7 = hx0, %l7 = hy0 (high words with the sign bit cleared),
!        %i4 = px, %i3 = py, %i5 = pz.
!   If |x| is Inf (hx0 == 0x7ff00000 and low word 0) or |y| is Inf, the
!   result is +0.0 (label 3).  Otherwise the result is fabs(x) * fabs(y)
!   (label 2).
!   Both paths advance px/py/pz, decrement counter and restart at .begin1.
!   NOTE(review): the branch into .spec0 is outside this chunk; presumably
!   it is taken when hx0 or hy0 >= 0x7ff00000 -- confirm at the call site.
	.align	16
.spec0:
	cmp	%o7,_0x7ff00000	! hx0 ? 0x7ff00000
	bne	1f	! if ( hx0 != 0x7ff00000 )
	ld	[%i4+4],%i2	! lx = ((int*)px)[1];

	cmp	%i2,0	! lx ? 0
	be	3f	! if ( lx == 0 )
	nop
1:
	cmp	%l7,_0x7ff00000	! hy0 ? 0x7ff00000
	bne	2f	! if ( hy0 != 0x7ff00000 )
	ld	[%i3+4],%o2	! ly = ((int*)py)[1];

	cmp	%o2,0	! ly ? 0
	be	3f	! if ( ly == 0 ); the ld at 2: is its delay slot (harmless)
2:
	ld	[%i4],%f0	! ((float*)&x0)[0] = ((float*)px)[0];
	ld	[%i4+4],%f1	! ((float*)&x0)[1] = ((float*)px)[1];

	ld	[%i3],%f2	! ((float*)&y0)[0] = ((float*)py)[0];
	add	%i4,stridex,%i4	! px += stridex
	ld	[%i3+4],%f3	! ((float*)&y0)[1] = ((float*)py)[1];

	fabsd	%f0,%f0

	fabsd	%f2,%f2

	fmuld	%f0,%f2,%f0	! res0 = fabs(x0) * fabs(y0);
	add	%i3,stridey,%i3	! py += stridey;
	st	%f0,[%i5]	! ((float*)pz)[0] = ((float*)&res0)[0];

	st	%f1,[%i5+4]	! ((float*)pz)[1] = ((float*)&res0)[1];
	add	%i5,stridez,%i5	! pz += stridez
	ba	.begin1
	sub	counter,1,counter
3:
	add	%i4,stridex,%i4	! px += stridex
	add	%i3,stridey,%i3	! py += stridey
	st	%g0,[%i5]	! ((int*)pz)[0] = 0;

	! Both words of the +0.0 result must be written BEFORE pz is advanced.
	! Advancing first sends the low word to the next element (or past the
	! end of the array) and leaves this element's low word unwritten.
	st	%g0,[%i5+4]	! ((int*)pz)[1] = 0;
	add	%i5,stridez,%i5	! pz += stridez;
	ba	.begin1
	sub	counter,1,counter

! .spec1: operand below the normal range (comments tag it as stage 7_0).
!   If hy0 >= 0x00100000 control goes back to .cont_spec0.  Note that the
!   "ld [%i4+4],%i2" after the bge is its delay slot and runs on both paths.
!   If x and y are both exactly zero (hx0|hy0|lx|ly == 0) the result
!   1.0/0.0 is stored and processing restarts at .begin1.
!   Otherwise |x| and |y| are rebuilt with fxtod (plus the D2ON51 fix-up
!   when the high word is >= 0x00080000), written to the stack slots
!   dtmp2/dtmp3, px/py (%i4/%i3) are pointed at those slots, the D2ONM52
!   and D2ON1022 constants are stored to dtmp15/dtmp0, and control
!   continues at .cont_spec1.
	.align	16
.spec1:
	and	%o1,%o3,%o1	! (7_0) j0 &= diff0;

	cmp	%l7,_0x00100000	! (7_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont_spec0	! (7_0) falls through if ( hy0 < 0x00100000 )

	ld	[%i4+4],%i2	! lx = ((int*)px)[1];
	or	%o7,%l7,%g5	! ii = hx0 | hy0;
	fzero	%f0

	ld	[%i3+4],%o2	! ly = ((int*)py)[1];
	or	%i2,%g5,%g5	! ii |= lx;

	orcc	%o2,%g5,%g5	! ii |= ly;
	bnz,a,pn	%icc,1f	! if ( ii != 0 )
	sethi	%hi(0x00080000),%i2

	fdivd	DONE,%f0,%f0	! res0 = 1.0 / 0.0;

	st	%f0,[%i5]	! ((float*)pz)[0] = ((float*)&res0)[0];

	add	%i4,stridex,%i4	! px += stridex;
	add	%i3,stridey,%i3	! py += stridey;
	st	%f1,[%i5+4]	! ((float*)pz)[1] = ((float*)&res0)[1];

	add	%i5,stridez,%i5	! pz += stridez;
	ba	.begin1
	sub	counter,1,counter
1:
	ld	[%i4],%f0	! ((float*)&x0)[0] = ((float*)px)[0];

	ld	[%i4+4],%f1	! ((float*)&x0)[1] = ((float*)px)[1];

	ld	[%i3],%f2	! ((float*)&y0)[0] = ((float*)py)[0];

	fabsd	%f0,%f0	! x0 = fabs(x0);
	ld	[%i3+4],%f3	! ((float*)&y0)[1] = ((float*)py)[1];

	ldd	[TBL+TBL_SHIFT+64],%f12	! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
	add	%fp,dtmp2,%i4
	add	%fp,dtmp3,%i3

	fabsd	%f2,%f2	! y0 = fabs(y0);
	ldd	[TBL+TBL_SHIFT+56],%f10	! D2ON51

	ldx	[TBL+TBL_SHIFT+48],%g5	! D2ONM52
	cmp	%o7,%i2	! hx0 ? 0x00080000
	bl,a	1f	! if ( hx0 < 0x00080000 )
	fxtod	%f0,%f0	! x0 = *(long long*)&x0;

	fand	%f0,%f12,%f0	! x0 = vis_fand(x0, dtmp0);
	fxtod	%f0,%f0	! x0 = *(long long*)&x0;
	faddd	%f0,%f10,%f0	! x0 += D2ON51;
1:
	std	%f0,[%i4]

	ldx	[TBL+TBL_SHIFT+40],%g1	! D2ON1022
	cmp	%l7,%i2	! hy0 ? 0x00080000
	bl,a	1f	! if ( hy0 < 0x00080000 )
	fxtod	%f2,%f2	!
! y0 = *(long long*)&y0;

	fand	%f2,%f12,%f2	! y0 = vis_fand(y0, dtmp0);
	fxtod	%f2,%f2	! y0 = *(long long*)&y0;
	faddd	%f2,%f10,%f2	! y0 += D2ON51;
1:
	std	%f2,[%i3]

	stx	%g5,[%fp+dtmp15]	! D2ONM52

	ba	.cont_spec1
	stx	%g1,[%fp+dtmp0]	! D2ON1022 (delay slot)

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into the scale register;
!   * replay any pipeline instructions listed after "1:" and branch back
!     to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000; the "cmp counter,k" that
! follows is the bge delay slot and runs on both paths.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here;
! presumably a harmless substitute operand -- confirm against the table.

	.align	16
.update0:			! clamp to 1, resume at .cont1
	cmp	counter,1
	ble	1f
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	1,counter
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont1
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update1:			! hy0 check, else clamp to 1, resume at .cont1
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont0	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,1
	ble,a	1f
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	mov	1,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont1
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update2:			! clamp to 2, resume at .cont4
	cmp	counter,2
	ble	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	2,counter
1:
	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;

	fmuld	%f20,%f20,%f2	! (7_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update3:			! clamp to 2, resume at .cont4
	cmp	counter,2
	ble	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	2,counter
1:
	fmuld	%f20,%f20,%f2	!
! (7_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into the scale register (shifted left 32 where sllx is present);
!   * replay any pipeline instructions listed after "1:" and branch back
!     to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here.

	.align	16
.update4:			! hy0 check, else clamp to 2, resume at .cont4
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,a,pn	%icc,.cont4	! (0_0) if ( hy0 < 0x00100000 )
	sub	%l0,%o4,%o4	! (1_0) j0 = 0x7ff00000 - j0; (annulled unless taken)

	cmp	counter,2
	ble,a	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	mov	2,counter
	stx	%i3,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont4
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update5:			! clamp to 3, resume at .cont8
	cmp	counter,3
	ble	1f
	nop

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	3,counter
1:
	st	%f14,[%fp+ftmp0]	! (7_1) iarr = ((int*)&dres)[0];
	fsubd	%f46,D2ON36,%f20	! (0_0) x_hi0 -= D2ON36;

	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;

	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update6:			! clamp to 3, resume at .cont8
	cmp	counter,3
	ble	1f
	nop

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	3,counter
1:
	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update7:			! hy0 check, else clamp to 3, resume at .cont8
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont7	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,3
	ble,a	1f
	nop

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	mov	3,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	sllx	%g1,32,%g1
	ba	.cont8
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update9:			! clamp to 4, resume at .cont12
	cmp	counter,4
	ble	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	4,counter
1:
	st	%f22,[%fp+ftmp0]	! (0_0) iarr = ((int*)&dres)[0];
	fsubd	%f46,D2ON36,%f20	! (1_0) x_hi0 -= D2ON36;

	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f14,%f50	! (7_1) dtmp0 = dd * dres;


	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update10:			! clamp to 4, resume at .cont12
	cmp	counter,4
	ble	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	4,counter
1:
	fmuld	%f26,%f14,%f50	! (7_1) dtmp0 = dd * dres;


	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	!
! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into %g1 (shifted left 32 and stored where sllx/stx are present);
!   * replay any pipeline instructions listed after "1:" and branch back
!     to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000; the "cmp counter,k" that
! follows is the bge delay slot and runs on both paths.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here.

	.align	16
.update11:			! hy0 check, else clamp to 4, resume at .cont12
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont11	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,4
	ble,a	1f
	nop

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	mov	4,counter
	stx	%i3,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;
	ba	.cont12
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update13:			! clamp to 5, resume at .cont16
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	5,counter
1:
	fsubd	%f46,D2ON36,%f20	! (2_0) x_hi0 -= D2ON36;

	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f22,%f50	! (0_0) dtmp0 = dd * dres;

	fmuld	%f20,%f20,%f2	! (2_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (2_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (2_0) res0_lo = x0 + x_hi0;

	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update14:			! clamp to 5, resume at .cont16
	cmp	counter,5
	ble	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	5,counter
1:
	fmuld	%f28,%f22,%f50	! (0_0) dtmp0 = dd * dres;

	fmuld	%f20,%f20,%f2	! (2_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (2_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (2_0) res0_lo = x0 + x_hi0;

	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update15:			! hy0 check, else clamp to 5, resume at .cont16
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont15	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,5
	ble,a	1f
	nop

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	mov	5,counter
	stx	%o0,[%fp+tmp_py]
1:
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2

	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;
	ba	.cont16
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update17:			! clamp to 6, resume at .cont20
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	6,counter
1:
	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f18,%f50	! (1_0) dtmp0 = dd * dres;

	fand	%f28,DA0,%f48	! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (3_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (3_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (1_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f22,%f22	! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (7_1) dtmp1 = res0_lo * res0;
	fsubd	%f60,%f54,%f12	! (3_0) y_lo0 = y0 - y_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1	!
! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont20
	add	TBL,TBL_SHIFT+24,%i3

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into %g1 (shifted left 32 and stored where sllx/stx are present);
!   * replay the pipeline instructions listed after "1:" and branch back
!     to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000; the "cmp counter,k" that
! follows is the bge delay slot and runs on both paths.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here.

	.align	16
.update18:			! clamp to 6, resume at .cont20
	cmp	counter,6
	ble	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	6,counter
1:
	fmuld	%f26,%f18,%f50	! (1_0) dtmp0 = dd * dres;

	fand	%f28,DA0,%f48	! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (3_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (3_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (1_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f22,%f22	! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (7_1) dtmp1 = res0_lo * res0;
	fsubd	%f60,%f54,%f12	! (3_0) y_lo0 = y0 - y_hi0;

	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4

	sllx	%g1,32,%g1	! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont20
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update19:			! hy0 check, else clamp to 6, resume at .cont19b
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont19a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,6
	ble,a	1f
	nop

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	mov	6,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20	! (1_0) dtmp0 = DTWO - dtmp0;

	ba	.cont19b
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update21:			! clamp to 7, resume at .cont24
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	7,counter
1:
	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;

	fmuld	%f52,%f14,%f50	! (2_0) dtmp0 = dd * dres;
	faddd	%f48,%f28,%f48	! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28	! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f46,%f62	! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50	! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46	! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont24
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update22:			! clamp to 7, resume at .cont24
	cmp	counter,7
	ble	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	7,counter
1:
	fmuld	%f52,%f14,%f50	! (2_0) dtmp0 = dd * dres;
	faddd	%f48,%f28,%f48	! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28	! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f46,%f62	!
! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50	! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46	! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont24
	add	TBL,TBL_SHIFT+24,%o0

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into %g1 (shifted left 32 and stored where sllx/stx are present);
!   * replay the pipeline instructions listed after "1:" and branch back
!     to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000; the "cmp counter,k" that
! follows is the bge delay slot and runs on both paths.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here.

	.align	16
.update23:			! hy0 check, else clamp to 7, resume at .cont23b
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont23a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,7
	ble,a	1f
	nop

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	mov	7,counter
	stx	%o0,[%fp+tmp_py]
1:
	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	ba	.cont23b
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update25:			! clamp to 8, resume at .cont28
	cmp	counter,8
	ble	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	8,counter
1:
	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;

	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	%f52,%f54,%f54	! (5_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont28
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update26:			! clamp to 8, resume at .cont28
	cmp	counter,8
	ble	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	8,counter
1:
	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	%f52,%f54,%f54	! (5_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont28
	add	TBL,TBL_SHIFT+24,%i3

	.align	16
.update27:			! hy0 check, else clamp to 8, resume at .cont27b
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont27a	!
! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,8
	ble,a	1f
	nop

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	mov	8,counter
	stx	%i3,[%fp+tmp_py]
1:
	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1
	add	TBL,TBL_SHIFT+24,%i4
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	ba	.cont27b
	add	TBL,TBL_SHIFT+24,%i3

! .updateN stubs.  The branches that reach them are outside this chunk.
! Every stub follows the same scheme:
!   * if counter > k: store counter-k to tmp_counter and the live px/py
!     registers to tmp_px/tmp_py, then set counter = k;
!   * point the px/py registers at TBL+TBL_SHIFT+24 and load 0x3ff00000
!     into the scale register;
!   * replay the pipeline instructions listed after "1:" (these include
!     result stores and "pz += stridez" for the stubs below) and branch
!     back to the .contM label (last instruction = branch delay slot).
! Stubs that start with "cmp %l7,_0x00100000 / bge" first return to the
! pipeline untouched when hy0 >= 0x00100000; the "cmp counter,k" that
! follows is the bge delay slot and runs on both paths.
! NOTE(review): the value stored at TBL+TBL_SHIFT+24 is not visible here.

	.align	16
.update29:			! clamp to 1, resume at .cont32
	cmp	counter,1
	ble	1f
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	1,counter
1:
	fsubd	%f2,D2ON36,%f2	! (6_1) y_hi0 -= D2ON36;

	fmuld	%f54,%f24,%f50	! (4_1) dtmp0 = dd * dres;
	stx	%g1,[%fp+dtmp0]	! (7_1) *(long long*)&scl0 = ll;
	faddd	%f28,%f48,%f52	! (1_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update30:			! clamp to 1, resume at .cont32
	cmp	counter,1
	ble	1f
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	stx	%o0,[%fp+tmp_py]

	mov	1,counter
1:
	fmuld	%f54,%f24,%f50	! (4_1) dtmp0 = dd * dres;
	stx	%g1,[%fp+dtmp0]	! (7_1) *(long long*)&scl0 = ll;
	faddd	%f28,%f48,%f52	! (1_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (2_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update31:			! hy0 check, else clamp to 1, resume at .cont32
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont31	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,1
	ble,a	1f
	nop

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%i2,[%fp+tmp_px]

	mov	1,counter
	stx	%o0,[%fp+tmp_py]
1:
	fmuld	%f20,%f20,%f0	! (6_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f28	! (6_1) x_lo0 = x0 - x_hi0;

	fmuld	%f2,%f2,%f46	! (6_1) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (6_1) res0_lo = x0 + x_hi0;

	fmuld	%f18,%f22,%f22	! (3_1) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%o4
	add	TBL,TBL_SHIFT+24,%i2
	fsubd	DTWO,%f50,%f20	! (4_1) dtmp0 = DTWO - dtmp0;

	ba	.cont32
	add	TBL,TBL_SHIFT+24,%o0

	.align	16
.update33:			! clamp to 2, resume at .cont36
	cmp	counter,2
	ble	1f
	nop

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i3,[%fp+tmp_py]

	mov	2,counter
1:
	st	%f1,[%i5+4]	! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f50,D2ON36,%f54	! (7_1) y_hi0 -= D2ON36;

	fmuld	%f26,%f16,%f50	! (5_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (2_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp2]	! (0_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	!
(7_1) res0_hi = x_hi0 * x_hi0;
! (the lines down to "ba .cont36" continue .update33, whose label precedes
! this span)
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24	! (4_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (7_1) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (3_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (7_1) y_lo0 = y0 - y_hi0;

	sllx	%o4,32,%o4	! (1_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp3]	! (1_0) *(long long*)&scl0 = ll;
	ba	.cont36	! rejoin at .cont36
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update34: stub that rejoins the main code at .cont36.
! When counter > 2 it stores counter-2 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 2.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp3 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update34:
	cmp	counter,2
	ble	1f	! counter <= 2: skip the save below
	nop	! delay slot

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	2,counter	! counter = 2
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp2]	! (0_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (3_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (7_1) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (7_1) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (7_1) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (7_1) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f14,%f24,%f24	! (4_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (7_1) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (3_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (7_1) y_lo0 = y0 - y_hi0;

	sllx	%o4,32,%o4	! (1_0) ll = (long long)j0 << 32;
	stx	%o4,[%fp+dtmp3]	! 
(1_0) *(long long*)&scl0 = ll;
! (the two lines below finish .update34, whose label precedes this span)
	ba	.cont36	! rejoin at .cont36
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update35: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont35a.  Otherwise, when
! counter > 2, it stores counter-2 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py and forces counter to 2; it then sets %o4 = 0x3ff00000,
! points %i4/%i3 at TBL+TBL_SHIFT+24 and rejoins at .cont35b.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update35:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont35a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,2	! delay slot of the bge above (runs on both paths)
	ble,a	1f	! counter <= 2: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 2

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	mov	2,counter	! counter = 2
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
1:
	fmuld	%f44,%f48,%f10	! (3_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%o4	! %o4 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! (5_1) dtmp0 = DTWO - dtmp0;

	ba	.cont35b	! rejoin at .cont35b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update37: stub that rejoins the main code at .cont40.
! When counter > 3 it stores counter-3 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 3.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp5 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
	.align	16
.update37:
	cmp	counter,3
	ble	1f	! counter <= 3: skip the save below
	nop	! delay slot

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	3,counter	! counter = 3
1:
	st	%f1,[%i5+4]	! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (0_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f18,%f50	! (6_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (3_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp4]	! (1_0) *(long long*)&scl0 = ll;
	fand	%f26,DA0,%f48	! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (6_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f22,%f16,%f16	! (5_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (0_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f48,%f36	! (4_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (0_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! 
(2_0) ll = (long long)j0 << 32;
! (the three lines below finish .update37, whose label precedes this span)
	stx	%g1,[%fp+dtmp5]	! (2_0) *(long long*)&scl0 = ll;
	ba	.cont40	! rejoin at .cont40
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update38: stub that rejoins the main code at .cont40.
! When counter > 3 it stores counter-3 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 3.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp5 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update38:
	cmp	counter,3
	ble	1f	! counter <= 3: skip the save below
	nop	! delay slot

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	3,counter	! counter = 3
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%o4,[%fp+dtmp4]	! (1_0) *(long long*)&scl0 = ll;
	fand	%f26,DA0,%f48	! (4_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (0_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (0_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (0_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (0_0) res0_lo = x0 + x_hi0;

	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (6_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f22,%f16,%f16	! (5_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (0_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f48,%f36	! (4_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (0_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (2_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp5]	! (2_0) *(long long*)&scl0 = ll;
	ba	.cont40	! rejoin at .cont40
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update39: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont39a.  Otherwise, when
! counter > 3, it stores counter-3 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py and forces counter to 3; it then sets %g1 = 0x3ff00000,
! points %i2/%o0 at TBL+TBL_SHIFT+24 and rejoins at .cont39b.
	.align	16
.update39:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont39a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,3	! delay slot of the bge above (runs on both paths)
	ble,a	1f	! counter <= 3: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 3

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	mov	3,counter	! counter = 3
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
1:
	fmuld	%f32,%f48,%f10	! (4_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! 
(6_1) dtmp0 = DTWO - dtmp0;
! (the two lines below finish .update39, whose label precedes this span)

	ba	.cont39b	! rejoin at .cont39b
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update41: stub that rejoins the main code at .cont44.
! When counter > 4 it stores counter-4 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 4.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp7 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update41:
	cmp	counter,4
	ble	1f	! counter <= 4: skip the save below
	nop	! delay slot

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	4,counter	! counter = 4
1:
	st	%f1,[%i5+4]	! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
	fsubd	%f12,D2ON36,%f54	! (1_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f14,%f50	! (7_1) dtmp0 = dd * dres;
	faddd	%f48,%f52,%f52	! (4_1) res0 += dtmp0;

	add	%i5,stridez,%i5	! pz += stridez
	stx	%g1,[%fp+dtmp6]	! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! (1_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f18,%f18	! (6_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34	! (5_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (1_0) y_lo0 = y0 - y_hi0

	sllx	%g1,32,%g1	! (3_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp7]	! (3_0) *(long long*)&scl0 = ll;
	ba	.cont44	! rejoin at .cont44
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update42: same save/clamp sequence as .update41 (limit 4, %i4/%i3), but
! the stage instructions start later (from "pz += stridez"); it also
! rejoins at .cont44.
	.align	16
.update42:
	cmp	counter,4
	ble	1f	! counter <= 4: skip the save below
	nop	! delay slot

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	4,counter	! counter = 4
1:
	add	%i5,stridez,%i5	! pz += stridez
	stx	%g1,[%fp+dtmp6]	! (2_0) *(long long*)&scl0 = ll;
	fand	%f28,DA0,%f48	! (5_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (1_0) res0_hi = x_hi0 * x_hi0;
	fsubd	%f10,%f20,%f0	! 
(1_0) x_lo0 = x0 - x_hi0;
! (the lines down to "ba .cont44" continue .update42, whose label precedes
! this span)

	fmuld	%f54,%f54,%f46	! (1_0) dtmp0 = y_hi0 * y_hi0;
	faddd	%f10,%f20,%f62	! (1_0) res0_lo = x0 + x_hi0;

	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f18,%f18	! (6_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (1_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f48,%f34	! (5_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (1_0) y_lo0 = y0 - y_hi0

	sllx	%g1,32,%g1	! (3_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp7]	! (3_0) *(long long*)&scl0 = ll;
	ba	.cont44	! rejoin at .cont44
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update43: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont43a.  Otherwise, when
! counter > 4, it stores counter-4 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py and forces counter to 4; it then sets %g1 = 0x3ff00000,
! points %i4/%i3 at TBL+TBL_SHIFT+24 and rejoins at .cont43b.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update43:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont43a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,4	! delay slot of the bge above (runs on both paths)
	ble,a	1f	! counter <= 4: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 4

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	mov	4,counter	! counter = 4
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
1:
	fmuld	%f42,%f48,%f10	! (5_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! (7_1) dtmp0 = DTWO - dtmp0;

	ba	.cont43b	! rejoin at .cont43b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update45: stub that rejoins the main code at .cont48.
! When counter > 5 it stores counter-5 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 5.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp9 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
	.align	16
.update45:
	cmp	counter,5
	ble	1f	! counter <= 5: skip the save below
	nop	! delay slot

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	5,counter	! counter = 5
1:
	fsubd	%f50,D2ON36,%f54	! (2_0) y_hi0 -= D2ON36;

	fmuld	%f28,%f22,%f50	! (0_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (5_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (6_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]	! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! 
(2_0) x_lo0 = x0 - x_hi0;
! (the lines down to "ba .cont48" continue .update45, whose label precedes
! this span)

	fmuld	%f54,%f54,%f46	! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10	! (6_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f16,%f14,%f14	! (7_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (2_0) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40	! (6_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (2_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (4_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp9]	! (4_0) *(long long*)&scl0 = ll;
	ba	.cont48	! rejoin at .cont48
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update46: stub that rejoins the main code at .cont48.
! When counter > 5 it stores counter-5 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 5.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp9 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update46:
	cmp	counter,5
	ble	1f	! counter <= 5: skip the save below
	nop	! delay slot

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	5,counter	! counter = 5
1:
	fmuld	%f28,%f22,%f50	! (0_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (5_1) res0 += dtmp0;

	fand	%f26,DA0,%f48	! (6_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]	! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10	! (6_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f16,%f14,%f14	! (7_1) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (2_0) dtmp1 = y0 + y_hi0;

	fmuld	%f40,%f48,%f40	! (6_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (2_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! 
(4_0) ll = (long long)j0 << 32;
! (the three lines below finish .update46, whose label precedes this span)
	stx	%g1,[%fp+dtmp9]	! (4_0) *(long long*)&scl0 = ll;
	ba	.cont48	! rejoin at .cont48
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update47: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont47a.  Otherwise, when
! counter > 5, it stores counter-5 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py and forces counter to 5; it then issues the stage
! instructions below, sets %g1 = 0x3ff00000, points %i2/%o0 at
! TBL+TBL_SHIFT+24 and rejoins at .cont47b.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update47:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont47a	! (0_0) if ( hy0 < 0x00100000 )

	cmp	counter,5	! delay slot of the bge above (runs on both paths)
	ble,a	1f	! counter <= 5: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 5

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	mov	5,counter	! counter = 5
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
1:
	fmuld	%f20,%f20,%f2	! (2_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp8]	! (3_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! (2_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (2_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (2_0) res0_lo = x0 + x_hi0;

	fmuld	%f30,%f48,%f10	! (6_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! (0_0) dtmp0 = DTWO - dtmp0;

	ba	.cont47b	! rejoin at .cont47b
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update49: stub that rejoins the main code at .cont52.
! When counter > 6 it stores counter-6 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 6.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp11 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
	.align	16
.update49:
	cmp	counter,6
	ble	1f	! counter <= 6: skip the save below
	nop	! delay slot

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	6,counter	! counter = 6
1:
	fsubd	%f50,D2ON36,%f54	! (3_0) y_hi0 -= D2ON36;

	fmuld	%f26,%f18,%f50	! (1_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (6_1) res0 += dtmp0;

	fand	%f28,DA0,%f48	! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! 
(1_0) dtmp0 = DTWO - dtmp0;
! (the lines down to "ba .cont52" continue .update49, whose label precedes
! this span)

	fmuld	%f24,%f22,%f22	! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (7_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (3_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont52	! rejoin at .cont52
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update50: stub that rejoins the main code at .cont52.
! When counter > 6 it stores counter-6 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 6.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp11 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update50:
	cmp	counter,6
	ble	1f	! counter <= 6: skip the save below
	nop	! delay slot

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	6,counter	! counter = 6
1:
	fmuld	%f26,%f18,%f50	! (1_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f52,%f52	! (6_1) res0 += dtmp0;

	fand	%f28,DA0,%f48	! (7_1) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f2	! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	fsubd	DTWO,%f50,%f20	! (1_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f24,%f22,%f22	! (0_0) dtmp2 = dd * dres;
	faddd	%f60,%f54,%f50	! (3_0) dtmp1 = y0 + y_hi0;

	fmuld	%f38,%f48,%f38	! (7_1) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f12	! (3_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (5_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp11]	! (5_0) *(long long*)&scl0 = ll;
	ba	.cont52	! rejoin at .cont52
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update51: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont51a; otherwise it falls into
! the save/clamp sequence that follows (limit 6, %i4/%i3) and rejoins at
! .cont51b.
	.align	16
.update51:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont51a	! 
(0_0) if ( hy0 < 0x00100000 )
! (the lines down to "ba .cont51b" continue .update51, whose label and
! bge,pn to .cont51a precede this span)

	cmp	counter,6	! delay slot of the preceding bge,pn (runs on both paths)
	ble,a	1f	! counter <= 6: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 6

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	mov	6,counter	! counter = 6
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
1:
	fmuld	%f20,%f20,%f2	! (3_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp10]	! (4_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f20,%f0	! (3_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (3_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f20,%f62	! (3_0) res0_lo = x0 + x_hi0;

	fmuld	%f44,%f48,%f10	! (7_1) dtmp0 = res0_hi * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! (1_0) dtmp0 = DTWO - dtmp0;

	ba	.cont51b	! rejoin at .cont51b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update53: stub that rejoins the main code at .cont56.
! When counter > 7 it stores counter-7 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 7.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp13 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update53:
	cmp	counter,7
	ble	1f	! counter <= 7: skip the save below
	nop	! delay slot

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	7,counter	! counter = 7
1:
	fsubd	%f50,D2ON36,%f54	! (4_0) y_hi0 -= D2ON36;

	fmuld	%f52,%f14,%f50	! (2_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f28,%f48	! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28	! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f46,%f62	! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50	! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46	! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! 
(0_0) dtmp1 = res0_lo * res0;
! (the lines down to "ba .cont56" continue .update53, whose label precedes
! this span)
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont56	! rejoin at .cont56
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update54: stub that rejoins the main code at .cont56.
! When counter > 7 it stores counter-7 to tmp_counter and %i2/%o0 to
! tmp_px/tmp_py, then forces counter to 7.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp13 and
! points %i2 and %o0 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update54:
	cmp	counter,7
	ble	1f	! counter <= 7: skip the save below
	nop	! delay slot

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0

	mov	7,counter	! counter = 7
1:
	fmuld	%f52,%f14,%f50	! (2_0) dtmp0 = dd * dres;
	st	%f1,[%i5+4]	! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f48,%f28,%f48	! (7_1) res0 += dtmp0;

	fand	%f26,DA0,%f28	! (0_0) res0 = vis_fand(dres,DA0);

	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f46,%f62	! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f32,%f28,%f50	! (0_0) dtmp0 = res0_hi * res0;
	faddd	%f60,%f54,%f46	! (4_0) dtmp1 = y0 + y_hi0;

	fmuld	%f36,%f28,%f36	! (0_0) dtmp1 = res0_lo * res0;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	%f60,%f54,%f60	! (4_0) y_lo0 = y0 - y_hi0;

	sllx	%g1,32,%g1	! (6_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp13]	! (6_0) *(long long*)&scl0 = ll;
	ba	.cont56	! rejoin at .cont56
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update55: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont55a; otherwise it falls into
! the save/clamp sequence that follows (limit 7, %i2/%o0) and rejoins at
! .cont55b.
	.align	16
.update55:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont55a	! 
(0_0) if ( hy0 < 0x00100000 )
! (the lines down to "ba .cont55b" continue .update55, whose label and
! bge,pn to .cont55a precede this span)

	cmp	counter,7	! delay slot of the preceding bge,pn (runs on both paths)
	ble,a	1f	! counter <= 7: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,7,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 7

	stx	%i2,[%fp+tmp_px]	! tmp_px = %i2

	mov	7,counter	! counter = 7
	stx	%o0,[%fp+tmp_py]	! tmp_py = %o0
1:
	fmuld	%f46,%f46,%f0	! (4_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp12]	! (5_0) *(long long*)&scl0 = ll;
	fsubd	%f10,%f46,%f2	! (4_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f20	! (4_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f10,%f46,%f62	! (4_0) res0_lo = x0 + x_hi0;

	fmuld	%f16,%f18,%f18	! (1_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i2	! %i2 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f10	! (2_0) dtmp0 = DTWO - dtmp0;

	ba	.cont55b	! rejoin at .cont55b
	add	TBL,TBL_SHIFT+24,%o0	! delay slot: %o0 = TBL+TBL_SHIFT+24

! .update57: stub that rejoins the main code at .cont60.
! When counter > 8 it stores counter-8 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 8.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp15 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
! NOTE(review): this stub stores %f3 to [%i5+4] where the lower-numbered
! stubs store %f1 -- presumably register rotation in the pipelined loop;
! confirm against the main loop, which is outside this span.
	.align	16
.update57:
	cmp	counter,8
	ble	1f	! counter <= 8: skip the save below
	nop	! delay slot

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 8

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	8,counter	! counter = 8
1:
	fsubd	%f12,D2ON36,%f54	! (5_0) y_hi0 -= D2ON36;

	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	st	%f3,[%i5+4]	! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	fsubd	%f52,%f54,%f54	! 
(5_0) y_lo0 = y0 - y_hi0;
! (the lines down to "ba .cont60" continue .update57, whose label precedes
! this span)

	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont60	! rejoin at .cont60
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update58: stub that rejoins the main code at .cont60.
! When counter > 8 it stores counter-8 to tmp_counter and %i4/%i3 to
! tmp_px/tmp_py, then forces counter to 8.  After the stage instructions it
! stores 0x3ff00000 << 32 (the bit pattern of double 1.0) to dtmp15 and
! points %i4 and %i3 at TBL+TBL_SHIFT+24.
! NOTE(review): presumably parks the unprocessed tail of the vector for a
! later restart -- the branching code is outside this span.
	.align	16
.update58:
	cmp	counter,8
	ble	1f	! counter <= 8: skip the save below
	nop	! delay slot

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 8

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3

	mov	8,counter	! counter = 8
1:
	fmuld	%f10,%f22,%f50	! (3_0) dtmp0 = dd * dres;
	st	%f3,[%i5+4]	! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
	faddd	%f28,%f48,%f48	! (0_0) res0 += dtmp0;

	fand	%f16,DA0,%f28	! (1_0) res0 = vis_fand(dres,DA0);

	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	fmuld	%f42,%f28,%f60	! (1_0) dtmp0 = res0_hi * res0;
	faddd	%f52,%f54,%f50	! (5_0) dtmp1 = y0 + y_hi0;

	fmuld	%f34,%f28,%f34	! (1_0) dtmp1 = res0_lo * res0;
	fsubd	%f52,%f54,%f54	! (5_0) y_lo0 = y0 - y_hi0;

	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24

	sllx	%g1,32,%g1	! (7_0) ll = (long long)j0 << 32;
	stx	%g1,[%fp+dtmp15]	! (7_0) *(long long*)&scl0 = ll;
	ba	.cont60	! rejoin at .cont60
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .update59: conditional stub.  If %l7 (hy0 per the original comments) is
! >= _0x00100000 it returns straight to .cont59a; otherwise it falls into
! the save/clamp sequence that follows (limit 8, %i4/%i3) and rejoins at
! .cont59b.
	.align	16
.update59:
	cmp	%l7,_0x00100000	! (0_0) hy0 ? 0x00100000
	bge,pn	%icc,.cont59a	! 
(0_0) if ( hy0 < 0x00100000 )
! (the lines down to "ba .cont59b" continue .update59, whose label and
! bge,pn to .cont59a precede this span)

	cmp	counter,8	! delay slot of the preceding bge,pn (runs on both paths)
	ble,a	1f	! counter <= 8: skip the save below
	nop	! delay slot (annulled when not taken)

	sub	counter,8,counter
	st	counter,[%fp+tmp_counter]	! tmp_counter = counter - 8

	stx	%i4,[%fp+tmp_px]	! tmp_px = %i4

	mov	8,counter	! counter = 8
	stx	%i3,[%fp+tmp_py]	! tmp_py = %i3
1:
	fmuld	%f20,%f20,%f0	! (5_0) res0_hi = x_hi0 * x_hi0;
	stx	%g1,[%fp+dtmp14]	! (6_0) *(long long*)&scl0 = ll;
	fsubd	%f60,%f20,%f2	! (5_0) x_lo0 = x0 - x_hi0;

	fmuld	%f54,%f54,%f46	! (5_0) dtmp0 = y_hi0 * y_hi0;
	add	%i5,stridez,%i5	! pz += stridez
	faddd	%f60,%f20,%f62	! (5_0) res0_lo = x0 + x_hi0;

	fmuld	%f26,%f14,%f14	! (2_0) dtmp2 = dd * dres;
	sethi	%hi(0x3ff00000),%g1	! %g1 = 0x3ff00000 (high word of double 1.0)
	add	TBL,TBL_SHIFT+24,%i4	! %i4 = TBL+TBL_SHIFT+24
	fsubd	DTWO,%f50,%f20	! (3_0) dtmp0 = DTWO - dtmp0;

	ba	.cont59b	! rejoin at .cont59b
	add	TBL,TBL_SHIFT+24,%i3	! delay slot: %i3 = TBL+TBL_SHIFT+24

! .exit: return point of __vrhypot.  ret transfers control back to the
! caller and the restore in its delay slot pops this routine's register
! window.
	.align	16
.exit:
	ret
	restore	! delay slot of ret
! SET_SIZE is a macro defined outside this span (presumably it records the
! size of the __vrhypot symbol -- confirm in the included headers).
	SET_SIZE(__vrhypot)
