1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vrhypot.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34         .align  64
  35 
! .CONST_TBL: 128 32-bit entries (128 * 4 = 512 bytes; see TBL_SHIFT) that
! seed the Newton reciprocal iteration.  Per the algorithm outline below,
! the loop computes  iarr = (hi32(dres) >> 11) & 0x1fc,  loads the word at
! (char*)TBL + iarr into the high half of an fp pair, and forms the initial
! guess dd with vis_fpsub32 against the exponent bits of dres.  The
! double-precision constants used by the loop follow at TBL + TBL_SHIFT.
  36 .CONST_TBL:
  37         .word   0x7fe00000, 0x7fdfc07f, 0x7fdf81f8, 0x7fdf4465,
  38         .word   0x7fdf07c1, 0x7fdecc07, 0x7fde9131, 0x7fde573a,
  39         .word   0x7fde1e1e, 0x7fdde5d6, 0x7fddae60, 0x7fdd77b6,
  40         .word   0x7fdd41d4, 0x7fdd0cb5, 0x7fdcd856, 0x7fdca4b3,
  41         .word   0x7fdc71c7, 0x7fdc3f8f, 0x7fdc0e07, 0x7fdbdd2b,
  42         .word   0x7fdbacf9, 0x7fdb7d6c, 0x7fdb4e81, 0x7fdb2036,
  43         .word   0x7fdaf286, 0x7fdac570, 0x7fda98ef, 0x7fda6d01,
  44         .word   0x7fda41a4, 0x7fda16d3, 0x7fd9ec8e, 0x7fd9c2d1,
  45         .word   0x7fd99999, 0x7fd970e4, 0x7fd948b0, 0x7fd920fb,
  46         .word   0x7fd8f9c1, 0x7fd8d301, 0x7fd8acb9, 0x7fd886e5,
  47         .word   0x7fd86186, 0x7fd83c97, 0x7fd81818, 0x7fd7f405,
  48         .word   0x7fd7d05f, 0x7fd7ad22, 0x7fd78a4c, 0x7fd767dc,
  49         .word   0x7fd745d1, 0x7fd72428, 0x7fd702e0, 0x7fd6e1f7,
  50         .word   0x7fd6c16c, 0x7fd6a13c, 0x7fd68168, 0x7fd661ec,
  51         .word   0x7fd642c8, 0x7fd623fa, 0x7fd60581, 0x7fd5e75b,
  52         .word   0x7fd5c988, 0x7fd5ac05, 0x7fd58ed2, 0x7fd571ed,
  53         .word   0x7fd55555, 0x7fd53909, 0x7fd51d07, 0x7fd50150,
  54         .word   0x7fd4e5e0, 0x7fd4cab8, 0x7fd4afd6, 0x7fd49539,
  55         .word   0x7fd47ae1, 0x7fd460cb, 0x7fd446f8, 0x7fd42d66,
  56         .word   0x7fd41414, 0x7fd3fb01, 0x7fd3e22c, 0x7fd3c995,
  57         .word   0x7fd3b13b, 0x7fd3991c, 0x7fd38138, 0x7fd3698d,
  58         .word   0x7fd3521c, 0x7fd33ae4, 0x7fd323e3, 0x7fd30d19,
  59         .word   0x7fd2f684, 0x7fd2e025, 0x7fd2c9fb, 0x7fd2b404,
  60         .word   0x7fd29e41, 0x7fd288b0, 0x7fd27350, 0x7fd25e22,
  61         .word   0x7fd24924, 0x7fd23456, 0x7fd21fb7, 0x7fd20b47,
  62         .word   0x7fd1f704, 0x7fd1e2ef, 0x7fd1cf06, 0x7fd1bb4a,
  63         .word   0x7fd1a7b9, 0x7fd19453, 0x7fd18118, 0x7fd16e06,
  64         .word   0x7fd15b1e, 0x7fd1485f, 0x7fd135c8, 0x7fd12358,
  65         .word   0x7fd11111, 0x7fd0fef0, 0x7fd0ecf5, 0x7fd0db20,
  66         .word   0x7fd0c971, 0x7fd0b7e6, 0x7fd0a681, 0x7fd0953f,
  67         .word   0x7fd08421, 0x7fd07326, 0x7fd0624d, 0x7fd05197,
  68         .word   0x7fd04104, 0x7fd03091, 0x7fd02040, 0x7fd01010,
  69 
  70         .word   0x42300000, 0           ! D2ON36 = 2**36 (splits x,y into hi+lo halves)
  71         .word   0xffffff00, 0           ! DA0: high-word mask (truncates reciprocal, fand with DA0)
  72         .word   0xfff00000, 0           ! DA1: sign+exponent mask (fand extracts dexp0)
  73         .word   0x3ff00000, 0           ! DONE = 1.0
  74         .word   0x40000000, 0           ! DTWO = 2.0
  75         .word   0x7fd00000, 0           ! D2ON1022 = 2**1022
  76         .word   0x3cb00000, 0           ! D2ONM52 = 2**-52
  77         .word   0x43200000, 0           ! D2ON51 = 2**51
  78         .word   0x0007ffff, 0xffffffff  ! 0x0007ffffffffffff: mantissa mask, subnormal path
  79 
  ! Register aliases.  Element strides are kept in bytes: the entry code
  ! scales the incoming stride arguments by 8 (sll ...,3), i.e. by
  ! sizeof(double).
  80 #define stridex         %l2
  81 #define stridey         %l3
  82 #define stridez         %l5
  83 
  ! Byte offset from TBL to the double constants that follow the
  ! 128-entry (128 * 4 = 512 byte) reciprocal seed table above.
  84 #define TBL_SHIFT       512
  85 
  ! TBL = base address of .CONST_TBL (set via PIC_SET at entry);
  ! counter = number of elements still to process.
  86 #define TBL             %l1
  87 #define counter         %l4
  88 
  ! Integer constants held in registers for the per-element argument
  ! classification (exponent limits and the abs-value mask).
  89 #define _0x7ff00000     %l0
  90 #define _0x00100000     %o5
  91 #define _0x7fffffff     %l6
  92 
  ! Floating-point register aliases for the constants loaded from
  ! TBL + TBL_SHIFT at entry.
  93 #define D2ON36          %f4
  94 #define DTWO            %f6
  95 #define DONE            %f8
  96 #define DA0             %f58
  97 #define DA1             %f56
  98 
  ! Stack slots holding one scale factor (scl0) per software-pipeline
  ! stage/iteration; written as raw 64-bit patterns and re-read as doubles.
  99 #define dtmp0           STACK_BIAS-0x80
 100 #define dtmp1           STACK_BIAS-0x78
 101 #define dtmp2           STACK_BIAS-0x70
 102 #define dtmp3           STACK_BIAS-0x68
 103 #define dtmp4           STACK_BIAS-0x60
 104 #define dtmp5           STACK_BIAS-0x58
 105 #define dtmp6           STACK_BIAS-0x50
 106 #define dtmp7           STACK_BIAS-0x48
 107 #define dtmp8           STACK_BIAS-0x40
 108 #define dtmp9           STACK_BIAS-0x38
 109 #define dtmp10          STACK_BIAS-0x30
 110 #define dtmp11          STACK_BIAS-0x28
 111 #define dtmp12          STACK_BIAS-0x20
 112 #define dtmp13          STACK_BIAS-0x18
 113 #define dtmp14          STACK_BIAS-0x10
 114 #define dtmp15          STACK_BIAS-0x08
 115 
 ! ftmp0: scratch slot used to move the high word of dres from an fp
 ! register to an integer register (st %fN / ld).  tmp_px, tmp_py and
 ! tmp_counter hold the saved array pointers and element count across
 ! loop restarts (.begin).
 116 #define ftmp0           STACK_BIAS-0x100
 117 #define tmp_px          STACK_BIAS-0x98
 118 #define tmp_py          STACK_BIAS-0x90
 119 #define tmp_counter     STACK_BIAS-0x88
 120 
 121 ! sizeof temp storage - must be a multiple of 16 for V9
 122 #define tmps            0x100
 123 
 124 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 125 !      !!!!!   algorithm   !!!!!
 126 !  hx0 = *(int*)px;
 127 !  hy0 = *(int*)py;
 128 !
 129 !  ((float*)&x0)[0] = ((float*)px)[0];
 130 !  ((float*)&x0)[1] = ((float*)px)[1];
 131 !  ((float*)&y0)[0] = ((float*)py)[0];
 132 !  ((float*)&y0)[1] = ((float*)py)[1];
 133 !
 134 !  hx0 &= 0x7fffffff;
 135 !  hy0 &= 0x7fffffff;
 136 !
 137 !  diff0 = hy0 - hx0;
 138 !  j0 = diff0 >> 31;
 139 !  j0 &= diff0;
 140 !  j0 = hy0 - j0;
 141 !  j0 &= 0x7ff00000;
 142 !
 143 !  j0 = 0x7ff00000 - j0;
 144 !  ll = (long long)j0 << 32;
 145 !  *(long long*)&scl0 = ll;
 146 !
 147 !  if ( hx0 >= 0x7ff00000 || hy0 >= 0x7ff00000 )
 148 !  {
 149 !    lx = ((int*)px)[1];
 150 !    ly = ((int*)py)[1];
 151 !
 152 !    if ( hx0 == 0x7ff00000 && lx == 0 ) res0 = 0.0;
 153 !    else if ( hy0 == 0x7ff00000 && ly == 0 ) res0 = 0.0;
 154 !    else res0 = fabs(x0) * fabs(y0);
 155 !
 156 !    ((float*)pz)[0] = ((float*)&res0)[0];
 157 !    ((float*)pz)[1] = ((float*)&res0)[1];
 158 !
 159 !    px += stridex;
 160 !    py += stridey;
 161 !    pz += stridez;
 162 !    continue;
 163 !  }
 164 !  if ( hx0 <  0x00100000 && hy0 <  0x00100000 )
 165 !  {
 166 !    lx = ((int*)px)[1];
 167 !    ly = ((int*)py)[1];
 168 !    ii = hx0 | hy0;
 169 !    ii |= lx;
 170 !    ii |= ly;
 171 !    if ( ii == 0 )
 172 !    {
 173 !      res0 = 1.0 / 0.0;
 174 !      ((float*)pz)[0] = ((float*)&res0)[0];
 175 !      ((float*)pz)[1] = ((float*)&res0)[1];
 176 !
 177 !      px += stridex;
 178 !      py += stridey;
 179 !      pz += stridez;
 180 !      continue;
 181 !    }
 182 !    x0 = fabs(x0);
 183 !    y0 = fabs(y0);
 184 !    if ( hx0 < 0x00080000 )
 185 !    {
 186 !      x0 = *(long long*)&x0;
 187 !    }
 188 !    else
 189 !    {
 190 !      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
 191 !      x0 = vis_fand(x0, dtmp0);
 192 !      x0 = *(long long*)&x0;
 193 !      x0 += D2ON51;
 194 !    }
 195 !    x0 *= D2ONM52;
 196 !    if ( hy0 < 0x00080000 )
 197 !    {
 198 !      y0 = *(long long*)&y0;
 199 !    }
 200 !    else
 201 !    {
 202 !      ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
 203 !      y0 = vis_fand(y0, dtmp0);
 204 !      y0 = *(long long*)&y0;
 205 !      y0 += D2ON51;
 206 !    }
 207 !    y0 *= D2ONM52;
 208 !    *(long long*)&scl0 = 0x7fd0000000000000ULL;
 209 !  }
 210 !  else
 211 !  {
 212 !    x0 *= scl0;
 213 !    y0 *= scl0;
 214 !  }
 215 !
 216 !  x_hi0 = x0 + D2ON36;
 217 !  y_hi0 = y0 + D2ON36;
 218 !  x_hi0 -= D2ON36;
 219 !  y_hi0 -= D2ON36;
 220 !  x_lo0 = x0 - x_hi0;
 221 !  y_lo0 = y0 - y_hi0;
 222 !  res0_hi = x_hi0 * x_hi0;
 223 !  dtmp0 = y_hi0 * y_hi0;
 224 !  res0_hi += dtmp0;
 225 !  res0_lo = x0 + x_hi0;
 226 !  res0_lo *= x_lo0;
 227 !  dtmp1 = y0 + y_hi0;
 228 !  dtmp1 *= y_lo0;
 229 !  res0_lo += dtmp1;
 230 !
 231 !  dres = res0_hi + res0_lo;
 232 !  dexp0 = vis_fand(dres,DA1);
 233 !  iarr = ((int*)&dres)[0];
 234 !
 235 !  iarr >>= 11;
 236 !  iarr &= 0x1fc;
 237 !  dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 238 !  dd = vis_fpsub32(dtmp0, dexp0);
 239 !
 240 !  dtmp0 = dd * dres;
 241 !  dtmp0 = DTWO - dtmp0;
 242 !  dd *= dtmp0;
 243 !  dtmp1 = dd * dres;
 244 !  dtmp1 = DTWO - dtmp1;
 245 !  dd *= dtmp1;
 246 !  dtmp2 = dd * dres;
 247 !  dtmp2 = DTWO - dtmp2;
 248 !  dres = dd * dtmp2;
 249 !
 250 !  res0 = vis_fand(dres,DA0);
 251 !
 252 !  dtmp0 = res0_hi * res0;
 253 !  dtmp0 = DONE - dtmp0;
 254 !  dtmp1 = res0_lo * res0;
 255 !  dtmp0 -= dtmp1;
 256 !  dtmp0 *= dres;
 257 !  res0 += dtmp0;
 258 !
 259 !  res0 = sqrt ( res0 );
 260 !
 261 !  res0 = scl0 * res0;
 262 !
 263 !  ((float*)pz)[0] = ((float*)&res0)[0];
 264 !  ((float*)pz)[1] = ((float*)&res0)[1];
 265 !
 266 !  px += stridex;
 267 !  py += stridey;
 268 !  pz += stridez;
 269 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 270 
 271         ENTRY(__vrhypot)
 272         save    %sp,-SA(MINFRAME)-tmps,%sp
 273         PIC_SETUP(l7)
 274         PIC_SET(l7,.CONST_TBL,l1)
 275         wr      %g0,0x82,%asi
 276 
 277 #ifdef __sparcv9
 278         ldx     [%fp+STACK_BIAS+176],stridez
 279 #else
 280         ld      [%fp+STACK_BIAS+92],stridez
 281 #endif
 282 
 283         sll     %i2,3,stridex
 284         sethi   %hi(0x7ff00000),_0x7ff00000
 285         st      %i0,[%fp+tmp_counter]
 286 
 287         sll     %i4,3,stridey
 288         sethi   %hi(0x00100000),_0x00100000
 289         stx     %i1,[%fp+tmp_px]
 290 
 291         sll     stridez,3,stridez
 292         sethi   %hi(0x7ffffc00),_0x7fffffff
 293         stx     %i3,[%fp+tmp_py]
 294 
 295         ldd     [TBL+TBL_SHIFT],D2ON36
 296         add     _0x7fffffff,1023,_0x7fffffff
 297 
 298         ldd     [TBL+TBL_SHIFT+8],DA0
 299 
 300         ldd     [TBL+TBL_SHIFT+16],DA1
 301 
 302         ldd     [TBL+TBL_SHIFT+24],DONE
 303 
 304         ldd     [TBL+TBL_SHIFT+32],DTWO
 305 
 306 .begin:
 307         ld      [%fp+tmp_counter],counter
 308         ldx     [%fp+tmp_px],%i4
 309         ldx     [%fp+tmp_py],%i3
 310         st      %g0,[%fp+tmp_counter]
 311 .begin1:
 312         cmp     counter,0
 313         ble,pn  %icc,.exit
 314 
 315         lda     [%i4]0x82,%o1           ! (7_0) hx0 = *(int*)px;
 316         add     %i4,stridex,%i1
 317 
 318         lda     [%i3]0x82,%o4           ! (7_0) hy0 = *(int*)py;
 319         add     %i3,stridey,%i0         ! py += stridey
 320 
 321         and     %o1,_0x7fffffff,%o7     ! (7_0) hx0 &= 0x7fffffff;
 322 
 323         cmp     %o7,_0x7ff00000         ! (7_0) hx0 ? 0x7ff00000
 324         bge,pn  %icc,.spec0             ! (7_0) if ( hx0 >= 0x7ff00000 )
 325         and     %o4,_0x7fffffff,%l7     ! (7_0) hy0 &= 0x7fffffff;
 326 
 327         cmp     %l7,_0x7ff00000         ! (7_0) hy0 ? 0x7ff00000
 328         bge,pn  %icc,.spec0             ! (7_0) if ( hy0 >= 0x7ff00000 )
 329         sub     %l7,%o7,%o1             ! (7_0) diff0 = hy0 - hx0;
 330 
 331         sra     %o1,31,%o3              ! (7_0) j0 = diff0 >> 31;
 332         cmp     %o7,_0x00100000         ! (7_0) hx0 ? 0x00100000
 333         bl,pn   %icc,.spec1             ! (7_0) if ( hx0 < 0x00100000 )
 334 
 335         and     %o1,%o3,%o1             ! (7_0) j0 &= diff0;
 336 .cont_spec0:
 337         sub     %l7,%o1,%o4             ! (7_0) j0 = hy0 - j0;
 338 
 339         and     %o4,%l0,%o4             ! (7_0) j0 &= 0x7ff00000;
 340 
 341         sub     %l0,%o4,%g1             ! (7_0) j0 = 0x7ff00000 - j0;
 342 
 343         sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
 344 
 345         stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
 346 
 347         stx     %g1,[%fp+dtmp0]         ! (7_1) *(long long*)&scl0 = ll;
 348 .cont_spec1:
 349         lda     [%i1]0x82,%o1           ! (0_0) hx0 = *(int*)px;
 350         mov     %i1,%i2
 351 
 352         lda     [%i0]0x82,%o4           ! (0_0) hy0 = *(int*)py;
 353 
 354         and     %o1,_0x7fffffff,%o7     ! (0_0) hx0 &= 0x7fffffff;
 355         mov     %i0,%o0
 356 
 357         cmp     %o7,_0x7ff00000         ! (0_0) hx0 ? 0x7ff00000
 358         bge,pn  %icc,.update0           ! (0_0) if ( hx0 >= 0x7ff00000 )
 359         and     %o4,_0x7fffffff,%l7     ! (0_0) hy0 &= 0x7fffffff;
 360 
 361         cmp     %l7,_0x7ff00000         ! (0_0) hy0 ? 0x7ff00000
 362         sub     %l7,%o7,%o1             ! (0_0) diff0 = hy0 - hx0;
 363         bge,pn  %icc,.update0           ! (0_0) if ( hy0 >= 0x7ff00000 )
 364         sra     %o1,31,%o3              ! (0_0) j0 = diff0 >> 31;
 365 
 366         cmp     %o7,_0x00100000         ! (0_0) hx0 ? 0x00100000
 367 
 368         and     %o1,%o3,%o1             ! (0_0) j0 &= diff0;
 369         bl,pn   %icc,.update1           ! (0_0) if ( hx0 < 0x00100000 )
 370         sub     %l7,%o1,%o4             ! (0_0) j0 = hy0 - j0;
 371 .cont0:
 372         and     %o4,%l0,%o4             ! (0_0) j0 &= 0x7ff00000;
 373 
 374         sub     %l0,%o4,%o4             ! (0_0) j0 = 0x7ff00000 - j0;
 375 .cont1:
 376         sllx    %o4,32,%o4              ! (0_0) ll = (long long)j0 << 32;
 377         stx     %o4,[%fp+dtmp1]         ! (0_0) *(long long*)&scl0 = ll;
 378 
 379         ldd     [%fp+dtmp15],%f62       ! (7_1) *(long long*)&scl0 = ll;
 380 
 381         lda     [%i4]%asi,%f10          ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
 382 
 383         lda     [%i4+4]%asi,%f11        ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
 384 
 385         lda     [%i3]%asi,%f12          ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
 386 
 387         add     %i1,stridex,%i4         ! px += stridex
 388         lda     [%i3+4]%asi,%f13        ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
 389 
 390         fmuld   %f10,%f62,%f10          ! (7_1) x0 *= scl0;
 391         add     %i4,stridex,%i1         ! px += stridex
 392 
 393         fmuld   %f12,%f62,%f60          ! (7_1) y0 *= scl0;
 394 
 395         lda     [%i4]0x82,%o1           ! (1_0) hx0 = *(int*)px;
 396 
 397         add     %i0,stridey,%i3         ! py += stridey
 398         faddd   %f10,D2ON36,%f46        ! (7_1) x_hi0 = x0 + D2ON36;
 399 
 400         lda     [%i3]0x82,%g1           ! (1_0) hy0 = *(int*)py;
 401         add     %i3,stridey,%i0         ! py += stridey
 402         faddd   %f60,D2ON36,%f50        ! (7_1) y_hi0 = y0 + D2ON36;
 403 
 404         and     %o1,_0x7fffffff,%o7     ! (1_0) hx0 &= 0x7fffffff;
 405 
 406         cmp     %o7,_0x7ff00000         ! (1_0) hx0 ? 0x7ff00000
 407         stx     %o4,[%fp+dtmp2]         ! (0_0) *(long long*)&scl0 = ll;
 408 
 409         and     %g1,_0x7fffffff,%l7     ! (1_0) hy0 &= 0x7fffffff;
 410         bge,pn  %icc,.update2           ! (1_0) if ( hx0 >= 0x7ff00000 )
 411         fsubd   %f46,D2ON36,%f20        ! (7_1) x_hi0 -= D2ON36;
 412 
 413         cmp     %l7,_0x7ff00000         ! (1_0) hy0 ? 0x7ff00000
 414         sub     %l7,%o7,%o1             ! (1_0) diff0 = hy0 - hx0;
 415         bge,pn  %icc,.update3           ! (1_0) if ( hy0 >= 0x7ff00000 )
 416         fsubd   %f50,D2ON36,%f54        ! (7_1) y_hi0 -= D2ON36;
 417 
 418         sra     %o1,31,%o3              ! (1_0) j0 = diff0 >> 31;
 419 
 420         and     %o1,%o3,%o1             ! (1_0) j0 &= diff0;
 421 
 422         fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
 423         sub     %l7,%o1,%o4             ! (1_0) j0 = hy0 - j0;
 424         cmp     %o7,_0x00100000         ! (1_0) hx0 ? 0x00100000
 425         fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;
 426 
 427         fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
 428         and     %o4,%l0,%o4             ! (1_0) j0 &= 0x7ff00000;
 429         bl,pn   %icc,.update4           ! (1_0) if ( hx0 < 0x00100000 )
 430         faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;
 431 
 432         sub     %l0,%o4,%o4             ! (1_0) j0 = 0x7ff00000 - j0;
 433 .cont4:
 434         sllx    %o4,32,%o4              ! (1_0) ll = (long long)j0 << 32;
 435         stx     %o4,[%fp+dtmp3]         ! (1_0) *(long long*)&scl0 = ll;
 436         faddd   %f60,%f54,%f50          ! (7_1) dtmp1 = y0 + y_hi0;
 437 
 438         fsubd   %f60,%f54,%f12          ! (7_1) y_lo0 = y0 - y_hi0;
 439 
 440         fmuld   %f62,%f0,%f0            ! (7_1) res0_lo *= x_lo0;
 441         ldd     [%fp+dtmp1],%f62        ! (0_0) *(long long*)&scl0 = ll;
 442         faddd   %f2,%f46,%f44           ! (7_1) res0_hi += dtmp0;
 443 
 444         lda     [%i2]%asi,%f10          ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
 445 
 446         lda     [%i2+4]%asi,%f11        ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
 447 
 448         fmuld   %f50,%f12,%f26          ! (7_1) dtmp1 *= y_lo0;
 449         lda     [%o0]%asi,%f12          ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
 450 
 451         lda     [%o0+4]%asi,%f13        ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
 452 
 453         fmuld   %f10,%f62,%f10          ! (0_0) x0 *= scl0;
 454 
 455         fmuld   %f12,%f62,%f60          ! (0_0) y0 *= scl0;
 456         faddd   %f0,%f26,%f38           ! (7_1) res0_lo += dtmp1;
 457 
 458         lda     [%i1]0x82,%o1           ! (2_0) hx0 = *(int*)px;
 459         mov     %i1,%i2
 460 
 461         faddd   %f10,D2ON36,%f46        ! (0_0) x_hi0 = x0 + D2ON36;
 462 
 463         lda     [%i0]0x82,%g1           ! (2_0) hy0 = *(int*)py;
 464         mov     %i0,%o0
 465         faddd   %f60,D2ON36,%f12        ! (0_0) y_hi0 = y0 + D2ON36;
 466 
 467         faddd   %f44,%f38,%f14          ! (7_1) dres = res0_hi + res0_lo;
 468         and     %o1,_0x7fffffff,%o7     ! (2_0) hx0 &= 0x7fffffff;
 469 
 470         cmp     %o7,_0x7ff00000         ! (2_0) hx0 ? 0x7ff00000
 471         bge,pn  %icc,.update5           ! (2_0) if ( hx0 >= 0x7ff00000 )
 472         stx     %o4,[%fp+dtmp4]         ! (1_0) *(long long*)&scl0 = ll;
 473 
 474         and     %g1,_0x7fffffff,%l7     ! (2_0) hy0 &= 0x7fffffff;
 475         st      %f14,[%fp+ftmp0]        ! (7_1) iarr = ((int*)&dres)[0];
 476         fsubd   %f46,D2ON36,%f20        ! (0_0) x_hi0 -= D2ON36;
 477 
 478         sub     %l7,%o7,%o1             ! (2_0) diff0 = hy0 - hx0;
 479         cmp     %l7,_0x7ff00000         ! (2_0) hy0 ? 0x7ff00000
 480         bge,pn  %icc,.update6           ! (2_0) if ( hy0 >= 0x7ff00000 )
 481         fsubd   %f12,D2ON36,%f54        ! (0_0) y_hi0 -= D2ON36;
 482 
 483         sra     %o1,31,%o3              ! (2_0) j0 = diff0 >> 31;
 484 
 485         and     %o1,%o3,%o1             ! (2_0) j0 &= diff0;
 486 
 487         fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
 488         cmp     %o7,_0x00100000         ! (2_0) hx0 ? 0x00100000
 489         sub     %l7,%o1,%o4             ! (2_0) j0 = hy0 - j0;
 490         fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;
 491 
 492         fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
 493         and     %o4,%l0,%o4             ! (2_0) j0 &= 0x7ff00000;
 494         bl,pn   %icc,.update7           ! (2_0) if ( hx0 < 0x00100000 )
 495         faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;
 496 .cont7:
 497         sub     %l0,%o4,%g1             ! (2_0) j0 = 0x7ff00000 - j0;
 498 
 499         sllx    %g1,32,%g1              ! (2_0) ll = (long long)j0 << 32;
 500 .cont8:
 501         stx     %g1,[%fp+dtmp5]         ! (2_0) *(long long*)&scl0 = ll;
 502         faddd   %f60,%f54,%f50          ! (0_0) dtmp1 = y0 + y_hi0;
 503 
 504         fsubd   %f60,%f54,%f12          ! (0_0) y_lo0 = y0 - y_hi0;
 505 
 506         fmuld   %f62,%f0,%f0            ! (0_0) res0_lo *= x_lo0;
 507         ldd     [%fp+dtmp3],%f62        ! (1_0) *(long long*)&scl0 = ll;
 508         faddd   %f2,%f46,%f32           ! (0_0) res0_hi += dtmp0;
 509 
 510         lda     [%i4]%asi,%f10          ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
 511 
 512         lda     [%i4+4]%asi,%f11        ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
 513 
 514         fmuld   %f50,%f12,%f28          ! (0_0) dtmp1 *= y_lo0;
 515         lda     [%i3]%asi,%f12          ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
 516 
 517         add     %i1,stridex,%i4         ! px += stridex
 518         lda     [%i3+4]%asi,%f13        ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
 519 
 520         ld      [%fp+ftmp0],%o2         ! (7_1) iarr = ((int*)&dres)[0];
 521         add     %i4,stridex,%i1         ! px += stridex
 522         fand    %f14,DA1,%f2            ! (7_1) dexp0 = vis_fand(dres,DA1);
 523 
 524         fmuld   %f10,%f62,%f10          ! (1_0) x0 *= scl0;
 525 
 526         fmuld   %f12,%f62,%f60          ! (1_0) y0 *= scl0;
 527         sra     %o2,11,%i3              ! (7_1) iarr >>= 11;
 528         faddd   %f0,%f28,%f36           ! (0_0) res0_lo += dtmp1;
 529 
 530         and     %i3,0x1fc,%i3           ! (7_1) iarr &= 0x1fc;
 531 
 532         add     %i3,TBL,%o4             ! (7_1) (char*)dll1 + iarr
 533         lda     [%i4]0x82,%o1           ! (3_0) hx0 = *(int*)px;
 534 
 535         add     %i0,stridey,%i3         ! py += stridey
 536         ld      [%o4],%f26              ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 537         faddd   %f10,D2ON36,%f46        ! (1_0) x_hi0 = x0 + D2ON36;
 538 
 539         lda     [%i3]0x82,%o4           ! (3_0) hy0 = *(int*)py;
 540         add     %i3,stridey,%i0         ! py += stridey
 541         faddd   %f60,D2ON36,%f12        ! (1_0) y_hi0 = y0 + D2ON36;
 542 
 543         faddd   %f32,%f36,%f22          ! (0_0) dres = res0_hi + res0_lo;
 544         and     %o1,_0x7fffffff,%o7     ! (3_0) hx0 &= 0x7fffffff;
 545 
 546         cmp     %o7,_0x7ff00000         ! (3_0) hx0 ? 0x7ff00000
 547         stx     %g1,[%fp+dtmp6]         ! (2_0) *(long long*)&scl0 = ll;
 548         bge,pn  %icc,.update9           ! (3_0) if ( hx0 >= 0x7ff00000 )
 549         fpsub32 %f26,%f2,%f26           ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);
 550 
 551         and     %o4,_0x7fffffff,%l7     ! (3_0) hy0 &= 0x7fffffff;
 552         st      %f22,[%fp+ftmp0]        ! (0_0) iarr = ((int*)&dres)[0];
 553         fsubd   %f46,D2ON36,%f20        ! (1_0) x_hi0 -= D2ON36;
 554 
 555         sub     %l7,%o7,%o1             ! (3_0) diff0 = hy0 - hx0;
 556         cmp     %l7,_0x7ff00000         ! (3_0) hy0 ? 0x7ff00000
 557         bge,pn  %icc,.update10          ! (3_0) if ( hy0 >= 0x7ff00000 )
 558         fsubd   %f12,D2ON36,%f54        ! (1_0) y_hi0 -= D2ON36;
 559 
 560         fmuld   %f26,%f14,%f50          ! (7_1) dtmp0 = dd * dres;
 561         sra     %o1,31,%o3              ! (3_0) j0 = diff0 >> 31;
 562 
 563         and     %o1,%o3,%o1             ! (3_0) j0 &= diff0;
 564 
 565         fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
 566         cmp     %o7,_0x00100000         ! (3_0) hx0 ? 0x00100000
 567         sub     %l7,%o1,%o4             ! (3_0) j0 = hy0 - j0;
 568         fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;
 569 
 570         fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
 571         and     %o4,%l0,%o4             ! (3_0) j0 &= 0x7ff00000;
 572         bl,pn   %icc,.update11          ! (3_0) if ( hx0 < 0x00100000 )
 573         faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;
 574 .cont11:
 575         sub     %l0,%o4,%g1             ! (3_0) j0 = 0x7ff00000 - j0;
 576         fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
 577 .cont12:
 578         sllx    %g1,32,%g1              ! (3_0) ll = (long long)j0 << 32;
 579         stx     %g1,[%fp+dtmp7]         ! (3_0) *(long long*)&scl0 = ll;
 580         faddd   %f60,%f54,%f50          ! (1_0) dtmp1 = y0 + y_hi0;
 581 
 582         fsubd   %f60,%f54,%f12          ! (1_0) y_lo0 = y0 - y_hi0
 583 
 584         fmuld   %f62,%f0,%f0            ! (1_0) res0_lo *= x_lo0;
 585         ldd     [%fp+dtmp5],%f62        ! (2_0) *(long long*)&scl0 = ll;
 586         faddd   %f2,%f46,%f42           ! (1_0) res0_hi += dtmp0;
 587 
 588         lda     [%i2]%asi,%f10          ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
 589         fmuld   %f26,%f20,%f54          ! (7_1) dd *= dtmp0;
 590 
 591         lda     [%i2+4]%asi,%f11        ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
 592 
 593         fmuld   %f50,%f12,%f26          ! (1_0) dtmp1 *= y_lo0;
 594         lda     [%o0]%asi,%f12          ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
 595 
 596         lda     [%o0+4]%asi,%f13        ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
 597 
 598         fmuld   %f54,%f14,%f50          ! (7_1) dtmp1 = dd * dres;
 599         ld      [%fp+ftmp0],%o2         ! (0_0) iarr = ((int*)&dres)[0];
 600         fand    %f22,DA1,%f2            ! (0_0) dexp0 = vis_fand(dres,DA1);
 601 
 602         fmuld   %f10,%f62,%f10          ! (2_0) x0 *= scl0;
 603 
 604         fmuld   %f12,%f62,%f60          ! (2_0) y0 *= scl0;
 605         sra     %o2,11,%o4              ! (0_0) iarr >>= 11;
 606         faddd   %f0,%f26,%f34           ! (1_0) res0_lo += dtmp1;
 607 
 608         and     %o4,0x1fc,%o4           ! (0_0) iarr &= 0x1fc;
 609 
 610         add     %o4,TBL,%o4             ! (0_0) (char*)dll1 + iarr
 611         mov     %i1,%i2
 612         lda     [%i1]0x82,%o1           ! (4_0) hx0 = *(int*)px;
 613         fsubd   DTWO,%f50,%f20          ! (7_1) dtmp1 = DTWO - dtmp1;
 614 
 615         ld      [%o4],%f28              ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 616         faddd   %f10,D2ON36,%f46        ! (2_0) x_hi0 = x0 + D2ON36;
 617 
 618         lda     [%i0]0x82,%o4           ! (4_0) hy0 = *(int*)py;
 619         mov     %i0,%o0
 620         faddd   %f60,D2ON36,%f50        ! (2_0) y_hi0 = y0 + D2ON36;
 621 
 622         and     %o1,_0x7fffffff,%o7     ! (4_0) hx0 &= 0x7fffffff;
 623         faddd   %f42,%f34,%f18          ! (1_0) dres = res0_hi + res0_lo;
 624 
 625         fmuld   %f54,%f20,%f16          ! (7_1) dd *= dtmp1;
 626         cmp     %o7,_0x7ff00000         ! (4_0) hx0 ? 0x7ff00000
 627         stx     %g1,[%fp+dtmp8]         ! (3_0) *(long long*)&scl0 = ll;
 628         fpsub32 %f28,%f2,%f28           ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);
 629 
 630         and     %o4,_0x7fffffff,%l7     ! (4_0) hy0 &= 0x7fffffff;
 631         bge,pn  %icc,.update13          ! (4_0) if ( hx0 >= 0x7ff00000 )
 632         st      %f18,[%fp+ftmp0]        ! (1_0) iarr = ((int*)&dres)[0];
 633         fsubd   %f46,D2ON36,%f20        ! (2_0) x_hi0 -= D2ON36;
 634 
 635         sub     %l7,%o7,%o1             ! (4_0) diff0 = hy0 - hx0;
 636         cmp     %l7,_0x7ff00000         ! (4_0) hy0 ? 0x7ff00000
 637         bge,pn  %icc,.update14          ! (4_0) if ( hy0 >= 0x7ff00000 )
 638         fsubd   %f50,D2ON36,%f54        ! (2_0) y_hi0 -= D2ON36;
 639 
 640         fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;
 641         sra     %o1,31,%o3              ! (4_0) j0 = diff0 >> 31;
 642 
 643         and     %o1,%o3,%o1             ! (4_0) j0 &= diff0;
 644 
 645         fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
 646         sub     %l7,%o1,%o4             ! (4_0) j0 = hy0 - j0;
 647         cmp     %o7,_0x00100000         ! (4_0) hx0 ? 0x00100000
 648         fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;
 649 
 650         fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
 651         and     %o4,%l0,%o4             ! (4_0) j0 &= 0x7ff00000;
 652         bl,pn   %icc,.update15          ! (4_0) if ( hx0 < 0x00100000 )
 653         faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;
 654 .cont15:
 655         sub     %l0,%o4,%g1             ! (4_0) j0 = 0x7ff00000 - j0;
 656         fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
 657 .cont16:
 658         fmuld   %f16,%f14,%f14          ! (7_1) dtmp2 = dd * dres;
 659         sllx    %g1,32,%g1              ! (4_0) ll = (long long)j0 << 32;
 660         stx     %g1,[%fp+dtmp9]         ! (4_0) *(long long*)&scl0 = ll;
 661         faddd   %f60,%f54,%f50          ! (2_0) dtmp1 = y0 + y_hi0;
 662 
 663         fsubd   %f60,%f54,%f12          ! (2_0) y_lo0 = y0 - y_hi0;
 664 
 665         fmuld   %f62,%f0,%f0            ! (2_0) res0_lo *= x_lo0;
 666         ldd     [%fp+dtmp7],%f62        ! (3_0) *(long long*)&scl0 = ll;
 667         faddd   %f2,%f46,%f30           ! (2_0) res0_hi += dtmp0;
 668 
 669         lda     [%i4]%asi,%f10          ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
 670         fmuld   %f28,%f20,%f54          ! (0_0) dd *= dtmp0;
 671 
 672         lda     [%i4+4]%asi,%f11        ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
 673 
 674         fmuld   %f50,%f12,%f28          ! (2_0) dtmp1 *= y_lo0;
 675         lda     [%i3]%asi,%f12          ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
 676         fsubd   DTWO,%f14,%f20          ! (7_1) dtmp2 = DTWO - dtmp2;
 677 
 678         lda     [%i3+4]%asi,%f13        ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
 679         add     %i1,stridex,%i4         ! px += stridex
 680 
 681         fmuld   %f54,%f22,%f50          ! (0_0) dtmp1 = dd * dres;
 682         ld      [%fp+ftmp0],%o2         ! (1_0) iarr = ((int*)&dres)[0];
 683         add     %i4,stridex,%i1         ! px += stridex
 684         fand    %f18,DA1,%f2            ! (1_0) dexp0 = vis_fand(dres,DA1);
 685 
 686         fmuld   %f10,%f62,%f10          ! (3_0) x0 *= scl0;
 687 
 688         fmuld   %f12,%f62,%f60          ! (3_0) y0 *= scl0;
 689         sra     %o2,11,%i3              ! (1_0) iarr >>= 11;
 690         faddd   %f0,%f28,%f40           ! (2_0) res0_lo += dtmp1;
 691 
 692         and     %i3,0x1fc,%i3           ! (1_0) iarr &= 0x1fc;
 693         fmuld   %f16,%f20,%f28          ! (7_1) dres = dd * dtmp2;
 694 
 695         add     %i3,TBL,%o4             ! (1_0) (char*)dll1 + iarr
 696         lda     [%i4]0x82,%o1           ! (5_0) hx0 = *(int*)px;
 697         fsubd   DTWO,%f50,%f20          ! (0_0) dtmp1 = DTWO - dtmp1;
 698 
 699         add     %i0,stridey,%i3         ! py += stridey
 700         ld      [%o4],%f26              ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 701         faddd   %f10,D2ON36,%f46        ! (3_0) x_hi0 = x0 + D2ON36;
 702 
 703         lda     [%i3]0x82,%o4           ! (5_0) hy0 = *(int*)py;
 704         add     %i3,stridey,%i0         ! py += stridey
 705         faddd   %f60,D2ON36,%f50        ! (3_0) y_hi0 = y0 + D2ON36;
 706 
 707         and     %o1,_0x7fffffff,%o7     ! (5_0) hx0 &= 0x7fffffff;
 708         faddd   %f30,%f40,%f14          ! (2_0) dres = res0_hi + res0_lo;
 709 
 710         fmuld   %f54,%f20,%f24          ! (0_0) dd *= dtmp1;
 711         cmp     %o7,_0x7ff00000         ! (5_0) hx0 ? 0x7ff00000
 712         stx     %g1,[%fp+dtmp10]        ! (4_0) *(long long*)&scl0 = ll;
 713         fpsub32 %f26,%f2,%f26           ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);
 714 
 715         and     %o4,_0x7fffffff,%l7     ! (5_0) hy0 &= 0x7fffffff;
 716         st      %f14,[%fp+ftmp0]        ! (2_0) iarr = ((int*)&dres)[0];
 717         bge,pn  %icc,.update17          ! (5_0) if ( hx0 >= 0x7ff00000 )
 718         fsubd   %f46,D2ON36,%f20        ! (3_0) x_hi0 -= D2ON36;
 719 
 720         sub     %l7,%o7,%o1             ! (5_0) diff0 = hy0 - hx0;
 721         cmp     %l7,_0x7ff00000         ! (5_0) hy0 ? 0x7ff00000
 722         bge,pn  %icc,.update18          ! (5_0) if ( hy0 >= 0x7ff00000 )
 723         fsubd   %f50,D2ON36,%f54        ! (3_0) y_hi0 -= D2ON36;
 724 
 725         fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;
 726         sra     %o1,31,%o3              ! (5_0) j0 = diff0 >> 31;
 727 
 728         and     %o1,%o3,%o1             ! (5_0) j0 &= diff0;
 729         fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);
 730 
 731         fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
 732         sub     %l7,%o1,%o4             ! (5_0) j0 = hy0 - j0;
 733         cmp     %o7,_0x00100000         ! (5_0) hx0 ? 0x00100000
 734         fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;
 735 
 736         fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
 737         and     %o4,%l0,%o4             ! (5_0) j0 &= 0x7ff00000;
 738         bl,pn   %icc,.update19          ! (5_0) if ( hx0 < 0x00100000 )
 739         faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;
 740 .cont19a:
 741         fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
 742         sub     %l0,%o4,%g1             ! (5_0) j0 = 0x7ff00000 - j0;
 743         fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;
 744 .cont19b:
 745         fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
 746         sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
 747         stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
 748         faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;
 749 
 750         fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
 751         fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;
 752 .cont20:
 753         fmuld   %f62,%f0,%f0            ! (3_0) res0_lo *= x_lo0;
 754         ldd     [%fp+dtmp9],%f62        ! (4_0) *(long long*)&scl0 = ll;
 755         faddd   %f2,%f46,%f44           ! (3_0) res0_hi += dtmp0;
 756 
 757         fsubd   DONE,%f10,%f60          ! (7_1) dtmp0 = DONE - dtmp0;
 758         lda     [%i2]%asi,%f10          ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
 759         fmuld   %f26,%f20,%f54          ! (1_0) dd *= dtmp0;
 760 
 761         lda     [%i2+4]%asi,%f11        ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
 762 
 763         fmuld   %f50,%f12,%f26          ! (3_0) dtmp1 *= y_lo0;
 764         lda     [%o0]%asi,%f12          ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
 765         fsubd   DTWO,%f22,%f20          ! (0_0) dtmp2 = DTWO - dtmp2;
 766 
 767         lda     [%o0+4]%asi,%f13        ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
 768 
 769         fmuld   %f54,%f18,%f50          ! (1_0) dtmp1 = dd * dres;
 770         ld      [%fp+ftmp0],%o2         ! (2_0) iarr = ((int*)&dres)[0];
 771         fand    %f14,DA1,%f2            ! (2_0) dexp0 = vis_fand(dres,DA1);
 772 
 773         fmuld   %f10,%f62,%f10          ! (4_0) x0 *= scl0;
 774         fsubd   %f60,%f38,%f46          ! (7_1) dtmp0 -= dtmp1;
 775 
 776         fmuld   %f12,%f62,%f60          ! (4_0) y0 *= scl0;
 777         sra     %o2,11,%o4              ! (2_0) iarr >>= 11;
 778         faddd   %f0,%f26,%f38           ! (3_0) res0_lo += dtmp1;
 779 
 780         and     %o4,0x1fc,%o4           ! (2_0) iarr &= 0x1fc;
 781         fmuld   %f24,%f20,%f26          ! (0_0) dres = dd * dtmp2;
 782 
 783         add     %o4,TBL,%o4             ! (2_0) (char*)dll1 + iarr
 784         mov     %i1,%i2
 785         lda     [%i1]0x82,%o1           ! (6_0) hx0 = *(int*)px;
 786         fsubd   DTWO,%f50,%f52          ! (1_0) dtmp1 = DTWO - dtmp1;
 787 
 788         fmuld   %f46,%f28,%f28          ! (7_1) dtmp0 *= dres;
 789         ld      [%o4],%f20              ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 790         faddd   %f10,D2ON36,%f46        ! (4_0) x_hi0 = x0 + D2ON36;
 791 
 792         lda     [%i0]0x82,%o4           ! (6_0) hy0 = *(int*)py;
 793         mov     %i0,%o0
 794         faddd   %f60,D2ON36,%f50        ! (4_0) y_hi0 = y0 + D2ON36;
 795 
 796         and     %o1,_0x7fffffff,%o7     ! (6_0) hx0 &= 0x7fffffff;
 797         faddd   %f44,%f38,%f22          ! (3_0) dres = res0_hi + res0_lo;
 798 
 799         fmuld   %f54,%f52,%f16          ! (1_0) dd *= dtmp1;
 800         cmp     %o7,_0x7ff00000         ! (6_0) hx0 ? 0x7ff00000
 801         stx     %g1,[%fp+dtmp12]        ! (5_0) *(long long*)&scl0 = ll;
 802         fpsub32 %f20,%f2,%f52           ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);
 803 
 804         and     %o4,_0x7fffffff,%l7     ! (6_0) hy0 &= 0x7fffffff;
 805         st      %f22,[%fp+ftmp0]        ! (3_0) iarr = ((int*)&dres)[0];
 806         bge,pn  %icc,.update21          ! (6_0) if ( hx0 >= 0x7ff00000 )
 807         fsubd   %f46,D2ON36,%f46        ! (4_0) x_hi0 -= D2ON36;
 808 
 809         sub     %l7,%o7,%o1             ! (6_0) diff0 = hy0 - hx0;
 810         cmp     %l7,_0x7ff00000         ! (6_0) hy0 ? 0x7ff00000
 811         bge,pn  %icc,.update22          ! (6_0) if ( hy0 >= 0x7ff00000 )
 812         fsubd   %f50,D2ON36,%f54        ! (4_0) y_hi0 -= D2ON36;
 813 
 814         fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
 815         sra     %o1,31,%o3              ! (6_0) j0 = diff0 >> 31;
 816         faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;
 817 
 818         and     %o1,%o3,%o1             ! (6_0) j0 &= diff0;
 819         fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);
 820 
 821         fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
 822         sub     %l7,%o1,%o4             ! (6_0) j0 = hy0 - j0;
 823         cmp     %o7,_0x00100000         ! (6_0) hx0 ? 0x00100000
 824         fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;
 825 
 826         fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
 827         and     %o4,%l0,%o4             ! (6_0) j0 &= 0x7ff00000;
 828         bl,pn   %icc,.update23          ! (6_0) if ( hx0 < 0x00100000 )
 829         faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;
 830 .cont23a:
 831         fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
 832         sub     %l0,%o4,%g1             ! (6_0) j0 = 0x7ff00000 - j0;
 833         fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;
 834 .cont23b:
 835         fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
 836         sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
 837         stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
 838         faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;
 839 
 840         fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
 841         fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;
 842 .cont24:
 843         fmuld   %f62,%f2,%f2            ! (4_0) res0_lo *= x_lo0;
 844         ldd     [%fp+dtmp11],%f62       ! (5_0) *(long long*)&scl0 = ll;
 845         faddd   %f0,%f20,%f32           ! (4_0) res0_hi += dtmp0;
 846 
 847         lda     [%i4]%asi,%f0           ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
 848         fmuld   %f52,%f10,%f10          ! (2_0) dd *= dtmp0;
 849 
 850         lda     [%i4+4]%asi,%f1         ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
 851         fsubd   DONE,%f50,%f52          ! (0_0) dtmp0 = DONE - dtmp0;
 852 
 853         fmuld   %f46,%f60,%f46          ! (4_0) dtmp1 *= y_lo0;
 854         lda     [%i3]%asi,%f12          ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
 855         fsubd   DTWO,%f18,%f18          ! (1_0) dtmp2 = DTWO - dtmp2;
 856 
 857         add     %i1,stridex,%i4         ! px += stridex
 858         lda     [%i3+4]%asi,%f13        ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
 859 
 860         fmuld   %f10,%f14,%f50          ! (2_0) dtmp1 = dd * dres;
 861         add     %i4,stridex,%i1         ! px += stridex
 862         ld      [%fp+ftmp0],%o2         ! (3_0) iarr = ((int*)&dres)[0];
 863         fand    %f22,DA1,%f54           ! (3_0) dexp0 = vis_fand(dres,DA1);
 864 
 865         fmuld   %f0,%f62,%f60           ! (5_0) x0 *= scl0;
 866         fsubd   %f52,%f36,%f20          ! (0_0) dtmp0 -= dtmp1;
 867 
 868         fmuld   %f12,%f62,%f52          ! (5_0) y0 *= scl0;
 869         sra     %o2,11,%i3              ! (3_0) iarr >>= 11;
 870         faddd   %f2,%f46,%f36           ! (4_0) res0_lo += dtmp1;
 871 
 872         and     %i3,0x1fc,%i3           ! (3_0) iarr &= 0x1fc;
 873         fmuld   %f16,%f18,%f16          ! (1_0) dres = dd * dtmp2;
 874 
 875         fsqrtd  %f48,%f18               ! (7_1) res0 = sqrt ( res0 );
 876         add     %i3,TBL,%o4             ! (3_0) (char*)dll1 + iarr
 877         lda     [%i4]0x82,%o1           ! (7_0) hx0 = *(int*)px;
 878         fsubd   DTWO,%f50,%f46          ! (2_0) dtmp1 = DTWO - dtmp1;
 879 
 880         fmuld   %f20,%f26,%f48          ! (0_0) dtmp0 *= dres;
 881         add     %i0,stridey,%i3         ! py += stridey
 882         ld      [%o4],%f20              ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 883         faddd   %f60,D2ON36,%f50        ! (5_0) x_hi0 = x0 + D2ON36;
 884 
 885         lda     [%i3]0x82,%o4           ! (7_0) hy0 = *(int*)py;
 886         add     %i3,stridey,%i0         ! py += stridey
 887         faddd   %f52,D2ON36,%f12        ! (5_0) y_hi0 = y0 + D2ON36;
 888 
 889         and     %o1,_0x7fffffff,%o7     ! (7_0) hx0 &= 0x7fffffff;
 890         faddd   %f32,%f36,%f24          ! (4_0) dres = res0_hi + res0_lo;
 891 
 892         fmuld   %f10,%f46,%f26          ! (2_0) dd *= dtmp1;
 893         cmp     %o7,_0x7ff00000         ! (7_0) hx0 ? 0x7ff00000
 894         stx     %g1,[%fp+dtmp14]        ! (6_0) *(long long*)&scl0 = ll;
 895         fpsub32 %f20,%f54,%f10          ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);
 896 
 897         and     %o4,_0x7fffffff,%l7     ! (7_0) hy0 &= 0x7fffffff;
 898         st      %f24,[%fp+ftmp0]        ! (4_0) iarr = ((int*)&dres)[0];
 899         bge,pn  %icc,.update25          ! (7_0) if ( hx0 >= 0x7ff00000 )
 900         fsubd   %f50,D2ON36,%f20        ! (5_0) x_hi0 -= D2ON36;
 901 
 902         sub     %l7,%o7,%o1             ! (7_0) diff0 = hy0 - hx0;
 903         cmp     %l7,_0x7ff00000         ! (7_0) hy0 ? 0x7ff00000
 904         bge,pn  %icc,.update26          ! (7_0) if ( hy0 >= 0x7ff00000 )
 905         fsubd   %f12,D2ON36,%f54        ! (5_0) y_hi0 -= D2ON36;
 906 
 907         fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
 908         sra     %o1,31,%o3              ! (7_0) j0 = diff0 >> 31;
 909         faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;
 910 
 911         and     %o1,%o3,%o1             ! (7_0) j0 &= diff0;
 912         fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);
 913 
 914         fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
 915         sub     %l7,%o1,%o4             ! (7_0) j0 = hy0 - j0;
 916         cmp     %o7,_0x00100000         ! (7_0) hx0 ? 0x00100000
 917         fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;
 918 
 919         fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
 920         and     %o4,%l0,%o4             ! (7_0) j0 &= 0x7ff00000;
 921         bl,pn   %icc,.update27          ! (7_0) if ( hx0 < 0x00100000 )
 922         faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;
 923 .cont27a:
 924         fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
 925         sub     %l0,%o4,%g1             ! (7_0) j0 = 0x7ff00000 - j0;
 926         fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;
 927 .cont27b:
 928         fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
 929         sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
 930         stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
 931         faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;
 932 
 933         fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
 934         fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;
 935 .cont28:
 936         fmuld   %f62,%f2,%f2            ! (5_0) res0_lo *= x_lo0;
 937         ldd     [%fp+dtmp13],%f62       ! (6_0) *(long long*)&scl0 = ll;
 938         faddd   %f0,%f46,%f42           ! (5_0) res0_hi += dtmp0;
 939 
 940         fmuld   %f10,%f20,%f52          ! (3_0) dd *= dtmp0;
 941         lda     [%i2]%asi,%f10          ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
 942 
 943         lda     [%i2+4]%asi,%f11        ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
 944         fsubd   DONE,%f60,%f60          ! (1_0) dtmp0 = DONE - dtmp0;
 945 
 946         fmuld   %f50,%f54,%f46          ! (5_0) dtmp1 *= y_lo0;
 947         lda     [%o0]%asi,%f12          ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
 948         fsubd   DTWO,%f14,%f14          ! (2_0) dtmp2 = DTWO - dtmp2;
 949 
 950         lda     [%o0+4]%asi,%f13        ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
 951 
 952         fmuld   %f52,%f22,%f50          ! (3_0) dtmp1 = dd * dres;
 953         ld      [%fp+ftmp0],%o2         ! (4_0) iarr = ((int*)&dres)[0];
 954         fand    %f24,DA1,%f54           ! (4_0) dexp0 = vis_fand(dres,DA1);
 955 
 956         fmuld   %f10,%f62,%f10          ! (6_0) x0 *= scl0;
 957         ldd     [%fp+dtmp0],%f0         ! (7_1) *(long long*)&scl0 = ll;
 958         fsubd   %f60,%f34,%f20          ! (1_0) dtmp0 -= dtmp1;
 959 
 960         fmuld   %f12,%f62,%f60          ! (6_0) y0 *= scl0;
 961         sra     %o2,11,%o4              ! (4_0) iarr >>= 11;
 962         faddd   %f2,%f46,%f34           ! (5_0) res0_lo += dtmp1;
 963 
 964         and     %o4,0x1fc,%o4           ! (4_0) iarr &= 0x1fc;
 965         fmuld   %f26,%f14,%f26          ! (2_0) dres = dd * dtmp2;
 966 
        ! Fewer than 8 elements left?  Then finish in the scalar .tail code;
        ! otherwise fall into .main_loop, which consumes 8 (x,y) pairs per pass.
        cmp     counter,8
        bl,pn   %icc,.tail
        nop

        ba      .main_loop
        sub     counter,8,counter               ! counter -= 8 (executed in delay slot)
 973 
        .align  16
! Software-pipelined main loop: 8 (x,y) pairs are in flight at once (the
! counter is decremented by 8 per iteration above).  The "(k_j)" tags in the
! comments name the element slot k (0-7) and its pipeline generation j;
! NOTE(review): stage numbering inferred from the existing comments — the
! loop body interleaves loads, scaling, argument reduction, Newton iteration
! and sqrt for different slots to hide FP latency.
.main_loop:
 976         fsqrtd  %f48,%f14               ! (0_1) res0 = sqrt ( res0 );
 977         add     %o4,TBL,%o4             ! (4_1) (char*)dll1 + iarr
 978         lda     [%i1]0x82,%o1           ! (0_0) hx0 = *(int*)px;
 979         fsubd   DTWO,%f50,%f46          ! (3_1) dtmp1 = DTWO - dtmp1;
 980 
 981         fmuld   %f20,%f16,%f48          ! (1_1) dtmp0 *= dres;
 982         mov     %i1,%i2
 983         ld      [%o4],%f20              ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
 984         faddd   %f10,D2ON36,%f50        ! (6_1) x_hi0 = x0 + D2ON36;
 985 
 986         nop
 987         mov     %i0,%o0
 988         lda     [%i0]0x82,%o4           ! (0_0) hy0 = *(int*)py;
 989         faddd   %f60,D2ON36,%f2         ! (6_1) y_hi0 = y0 + D2ON36;
 990 
 991         faddd   %f42,%f34,%f16          ! (5_1) dres = res0_hi + res0_lo;
 992         and     %o1,_0x7fffffff,%o7     ! (0_0) hx0 &= 0x7fffffff;
 993         st      %f16,[%fp+ftmp0]        ! (5_1) iarr = ((int*)&dres)[0];
 994         fmuld   %f0,%f18,%f0            ! (7_2) res0 = scl0 * res0;
 995 
 996         fmuld   %f52,%f46,%f18          ! (3_1) dd *= dtmp1;
 997         cmp     %o7,_0x7ff00000         ! (0_0) hx0 ? 0x7ff00000
 998         st      %f0,[%i5]               ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
 999         fpsub32 %f20,%f54,%f54          ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);
1000 
1001         and     %o4,_0x7fffffff,%l7     ! (0_0) hy0 &= 0x7fffffff;
1002         st      %f1,[%i5+4]             ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
1003         bge,pn  %icc,.update29          ! (0_0) if ( hx0 >= 0x7ff00000 )
1004         fsubd   %f50,D2ON36,%f20        ! (6_1) x_hi0 -= D2ON36;
1005 
1006         cmp     %l7,_0x7ff00000         ! (0_0) hy0 ? 0x7ff00000
1007         sub     %l7,%o7,%o1             ! (0_0) diff0 = hy0 - hx0;
1008         bge,pn  %icc,.update30          ! (0_0) if ( hy0 >= 0x7ff00000 )
1009         fsubd   %f2,D2ON36,%f2          ! (6_1) y_hi0 -= D2ON36;
1010 
1011         fmuld   %f54,%f24,%f50          ! (4_1) dtmp0 = dd * dres;
1012         sra     %o1,31,%o3              ! (0_0) j0 = diff0 >> 31;
1013         stx     %g1,[%fp+dtmp0]         ! (7_1) *(long long*)&scl0 = ll;
1014         faddd   %f28,%f48,%f52          ! (1_1) res0 += dtmp0;
1015 
1016         and     %o1,%o3,%o1             ! (0_0) j0 &= diff0;
1017         cmp     %o7,_0x00100000         ! (0_0) hx0 ? 0x00100000
1018         bl,pn   %icc,.update31          ! (0_0) if ( hx0 < 0x00100000 )
1019         fand    %f26,DA0,%f48           ! (2_1) res0 = vis_fand(dres,DA0);
1020 .cont31:
1021         fmuld   %f20,%f20,%f0           ! (6_1) res0_hi = x_hi0 * x_hi0;
1022         sub     %l7,%o1,%o4             ! (0_0) j0 = hy0 - j0;
1023         nop
1024         fsubd   %f10,%f20,%f28          ! (6_1) x_lo0 = x0 - x_hi0;
1025 
1026         fmuld   %f2,%f2,%f46            ! (6_1) dtmp0 = y_hi0 * y_hi0;
1027         add     %i5,stridez,%i5         ! pz += stridez
1028         and     %o4,%l0,%o4             ! (0_0) j0 &= 0x7ff00000;
1029         faddd   %f10,%f20,%f62          ! (6_1) res0_lo = x0 + x_hi0;
1030 
1031         fmuld   %f18,%f22,%f22          ! (3_1) dtmp2 = dd * dres;
1032         sub     %l0,%o4,%o4             ! (0_0) j0 = 0x7ff00000 - j0;
1033         nop
1034         fsubd   DTWO,%f50,%f20          ! (4_1) dtmp0 = DTWO - dtmp0;
1035 .cont32:
1036         fmuld   %f30,%f48,%f12          ! (2_1) dtmp0 = res0_hi * res0;
1037         sllx    %o4,32,%o4              ! (0_0) ll = (long long)j0 << 32;
1038         stx     %o4,[%fp+dtmp1]         ! (0_0) *(long long*)&scl0 = ll;
1039         faddd   %f60,%f2,%f50           ! (6_1) dtmp1 = y0 + y_hi0;
1040 
1041         fmuld   %f40,%f48,%f40          ! (2_1) dtmp1 = res0_lo * res0;
1042         nop
1043         bn,pn   %icc,.exit
1044         fsubd   %f60,%f2,%f2            ! (6_1) y_lo0 = y0 - y_hi0;
1045 
1046         fmuld   %f62,%f28,%f28          ! (6_1) res0_lo *= x_lo0;
1047         nop
1048         ldd     [%fp+dtmp15],%f62       ! (7_1) *(long long*)&scl0 = ll;
1049         faddd   %f0,%f46,%f30           ! (6_1) res0_hi += dtmp0;
1050 
1051         nop
1052         nop
1053         lda     [%i4]%asi,%f10          ! (7_1) ((float*)&x0)[0] = ((float*)px)[0];
1054         fmuld   %f54,%f20,%f54          ! (4_1) dd *= dtmp0;
1055 
1056         nop
1057         nop
1058         lda     [%i4+4]%asi,%f11        ! (7_1) ((float*)&x0)[1] = ((float*)px)[1];
1059         fsubd   DONE,%f12,%f60          ! (2_1) dtmp0 = DONE - dtmp0;
1060 
1061         fmuld   %f50,%f2,%f46           ! (6_1) dtmp1 *= y_lo0;
1062         nop
1063         lda     [%i3]%asi,%f12          ! (7_1) ((float*)&y0)[0] = ((float*)py)[0];
1064         fsubd   DTWO,%f22,%f22          ! (3_1) dtmp2 = DTWO - dtmp2;
1065 
1066         add     %i1,stridex,%i4         ! px += stridex
1067         nop
1068         lda     [%i3+4]%asi,%f13        ! (7_1) ((float*)&y0)[1] = ((float*)py)[1];
1069         bn,pn   %icc,.exit
1070 
1071         fmuld   %f54,%f24,%f50          ! (4_1) dtmp1 = dd * dres;
1072         add     %i4,stridex,%i1         ! px += stridex
1073         ld      [%fp+ftmp0],%o2         ! (5_1) iarr = ((int*)&dres)[0];
1074         fand    %f16,DA1,%f2            ! (5_1) dexp0 = vis_fand(dres,DA1);
1075 
1076         fmuld   %f10,%f62,%f10          ! (7_1) x0 *= scl0;
1077         nop
1078         ldd     [%fp+dtmp2],%f0         ! (0_1) *(long long*)&scl0 = ll;
1079         fsubd   %f60,%f40,%f20          ! (2_1) dtmp0 -= dtmp1;
1080 
1081         fmuld   %f12,%f62,%f60          ! (7_1) y0 *= scl0;
1082         sra     %o2,11,%i3              ! (5_1) iarr >>= 11;
1083         nop
1084         faddd   %f28,%f46,%f40          ! (6_1) res0_lo += dtmp1;
1085 
1086         and     %i3,0x1fc,%i3           ! (5_1) iarr &= 0x1fc;
1087         nop
1088         bn,pn   %icc,.exit
1089         fmuld   %f18,%f22,%f28          ! (3_1) dres = dd * dtmp2;
1090 
1091         fsqrtd  %f52,%f22               ! (1_1) res0 = sqrt ( res0 );
1092         lda     [%i4]0x82,%o1           ! (1_0) hx0 = *(int*)px;
1093         add     %i3,TBL,%g1             ! (5_1) (char*)dll1 + iarr
1094         fsubd   DTWO,%f50,%f62          ! (4_1) dtmp1 = DTWO - dtmp1;
1095 
1096         fmuld   %f20,%f26,%f52          ! (2_1) dtmp0 *= dres;
1097         add     %i0,stridey,%i3         ! py += stridey
1098         ld      [%g1],%f26              ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1099         faddd   %f10,D2ON36,%f46        ! (7_1) x_hi0 = x0 + D2ON36;
1100 
1101         nop
1102         add     %i3,stridey,%i0         ! py += stridey
1103         lda     [%i3]0x82,%g1           ! (1_0) hy0 = *(int*)py;
1104         faddd   %f60,D2ON36,%f50        ! (7_1) y_hi0 = y0 + D2ON36;
1105 
1106         faddd   %f30,%f40,%f18          ! (6_1) dres = res0_hi + res0_lo;
1107         and     %o1,_0x7fffffff,%o7     ! (1_0) hx0 &= 0x7fffffff;
1108         st      %f18,[%fp+ftmp0]        ! (6_1) iarr = ((int*)&dres)[0];
1109         fmuld   %f0,%f14,%f0            ! (0_1) res0 = scl0 * res0;
1110 
1111         fmuld   %f54,%f62,%f14          ! (4_1) dd *= dtmp1;
1112         cmp     %o7,_0x7ff00000         ! (1_0) hx0 ? 0x7ff00000
1113         st      %f0,[%i5]               ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];
1114         fpsub32 %f26,%f2,%f26           ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);
1115 
1116         and     %g1,_0x7fffffff,%l7     ! (1_0) hy0 &= 0x7fffffff;
1117         nop
1118         bge,pn  %icc,.update33          ! (1_0) if ( hx0 >= 0x7ff00000 )
1119         fsubd   %f46,D2ON36,%f20        ! (7_1) x_hi0 -= D2ON36;
1120 
1121         cmp     %l7,_0x7ff00000         ! (1_0) hy0 ? 0x7ff00000
1122         sub     %l7,%o7,%o1             ! (1_0) diff0 = hy0 - hx0;
1123         st      %f1,[%i5+4]             ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
1124         fsubd   %f50,D2ON36,%f54        ! (7_1) y_hi0 -= D2ON36;
1125 
1126         fmuld   %f26,%f16,%f50          ! (5_1) dtmp0 = dd * dres;
1127         sra     %o1,31,%o3              ! (1_0) j0 = diff0 >> 31;
1128         bge,pn  %icc,.update34          ! (1_0) if ( hy0 >= 0x7ff00000 )
1129         faddd   %f48,%f52,%f52          ! (2_1) res0 += dtmp0;
1130 
1131         and     %o1,%o3,%o1             ! (1_0) j0 &= diff0;
1132         add     %i5,stridez,%i5         ! pz += stridez
1133         stx     %o4,[%fp+dtmp2]         ! (0_0) *(long long*)&scl0 = ll;
1134         fand    %f28,DA0,%f48           ! (3_1) res0 = vis_fand(dres,DA0);
1135 
1136         fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
1137         sub     %l7,%o1,%o4             ! (1_0) j0 = hy0 - j0;
1138         cmp     %o7,_0x00100000         ! (1_0) hx0 ? 0x00100000
1139         fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;
1140 
1141         fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
1142         and     %o4,%l0,%o4             ! (1_0) j0 &= 0x7ff00000;
1143         bl,pn   %icc,.update35          ! (1_0) if ( hx0 < 0x00100000 )
1144         faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;
1145 .cont35a:
1146         fmuld   %f44,%f48,%f10          ! (3_1) dtmp0 = res0_hi * res0;
1147         nop
1148         sub     %l0,%o4,%o4             ! (1_0) j0 = 0x7ff00000 - j0;
1149         fsubd   DTWO,%f50,%f20          ! (5_1) dtmp0 = DTWO - dtmp0;
1150 .cont35b:
1151         fmuld   %f14,%f24,%f24          ! (4_1) dtmp2 = dd * dres;
1152         sllx    %o4,32,%o4              ! (1_0) ll = (long long)j0 << 32;
1153         stx     %o4,[%fp+dtmp3]         ! (1_0) *(long long*)&scl0 = ll;
1154         faddd   %f60,%f54,%f50          ! (7_1) dtmp1 = y0 + y_hi0;
1155 
1156         fmuld   %f38,%f48,%f38          ! (3_1) dtmp1 = res0_lo * res0;
1157         nop
1158         nop
1159         fsubd   %f60,%f54,%f12          ! (7_1) y_lo0 = y0 - y_hi0;
1160 .cont36:
1161         fmuld   %f62,%f0,%f0            ! (7_1) res0_lo *= x_lo0;
1162         nop
1163         ldd     [%fp+dtmp1],%f62        ! (0_0) *(long long*)&scl0 = ll;
1164         faddd   %f2,%f46,%f44           ! (7_1) res0_hi += dtmp0;
1165 
1166         fsubd   DONE,%f10,%f60          ! (3_1) dtmp0 = DONE - dtmp0;
1167         nop
1168         lda     [%i2]%asi,%f10          ! (0_0) ((float*)&x0)[0] = ((float*)px)[0];
1169         fmuld   %f26,%f20,%f54          ! (5_1) dd *= dtmp0;
1170 
1171         nop
1172         nop
1173         lda     [%i2+4]%asi,%f11        ! (0_0) ((float*)&x0)[1] = ((float*)px)[1];
1174         bn,pn   %icc,.exit
1175 
1176         fmuld   %f50,%f12,%f26          ! (7_1) dtmp1 *= y_lo0;
1177         nop
1178         lda     [%o0]%asi,%f12          ! (0_0) ((float*)&y0)[0] = ((float*)py)[0];
1179         fsubd   DTWO,%f24,%f24          ! (4_1) dtmp2 = DTWO - dtmp2;
1180 
1181         nop
1182         nop
1183         lda     [%o0+4]%asi,%f13        ! (0_0) ((float*)&y0)[1] = ((float*)py)[1];
1184         bn,pn   %icc,.exit
1185 
1186         fmuld   %f54,%f16,%f46          ! (5_1) dtmp1 = dd * dres;
1187         nop
1188         ld      [%fp+ftmp0],%o2         ! (6_1) iarr = ((int*)&dres)[0];
1189         fand    %f18,DA1,%f2            ! (6_1) dexp0 = vis_fand(dres,DA1);
1190 
1191         fmuld   %f10,%f62,%f10          ! (0_0) x0 *= scl0;
1192         nop
1193         ldd     [%fp+dtmp4],%f50        ! (1_1) *(long long*)&scl0 = ll;
1194         fsubd   %f60,%f38,%f20          ! (3_1) dtmp0 -= dtmp1;
1195 
1196         fmuld   %f12,%f62,%f60          ! (0_0) y0 *= scl0;
1197         sra     %o2,11,%g1              ! (6_1) iarr >>= 11;
1198         nop
1199         faddd   %f0,%f26,%f38           ! (7_1) res0_lo += dtmp1;
1200 
1201         nop
1202         and     %g1,0x1fc,%g1           ! (6_1) iarr &= 0x1fc;
1203         bn,pn   %icc,.exit
1204         fmuld   %f14,%f24,%f26          ! (4_1) dres = dd * dtmp2;
1205 
1206         fsqrtd  %f52,%f24               ! (2_1) res0 = sqrt ( res0 );
1207         lda     [%i1]0x82,%o1           ! (2_0) hx0 = *(int*)px;
1208         add     %g1,TBL,%g1             ! (6_1) (char*)dll1 + iarr
1209         fsubd   DTWO,%f46,%f62          ! (5_1) dtmp1 = DTWO - dtmp1;
1210 
1211         fmuld   %f20,%f28,%f52          ! (3_1) dtmp0 *= dres;
1212         mov     %i1,%i2
1213         ld      [%g1],%f28              ! (6_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
1214         faddd   %f10,D2ON36,%f46        ! (0_0) x_hi0 = x0 + D2ON36;
1215 
1216         nop
1217         mov     %i0,%o0
1218         lda     [%i0]0x82,%g1           ! (2_0) hy0 = *(int*)py;
1219         faddd   %f60,D2ON36,%f12        ! (0_0) y_hi0 = y0 + D2ON36;
1220 
1221         faddd   %f44,%f38,%f14          ! (7_1) dres = res0_hi + res0_lo;
1222         and     %o1,_0x7fffffff,%o7     ! (2_0) hx0 &= 0x7fffffff;
1223         st      %f14,[%fp+ftmp0]        ! (7_1) iarr = ((int*)&dres)[0];
1224         fmuld   %f50,%f22,%f0           ! (1_1) res0 = scl0 * res0;
1225 
1226         fmuld   %f54,%f62,%f22          ! (5_1) dd *= dtmp1;
1227         cmp     %o7,_0x7ff00000         ! (2_0) hx0 ? 0x7ff00000
1228         st      %f0,[%i5]               ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
1229         fpsub32 %f28,%f2,%f28           ! (6_1) dd = vis_fpsub32(dtmp0, dexp0);
1230 
        and     %g1,_0x7fffffff,%l7     ! (2_0) hy0 &= 0x7fffffff;
1232         nop
1233         bge,pn  %icc,.update37          ! (2_0) if ( hx0 >= 0x7ff00000 )
1234         fsubd   %f46,D2ON36,%f20        ! (0_0) x_hi0 -= D2ON36;
1235 
1236         sub     %l7,%o7,%o1             ! (2_0) diff0 = hy0 - hx0;
1237         cmp     %l7,_0x7ff00000         ! (2_0) hy0 ? 0x7ff00000
1238         st      %f1,[%i5+4]             ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
1239         fsubd   %f12,D2ON36,%f54        ! (0_0) y_hi0 -= D2ON36;
1240 
1241         fmuld   %f28,%f18,%f50          ! (6_1) dtmp0 = dd * dres;
1242         sra     %o1,31,%o3              ! (2_0) j0 = diff0 >> 31;
1243         bge,pn  %icc,.update38          ! (2_0) if ( hy0 >= 0x7ff00000 )
1244         faddd   %f48,%f52,%f52          ! (3_1) res0 += dtmp0;
1245 
1246         and     %o1,%o3,%o1             ! (2_0) j0 &= diff0;
1247         add     %i5,stridez,%i5         ! pz += stridez
1248         stx     %o4,[%fp+dtmp4]         ! (1_0) *(long long*)&scl0 = ll;
1249         fand    %f26,DA0,%f48           ! (4_1) res0 = vis_fand(dres,DA0);
1250 
1251         fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
1252         cmp     %o7,_0x00100000         ! (2_0) hx0 ? 0x00100000
1253         sub     %l7,%o1,%o4             ! (2_0) j0 = hy0 - j0;
1254         fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;
1255 
1256         fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
1257         and     %o4,%l0,%o4             ! (2_0) j0 &= 0x7ff00000;
1258         bl,pn   %icc,.update39          ! (2_0) if ( hx0 < 0x00100000 )
1259         faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;
1260 .cont39a:
1261         fmuld   %f32,%f48,%f10          ! (4_1) dtmp0 = res0_hi * res0;
1262         sub     %l0,%o4,%g1             ! (2_0) j0 = 0x7ff00000 - j0;
1263         nop
1264         fsubd   DTWO,%f50,%f20          ! (6_1) dtmp0 = DTWO - dtmp0;
1265 .cont39b:
1266         fmuld   %f22,%f16,%f16          ! (5_1) dtmp2 = dd * dres;
1267         sllx    %g1,32,%g1              ! (2_0) ll = (long long)j0 << 32;
1268         stx     %g1,[%fp+dtmp5]         ! (2_0) *(long long*)&scl0 = ll;
1269         faddd   %f60,%f54,%f50          ! (0_0) dtmp1 = y0 + y_hi0;
1270 
1271         fmuld   %f36,%f48,%f36          ! (4_1) dtmp1 = res0_lo * res0;
1272         nop
1273         nop
1274         fsubd   %f60,%f54,%f12          ! (0_0) y_lo0 = y0 - y_hi0;
1275 .cont40:
1276         fmuld   %f62,%f0,%f0            ! (0_0) res0_lo *= x_lo0;
1277         nop
1278         ldd     [%fp+dtmp3],%f62        ! (1_0) *(long long*)&scl0 = ll;
1279         faddd   %f2,%f46,%f32           ! (0_0) res0_hi += dtmp0;
1280 
        ! --------------------------------------------------------------
        ! Software-pipelined main-loop segment: 8 iterations in flight.
        ! The "(k_s)" tag on each instruction names the pipeline slot k
        ! (0-7) and stage s of the iteration it advances.  Instruction
        ! order and delay-slot placement are load-bearing; the C pseudo-
        ! code in the trailing comments describes each operation.
        ! "bn,pn %icc,.exit" is a branch-never: a 4-byte scheduling
        ! filler that is never taken.
        ! --------------------------------------------------------------
        fsubd   DONE,%f10,%f60          ! (4_1) dtmp0 = DONE - dtmp0;
        nop
        lda     [%i4]%asi,%f10          ! (1_0) ((float*)&x0)[0] = ((float*)px)[0];
        fmuld   %f28,%f20,%f54          ! (6_1) dd *= dtmp0;

        nop
        nop
        lda     [%i4+4]%asi,%f11        ! (1_0) ((float*)&x0)[1] = ((float*)px)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f50,%f12,%f28          ! (0_0) dtmp1 *= y_lo0;
        nop
        lda     [%i3]%asi,%f12          ! (1_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f16,%f16          ! (5_1) dtmp2 = DTWO - dtmp2;

        add     %i1,stridex,%i4         ! px += stridex
        nop
        lda     [%i3+4]%asi,%f13        ! (1_0) ((float*)&y0)[1] = ((float*)py)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f54,%f18,%f46          ! (6_1) dtmp1 = dd * dres;
        add     %i4,stridex,%i1         ! px += stridex
        ld      [%fp+ftmp0],%o2         ! (7_1) iarr = ((int*)&dres)[0];
        fand    %f14,DA1,%f2            ! (7_1) dexp0 = vis_fand(dres,DA1);

        fmuld   %f10,%f62,%f10          ! (1_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp6],%f50        ! (2_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f36,%f20          ! (4_1) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f60          ! (1_0) y0 *= scl0;
        sra     %o2,11,%i3              ! (7_1) iarr >>= 11;
        nop
        faddd   %f0,%f28,%f36           ! (0_0) res0_lo += dtmp1;

        and     %i3,0x1fc,%i3           ! (7_1) iarr &= 0x1fc;
        nop
        bn,pn   %icc,.exit              ! never taken (filler)
        fmuld   %f22,%f16,%f28          ! (5_1) dres = dd * dtmp2;

        fsqrtd  %f52,%f16               ! (3_1) res0 = sqrt ( res0 );
        add     %i3,TBL,%o4             ! (7_1) (char*)dll1 + iarr
        lda     [%i4]0x82,%o1           ! (3_0) hx0 = *(int*)px;
        fsubd   DTWO,%f46,%f62          ! (6_1) dtmp1 = DTWO - dtmp1;

        fmuld   %f20,%f26,%f52          ! (4_1) dtmp0 *= dres;
        add     %i0,stridey,%i3         ! py += stridey
        ld      [%o4],%f26              ! (7_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
        faddd   %f10,D2ON36,%f46        ! (1_0) x_hi0 = x0 + D2ON36;

        nop
        add     %i3,stridey,%i0         ! py += stridey
        lda     [%i3]0x82,%o4           ! (3_0) hy0 = *(int*)py;
        faddd   %f60,D2ON36,%f12        ! (1_0) y_hi0 = y0 + D2ON36;

        faddd   %f32,%f36,%f22          ! (0_0) dres = res0_hi + res0_lo;
        and     %o1,_0x7fffffff,%o7     ! (3_0) hx0 &= 0x7fffffff;
        st      %f22,[%fp+ftmp0]        ! (0_0) iarr = ((int*)&dres)[0];
        fmuld   %f50,%f24,%f0           ! (2_1) res0 = scl0 * res0;

        fmuld   %f54,%f62,%f24          ! (6_1) dd *= dtmp1;
        cmp     %o7,_0x7ff00000         ! (3_0) hx0 ? 0x7ff00000
        st      %f0,[%i5]               ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
        fpsub32 %f26,%f2,%f26           ! (7_1) dd = vis_fpsub32(dtmp0, dexp0);

        and     %o4,_0x7fffffff,%l7     ! (3_0) hy0 &= 0x7fffffff;
        nop
        bge,pn  %icc,.update41          ! (3_0) if ( hx0 >= 0x7ff00000 )
        fsubd   %f46,D2ON36,%f20        ! (1_0) x_hi0 -= D2ON36;

        sub     %l7,%o7,%o1             ! (3_0) diff0 = hy0 - hx0;
        cmp     %l7,_0x7ff00000         ! (3_0) hy0 ? 0x7ff00000
        st      %f1,[%i5+4]             ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
        fsubd   %f12,D2ON36,%f54        ! (1_0) y_hi0 -= D2ON36;

        fmuld   %f26,%f14,%f50          ! (7_1) dtmp0 = dd * dres;
        sra     %o1,31,%o3              ! (3_0) j0 = diff0 >> 31;
        bge,pn  %icc,.update42          ! (3_0) if ( hy0 >= 0x7ff00000 )
        faddd   %f48,%f52,%f52          ! (4_1) res0 += dtmp0;

        and     %o1,%o3,%o1             ! (3_0) j0 &= diff0;
        add     %i5,stridez,%i5         ! pz += stridez
        stx     %g1,[%fp+dtmp6]         ! (2_0) *(long long*)&scl0 = ll;
        fand    %f28,DA0,%f48           ! (5_1) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
        cmp     %o7,_0x00100000         ! (3_0) hx0 ? 0x00100000
        sub     %l7,%o1,%o4             ! (3_0) j0 = hy0 - j0;
        fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
        and     %o4,%l0,%o4             ! (3_0) j0 &= 0x7ff00000;
        bl,pn   %icc,.update43          ! (3_0) if ( hx0 < 0x00100000 )
        faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;
! Re-entry points from the .update43/.update44 out-of-line fixup paths.
! Continues the pipelined computation; code bytes unchanged, comments only.
! NOTE(review): comment on the (5_1) fmuld below originally read
! "dtmp0 -= dtmp1" (copy-paste from the fsubd slot); corrected to match
! every analogous "(k_s) dtmp0 *= dres" slot in this loop.
.cont43a:
        fmuld   %f42,%f48,%f10          ! (5_1) dtmp0 = res0_hi * res0;
        nop
        sub     %l0,%o4,%g1             ! (3_0) j0 = 0x7ff00000 - j0;
        fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
.cont43b:
        fmuld   %f24,%f18,%f18          ! (6_1) dtmp2 = dd * dres;
        sllx    %g1,32,%g1              ! (3_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp7]         ! (3_0) *(long long*)&scl0 = ll;
        faddd   %f60,%f54,%f50          ! (1_0) dtmp1 = y0 + y_hi0;

        fmuld   %f34,%f48,%f34          ! (5_1) dtmp1 = res0_lo * res0;
        nop
        nop
        fsubd   %f60,%f54,%f12          ! (1_0) y_lo0 = y0 - y_hi0;
.cont44:
        fmuld   %f62,%f0,%f0            ! (1_0) res0_lo *= x_lo0;
        nop
        ldd     [%fp+dtmp5],%f62        ! (2_0) *(long long*)&scl0 = ll;
        faddd   %f2,%f46,%f42           ! (1_0) res0_hi += dtmp0;

        fsubd   DONE,%f10,%f60          ! (5_1) dtmp0 = DONE - dtmp0;
        nop
        lda     [%i2]%asi,%f10          ! (2_0) ((float*)&x0)[0] = ((float*)px)[0];
        fmuld   %f26,%f20,%f54          ! (7_1) dd *= dtmp0;

        nop
        nop
        lda     [%i2+4]%asi,%f11        ! (2_0) ((float*)&x0)[1] = ((float*)px)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f50,%f12,%f26          ! (1_0) dtmp1 *= y_lo0;
        nop
        lda     [%o0]%asi,%f12          ! (2_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f18,%f20          ! (6_1) dtmp2 = DTWO - dtmp2;

        nop
        nop
        lda     [%o0+4]%asi,%f13        ! (2_0) ((float*)&y0)[1] = ((float*)py)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f54,%f14,%f50          ! (7_1) dtmp1 = dd * dres;
        nop
        ld      [%fp+ftmp0],%o2         ! (0_0) iarr = ((int*)&dres)[0];
        fand    %f22,DA1,%f2            ! (0_0) dexp0 = vis_fand(dres,DA1);

        fmuld   %f10,%f62,%f10          ! (2_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp8],%f18        ! (3_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f34,%f46          ! (5_1) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f60          ! (2_0) y0 *= scl0;
        sra     %o2,11,%o4              ! (0_0) iarr >>= 11;
        nop
        faddd   %f0,%f26,%f34           ! (1_0) res0_lo += dtmp1;

        and     %o4,0x1fc,%o4           ! (0_0) iarr &= 0x1fc;
        nop
        bn,pn   %icc,.exit              ! never taken (filler)
        fmuld   %f24,%f20,%f26          ! (6_1) dres = dd * dtmp2;

        fsqrtd  %f52,%f24               ! (4_1) res0 = sqrt ( res0 );
        add     %o4,TBL,%o4             ! (0_0) (char*)dll1 + iarr
        lda     [%i1]0x82,%o1           ! (4_0) hx0 = *(int*)px;
        fsubd   DTWO,%f50,%f20          ! (7_1) dtmp1 = DTWO - dtmp1;

        fmuld   %f46,%f28,%f52          ! (5_1) dtmp0 *= dres;
        mov     %i1,%i2
        ld      [%o4],%f28              ! (0_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
        faddd   %f10,D2ON36,%f46        ! (2_0) x_hi0 = x0 + D2ON36;

        nop
        mov     %i0,%o0
        lda     [%i0]0x82,%o4           ! (4_0) hy0 = *(int*)py;
        faddd   %f60,D2ON36,%f50        ! (2_0) y_hi0 = y0 + D2ON36;

        fmuld   %f18,%f16,%f0           ! (3_1) res0 = scl0 * res0;
        nop
        and     %o1,_0x7fffffff,%o7     ! (4_0) hx0 &= 0x7fffffff;
        faddd   %f42,%f34,%f18          ! (1_0) dres = res0_hi + res0_lo;

        fmuld   %f54,%f20,%f16          ! (7_1) dd *= dtmp1;
        cmp     %o7,_0x7ff00000         ! (4_0) hx0 ? 0x7ff00000
        st      %f18,[%fp+ftmp0]        ! (1_0) iarr = ((int*)&dres)[0];
        fpsub32 %f28,%f2,%f28           ! (0_0) dd = vis_fpsub32(dtmp0, dexp0);

        and     %o4,_0x7fffffff,%l7     ! (4_0) hy0 &= 0x7fffffff;
        st      %f0,[%i5]               ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
        bge,pn  %icc,.update45          ! (4_0) if ( hx0 >= 0x7ff00000 )
        fsubd   %f46,D2ON36,%f20        ! (2_0) x_hi0 -= D2ON36;

        sub     %l7,%o7,%o1             ! (4_0) diff0 = hy0 - hx0;
        cmp     %l7,_0x7ff00000         ! (4_0) hy0 ? 0x7ff00000
        bge,pn  %icc,.update46          ! (4_0) if ( hy0 >= 0x7ff00000 )
        fsubd   %f50,D2ON36,%f54        ! (2_0) y_hi0 -= D2ON36;

        fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;
        sra     %o1,31,%o3              ! (4_0) j0 = diff0 >> 31;
        st      %f1,[%i5+4]             ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f48,%f52,%f52          ! (5_1) res0 += dtmp0;

        and     %o1,%o3,%o1             ! (4_0) j0 &= diff0;
        cmp     %o7,_0x00100000         ! (4_0) hx0 ? 0x00100000
        bl,pn   %icc,.update47          ! (4_0) if ( hx0 < 0x00100000 )
        fand    %f26,DA0,%f48           ! (6_1) res0 = vis_fand(dres,DA0);
.cont47a:
        fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
        sub     %l7,%o1,%o4             ! (4_0) j0 = hy0 - j0;
        stx     %g1,[%fp+dtmp8]         ! (3_0) *(long long*)&scl0 = ll;
        fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
        and     %o4,%l0,%o4             ! (4_0) j0 &= 0x7ff00000;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;

        fmuld   %f30,%f48,%f10          ! (6_1) dtmp0 = res0_hi * res0;
        nop
        sub     %l0,%o4,%g1             ! (4_0) j0 = 0x7ff00000 - j0;
        fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
! Re-entry points from the .update47/.update48 fixup paths; same pipelined
! pattern shifted one slot.  Code bytes unchanged, comments only.
.cont47b:
        fmuld   %f16,%f14,%f14          ! (7_1) dtmp2 = dd * dres;
        sllx    %g1,32,%g1              ! (4_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp9]         ! (4_0) *(long long*)&scl0 = ll;
        faddd   %f60,%f54,%f50          ! (2_0) dtmp1 = y0 + y_hi0;

        fmuld   %f40,%f48,%f40          ! (6_1) dtmp1 = res0_lo * res0;
        nop
        nop
        fsubd   %f60,%f54,%f12          ! (2_0) y_lo0 = y0 - y_hi0;
.cont48:
        fmuld   %f62,%f0,%f0            ! (2_0) res0_lo *= x_lo0;
        nop
        ldd     [%fp+dtmp7],%f62        ! (3_0) *(long long*)&scl0 = ll;
        faddd   %f2,%f46,%f30           ! (2_0) res0_hi += dtmp0;

        fsubd   DONE,%f10,%f60          ! (6_1) dtmp0 = DONE - dtmp0;
        nop
        lda     [%i4]%asi,%f10          ! (3_0) ((float*)&x0)[0] = ((float*)px)[0];
        fmuld   %f28,%f20,%f54          ! (0_0) dd *= dtmp0;

        nop
        nop
        lda     [%i4+4]%asi,%f11        ! (3_0) ((float*)&x0)[1] = ((float*)px)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f50,%f12,%f28          ! (2_0) dtmp1 *= y_lo0;
        nop
        lda     [%i3]%asi,%f12          ! (3_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f14,%f20          ! (7_1) dtmp2 = DTWO - dtmp2;

        lda     [%i3+4]%asi,%f13        ! (3_0) ((float*)&y0)[1] = ((float*)py)[1];
        add     %i1,stridex,%i4         ! px += stridex
        nop
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f54,%f22,%f50          ! (0_0) dtmp1 = dd * dres;
        add     %i4,stridex,%i1         ! px += stridex
        ld      [%fp+ftmp0],%o2         ! (1_0) iarr = ((int*)&dres)[0];
        fand    %f18,DA1,%f2            ! (1_0) dexp0 = vis_fand(dres,DA1);

        fmuld   %f10,%f62,%f10          ! (3_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp10],%f14       ! (4_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f40,%f46          ! (6_1) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f60          ! (3_0) y0 *= scl0;
        sra     %o2,11,%i3              ! (1_0) iarr >>= 11;
        nop
        faddd   %f0,%f28,%f40           ! (2_0) res0_lo += dtmp1;

        and     %i3,0x1fc,%i3           ! (1_0) iarr &= 0x1fc;
        nop
        bn,pn   %icc,.exit              ! never taken (filler)
        fmuld   %f16,%f20,%f28          ! (7_1) dres = dd * dtmp2;

        fsqrtd  %f52,%f16               ! (5_1) res0 = sqrt ( res0 );
        add     %i3,TBL,%o4             ! (1_0) (char*)dll1 + iarr
        lda     [%i4]0x82,%o1           ! (5_0) hx0 = *(int*)px;
        fsubd   DTWO,%f50,%f20          ! (0_0) dtmp1 = DTWO - dtmp1;

        fmuld   %f46,%f26,%f52          ! (6_1) dtmp0 *= dres;
        add     %i0,stridey,%i3         ! py += stridey
        ld      [%o4],%f26              ! (1_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
        faddd   %f10,D2ON36,%f46        ! (3_0) x_hi0 = x0 + D2ON36;

        nop
        add     %i3,stridey,%i0         ! py += stridey
        lda     [%i3]0x82,%o4           ! (5_0) hy0 = *(int*)py;
        faddd   %f60,D2ON36,%f50        ! (3_0) y_hi0 = y0 + D2ON36;

        fmuld   %f14,%f24,%f0           ! (4_1) res0 = scl0 * res0;
        and     %o1,_0x7fffffff,%o7     ! (5_0) hx0 &= 0x7fffffff;
        nop
        faddd   %f30,%f40,%f14          ! (2_0) dres = res0_hi + res0_lo;

        fmuld   %f54,%f20,%f24          ! (0_0) dd *= dtmp1;
        cmp     %o7,_0x7ff00000         ! (5_0) hx0 ? 0x7ff00000
        st      %f14,[%fp+ftmp0]        ! (2_0) iarr = ((int*)&dres)[0];
        fpsub32 %f26,%f2,%f26           ! (1_0) dd = vis_fpsub32(dtmp0, dexp0);

        and     %o4,_0x7fffffff,%l7     ! (5_0) hy0 &= 0x7fffffff;
        st      %f0,[%i5]               ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
        bge,pn  %icc,.update49          ! (5_0) if ( hx0 >= 0x7ff00000 )
        fsubd   %f46,D2ON36,%f20        ! (3_0) x_hi0 -= D2ON36;

        sub     %l7,%o7,%o1             ! (5_0) diff0 = hy0 - hx0;
        cmp     %l7,_0x7ff00000         ! (5_0) hy0 ? 0x7ff00000
        bge,pn  %icc,.update50          ! (5_0) if ( hy0 >= 0x7ff00000 )
        fsubd   %f50,D2ON36,%f54        ! (3_0) y_hi0 -= D2ON36;

        fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;
        sra     %o1,31,%o3              ! (5_0) j0 = diff0 >> 31;
        st      %f1,[%i5+4]             ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f48,%f52,%f52          ! (6_1) res0 += dtmp0;

        and     %o1,%o3,%o1             ! (5_0) j0 &= diff0;
        cmp     %o7,_0x00100000         ! (5_0) hx0 ? 0x00100000
        bl,pn   %icc,.update51          ! (5_0) if ( hx0 < 0x00100000 )
        fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);
.cont51a:
        fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
        sub     %l7,%o1,%o4             ! (5_0) j0 = hy0 - j0;
        stx     %g1,[%fp+dtmp10]        ! (4_0) *(long long*)&scl0 = ll;
        fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
        and     %o4,%l0,%o4             ! (5_0) j0 &= 0x7ff00000;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;

        fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
        sub     %l0,%o4,%g1             ! (5_0) j0 = 0x7ff00000 - j0;
        nop
        fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;
! Re-entry points from the .update51/.update52 fixup paths; same pipelined
! pattern shifted one slot.  Code bytes unchanged, comments only.
.cont51b:
        fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
        sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
        faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;

        fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
        nop
        nop
        fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;
.cont52:
        fmuld   %f62,%f0,%f0            ! (3_0) res0_lo *= x_lo0;
        nop
        ldd     [%fp+dtmp9],%f62        ! (4_0) *(long long*)&scl0 = ll;
        faddd   %f2,%f46,%f44           ! (3_0) res0_hi += dtmp0;

        fsubd   DONE,%f10,%f60          ! (7_1) dtmp0 = DONE - dtmp0;
        nop
        lda     [%i2]%asi,%f10          ! (4_0) ((float*)&x0)[0] = ((float*)px)[0];
        fmuld   %f26,%f20,%f54          ! (1_0) dd *= dtmp0;

        nop
        nop
        lda     [%i2+4]%asi,%f11        ! (4_0) ((float*)&x0)[1] = ((float*)px)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f50,%f12,%f26          ! (3_0) dtmp1 *= y_lo0;
        nop
        lda     [%o0]%asi,%f12          ! (4_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f22,%f20          ! (0_0) dtmp2 = DTWO - dtmp2;

        nop
        nop
        lda     [%o0+4]%asi,%f13        ! (4_0) ((float*)&y0)[1] = ((float*)py)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f54,%f18,%f50          ! (1_0) dtmp1 = dd * dres;
        nop
        ld      [%fp+ftmp0],%o2         ! (2_0) iarr = ((int*)&dres)[0];
        fand    %f14,DA1,%f2            ! (2_0) dexp0 = vis_fand(dres,DA1);

        fmuld   %f10,%f62,%f10          ! (4_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp12],%f22       ! (5_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f38,%f46          ! (7_1) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f60          ! (4_0) y0 *= scl0;
        sra     %o2,11,%o4              ! (2_0) iarr >>= 11;
        nop
        faddd   %f0,%f26,%f38           ! (3_0) res0_lo += dtmp1;

        and     %o4,0x1fc,%o4           ! (2_0) iarr &= 0x1fc;
        nop
        bn,pn   %icc,.exit              ! never taken (filler)
        fmuld   %f24,%f20,%f26          ! (0_0) dres = dd * dtmp2;

        fsqrtd  %f52,%f24               ! (6_1) res0 = sqrt ( res0 );
        add     %o4,TBL,%o4             ! (2_0) (char*)dll1 + iarr
        lda     [%i1]0x82,%o1           ! (6_0) hx0 = *(int*)px;
        fsubd   DTWO,%f50,%f52          ! (1_0) dtmp1 = DTWO - dtmp1;

        fmuld   %f46,%f28,%f28          ! (7_1) dtmp0 *= dres;
        mov     %i1,%i2
        ld      [%o4],%f20              ! (2_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
        faddd   %f10,D2ON36,%f46        ! (4_0) x_hi0 = x0 + D2ON36;

        nop
        mov     %i0,%o0
        lda     [%i0]0x82,%o4           ! (6_0) hy0 = *(int*)py;
        faddd   %f60,D2ON36,%f50        ! (4_0) y_hi0 = y0 + D2ON36;

        fmuld   %f22,%f16,%f0           ! (5_1) res0 = scl0 * res0;
        and     %o1,_0x7fffffff,%o7     ! (6_0) hx0 &= 0x7fffffff;
        nop
        faddd   %f44,%f38,%f22          ! (3_0) dres = res0_hi + res0_lo;

        fmuld   %f54,%f52,%f16          ! (1_0) dd *= dtmp1;
        cmp     %o7,_0x7ff00000         ! (6_0) hx0 ? 0x7ff00000
        st      %f22,[%fp+ftmp0]        ! (3_0) iarr = ((int*)&dres)[0];
        fpsub32 %f20,%f2,%f52           ! (2_0) dd = vis_fpsub32(dtmp0, dexp0);

        and     %o4,_0x7fffffff,%l7     ! (6_0) hy0 &= 0x7fffffff;
        st      %f0,[%i5]               ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
        bge,pn  %icc,.update53          ! (6_0) if ( hx0 >= 0x7ff00000 )
        fsubd   %f46,D2ON36,%f46        ! (4_0) x_hi0 -= D2ON36;

        sub     %l7,%o7,%o1             ! (6_0) diff0 = hy0 - hx0;
        cmp     %l7,_0x7ff00000         ! (6_0) hy0 ? 0x7ff00000
        bge,pn  %icc,.update54          ! (6_0) if ( hy0 >= 0x7ff00000 )
        fsubd   %f50,D2ON36,%f54        ! (4_0) y_hi0 -= D2ON36;

        fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
        sra     %o1,31,%o3              ! (6_0) j0 = diff0 >> 31;
        st      %f1,[%i5+4]             ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;

        and     %o1,%o3,%o1             ! (6_0) j0 &= diff0;
        cmp     %o7,_0x00100000         ! (6_0) hx0 ? 0x00100000
        bl,pn   %icc,.update55          ! (6_0) if ( hx0 < 0x00100000 )
        fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);
.cont55a:
        fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
        sub     %l7,%o1,%o4             ! (6_0) j0 = hy0 - j0;
        stx     %g1,[%fp+dtmp12]        ! (5_0) *(long long*)&scl0 = ll;
        fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
        and     %o4,%l0,%o4             ! (6_0) j0 &= 0x7ff00000;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;

        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        sub     %l0,%o4,%g1             ! (6_0) j0 = 0x7ff00000 - j0;
        nop
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;
! Re-entry points from the .update55/.update56 fixup paths; same pipelined
! pattern shifted one slot.  Code bytes unchanged, comments only.
.cont55b:
        fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
        sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
        faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;

        fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
        nop
        nop
        fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;
.cont56:
        fmuld   %f62,%f2,%f2            ! (4_0) res0_lo *= x_lo0;
        nop
        ldd     [%fp+dtmp11],%f62       ! (5_0) *(long long*)&scl0 = ll;
        faddd   %f0,%f20,%f32           ! (4_0) res0_hi += dtmp0;

        lda     [%i4]%asi,%f0           ! (5_0) ((float*)&x0)[0] = ((float*)px)[0];
        nop
        nop
        fmuld   %f52,%f10,%f10          ! (2_0) dd *= dtmp0;

        lda     [%i4+4]%asi,%f1         ! (5_0) ((float*)&x0)[1] = ((float*)px)[1];
        nop
        nop
        fsubd   DONE,%f50,%f52          ! (0_0) dtmp0 = DONE - dtmp0;

        fmuld   %f46,%f60,%f46          ! (4_0) dtmp1 *= y_lo0;
        nop
        lda     [%i3]%asi,%f12          ! (5_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f18,%f18          ! (1_0) dtmp2 = DTWO - dtmp2;

        nop
        add     %i1,stridex,%i4         ! px += stridex
        lda     [%i3+4]%asi,%f13        ! (5_0) ((float*)&y0)[1] = ((float*)py)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f10,%f14,%f50          ! (2_0) dtmp1 = dd * dres;
        add     %i4,stridex,%i1         ! px += stridex
        ld      [%fp+ftmp0],%o2         ! (3_0) iarr = ((int*)&dres)[0];
        fand    %f22,DA1,%f54           ! (3_0) dexp0 = vis_fand(dres,DA1);

        fmuld   %f0,%f62,%f60           ! (5_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp14],%f0        ! (6_1) *(long long*)&scl0 = ll;
        fsubd   %f52,%f36,%f20          ! (0_0) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f52          ! (5_0) y0 *= scl0;
        sra     %o2,11,%i3              ! (3_0) iarr >>= 11;
        nop
        faddd   %f2,%f46,%f36           ! (4_0) res0_lo += dtmp1;

        and     %i3,0x1fc,%i3           ! (3_0) iarr &= 0x1fc;
        nop
        bn,pn   %icc,.exit              ! never taken (filler)
        fmuld   %f16,%f18,%f16          ! (1_0) dres = dd * dtmp2;

        fsqrtd  %f48,%f18               ! (7_1) res0 = sqrt ( res0 );
        add     %i3,TBL,%o4             ! (3_0) (char*)dll1 + iarr
        lda     [%i4]0x82,%o1           ! (7_0) hx0 = *(int*)px;
        fsubd   DTWO,%f50,%f46          ! (2_0) dtmp1 = DTWO - dtmp1;

        fmuld   %f20,%f26,%f48          ! (0_0) dtmp0 *= dres;
        add     %i0,stridey,%i3         ! py += stridey
        ld      [%o4],%f20              ! (3_0) dtmp0 = ((double*)((char*)dll1 + iarr))[0];
        faddd   %f60,D2ON36,%f50        ! (5_0) x_hi0 = x0 + D2ON36;

        nop
        add     %i3,stridey,%i0         ! py += stridey
        lda     [%i3]0x82,%o4           ! (7_0) hy0 = *(int*)py;
        faddd   %f52,D2ON36,%f12        ! (5_0) y_hi0 = y0 + D2ON36;

        fmuld   %f0,%f24,%f2            ! (6_1) res0 = scl0 * res0;
        and     %o1,_0x7fffffff,%o7     ! (7_0) hx0 &= 0x7fffffff;
        nop
        faddd   %f32,%f36,%f24          ! (4_0) dres = res0_hi + res0_lo;

        fmuld   %f10,%f46,%f26          ! (2_0) dd *= dtmp1;
        cmp     %o7,_0x7ff00000         ! (7_0) hx0 ? 0x7ff00000
        st      %f24,[%fp+ftmp0]        ! (4_0) iarr = ((int*)&dres)[0];
        fpsub32 %f20,%f54,%f10          ! (3_0) dd = vis_fpsub32(dtmp0, dexp0);

        and     %o4,_0x7fffffff,%l7     ! (7_0) hy0 &= 0x7fffffff;
        st      %f2,[%i5]               ! (6_1) ((float*)pz)[0] = ((float*)&res0)[0];
        bge,pn  %icc,.update57          ! (7_0) if ( hx0 >= 0x7ff00000 )
        fsubd   %f50,D2ON36,%f20        ! (5_0) x_hi0 -= D2ON36;

        sub     %l7,%o7,%o1             ! (7_0) diff0 = hy0 - hx0;
        cmp     %l7,_0x7ff00000         ! (7_0) hy0 ? 0x7ff00000
        bge,pn  %icc,.update58          ! (7_0) if ( hy0 >= 0x7ff00000 )
        fsubd   %f12,D2ON36,%f54        ! (5_0) y_hi0 -= D2ON36;

        fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
        sra     %o1,31,%o3              ! (7_0) j0 = diff0 >> 31;
        st      %f3,[%i5+4]             ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;

        and     %o1,%o3,%o1             ! (7_0) j0 &= diff0;
        cmp     %o7,_0x00100000         ! (7_0) hx0 ? 0x00100000
        bl,pn   %icc,.update59          ! (7_0) if ( hx0 < 0x00100000 )
        fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);
.cont59a:
        fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
        sub     %l7,%o1,%o4             ! (7_0) j0 = hy0 - j0;
        stx     %g1,[%fp+dtmp14]        ! (6_0) *(long long*)&scl0 = ll;
        fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
        and     %o4,%l0,%o4             ! (7_0) j0 &= 0x7ff00000;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;

        fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
        sub     %l0,%o4,%g1             ! (7_0) j0 = 0x7ff00000 - j0;
        nop
        fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;
! Re-entry points from the .update59/.update60 fixup paths, then the end
! of the pipelined main loop: counter is decremented by 8 (one trip covers
! eight elements) and the loop is re-entered while it stays non-negative;
! otherwise the 8 is added back and control falls into .tail to drain the
! remaining in-flight iterations.  Code bytes unchanged, comments only.
.cont59b:
        fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
        sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
        faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;

        fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
        nop
        nop
        fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;
.cont60:
        fmuld   %f62,%f2,%f2            ! (5_0) res0_lo *= x_lo0;
        nop
        ldd     [%fp+dtmp13],%f62       ! (6_0) *(long long*)&scl0 = ll;
        faddd   %f0,%f46,%f42           ! (5_0) res0_hi += dtmp0;

        fmuld   %f10,%f20,%f52          ! (3_0) dd *= dtmp0;
        nop
        lda     [%i2]%asi,%f10          ! (6_0) ((float*)&x0)[0] = ((float*)px)[0];
        bn,pn   %icc,.exit              ! never taken (filler)

        lda     [%i2+4]%asi,%f11        ! (6_0) ((float*)&x0)[1] = ((float*)px)[1];
        nop
        nop
        fsubd   DONE,%f60,%f60          ! (1_0) dtmp0 = DONE - dtmp0;

        fmuld   %f50,%f54,%f46          ! (5_0) dtmp1 *= y_lo0;
        nop
        lda     [%o0]%asi,%f12          ! (6_0) ((float*)&y0)[0] = ((float*)py)[0];
        fsubd   DTWO,%f14,%f14          ! (2_0) dtmp2 = DTWO - dtmp2;

        nop
        nop
        lda     [%o0+4]%asi,%f13        ! (6_0) ((float*)&y0)[1] = ((float*)py)[1];
        bn,pn   %icc,.exit              ! never taken (filler)

        fmuld   %f52,%f22,%f50          ! (3_0) dtmp1 = dd * dres;
        nop
        ld      [%fp+ftmp0],%o2         ! (4_0) iarr = ((int*)&dres)[0];
        fand    %f24,DA1,%f54           ! (4_0) dexp0 = vis_fand(dres,DA1);

        fmuld   %f10,%f62,%f10          ! (6_0) x0 *= scl0;
        nop
        ldd     [%fp+dtmp0],%f0         ! (7_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f34,%f20          ! (1_0) dtmp0 -= dtmp1;

        fmuld   %f12,%f62,%f60          ! (6_0) y0 *= scl0;
        sra     %o2,11,%o4              ! (4_0) iarr >>= 11;
        nop
        faddd   %f2,%f46,%f34           ! (5_0) res0_lo += dtmp1;

        and     %o4,0x1fc,%o4           ! (4_0) iarr &= 0x1fc;
        subcc   counter,8,counter       ! counter -= 8;
        bpos,pt %icc,.main_loop         ! loop while counter >= 0
        fmuld   %f26,%f14,%f26          ! (2_0) dres = dd * dtmp2; (delay slot)

        add     counter,8,counter       ! undo last decrement; <8 elements left
! --------------------------------------------------------------
! Pipeline drain: finish the iterations still in flight when the
! main loop exits, storing one result and decrementing counter per
! pass until it goes negative (then branch back to .begin).
! Continues past this view.  Code bytes unchanged, comments only.
! --------------------------------------------------------------
.tail:
        subcc   counter,1,counter
        bneg    .begin                  ! all remaining results stored
        nop

        fsqrtd  %f48,%f14               ! (0_1) res0 = sqrt ( res0 );
        add     %o4,TBL,%o4             ! (4_1) (char*)dll1 + iarr
        fsubd   DTWO,%f50,%f46          ! (3_1) dtmp1 = DTWO - dtmp1;

        fmuld   %f20,%f16,%f48          ! (1_1) dtmp0 *= dres;
        ld      [%o4],%f20              ! (4_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

        fmuld   %f0,%f18,%f0            ! (7_2) res0 = scl0 * res0;
        st      %f0,[%i5]               ! (7_2) ((float*)pz)[0] = ((float*)&res0)[0];
        faddd   %f42,%f34,%f16          ! (5_1) dres = res0_hi + res0_lo;

        subcc   counter,1,counter
        st      %f1,[%i5+4]             ! (7_2) ((float*)pz)[1] = ((float*)&res0)[1];
        bneg    .begin
        add     %i5,stridez,%i5         ! pz += stridez (delay slot)

        fmuld   %f52,%f46,%f18          ! (3_1) dd *= dtmp1;
        st      %f16,[%fp+ftmp0]        ! (5_1) iarr = ((int*)&dres)[0];
        fpsub32 %f20,%f54,%f54          ! (4_1) dd = vis_fpsub32(dtmp0, dexp0);

        fmuld   %f54,%f24,%f50          ! (4_1) dtmp0 = dd * dres;
        faddd   %f28,%f48,%f52          ! (1_1) res0 += dtmp0;


        fand    %f26,DA0,%f48           ! (2_1) res0 = vis_fand(dres,DA0);

        fmuld   %f18,%f22,%f22          ! (3_1) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f20          ! (4_1) dtmp0 = DTWO - dtmp0;

        fmuld   %f30,%f48,%f12          ! (2_1) dtmp0 = res0_hi * res0;

        fmuld   %f40,%f48,%f40          ! (2_1) dtmp1 = res0_lo * res0;

        fmuld   %f54,%f20,%f54          ! (4_1) dd *= dtmp0;

        fsubd   DONE,%f12,%f60          ! (2_1) dtmp0 = DONE - dtmp0;

        fsubd   DTWO,%f22,%f22          ! (3_1) dtmp2 = DTWO - dtmp2;

        fmuld   %f54,%f24,%f50          ! (4_1) dtmp1 = dd * dres;
        ld      [%fp+ftmp0],%o2         ! (5_1) iarr = ((int*)&dres)[0];
        fand    %f16,DA1,%f2            ! (5_1) dexp0 = vis_fand(dres,DA1);

        ldd     [%fp+dtmp2],%f0         ! (0_1) *(long long*)&scl0 = ll;
        fsubd   %f60,%f40,%f20          ! (2_1) dtmp0 -= dtmp1;

        sra     %o2,11,%i3              ! (5_1) iarr >>= 11;

        and     %i3,0x1fc,%i3           ! (5_1) iarr &= 0x1fc;
        fmuld   %f18,%f22,%f28          ! (3_1) dres = dd * dtmp2;

        fsqrtd  %f52,%f22               ! (1_1) res0 = sqrt ( res0 );
        add     %i3,TBL,%g1             ! (5_1) (char*)dll1 + iarr
        fsubd   DTWO,%f50,%f62          ! (4_1) dtmp1 = DTWO - dtmp1;

        fmuld   %f20,%f26,%f52          ! (2_1) dtmp0 *= dres;
        ld      [%g1],%f26              ! (5_1) dtmp0 = ((double*)((char*)dll1 + iarr))[0];

        fmuld   %f0,%f14,%f0            ! (0_1) res0 = scl0 * res0;

        fmuld   %f54,%f62,%f14          ! (4_1) dd *= dtmp1;
        fpsub32 %f26,%f2,%f26           ! (5_1) dd = vis_fpsub32(dtmp0, dexp0);

        st      %f0,[%i5]               ! (0_1) ((float*)pz)[0] = ((float*)&res0)[0];

        fmuld   %f26,%f16,%f50          ! (5_1) dtmp0 = dd * dres;
        st      %f1,[%i5+4]             ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f48,%f52,%f52          ! (2_1) res0 += dtmp0;

        subcc   counter,1,counter
        bneg    .begin
        add     %i5,stridez,%i5         ! pz += stridez (delay slot)

        fand    %f28,DA0,%f48           ! (3_1) res0 = vis_fand(dres,DA0);

        fmuld   %f44,%f48,%f10          ! (3_1) dtmp0 = res0_hi * res0;
        fsubd   DTWO,%f50,%f20          ! (5_1) dtmp0 = DTWO - dtmp0;

        fmuld   %f14,%f24,%f24          ! (4_1) dtmp2 = dd * dres;

        fmuld   %f38,%f48,%f38          ! (3_1) dtmp1 = res0_lo * res0;

        fsubd   DONE,%f10,%f60          ! (3_1) dtmp0 = DONE - dtmp0;
        fmuld   %f26,%f20,%f54          ! (5_1) dd *= dtmp0;

        fsubd   DTWO,%f24,%f24          ! (4_1) dtmp2 = DTWO - dtmp2;

        fmuld   %f54,%f16,%f46          ! (5_1) dtmp1 = dd * dres;
1991 
1992         ldd     [%fp+dtmp4],%f50        ! (1_1) *(long long*)&scl0 = ll;
1993         fsubd   %f60,%f38,%f20          ! (3_1) dtmp0 -= dtmp1;
1994 
1995         fmuld   %f14,%f24,%f26          ! (4_1) dres = dd * dtmp2;
1996 
1997         fsqrtd  %f52,%f24               ! (2_1) res0 = sqrt ( res0 );
1998         fsubd   DTWO,%f46,%f62          ! (5_1) dtmp1 = DTWO - dtmp1;
1999 
2000         fmuld   %f20,%f28,%f52          ! (3_1) dtmp0 *= dres;
2001 
2002         fmuld   %f50,%f22,%f0           ! (1_1) res0 = scl0 * res0;
2003 
2004         fmuld   %f54,%f62,%f22          ! (5_1) dd *= dtmp1;
2005 
2006         st      %f0,[%i5]               ! (1_1) ((float*)pz)[0] = ((float*)&res0)[0];
2007 
2008         subcc   counter,1,counter
2009         st      %f1,[%i5+4]             ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
2010         bneg    .begin
2011         add     %i5,stridez,%i5         ! pz += stridez
2012 
2013         faddd   %f48,%f52,%f52          ! (3_1) res0 += dtmp0;
2014 
2015         fand    %f26,DA0,%f48           ! (4_1) res0 = vis_fand(dres,DA0);
2016 
2017         fmuld   %f32,%f48,%f10          ! (4_1) dtmp0 = res0_hi * res0;
2018 
2019         fmuld   %f22,%f16,%f16          ! (5_1) dtmp2 = dd * dres;
2020 
2021         fmuld   %f36,%f48,%f36          ! (4_1) dtmp1 = res0_lo * res0;
2022 
2023         fsubd   DONE,%f10,%f60          ! (4_1) dtmp0 = DONE - dtmp0;
2024 
2025         fsubd   DTWO,%f16,%f16          ! (5_1) dtmp2 = DTWO - dtmp2;
2026 
2027         ldd     [%fp+dtmp6],%f50        ! (2_1) *(long long*)&scl0 = ll;
2028         fsubd   %f60,%f36,%f20          ! (4_1) dtmp0 -= dtmp1;
2029 
2030         fmuld   %f22,%f16,%f28          ! (5_1) dres = dd * dtmp2;
2031 
2032         fsqrtd  %f52,%f16               ! (3_1) res0 = sqrt ( res0 );
2033 
2034         fmuld   %f20,%f26,%f52          ! (4_1) dtmp0 *= dres;
2035 
2036         fmuld   %f50,%f24,%f0           ! (2_1) res0 = scl0 * res0;
2037 
2038         st      %f0,[%i5]               ! (2_1) ((float*)pz)[0] = ((float*)&res0)[0];
2039 
2040         st      %f1,[%i5+4]             ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
2041         faddd   %f48,%f52,%f52          ! (4_1) res0 += dtmp0;
2042 
2043         subcc   counter,1,counter
2044         bneg    .begin
2045         add     %i5,stridez,%i5         ! pz += stridez
2046 
2047         fand    %f28,DA0,%f48           ! (5_1) res0 = vis_fand(dres,DA0);
2048 
2049         fmuld   %f42,%f48,%f10          ! (5_1) dtmp0 = res0_hi * res0;
2050 
2051         fmuld   %f34,%f48,%f34          ! (5_1) dtmp1 = res0_lo * res0;
2052 
2053         fsubd   DONE,%f10,%f60          ! (5_1) dtmp0 = DONE - dtmp0;
2054 
2055         ldd     [%fp+dtmp8],%f18        ! (3_1) *(long long*)&scl0 = ll;
2056         fsubd   %f60,%f34,%f46          ! (5_1) dtmp0 -= dtmp1;
2057 
2058         fsqrtd  %f52,%f24               ! (4_1) res0 = sqrt ( res0 );
2059 
2060         fmuld   %f46,%f28,%f52          ! (5_1) dtmp0 -= dtmp1;
2061 
2062         fmuld   %f18,%f16,%f0           ! (3_1) res0 = scl0 * res0;
2063         st      %f0,[%i5]               ! (3_1) ((float*)pz)[0] = ((float*)&res0)[0];
2064         st      %f1,[%i5+4]             ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
2065         faddd   %f48,%f52,%f52          ! (5_1) res0 += dtmp0;
2066 
2067         subcc   counter,1,counter
2068         bneg    .begin
2069         add     %i5,stridez,%i5         ! pz += stridez
2070 
2071         ldd     [%fp+dtmp10],%f14       ! (4_1) *(long long*)&scl0 = ll;
2072 
2073         fsqrtd  %f52,%f16               ! (5_1) res0 = sqrt ( res0 );
2074 
2075         fmuld   %f14,%f24,%f0           ! (4_1) res0 = scl0 * res0
2076         st      %f0,[%i5]               ! (4_1) ((float*)pz)[0] = ((float*)&res0)[0];
2077         st      %f1,[%i5+4]             ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
2078 
2079         subcc   counter,1,counter
2080         bneg    .begin
2081         add     %i5,stridez,%i5         ! pz += stridez
2082 
2083         ldd     [%fp+dtmp12],%f22       ! (5_1) *(long long*)&scl0 = ll;
2084 
2085         fmuld   %f22,%f16,%f0           ! (5_1) res0 = scl0 * res0;
2086         st      %f0,[%i5]               ! (5_1) ((float*)pz)[0] = ((float*)&res0)[0];
2087         st      %f1,[%i5+4]             ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
2088 
2089         ba      .begin
2090         add     %i5,stridez,%i5
2091 
        .align  16
! .spec0: special-case path taken when hx0 or hy0 is in the Inf/NaN
! exponent range (>= 0x7ff00000).  If either operand is exactly +/-Inf
! (high word 0x7ff00000, low word 0), the result stored is 0; otherwise
! the result is fabs(x0) * fabs(y0), which propagates a NaN operand.
! Afterwards the pointers advance by their strides and control returns
! to .begin1 with counter decremented.
.spec0:
        cmp     %o7,_0x7ff00000         ! hx0 ? 0x7ff00000
        bne     1f                      ! if ( hx0 != 0x7ff00000 )
        ld      [%i4+4],%i2             ! lx = ((int*)px)[1]; (delay slot)

        cmp     %i2,0                   ! lx ? 0
        be      3f                      ! if ( lx == 0 ) x is +/-Inf -> result 0
        nop
1:
        cmp     %l7,_0x7ff00000         ! hy0 ? 0x7ff00000
        bne     2f                      ! if ( hy0 != 0x7ff00000 )
        ld      [%i3+4],%o2             ! ly = ((int*)py)[1]; (delay slot)

        cmp     %o2,0                   ! ly ? 0
        be      3f                      ! if ( ly == 0 ) y is +/-Inf -> result 0
2:
        ld      [%i4],%f0               ! ((float*)&x0)[0] = ((float*)px)[0];
        ld      [%i4+4],%f1             ! ((float*)&x0)[1] = ((float*)px)[1];

        ld      [%i3],%f2               ! ((float*)&y0)[0] = ((float*)py)[0];
        add     %i4,stridex,%i4         ! px += stridex
        ld      [%i3+4],%f3             ! ((float*)&y0)[1] = ((float*)py)[1];

        fabsd   %f0,%f0                 ! x0 = fabs(x0);

        fabsd   %f2,%f2                 ! y0 = fabs(y0);

        fmuld   %f0,%f2,%f0             ! res0 = fabs(x0) * fabs(y0);
        add     %i3,stridey,%i3         ! py += stridey;
        st      %f0,[%i5]               ! ((float*)pz)[0] = ((float*)&res0)[0];

        st      %f1,[%i5+4]             ! ((float*)pz)[1] = ((float*)&res0)[1];
        add     %i5,stridez,%i5         ! pz += stridez
        ba      .begin1
        sub     counter,1,counter       ! (delay slot)
3:
        add     %i4,stridex,%i4         ! px += stridex
        add     %i3,stridey,%i3         ! py += stridey
        st      %g0,[%i5]               ! ((int*)pz)[0] = 0;

        add     %i5,stridez,%i5         ! pz += stridez;
        ! NOTE(review): pz is advanced *before* the second zero store below,
        ! so [%i5+4] here is the low word of the NEXT output element, not of
        ! the element whose high word was just zeroed.  Looks like a bug
        ! (expected: store [%i5+4] first, then advance pz) -- confirm against
        ! the C reference implementation.
        st      %g0,[%i5+4]             ! ((int*)pz)[1] = 0;
        ba      .begin1
        sub     counter,1,counter       ! (delay slot)
2137 
        .align  16
! .spec1: special-case path for small operands.  If hy0 turns out to be
! in the normal range after all, resume the fast path at .cont_spec0.
! Otherwise, if every word of x and y is zero the result is 1.0/0.0
! (+Inf, raising the IEEE division-by-zero exception); if not, subnormal
! operands are rescaled into the normal range (mask the fraction, fxtod,
! add D2ON51), the rescaled values are parked at [%fp+dtmp2]/[%fp+dtmp3],
! scale constants are staged in dtmp15/dtmp0, and control rejoins the
! main loop at .cont_spec1.
.spec1:
        and     %o1,%o3,%o1             ! (7_0) j0 &= diff0;

        cmp     %l7,_0x00100000         ! (7_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont_spec0        ! (7_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! y is normal, resume the main path)

        ld      [%i4+4],%i2             ! lx = ((int*)px)[1]; (delay slot)
        or      %o7,%l7,%g5             ! ii = hx0 | hy0;
        fzero   %f0

        ld      [%i3+4],%o2             ! ly = ((int*)py)[1];
        or      %i2,%g5,%g5             ! ii |= lx;

        orcc    %o2,%g5,%g5             ! ii |= ly;
        bnz,a,pn        %icc,1f         ! if ( ii != 0 )
        sethi   %hi(0x00080000),%i2     ! (annulled delay slot: executes only
                                        ! when the branch is taken)

        fdivd   DONE,%f0,%f0            ! res0 = 1.0 / 0.0;

        st      %f0,[%i5]               ! ((float*)pz)[0] = ((float*)&res0)[0];

        add     %i4,stridex,%i4         ! px += stridex;
        add     %i3,stridey,%i3         ! py += stridey;
        st      %f1,[%i5+4]             ! ((float*)pz)[1] = ((float*)&res0)[1];

        add     %i5,stridez,%i5         ! pz += stridez;
        ba      .begin1
        sub     counter,1,counter       ! (delay slot)
1:
        ld      [%i4],%f0               ! ((float*)&x0)[0] = ((float*)px)[0];

        ld      [%i4+4],%f1             ! ((float*)&x0)[1] = ((float*)px)[1];

        ld      [%i3],%f2               ! ((float*)&y0)[0] = ((float*)py)[0];

        fabsd   %f0,%f0                 ! x0 = fabs(x0);
        ld      [%i3+4],%f3             ! ((float*)&y0)[1] = ((float*)py)[1];

        ldd     [TBL+TBL_SHIFT+64],%f12 ! ((long long*)&dtmp0)[0] = 0x0007ffffffffffffULL;
        add     %fp,dtmp2,%i4           ! scratch slot for rescaled x0
        add     %fp,dtmp3,%i3           ! scratch slot for rescaled y0

        fabsd   %f2,%f2                 ! y0 = fabs(y0);
        ldd     [TBL+TBL_SHIFT+56],%f10 ! D2ON51

        ldx     [TBL+TBL_SHIFT+48],%g5  ! D2ONM52
        cmp     %o7,%i2                 ! hx0 ? 0x00080000
        bl,a    1f                      ! if ( hx0 < 0x00080000 )
        fxtod   %f0,%f0                 ! x0 = *(long long*)&x0; (annulled delay
                                        ! slot: executes only when branch taken)

        fand    %f0,%f12,%f0            ! x0 = vis_fand(x0, dtmp0);
        fxtod   %f0,%f0                 ! x0 = *(long long*)&x0;
        faddd   %f0,%f10,%f0            ! x0 += D2ON51;
1:
        std     %f0,[%i4]               ! save rescaled x0 at [%fp+dtmp2]

        ldx     [TBL+TBL_SHIFT+40],%g1  ! D2ON1022
        cmp     %l7,%i2                 ! hy0 ? 0x00080000
        bl,a    1f                      ! if ( hy0 < 0x00080000 )
        fxtod   %f2,%f2                 ! y0 = *(long long*)&y0; (annulled delay
                                        ! slot: executes only when branch taken)

        fand    %f2,%f12,%f2            ! y0 = vis_fand(y0, dtmp0);
        fxtod   %f2,%f2                 ! y0 = *(long long*)&y0;
        faddd   %f2,%f10,%f2            ! y0 += D2ON51;
1:
        std     %f2,[%i3]               ! save rescaled y0 at [%fp+dtmp3]

        stx     %g5,[%fp+dtmp15]        ! D2ONM52

        ba      .cont_spec1
        stx     %g1,[%fp+dtmp0]         ! D2ON1022 (delay slot)
2210 
        .align  16
! .update0 .. .update4: in-loop fixup stubs for pipeline depths 1 and 2.
! When an operand fetched for the newest element cannot be handled by the
! fast path, the stub records where scalar reprocessing must resume
! (tmp_counter / tmp_px / tmp_py), truncates counter so the pipeline only
! drains the elements already in flight, points the operand pointers at a
! harmless table entry (TBL+TBL_SHIFT+24), and rejoins the main loop at
! the matching .contN label.  Stages .update2/.update3 additionally replay
! the in-flight hi/lo split arithmetic that the detour skipped.
.update0:
        cmp     counter,1
        ble     1f                      ! nothing left to save if counter <= 1
        nop

        sub     counter,1,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]        ! resume position in x

        stx     %o0,[%fp+tmp_py]        ! resume position in y

        mov     1,counter               ! drain only the in-flight work
1:
        sethi   %hi(0x3ff00000),%o4
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operand for x
        ba      .cont1
        add     TBL,TBL_SHIFT+24,%o0    ! substitute dummy operand for y (delay slot)

        .align  16
.update1:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont0             ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont0)

        cmp     counter,1
        ble,a   1f
        nop

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        mov     1,counter
        stx     %o0,[%fp+tmp_py]
1:
        sethi   %hi(0x3ff00000),%o4
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        ba      .cont1
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update2:
        cmp     counter,2
        ble     1f
        nop

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     2,counter
1:
        ! replay the in-flight (7_1) hi/lo split before rejoining
        fsubd   %f50,D2ON36,%f54        ! (7_1) y_hi0 -= D2ON36;

        fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;

        sethi   %hi(0x3ff00000),%o4
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        ba      .cont4
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update3:
        cmp     counter,2
        ble     1f
        nop

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     2,counter
1:
        fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;

        sethi   %hi(0x3ff00000),%o4
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        ba      .cont4
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update4:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,a,pn        %icc,.cont4     ! (0_0) if ( hy0 < 0x00100000 )
        sub     %l0,%o4,%o4             ! (1_0) j0 = 0x7ff00000 - j0; (annulled
                                        ! delay slot: executes only when taken)

        cmp     counter,2
        ble,a   1f
        nop

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        mov     2,counter
        stx     %i3,[%fp+tmp_py]
1:
        sethi   %hi(0x3ff00000),%o4
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        ba      .cont4
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)
2329 
        .align  16
! .update5 .. .update7: fixup stubs at pipeline depth 3 (rejoin at .cont8);
! same save/truncate/substitute pattern as .update0 above, with the
! in-flight (0_0) hi/lo-split arithmetic replayed inline where needed.
.update5:
        cmp     counter,3
        ble     1f
        nop

        sub     counter,3,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     3,counter               ! drain only the in-flight work
1:
        st      %f14,[%fp+ftmp0]        ! (7_1) iarr = ((int*)&dres)[0];
        fsubd   %f46,D2ON36,%f20        ! (0_0) x_hi0 -= D2ON36;

        fsubd   %f12,D2ON36,%f54        ! (0_0) y_hi0 -= D2ON36;

        fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands

        sllx    %g1,32,%g1
        ba      .cont8
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update6:
        cmp     counter,3
        ble     1f
        nop

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     3,counter
1:
        fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands

        sllx    %g1,32,%g1
        ba      .cont8
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update7:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont7             ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont7)

        cmp     counter,3
        ble,a   1f
        nop

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        mov     3,counter
        stx     %o0,[%fp+tmp_py]
1:
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands

        sllx    %g1,32,%g1
        ba      .cont8
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)
2414 
        .align  16
! .update9 .. .update11: fixup stubs at pipeline depth 4 (rejoin at
! .cont12); same save/truncate/substitute pattern, replaying the
! in-flight (7_1) Newton step and (1_0) hi/lo split where needed.
.update9:
        cmp     counter,4
        ble     1f
        nop

        sub     counter,4,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     4,counter               ! drain only the in-flight work
1:
        st      %f22,[%fp+ftmp0]        ! (0_0) iarr = ((int*)&dres)[0];
        fsubd   %f46,D2ON36,%f20        ! (1_0) x_hi0 -= D2ON36;

        fsubd   %f12,D2ON36,%f54        ! (1_0) y_hi0 -= D2ON36;

        fmuld   %f26,%f14,%f50          ! (7_1) dtmp0 = dd * dres;


        fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;

        fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        ba      .cont12
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update10:
        cmp     counter,4
        ble     1f
        nop

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     4,counter
1:
        fmuld   %f26,%f14,%f50          ! (7_1) dtmp0 = dd * dres;


        fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;

        fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        ba      .cont12
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update11:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont11            ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont11)

        cmp     counter,4
        ble,a   1f
        nop

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        mov     4,counter
        stx     %i3,[%fp+tmp_py]
1:
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands

        fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
        ba      .cont12
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)
2505 
        .align  16
! .update13 .. .update15: fixup stubs at pipeline depth 5 (rejoin at
! .cont16); same save/truncate/substitute pattern, replaying the
! in-flight (0_0) Newton step and (2_0) hi/lo split where needed.
.update13:
        cmp     counter,5
        ble     1f
        nop

        sub     counter,5,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     5,counter               ! drain only the in-flight work
1:
        fsubd   %f46,D2ON36,%f20        ! (2_0) x_hi0 -= D2ON36;

        fsubd   %f50,D2ON36,%f54        ! (2_0) y_hi0 -= D2ON36;

        fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;

        fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;

        fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        ba      .cont16
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update14:
        cmp     counter,5
        ble     1f
        nop

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     5,counter
1:
        fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;

        fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;

        fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        ba      .cont16
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update15:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont15            ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont15)

        cmp     counter,5
        ble,a   1f
        nop

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        mov     5,counter
        stx     %o0,[%fp+tmp_py]
1:
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands

        fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
        ba      .cont16
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)
2593 
        .align  16
! .update17 .. .update19: fixup stubs at pipeline depth 6 (rejoin at
! .cont20 / .cont19b); same save/truncate/substitute pattern.  These also
! replay the in-flight (7_1)/(0_0)/(1_0) Newton-iteration arithmetic and
! the (3_0) hi/lo split, and stage the (5_0) scale word in [%fp+dtmp11].
.update17:
        cmp     counter,6
        ble     1f
        nop

        sub     counter,6,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     6,counter               ! drain only the in-flight work
1:
        fsubd   %f50,D2ON36,%f54        ! (3_0) y_hi0 -= D2ON36;

        fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;

        fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;

        fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
        fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
        faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;

        fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
        fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands

        sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
        ba      .cont20
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update18:
        cmp     counter,6
        ble     1f
        nop

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     6,counter
1:
        fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;

        fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;

        fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
        fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
        faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;

        fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
        fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands

        sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
        ba      .cont20
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)

        .align  16
.update19:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont19a           ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont19a)

        cmp     counter,6
        ble,a   1f
        nop

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        mov     6,counter
        stx     %i3,[%fp+tmp_py]
1:
        fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;

        ba      .cont19b
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)
2704 
        .align  16
! .update21 .. .update23: fixup stubs at pipeline depth 7 (rejoin at
! .cont24 / .cont23b); same save/truncate/substitute pattern, replaying
! the in-flight (7_1)/(0_0)/(1_0)/(2_0) arithmetic and the (4_0) hi/lo
! split, and staging the (6_0) scale word in [%fp+dtmp13].
.update21:
        cmp     counter,7
        ble     1f
        nop

        sub     counter,7,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     7,counter               ! drain only the in-flight work
1:
        fsubd   %f50,D2ON36,%f54        ! (4_0) y_hi0 -= D2ON36;

        fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
        faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;

        fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);

        fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;

        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
        faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;

        fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;

        sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
        ba      .cont24
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update22:
        cmp     counter,7
        ble     1f
        nop

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        stx     %o0,[%fp+tmp_py]

        mov     7,counter
1:
        fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
        faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;

        fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);

        fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;

        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
        faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;

        fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;

        sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
        ba      .cont24
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)

        .align  16
.update23:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont23a           ! (0_0) if ( hy0 < 0x00100000 )
                                        ! (branch taken when hy0 >= 0x00100000:
                                        ! resume main path at .cont23a)

        cmp     counter,7
        ble,a   1f
        nop

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]

        mov     7,counter
        stx     %o0,[%fp+tmp_py]
1:
        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! substitute dummy operands
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;

        ba      .cont23b
        add     TBL,TBL_SHIFT+24,%o0    ! (delay slot)
2815 
        .align  16
! .update25: fixup stub at pipeline depth 8 (rejoin at .cont28); same
! save/truncate/substitute pattern, replaying the in-flight (0_0)/(1_0)/
! (2_0)/(3_0) arithmetic and the (5_0) hi/lo split, and staging the
! (7_0) scale word in [%fp+dtmp15].
.update25:
        cmp     counter,8
        ble     1f
        nop

        sub     counter,8,counter       ! elements left for the scalar restart
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i3,[%fp+tmp_py]

        mov     8,counter               ! drain only the in-flight work
1:
        fsubd   %f12,D2ON36,%f54        ! (5_0) y_hi0 -= D2ON36;

        fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
        faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;

        fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
        fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
        faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;

        fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
        faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;

        fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! substitute dummy operands
        fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;

        sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
        ba      .cont28
        add     TBL,TBL_SHIFT+24,%i3    ! (delay slot)
2859 
2860         .align  16
2861 .update26:
        ! Fixup path: identical bookkeeping and FP replay to .update25
        ! except that the (5_0) "y_hi0 -= D2ON36" step is not replayed
        ! here (presumably it already executed before this entry point
        ! was taken — TODO confirm at the branch site).  Defers elements
        ! beyond the 8th to the cleanup pass and rejoins at .cont28.
2862         cmp     counter,8               ! nothing to defer if <= 8 remain
2863         ble     1f
2864         nop
2865 
2866         sub     counter,8,counter       ! elements beyond this pass ...
2867         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
2868 
2869         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
2870 
2871         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
2872 
2873         mov     8,counter               ! clamp this pass to 8 elements
2874 1:
2875         fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
2876         faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;
2877 
2878         fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);
2879 
2880         fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
2881         fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;
2882 
2883         fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
2884         faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;
2885 
2886         fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
2887         fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;
2888 
2889         fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
2890         faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;
2891 
2892         fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
2893         sethi   %hi(0x3ff00000),%g1
2894         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
2895         fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;
2896 
2897         sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
2898         stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
2899         ba      .cont28
2900         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
2901 
2902         .align  16
2903 .update27:
        ! Fixup with hy0 re-screen (see .update23 for the full pattern):
        ! if hy0 >= 0x00100000 — 0x00100000 being the smallest normal
        ! double high word, so presumably "y is normal" — rejoin at
        ! .cont27a with no deferral (the EOL comment shows the C-source
        ! condition; the bge branches when it is FALSE).  The cmp below
        ! executes in the branch delay slot.  Otherwise defer elements
        ! beyond the 8th and rejoin at .cont27b.
2904         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
2905         bge,pn  %icc,.cont27a           ! (0_0) if ( hy0 < 0x00100000 )
2906 
2907         cmp     counter,8               ! (delay slot) elements left this pass?
2908         ble,a   1f
2909         nop
2910 
2911         sub     counter,8,counter       ! elements beyond this pass ...
2912         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
2913 
2914         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
2915 
2916         mov     8,counter               ! clamp this pass to 8 elements
2917         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
2918 1:
2919         fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
2920         sethi   %hi(0x3ff00000),%g1
2921         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
2922         fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;
2923 
2924         ba      .cont27b
2925         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
2926 
2927         .align  16
2928 .update29:
        ! Fixup path (second-iteration "_1" stage group): if more than 1
        ! element remains, defer everything past the current element to
        ! the scalar cleanup pass, clamp this pass to 1, replay the
        ! skipped FP stage, and rejoin at .cont32.
2929         cmp     counter,1               ! nothing to defer if <= 1 remains
2930         ble     1f
2931         nop
2932 
2933         sub     counter,1,counter       ! elements beyond this pass ...
2934         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
2935 
2936         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
2937 
2938         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
2939 
2940         mov     1,counter               ! clamp this pass to 1 element
2941 1:
2942         fsubd   %f2,D2ON36,%f2          ! (6_1) y_hi0 -= D2ON36;
2943 
2944         fmuld   %f54,%f24,%f50          ! (4_1) dtmp0 = dd * dres;
2945         stx     %g1,[%fp+dtmp0]         ! (7_1) *(long long*)&scl0 = ll;
2946         faddd   %f28,%f48,%f52          ! (1_1) res0 += dtmp0;
2947 
2948         fand    %f26,DA0,%f48           ! (2_1) res0 = vis_fand(dres,DA0);
2949 
2950         fmuld   %f20,%f20,%f0           ! (6_1) res0_hi = x_hi0 * x_hi0;
2951         fsubd   %f10,%f20,%f28          ! (6_1) x_lo0 = x0 - x_hi0;
2952 
2953         fmuld   %f2,%f2,%f46            ! (6_1) dtmp0 = y_hi0 * y_hi0;
2954         add     %i5,stridez,%i5         ! pz += stridez
2955         faddd   %f10,%f20,%f62          ! (6_1) res0_lo = x0 + x_hi0;
2956 
2957         fmuld   %f18,%f22,%f22          ! (3_1) dtmp2 = dd * dres;
2958         sethi   %hi(0x3ff00000),%o4
2959         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
2960         fsubd   DTWO,%f50,%f20          ! (4_1) dtmp0 = DTWO - dtmp0;
2961 
2962         ba      .cont32
2963         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
2964 
2965         .align  16
2966 .update30:
        ! Fixup path: same bookkeeping and FP replay as .update29 minus
        ! the leading (6_1) "y_hi0 -= D2ON36" step (presumably already
        ! done before this entry is taken — TODO confirm at the branch
        ! site).  Defers elements beyond the 1st; rejoins at .cont32.
2967         cmp     counter,1               ! nothing to defer if <= 1 remains
2968         ble     1f
2969         nop
2970 
2971         sub     counter,1,counter       ! elements beyond this pass ...
2972         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
2973 
2974         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
2975 
2976         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
2977 
2978         mov     1,counter               ! clamp this pass to 1 element
2979 1:
2980         fmuld   %f54,%f24,%f50          ! (4_1) dtmp0 = dd * dres;
2981         stx     %g1,[%fp+dtmp0]         ! (7_1) *(long long*)&scl0 = ll;
2982         faddd   %f28,%f48,%f52          ! (1_1) res0 += dtmp0;
2983 
2984         fand    %f26,DA0,%f48           ! (2_1) res0 = vis_fand(dres,DA0);
2985 
2986         fmuld   %f20,%f20,%f0           ! (6_1) res0_hi = x_hi0 * x_hi0;
2987         fsubd   %f10,%f20,%f28          ! (6_1) x_lo0 = x0 - x_hi0;
2988 
2989         fmuld   %f2,%f2,%f46            ! (6_1) dtmp0 = y_hi0 * y_hi0;
2990         add     %i5,stridez,%i5         ! pz += stridez
2991         faddd   %f10,%f20,%f62          ! (6_1) res0_lo = x0 + x_hi0;
2992 
2993         fmuld   %f18,%f22,%f22          ! (3_1) dtmp2 = dd * dres;
2994         sethi   %hi(0x3ff00000),%o4
2995         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
2996         fsubd   DTWO,%f50,%f20          ! (4_1) dtmp0 = DTWO - dtmp0;
2997 
2998         ba      .cont32
2999         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3000 
3001         .align  16
3002 .update31:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont31
        ! directly — the EOL comment shows the C condition; the bge
        ! branches when it is FALSE — with the cmp below executing in the
        ! branch delay slot.  Otherwise defer past the 1st element and
        ! replay the skipped FP stage before rejoining at .cont32.
3003         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3004         bge,pn  %icc,.cont31            ! (0_0) if ( hy0 < 0x00100000 )
3005 
3006         cmp     counter,1               ! (delay slot) elements left this pass?
3007         ble,a   1f
3008         nop
3009 
3010         sub     counter,1,counter       ! elements beyond this pass ...
3011         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3012 
3013         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3014 
3015         mov     1,counter               ! clamp this pass to 1 element
3016         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3017 1:
3018         fmuld   %f20,%f20,%f0           ! (6_1) res0_hi = x_hi0 * x_hi0;
3019         fsubd   %f10,%f20,%f28          ! (6_1) x_lo0 = x0 - x_hi0;
3020 
3021         fmuld   %f2,%f2,%f46            ! (6_1) dtmp0 = y_hi0 * y_hi0;
3022         add     %i5,stridez,%i5         ! pz += stridez
3023         faddd   %f10,%f20,%f62          ! (6_1) res0_lo = x0 + x_hi0;
3024 
3025         fmuld   %f18,%f22,%f22          ! (3_1) dtmp2 = dd * dres;
3026         sethi   %hi(0x3ff00000),%o4
3027         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3028         fsubd   DTWO,%f50,%f20          ! (4_1) dtmp0 = DTWO - dtmp0;
3029 
3030         ba      .cont32
3031         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3032 
3033         .align  16
3034 .update33:
        ! Fixup path: defer elements beyond the 2nd to the scalar cleanup
        ! pass, clamp this pass to 2, replay the skipped FP stage
        ! (including the pending result store of the (0_1) iteration),
        ! and rejoin at .cont36.
3035         cmp     counter,2               ! nothing to defer if <= 2 remain
3036         ble     1f
3037         nop
3038 
3039         sub     counter,2,counter       ! elements beyond this pass ...
3040         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3041 
3042         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3043 
3044         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3045 
3046         mov     2,counter               ! clamp this pass to 2 elements
3047 1:
3048         st      %f1,[%i5+4]             ! (0_1) ((float*)pz)[1] = ((float*)&res0)[1];
3049         fsubd   %f50,D2ON36,%f54        ! (7_1) y_hi0 -= D2ON36;
3050 
3051         fmuld   %f26,%f16,%f50          ! (5_1) dtmp0 = dd * dres;
3052         faddd   %f48,%f52,%f52          ! (2_1) res0 += dtmp0;
3053 
3054         add     %i5,stridez,%i5         ! pz += stridez
3055         stx     %o4,[%fp+dtmp2]         ! (0_0) *(long long*)&scl0 = ll;
3056         fand    %f28,DA0,%f48           ! (3_1) res0 = vis_fand(dres,DA0);
3057 
3058         fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
3059         fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;
3060 
3061         fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
3062         faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;
3063 
3064         fmuld   %f44,%f48,%f10          ! (3_1) dtmp0 = res0_hi * res0;
3065         fsubd   DTWO,%f50,%f20          ! (5_1) dtmp0 = DTWO - dtmp0;
3066 
3067         fmuld   %f14,%f24,%f24          ! (4_1) dtmp2 = dd * dres;
3068         faddd   %f60,%f54,%f50          ! (7_1) dtmp1 = y0 + y_hi0;
3069 
3070         fmuld   %f38,%f48,%f38          ! (3_1) dtmp1 = res0_lo * res0;
3071         sethi   %hi(0x3ff00000),%o4
3072         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3073         fsubd   %f60,%f54,%f12          ! (7_1) y_lo0 = y0 - y_hi0;
3074 
3075         sllx    %o4,32,%o4              ! (1_0) ll = (long long)j0 << 32;
3076         stx     %o4,[%fp+dtmp3]         ! (1_0) *(long long*)&scl0 = ll;
3077         ba      .cont36
3078         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3079 
3080         .align  16
3081 .update34:
        ! Fixup path: same as .update33 but without replaying the leading
        ! result store / D2ON36 / (5_1)-(2_1) steps (presumably already
        ! executed before this entry — TODO confirm at the branch site).
        ! Defers elements beyond the 2nd; rejoins at .cont36.
3082         cmp     counter,2               ! nothing to defer if <= 2 remain
3083         ble     1f
3084         nop
3085 
3086         sub     counter,2,counter       ! elements beyond this pass ...
3087         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3088 
3089         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3090 
3091         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3092 
3093         mov     2,counter               ! clamp this pass to 2 elements
3094 1:
3095         add     %i5,stridez,%i5         ! pz += stridez
3096         stx     %o4,[%fp+dtmp2]         ! (0_0) *(long long*)&scl0 = ll;
3097         fand    %f28,DA0,%f48           ! (3_1) res0 = vis_fand(dres,DA0);
3098 
3099         fmuld   %f20,%f20,%f2           ! (7_1) res0_hi = x_hi0 * x_hi0;
3100         fsubd   %f10,%f20,%f0           ! (7_1) x_lo0 = x0 - x_hi0;
3101 
3102         fmuld   %f54,%f54,%f46          ! (7_1) dtmp0 = y_hi0 * y_hi0;
3103         faddd   %f10,%f20,%f62          ! (7_1) res0_lo = x0 + x_hi0;
3104 
3105         fmuld   %f44,%f48,%f10          ! (3_1) dtmp0 = res0_hi * res0;
3106         fsubd   DTWO,%f50,%f20          ! (5_1) dtmp0 = DTWO - dtmp0;
3107 
3108         fmuld   %f14,%f24,%f24          ! (4_1) dtmp2 = dd * dres;
3109         faddd   %f60,%f54,%f50          ! (7_1) dtmp1 = y0 + y_hi0;
3110 
3111         fmuld   %f38,%f48,%f38          ! (3_1) dtmp1 = res0_lo * res0;
3112         sethi   %hi(0x3ff00000),%o4
3113         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3114         fsubd   %f60,%f54,%f12          ! (7_1) y_lo0 = y0 - y_hi0;
3115 
3116         sllx    %o4,32,%o4              ! (1_0) ll = (long long)j0 << 32;
3117         stx     %o4,[%fp+dtmp3]         ! (1_0) *(long long*)&scl0 = ll;
3118         ba      .cont36
3119         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3120 
3121         .align  16
3122 .update35:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont35a
        ! with no deferral — the EOL comment shows the C condition; the
        ! bge branches when it is FALSE — with the cmp below executing in
        ! the branch delay slot.  Otherwise defer past the 2nd element
        ! and rejoin at .cont35b.
3123         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3124         bge,pn  %icc,.cont35a           ! (0_0) if ( hy0 < 0x00100000 )
3125 
3126         cmp     counter,2               ! (delay slot) elements left this pass?
3127         ble,a   1f
3128         nop
3129 
3130         sub     counter,2,counter       ! elements beyond this pass ...
3131         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3132 
3133         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3134 
3135         mov     2,counter               ! clamp this pass to 2 elements
3136         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3137 1:
3138         fmuld   %f44,%f48,%f10          ! (3_1) dtmp0 = res0_hi * res0;
3139         sethi   %hi(0x3ff00000),%o4
3140         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3141         fsubd   DTWO,%f50,%f20          ! (5_1) dtmp0 = DTWO - dtmp0;
3142 
3143         ba      .cont35b
3144         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3145 
3146         .align  16
3147 .update37:
        ! Fixup path: defer elements beyond the 3rd to the scalar cleanup
        ! pass, clamp this pass to 3, replay the skipped FP stage
        ! (including the pending (1_1) result store), and rejoin at
        ! .cont40.
3148         cmp     counter,3               ! nothing to defer if <= 3 remain
3149         ble     1f
3150         nop
3151 
3152         sub     counter,3,counter       ! elements beyond this pass ...
3153         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3154 
3155         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3156 
3157         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3158 
3159         mov     3,counter               ! clamp this pass to 3 elements
3160 1:
3161         st      %f1,[%i5+4]             ! (1_1) ((float*)pz)[1] = ((float*)&res0)[1];
3162         fsubd   %f12,D2ON36,%f54        ! (0_0) y_hi0 -= D2ON36;
3163 
3164         fmuld   %f28,%f18,%f50          ! (6_1) dtmp0 = dd * dres;
3165         faddd   %f48,%f52,%f52          ! (3_1) res0 += dtmp0;
3166 
3167         add     %i5,stridez,%i5         ! pz += stridez
3168         stx     %o4,[%fp+dtmp4]         ! (1_0) *(long long*)&scl0 = ll;
3169         fand    %f26,DA0,%f48           ! (4_1) res0 = vis_fand(dres,DA0);
3170 
3171         fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
3172         fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;
3173 
3174         fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
3175         faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;
3176 
3177         fmuld   %f32,%f48,%f10          ! (4_1) dtmp0 = res0_hi * res0;
3178         fsubd   DTWO,%f50,%f20          ! (6_1) dtmp0 = DTWO - dtmp0;
3179 
3180         fmuld   %f22,%f16,%f16          ! (5_1) dtmp2 = dd * dres;
3181         faddd   %f60,%f54,%f50          ! (0_0) dtmp1 = y0 + y_hi0;
3182 
3183         fmuld   %f36,%f48,%f36          ! (4_1) dtmp1 = res0_lo * res0;
3184         sethi   %hi(0x3ff00000),%g1
3185         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3186         fsubd   %f60,%f54,%f12          ! (0_0) y_lo0 = y0 - y_hi0;
3187 
3188         sllx    %g1,32,%g1              ! (2_0) ll = (long long)j0 << 32;
3189         stx     %g1,[%fp+dtmp5]         ! (2_0) *(long long*)&scl0 = ll;
3190         ba      .cont40
3191         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3192 
3193         .align  16
3194 .update38:
        ! Fixup path: same as .update37 but without replaying the leading
        ! store / D2ON36 / (6_1)-(3_1) steps (presumably already executed
        ! before this entry — TODO confirm at the branch site).  Defers
        ! elements beyond the 3rd; rejoins at .cont40.
3195         cmp     counter,3               ! nothing to defer if <= 3 remain
3196         ble     1f
3197         nop
3198 
3199         sub     counter,3,counter       ! elements beyond this pass ...
3200         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3201 
3202         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3203 
3204         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3205 
3206         mov     3,counter               ! clamp this pass to 3 elements
3207 1:
3208         add     %i5,stridez,%i5         ! pz += stridez
3209         stx     %o4,[%fp+dtmp4]         ! (1_0) *(long long*)&scl0 = ll;
3210         fand    %f26,DA0,%f48           ! (4_1) res0 = vis_fand(dres,DA0);
3211 
3212         fmuld   %f20,%f20,%f2           ! (0_0) res0_hi = x_hi0 * x_hi0;
3213         fsubd   %f10,%f20,%f0           ! (0_0) x_lo0 = x0 - x_hi0;
3214 
3215         fmuld   %f54,%f54,%f46          ! (0_0) dtmp0 = y_hi0 * y_hi0;
3216         faddd   %f10,%f20,%f62          ! (0_0) res0_lo = x0 + x_hi0;
3217 
3218         fmuld   %f32,%f48,%f10          ! (4_1) dtmp0 = res0_hi * res0;
3219         fsubd   DTWO,%f50,%f20          ! (6_1) dtmp0 = DTWO - dtmp0;
3220 
3221         fmuld   %f22,%f16,%f16          ! (5_1) dtmp2 = dd * dres;
3222         faddd   %f60,%f54,%f50          ! (0_0) dtmp1 = y0 + y_hi0;
3223 
3224         fmuld   %f36,%f48,%f36          ! (4_1) dtmp1 = res0_lo * res0;
3225         sethi   %hi(0x3ff00000),%g1
3226         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3227         fsubd   %f60,%f54,%f12          ! (0_0) y_lo0 = y0 - y_hi0;
3228 
3229         sllx    %g1,32,%g1              ! (2_0) ll = (long long)j0 << 32;
3230         stx     %g1,[%fp+dtmp5]         ! (2_0) *(long long*)&scl0 = ll;
3231         ba      .cont40
3232         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3233 
3234         .align  16
3235 .update39:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont39a
        ! with no deferral — the EOL comment shows the C condition; the
        ! bge branches when it is FALSE — with the cmp below executing in
        ! the branch delay slot.  Otherwise defer past the 3rd element
        ! and rejoin at .cont39b.
3236         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3237         bge,pn  %icc,.cont39a           ! (0_0) if ( hy0 < 0x00100000 )
3238 
3239         cmp     counter,3               ! (delay slot) elements left this pass?
3240         ble,a   1f
3241         nop
3242 
3243         sub     counter,3,counter       ! elements beyond this pass ...
3244         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3245 
3246         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3247 
3248         mov     3,counter               ! clamp this pass to 3 elements
3249         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3250 1:
3251         fmuld   %f32,%f48,%f10          ! (4_1) dtmp0 = res0_hi * res0;
3252         sethi   %hi(0x3ff00000),%g1
3253         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3254         fsubd   DTWO,%f50,%f20          ! (6_1) dtmp0 = DTWO - dtmp0;
3255 
3256         ba      .cont39b
3257         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3258 
3259         .align  16
3260 .update41:
        ! Fixup path: defer elements beyond the 4th to the scalar cleanup
        ! pass, clamp this pass to 4, replay the skipped FP stage
        ! (including the pending (2_1) result store), and rejoin at
        ! .cont44.
3261         cmp     counter,4               ! nothing to defer if <= 4 remain
3262         ble     1f
3263         nop
3264 
3265         sub     counter,4,counter       ! elements beyond this pass ...
3266         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3267 
3268         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3269 
3270         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3271 
3272         mov     4,counter               ! clamp this pass to 4 elements
3273 1:
3274         st      %f1,[%i5+4]             ! (2_1) ((float*)pz)[1] = ((float*)&res0)[1];
3275         fsubd   %f12,D2ON36,%f54        ! (1_0) y_hi0 -= D2ON36;
3276 
3277         fmuld   %f26,%f14,%f50          ! (7_1) dtmp0 = dd * dres;
3278         faddd   %f48,%f52,%f52          ! (4_1) res0 += dtmp0;
3279 
3280         add     %i5,stridez,%i5         ! pz += stridez
3281         stx     %g1,[%fp+dtmp6]         ! (2_0) *(long long*)&scl0 = ll;
3282         fand    %f28,DA0,%f48           ! (5_1) res0 = vis_fand(dres,DA0);
3283 
3284         fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
3285         fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;
3286 
3287         fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
3288         faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;
3289 
3290         fmuld   %f42,%f48,%f10          ! (5_1) dtmp0 = res0_hi * res0;
3291         fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
3292 
3293         fmuld   %f24,%f18,%f18          ! (6_1) dtmp2 = dd * dres;
3294         faddd   %f60,%f54,%f50          ! (1_0) dtmp1 = y0 + y_hi0;
3295 
3296         fmuld   %f34,%f48,%f34          ! (5_1) dtmp1 = res0_lo * res0;
3297         sethi   %hi(0x3ff00000),%g1
3298         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3299         fsubd   %f60,%f54,%f12          ! (1_0) y_lo0 = y0 - y_hi0
3300 
3301         sllx    %g1,32,%g1              ! (3_0) ll = (long long)j0 << 32;
3302         stx     %g1,[%fp+dtmp7]         ! (3_0) *(long long*)&scl0 = ll;
3303         ba      .cont44
3304         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3305 
3306         .align  16
3307 .update42:
        ! Fixup path: same as .update41 but without replaying the leading
        ! store / D2ON36 / (7_1)-(4_1) steps (presumably already executed
        ! before this entry — TODO confirm at the branch site).  Defers
        ! elements beyond the 4th; rejoins at .cont44.
3308         cmp     counter,4               ! nothing to defer if <= 4 remain
3309         ble     1f
3310         nop
3311 
3312         sub     counter,4,counter       ! elements beyond this pass ...
3313         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3314 
3315         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3316 
3317         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3318 
3319         mov     4,counter               ! clamp this pass to 4 elements
3320 1:
3321         add     %i5,stridez,%i5         ! pz += stridez
3322         stx     %g1,[%fp+dtmp6]         ! (2_0) *(long long*)&scl0 = ll;
3323         fand    %f28,DA0,%f48           ! (5_1) res0 = vis_fand(dres,DA0);
3324 
3325         fmuld   %f20,%f20,%f2           ! (1_0) res0_hi = x_hi0 * x_hi0;
3326         fsubd   %f10,%f20,%f0           ! (1_0) x_lo0 = x0 - x_hi0;
3327 
3328         fmuld   %f54,%f54,%f46          ! (1_0) dtmp0 = y_hi0 * y_hi0;
3329         faddd   %f10,%f20,%f62          ! (1_0) res0_lo = x0 + x_hi0;
3330 
3331         fmuld   %f42,%f48,%f10          ! (5_1) dtmp0 = res0_hi * res0;
3332         fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
3333 
3334         fmuld   %f24,%f18,%f18          ! (6_1) dtmp2 = dd * dres;
3335         faddd   %f60,%f54,%f50          ! (1_0) dtmp1 = y0 + y_hi0;
3336 
3337         fmuld   %f34,%f48,%f34          ! (5_1) dtmp1 = res0_lo * res0;
3338         sethi   %hi(0x3ff00000),%g1
3339         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3340         fsubd   %f60,%f54,%f12          ! (1_0) y_lo0 = y0 - y_hi0
3341 
3342         sllx    %g1,32,%g1              ! (3_0) ll = (long long)j0 << 32;
3343         stx     %g1,[%fp+dtmp7]         ! (3_0) *(long long*)&scl0 = ll;
3344         ba      .cont44
3345         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3346 
3347         .align  16
3348 .update43:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont43a
        ! with no deferral — the EOL comment shows the C condition; the
        ! bge branches when it is FALSE — with the cmp below executing in
        ! the branch delay slot.  Otherwise defer past the 4th element
        ! and rejoin at .cont43b.
3349         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3350         bge,pn  %icc,.cont43a           ! (0_0) if ( hy0 < 0x00100000 )
3351 
3352         cmp     counter,4               ! (delay slot) elements left this pass?
3353         ble,a   1f
3354         nop
3355 
3356         sub     counter,4,counter       ! elements beyond this pass ...
3357         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3358 
3359         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3360 
3361         mov     4,counter               ! clamp this pass to 4 elements
3362         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3363 1:
3364         fmuld   %f42,%f48,%f10          ! (5_1) dtmp0 = res0_hi * res0;
3365         sethi   %hi(0x3ff00000),%g1
3366         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3367         fsubd   DTWO,%f50,%f20          ! (7_1) dtmp0 = DTWO - dtmp0;
3368 
3369         ba      .cont43b
3370         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3371 
3372         .align  16
3373 .update45:
        ! Fixup path: defer elements beyond the 5th to the scalar cleanup
        ! pass, clamp this pass to 5, replay the skipped FP stage
        ! (including the pending (3_1) result store), and rejoin at
        ! .cont48.
3374         cmp     counter,5               ! nothing to defer if <= 5 remain
3375         ble     1f
3376         nop
3377 
3378         sub     counter,5,counter       ! elements beyond this pass ...
3379         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3380 
3381         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3382 
3383         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3384 
3385         mov     5,counter               ! clamp this pass to 5 elements
3386 1:
3387         fsubd   %f50,D2ON36,%f54        ! (2_0) y_hi0 -= D2ON36;
3388 
3389         fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;
3390         st      %f1,[%i5+4]             ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3391         faddd   %f48,%f52,%f52          ! (5_1) res0 += dtmp0;
3392 
3393         fand    %f26,DA0,%f48           ! (6_1) res0 = vis_fand(dres,DA0);
3394 
3395         fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
3396         stx     %g1,[%fp+dtmp8]         ! (3_0) *(long long*)&scl0 = ll;
3397         fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;
3398 
3399         fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
3400         add     %i5,stridez,%i5         ! pz += stridez
3401         faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;
3402 
3403         fmuld   %f30,%f48,%f10          ! (6_1) dtmp0 = res0_hi * res0;
3404         fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
3405 
3406         fmuld   %f16,%f14,%f14          ! (7_1) dtmp2 = dd * dres;
3407         faddd   %f60,%f54,%f50          ! (2_0) dtmp1 = y0 + y_hi0;
3408 
3409         fmuld   %f40,%f48,%f40          ! (6_1) dtmp1 = res0_lo * res0;
3410         sethi   %hi(0x3ff00000),%g1
3411         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3412         fsubd   %f60,%f54,%f12          ! (2_0) y_lo0 = y0 - y_hi0;
3413 
3414         sllx    %g1,32,%g1              ! (4_0) ll = (long long)j0 << 32;
3415         stx     %g1,[%fp+dtmp9]         ! (4_0) *(long long*)&scl0 = ll;
3416         ba      .cont48
3417         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3418 
3419         .align  16
3420 .update46:
        ! Fixup path: same as .update45 but without replaying the leading
        ! (2_0) "y_hi0 -= D2ON36" step (presumably already executed
        ! before this entry — TODO confirm at the branch site).  Defers
        ! elements beyond the 5th; rejoins at .cont48.
3421         cmp     counter,5               ! nothing to defer if <= 5 remain
3422         ble     1f
3423         nop
3424 
3425         sub     counter,5,counter       ! elements beyond this pass ...
3426         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3427 
3428         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3429 
3430         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3431 
3432         mov     5,counter               ! clamp this pass to 5 elements
3433 1:
3434         fmuld   %f28,%f22,%f50          ! (0_0) dtmp0 = dd * dres;
3435         st      %f1,[%i5+4]             ! (3_1) ((float*)pz)[1] = ((float*)&res0)[1];
3436         faddd   %f48,%f52,%f52          ! (5_1) res0 += dtmp0;
3437 
3438         fand    %f26,DA0,%f48           ! (6_1) res0 = vis_fand(dres,DA0);
3439 
3440         fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
3441         stx     %g1,[%fp+dtmp8]         ! (3_0) *(long long*)&scl0 = ll;
3442         fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;
3443 
3444         fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
3445         add     %i5,stridez,%i5         ! pz += stridez
3446         faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;
3447 
3448         fmuld   %f30,%f48,%f10          ! (6_1) dtmp0 = res0_hi * res0;
3449         fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
3450 
3451         fmuld   %f16,%f14,%f14          ! (7_1) dtmp2 = dd * dres;
3452         faddd   %f60,%f54,%f50          ! (2_0) dtmp1 = y0 + y_hi0;
3453 
3454         fmuld   %f40,%f48,%f40          ! (6_1) dtmp1 = res0_lo * res0;
3455         sethi   %hi(0x3ff00000),%g1
3456         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3457         fsubd   %f60,%f54,%f12          ! (2_0) y_lo0 = y0 - y_hi0;
3458 
3459         sllx    %g1,32,%g1              ! (4_0) ll = (long long)j0 << 32;
3460         stx     %g1,[%fp+dtmp9]         ! (4_0) *(long long*)&scl0 = ll;
3461         ba      .cont48
3462         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3463 
3464         .align  16
3465 .update47:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont47a
        ! with no deferral — the EOL comment shows the C condition; the
        ! bge branches when it is FALSE — with the cmp below executing in
        ! the branch delay slot.  Otherwise defer past the 5th element,
        ! replay the larger skipped FP stage, and rejoin at .cont47b.
3466         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3467         bge,pn  %icc,.cont47a           ! (0_0) if ( hy0 < 0x00100000 )
3468 
3469         cmp     counter,5               ! (delay slot) elements left this pass?
3470         ble,a   1f
3471         nop
3472 
3473         sub     counter,5,counter       ! elements beyond this pass ...
3474         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3475 
3476         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3477 
3478         mov     5,counter               ! clamp this pass to 5 elements
3479         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3480 1:
3481         fmuld   %f20,%f20,%f2           ! (2_0) res0_hi = x_hi0 * x_hi0;
3482         stx     %g1,[%fp+dtmp8]         ! (3_0) *(long long*)&scl0 = ll;
3483         fsubd   %f10,%f20,%f0           ! (2_0) x_lo0 = x0 - x_hi0;
3484 
3485         fmuld   %f54,%f54,%f46          ! (2_0) dtmp0 = y_hi0 * y_hi0;
3486         add     %i5,stridez,%i5         ! pz += stridez
3487         faddd   %f10,%f20,%f62          ! (2_0) res0_lo = x0 + x_hi0;
3488 
3489         fmuld   %f30,%f48,%f10          ! (6_1) dtmp0 = res0_hi * res0;
3490         sethi   %hi(0x3ff00000),%g1
3491         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3492         fsubd   DTWO,%f50,%f20          ! (0_0) dtmp0 = DTWO - dtmp0;
3493 
3494         ba      .cont47b
3495         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3496 
3497         .align  16
3498 .update49:
        ! Fixup path: defer elements beyond the 6th to the scalar cleanup
        ! pass, clamp this pass to 6, replay the skipped FP stage
        ! (including the pending (4_1) result store), and rejoin at
        ! .cont52.
3499         cmp     counter,6               ! nothing to defer if <= 6 remain
3500         ble     1f
3501         nop
3502 
3503         sub     counter,6,counter       ! elements beyond this pass ...
3504         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3505 
3506         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3507 
3508         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3509 
3510         mov     6,counter               ! clamp this pass to 6 elements
3511 1:
3512         fsubd   %f50,D2ON36,%f54        ! (3_0) y_hi0 -= D2ON36;
3513 
3514         fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;
3515         st      %f1,[%i5+4]             ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3516         faddd   %f48,%f52,%f52          ! (6_1) res0 += dtmp0;
3517 
3518         fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);
3519 
3520         fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
3521         stx     %g1,[%fp+dtmp10]        ! (4_0) *(long long*)&scl0 = ll;
3522         fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;
3523 
3524         fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
3525         add     %i5,stridez,%i5         ! pz += stridez
3526         faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;
3527 
3528         fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
3529         fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;
3530 
3531         fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
3532         faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;
3533 
3534         fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
3535         sethi   %hi(0x3ff00000),%g1
3536         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3537         fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;
3538 
3539         sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
3540         stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
3541         ba      .cont52
3542         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3543 
3544         .align  16
3545 .update50:
        ! Fixup path: same as .update49 but without replaying the leading
        ! (3_0) "y_hi0 -= D2ON36" step (presumably already executed
        ! before this entry — TODO confirm at the branch site).  Defers
        ! elements beyond the 6th; rejoins at .cont52.
3546         cmp     counter,6               ! nothing to defer if <= 6 remain
3547         ble     1f
3548         nop
3549 
3550         sub     counter,6,counter       ! elements beyond this pass ...
3551         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3552 
3553         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3554 
3555         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3556 
3557         mov     6,counter               ! clamp this pass to 6 elements
3558 1:
3559         fmuld   %f26,%f18,%f50          ! (1_0) dtmp0 = dd * dres;
3560         st      %f1,[%i5+4]             ! (4_1) ((float*)pz)[1] = ((float*)&res0)[1];
3561         faddd   %f48,%f52,%f52          ! (6_1) res0 += dtmp0;
3562 
3563         fand    %f28,DA0,%f48           ! (7_1) res0 = vis_fand(dres,DA0);
3564 
3565         fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
3566         stx     %g1,[%fp+dtmp10]        ! (4_0) *(long long*)&scl0 = ll;
3567         fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;
3568 
3569         fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
3570         add     %i5,stridez,%i5         ! pz += stridez
3571         faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;
3572 
3573         fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
3574         fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;
3575 
3576         fmuld   %f24,%f22,%f22          ! (0_0) dtmp2 = dd * dres;
3577         faddd   %f60,%f54,%f50          ! (3_0) dtmp1 = y0 + y_hi0;
3578 
3579         fmuld   %f38,%f48,%f38          ! (7_1) dtmp1 = res0_lo * res0;
3580         sethi   %hi(0x3ff00000),%g1
3581         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3582         fsubd   %f60,%f54,%f12          ! (3_0) y_lo0 = y0 - y_hi0;
3583 
3584         sllx    %g1,32,%g1              ! (5_0) ll = (long long)j0 << 32;
3585         stx     %g1,[%fp+dtmp11]        ! (5_0) *(long long*)&scl0 = ll;
3586         ba      .cont52
3587         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3588 
3589         .align  16
3590 .update51:
        ! Fixup with hy0 re-screen (pattern of .update23): if
        ! hy0 >= 0x00100000 (presumably "y is normal") rejoin at .cont51a
        ! with no deferral — the EOL comment shows the C condition; the
        ! bge branches when it is FALSE — with the cmp below executing in
        ! the branch delay slot.  Otherwise defer past the 6th element,
        ! replay the larger skipped FP stage, and rejoin at .cont51b.
3591         cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
3592         bge,pn  %icc,.cont51a           ! (0_0) if ( hy0 < 0x00100000 )
3593 
3594         cmp     counter,6               ! (delay slot) elements left this pass?
3595         ble,a   1f
3596         nop
3597 
3598         sub     counter,6,counter       ! elements beyond this pass ...
3599         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3600 
3601         stx     %i4,[%fp+tmp_px]        ! save x pointer for cleanup pass
3602 
3603         mov     6,counter               ! clamp this pass to 6 elements
3604         stx     %i3,[%fp+tmp_py]        ! save y pointer for cleanup pass
3605 1:
3606         fmuld   %f20,%f20,%f2           ! (3_0) res0_hi = x_hi0 * x_hi0;
3607         stx     %g1,[%fp+dtmp10]        ! (4_0) *(long long*)&scl0 = ll;
3608         fsubd   %f10,%f20,%f0           ! (3_0) x_lo0 = x0 - x_hi0;
3609 
3610         fmuld   %f54,%f54,%f46          ! (3_0) dtmp0 = y_hi0 * y_hi0;
3611         add     %i5,stridez,%i5         ! pz += stridez
3612         faddd   %f10,%f20,%f62          ! (3_0) res0_lo = x0 + x_hi0;
3613 
3614         fmuld   %f44,%f48,%f10          ! (7_1) dtmp0 = res0_hi * res0;
3615         sethi   %hi(0x3ff00000),%g1
3616         add     TBL,TBL_SHIFT+24,%i4    ! dummy operand for px
3617         fsubd   DTWO,%f50,%f20          ! (1_0) dtmp0 = DTWO - dtmp0;
3618 
3619         ba      .cont51b
3620         add     TBL,TBL_SHIFT+24,%i3    ! (delay slot) dummy operand for py
3621 
3622         .align  16
3623 .update53:
        ! Fixup path: defer elements beyond the 7th to the scalar cleanup
        ! pass, clamp this pass to 7, replay the skipped FP stage
        ! (including the pending (5_1) result store), and rejoin at
        ! .cont56.
3624         cmp     counter,7               ! nothing to defer if <= 7 remain
3625         ble     1f
3626         nop
3627 
3628         sub     counter,7,counter       ! elements beyond this pass ...
3629         st      counter,[%fp+tmp_counter] ! ... deferred to cleanup pass
3630 
3631         stx     %i2,[%fp+tmp_px]        ! save x pointer for cleanup pass
3632 
3633         stx     %o0,[%fp+tmp_py]        ! save y pointer for cleanup pass
3634 
3635         mov     7,counter               ! clamp this pass to 7 elements
3636 1:
3637         fsubd   %f50,D2ON36,%f54        ! (4_0) y_hi0 -= D2ON36;
3638 
3639         fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
3640         st      %f1,[%i5+4]             ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
3641         faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;
3642 
3643         fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);
3644 
3645         fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
3646         stx     %g1,[%fp+dtmp12]        ! (5_0) *(long long*)&scl0 = ll;
3647         fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;
3648 
3649         fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
3650         add     %i5,stridez,%i5         ! pz += stridez
3651         faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;
3652 
3653         fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
3654         fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;
3655 
3656         fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
3657         faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;
3658 
3659         fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
3660         sethi   %hi(0x3ff00000),%g1
3661         add     TBL,TBL_SHIFT+24,%i2    ! dummy operand for px
3662         fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;
3663 
3664         sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
3665         stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
3666         ba      .cont56
3667         add     TBL,TBL_SHIFT+24,%o0    ! (delay slot) dummy operand for py
3668 
        .align  16
! .update54: same deferral scheme as .update53 (spill residual counter
! and px/py when more than 7 iterations remain, cap counter at 7,
! substitute TBL+TBL_SHIFT+24 for px/py, rejoin at .cont56).  The only
! difference from .update53 is that the leading
! "fsubd %f50,D2ON36,%f54" (y_hi0 adjustment) is skipped -- presumably
! this entry point is reached after that stage has already executed;
! confirm against the branch site in the main loop.
.update54:
        cmp     counter,7
        ble     1f                      ! <= 7 left: nothing to spill
        nop                             ! branch delay slot

        sub     counter,7,counter       ! iterations still owed after this pass
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]        ! restart point for the deferred pass

        stx     %o0,[%fp+tmp_py]

        mov     7,counter               ! truncate the current pass
1:
        fmuld   %f52,%f14,%f50          ! (2_0) dtmp0 = dd * dres;
        st      %f1,[%i5+4]             ! (5_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f48,%f28,%f48          ! (7_1) res0 += dtmp0;

        fand    %f26,DA0,%f28           ! (0_0) res0 = vis_fand(dres,DA0);

        fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
        stx     %g1,[%fp+dtmp12]        ! (5_0) *(long long*)&scl0 = ll;
        fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;

        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f32,%f28,%f50          ! (0_0) dtmp0 = res0_hi * res0;
        faddd   %f60,%f54,%f46          ! (4_0) dtmp1 = y0 + y_hi0;

        fmuld   %f36,%f28,%f36          ! (0_0) dtmp1 = res0_lo * res0;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! px -> dummy table entry
        fsubd   %f60,%f54,%f60          ! (4_0) y_lo0 = y0 - y_hi0;

        sllx    %g1,32,%g1              ! (6_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp13]        ! (6_0) *(long long*)&scl0 = ll;
        ba      .cont56                 ! rejoin the main loop
        add     TBL,TBL_SHIFT+24,%o0    ! py -> dummy entry (delay slot)

        .align  16
! .update55: like .update51 but for the next pipeline position:
! re-checks hy0 and rejoins at .cont55a when it is >= 0x00100000
! (the "cmp counter,7" below is in the bge delay slot and executes
! either way; only the following ble consumes its result, so the
! taken path is unaffected).  Otherwise defers the rest of the vector:
! spill residual counter and px/py (%i2/%o0) when more than 7
! iterations remain, cap counter at 7, repoint px/py at
! TBL+TBL_SHIFT+24 (presumably a benign dummy operand -- confirm),
! replay the in-flight stages, and rejoin at .cont55b.
.update55:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont55a           ! (0_0) if ( hy0 < 0x00100000 )

        cmp     counter,7               ! (delay slot) iterations left?
        ble,a   1f                      ! <= 7 left: nothing to spill
        nop                             ! branch delay slot

        sub     counter,7,counter       ! iterations still owed after this pass
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]        ! restart point for the deferred pass

        mov     7,counter               ! truncate the current pass
        stx     %o0,[%fp+tmp_py]
1:
        fmuld   %f46,%f46,%f0           ! (4_0) res0_hi = x_hi0 * x_hi0;
        stx     %g1,[%fp+dtmp12]        ! (5_0) *(long long*)&scl0 = ll;
        fsubd   %f10,%f46,%f2           ! (4_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f20          ! (4_0) dtmp0 = y_hi0 * y_hi0;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f10,%f46,%f62          ! (4_0) res0_lo = x0 + x_hi0;

        fmuld   %f16,%f18,%f18          ! (1_0) dtmp2 = dd * dres;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i2    ! px -> dummy table entry
        fsubd   DTWO,%f50,%f10          ! (2_0) dtmp0 = DTWO - dtmp0;

        ba      .cont55b                ! rejoin the main loop
        add     TBL,TBL_SHIFT+24,%o0    ! py -> dummy entry (delay slot)

        .align  16
! .update57: deferral stub for the 8-deep pipeline position.  When
! more than 8 iterations remain, the residual count and the current
! px/py (%i4/%i3) are saved in tmp_counter/tmp_px/tmp_py and counter
! is capped at 8 so the current pass drains.  px/py are then
! redirected to TBL+TBL_SHIFT+24 (presumably a benign dummy operand
! in the constant table -- confirm) and the in-flight pipeline
! stages are replayed before rejoining at .cont60.
.update57:
        cmp     counter,8
        ble     1f                      ! <= 8 left: nothing to spill
        nop                             ! branch delay slot

        sub     counter,8,counter       ! iterations still owed after this pass
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]        ! restart point for the deferred pass

        stx     %i3,[%fp+tmp_py]

        mov     8,counter               ! truncate the current pass
1:
        fsubd   %f12,D2ON36,%f54        ! (5_0) y_hi0 -= D2ON36;

        fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
        st      %f3,[%i5+4]             ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;

        fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
        stx     %g1,[%fp+dtmp14]        ! (6_0) *(long long*)&scl0 = ll;
        fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;

        fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
        faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;

        fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
        fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! px -> dummy table entry

        sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
        ba      .cont60                 ! rejoin the main loop
        add     TBL,TBL_SHIFT+24,%i3    ! py -> dummy entry (delay slot)

        .align  16
! .update58: same deferral scheme as .update57 (spill residual
! counter and px/py %i4/%i3 when more than 8 iterations remain, cap
! counter at 8, substitute TBL+TBL_SHIFT+24 for px/py, rejoin at
! .cont60).  The only difference from .update57 is that the leading
! "fsubd %f12,D2ON36,%f54" (y_hi0 adjustment) is skipped --
! presumably this entry point is reached after that stage has
! already executed; confirm against the branch site in the main loop.
.update58:
        cmp     counter,8
        ble     1f                      ! <= 8 left: nothing to spill
        nop                             ! branch delay slot

        sub     counter,8,counter       ! iterations still owed after this pass
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]        ! restart point for the deferred pass

        stx     %i3,[%fp+tmp_py]

        mov     8,counter               ! truncate the current pass
1:
        fmuld   %f10,%f22,%f50          ! (3_0) dtmp0 = dd * dres;
        st      %f3,[%i5+4]             ! (6_1) ((float*)pz)[1] = ((float*)&res0)[1];
        faddd   %f28,%f48,%f48          ! (0_0) res0 += dtmp0;

        fand    %f16,DA0,%f28           ! (1_0) res0 = vis_fand(dres,DA0);

        fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
        stx     %g1,[%fp+dtmp14]        ! (6_0) *(long long*)&scl0 = ll;
        fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;

        fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
        fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;

        fmuld   %f42,%f28,%f60          ! (1_0) dtmp0 = res0_hi * res0;
        faddd   %f52,%f54,%f50          ! (5_0) dtmp1 = y0 + y_hi0;

        fmuld   %f34,%f28,%f34          ! (1_0) dtmp1 = res0_lo * res0;
        fsubd   %f52,%f54,%f54          ! (5_0) y_lo0 = y0 - y_hi0;

        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! px -> dummy table entry

        sllx    %g1,32,%g1              ! (7_0) ll = (long long)j0 << 32;
        stx     %g1,[%fp+dtmp15]        ! (7_0) *(long long*)&scl0 = ll;
        ba      .cont60                 ! rejoin the main loop
        add     TBL,TBL_SHIFT+24,%i3    ! py -> dummy entry (delay slot)

        .align  16
! .update59: like .update55 for the 8-deep position: re-checks hy0
! and rejoins at .cont59a when it is >= 0x00100000 (the
! "cmp counter,8" below is in the bge delay slot and executes either
! way; only the following ble consumes its result, so the taken path
! is unaffected).  Otherwise defers the rest of the vector: spill
! residual counter and px/py (%i4/%i3) when more than 8 iterations
! remain, cap counter at 8, repoint px/py at TBL+TBL_SHIFT+24
! (presumably a benign dummy operand -- confirm), replay the
! in-flight stages, and rejoin at .cont59b.
.update59:
        cmp     %l7,_0x00100000         ! (0_0) hy0 ? 0x00100000
        bge,pn  %icc,.cont59a           ! (0_0) if ( hy0 < 0x00100000 )

        cmp     counter,8               ! (delay slot) iterations left?
        ble,a   1f                      ! <= 8 left: nothing to spill
        nop                             ! branch delay slot

        sub     counter,8,counter       ! iterations still owed after this pass
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]        ! restart point for the deferred pass

        mov     8,counter               ! truncate the current pass
        stx     %i3,[%fp+tmp_py]
1:
        fmuld   %f20,%f20,%f0           ! (5_0) res0_hi = x_hi0 * x_hi0;
        stx     %g1,[%fp+dtmp14]        ! (6_0) *(long long*)&scl0 = ll;
        fsubd   %f60,%f20,%f2           ! (5_0) x_lo0 = x0 - x_hi0;

        fmuld   %f54,%f54,%f46          ! (5_0) dtmp0 = y_hi0 * y_hi0;
        add     %i5,stridez,%i5         ! pz += stridez
        faddd   %f60,%f20,%f62          ! (5_0) res0_lo = x0 + x_hi0;

        fmuld   %f26,%f14,%f14          ! (2_0) dtmp2 = dd * dres;
        sethi   %hi(0x3ff00000),%g1
        add     TBL,TBL_SHIFT+24,%i4    ! px -> dummy table entry
        fsubd   DTWO,%f50,%f20          ! (3_0) dtmp0 = DTWO - dtmp0;

        ba      .cont59b                ! rejoin the main loop
        add     TBL,TBL_SHIFT+24,%i3    ! py -> dummy entry (delay slot)

        .align  16
! Common function exit: return to the caller and restore the
! caller's register window ("restore" executes in the ret delay
! slot, the standard SPARC epilogue pairing).
.exit:
        ret
        restore                         ! delay slot: pop register window
        SET_SIZE(__vrhypot)
3879