1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vrsqrt.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34         .align  64
  35 
  36 .CONST_TBL:                                  ! constants read by __vrsqrt via ldd [%o3+offset]
  37         .word   0xbfe00000, 0x0000002f  ! K1 =-5.00000000000005209867e-01; (offset 0x00) polynomial coefficient, last term added
  38         .word   0x3fd80000, 0x00000058  ! K2 = 3.75000000000004884257e-01; (offset 0x08)
  39         .word   0xbfd3ffff, 0xff444bc8  ! K3 =-3.12499999317136886551e-01; (offset 0x10)
  40         .word   0x3fd17fff, 0xff5006fe  ! K4 = 2.73437499359815081532e-01; (offset 0x18)
  41         .word   0xbfcf80bb, 0xb33ef574  ! K5 =-2.46116125605037803130e-01; (offset 0x20)
  42         .word   0x3fcce0af, 0xf8156949  ! K6 = 2.25606914648617522896e-01; (offset 0x28) leading coefficient: res = K6 * xx
  43 
  44         .word   0x001fffff, 0xffffffff  ! DC0 (offset 0x30) mask keeping the low 53 bits: res = vis_fand(res,DC0)
  45         .word   0x3fe00000, 0x00000000  ! DC1 (offset 0x38) bits OR-ed in after the DC0 mask: res = vis_for(res,DC1)
  46         .word   0x00002000, 0x00000000  ! DC2 (offset 0x40) added with fpadd32 to form res_c: res_c = vis_fpadd32(res,DC2)
  47         .word   0x7fffc000, 0x00000000  ! DC3 (offset 0x48) mask applied to res_c: res_c = vis_fand(res_c,DC3)
  48         .word   0x0007ffff, 0xffffffff  ! DC4 (offset 0x50) mask used in the hx < 0x00100000 path: res = vis_fand(res,DC4)
  49 
  50         .word   0x43200000, 0x00000000  ! D2ON51  = pow(2,51) (offset 0x58) added after the integer conversion in the hx < 0x00100000 path
  51         .word   0x3ff00000, 0x00000000  ! DONE   = 1.0 (offset 0x60) numerator of res = DONE / res in the special cases
  52 
  53 #define stridex         %l5     /* input stride used for pointer arithmetic: %i2 << 3 */
  54 #define stridey         %l7     /* output stride used for pointer arithmetic: %i4 << 3 */
  55 #define counter         %l0     /* element count for the current pass, copied from tmp_counter at .begin */
  56 #define TBL             %l3     /* address of __vlibm_TBL_rsqrt (set by PIC_SET) */
  57 #define _0x7ff00000     %o0     /* holds 0x7ff00000 for the hx >= 0x7ff00000 test */
  58 #define _0x00100000     %o1     /* holds 0x00100000 for the hx < 0x00100000 test */
  59 
  60 #define DC0             %f56    /* loaded from .CONST_TBL+0x30 */
  61 #define DC1             %f54    /* loaded from .CONST_TBL+0x38 */
  62 #define DC2             %f48    /* loaded from .CONST_TBL+0x40 */
  63 #define DC3             %f46    /* loaded from .CONST_TBL+0x48 */
  64 #define K6              %f42    /* loaded from .CONST_TBL+0x28 */
  65 #define K5              %f20    /* loaded from .CONST_TBL+0x20 */
  66 #define K4              %f52    /* loaded from .CONST_TBL+0x18 */
  67 #define K3              %f50    /* loaded from .CONST_TBL+0x10 */
  68 #define K2              %f14    /* loaded from .CONST_TBL+0x08 */
  69 #define K1              %f12    /* loaded from .CONST_TBL+0x00 */
  70 #define DONE            %f4     /* loaded from .CONST_TBL+0x60 at .begin1; %f4 is also overwritten in .main_loop */
  71 
  72 #define tmp_counter     %g5     /* count handed to the next pass through .begin; cleared once copied to counter */
  73 #define tmp_px          %o5     /* input pointer handed to the next pass through .begin */
  74 
  75 #define tmp0            STACK_BIAS-0x40 /* tmp0..tmp7: eight 8-byte slots addressed as [%fp+tmpN] */
  76 #define tmp1            STACK_BIAS-0x38 /* slots hold the shifted exponent (stx) reloaded as dlexp (ldd) */
  77 #define tmp2            STACK_BIAS-0x30
  78 #define tmp3            STACK_BIAS-0x28
  79 #define tmp4            STACK_BIAS-0x20
  80 #define tmp5            STACK_BIAS-0x18
  81 #define tmp6            STACK_BIAS-0x10
  82 #define tmp7            STACK_BIAS-0x08
  83 
  84 ! sizeof temp storage - must be a multiple of 16 for V9
  85 #define tmps            0x40    /* 8 slots * 8 bytes; subtracted from %sp by the save at entry */
  86 
  87 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  88 !      !!!!!   algorithm   !!!!!
  89 !  ((float*)&res)[0] = ((float*)px)[0];
  90 !  ((float*)&res)[1] = ((float*)px)[1];
  91 !  hx = *(int*)px;
  92 !  if ( hx >= 0x7ff00000 )
  93 !  {
  94 !    res = DONE / res;
  95 !    ((float*)py)[0] = ((float*)&res)[0];
  96 !    ((float*)py)[1] = ((float*)&res)[1];
  97 !    px += stridex;
  98 !    py += stridey;
  99 !    continue;
 100 !  }
 101 !  if ( hx < 0x00100000 )
 102 !  {
 103 !    ax = hx & 0x7fffffff;
 104 !    lx = ((int*)px)[1];
 105 !
 106 !    if ( (ax | lx) == 0 )
 107 !    {
 108 !      res = DONE / res;
 109 !      ((float*)py)[0] = ((float*)&res)[0];
 110 !      ((float*)py)[1] = ((float*)&res)[1];
 111 !      px += stridex;
 112 !      py += stridey;
 113 !      continue;
 114 !    }
 115 !    else if ( hx >= 0 )
 116 !    {
 117 !      if ( hx < 0x00080000 )
 118 !      {
 119 !        res = *(long long*)&res;
 120 !        hx = *(int*)&res - (537 << 21);
 121 !      }
 122 !      else
 123 !      {
 124 !        res = vis_fand(res,DC4);
 125 !        res = *(long long*)&res;
 126 !        res += D2ON51;
 127 !        hx = *(int*)&res - (537 << 21);
 128 !      }
 129 !    }
 130 !    else
 131 !    {
 132 !      res = sqrt(res);
 133 !      ((float*)py)[0] = ((float*)&res)[0];
 134 !      ((float*)py)[1] = ((float*)&res)[1];
 135 !      px += stridex;
 136 !      py += stridey;
 137 !      continue;
 138 !    }
 139 !  }
 140 !
 141 !  iexp = hx >> 21;
 142 !  iexp = -iexp;
 143 !  iexp += 0x5fe;
 144 !  lexp = iexp << 52;
 145 !  dlexp = *(double*)&lexp;
 146 !  hx >>= 10;
 147 !  hx &= 0x7f8;
 148 !  hx += 8;
 149 !  hx &= -16;
 150 !
 151 !  res = vis_fand(res,DC0);
 152 !  res = vis_for(res,DC1);
 153 !  res_c = vis_fpadd32(res,DC2);
 154 !  res_c = vis_fand(res_c,DC3);
 155 !
 156 !  addr = (char*)arr + hx;
 157 !  dexp_hi = ((double*)addr)[0];
 158 !  dexp_lo = ((double*)addr)[1];
 159 !  dtmp0 = dexp_hi * dexp_hi;
 160 !  xx = res - res_c;
 161 !  xx *= dtmp0;
 162 !  res = K6 * xx;
 163 !  res += K5;
 164 !  res *= xx;
 165 !  res += K4;
 166 !  res *= xx;
 167 !  res += K3;
 168 !  res *= xx;
 169 !  res += K2;
 170 !  res *= xx;
 171 !  res += K1;
 172 !  res *= xx;
 173 !  res = dexp_hi * res;
 174 !  res += dexp_lo;
 175 !  res += dexp_hi;
 176 !
 177 !  res *= dlexp;
 178 !
 179 !  ((float*)py)[0] = ((float*)&res)[0];
 180 !  ((float*)py)[1] = ((float*)&res)[1];
 181 !
 182 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 183 
 184         ENTRY(__vrsqrt)
 185         save    %sp,-SA(MINFRAME)-tmps,%sp
 186         PIC_SETUP(l7)
 187         PIC_SET(l7,.CONST_TBL,o3)
 188         PIC_SET(l7,__vlibm_TBL_rsqrt,l3)
 189         wr      %g0,0x82,%asi
 190 
 191         ldd     [%o3],K1
 192         sethi   %hi(0x7ff00000),%o0
 193         mov     %i3,%o4
 194 
 195         ldd     [%o3+0x08],K2
 196         sethi   %hi(0x00100000),%o1
 197         mov     %i1,tmp_px
 198 
 199         ldd     [%o3+0x10],K3
 200         sll     %i2,3,stridex
 201         mov     %i0,tmp_counter
 202 
 203         ldd     [%o3+0x18],K4
 204         sll     %i4,3,stridey
 205 
 206         ldd     [%o3+0x20],K5
 207         ldd     [%o3+0x28],K6
 208         ldd     [%o3+0x30],DC0
 209         ldd     [%o3+0x38],DC1
 210         ldd     [%o3+0x40],DC2
 211         ldd     [%o3+0x48],DC3
 212 
 213 .begin:
 214         mov     tmp_counter,counter
 215         mov     tmp_px,%i1
 216         clr     tmp_counter
 217 .begin1:
 218         cmp     counter,0
 219         ble,pn  %icc,.exit
 220         ldd     [%o3+0x60],DONE
 221 
 222         lda     [%i1]%asi,%f0           ! (6_0) ((float*)res)[0] = ((float*)px)[0];
 223         sethi   %hi(0x7ffffc00),%i0
 224 
 225         lda     [%i1+4]%asi,%f1         ! (6_0) ((float*)res)[1] = ((float*)px)[1];
 226         add     %i0,1023,%i0
 227 
 228         fand    %f0,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
 229 
 230         lda     [%i1]%asi,%g1           ! (6_1) hx = *(int*)px;
 231         sethi   %hi(0x00080000),%i4
 232 
 233         lda     [%i1+4]%asi,%l4
 234         add     %i1,stridex,%l6         ! px += stridex
 235 
 236         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
 237         lda     [%l6]%asi,%f8           ! (0_0) ((float*)res)[0] = ((float*)px)[0];
 238         for     %f16,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
 239 
 240         lda     [%l6+4]%asi,%f9         ! (0_0) ((float*)res)[1] = ((float*)px)[1];
 241         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
 242         and     %g1,%i0,%i2
 243 
 244         cmp     %g1,_0x7ff00000         ! (6_1) hx ? 0x7ff00000
 245         bge,pn  %icc,.spec0             ! (6_1) if ( hx >= 0x7ff00000 )
 246         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
 247 
 248         cmp     %g1,_0x00100000         ! (6_1) hx ? 0x00100000
 249         bl,pn   %icc,.spec1             ! (6_1) if ( hx < 0x00100000 )
 250         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp;
 251 .cont_spec:
 252         fand    %f8,DC0,%f16            ! (0_0) res = vis_fand(res,DC0);
 253 
 254         fpadd32 %f44,DC2,%f18           ! (6_1) res_c = vis_fpadd32(res,DC2);
 255 
 256         add     %o2,8,%l4               ! (6_1) hx += 8;
 257 
 258         add     %o7,1534,%o7            ! (6_1) iexp += 0x5fe;
 259 
 260         lda     [%l6]%asi,%g1           ! (0_0) hx = *(int*)px;
 261         sllx    %o7,52,%o7              ! (6_1) iexp << 52;
 262         and     %l4,-16,%l4             ! (6_1) hx = -16;
 263 
 264         add     %l4,TBL,%l4             ! (6_1) addr = (char*)arr + hx;
 265         stx     %o7,[%fp+tmp1]          ! (6_1) dlexp = *(double*)lexp;
 266 
 267         add     %l6,stridex,%l6         ! px += stridex
 268         ldd     [%l4],%f30              ! (6_1) dtmp0 = ((double*)addr)[0];
 269 
 270         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
 271         lda     [%l6]%asi,%f0           ! (1_0) ((float*)res)[0] = ((float*)px)[0];
 272         for     %f16,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
 273 
 274         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
 275         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
 276         lda     [%l6+4]%asi,%f1         ! (1_0) ((float*)res)[1] = ((float*)px)[1];
 277 
 278         cmp     %g1,_0x7ff00000         ! (0_0) hx ? 0x7ff00000
 279         bge,pn  %icc,.update0           ! (0_0) if ( hx >= 0x7ff00000 )
 280         fand    %f18,DC3,%f6            ! (6_1) res_c = vis_fand(res_c,DC3);
 281 .cont0:
 282         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
 283         fmuld   %f30,%f30,%f10          ! (6_1) dtmp0 = dexp_hi * dexp_hi;
 284 
 285         cmp     %g1,_0x00100000         ! (0_0) hx ? 0x00100000
 286         bl,pn   %icc,.update1           ! (0_0) if ( hx < 0x00100000 )
 287         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe;
 288 .cont1:
 289         fand    %f0,DC0,%f16            ! (1_0) res = vis_fand(res,DC0);
 290 
 291         fpadd32 %f28,DC2,%f18           ! (0_0) res_c = vis_fpadd32(res,DC2);
 292 
 293         add     %o2,8,%l2               ! (0_0) hx += 8;
 294         fsubd   %f44,%f6,%f6            ! (6_1) xx = res - res_c;
 295 
 296         lda     [%l6]%asi,%g1           ! (1_0) hx = *(int*)px;
 297         sllx    %o7,52,%o7              ! (0_0) iexp << 52;
 298         and     %l2,-16,%l2             ! (0_0) hx = -16;
 299 
 300         add     %l2,TBL,%l2             ! (0_0) addr = (char*)arr + hx;
 301         add     %l6,stridex,%l6         ! px += stridex
 302         stx     %o7,[%fp+tmp2]          ! (0_0) dlexp = *(double*)lexp;
 303 
 304         fmuld   %f6,%f10,%f26           ! (6_1) xx *= dtmp0;
 305         ldd     [%l2],%f10              ! (0_0) dtmp0 = ((double*)addr)[0];
 306 
 307         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
 308         lda     [%l6]%asi,%f6           ! (2_0) ((float*)res)[0] = ((float*)px)[0];
 309         for     %f16,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
 310 
 311         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
 312         cmp     %g1,_0x7ff00000         ! (1_0) hx ? 0x7ff00000
 313         bge,pn  %icc,.update2           ! (1_0) if ( hx >= 0x7ff00000 )
 314         lda     [%l6+4]%asi,%f7         ! (2_0) ((float*)res)[1] = ((float*)px)[1];
 315 .cont2:
 316         fand    %f18,DC3,%f8            ! (0_0) res_c = vis_fand(res_c,DC3);
 317 
 318         fmuld   %f10,%f10,%f10          ! (0_0) dtmp0 = dexp_hi * dexp_hi;
 319         cmp     %g1,_0x00100000         ! (1_0) hx ? 0x00100000
 320         bl,pn   %icc,.update3           ! (1_0) if ( hx < 0x00100000 )
 321         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8;
 322 .cont3:
 323         sub     %g0,%o7,%o7             ! (1_0) iexp = -iexp;
 324         fand    %f6,DC0,%f16            ! (2_0) res = vis_fand(res,DC0);
 325 
 326         add     %o7,1534,%o7            ! (1_0) iexp += 0x5fe;
 327         fpadd32 %f44,DC2,%f18           ! (1_0) res_c = vis_fpadd32(res,DC2);
 328 
 329         fmuld   K6,%f26,%f62            ! (6_1) res = K6 * xx;
 330         add     %o2,8,%i2               ! (1_0) hx += 8;
 331         fsubd   %f28,%f8,%f32           ! (0_0) xx = res - res_c;
 332 
 333         lda     [%l6]%asi,%g1           ! (2_0) hx = *(int*)px;
 334         sllx    %o7,52,%o7              ! (1_0) iexp << 52;
 335         and     %i2,-16,%i2             ! (1_0) hx = -16;
 336 
 337         add     %i2,TBL,%i2             ! (1_0) addr = (char*)arr + hx;
 338         stx     %o7,[%fp+tmp3]          ! (1_0) dlexp = *(double*)lexp;
 339 
 340         fmuld   %f32,%f10,%f32          ! (0_0) xx *= dtmp0;
 341         add     %l6,stridex,%l6         ! px += stridex
 342         ldd     [%i2],%f10              ! (1_0) dtmp0 = ((double*)addr)[0];
 343         faddd   %f62,K5,%f62            ! (6_1) res += K5;
 344 
 345         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
 346         lda     [%l6]%asi,%f0           ! (3_0) ((float*)res)[0] = ((float*)px)[0];
 347         for     %f16,DC1,%f28           ! (2_0) res = vis_for(res,DC1);
 348 
 349         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
 350         cmp     %g1,_0x7ff00000         ! (2_0) hx ? 0x7ff00000
 351         bge,pn  %icc,.update4           ! (2_0) if ( hx >= 0x7ff00000 )
 352         lda     [%l6+4]%asi,%f1         ! (3_0) ((float*)res)[1] = ((float*)px)[1];
 353 .cont4:
 354         fmuld   %f62,%f26,%f40          ! (6_1) res *= xx;
 355         fand    %f18,DC3,%f8            ! (1_0) res_c = vis_fand(res_c,DC3);
 356 
 357         fmuld   %f10,%f10,%f10          ! (1_0) dtmp0 = dexp_hi * dexp_hi;
 358         cmp     %g1,_0x00100000         ! (2_0) hx ? 0x00100000
 359         bl,pn   %icc,.update5           ! (2_0) if ( hx < 0x00100000 )
 360         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
 361 .cont5:
 362         sub     %g0,%o7,%o7             ! (2_0) iexp = -iexp;
 363         fand    %f0,DC0,%f16            ! (3_0) res = vis_fand(res,DC0);
 364 
 365         add     %o7,1534,%o7            ! (2_0) iexp += 0x5fe;
 366         fpadd32 %f28,DC2,%f18           ! (2_0) res_c = vis_fpadd32(res,DC2);
 367 
 368         fmuld   K6,%f32,%f62            ! (0_0) res = K6 * xx;
 369         add     %o2,8,%i4               ! (2_0) hx += 8;
 370         fsubd   %f44,%f8,%f6            ! (1_0) xx = res - res_c;
 371 
 372         faddd   %f40,K4,%f40            ! (6_1) res += K4;
 373 
 374         lda     [%l6]%asi,%g1           ! (3_0) hx = *(int*)px;
 375         sllx    %o7,52,%o7              ! (2_0) iexp << 52;
 376         and     %i4,-16,%i4             ! (2_0) hx = -16;
 377 
 378         add     %i4,TBL,%i4             ! (2_0) addr = (char*)arr + hx;
 379         stx     %o7,[%fp+tmp4]          ! (2_0) dlexp = *(double*)lexp;
 380 
 381         fmuld   %f6,%f10,%f38           ! (1_0) xx *= dtmp0;
 382         ldd     [%i4],%f24              ! (2_0) dtmp0 = ((double*)addr)[0];
 383         faddd   %f62,K5,%f62            ! (0_0) res += K5;
 384 
 385         fmuld   %f40,%f26,%f34          ! (6_1) res *= xx;
 386         add     %l6,stridex,%l6         ! px += stridex
 387 
 388         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
 389         lda     [%l6]%asi,%f8           ! (4_0) ((float*)res)[0] = ((float*)px)[0];
 390         for     %f16,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
 391 
 392         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
 393         cmp     %g1,_0x7ff00000         ! (3_0) hx ? 0x7ff00000
 394         bge,pn  %icc,.update6           ! (3_0) if ( hx >= 0x7ff00000 )
 395         lda     [%l6+4]%asi,%f9         ! (4_0) ((float*)res)[1] = ((float*)px)[1];
 396 .cont6:
 397         fmuld   %f62,%f32,%f60          ! (0_0) res *= xx;
 398         cmp     %g1,_0x00100000         ! (3_0) hx ? 0x00100000
 399         fand    %f18,DC3,%f22           ! (2_0) res_c = vis_fand(res_c,DC3);
 400 
 401         fmuld   %f24,%f24,%f24          ! (2_0) dtmp0 = dexp_hi * dexp_hi;
 402         bl,pn   %icc,.update7           ! (3_0) if ( hx < 0x00100000 )
 403         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
 404         faddd   %f34,K3,%f6             ! (6_1) res += K3;
 405 .cont7:
 406         sub     %g0,%o7,%o7             ! (3_0) iexp = -iexp;
 407         fand    %f8,DC0,%f16            ! (4_0) res = vis_fand(res,DC0);
 408 
 409         add     %o7,1534,%o7            ! (3_0) iexp += 0x5fe;
 410         fpadd32 %f44,DC2,%f18           ! (3_0) res_c = vis_fpadd32(res,DC2);
 411 
 412         fmuld   K6,%f38,%f62            ! (1_0) res = K6 * xx;
 413         add     %o2,8,%i5               ! (3_0) hx += 8;
 414         fsubd   %f28,%f22,%f28          ! (2_0) xx = res - res_c;
 415 
 416         fmuld   %f6,%f26,%f22           ! (6_1) res *= xx;
 417         faddd   %f60,K4,%f60            ! (0_0) res += K4;
 418 
 419         lda     [%l6]%asi,%g1           ! (4_0) hx = *(int*)px;
 420         sllx    %o7,52,%o7              ! (3_0) iexp << 52;
 421         and     %i5,-16,%i5             ! (3_0) hx = -16;
 422 
 423         add     %i5,TBL,%i5             ! (3_0) addr = (char*)arr + hx;
 424         stx     %o7,[%fp+tmp5]          ! (3_0) dlexp = *(double*)lexp;
 425 
 426         fmuld   %f28,%f24,%f36          ! (2_0) xx *= dtmp0;
 427         add     %l6,stridex,%i0         ! px += stridex
 428         ldd     [%i5],%f28              ! (3_0) dtmp0 = ((double*)addr)[0];
 429         faddd   %f62,K5,%f62            ! (1_0) res += K5;
 430 
 431         faddd   %f22,K2,%f10            ! (6_1) res += K2;
 432         fmuld   %f60,%f32,%f34          ! (0_0) res *= xx;
 433 
 434         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
 435         lda     [%i0]%asi,%f0           ! (5_0) ((float*)res)[0] = ((float*)px)[0];
 436         for     %f16,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
 437 
 438         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
 439         cmp     %g1,_0x7ff00000         ! (4_0) hx ? 0x7ff00000
 440         bge,pn  %icc,.update8           ! (4_0) if ( hx >= 0x7ff00000 )
 441         lda     [%i0+4]%asi,%f1         ! (5_0) ((float*)res)[1] = ((float*)px)[1];
 442 .cont8:
 443         fand    %f18,DC3,%f40           ! (3_0) res_c = vis_fand(res_c,DC3);
 444         fmuld   %f62,%f38,%f62          ! (1_0) res *= xx;
 445 
 446         fmuld   %f10,%f26,%f58          ! (6_1) res *= xx;
 447         cmp     %g1,_0x00100000         ! (4_0) hx ? 0x00100000
 448         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
 449         faddd   %f34,K3,%f60            ! (0_0) res += K3;
 450 
 451         fmuld   %f28,%f28,%f28          ! (3_0) dtmp0 = dexp_hi * dexp_hi;
 452         bl,pn   %icc,.update9           ! (4_0) if ( hx < 0x00100000 )
 453         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
 454         fand    %f0,DC0,%f16            ! (5_0) res = vis_fand(res,DC0);
 455 .cont9:
 456         add     %o7,1534,%o7            ! (4_0) iexp += 0x5fe;
 457         fpadd32 %f24,DC2,%f18           ! (4_0) res_c = vis_fpadd32(res,DC2);
 458 
 459         fmuld   K6,%f36,%f10            ! (2_0) res = K6 * xx;
 460         add     %o2,8,%l1               ! (4_0) hx += 8;
 461         fsubd   %f44,%f40,%f44          ! (3_0) xx = res - res_c;
 462 
 463         fmuld   %f60,%f32,%f60          ! (0_0) res *= xx;
 464         faddd   %f62,K4,%f6             ! (1_0) res += K4;
 465 
 466         lda     [%i0]%asi,%g1           ! (5_0) hx = *(int*)px;
 467         sllx    %o7,52,%o7              ! (4_0) iexp << 52;
 468         and     %l1,-16,%l1             ! (4_0) hx = -16;
 469         faddd   %f58,K1,%f58            ! (6_1) res += K1;
 470 
 471         add     %i0,stridex,%i1         ! px += stridex
 472         add     %l1,TBL,%l1             ! (4_0) addr = (char*)arr + hx;
 473         stx     %o7,[%fp+tmp6]          ! (4_0) dlexp = *(double*)lexp;
 474 
 475         fmuld   %f44,%f28,%f40          ! (3_0) xx *= dtmp0;
 476         ldd     [%l1],%f44              ! (4_0) dtmp0 = ((double*)addr)[0];
 477         faddd   %f10,K5,%f62            ! (2_0) res += K5;
 478 
 479         fmuld   %f6,%f38,%f34           ! (1_0) res *= xx;
 480         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
 481         nop
 482         faddd   %f60,K2,%f60            ! (0_0) res += K2;
 483 
 484         for     %f16,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
 485         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
 486         lda     [%i1]%asi,%f6           ! (6_0) ((float*)res)[0] = ((float*)px)[0];
 487         fmuld   %f58,%f26,%f26          ! (6_1) res *= xx;
 488 
 489         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
 490         cmp     %g1,_0x7ff00000         ! (5_0) hx ? 0x7ff00000
 491         bge,pn  %icc,.update10          ! (5_0) if ( hx >= 0x7ff00000 )
 492         lda     [%i1+4]%asi,%f7         ! (6_0) ((float*)res)[1] = ((float*)px)[1];
 493 .cont10:
 494         fand    %f18,DC3,%f8            ! (4_0) res_c = vis_fand(res_c,DC3);
 495         fmuld   %f62,%f36,%f62          ! (2_0) res *= xx;
 496 
 497         fmuld   %f60,%f32,%f58          ! (0_0) res *= xx;
 498         cmp     %g1,_0x00100000         ! (5_0) hx ? 0x00100000
 499         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
 500         faddd   %f34,K3,%f34            ! (1_0) res += K3;
 501 
 502         fmuld   %f30,%f26,%f26          ! (6_1) res = dexp_hi * res;
 503         bl,pn   %icc,.update11          ! (5_0) if ( hx < 0x00100000 )
 504         nop
 505         fand    %f6,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
 506 .cont11:
 507         ldd     [%l4+8],%f60            ! (6_1) dexp_lo = ((double*)addr)[1];
 508         fmuld   %f44,%f44,%f44          ! (4_0) dtmp0 = dexp_hi * dexp_hi;
 509         fpadd32 %f28,DC2,%f18           ! (5_0) res_c = vis_fpadd32(res,DC2);
 510 
 511         fmuld   K6,%f40,%f22            ! (3_0) res = K6 * xx;
 512         add     %o2,8,%i3               ! (5_0) hx += 8;
 513         fsubd   %f24,%f8,%f10           ! (4_0) xx = res - res_c;
 514 
 515         fmuld   %f34,%f38,%f24          ! (1_0) res *= xx;
 516         or      %g0,%o4,%i0
 517 
 518         cmp     counter,7
 519         bl,pn   %icc,.tail
 520         faddd   %f62,K4,%f34            ! (2_0) res += K4;
 521 
 522         ba      .main_loop
 523         sub     counter,7,counter       ! counter
 524 
 525         .align  16
 526 .main_loop:
 527         add     %o7,1534,%o7            ! (5_0) iexp += 0x5fe;
 528         and     %i3,-16,%i3             ! (5_1) hx = -16;
 529         lda     [%i1]%asi,%g1           ! (6_1) hx = *(int*)px;
 530         faddd   %f58,K1,%f58            ! (0_1) res += K1;
 531 
 532         add     %i3,TBL,%i3             ! (5_1) addr = (char*)arr + hx;
 533         sllx    %o7,52,%o7              ! (5_1) iexp << 52;
 534         stx     %o7,[%fp+tmp0]          ! (5_1) dlexp = *(double*)lexp;
 535         faddd   %f26,%f60,%f8           ! (6_2) res += dexp_lo;
 536 
 537         faddd   %f22,K5,%f62            ! (3_1) res += K5;
 538         add     %i1,stridex,%l6         ! px += stridex
 539         ldd     [%i3],%f22              ! (5_1) dtmp0 = ((double*)addr)[0];
 540         fmuld   %f10,%f44,%f60          ! (4_1) xx *= dtmp0;
 541 
 542         faddd   %f24,K2,%f26            ! (1_1) res += K2;
 543         add     %i0,stridey,%i1         ! px += stridey
 544         ldd     [%l2],%f24              ! (0_1) dexp_hi = ((double*)addr)[0];
 545         fmuld   %f34,%f36,%f34          ! (2_1) res *= xx;
 546 
 547         fmuld   %f58,%f32,%f58          ! (0_1) res *= xx;
 548         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
 549         lda     [%l6]%asi,%f0           ! (0_0) ((float*)res)[0] = ((float*)px)[0];
 550         for     %f16,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
 551 
 552         lda     [%l6+4]%asi,%f1         ! (0_0) ((float*)res)[1] = ((float*)px)[1];
 553         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
 554         fmuld   %f22,%f22,%f10          ! (5_1) dtmp0 = dexp_hi * dexp_hi;
 555         faddd   %f8,%f30,%f30           ! (6_2) res += dexp_hi;
 556 
 557         fmuld   %f62,%f40,%f32          ! (3_1) res *= xx;
 558         cmp     %g1,_0x7ff00000         ! (6_1) hx ? 0x7ff00000
 559         ldd     [%fp+tmp1],%f62         ! (6_2) dlexp = *(double*)lexp;
 560         fand    %f18,DC3,%f8            ! (5_1) res_c = vis_fand(res_c,DC3);
 561 
 562         fmuld   %f26,%f38,%f26          ! (1_1) res *= xx;
 563         bge,pn  %icc,.update12          ! (6_1) if ( hx >= 0x7ff00000 )
 564         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
 565         faddd   %f34,K3,%f34            ! (2_1) res += K3;
 566 .cont12:
 567         fmuld   %f24,%f58,%f58          ! (0_1) res = dexp_hi * res;
 568         cmp     %g1,_0x00100000         ! (6_1) hx ? 0x00100000
 569         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp;
 570         fand    %f0,DC0,%f16            ! (0_0) res = vis_fand(res,DC0);
 571 
 572         fmuld   %f30,%f62,%f2           ! (6_2) res *= dlexp;
 573         bl,pn   %icc,.update13          ! (6_1) if ( hx < 0x00100000 )
 574         ldd     [%l2+8],%f30            ! (0_1) dexp_lo = ((double*)addr)[1];
 575         fpadd32 %f44,DC2,%f18           ! (6_1) res_c = vis_fpadd32(res,DC2);
 576 .cont13:
 577         fmuld   K6,%f60,%f62            ! (4_1) res = K6 * xx;
 578         add     %o2,8,%l4               ! (6_1) hx += 8;
 579         st      %f2,[%i0]               ! (6_2) ((float*)py)[0] = ((float*)res)[0];
 580         fsubd   %f28,%f8,%f6            ! (5_1) xx = res - res_c;
 581 
 582         fmuld   %f34,%f36,%f28          ! (2_1) res *= xx;
 583         add     %o7,1534,%o7            ! (6_1) iexp += 0x5fe;
 584         st      %f3,[%i0+4]             ! (6_2) ((float*)py)[1] = ((float*)res)[1];
 585         faddd   %f32,K4,%f32            ! (3_1) res += K4;
 586 
 587         lda     [%l6]%asi,%g1           ! (0_0) hx = *(int*)px;
 588         sllx    %o7,52,%o7              ! (6_1) iexp << 52;
 589         and     %l4,-16,%l4             ! (6_1) hx = -16;
 590         faddd   %f26,K1,%f26            ! (1_1) res += K1;
 591 
 592         add     %i1,stridey,%i0         ! px += stridey
 593         add     %l4,TBL,%l4             ! (6_1) addr = (char*)arr + hx;
 594         stx     %o7,[%fp+tmp1]          ! (6_1) dlexp = *(double*)lexp;
 595         faddd   %f58,%f30,%f8           ! (0_1) res += dexp_lo;
 596 
 597         fmuld   %f6,%f10,%f58           ! (5_1) xx *= dtmp0;
 598         add     %l6,stridex,%l6         ! px += stridex
 599         ldd     [%l4],%f30              ! (6_1) dtmp0 = ((double*)addr)[0];
 600         faddd   %f62,K5,%f62            ! (4_1) res += K5;
 601 
 602         fmuld   %f32,%f40,%f34          ! (3_1) res *= xx;
 603         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
 604         ldd     [%i2],%f4               ! (1_1) dexp_hi = ((double*)addr)[0];
 605         faddd   %f28,K2,%f32            ! (2_1) res += K2;
 606 
 607         fmuld   %f26,%f38,%f26          ! (1_1) res *= xx;
 608         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
 609         lda     [%l6]%asi,%f6           ! (1_0) ((float*)res)[0] = ((float*)px)[0];
 610         for     %f16,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
 611 
 612         fmuld   %f30,%f30,%f30          ! (6_1) dtmp0 = dexp_hi * dexp_hi;
 613         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
 614         lda     [%l6+4]%asi,%f7         ! (1_0) ((float*)res)[1] = ((float*)px)[1];
 615         faddd   %f8,%f24,%f24           ! (0_1) res += dexp_hi;
 616 
 617         fmuld   %f62,%f60,%f38          ! (4_1) res *= xx;
 618         cmp     %g1,_0x7ff00000         ! (0_0) hx ? 0x7ff00000
 619         ldd     [%fp+tmp2],%f62         ! (0_1) dlexp = *(double*)lexp;
 620         fand    %f18,DC3,%f8            ! (6_1) res_c = vis_fand(res_c,DC3);
 621 
 622         fmuld   %f32,%f36,%f32          ! (2_1) res *= xx;
 623         bge,pn  %icc,.update14          ! (0_0) if ( hx >= 0x7ff00000 )
 624         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
 625         faddd   %f34,K3,%f34            ! (3_1) res += K3;
 626 .cont14:
 627         fmuld   %f4,%f26,%f26           ! (1_1) res = dexp_hi * res;
 628         cmp     %g1,_0x00100000         ! (0_0) hx ? 0x00100000
 629         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe;
 630         fand    %f6,DC0,%f16            ! (1_0) res = vis_fand(res,DC0);
 631 
 632         fmuld   %f24,%f62,%f2           ! (0_1) res *= dlexp;
 633         bl,pn   %icc,.update15          ! (0_0) if ( hx < 0x00100000 )
 634         ldd     [%i2+8],%f24            ! (1_1) dexp_lo = ((double*)addr)[1];
 635         fpadd32 %f28,DC2,%f18           ! (0_0) res_c = vis_fpadd32(res,DC2);
 636 .cont15:
 637         fmuld   K6,%f58,%f62            ! (5_1) res = K6 * xx;
 638         add     %o2,8,%l2               ! (0_0) hx += 8;
 639         st      %f2,[%i1]               ! (0_1) ((float*)py)[0] = ((float*)res)[0];
 640         fsubd   %f44,%f8,%f10           ! (6_1) xx = res - res_c;
 641 
 642         fmuld   %f34,%f40,%f44          ! (3_1) res *= xx;
 643         nop
 644         st      %f3,[%i1+4]             ! (0_1) ((float*)py)[1] = ((float*)res)[1];
 645         faddd   %f38,K4,%f38            ! (4_1) res += K4;
 646 
 647         lda     [%l6]%asi,%g1           ! (1_0) hx = *(int*)px;
 648         sllx    %o7,52,%o7              ! (0_0) iexp << 52;
 649         and     %l2,-16,%l2             ! (0_0) hx = -16;
 650         faddd   %f32,K1,%f32            ! (2_1) res += K1;
 651 
 652         add     %l2,TBL,%l2             ! (0_0) addr = (char*)arr + hx;
 653         add     %l6,stridex,%l6         ! px += stridex
 654         stx     %o7,[%fp+tmp2]          ! (0_0) dlexp = *(double*)lexp;
 655         faddd   %f26,%f24,%f8           ! (1_1) res += dexp_lo;
 656 
 657         fmuld   %f10,%f30,%f26          ! (6_1) xx *= dtmp0;
 658         add     %i0,stridey,%i1         ! px += stridey
 659         ldd     [%l2],%f30              ! (0_0) dtmp0 = ((double*)addr)[0];
 660         faddd   %f62,K5,%f62            ! (5_1) res += K5;
 661 
 662         fmuld   %f38,%f60,%f34          ! (4_1) res *= xx;
 663         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
 664         ldd     [%i4],%f24              ! (2_1) dexp_hi = ((double*)addr)[0];
 665         faddd   %f44,K2,%f38            ! (3_1) res += K2;
 666 
 667         fmuld   %f32,%f36,%f32          ! (2_1) res *= xx;
 668         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
 669         lda     [%l6]%asi,%f0           ! (2_0) ((float*)res)[0] = ((float*)px)[0];
 670         for     %f16,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
 671 
 672         fmuld   %f30,%f30,%f30          ! (0_0) dtmp0 = dexp_hi * dexp_hi;
 673         cmp     %g1,_0x7ff00000         ! (1_0) hx ? 0x7ff00000
 674         lda     [%l6+4]%asi,%f1         ! (2_0) ((float*)res)[1] = ((float*)px)[1];
 675         faddd   %f8,%f4,%f4             ! (1_1) res += dexp_hi;
 676 
 677         fmuld   %f62,%f58,%f36          ! (5_1) res *= xx;
 678         bge,pn  %icc,.update16          ! (1_0) if ( hx >= 0x7ff00000 )
 679         ldd     [%fp+tmp3],%f62         ! (1_1) dlexp = *(double*)lexp;
 680         fand    %f18,DC3,%f8            ! (0_0) res_c = vis_fand(res_c,DC3);
 681 .cont16:
 682         fmuld   %f38,%f40,%f38          ! (3_1) res *= xx;
 683         cmp     %g1,_0x00100000         ! (1_0) hx ? 0x00100000
 684         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8;
 685         faddd   %f34,K3,%f34            ! (4_1) res += K3;
 686 
 687         fmuld   %f24,%f32,%f32          ! (2_1) res = dexp_hi * res;
 688         bl,pn   %icc,.update17          ! (1_0) if ( hx < 0x00100000 )
 689         sub     %g0,%o7,%o7             ! (1_0) iexp = -iexp;
 690         fand    %f0,DC0,%f16            ! (2_0) res = vis_fand(res,DC0);
 691 .cont17:
 692         fmuld   %f4,%f62,%f2            ! (1_1) res *= dlexp;
 693         add     %o7,1534,%o7            ! (1_0) iexp += 0x5fe;
 694         ldd     [%i4+8],%f4             ! (2_1) dexp_lo = ((double*)addr)[1];
 695         fpadd32 %f44,DC2,%f18           ! (1_0) res_c = vis_fpadd32(res,DC2);
 696 
 697         fmuld   K6,%f26,%f62            ! (6_1) res = K6 * xx;
 698         add     %o2,8,%i2               ! (1_0) hx += 8;
 699         st      %f2,[%i0]               ! (1_1) ((float*)py)[0] = ((float*)res)[0];
 700         fsubd   %f28,%f8,%f6            ! (0_0) xx = res - res_c;
 701 
 702         fmuld   %f34,%f60,%f28          ! (4_1) res *= xx;
 703         nop
 704         st      %f3,[%i0+4]             ! (1_1) ((float*)py)[1] = ((float*)res)[1];
 705         faddd   %f36,K4,%f36            ! (5_1) res += K4;
 706 
 707         lda     [%l6]%asi,%g1           ! (2_0) hx = *(int*)px;
 708         sllx    %o7,52,%o7              ! (1_0) iexp << 52;
 709         and     %i2,-16,%i2             ! (1_0) hx = -16;
 710         faddd   %f38,K1,%f38            ! (3_1) res += K1;
 711 
 712         add     %i1,stridey,%i0         ! px += stridey
 713         add     %i2,TBL,%i2             ! (1_0) addr = (char*)arr + hx;
 714         stx     %o7,[%fp+tmp3]          ! (1_0) dlexp = *(double*)lexp;
 715         faddd   %f32,%f4,%f8            ! (2_1) res += dexp_lo;
 716 
 717         fmuld   %f6,%f30,%f32           ! (0_0) xx *= dtmp0;
 718         add     %l6,stridex,%l6         ! px += stridex
 719         ldd     [%i2],%f30              ! (1_0) dtmp0 = ((double*)addr)[0];
 720         faddd   %f62,K5,%f62            ! (6_1) res += K5;
 721 
 722         fmuld   %f36,%f58,%f34          ! (5_1) res *= xx;
 723         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
 724         ldd     [%i5],%f4               ! (3_1) dexp_hi = ((double*)addr)[0];
 725         faddd   %f28,K2,%f36            ! (4_1) res += K2;
 726 
 727         fmuld   %f38,%f40,%f38          ! (3_1) res *= xx;
 728         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
 729         lda     [%l6]%asi,%f6           ! (3_0) ((float*)res)[0] = ((float*)px)[0];
 730         for     %f16,DC1,%f28           ! (2_0) res = vis_for(res,DC1);
 731 
 732         fmuld   %f30,%f30,%f30          ! (1_0) dtmp0 = dexp_hi * dexp_hi;
 733         cmp     %g1,_0x7ff00000         ! (2_0) hx ? 0x7ff00000
 734         lda     [%l6+4]%asi,%f7         ! (3_0) ((float*)res)[1] = ((float*)px)[1];
 735         faddd   %f8,%f24,%f24           ! (2_1) res += dexp_hi;
 736 
 737         fmuld   %f62,%f26,%f40          ! (6_1) res *= xx;
 738         bge,pn  %icc,.update18          ! (2_0) if ( hx >= 0x7ff00000 )
 739         ldd     [%fp+tmp4],%f62         ! (2_1) dlexp = *(double*)lexp;
 740         fand    %f18,DC3,%f8            ! (1_0) res_c = vis_fand(res_c,DC3);
 741 .cont18:
 742         fmuld   %f36,%f60,%f36          ! (4_1) res *= xx;
 743         cmp     %g1,_0x00100000         ! (2_0) hx ? 0x00100000
 744         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
 745         faddd   %f34,K3,%f34            ! (5_1) res += K3;
 746 
 747         fmuld   %f4,%f38,%f38           ! (3_1) res = dexp_hi * res;
 748         bl,pn   %icc,.update19          ! (2_0) if ( hx < 0x00100000 )
 749         sub     %g0,%o7,%o7             ! (2_0) iexp = -iexp;
 750         fand    %f6,DC0,%f16            ! (3_0) res = vis_fand(res,DC0);
 751 .cont19:
 752         fmuld   %f24,%f62,%f2           ! (2_1) res *= dlexp;
 753         add     %o7,1534,%o7            ! (2_0) iexp += 0x5fe;
 754         ldd     [%i5+8],%f24            ! (3_1) dexp_lo = ((double*)addr)[1];
 755         fpadd32 %f28,DC2,%f18           ! (2_0) res_c = vis_fpadd32(res,DC2);
 756 
 757         fmuld   K6,%f32,%f62            ! (0_0) res = K6 * xx;
 758         add     %o2,8,%i4               ! (2_0) hx += 8;
 759         st      %f2,[%i1]               ! (2_1) ((float*)py)[0] = ((float*)res)[0];
 760         fsubd   %f44,%f8,%f10           ! (1_0) xx = res - res_c;
 761 
 762         fmuld   %f34,%f58,%f44          ! (5_1) res *= xx;
 763         nop
 764         st      %f3,[%i1+4]             ! (2_1) ((float*)py)[1] = ((float*)res)[1];
 765         faddd   %f40,K4,%f40            ! (6_1) res += K4;
 766 
 767         lda     [%l6]%asi,%g1           ! (3_0) hx = *(int*)px;
 768         sllx    %o7,52,%o7              ! (2_0) iexp << 52;
 769         and     %i4,-16,%i4             ! (2_0) hx = -16;
 770         faddd   %f36,K1,%f36            ! (4_1) res += K1;
 771 
 772         add     %l6,stridex,%l6         ! px += stridex
 773         add     %i4,TBL,%i4             ! (2_0) addr = (char*)arr + hx;
 774         stx     %o7,[%fp+tmp4]          ! (2_0) dlexp = *(double*)lexp;
 775         faddd   %f38,%f24,%f8           ! (3_1) res += dexp_lo;
 776 
 777         fmuld   %f10,%f30,%f38          ! (1_0) xx *= dtmp0;
 778         add     %i0,stridey,%i1         ! px += stridey
 779         ldd     [%i4],%f24              ! (2_0) dtmp0 = ((double*)addr)[0];
 780         faddd   %f62,K5,%f62            ! (0_0) res += K5;
 781 
 782         fmuld   %f40,%f26,%f34          ! (6_1) res *= xx;
 783         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
 784         ldd     [%l1],%f30              ! (4_1) dexp_hi = ((double*)addr)[0];
 785         faddd   %f44,K2,%f40            ! (5_1) res += K2;
 786 
 787         fmuld   %f36,%f60,%f36          ! (4_1) res *= xx;
 788         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
 789         lda     [%l6]%asi,%f0           ! (4_0) ((float*)res)[0] = ((float*)px)[0];
 790         for     %f16,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
 791 
 792         fmuld   %f24,%f24,%f24          ! (2_0) dtmp0 = dexp_hi * dexp_hi;
 793         cmp     %g1,_0x7ff00000         ! (3_0) hx ? 0x7ff00000
 794         lda     [%l6+4]%asi,%f1         ! (4_0) ((float*)res)[1] = ((float*)px)[1];
 795         faddd   %f8,%f4,%f8             ! (3_1) res += dexp_hi;
 796 
 797         fmuld   %f62,%f32,%f60          ! (0_0) res *= xx;
 798         bge,pn  %icc,.update20          ! (3_0) if ( hx >= 0x7ff00000 )
 799         ldd     [%fp+tmp5],%f62         ! (3_1) dlexp = *(double*)lexp;
 800         fand    %f18,DC3,%f4            ! (2_0) res_c = vis_fand(res_c,DC3);
 801 .cont20:
 802         fmuld   %f40,%f58,%f40          ! (5_1) res *= xx;
 803         cmp     %g1,_0x00100000         ! (3_0) hx ? 0x00100000
 804         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
 805         faddd   %f34,K3,%f10            ! (6_1) res += K3;
 806 
 807         fmuld   %f30,%f36,%f36          ! (4_1) res = dexp_hi * res;
 808         bl,pn   %icc,.update21          ! (3_0) if ( hx < 0x00100000 )
 809         sub     %g0,%o7,%o7             ! (3_0) iexp = -iexp;
 810         fand    %f0,DC0,%f16            ! (4_0) res = vis_fand(res,DC0);
 811 .cont21:
 812         fmuld   %f8,%f62,%f8            ! (3_1) res *= dlexp;
 813         add     %o7,1534,%o7            ! (3_0) iexp += 0x5fe;
 814         ldd     [%l1+8],%f34            ! (4_1) dexp_lo = ((double*)addr)[1];
 815         fpadd32 %f44,DC2,%f18           ! (3_0) res_c = vis_fpadd32(res,DC2);
 816 
 817         fmuld   K6,%f38,%f62            ! (1_0) res = K6 * xx;
 818         add     %o2,8,%i5               ! (3_0) hx += 8;
 819         st      %f8,[%i0]               ! (3_1) ((float*)py)[0] = ((float*)res)[0];
 820         fsubd   %f28,%f4,%f28           ! (2_0) xx = res - res_c;
 821 
 822         fmuld   %f10,%f26,%f4           ! (6_1) res *= xx;
 823         nop
 824         st      %f9,[%i0+4]             ! (3_1) ((float*)py)[1] = ((float*)res)[1];
 825         faddd   %f60,K4,%f60            ! (0_0) res += K4;
 826 
 827         lda     [%l6]%asi,%g1           ! (4_0) hx = *(int*)px;
 828         sllx    %o7,52,%o7              ! (3_0) iexp << 52;
 829         and     %i5,-16,%i5             ! (3_0) hx = -16;
 830         faddd   %f40,K1,%f40            ! (5_1) res += K1;
 831 
 832         add     %l6,stridex,%i0         ! px += stridex
 833         add     %i5,TBL,%i5             ! (3_0) addr = (char*)arr + hx;
 834         stx     %o7,[%fp+tmp5]          ! (3_0) dlexp = *(double*)lexp;
 835         faddd   %f36,%f34,%f8           ! (4_1) res += dexp_lo;
 836 
 837         fmuld   %f28,%f24,%f36          ! (2_0) xx *= dtmp0;
 838         add     %i1,stridey,%l6         ! px += stridey
 839         ldd     [%i5],%f28              ! (3_0) dtmp0 = ((double*)addr)[0];
 840         faddd   %f62,K5,%f62            ! (1_0) res += K5;
 841 
 842         faddd   %f4,K2,%f10             ! (6_1) res += K2;
 843         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
 844         nop
 845         fmuld   %f60,%f32,%f34          ! (0_0) res *= xx;
 846 
 847         fmuld   %f40,%f58,%f40          ! (5_1) res *= xx;
 848         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
 849         lda     [%i0]%asi,%f6           ! (5_0) ((float*)res)[0] = ((float*)px)[0];
 850         for     %f16,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
 851 
 852         fmuld   %f28,%f28,%f28          ! (3_0) dtmp0 = dexp_hi * dexp_hi;
 853         cmp     %g1,_0x7ff00000         ! (4_0) hx ? 0x7ff00000
 854         lda     [%i0+4]%asi,%f7         ! (5_0) ((float*)res)[1] = ((float*)px)[1];
 855         faddd   %f8,%f30,%f30           ! (4_1) res += dexp_hi;
 856 
 857         fand    %f18,DC3,%f8            ! (3_0) res_c = vis_fand(res_c,DC3);
 858         bge,pn  %icc,.update22          ! (4_0) if ( hx >= 0x7ff00000 )
 859         ldd     [%fp+tmp6],%f18         ! (4_1) dlexp = *(double*)lexp;
 860         fmuld   %f62,%f38,%f62          ! (1_0) res *= xx;
 861 .cont22:
 862         fmuld   %f10,%f26,%f58          ! (6_1) res *= xx;
 863         cmp     %g1,_0x00100000         ! (4_0) hx ? 0x00100000
 864         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
 865         faddd   %f34,K3,%f60            ! (0_0) res += K3;
 866 
 867         fmuld   %f22,%f40,%f40          ! (5_1) res = dexp_hi * res;
 868         bl,pn   %icc,.update23          ! (4_0) if ( hx < 0x00100000 )
 869         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
 870         fand    %f6,DC0,%f16            ! (5_0) res = vis_fand(res,DC0);
 871 .cont23:
 872         fmuld   %f30,%f18,%f6           ! (4_1) res *= dlexp;
 873         add     %o7,1534,%o7            ! (4_0) iexp += 0x5fe;
 874         ldd     [%i3+8],%f34            ! (5_1) dexp_lo = ((double*)addr)[1];
 875         fpadd32 %f24,DC2,%f18           ! (4_0) res_c = vis_fpadd32(res,DC2);
 876 
 877         fmuld   K6,%f36,%f30            ! (2_0) res = K6 * xx;
 878         add     %o2,8,%l1               ! (4_0) hx += 8;
 879         st      %f6,[%i1]               ! (4_1) ((float*)py)[0] = ((float*)res)[0];
 880         fsubd   %f44,%f8,%f44           ! (3_0) xx = res - res_c;
 881 
 882         fmuld   %f60,%f32,%f60          ! (0_0) res *= xx;
 883         sllx    %o7,52,%o7              ! (4_0) iexp << 52;
 884         st      %f7,[%i1+4]             ! (4_1) ((float*)py)[1] = ((float*)res)[1];
 885         faddd   %f62,K4,%f6             ! (1_0) res += K4;
 886 
 887         lda     [%i0]%asi,%g1           ! (5_0) hx = *(int*)px;
 888         add     %i0,stridex,%i1         ! px += stridex
 889         and     %l1,-16,%l1             ! (4_0) hx = -16;
 890         faddd   %f58,K1,%f58            ! (6_1) res += K1;
 891 
 892         add     %l1,TBL,%l1             ! (4_0) addr = (char*)arr + hx;
 893         add     %l6,stridey,%i0         ! px += stridey
 894         stx     %o7,[%fp+tmp6]          ! (4_0) dlexp = *(double*)lexp;
 895         faddd   %f40,%f34,%f8           ! (5_1) res += dexp_lo;
 896 
 897         fmuld   %f44,%f28,%f40          ! (3_0) xx *= dtmp0;
 898         nop
 899         ldd     [%l1],%f44              ! (4_0) dtmp0 = ((double*)addr)[0];
 900         faddd   %f30,K5,%f62            ! (2_0) res += K5;
 901 
 902         fmuld   %f6,%f38,%f34           ! (1_0) res *= xx;
 903         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
 904         ldd     [%l4],%f30              ! (6_1) dexp_hi = ((double*)addr)[0];
 905         faddd   %f60,K2,%f60            ! (0_0) res += K2;
 906 
 907         for     %f16,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
 908         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
 909         lda     [%i1]%asi,%f6           ! (6_0) ((float*)res)[0] = ((float*)px)[0];
 910         fmuld   %f58,%f26,%f26          ! (6_1) res *= xx;
 911 
 912         fmuld   %f44,%f44,%f44          ! (4_0) dtmp0 = dexp_hi * dexp_hi;
 913         cmp     %g1,_0x7ff00000         ! (5_0) hx ? 0x7ff00000
 914         lda     [%i1+4]%asi,%f7         ! (6_0) ((float*)res)[1] = ((float*)px)[1];
 915         faddd   %f8,%f22,%f22           ! (5_1) res += dexp_hi;
 916 
 917         fand    %f18,DC3,%f8            ! (4_0) res_c = vis_fand(res_c,DC3);
 918         bge,pn  %icc,.update24          ! (5_0) if ( hx >= 0x7ff00000 )
 919         ldd     [%fp+tmp0],%f18         ! (5_1) dlexp = *(double*)lexp;
 920         fmuld   %f62,%f36,%f62          ! (2_0) res *= xx;
 921 .cont24:
 922         fmuld   %f60,%f32,%f58          ! (0_0) res *= xx;
 923         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
 924         cmp     %g1,_0x00100000         ! (5_0) hx ? 0x00100000
 925         faddd   %f34,K3,%f34            ! (1_0) res += K3;
 926 
 927         fmuld   %f30,%f26,%f26          ! (6_1) res = dexp_hi * res;
 928         bl,pn   %icc,.update25          ! (5_0) if ( hx < 0x00100000 )
 929         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
 930         fand    %f6,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
 931 .cont25:
 932         fmuld   %f22,%f18,%f2           ! (5_1) res *= dlexp;
 933         subcc   counter,7,counter       ! counter -= 7;
 934         ldd     [%l4+8],%f60            ! (6_1) dexp_lo = ((double*)addr)[1];
 935         fpadd32 %f28,DC2,%f18           ! (5_0) res_c = vis_fpadd32(res,DC2);
 936 
 937         fmuld   K6,%f40,%f22            ! (3_0) res = K6 * xx;
 938         add     %o2,8,%i3               ! (5_0) hx += 8;
 939         st      %f2,[%l6]               ! (5_1) ((float*)py)[0] = ((float*)res)[0];
 940         fsubd   %f24,%f8,%f10           ! (4_0) xx = res - res_c;
 941 
 942         fmuld   %f34,%f38,%f24          ! (1_0) res *= xx;
 943         st      %f3,[%l6+4]             ! (5_1) ((float*)py)[1] = ((float*)res)[1];
 944         bpos,pt %icc,.main_loop
 945         faddd   %f62,K4,%f34            ! (2_0) res += K4;
 946 
 947         add     counter,7,counter
 948 .tail:
     ! Drain of the software pipeline: completes and stores the results that
     ! are still in flight, one per step (up to six: stages 6_2, 0_1, 1_1,
     ! 2_1, 3_1 and 4_1).  Before each step counter is decremented; as soon
     ! as it goes negative the code leaves for .begin with %o4 set to the
     ! next output address.  That "mov" sits in the delay slot of an annulled
     ! branch (bneg,a), so it only executes when the branch is taken.
     ! The output pointer alternates between %i0 and %i1 from step to step.
 949         add     %o7,1534,%o7            ! (5_0) iexp += 0x5fe;
 950         subcc   counter,1,counter       ! if (--counter < 0)
 951         bneg,a  .begin                  !   leave for .begin
 952         mov     %i0,%o4                 !   with %o4 = py (runs only if taken)
 953
 954         faddd   %f58,K1,%f58            ! (0_1) res += K1;
 955
 956         faddd   %f26,%f60,%f8           ! (6_2) res += dexp_lo;
 957
 958         faddd   %f22,K5,%f62            ! (3_1) res += K5;
 959         fmuld   %f10,%f44,%f60          ! (4_1) xx *= dtmp0;
 960
 961         faddd   %f24,K2,%f26            ! (1_1) res += K2;
 962         add     %i1,stridex,%l6         ! px += stridex (result kept in %l6)
 963         ldd     [%l2],%f24              ! (0_1) dexp_hi = ((double*)addr)[0];
 964         fmuld   %f34,%f36,%f34          ! (2_1) res *= xx;
 965
 966         fmuld   %f58,%f32,%f58          ! (0_1) res *= xx;
 967
 968         add     %i0,stridey,%i1         ! py += stridey (next py kept in %i1)
 969         faddd   %f8,%f30,%f30           ! (6_2) res += dexp_hi;
 970
 971         fmuld   %f62,%f40,%f32          ! (3_1) res *= xx;
 972         ldd     [%fp+tmp1],%f62         ! (6_2) dlexp = *(double*)lexp;
 973
 974         fmuld   %f26,%f38,%f26          ! (1_1) res *= xx;
 975         faddd   %f34,K3,%f34            ! (2_1) res += K3;
 976
 977         fmuld   %f24,%f58,%f58          ! (0_1) res = dexp_hi * res;
 978
 979         fmuld   %f30,%f62,%f2           ! (6_2) res *= dlexp;
 980         ldd     [%l2+8],%f30            ! (0_1) dexp_lo = ((double*)addr)[1];
 981
 982         fmuld   K6,%f60,%f62            ! (4_1) res = K6 * xx;
 983         st      %f2,[%i0]               ! (6_2) ((float*)py)[0] = ((float*)res)[0];
 984
 985         fmuld   %f34,%f36,%f28          ! (2_1) res *= xx;
 986         st      %f3,[%i0+4]             ! (6_2) ((float*)py)[1] = ((float*)res)[1];
 987         faddd   %f32,K4,%f32            ! (3_1) res += K4;
 988
 989         subcc   counter,1,counter       ! if (--counter < 0)
 990         bneg,a  .begin                  !   leave for .begin
 991         mov     %i1,%o4                 !   with %o4 = py (runs only if taken)
 992
 993         faddd   %f26,K1,%f26            ! (1_1) res += K1;
 994
 995         faddd   %f58,%f30,%f8           ! (0_1) res += dexp_lo;
 996
 997         add     %l6,stridex,%l6         ! px += stridex
 998         faddd   %f62,K5,%f62            ! (4_1) res += K5;
 999
1000         fmuld   %f32,%f40,%f34          ! (3_1) res *= xx;
1001         add     %i1,stridey,%i0         ! py += stridey (next py kept in %i0)
1002         ldd     [%i2],%f22              ! (1_1) dexp_hi = ((double*)addr)[0];
1003         faddd   %f28,K2,%f32            ! (2_1) res += K2;
1004
1005         fmuld   %f26,%f38,%f26          ! (1_1) res *= xx;
1006
1007         faddd   %f8,%f24,%f24           ! (0_1) res += dexp_hi;
1008
1009         fmuld   %f62,%f60,%f38          ! (4_1) res *= xx;
1010         ldd     [%fp+tmp2],%f62         ! (0_1) dlexp = *(double*)lexp;
1011
1012         fmuld   %f32,%f36,%f32          ! (2_1) res *= xx;
1013         faddd   %f34,K3,%f34            ! (3_1) res += K3;
1014
1015         fmuld   %f22,%f26,%f26          ! (1_1) res = dexp_hi * res;
1016
1017         fmuld   %f24,%f62,%f2           ! (0_1) res *= dlexp;
1018         ldd     [%i2+8],%f24            ! (1_1) dexp_lo = ((double*)addr)[1];
1019
1020         st      %f2,[%i1]               ! (0_1) ((float*)py)[0] = ((float*)res)[0];
1021
1022         fmuld   %f34,%f40,%f44          ! (3_1) res *= xx;
1023         st      %f3,[%i1+4]             ! (0_1) ((float*)py)[1] = ((float*)res)[1];
1024         faddd   %f38,K4,%f38            ! (4_1) res += K4;
1025
1026         subcc   counter,1,counter       ! if (--counter < 0)
1027         bneg,a  .begin                  !   leave for .begin
1028         mov     %i0,%o4                 !   with %o4 = py (runs only if taken)
1029
1030         faddd   %f32,K1,%f32            ! (2_1) res += K1;
1031
1032         add     %l6,stridex,%l6         ! px += stridex
1033         faddd   %f26,%f24,%f8           ! (1_1) res += dexp_lo;
1034
1035         add     %i0,stridey,%i1         ! py += stridey (next py kept in %i1)
1036
1037         fmuld   %f38,%f60,%f34          ! (4_1) res *= xx;
1038         ldd     [%i4],%f24              ! (2_1) dexp_hi = ((double*)addr)[0];
1039         faddd   %f44,K2,%f38            ! (3_1) res += K2;
1040
1041         fmuld   %f32,%f36,%f32          ! (2_1) res *= xx;
1042
1043         faddd   %f8,%f22,%f22           ! (1_1) res += dexp_hi;
1044
1045         ldd     [%fp+tmp3],%f62         ! (1_1) dlexp = *(double*)lexp;
1046
1047         fmuld   %f38,%f40,%f38          ! (3_1) res *= xx;
1048         faddd   %f34,K3,%f34            ! (4_1) res += K3;
1049
1050         fmuld   %f24,%f32,%f32          ! (2_1) res = dexp_hi * res;
1051
1052         fmuld   %f22,%f62,%f2           ! (1_1) res *= dlexp;
1053         ldd     [%i4+8],%f22            ! (2_1) dexp_lo = ((double*)addr)[1];
1054
1055         st      %f2,[%i0]               ! (1_1) ((float*)py)[0] = ((float*)res)[0];
1056
1057         fmuld   %f34,%f60,%f28          ! (4_1) res *= xx;
1058         st      %f3,[%i0+4]             ! (1_1) ((float*)py)[1] = ((float*)res)[1];
1059
1060         subcc   counter,1,counter       ! if (--counter < 0)
1061         bneg,a  .begin                  !   leave for .begin
1062         mov     %i1,%o4                 !   with %o4 = py (runs only if taken)
1063
1064         faddd   %f38,K1,%f38            ! (3_1) res += K1;
1065
1066         faddd   %f32,%f22,%f8           ! (2_1) res += dexp_lo;
1067
1068         add     %l6,stridex,%l6         ! px += stridex
1069
1070         add     %i1,stridey,%i0         ! py += stridey (next py kept in %i0)
1071         ldd     [%i5],%f22              ! (3_1) dexp_hi = ((double*)addr)[0];
1072         faddd   %f28,K2,%f36            ! (4_1) res += K2;
1073
1074         fmuld   %f38,%f40,%f38          ! (3_1) res *= xx;
1075
1076         faddd   %f8,%f24,%f24           ! (2_1) res += dexp_hi;
1077
1078         ldd     [%fp+tmp4],%f62         ! (2_1) dlexp = *(double*)lexp;
1079
1080         fmuld   %f36,%f60,%f36          ! (4_1) res *= xx;
1081
1082         fmuld   %f22,%f38,%f38          ! (3_1) res = dexp_hi * res;
1083
1084         fmuld   %f24,%f62,%f2           ! (2_1) res *= dlexp;
1085         ldd     [%i5+8],%f24            ! (3_1) dexp_lo = ((double*)addr)[1];
1086
1087         st      %f2,[%i1]               ! (2_1) ((float*)py)[0] = ((float*)res)[0];
1088
1089         st      %f3,[%i1+4]             ! (2_1) ((float*)py)[1] = ((float*)res)[1];
1090
1091         subcc   counter,1,counter       ! if (--counter < 0)
1092         bneg,a  .begin                  !   leave for .begin
1093         mov     %i0,%o4                 !   with %o4 = py (runs only if taken)
1094
1095         faddd   %f36,K1,%f36            ! (4_1) res += K1;
1096
1097         faddd   %f38,%f24,%f8           ! (3_1) res += dexp_lo;
1098
1099         add     %i0,stridey,%i1         ! py += stridey (next py kept in %i1)
1100
1101         add     %l6,stridex,%l6         ! px += stridex
1102         ldd     [%l1],%f30              ! (4_1) dexp_hi = ((double*)addr)[0];
1103
1104         fmuld   %f36,%f60,%f36          ! (4_1) res *= xx;
1105
1106         faddd   %f8,%f22,%f8            ! (3_1) res += dexp_hi;
1107
1108         ldd     [%fp+tmp5],%f62         ! (3_1) dlexp = *(double*)lexp;
1109
1110         fmuld   %f30,%f36,%f36          ! (4_1) res = dexp_hi * res;
1111
1112         fmuld   %f8,%f62,%f8            ! (3_1) res *= dlexp;
1113         ldd     [%l1+8],%f34            ! (4_1) dexp_lo = ((double*)addr)[1];
1114
1115         st      %f8,[%i0]               ! (3_1) ((float*)py)[0] = ((float*)res)[0];
1116
1117         st      %f9,[%i0+4]             ! (3_1) ((float*)py)[1] = ((float*)res)[1];
1118
1119         subcc   counter,1,counter       ! if (--counter < 0)
1120         bneg,a  .begin                  !   leave for .begin
1121         mov     %i1,%o4                 !   with %o4 = py (runs only if taken)
1122
1123         faddd   %f36,%f34,%f8           ! (4_1) res += dexp_lo;
1124
1125         add     %l6,stridex,%i0         ! px += stridex (result kept in %i0)
1126
1127         add     %i1,stridey,%l6         ! py += stridey (result kept in %l6)
1128
1129         faddd   %f8,%f30,%f30           ! (4_1) res += dexp_hi;
1130
1131         ldd     [%fp+tmp6],%f18         ! (4_1) dlexp = *(double*)lexp;
1132
1133         fmuld   %f30,%f18,%f6           ! (4_1) res *= dlexp;
1134
1135         st      %f6,[%i1]               ! (4_1) ((float*)py)[0] = ((float*)res)[0];
1136
1137         st      %f7,[%i1+4]             ! (4_1) ((float*)py)[1] = ((float*)res)[1];
1138
1139         ba      .begin                  ! last in-flight result stored: back to .begin
1140         add     %i1,stridey,%o4         ! (delay slot) %o4 = py + stridey
1141 
1142         .align  16
1143 .spec0:
     ! Special-case path for a single element handled outside the pipelined
     ! loop: the result is simply DONE / x, computed with a real divide.
     ! It stores the result at %o4, steps both pointers, consumes one element
     ! from counter and resumes at .begin1.
     ! NOTE(review): the test that branches here is outside this chunk;
     ! DONE is presumably the register holding 1.0 - confirm.
1144         fdivd   DONE,%f0,%f0            ! res = DONE / res;
1145         add     %i1,stridex,%i1         ! px += stridex
1146         st      %f0,[%o4]               ! ((float*)py)[0] = ((float*)&res)[0];
1147         st      %f1,[%o4+4]             ! ((float*)py)[1] = ((float*)&res)[1];
1148         add     %o4,stridey,%o4         ! py += stridey
1149         ba      .begin1
1150         sub     counter,1,counter       ! (delay slot) counter--;
1151 
1152         .align  16
1153 .spec1:
     ! Special-case path for a single element handled outside the pipelined
     ! loop.  Four outcomes, tested in this order:
     !   (%i2 | %l4) == 0    -> res = DONE / res, store and go on (label 2)
     !   hx (%g1) < 0        -> res = sqrt(res),  store and go on (label 2)
     !   hx >= %i4           -> rescale via label 1, then join .cont_spec
     !   otherwise           -> rescale inline,   then join .cont_spec
     ! Both rescale paths turn the raw 64-bit pattern of x into a double with
     ! fxtod, and hand .cont_spec: %f16 = res & DC0, %f44 = %f16 | DC1,
     ! %o7 = -((hx >> 21) - 537), %o2 = (hx >> 10) & 0x7f8.
     ! NOTE(review): %i2, %l4, %i4 and %o3 are set before this label, outside
     ! this chunk.  Presumably %i2/%l4 are |hx| and the low word (so the first
     ! test is x == +-0), %i4 is 0x00080000 as in the .updateN helpers, and
     ! %o3 is the base of .CONST_TBL - confirm.  537 = 1074/2, presumably the
     ! half-exponent correction for reading a subnormal's bits as an integer.
1154         orcc    %i2,%l4,%g0             ! test (%i2 | %l4) == 0
1155         bz,a    2f
1156         fdivd   DONE,%f0,%f0            ! res = DONE / res; (runs only if taken)
1157
1158         cmp     %g1,0
1159         bl,a    2f                      ! hx < 0
1160         fsqrtd  %f0,%f0                 ! res = sqrt(res); (runs only if taken)
1161
1162         cmp     %g1,%i4
1163         bge,a   1f                      ! hx >= %i4: take the masked path
1164         ldd     [%o3+0x50],%f18         ! mask used at label 1 (runs only if taken)
1165
1166         fxtod   %f0,%f0                 ! res = (double)*(long long*)&res;
1167         st      %f0,[%fp+tmp0]          ! spill the new high word ...
1168
1169         fand    %f0,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
1170         ld      [%fp+tmp0],%g1          ! ... and reload it as hx
1171
1172         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
1173         for     %f16,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
1174
1175         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
1176         sub     %o7,537,%o7             ! iexp -= 537;
1177
1178         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
1179         ba      .cont_spec
1180         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp; (delay slot)
1181
1182 1:
1183         fand    %f0,%f18,%f0            ! res = vis_fand(res,DC4);
1184
1185         ldd     [%o3+0x58],%f28         ! constant added back below (D2ON51)
1186         fxtod   %f0,%f0                 ! res = (double)*(long long*)&res;
1187
1188         faddd   %f0,%f28,%f0            ! res += D2ON51;
1189         st      %f0,[%fp+tmp0]          ! spill the new high word ...
1190
1191         fand    %f0,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
1192         ld      [%fp+tmp0],%g1          ! ... and reload it as hx
1193
1194         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
1195         for     %f16,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
1196
1197         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
1198         sub     %o7,537,%o7             ! iexp -= 537;
1199
1200         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
1201         ba      .cont_spec
1202         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp; (delay slot)
1203
1204 2:
1205         add     %i1,stridex,%i1         ! px += stridex
1206         st      %f0,[%o4]               ! ((float*)py)[0] = ((float*)&res)[0];
1207         st      %f1,[%o4+4]             ! ((float*)py)[1] = ((float*)&res)[1];
1208         add     %o4,stridey,%o4         ! py += stridey
1209         ba      .begin1
1210         sub     counter,1,counter       ! (delay slot) counter--;
1211 
1212         .align  16
1213 .update0:
     ! Out-of-line helper paired with .cont0.  When more than one element is
     ! still pending it records %l6 - stridex in tmp_px and counter - 1 in
     ! tmp_counter, then cuts counter down to 1 before resuming at .cont0.
     ! NOTE(review): tmp_px/tmp_counter are macros defined outside this chunk;
     ! presumably this defers the element at tmp_px (a special input) to a
     ! later pass through .begin - confirm against the branch site.
1214         cmp     counter,1
1215         ble     .cont0                  ! counter <= 1: resume unchanged
1216         nop                             ! (delay slot)
1217
1218         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1219         sub     counter,1,tmp_counter   ! tmp_counter = counter - 1
1220
1221         ba      .cont0
1222         mov     1,counter               ! (delay slot) counter = 1
1223 
1224         .align  16
1225 .update1:
     ! Out-of-line fix-up for pipeline slot (0_0); always returns to .cont1.
     !   counter <= 1                 -> resume unchanged
     !   hx < 0, or hx | low word == 0 -> label 1: record %l6 - stridex in
     !                                    tmp_px and counter - 1 in tmp_counter,
     !                                    set counter = 1
     !   hx >= 0x00080000             -> label 2: masked rescale
     !   otherwise                    -> inline rescale
     ! Both rescale paths convert the raw 64-bit pattern in %f8 to a double
     ! with fxtod and rebuild %f16, %f28, %o7 and %o2 for the slot.
     ! NOTE(review): the branch that comes here is outside this chunk; the
     ! visible .update17/.update19 sites branch on hx < 0x00100000, so this is
     ! presumably the same test.  %o3 is presumably the base of .CONST_TBL
     ! (0x50 = DC4, 0x58 = the next entry).  537 = 1074/2, presumably the
     ! half-exponent correction for a subnormal read as an integer - confirm.
1226         cmp     counter,1
1227         ble     .cont1                  ! counter <= 1: resume unchanged
1228         sub     %l6,stridex,%i1         ! (delay slot, always runs) %i1 = %l6 - stridex
1229
1230         ld      [%i1+4],%i2             ! %i2 = second word of the double at %i1
1231         cmp     %g1,0
1232         bl      1f                      ! hx < 0: label 1
1233
1234         orcc    %g1,%i2,%g0             ! (delay slot of bl) test hx | %i2
1235         bz      1f                      ! both words zero: label 1
1236         sethi   %hi(0x00080000),%i3     ! (delay slot) %i3 = 0x00080000
1237
1238         cmp     %g1,%i3
1239         bge,a   2f                      ! hx >= 0x00080000: label 2
1240         ldd     [%o3+0x50],%f18         ! mask used at label 2 (runs only if taken)
1241
1242         fxtod   %f8,%f8                 ! res = (double)*(long long*)&res;
1243         st      %f8,[%fp+tmp7]          ! spill the new high word ...
1244
1245         fand    %f8,DC0,%f16            ! (0_0) res = vis_fand(res,DC0);
1246         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1247
1248         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
1249         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
1250         for     %f16,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
1251
1252         sub     %o7,537,%o7             ! iexp -= 537;
1253
1254         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
1255
1256         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
1257         ba      .cont1
1258         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe; (delay slot)
1259 2:
1260         fand    %f8,%f18,%f8            ! res = vis_fand(res, constant at %o3+0x50);
1261         fxtod   %f8,%f8                 ! res = (double)*(long long*)&res;
1262         ldd     [%o3+0x58],%f18
1263         faddd   %f8,%f18,%f8            ! res += constant at %o3+0x58;
1264         st      %f8,[%fp+tmp7]          ! spill the new high word ...
1265
1266         fand    %f8,DC0,%f16            ! (0_0) res = vis_fand(res,DC0);
1267         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1268
1269         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
1270         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
1271         for     %f16,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
1272
1273         sub     %o7,537,%o7             ! iexp -= 537;
1274
1275         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
1276
1277         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
1278         ba      .cont1
1279         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe; (delay slot)
1280 1:
1281         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1282         sub     counter,1,tmp_counter   ! tmp_counter = counter - 1
1283
1284         ba      .cont1
1285         mov     1,counter               ! (delay slot) counter = 1
1286 
1287         .align  16
1288 .update2:
     ! Out-of-line helper paired with .cont2.  When more than two elements
     ! are still pending it records %l6 - stridex in tmp_px and counter - 2 in
     ! tmp_counter, then cuts counter down to 2 before resuming at .cont2.
     ! NOTE(review): tmp_px/tmp_counter are macros defined outside this chunk;
     ! presumably this defers the element at tmp_px (a special input) to a
     ! later pass through .begin - confirm against the branch site.
1289         cmp     counter,2
1290         ble     .cont2                  ! counter <= 2: resume unchanged
1291         nop                             ! (delay slot)
1292
1293         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1294         sub     counter,2,tmp_counter   ! tmp_counter = counter - 2
1295
1296         ba      .cont2
1297         mov     2,counter               ! (delay slot) counter = 2
1298 
1299         .align  16
1300 .update3:
     ! Out-of-line fix-up for pipeline slot (1_0); always returns to .cont3.
     !   counter <= 2                 -> resume unchanged
     !   hx < 0, or hx | low word == 0 -> label 1: record %l6 - stridex in
     !                                    tmp_px and counter - 2 in tmp_counter,
     !                                    set counter = 2
     !   hx >= 0x00080000             -> label 2: masked rescale
     !   otherwise                    -> inline rescale
     ! Both rescale paths convert the raw 64-bit pattern in %f0 to a double
     ! with fxtod and rebuild %f16, %f44, %o7 and %o2 for the slot.
     ! NOTE(review): unlike .update1, iexp is neither negated nor biased by
     ! 0x5fe here; presumably the code after .cont3 does that - confirm.
     ! %o3 is presumably the base of .CONST_TBL (0x50 = DC4); 537 = 1074/2,
     ! presumably the half-exponent correction for a subnormal read as an
     ! integer - confirm.
1301         cmp     counter,2
1302         ble     .cont3                  ! counter <= 2: resume unchanged
1303         sub     %l6,stridex,%i1         ! (delay slot, always runs) %i1 = %l6 - stridex
1304
1305         ld      [%i1+4],%i2             ! %i2 = second word of the double at %i1
1306         cmp     %g1,0
1307         bl      1f                      ! hx < 0: label 1
1308
1309         orcc    %g1,%i2,%g0             ! (delay slot of bl) test hx | %i2
1310         bz      1f                      ! both words zero: label 1
1311         sethi   %hi(0x00080000),%i3     ! (delay slot) %i3 = 0x00080000
1312
1313         cmp     %g1,%i3
1314         bge,a   2f                      ! hx >= 0x00080000: label 2
1315         ldd     [%o3+0x50],%f18         ! mask used at label 2 (runs only if taken)
1316
1317         fxtod   %f0,%f0                 ! res = (double)*(long long*)&res;
1318         st      %f0,[%fp+tmp7]          ! spill the new high word ...
1319
1320         fand    %f0,DC0,%f16            ! (1_0) res = vis_fand(res,DC0);
1321         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1322
1323         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
1324         for     %f16,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
1325
1326         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
1327         sub     %o7,537,%o7             ! iexp -= 537;
1328         ba      .cont3
1329         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8; (delay slot)
1330 2:
1331         fand    %f0,%f18,%f0            ! res = vis_fand(res, constant at %o3+0x50);
1332         fxtod   %f0,%f0                 ! res = (double)*(long long*)&res;
1333         ldd     [%o3+0x58],%f18
1334         faddd   %f0,%f18,%f0            ! res += constant at %o3+0x58;
1335         st      %f0,[%fp+tmp7]          ! spill the new high word ...
1336
1337         fand    %f0,DC0,%f16            ! (1_0) res = vis_fand(res,DC0);
1338         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1339
1340         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
1341         for     %f16,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
1342
1343         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
1344         sub     %o7,537,%o7             ! iexp -= 537;
1345         ba      .cont3
1346         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8; (delay slot)
1347 1:
1348         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1349         sub     counter,2,tmp_counter   ! tmp_counter = counter - 2
1350
1351         ba      .cont3
1352         mov     2,counter               ! (delay slot) counter = 2
1353 
1354         .align  16
1355 .update4:
     ! Out-of-line helper paired with .cont4.  When more than three elements
     ! are still pending it records %l6 - stridex in tmp_px and counter - 3 in
     ! tmp_counter, then cuts counter down to 3 before resuming at .cont4.
     ! NOTE(review): tmp_px/tmp_counter are macros defined outside this chunk;
     ! presumably this defers the element at tmp_px (a special input) to a
     ! later pass through .begin - confirm against the branch site.
1356         cmp     counter,3
1357         ble     .cont4                  ! counter <= 3: resume unchanged
1358         nop                             ! (delay slot)
1359
1360         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1361         sub     counter,3,tmp_counter   ! tmp_counter = counter - 3
1362
1363         ba      .cont4
1364         mov     3,counter               ! (delay slot) counter = 3
1365 
1366         .align  16
1367 .update5:
     ! Out-of-line fix-up for pipeline slot (2_0); always returns to .cont5.
     !   counter <= 3                 -> resume unchanged
     !   hx < 0, or hx | low word == 0 -> label 1: record %l6 - stridex in
     !                                    tmp_px and counter - 3 in tmp_counter,
     !                                    set counter = 3
     !   hx >= 0x00080000             -> label 2: masked rescale
     !   otherwise                    -> inline rescale
     ! Both rescale paths convert the raw 64-bit pattern in %f6 to a double
     ! with fxtod and rebuild %f16, %f28, %o7 and %o2 for the slot.
     ! NOTE(review): iexp is neither negated nor biased by 0x5fe here;
     ! presumably the code after .cont5 does that - confirm.  %o3 is
     ! presumably the base of .CONST_TBL (0x50 = DC4); 537 = 1074/2,
     ! presumably the half-exponent correction for a subnormal read as an
     ! integer - confirm.
1368         cmp     counter,3
1369         ble     .cont5                  ! counter <= 3: resume unchanged
1370         sub     %l6,stridex,%i1         ! (delay slot, always runs) %i1 = %l6 - stridex
1371
1372         ld      [%i1+4],%i3             ! %i3 = second word of the double at %i1
1373         cmp     %g1,0
1374         bl      1f                      ! hx < 0: label 1
1375
1376         orcc    %g1,%i3,%g0             ! (delay slot of bl) test hx | %i3
1377         bz      1f                      ! both words zero: label 1
1378         sethi   %hi(0x00080000),%i4     ! (delay slot) %i4 = 0x00080000
1379
1380         cmp     %g1,%i4
1381         bge,a   2f                      ! hx >= 0x00080000: label 2
1382         ldd     [%o3+0x50],%f18         ! mask used at label 2 (runs only if taken)
1383
1384         fxtod   %f6,%f6                 ! res = (double)*(long long*)&res;
1385         st      %f6,[%fp+tmp7]          ! spill the new high word ...
1386
1387         fand    %f6,DC0,%f16            ! (2_0) res = vis_fand(res,DC0);
1388         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1389
1390         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
1391         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
1392
1393         sub     %o7,537,%o7             ! iexp -= 537;
1394         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
1395         ba      .cont5
1396         for     %f16,DC1,%f28           ! (2_0) res = vis_for(res,DC1); (delay slot)
1397 2:
1398         fand    %f6,%f18,%f6            ! res = vis_fand(res, constant at %o3+0x50);
1399         fxtod   %f6,%f6                 ! res = (double)*(long long*)&res;
1400         ldd     [%o3+0x58],%f18
1401         faddd   %f6,%f18,%f6            ! res += constant at %o3+0x58;
1402         st      %f6,[%fp+tmp7]          ! spill the new high word ...
1403
1404         fand    %f6,DC0,%f16            ! (2_0) res = vis_fand(res,DC0);
1405         ld      [%fp+tmp7],%g1          ! ... and reload it as hx
1406
1407         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
1408         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
1409
1410         sub     %o7,537,%o7             ! iexp -= 537;
1411         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
1412         ba      .cont5
1413         for     %f16,DC1,%f28           ! (2_0) res = vis_for(res,DC1); (delay slot)
1414 1:
1415         sub     %l6,stridex,tmp_px      ! tmp_px = %l6 - stridex
1416         sub     counter,3,tmp_counter   ! tmp_counter = counter - 3
1417
1418         ba      .cont5
1419         mov     3,counter               ! (delay slot) counter = 3
1420 
1421         .align  16
1422 .update6:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont6.  When counter <= 4 it changes nothing.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 4 and counter = 4.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1423         cmp     counter,4
1424         ble     .cont6
1425         nop                             ! delay slot filler
1426 
1427         sub     %l6,stridex,tmp_px
1428         sub     counter,4,tmp_counter
1429 
1430         ba      .cont6
1431         mov     4,counter               ! delay slot: runs before .cont6
1432 
1433         .align  16
1434 .update7:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont7.  With counter <= 4 it returns at once;
             ! the faddd in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (kept in %i1):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 4, counter = 4;
             !   - %g1 < 0x00080000: convert %f0 with fxtod;
             !   - else (2:): AND %f0 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f0 to tmp7,
             ! reload it into %g1, rebuild %o7 and %o2 from it, and build
             ! %f44 from %f0.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1435         sub     %l6,stridex,%i1
1436         cmp     counter,4
1437         ble     .cont7
1438         faddd   %f34,K3,%f6             ! (6_1) res += K3;
1439 
1440         ld      [%i1+4],%i3
1441         cmp     %g1,0
1442         bl      1f                      ! %g1 negative: take path 1:
1443 
1444         orcc    %g1,%i3,%g0             ! bl delay slot; Z if both words 0
1445         bz      1f
1446         sethi   %hi(0x00080000),%i5     ! delay slot
1447 
1448         cmp     %g1,%i5
1449         bge,a   2f
1450         ldd     [%o3+0x50],%f18         ! annulled slot: only when 2: taken
1451 
1452         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1453         st      %f0,[%fp+tmp7]          ! spill upper word for reload
1454 
1455         fand    %f0,DC0,%f16            ! (3_0) res = vis_fand(res,DC0);
1456         ld      [%fp+tmp7],%g1
1457 
1458         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
1459         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
1460 
1461         sub     %o7,537,%o7
1462         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
1463         ba      .cont7
1464         for     %f16,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
1465 2:
1466         fand    %f0,%f18,%f0
1467         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1468         ldd     [%o3+0x58],%f18
1469         faddd   %f0,%f18,%f0
1470         st      %f0,[%fp+tmp7]
1471 
1472         fand    %f0,DC0,%f16            ! (3_0) res = vis_fand(res,DC0);
1473         ld      [%fp+tmp7],%g1
1474 
1475         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
1476         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
1477 
1478         sub     %o7,537,%o7
1479         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
1480         ba      .cont7
1481         for     %f16,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
1482 1:
1483         sub     %l6,stridex,tmp_px
1484         sub     counter,4,tmp_counter
1485 
1486         ba      .cont7
1487         mov     4,counter               ! delay slot: runs before .cont7
1488 
1489         .align  16
1490 .update8:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont8.  When counter <= 5 it changes nothing.
             ! Otherwise it sets tmp_px = %l6 (no stride subtracted here),
             ! tmp_counter = counter - 5 and counter = 5.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1491         cmp     counter,5
1492         ble     .cont8
1493         nop                             ! delay slot filler
1494 
1495         mov     %l6,tmp_px
1496         sub     counter,5,tmp_counter
1497 
1498         ba      .cont8
1499         mov     5,counter               ! delay slot: runs before .cont8
1500 
1501         .align  16
1502 .update9:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont9.  With counter <= 5 it returns at once;
             ! the load into %i3 and the fand in the ble delay slot run
             ! either way.  Otherwise:
             !   - %g1 < 0, or (%g1 | word at [%l6+4]) == 0: go to 1:, which
             !     sets tmp_px = %l6, tmp_counter = counter - 5, counter = 5;
             !   - %g1 < 0x00080000: convert %f8 with fxtod;
             !   - else (2:): AND %f8 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f8 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f24 from %f8.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1503         ld      [%l6+4],%i3
1504         cmp     counter,5
1505         ble     .cont9
1506         fand    %f0,DC0,%f16            ! (5_0) res = vis_fand(res,DC0);
1507 
1508         cmp     %g1,0
1509         bl      1f                      ! %g1 negative: take path 1:
1510 
1511         orcc    %g1,%i3,%g0             ! bl delay slot; Z if both words 0
1512         bz      1f
1513         sethi   %hi(0x00080000),%i1     ! delay slot
1514 
1515         cmp     %g1,%i1
1516         bge,a   2f
1517         ldd     [%o3+0x50],%f18         ! annulled slot: only when 2: taken
1518 
1519         fxtod   %f8,%f8                 ! res = *(long long*)&res;
1520         st      %f8,[%fp+tmp7]          ! spill upper word for reload
1521 
1522         fand    %f8,DC0,%f24            ! (4_0) res = vis_fand(res,DC0);
1523         ld      [%fp+tmp7],%g1
1524 
1525         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
1526         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
1527 
1528         sub     %o7,537,%o7
1529 
1530         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
1531         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
1532         ba      .cont9
1533         for     %f24,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
1534 2:
1535         fand    %f8,%f18,%f8
1536         fxtod   %f8,%f8                 ! res = *(long long*)&res;
1537         ldd     [%o3+0x58],%f18
1538         faddd   %f8,%f18,%f8
1539         st      %f8,[%fp+tmp7]
1540 
1541         fand    %f8,DC0,%f24            ! (4_0) res = vis_fand(res,DC0);
1542         ld      [%fp+tmp7],%g1
1543 
1544         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
1545         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
1546 
1547         sub     %o7,537,%o7
1548 
1549         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
1550         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
1551         ba      .cont9
1552         for     %f24,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
1553 1:
1554         mov     %l6,tmp_px
1555         sub     counter,5,tmp_counter
1556 
1557         ba      .cont9
1558         mov     5,counter               ! delay slot: runs before .cont9
1559 
1560         .align  16
1561 .update10:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont10.  When counter <= 6 it changes nothing.
             ! Otherwise it sets tmp_px = %i0, tmp_counter = counter - 6 and
             ! counter = 6.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1562         cmp     counter,6
1563         ble     .cont10
1564         nop                             ! delay slot filler
1565 
1566         mov     %i0,tmp_px
1567         sub     counter,6,tmp_counter
1568 
1569         ba      .cont10
1570         mov     6,counter               ! delay slot: runs before .cont10
1571 
1572         .align  16
1573 .update11:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont11.  With counter <= 6 it returns at once;
             ! the load into %i3 and the fand in the ble delay slot run
             ! either way.  Otherwise:
             !   - %g1 < 0, or (%g1 | word at [%i0+4]) == 0: go to 1:, which
             !     sets tmp_px = %i0, tmp_counter = counter - 6, counter = 6;
             !   - %g1 < 0x00080000: convert %f0 with fxtod;
             !   - else (2:): AND %f0 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f0 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f28 from %f0.  %i3 is reused for the 0x00080000
             ! threshold once the zero test has consumed the loaded word.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1574         ld      [%i0+4],%i3
1575         cmp     counter,6
1576         ble     .cont11
1577         fand    %f6,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
1578 
1579         cmp     %g1,0
1580         bl      1f                      ! %g1 negative: take path 1:
1581 
1582         orcc    %g1,%i3,%g0             ! bl delay slot; Z if both words 0
1583         bz      1f
1584         sethi   %hi(0x00080000),%i3     ! delay slot
1585 
1586         cmp     %g1,%i3
1587         bge,a   2f
1588         ldd     [%o3+0x50],%f18         ! annulled slot: only when 2: taken
1589 
1590         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1591         st      %f0,[%fp+tmp7]          ! spill upper word for reload
1592 
1593         fand    %f0,DC0,%f28            ! (5_0) res = vis_fand(res,DC0);
1594         ld      [%fp+tmp7],%g1
1595 
1596         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
1597         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
1598 
1599         sub     %o7,537,%o7
1600 
1601         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
1602 
1603         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
1604         ba      .cont11
1605         for     %f28,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
1606 2:
1607         fand    %f0,%f18,%f0
1608         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1609         ldd     [%o3+0x58],%f18
1610         faddd   %f0,%f18,%f0
1611         st      %f0,[%fp+tmp7]
1612 
1613         fand    %f0,DC0,%f28            ! (5_0) res = vis_fand(res,DC0);
1614         ld      [%fp+tmp7],%g1
1615 
1616         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
1617         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
1618 
1619         sub     %o7,537,%o7
1620 
1621         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
1622 
1623         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
1624         ba      .cont11
1625         for     %f28,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
1626 1:
1627         mov     %i0,tmp_px
1628         sub     counter,6,tmp_counter
1629 
1630         ba      .cont11
1631         mov     6,counter               ! delay slot: runs before .cont11
1632 
1633         .align  16
1634 .update12:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont12.  The faddd in the ble delay slot runs on
             ! both paths.  When counter <= 0 nothing else changes.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 0 and counter = 0.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1635         cmp     counter,0
1636         ble     .cont12
1637         faddd   %f34,K3,%f34            ! (2_1) res += K3;
1638 
1639         sub     %l6,stridex,tmp_px
1640         sub     counter,0,tmp_counter
1641 
1642         ba      .cont12
1643         mov     0,counter               ! delay slot: runs before .cont12
1644 
1645         .align  16
1646 .update13:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont13.  With counter <= 0 it returns at once;
             ! the fpadd32 in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (held in %l4, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 0, counter = 0;
             !   - %g1 < 0x00080000: convert %f6 with fxtod;
             !   - else (2:): AND %f6 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f6 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it,
             ! build %f44 from %f6 and redo the fpadd32 into %f18.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1647         sub     %l6,stridex,%l4
1648         cmp     counter,0
1649         ble     .cont13
1650         fpadd32 %f44,DC2,%f18           ! (6_1) res_c = vis_fpadd32(res,DC2);
1651 
1652         ld      [%l4+4],%l4
1653         cmp     %g1,0
1654         bl      1f                      ! %g1 negative: take path 1:
1655 
1656         orcc    %g1,%l4,%g0             ! bl delay slot; Z if both words 0
1657         bz      1f
1658         sethi   %hi(0x00080000),%l4     ! delay slot
1659 
1660         cmp     %g1,%l4
1661         bge,a   2f
1662         ldd     [%o3+0x50],%f62         ! annulled slot: only when 2: taken
1663 
1664         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1665         st      %f6,[%fp+tmp7]          ! spill upper word for reload
1666 
1667         fand    %f6,DC0,%f44            ! (6_0) res = vis_fand(res,DC0);
1668         ld      [%fp+tmp7],%g1
1669 
1670         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
1671         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
1672 
1673         sub     %o7,537,%o7
1674         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
1675         for     %f44,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
1676 
1677         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp;
1678         ba      .cont13
1679         fpadd32 %f44,DC2,%f18           ! (6_1) res_c = vis_fpadd32(res,DC2);
1680 2:
1681         fand    %f6,%f62,%f6
1682         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1683         ldd     [%o3+0x58],%f62
1684         faddd   %f6,%f62,%f6
1685         st      %f6,[%fp+tmp7]
1686 
1687         fand    %f6,DC0,%f44            ! (6_0) res = vis_fand(res,DC0);
1688         ld      [%fp+tmp7],%g1
1689 
1690         sra     %g1,21,%o7              ! (6_1) iexp = hx >> 21;
1691         sra     %g1,10,%o2              ! (6_1) hx >>= 10;
1692         for     %f44,DC1,%f44           ! (6_1) res = vis_for(res,DC1);
1693 
1694         sub     %o7,537,%o7
1695 
1696         and     %o2,2040,%o2            ! (6_1) hx &= 0x7f8;
1697         sub     %g0,%o7,%o7             ! (6_1) iexp = -iexp;
1698         ba      .cont13
1699         fpadd32 %f44,DC2,%f18           ! (6_1) res_c = vis_fpadd32(res,DC2);
1700 1:
1701         sub     %l6,stridex,tmp_px
1702         sub     counter,0,tmp_counter
1703 
1704         ba      .cont13
1705         mov     0,counter               ! delay slot: runs before .cont13
1706 
1707         .align  16
1708 .update14:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont14.  The faddd in the ble delay slot runs on
             ! both paths.  When counter <= 1 nothing else changes.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 1 and counter = 1.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1709         cmp     counter,1
1710         ble     .cont14
1711         faddd   %f34,K3,%f34            ! (3_1) res += K3;
1712 
1713         sub     %l6,stridex,tmp_px
1714         sub     counter,1,tmp_counter
1715 
1716         ba      .cont14
1717         mov     1,counter               ! delay slot: runs before .cont14
1718 
1719         .align  16
1720 .update15:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont15.  With counter <= 1 it returns at once;
             ! the fpadd32 in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (held in %l2, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 1, counter = 1;
             !   - %g1 < 0x00080000: convert %f0 with fxtod;
             !   - else (2:): AND %f0 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f0 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated, then + 1534) and %o2
             ! from it, build %f28 from %f0 and redo the fpadd32 into %f18.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1721         sub     %l6,stridex,%l2
1722         cmp     counter,1
1723         ble     .cont15
1724         fpadd32 %f28,DC2,%f18           ! (0_0) res_c = vis_fpadd32(res,DC2);
1725 
1726         ld      [%l2+4],%l2
1727         cmp     %g1,0
1728         bl      1f                      ! %g1 negative: take path 1:
1729 
1730         orcc    %g1,%l2,%g0             ! bl delay slot; Z if both words 0
1731         bz      1f
1732         sethi   %hi(0x00080000),%l2     ! delay slot
1733 
1734         cmp     %g1,%l2
1735         bge,a   2f
1736         ldd     [%o3+0x50],%f62         ! annulled slot: only when 2: taken
1737 
1738         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1739         st      %f0,[%fp+tmp7]          ! spill upper word for reload
1740 
1741         fand    %f0,DC0,%f18            ! (0_0) res = vis_fand(res,DC0);
1742         ld      [%fp+tmp7],%g1
1743 
1744         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
1745         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
1746 
1747         sub     %o7,537,%o7
1748         for     %f18,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
1749 
1750         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
1751 
1752         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
1753         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe;
1754         ba      .cont15
1755         fpadd32 %f28,DC2,%f18           ! (0_0) res_c = vis_fpadd32(res,DC2);
1756 2:
1757         fand    %f0,%f62,%f0
1758         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1759         ldd     [%o3+0x58],%f62
1760         faddd   %f0,%f62,%f0
1761         st      %f0,[%fp+tmp7]
1762 
1763         fand    %f0,DC0,%f18            ! (0_0) res = vis_fand(res,DC0);
1764         ld      [%fp+tmp7],%g1
1765 
1766         sra     %g1,21,%o7              ! (0_0) iexp = hx >> 21;
1767         sra     %g1,10,%o2              ! (0_0) hx >>= 10;
1768         for     %f18,DC1,%f28           ! (0_0) res = vis_for(res,DC1);
1769 
1770         sub     %o7,537,%o7
1771 
1772         sub     %g0,%o7,%o7             ! (0_0) iexp = -iexp;
1773 
1774         and     %o2,2040,%o2            ! (0_0) hx &= 0x7f8;
1775         add     %o7,1534,%o7            ! (0_0) iexp += 0x5fe;
1776         ba      .cont15
1777         fpadd32 %f28,DC2,%f18           ! (0_0) res_c = vis_fpadd32(res,DC2);
1778 1:
1779         sub     %l6,stridex,tmp_px
1780         sub     counter,1,tmp_counter
1781 
1782         ba      .cont15
1783         mov     1,counter               ! delay slot: runs before .cont15
1784 
1785         .align  16
1786 .update16:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont16.  The fand in the ble delay slot runs on
             ! both paths.  When counter <= 2 nothing else changes.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 2 and counter = 2.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1787         cmp     counter,2
1788         ble     .cont16
1789         fand    %f18,DC3,%f8            ! (0_0) res_c = vis_fand(res_c,DC3);
1790 
1791         sub     %l6,stridex,tmp_px
1792         sub     counter,2,tmp_counter
1793 
1794         ba      .cont16
1795         mov     2,counter               ! delay slot: runs before .cont16
1796 
1797         .align  16
1798 .update17:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont17.  With counter <= 2 it returns at once;
             ! the fand in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (held in %i2, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 2, counter = 2;
             !   - %g1 < 0x00080000: convert %f6 with fxtod;
             !   - else (2:): AND %f6 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f6 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f44 from %f6.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1799         sub     %l6,stridex,%i2
1800         cmp     counter,2
1801         ble     .cont17
1802         fand    %f0,DC0,%f16            ! (2_0) res = vis_fand(res,DC0);
1803 
1804         ld      [%i2+4],%i2
1805         cmp     %g1,0
1806         bl      1f                      ! %g1 negative: take path 1:
1807 
1808         orcc    %g1,%i2,%g0             ! bl delay slot; Z if both words 0
1809         bz      1f
1810         sethi   %hi(0x00080000),%i2     ! delay slot
1811 
1812         cmp     %g1,%i2
1813         bge,a   2f
1814         ldd     [%o3+0x50],%f2          ! annulled slot: only when 2: taken
1815 
1816         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1817         st      %f6,[%fp+tmp7]          ! spill upper word for reload
1818 
1819         fand    %f6,DC0,%f44            ! (1_0) res = vis_fand(res,DC0);
1820         ld      [%fp+tmp7],%g1
1821 
1822         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
1823         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
1824 
1825         sub     %o7,537,%o7
1826 
1827         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8;
1828         sub     %g0,%o7,%o7             ! (1_0) iexp = -iexp;
1829         ba      .cont17
1830         for     %f44,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
1831 2:
1832         fand    %f6,%f2,%f6
1833         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1834         ldd     [%o3+0x58],%f2
1835         faddd   %f6,%f2,%f6
1836         st      %f6,[%fp+tmp7]
1837 
1838         fand    %f6,DC0,%f44            ! (1_0) res = vis_fand(res,DC0);
1839         ld      [%fp+tmp7],%g1
1840 
1841         sra     %g1,21,%o7              ! (1_0) iexp = hx >> 21;
1842         sra     %g1,10,%o2              ! (1_0) hx >>= 10;
1843 
1844         sub     %o7,537,%o7
1845 
1846         and     %o2,2040,%o2            ! (1_0) hx &= 0x7f8;
1847         sub     %g0,%o7,%o7             ! (1_0) iexp = -iexp;
1848         ba      .cont17
1849         for     %f44,DC1,%f44           ! (1_0) res = vis_for(res,DC1);
1850 1:
1851         sub     %l6,stridex,tmp_px
1852         sub     counter,2,tmp_counter
1853 
1854         ba      .cont17
1855         mov     2,counter               ! delay slot: runs before .cont17
1856 
1857         .align  16
1858 .update18:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont18.  The fand in the ble delay slot runs on
             ! both paths.  When counter <= 3 nothing else changes.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 3 and counter = 3.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1859         cmp     counter,3
1860         ble     .cont18
1861         fand    %f18,DC3,%f8            ! (1_0) res_c = vis_fand(res_c,DC3);
1862 
1863         sub     %l6,stridex,tmp_px
1864         sub     counter,3,tmp_counter
1865 
1866         ba      .cont18
1867         mov     3,counter               ! delay slot: runs before .cont18
1868 
1869         .align  16
1870 .update19:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont19.  With counter <= 3 it returns at once;
             ! the fand in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (held in %i4, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 3, counter = 3;
             !   - %g1 < 0x00080000: convert %f0 with fxtod;
             !   - else (2:): AND %f0 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f0 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f28 from %f0.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1871         sub     %l6,stridex,%i4
1872         cmp     counter,3
1873         ble     .cont19
1874         fand    %f6,DC0,%f16            ! (3_0) res = vis_fand(res,DC0);
1875 
1876         ld      [%i4+4],%i4
1877         cmp     %g1,0
1878         bl      1f                      ! %g1 negative: take path 1:
1879 
1880         orcc    %g1,%i4,%g0             ! bl delay slot; Z if both words 0
1881         bz      1f
1882         sethi   %hi(0x00080000),%i4     ! delay slot
1883 
1884         cmp     %g1,%i4
1885         bge,a   2f
1886         ldd     [%o3+0x50],%f2          ! annulled slot: only when 2: taken
1887 
1888         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1889         st      %f0,[%fp+tmp7]          ! spill upper word for reload
1890 
1891         fand    %f0,DC0,%f28            ! (2_0) res = vis_fand(res,DC0);
1892         ld      [%fp+tmp7],%g1
1893 
1894         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
1895 
1896         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
1897         sub     %o7,537,%o7
1898 
1899         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
1900         sub     %g0,%o7,%o7             ! (2_0) iexp = -iexp;
1901         ba      .cont19
1902         for     %f28,DC1,%f28           ! (2_0) res = vis_for(res,DC1);
1903 2:
1904         fand    %f0,%f2,%f0
1905         fxtod   %f0,%f0                 ! res = *(long long*)&res;
1906         ldd     [%o3+0x58],%f2
1907         faddd   %f0,%f2,%f0
1908         st      %f0,[%fp+tmp7]
1909 
1910         fand    %f0,DC0,%f28            ! (2_0) res = vis_fand(res,DC0);
1911         ld      [%fp+tmp7],%g1
1912 
1913         sra     %g1,21,%o7              ! (2_0) iexp = hx >> 21;
1914 
1915         sra     %g1,10,%o2              ! (2_0) hx >>= 10;
1916         sub     %o7,537,%o7
1917 
1918         and     %o2,2040,%o2            ! (2_0) hx &= 0x7f8;
1919         sub     %g0,%o7,%o7             ! (2_0) iexp = -iexp;
1920         ba      .cont19
1921         for     %f28,DC1,%f28           ! (2_0) res = vis_for(res,DC1);
1922 1:
1923         sub     %l6,stridex,tmp_px
1924         sub     counter,3,tmp_counter
1925 
1926         ba      .cont19
1927         mov     3,counter               ! delay slot: runs before .cont19
1928 
1929         .align  16
1930 .update20:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont20.  The fand in the ble delay slot runs on
             ! both paths.  When counter <= 4 nothing else changes.
             ! Otherwise it sets tmp_px = %l6 - stridex,
             ! tmp_counter = counter - 4 and counter = 4.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
1931         cmp     counter,4
1932         ble     .cont20
1933         fand    %f18,DC3,%f4            ! (2_0) res_c = vis_fand(res_c,DC3);
1934 
1935         sub     %l6,stridex,tmp_px
1936         sub     counter,4,tmp_counter
1937 
1938         ba      .cont20
1939         mov     4,counter               ! delay slot: runs before .cont20
1940 
1941         .align  16
1942 .update21:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont21.  With counter <= 4 it returns at once;
             ! the fand in the ble delay slot runs either way.  Otherwise,
             ! with addr = %l6 - stridex (held in %i5, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 4, counter = 4;
             !   - %g1 < 0x00080000: convert %f6 with fxtod;
             !   - else (2:): AND %f6 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f6 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f44 from %f6.  Path 2: uses %f34 as its scratch register.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
1943         sub     %l6,stridex,%i5
1944         cmp     counter,4
1945         ble     .cont21
1946         fand    %f0,DC0,%f16            ! (4_0) res = vis_fand(res,DC0);
1947 
1948         ld      [%i5+4],%i5
1949         cmp     %g1,0
1950         bl      1f                      ! %g1 negative: take path 1:
1951 
1952         orcc    %g1,%i5,%g0             ! bl delay slot; Z if both words 0
1953         bz      1f
1954         sethi   %hi(0x00080000),%i5     ! delay slot
1955 
1956         cmp     %g1,%i5
1957         bge,a   2f
1958         ldd     [%o3+0x50],%f34         ! annulled slot: only when 2: taken
1959 
1960         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1961         st      %f6,[%fp+tmp7]          ! spill upper word for reload
1962 
1963         fand    %f6,DC0,%f44            ! (3_0) res = vis_fand(res,DC0);
1964         ld      [%fp+tmp7],%g1
1965 
1966         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
1967         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
1968 
1969         sub     %o7,537,%o7
1970         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
1971 
1972         sub     %g0,%o7,%o7             ! (3_0) iexp = -iexp;
1973         ba      .cont21
1974         for     %f44,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
1975 2:
1976         fand    %f6,%f34,%f6
1977         fxtod   %f6,%f6                 ! res = *(long long*)&res;
1978         ldd     [%o3+0x58],%f34
1979         faddd   %f6,%f34,%f6
1980         st      %f6,[%fp+tmp7]
1981 
1982         fand    %f6,DC0,%f44            ! (3_0) res = vis_fand(res,DC0);
1983         ld      [%fp+tmp7],%g1
1984 
1985         sra     %g1,21,%o7              ! (3_0) iexp = hx >> 21;
1986         sra     %g1,10,%o2              ! (3_0) hx >>= 10;
1987 
1988         sub     %o7,537,%o7
1989         and     %o2,2040,%o2            ! (3_0) hx &= 0x7f8;
1990 
1991         sub     %g0,%o7,%o7             ! (3_0) iexp = -iexp;
1992         ba      .cont21
1993         for     %f44,DC1,%f44           ! (3_0) res = vis_for(res,DC1);
1994 1:
1995         sub     %l6,stridex,tmp_px
1996         sub     counter,4,tmp_counter
1997 
1998         ba      .cont21
1999         mov     4,counter               ! delay slot: runs before .cont21
2000 
2001         .align  16
2002 .update22:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont22.  The fmuld in the ble delay slot runs on
             ! both paths.  When counter <= 5 nothing else changes.
             ! Otherwise it sets tmp_px = %i0 - stridex,
             ! tmp_counter = counter - 5 and counter = 5.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
2003         cmp     counter,5
2004         ble     .cont22
2005         fmuld   %f62,%f38,%f62          ! (1_0) res *= xx;
2006 
2007         sub     %i0,stridex,tmp_px
2008         sub     counter,5,tmp_counter
2009 
2010         ba      .cont22
2011         mov     5,counter               ! delay slot: runs before .cont22
2012 
2013         .align  16
2014 .update23:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont23.  With counter <= 5 it returns at once;
             ! the fand in the ble delay slot runs either way.  Otherwise,
             ! with addr = %i0 - stridex (held in %l1, which is then reused
             ! for the loaded word and for the threshold):
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 5, counter = 5;
             !   - %g1 < 0x00080000: convert %f0 with fxtod;
             !   - else (2:): AND %f0 with the double at [%o3+0x50], convert
             !     with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f0 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f24 from %f0.  Path 2: uses %f34 as its scratch register.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
2015         sub     %i0,stridex,%l1
2016         cmp     counter,5
2017         ble     .cont23
2018         fand    %f6,DC0,%f16            ! (5_0) res = vis_fand(res,DC0);
2019 
2020         ld      [%l1+4],%l1
2021         cmp     %g1,0
2022         bl      1f                      ! %g1 negative: take path 1:
2023 
2024         orcc    %g1,%l1,%g0             ! bl delay slot; Z if both words 0
2025         bz      1f
2026         sethi   %hi(0x00080000),%l1     ! delay slot
2027 
2028         cmp     %g1,%l1
2029         bge,a   2f
2030         ldd     [%o3+0x50],%f34         ! annulled slot: only when 2: taken
2031 
2032         fxtod   %f0,%f0                 ! res = *(long long*)&res;
2033         st      %f0,[%fp+tmp7]          ! spill upper word for reload
2034 
2035         fand    %f0,DC0,%f24            ! (4_0) res = vis_fand(res,DC0);
2036         ld      [%fp+tmp7],%g1
2037 
2038         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
2039 
2040         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
2041         sub     %o7,537,%o7
2042 
2043         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
2044         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
2045         ba      .cont23
2046         for     %f24,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
2047 2:
2048         fand    %f0,%f34,%f0
2049         fxtod   %f0,%f0                 ! res = *(long long*)&res;
2050         ldd     [%o3+0x58],%f34
2051         faddd   %f0,%f34,%f0
2052         st      %f0,[%fp+tmp7]
2053 
2054         fand    %f0,DC0,%f24            ! (4_0) res = vis_fand(res,DC0);
2055         ld      [%fp+tmp7],%g1
2056 
2057         sra     %g1,21,%o7              ! (4_0) iexp = hx >> 21;
2058 
2059         sra     %g1,10,%o2              ! (4_0) hx >>= 10;
2060         sub     %o7,537,%o7
2061 
2062         and     %o2,2040,%o2            ! (4_0) hx &= 0x7f8;
2063         sub     %g0,%o7,%o7             ! (4_0) iexp = -iexp;
2064         ba      .cont23
2065         for     %f24,DC1,%f24           ! (4_0) res = vis_for(res,DC1);
2066 1:
2067         sub     %i0,stridex,tmp_px
2068         sub     counter,5,tmp_counter
2069 
2070         ba      .cont23
2071         mov     5,counter               ! delay slot: runs before .cont23
2072 
2073         .align  16
2074 .update24:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont24.  The fmuld in the ble delay slot runs on
             ! both paths.  When counter <= 6 nothing else changes.
             ! Otherwise it sets tmp_px = %i1 - stridex,
             ! tmp_counter = counter - 6 and counter = 6.
             ! NOTE(review): presumably this ends the current pass early so a
             ! later pass can restart at tmp_px; the consumer of tmp_px and
             ! tmp_counter is outside this view - confirm.
2075         cmp     counter,6
2076         ble     .cont24
2077         fmuld   %f62,%f36,%f62          ! (2_0) res *= xx;
2078 
2079         sub     %i1,stridex,tmp_px
2080         sub     counter,6,tmp_counter
2081 
2082         ba      .cont24
2083         mov     6,counter               ! delay slot: runs before .cont24
2084 
2085         .align  16
2086 .update25:
             ! Out-of-line stub (its caller is outside this view) that always
             ! continues at .cont25.  With counter <= 6 it returns at once;
             ! the fand in the ble delay slot runs either way.  Otherwise,
             ! with addr = %i1 - stridex:
             !   - %g1 < 0, or (%g1 | word at [addr+4]) == 0: go to 1:, which
             !     sets tmp_px = addr, tmp_counter = counter - 6, counter = 6;
             !   - otherwise both words at addr are reloaded into %f10/%f11,
             !     and then:
             !       - %g1 < 0x00080000: convert %f10 with fxtod;
             !       - else (2:): AND %f10 with the double at [%o3+0x50],
             !         convert with fxtod, then add the double at [%o3+0x58].
             ! Both conversion paths store the upper word of %f10 to tmp7,
             ! reload it into %g1, rebuild %o7 (negated) and %o2 from it, and
             ! build %f28 from %f10.  %i3 is recomputed because the earlier
             ! load overwrote the address it held.
             ! NOTE(review): this looks like the subnormal-input fixup, with
             ! 537 = 1074/2 undoing the 2^1074 scaling that converting the raw
             ! bits introduces; if %o3 is the .CONST_TBL base then 0x50 is DC4.
             ! Confirm against the main loop.
2087         sub     %i1,stridex,%i3
2088         cmp     counter,6
2089         ble     .cont25
2090         fand    %f6,DC0,%f16            ! (6_0) res = vis_fand(res,DC0);
2091 
2092         ld      [%i3+4],%i3
2093         cmp     %g1,0
2094         bl      1f                      ! %g1 negative: take path 1:
2095 
2096         orcc    %g1,%i3,%g0             ! bl delay slot; Z if both words 0
2097         bz      1f
2098         nop                             ! delay slot filler
2099 
2100         sub     %i1,stridex,%i3         ! rebuild the element address
2101         ld      [%i3],%f10
2102         ld      [%i3+4],%f11
2103 
2104         sethi   %hi(0x00080000),%i3
2105 
2106         cmp     %g1,%i3
2107         bge,a   2f
2108         ldd     [%o3+0x50],%f60         ! annulled slot: only when 2: taken
2109 
2110         fxtod   %f10,%f10               ! res = *(long long*)&res;
2111         st      %f10,[%fp+tmp7]         ! spill upper word for reload
2112 
2113         fand    %f10,DC0,%f28           ! (5_0) res = vis_fand(res,DC0);
2114         ld      [%fp+tmp7],%g1
2115 
2116         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
2117 
2118         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
2119         sub     %o7,537,%o7
2120 
2121         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
2122         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
2123 
2124         ba      .cont25
2125         for     %f28,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
2126 2:
2127         fand    %f10,%f60,%f10
2128         fxtod   %f10,%f10               ! res = *(long long*)&res;
2129         ldd     [%o3+0x58],%f60
2130         faddd   %f10,%f60,%f10
2131         st      %f10,[%fp+tmp7]
2132 
2133         fand    %f10,DC0,%f28           ! (5_0) res = vis_fand(res,DC0);
2134         ld      [%fp+tmp7],%g1
2135 
2136         sra     %g1,21,%o7              ! (5_0) iexp = hx >> 21;
2137 
2138         sra     %g1,10,%o2              ! (5_0) hx >>= 10;
2139         sub     %o7,537,%o7
2140 
2141         and     %o2,2040,%o2            ! (5_0) hx &= 0x7f8;
2142         sub     %g0,%o7,%o7             ! (5_0) iexp = -iexp;
2143 
2144         ba      .cont25
2145         for     %f28,DC1,%f28           ! (5_0) res = vis_for(res,DC1);
2146 1:
2147         sub     %i1,stridex,tmp_px
2148         sub     counter,6,tmp_counter
2149 
2150         ba      .cont25
2151         mov     6,counter               ! delay slot: runs before .cont25
2152 
2153 .exit:
             ! Return sequence for __vrsqrt: ret transfers control back to the
             ! caller and the restore in its delay slot pops the register
             ! window.  The function returns nothing in a register here.
             ! NOTE(review): SET_SIZE is a macro defined outside this view,
             ! presumably recording the symbol size of __vrsqrt - confirm.
2154         ret
2155         restore                         ! delay slot of ret
2156         SET_SIZE(__vrsqrt)
2157