/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

        .file   "__vatan2f.S"

#include "libm.h"

        RO_DATA
        .align  64
.CONST_TBL:
        .word   0xbff921fb, 0x54442d18  ! -M_PI_2
        .word   0x3ff921fb, 0x54442d18  !  M_PI_2
        .word   0xbff921fb, 0x54442d18  ! -M_PI_2
        .word   0x3ff921fb, 0x54442d18  !  M_PI_2
        .word   0xc00921fb, 0x54442d18  ! -M_PI
        .word   0x400921fb, 0x54442d18  !  M_PI
        .word   0x80000000, 0x00000000  ! -0.0
        .word   0x00000000, 0x00000000  !  0.0

        .word   0xbff00000, 0x00000000  ! -1.0
        .word   0x3ff00000, 0x00000000  !  1.0

        .word   0x3fefffff, 0xfe79bf93  ! K0 =  9.99999997160545464888e-01
        .word   0xbfd55552, 0xf0db4320  ! K1 = -3.33332762919825514315e-01
        .word   0x3fc998f8, 0x2493d066  ! K2 =  1.99980752811487135558e-01
        .word   0xbfc240b8, 0xd994abf9  ! K3 = -1.42600160828209047720e-01
        .word   0x3fbbfc9e, 0x8c2b0243  ! K4 =  1.09323415013030928421e-01
        .word   0xbfb56013, 0x64b1cac3  ! K5 = -8.34972496830160174704e-02
        .word   0x3fad3ad7, 0x9f53e142  ! K6 =  5.70895559303061900411e-02
        .word   0xbf9f148f, 0x2a829af1  ! K7 = -3.03518647857811706139e-02
        .word   0x3f857a8c, 0x747ed314  ! K8 =  1.04876492549493055747e-02
        .word   0xbf5bdf39, 0x729124b6  ! K9 = -1.70117006406859722727e-03

        .word   0x3fe921fb, 0x54442d18  ! M_PI_4
        .word   0x36a00000, 0x00000000  ! 2^(-149)
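
! Layout note (added): cadd_arr below is set to .CONST_TBL+56 (the 0.0
! entry) and cmul_arr to .CONST_TBL+72 (the 1.0 entry), so the negative
! byte offsets ldiff0<<5 (0 or -32), signx0 (0 or -16) and signy0
! (0 or -8) index backwards into the eight quadrant constants above,
! while cmul0_ind = ldiff0<<3 selects between 1.0 and -1.0.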

#define counter         %o3
#define stridex         %i4
#define stridey         %i5
#define stridez         %l1
#define cmul_arr        %i0
#define cadd_arr        %i2
#define _0x7fffffff     %l0
#define _0x7f800000     %l2

#define K0              %f42
#define K1              %f44
#define K2              %f46
#define K3              %f48
#define K4              %f50
#define K5              %f52
#define K6              %f54
#define K7              %f56
#define K8              %f58
#define K9              %f60

#define tmp_counter     STACK_BIAS-32
#define tmp_py          STACK_BIAS-24
#define tmp_px          STACK_BIAS-16
#define tmp_pz          STACK_BIAS-8

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x20
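! (Added note: the four 8-byte slots tmp_counter, tmp_py, tmp_px and
! tmp_pz above account for the 0x20 bytes reserved here.)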

!--------------------------------------------------------------------
!               !!!!!   vatan2f algorithm       !!!!!
!       uy0 = *(int*)py;
!       ux0 = *(int*)px;
!       ay0 = uy0 & 0x7fffffff;
!       ax0 = ux0 & 0x7fffffff;
!       if ( ax0 >= 0x7f800000 || ay0 >= 0x7f800000 )
!       {
!               /* |X| or |Y| = NaN */
!               if ( ax0 > 0x7f800000 || ay0 > 0x7f800000 )
!               {
!                       ftmp0 = *(float*)&ax0 * *(float*)&ay0;
!                       *pz = ftmp0;
!               }
!               signx0 = (unsigned)ux0 >> 30;
!               signx0 &= 2;
!               signy0 = uy0 >> 31;
!               if (ay0 == 0x7f800000)
!                       signx0 = (ax0 == 0x7f800000) ? signx0 + 1 : 2;
!               else
!                       signx0 += signx0;
!               res = signx0 * M_PI_4;
!               signy0 <<= 3;
!               dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
!               res *= dtmp0;
!               ftmp0 = (float) res;
!               *pz = ftmp0;
!               goto next;
!       }
!       if ( ax0 == 0 && ay0 == 0 )
!       {
!               signy0 = uy0 >> 28;
!               signx0 = ux0 >> 27;
!               ldiff0 = ax0 - ay0;
!               ldiff0 >>= 31;
!               signx0 &= -16;
!               signy0 &= -8;
!               ldiff0 <<= 5;
!               signx0 += signy0;
!               res = *(double*)((char*)(cadd_arr + 7) + ldiff0 + signx0 + signy0);
!               ftmp0 = (float) res;
!               *pz = ftmp0;
!               goto next;
!       }
!       ldiff0 = ax0 - ay0;
!       ldiff0 >>= 31;
!       addrc0 = (char*)px - (char*)py;
!       addrc0 &= ldiff0;
!       fy0 = *(float*)((char*)py + addrc0);
!       fx0 = *(float*)((char*)px - addrc0);
!       itmp0 = *(int*)&fy0;
!       if((itmp0 & 0x7fffffff) < 0x00800000)
!       {
!               itmp0 >>= 28;
!               itmp0 &= -8;
!               fy0 = fabsf(fy0);
!               dtmp0 = (double) *(int*)&fy0;
!               dtmp0 *= C2ONM149;
!               dsign = *(double*)((char*)cmul_arr + itmp0);
!               dtmp0 *= dsign;
!               y0 = dtmp0;
!       }
!       else
!               y0 = (double)fy0;
!       itmp0 = *(int*)&fx0;
!       if((itmp0 & 0x7fffffff) < 0x00800000)
!       {
!               itmp0 >>= 28;
!               itmp0 &= -8;
!               fx0 = fabsf(fx0);
!               dtmp0 = (double) *(int*)&fx0;
!               dtmp0 *= C2ONM149;
!               dsign = *(double*)((char*)cmul_arr + itmp0);
!               dtmp0 *= dsign;
!               x0 = dtmp0;
!       }
!       else
!               x0 = (double)fx0;
!       px += stridex;
!       py += stridey;
!       x0 = y0 / x0;
!       x20 = x0 * x0;
!       dtmp0 = K9 * x20;
!       dtmp0 += K8;
!       dtmp0 *= x20;
!       dtmp0 += K7;
!       dtmp0 *= x20;
!       dtmp0 += K6;
!       dtmp0 *= x20;
!       dtmp0 += K5;
!       dtmp0 *= x20;
!       dtmp0 += K4;
!       dtmp0 *= x20;
!       dtmp0 += K3;
!       dtmp0 *= x20;
!       dtmp0 += K2;
!       dtmp0 *= x20;
!       dtmp0 += K1;
!       dtmp0 *= x20;
!       dtmp0 += K0;
!       x0 = dtmp0 * x0;
!       signy0 = uy0 >> 28;
!       signy0 &= -8;
!       signx0 = ux0 >> 27;
!       signx0 &= -16;
!       ltmp0 = ldiff0 << 5;
!       ltmp0 += (char*)cadd_arr;
!       ltmp0 += signx0;
!       cadd0 = *(double*)(ltmp0 + signy0);
!       cmul0_ind = ldiff0 << 3;
!       cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
!       dtmp0 = cmul0 * x0;
!       dtmp0 = cadd0 + dtmp0;
!       ftmp0 = (float)dtmp0;
!       *pz = ftmp0;
!       pz += stridez;
!
!--------------------------------------------------------------------
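!
!       Scalar sketch (added; illustrative C only, not part of the
!       build): per element, the steps above amount to evaluating an
!       odd minimax polynomial at q = y0/x0 (reduced so |q| <= 1) and
!       restoring the quadrant from the table constants:
!
!               double q2 = q * q;
!               double p = ((((((((K9 * q2 + K8) * q2 + K7) * q2 + K6)
!                   * q2 + K5) * q2 + K4) * q2 + K3) * q2 + K2)
!                   * q2 + K1) * q2 + K0;
!               *pz = (float)(cadd0 + cmul0 * (p * q));
!
!--------------------------------------------------------------------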

        ENTRY(__vatan2f)
        save    %sp,-SA(MINFRAME)-tmps,%sp
        PIC_SETUP(l7)
        PIC_SET(l7,.CONST_TBL,g5)

#ifdef __sparcv9
        ldx     [%fp+STACK_BIAS+176],%l7
#else
        ld      [%fp+STACK_BIAS+92],%l7
#endif

        st      %i0,[%fp+tmp_counter]
        sethi   %hi(0x7ffffc00),_0x7fffffff
        add     _0x7fffffff,1023,_0x7fffffff
        or      %g0,%i2,%o2
        sll     %l7,2,stridez

        sethi   %hi(0x7f800000),_0x7f800000
        mov     %g5,%g1

        or      %g0,stridey,%o4
        add     %g1,56,cadd_arr

        sll     %o2,2,stridey
        add     %g1,72,cmul_arr

        ldd     [%g1+80],K0
        ldd     [%g1+80+8],K1
        ldd     [%g1+80+16],K2
        ldd     [%g1+80+24],K3
        ldd     [%g1+80+32],K4
        ldd     [%g1+80+40],K5
        ldd     [%g1+80+48],K6
        ldd     [%g1+80+56],K7
        ldd     [%g1+80+64],K8
        ldd     [%g1+80+72],K9

        sll     stridex,2,stridex

        stx     %i1,[%fp+tmp_py]
        stx     %i3,[%fp+tmp_px]
.begin:
        ld      [%fp+tmp_counter],counter
        ldx     [%fp+tmp_py],%i1
        ldx     [%fp+tmp_px],%i3
        st      %g0,[%fp+tmp_counter]
.begin1:
        subcc   counter,1,counter
        bneg,pn %icc,.exit
        nop

        lda     [%i1]0x82,%l4           ! (0_0) uy0 = *(int*)py;

        lda     [%i3]0x82,%l3           ! (0_0) ux0 = *(int*)px;

        and     %l4,_0x7fffffff,%l7     ! (0_0) ay0 = uy0 & 0x7fffffff;

        cmp     %l7,_0x7f800000
        bge,pn  %icc,.spec0
        and     %l3,_0x7fffffff,%l6     ! (0_0) ax0 = ux0 & 0x7fffffff;

        cmp     %l6,_0x7f800000
        bge,pn  %icc,.spec0
        sethi   %hi(0x00800000),%o5

        cmp     %l6,%o5
        bl,pn   %icc,.spec1
        sub     %l6,%l7,%o2             ! (0_0) ldiff0 = ax0 - ay0;

        cmp     %l7,%o5
        bl,pn   %icc,.spec1
        nop

        stx     %o4,[%fp+tmp_pz]
        sra     %o2,31,%l7              ! (0_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (0_0) addrc0 = (char*)px - (char*)py;

        and     %l6,%l7,%o2             ! (0_0) addrc0 &= ldiff0;

        lda     [%i1+%o2]0x82,%f0       ! (0_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o2,%o4             ! (0_0) (char*)px - addrc0

        lda     [%o4]0x82,%f2           ! (0_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %l7,5,%l6               ! (0_0) ltmp0 = ldiff0 << 5;

        sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
        add     %i1,stridey,%i1         ! py += stridey

        add     %i3,stridex,%i3         ! px += stridex

        lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
        sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;

        add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;

        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        fstod   %f2,%f2                 ! (0_0) x0 = (double)fx0;

.spec1_cont:
        lda     [%i3]0x82,%l4           ! (1_0) ux0 = *(int*)px;
        and     %o5,-16,%o5             ! (0_0) signx0 &= -16;

        and     %o4,-8,%o4              ! (0_0) signy0 &= -8;

        fdivd   %f40,%f2,%f12           ! (0_0) x0 = y0 / x0;

        add     %l6,%o5,%o1             ! (0_0) ltmp0 += signx0;

        and     %l4,_0x7fffffff,%l6     ! (1_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5

        cmp     %l6,%o5
        bl,pn   %icc,.u0
        and     %l3,_0x7fffffff,%g1     ! (1_0) ay0 = uy0 & 0x7fffffff;
.c0:
        cmp     %g1,%o5
        bl,pn   %icc,.u1
        ldd     [%o1+%o4],%f34          ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
.c1:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u2
        sub     %l6,%g1,%o1             ! (1_0) ldiff0 = ax0 - ay0;
.c2:
        cmp     %g1,_0x7f800000
        bge,pn  %icc,.u3
        nop
.c3:
        sra     %o1,31,%g1              ! (1_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (1_0) addrc0 = (char*)px - (char*)py;

        and     %l6,%g1,%o1             ! (1_0) addrc0 &= ldiff0;

        lda     [%i1+%o1]0x82,%f0       ! (1_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o1,%o4             ! (1_0) (char*)px - addrc0;

        lda     [%o4]0x82,%f2           ! (1_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %g1,5,%l6               ! (1_0) ltmp0 = ldiff0 << 5;

        cmp     %o5,_0x7f800000         ! (1_0) b0 ? 0x7f800000
        bge,pn  %icc,.update0           ! (1_0) if ( b0 > 0x7f800000 )
        nop
.cont0:
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (1_0) y0 = (double)fy0;

        sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex

        sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
        add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;
        fstod   %f2,%f2                 ! (1_0) x0 = (double)fx0;
.d0:
        and     %o5,-16,%o5             ! (1_0) signx0 &= -16;
        and     %o4,-8,%o4              ! (1_0) signy0 &= -8;

        lda     [%i1]0x82,%l4           ! (2_0) uy0 = *(int*)py;

        lda     [%i3]0x82,%l3           ! (2_0) ux0 = *(int*)px;
        fdivd   %f40,%f2,%f10           ! (1_0) x0 = y0 / x0;

        fmuld   %f12,%f12,%f20          ! (0_0) x20 = x0 * x0;

        add     %l6,%o5,%o2             ! (1_0) ltmp0 += signx0;

        and     %l3,_0x7fffffff,%l6     ! (2_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5

        cmp     %l6,%o5
        bl,pn   %icc,.u4
        and     %l4,_0x7fffffff,%g5     ! (2_0) ay0 = uy0 & 0x7fffffff;
.c4:
        cmp     %g5,%o5
        bl,pn   %icc,.u5
        fmuld   K9,%f20,%f40            ! (0_0) dtmp0 = K9 * x20;
.c5:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u6
        ldd     [%o2+%o4],%f32          ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
.c6:
        cmp     %g5,_0x7f800000
        bge,pn  %icc,.u7
        sub     %l6,%g5,%o2             ! (2_0) ldiff0 = ax0 - ay0;
.c7:
        sra     %o2,31,%g5              ! (2_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (2_0) addrc0 = (char*)px - (char*)py;

        faddd   %f40,K8,%f40            ! (0_0) dtmp0 += K8;
        and     %l6,%g5,%o2             ! (2_0) addrc0 &= ldiff0;

        lda     [%i1+%o2]0x82,%f0       ! (2_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o2,%o4             ! (2_0) (char*)px - addrc0;

        lda     [%o4]0x82,%f2           ! (2_0) fx0 = *(float*)((char*)px - addrc0);

        cmp     %o5,_0x7f800000         ! (2_0) b0 ? 0x7f800000
        bge,pn  %icc,.update1           ! (2_0) if ( b0 > 0x7f800000 )
        nop
.cont1:
        fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;
        sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (2_0) y0 = (double)fy0;

        sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex

        fstod   %f2,%f2                 ! (2_0) x0 = (double)fx0;
        sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
        add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;
.d1:
        lda     [%i1]0x82,%l3           ! (3_0) uy0 = *(int*)py;
        and     %o5,-16,%o5             ! (2_0) signx0 &= -16;
        faddd   %f30,K7,%f30            ! (0_0) dtmp0 += K7;

        lda     [%i3]0x82,%l4           ! (3_0) ux0 = *(int*)px;

        fdivd   %f40,%f2,%f8            ! (2_0) x0 = y0 / x0;

        fmuld   %f10,%f10,%f18          ! (1_0) x20 = x0 * x0;

        add     %l6,%o5,%o1             ! (2_0) ltmp0 += signx0;
        and     %o4,-8,%o4              ! (2_0) signy0 &= -8;
        fmuld   %f30,%f20,%f30          ! (0_0) dtmp0 *= x20;

        and     %l4,_0x7fffffff,%l6     ! (3_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5

        cmp     %l6,%o5
        bl,pn   %icc,.u8
        and     %l3,_0x7fffffff,%o0     ! (3_0) ay0 = uy0 & 0x7fffffff;
.c8:
        cmp     %o0,%o5
        bl,pn   %icc,.u9
        fmuld   K9,%f18,%f40            ! (1_0) dtmp0 = K9 * x20;
.c9:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u10
        faddd   %f30,K6,%f16            ! (0_0) dtmp0 += K6;
.c10:
        cmp     %o0,_0x7f800000
        bge,pn  %icc,.u11
        ldd     [%o1+%o4],%f30          ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
.c11:
        sub     %l6,%o0,%o1             ! (3_0) ldiff0 = ax0 - ay0;

        sra     %o1,31,%o0              ! (3_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (3_0) addrc0 = (char*)px - (char*)py;

        faddd   %f40,K8,%f40            ! (1_0) dtmp0 += K8;
        and     %l6,%o0,%o1             ! (3_0) addrc0 &= ldiff0;
        fmuld   %f16,%f20,%f16          ! (0_0) dtmp0 *= x20;

        lda     [%i1+%o1]0x82,%f0       ! (3_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o1,%o4             ! (3_0) (char*)px - addrc0;

        lda     [%o4]0x82,%f1           ! (3_0) fx0 = *(float*)((char*)px - addrc0);

        cmp     %o5,_0x7f800000         ! (3_0) b0 ? 0x7f800000
        bge,pn  %icc,.update2           ! (3_0) if ( b0 > 0x7f800000 )
        nop
.cont2:
        fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;
        sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (3_0) y0 = (double)fy0;

        faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;
        sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex

        sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;
        fstod   %f1,%f16                ! (3_0) x0 = (double)fx0;
.d2:
        faddd   %f28,K7,%f28            ! (1_0) dtmp0 += K7;
        add     %l6,cadd_arr,%l6        ! (3_0) ltmp0 += (char*)cadd_arr;
        and     %o5,-16,%o5             ! (3_0) signx0 &= -16;

        lda     [%i1]0x82,%l4           ! (4_0) uy0 = *(int*)py;
        fmuld   %f2,%f20,%f2            ! (0_0) dtmp0 *= x20;

        lda     [%i3]0x82,%l3           ! (4_0) ux0 = *(int*)px;
        fdivd   %f40,%f16,%f6           ! (3_0) x0 = y0 / x0;

        and     %o4,-8,%o4              ! (3_0) signy0 &= -8;
        fmuld   %f8,%f8,%f16            ! (2_0) x20 = x0 * x0;

        add     %l6,%o5,%o2             ! (3_0) ltmp0 += signx0;
        fmuld   %f28,%f18,%f28          ! (1_0) dtmp0 *= x20;

        and     %l3,_0x7fffffff,%l6     ! (4_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f2,K4,%f2              ! (0_0) dtmp0 += K4;

        cmp     %l6,%o5
        bl,pn   %icc,.u12
        and     %l4,_0x7fffffff,%l5     ! (4_0) ay0 = uy0 & 0x7fffffff;
.c12:
        cmp     %l5,%o5
        bl,pn   %icc,.u13
        fmuld   K9,%f16,%f40            ! (2_0) dtmp0 = K9 * x20;
.c13:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u14
        faddd   %f28,K6,%f4             ! (1_0) dtmp0 += K6;
.c14:
        ldd     [%o2+%o4],%f28          ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %l5,_0x7f800000
        bge,pn  %icc,.u15
        fmuld   %f2,%f20,%f24           ! (0_0) dtmp0 *= x20;
.c15:
        sub     %l6,%l5,%o2             ! (4_0) ldiff0 = ax0 - ay0;

        sra     %o2,31,%l5              ! (4_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (4_0) addrc0 = (char*)px - (char*)py;

        faddd   %f40,K8,%f40            ! (2_0) dtmp0 += K8;
        and     %l6,%l5,%o2             ! (4_0) addrc0 &= ldiff0;
        fmuld   %f4,%f18,%f4            ! (1_0) dtmp0 *= x20;

        lda     [%i1+%o2]0x82,%f0       ! (4_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o2,%o4             ! (4_0) (char*)px - addrc0;
        faddd   %f24,K3,%f24            ! (0_0) dtmp0 += K3;

        lda     [%o4]0x82,%f2           ! (4_0) fx0 = *(float*)((char*)px - addrc0);

        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bge,pn  %icc,.update3           ! (4_0) if ( b0 > 0x7f800000 )
        nop
.cont3:
        fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;
        sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (4_0) y0 = (double)fy0;

        faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
        add     %i3,stridex,%i3         ! px += stridex
        fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;

        fstod   %f2,%f2                 ! (4_0) x0 = (double)fx0;
        sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;
        sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;
.d3:
        lda     [%i1]0x82,%l3           ! (5_0) uy0 = *(int*)py;
        add     %l6,cadd_arr,%l6        ! (4_0) ltmp0 += (char*)cadd_arr;
        faddd   %f26,K7,%f26            ! (2_0) dtmp0 += K7;

        fmuld   %f62,%f18,%f4           ! (1_0) dtmp0 *= x20;
        and     %o5,-16,%o5             ! (4_0) signx0 &= -16;

        lda     [%i3]0x82,%l4           ! (5_1) ux0 = *(int*)px;
        fdivd   %f40,%f2,%f62           ! (4_1) x0 = y0 / x0;
        faddd   %f24,K2,%f40            ! (0_1) dtmp0 += K2;

        and     %o4,-8,%o4              ! (4_1) signy0 &= -8;
        fmuld   %f6,%f6,%f24            ! (3_1) x20 = x0 * x0;

        add     %l6,%o5,%o1             ! (4_1) ltmp0 += signx0;
        fmuld   %f26,%f16,%f26          ! (2_1) dtmp0 *= x20;

        and     %l4,_0x7fffffff,%l6     ! (5_1) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f4,K4,%f4              ! (1_1) dtmp0 += K4;

        cmp     %l6,%o5
        bl,pn   %icc,.u16
        and     %l3,_0x7fffffff,%o7     ! (5_1) ay0 = uy0 & 0x7fffffff;
.c16:
        cmp     %o7,%o5
        bl,pn   %icc,.u17
        fmuld   %f40,%f20,%f38          ! (0_1) dtmp0 *= x20;
.c17:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u18
        fmuld   K9,%f24,%f40            ! (3_1) dtmp0 = K9 * x20;
.c18:
        cmp     %o7,_0x7f800000
        bge,pn  %icc,.u19
        faddd   %f26,K6,%f22            ! (2_1) dtmp0 += K6;
.c19:
        ldd     [%o1+%o4],%f26          ! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
        fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;

        sub     %l6,%o7,%o1             ! (5_1) ldiff0 = ax0 - ay0;

        sra     %o1,31,%o7              ! (5_1) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (5_1) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (0_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (3_1) dtmp0 += K8;
        and     %l6,%o7,%o1             ! (5_1) addrc0 &= ldiff0;
        fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;

        lda     [%i1+%o1]0x82,%f0       ! (5_1) fy0 = *(float*)((char*)py + addrc0);
        sll     %o7,5,%l6               ! (5_1) ltmp0 = ldiff0 << 5;
        sub     %i3,%o1,%o4             ! (5_1) (char*)px - addrc0;
        faddd   %f4,K3,%f4              ! (1_1) dtmp0 += K3;

        lda     [%o4]0x82,%f1           ! (5_1) fx0 = *(float*)((char*)px - addrc0);

        fmuld   %f38,%f20,%f38          ! (0_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (5_1) b0 ? 0x7f800000
        bge,pn  %icc,.update4           ! (5_1) if ( b0 > 0x7f800000 )
        nop
.cont4:
        fmuld   %f40,%f24,%f36          ! (3_1) dtmp0 *= x20;
        fstod   %f0,%f40                ! (5_1) y0 = (double)fy0;

        faddd   %f22,K5,%f14            ! (2_1) dtmp0 += K5;
        fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;

        add     %i3,stridex,%i3         ! px += stridex
        sll     %l7,3,%l7               ! (0_1) cmul0_ind = ldiff0 << 3;
        fstod   %f1,%f2                 ! (5_1) x0 = (double)fx0;
.d4:
        sra     %l3,28,%o4              ! (5_1) signy0 = uy0 >> 28;
        add     %i1,stridey,%i1         ! py += stridey

        faddd   %f36,K7,%f36            ! (3_1) dtmp0 += K7;
        sra     %l4,27,%o5              ! (5_1) signx0 = ux0 >> 27;

        lda     [%i1]0x82,%l4           ! (0_0) uy0 = *(int*)py;
        add     %l6,cadd_arr,%l6        ! (5_1) ltmp0 += (char*)cadd_arr;
        fmuld   %f14,%f16,%f22          ! (2_1) dtmp0 *= x20;
        faddd   %f38,K0,%f38            ! (0_1) dtmp0 += K0;

        lda     [%i3]0x82,%l3           ! (0_0) ux0 = *(int*)px;
        and     %o5,-16,%o5             ! (5_1) signx0 &= -16;
        fdivd   %f40,%f2,%f14           ! (5_1) x0 = y0 / x0;
        faddd   %f4,K2,%f40             ! (1_1) dtmp0 += K2;

        fmuld   %f62,%f62,%f4           ! (4_1) x20 = x0 * x0;

        ldd     [cmul_arr+%l7],%f0      ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
        add     %l6,%o5,%o2             ! (5_1) ltmp0 += signx0;
        and     %o4,-8,%o4              ! (5_1) signy0 &= -8;
        fmuld   %f36,%f24,%f36          ! (3_1) dtmp0 *= x20;

        fmuld   %f38,%f12,%f12          ! (0_1) x0 = dtmp0 * x0;
        and     %l4,_0x7fffffff,%l7     ! (0_0) ay0 = uy0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f22,K4,%f22            ! (2_1) dtmp0 += K4;

        and     %l3,_0x7fffffff,%l6     ! (0_0) ax0 = ux0 & 0x7fffffff;
        cmp     %l7,%o5
        bl,pn   %icc,.u20
        fmuld   %f40,%f18,%f38          ! (1_1) dtmp0 *= x20;
.c20:
        cmp     %l6,%o5
        bl,pn   %icc,.u21
        fmuld   K9,%f4,%f40             ! (4_1) dtmp0 = K9 * x20;
.c21:
        cmp     %l7,_0x7f800000
        bge,pn  %icc,.u22
        faddd   %f36,K6,%f20            ! (3_1) dtmp0 += K6;
.c22:
        ldd     [%o2+%o4],%f36          ! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u23
        fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
.c23:
        sub     %l6,%l7,%o2             ! (0_0) ldiff0 = ax0 - ay0;

        fmuld   %f0,%f12,%f12           ! (0_1) dtmp0 = cmul0 * x0;
        sra     %o2,31,%l7              ! (0_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (0_0) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (1_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (4_1) dtmp0 += K8;
        and     %l6,%l7,%o2             ! (0_0) addrc0 &= ldiff0;
        fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;

        lda     [%i1+%o2]0x82,%f0       ! (0_0) fy0 = *(float*)((char*)py + addrc0);
        sll     %g1,3,%g1               ! (1_1) cmul0_ind = ldiff0 << 3;
        sub     %i3,%o2,%o4             ! (0_0) (char*)px - addrc0
        faddd   %f22,K3,%f22            ! (2_1) dtmp0 += K3;

        lda     [%o4]0x82,%f2           ! (0_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %l7,5,%l6               ! (0_0) ltmp0 = ldiff0 << 5;

        fmuld   %f38,%f18,%f38          ! (1_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (0_0) b0 ? 0x7f800000
        bge,pn  %icc,.update5           ! (0_0) if ( b0 > 0x7f800000 )
        faddd   %f34,%f12,%f18          ! (0_1) dtmp0 = cadd0 + dtmp0;
.cont5:
        fmuld   %f40,%f4,%f34           ! (4_1) dtmp0 *= x20;
        sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        faddd   %f20,K5,%f12            ! (3_1) dtmp0 += K5;
        add     %i1,stridey,%i1         ! py += stridey
        fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;

        lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
        sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;
        add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;
        fstod   %f2,%f2                 ! (0_0) x0 = (double)fx0;
.d5:
        lda     [%i3]0x82,%l4           ! (1_0) ux0 = *(int*)px;
        and     %o5,-16,%o5             ! (0_0) signx0 &= -16;
        faddd   %f34,K7,%f34            ! (4_1) dtmp0 += K7;

        ldx     [%fp+tmp_pz],%o1
        fmuld   %f12,%f24,%f20          ! (3_1) dtmp0 *= x20;
        and     %o4,-8,%o4              ! (0_0) signy0 &= -8;
        faddd   %f38,K0,%f38            ! (1_1) dtmp0 += K0;

        fdivd   %f40,%f2,%f12           ! (0_0) x0 = y0 / x0;
        faddd   %f22,K2,%f40            ! (2_1) dtmp0 += K2;

        fdtos   %f18,%f2                ! (0_1) ftmp0 = (float)dtmp0;
        st      %f2,[%o1]               ! (0_1) *pz = ftmp0
        add     %o1,stridez,%o2
        fmuld   %f14,%f14,%f22          ! (5_1) x20 = x0 * x0;

        subcc   counter,1,counter
        bneg,a,pn       %icc,.begin
        or      %g0,%o2,%o4

        ldd     [cmul_arr+%g1],%f0      ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
        add     %l6,%o5,%o1             ! (0_0) ltmp0 += signx0;
        fmuld   %f34,%f4,%f34           ! (4_1) dtmp0 *= x20;

        fmuld   %f38,%f10,%f10          ! (1_1) x0 = dtmp0 * x0;
        and     %l4,_0x7fffffff,%l6     ! (1_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f20,K4,%f20            ! (3_1) dtmp0 += K4;

        and     %l3,_0x7fffffff,%g1     ! (1_0) ay0 = uy0 & 0x7fffffff;
        cmp     %l6,%o5
        bl,pn   %icc,.u24
        fmuld   %f40,%f16,%f38          ! (2_1) dtmp0 *= x20;
.c24:
        cmp     %g1,%o5
        bl,pn   %icc,.u25
        fmuld   K9,%f22,%f40            ! (5_1) dtmp0 = K9 * x20;
.c25:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u26
        faddd   %f34,K6,%f18            ! (4_1) dtmp0 += K6;
.c26:
        ldd     [%o1+%o4],%f34          ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %g1,_0x7f800000
        bge,pn  %icc,.u27
        fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;
.c27:
        sub     %l6,%g1,%o1             ! (1_0) ldiff0 = ax0 - ay0;

        fmuld   %f0,%f10,%f10           ! (1_1) dtmp0 = cmul0 * x0;
        sra     %o1,31,%g1              ! (1_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (1_0) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (2_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (5_1) dtmp0 += K8;
        and     %l6,%g1,%o1             ! (1_0) addrc0 &= ldiff0;
        fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;

        lda     [%i1+%o1]0x82,%f0       ! (1_0) fy0 = *(float*)((char*)py + addrc0);
        sll     %g5,3,%g5               ! (2_1) cmul0_ind = ldiff0 << 3;
        sub     %i3,%o1,%o4             ! (1_0) (char*)px - addrc0;
        faddd   %f20,K3,%f20            ! (3_1) dtmp0 += K3;

        lda     [%o4]0x82,%f2           ! (1_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %g1,5,%l6               ! (1_0) ltmp0 = ldiff0 << 5;
        add     %o2,stridez,%o1         ! pz += stridez

        fmuld   %f38,%f16,%f38          ! (2_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (1_0) b0 ? 0x7f800000
        bge,pn  %icc,.update6           ! (1_0) if ( b0 > 0x7f800000 )
        faddd   %f32,%f10,%f16          ! (1_1) dtmp0 = cadd0 + dtmp0;
.cont6:
        fmuld   %f40,%f22,%f32          ! (5_1) dtmp0 *= x20;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (1_0) y0 = (double)fy0;

        faddd   %f18,K5,%f10            ! (4_1) dtmp0 += K5;
        sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex
        fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;

        sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
        add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;
        fstod   %f2,%f2                 ! (1_0) x0 = (double)fx0;
.d6:
        faddd   %f32,K7,%f32            ! (5_1) dtmp0 += K7;
        and     %o5,-16,%o5             ! (1_0) signx0 &= -16;
        and     %o4,-8,%o4              ! (1_0) signy0 &= -8;

        lda     [%i1]0x82,%l4           ! (2_0) uy0 = *(int*)py;
        fmuld   %f10,%f4,%f18           ! (4_1) dtmp0 *= x20;
        faddd   %f38,K0,%f38            ! (2_1) dtmp0 += K0;

        lda     [%i3]0x82,%l3           ! (2_0) ux0 = *(int*)px;
        fdivd   %f40,%f2,%f10           ! (1_0) x0 = y0 / x0;
        faddd   %f20,K2,%f40            ! (3_1) dtmp0 += K2;

        fmuld   %f12,%f12,%f20          ! (0_0) x20 = x0 * x0;
        fdtos   %f16,%f2                ! (1_1) ftmp0 = (float)dtmp0;
        st      %f2,[%o2]               ! (1_1) *pz = ftmp0;

        subcc   counter,1,counter
        bneg,a,pn       %icc,.begin
        or      %g0,%o1,%o4

        ldd     [cmul_arr+%g5],%f0      ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
        add     %l6,%o5,%o2             ! (1_0) ltmp0 += signx0;
        fmuld   %f32,%f22,%f32          ! (5_1) dtmp0 *= x20;

        fmuld   %f38,%f8,%f8            ! (2_1) x0 = dtmp0 * x0;
        and     %l3,_0x7fffffff,%l6     ! (2_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f18,K4,%f18            ! (4_1) dtmp0 += K4;

        and     %l4,_0x7fffffff,%g5     ! (2_0) ay0 = uy0 & 0x7fffffff;
        cmp     %l6,%o5
        bl,pn   %icc,.u28
        fmuld   %f40,%f24,%f38          ! (3_1) dtmp0 *= x20;
.c28:
        cmp     %g5,%o5
        bl,pn   %icc,.u29
        fmuld   K9,%f20,%f40            ! (0_0) dtmp0 = K9 * x20;
.c29:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u30
        faddd   %f32,K6,%f16            ! (5_1) dtmp0 += K6;
.c30:
        ldd     [%o2+%o4],%f32          ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %g5,_0x7f800000
        bge,pn  %icc,.u31
        fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;
.c31:
        sub     %l6,%g5,%o2             ! (2_0) ldiff0 = ax0 - ay0;

        fmuld   %f0,%f8,%f8             ! (2_1) dtmp0 = cmul0 * x0;
        sra     %o2,31,%g5              ! (2_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (2_0) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (3_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (0_0) dtmp0 += K8;
        and     %l6,%g5,%o2             ! (2_0) addrc0 &= ldiff0;
        fmuld   %f16,%f22,%f16          ! (5_1) dtmp0 *= x20;

        lda     [%i1+%o2]0x82,%f0       ! (2_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o2,%o4             ! (2_0) (char*)px - addrc0;
        add     %o1,stridez,%o2         ! pz += stridez
        faddd   %f18,K3,%f18            ! (4_1) dtmp0 += K3;

        lda     [%o4]0x82,%f2           ! (2_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %o0,3,%o0               ! (3_1) cmul0_ind = ldiff0 << 3;

        fmuld   %f38,%f24,%f38          ! (3_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (2_0) b0 ? 0x7f800000
        bge,pn  %icc,.update7           ! (2_0) if ( b0 > 0x7f800000 )
        faddd   %f30,%f8,%f24           ! (2_1) dtmp0 = cadd0 + dtmp0;
.cont7:
        fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;
        sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (2_0) y0 = (double)fy0;

        faddd   %f16,K5,%f8             ! (5_1) dtmp0 += K5;
        sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex
        fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;

        fstod   %f2,%f2                 ! (2_0) x0 = (double)fx0;
        sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
        add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;
.d7:
        lda     [%i1]0x82,%l3           ! (3_0) uy0 = *(int*)py;
        and     %o5,-16,%o5             ! (2_0) signx0 &= -16;
        faddd   %f30,K7,%f30            ! (0_0) dtmp0 += K7;

        lda     [%i3]0x82,%l4           ! (3_0) ux0 = *(int*)px;
        fmuld   %f8,%f22,%f16           ! (5_1) dtmp0 *= x20;
        faddd   %f38,K0,%f38            ! (3_1) dtmp0 += K0;

        fdivd   %f40,%f2,%f8            ! (2_0) x0 = y0 / x0;
        faddd   %f18,K2,%f40            ! (4_1) dtmp0 += K2;

        fmuld   %f10,%f10,%f18          ! (1_0) x20 = x0 * x0;
        fdtos   %f24,%f1                ! (2_1) ftmp0 = (float)dtmp0;
        st      %f1,[%o1]               ! (2_1) *pz = ftmp0;

        subcc   counter,1,counter
        bneg,a,pn       %icc,.begin
        or      %g0,%o2,%o4

        ldd     [cmul_arr+%o0],%f2      ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
        add     %l6,%o5,%o1             ! (2_0) ltmp0 += signx0;
        and     %o4,-8,%o4              ! (2_0) signy0 &= -8;
        fmuld   %f30,%f20,%f30          ! (0_0) dtmp0 *= x20;

        fmuld   %f38,%f6,%f6            ! (3_1) x0 = dtmp0 * x0;
        and     %l4,_0x7fffffff,%l6     ! (3_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f16,K4,%f24            ! (5_1) dtmp0 += K4;

        and     %l3,_0x7fffffff,%o0     ! (3_0) ay0 = uy0 & 0x7fffffff;
        cmp     %l6,%o5
        bl,pn   %icc,.u32
        fmuld   %f40,%f4,%f38           ! (4_1) dtmp0 *= x20;
.c32:
        cmp     %o0,%o5
        bl,pn   %icc,.u33
        fmuld   K9,%f18,%f40            ! (1_0) dtmp0 = K9 * x20;
.c33:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u34
        faddd   %f30,K6,%f16            ! (0_0) dtmp0 += K6;
.c34:
        ldd     [%o1+%o4],%f30          ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %o0,_0x7f800000
        bge,pn  %icc,.u35
        fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;
.c35:
        sub     %l6,%o0,%o1             ! (3_0) ldiff0 = ax0 - ay0;

        fmuld   %f2,%f6,%f6             ! (3_1) dtmp0 = cmul0 * x0;
        sra     %o1,31,%o0              ! (3_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (3_0) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (4_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (1_0) dtmp0 += K8;
        and     %l6,%o0,%o1             ! (3_0) addrc0 &= ldiff0;
        fmuld   %f16,%f20,%f16          ! (0_0) dtmp0 *= x20;

        lda     [%i1+%o1]0x82,%f0       ! (3_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o1,%o4             ! (3_0) (char*)px - addrc0;
        add     %o2,stridez,%o1         ! pz += stridez
        faddd   %f24,K3,%f24            ! (5_1) dtmp0 += K3;

        lda     [%o4]0x82,%f1           ! (3_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %l5,3,%l5               ! (4_1) cmul0_ind = ldiff0 << 3;

        fmuld   %f38,%f4,%f38           ! (4_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (3_0) b0 ? 0x7f800000
        bge,pn  %icc,.update8           ! (3_0) if ( b0 > 0x7f800000 )
        faddd   %f28,%f6,%f4            ! (3_1) dtmp0 = cadd0 + dtmp0;
.cont8:
        fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;
        sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (3_0) y0 = (double)fy0;

        faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;
        sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex
        fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;

        sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;
        fstod   %f1,%f16                ! (3_0) x0 = (double)fx0;
.d8:
        faddd   %f28,K7,%f28            ! (1_0) dtmp0 += K7;
        add     %l6,cadd_arr,%l6        ! (3_0) ltmp0 += (char*)cadd_arr;
        and     %o5,-16,%o5             ! (3_0) signx0 &= -16;

        lda     [%i1]0x82,%l4           ! (4_0) uy0 = *(int*)py;
        fmuld   %f2,%f20,%f2            ! (0_0) dtmp0 *= x20;
        faddd   %f38,K0,%f38            ! (4_1) dtmp0 += K0;

        lda     [%i3]0x82,%l3           ! (4_0) ux0 = *(int*)px;
        fdivd   %f40,%f16,%f6           ! (3_0) x0 = y0 / x0;
        faddd   %f24,K2,%f24            ! (5_1) dtmp0 += K2;

        fdtos   %f4,%f1                 ! (3_1) ftmp0 = (float)dtmp0;
        and     %o4,-8,%o4              ! (3_0) signy0 &= -8;
        st      %f1,[%o2]               ! (3_1) *pz = ftmp0;
        fmuld   %f8,%f8,%f16            ! (2_0) x20 = x0 * x0;

        subcc   counter,1,counter
        bneg,a,pn       %icc,.begin
        or      %g0,%o1,%o4

        ldd     [cmul_arr+%l5],%f0      ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
        add     %l6,%o5,%o2             ! (3_0) ltmp0 += signx0;
        fmuld   %f28,%f18,%f28          ! (1_0) dtmp0 *= x20;

        fmuld   %f38,%f62,%f62          ! (4_1) x0 = dtmp0 * x0;
        and     %l3,_0x7fffffff,%l6     ! (4_0) ax0 = ux0 & 0x7fffffff;
        sethi   %hi(0x00800000),%o5
        faddd   %f2,K4,%f2              ! (0_0) dtmp0 += K4;

        and     %l4,_0x7fffffff,%l5     ! (4_0) ay0 = uy0 & 0x7fffffff;
        cmp     %l6,%o5
        bl,pn   %icc,.u36
        fmuld   %f24,%f22,%f38          ! (5_1) dtmp0 *= x20;
.c36:
        cmp     %l5,%o5
        bl,pn   %icc,.u37
        fmuld   K9,%f16,%f40            ! (2_0) dtmp0 = K9 * x20;
.c37:
        cmp     %l6,_0x7f800000
        bge,pn  %icc,.u38
        faddd   %f28,K6,%f4             ! (1_0) dtmp0 += K6;
.c38:
        ldd     [%o2+%o4],%f28          ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
        cmp     %l5,_0x7f800000
        bge,pn  %icc,.u39
        fmuld   %f2,%f20,%f24           ! (0_0) dtmp0 *= x20;
.c39:
        sub     %l6,%l5,%o2             ! (4_0) ldiff0 = ax0 - ay0;

        fmuld   %f0,%f62,%f62           ! (4_1) dtmp0 = cmul0 * x0;
        sra     %o2,31,%l5              ! (4_0) ldiff0 >>= 31;
        sub     %i3,%i1,%l6             ! (4_0) addrc0 = (char*)px - (char*)py;
        faddd   %f38,K1,%f38            ! (5_1) dtmp0 += K1;

        faddd   %f40,K8,%f40            ! (2_0) dtmp0 += K8;
        and     %l6,%l5,%o2             ! (4_0) addrc0 &= ldiff0;
        fmuld   %f4,%f18,%f4            ! (1_0) dtmp0 *= x20;

        lda     [%i1+%o2]0x82,%f0       ! (4_0) fy0 = *(float*)((char*)py + addrc0);
        sub     %i3,%o2,%o4             ! (4_0) (char*)px - addrc0;
        add     %o1,stridez,%o2         ! pz += stridez
        faddd   %f24,K3,%f24            ! (0_0) dtmp0 += K3;

        lda     [%o4]0x82,%f2           ! (4_0) fx0 = *(float*)((char*)px - addrc0);
        sll     %o7,3,%o7               ! (5_1) cmul0_ind = ldiff0 << 3;

        fmuld   %f38,%f22,%f38          ! (5_1) dtmp0 *= x20;
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bge,pn  %icc,.update9           ! (4_0) if ( b0 > 0x7f800000 )
        faddd   %f26,%f62,%f22          ! (4_1) dtmp0 = cadd0 + dtmp0;
.cont9:
        fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;
        sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
        add     %i1,stridey,%i1         ! py += stridey
        fstod   %f0,%f40                ! (4_0) y0 = (double)fy0;

        faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
        sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex
        fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;

        fstod   %f2,%f2                 ! (4_0) x0 = (double)fx0;
        sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;
.d9:
        lda     [%i1]0x82,%l3           ! (5_0) uy0 = *(int*)py;
        add     %l6,cadd_arr,%l6        ! (4_0) ltmp0 += (char*)cadd_arr;
        faddd   %f26,K7,%f26            ! (2_0) dtmp0 += K7;

        fmuld   %f62,%f18,%f4           ! (1_0) dtmp0 *= x20;
        and     %o5,-16,%o5             ! (4_0) signx0 &= -16;
        faddd   %f38,K0,%f38            ! (5_1) dtmp0 += K0;

        subcc   counter,5,counter
        bneg,pn %icc,.tail
        nop

        ba      .main_loop
        nop

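! (Added note: the (i_j) tags in the comments are presumed to mark the
! software-pipeline schedule: i is the element slot within the 6-way
! pipeline, j the number of main-loop generations it lags.)
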
1063         .align  16
1064 .main_loop:
1065         lda     [%i3]0x82,%l4           ! (5_1) ux0 = *(int*)px;
1066         nop
1067         fdivd   %f40,%f2,%f62           ! (4_1) x0 = y0 / x0;
1068         faddd   %f24,K2,%f40            ! (0_1) dtmp0 += K2;
1069 
1070         fdtos   %f22,%f22               ! (4_2) ftmp0 = (float)dtmp0;
1071         and     %o4,-8,%o4              ! (4_1) signy0 &= -8;
1072         st      %f22,[%o1]              ! (4_2) *pz = ftmp0;
1073         fmuld   %f6,%f6,%f24            ! (3_1) x20 = x0 * x0;
1074 
1075         ldd     [cmul_arr+%o7],%f0      ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1076         add     %l6,%o5,%o1             ! (4_1) ltmp0 += signx0;
1077         fmuld   %f26,%f16,%f26          ! (2_1) dtmp0 *= x20;
1078 
1079         fmuld   %f38,%f14,%f14          ! (5_2) x0 = dtmp0 * x0;
1080         and     %l4,_0x7fffffff,%l6     ! (5_1) ax0 = ux0 & 0x7fffffff;
1081         sethi   %hi(0x00800000),%o5
1082         faddd   %f4,K4,%f4              ! (1_1) dtmp0 += K4;
1083 
1084         and     %l3,_0x7fffffff,%o7     ! (5_1) ay0 = uy0 & 0x7fffffff;
1085         fmuld   %f40,%f20,%f38          ! (0_1) dtmp0 *= x20;
1086 
1087         cmp     %l6,%o5
1088         bl,pn   %icc,.up0
1089         fmuld   K9,%f24,%f40            ! (3_1) dtmp0 = K9 * x20;
1090 .co0:
1091         nop
1092         cmp     %o7,%o5
1093         bl,pn   %icc,.up1
1094         faddd   %f26,K6,%f22            ! (2_1) dtmp0 += K6;
1095 .co1:
1096         ldd     [%o1+%o4],%f26          ! (4_1) cadd0 = *(double*)(ltmp0 + signy0);
1097         cmp     %l6,_0x7f800000
1098         bge,pn  %icc,.up2
1099         fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;
1100 .co2:
1101         sub     %l6,%o7,%o1             ! (5_1) ldiff0 = ax0 - ay0;
1102         cmp     %o7,_0x7f800000
1103         bge,pn  %icc,.up3
1104 
1105         fmuld   %f0,%f14,%f14           ! (5_2) dtmp0 = cmul0 * x0;
1106 .co3:
1107         sra     %o1,31,%o7              ! (5_1) ldiff0 >>= 31;
1108         sub     %i3,%i1,%l6             ! (5_1) addrc0 = (char*)px - (char*)py;
1109         faddd   %f38,K1,%f38            ! (0_1) dtmp0 += K1;
1110 
1111         faddd   %f40,K8,%f40            ! (3_1) dtmp0 += K8;
1112         and     %l6,%o7,%o1             ! (5_1) addrc0 &= ldiff0;
1113         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1114 
1115         lda     [%i1+%o1]0x82,%f0       ! (5_1) fy0 = *(float*)((char*)py + addrc0);
1116         sll     %o7,5,%l6               ! (5_1) ltmp0 = ldiff0 << 5;
1117         sub     %i3,%o1,%o4             ! (5_1) (char*)px - addrc0;
1118         faddd   %f4,K3,%f4              ! (1_1) dtmp0 += K3;
1119 
1120         lda     [%o4]0x82,%f2           ! (5_1) fx0 = *(float*)((char*)px - addrc0);
1121 
1122         fmuld   %f38,%f20,%f38          ! (0_1) dtmp0 *= x20;
1123         cmp     %o5,_0x7f800000         ! (5_1) b0 ? 0x7f800000
1124         bge,pn  %icc,.update10          ! (5_1) if ( b0 > 0x7f800000 )
1125         faddd   %f36,%f14,%f20          ! (5_2) dtmp0 = cadd0 + dtmp0;
1126 .cont10:
1127         fmuld   %f40,%f24,%f36          ! (3_1) dtmp0 *= x20;
1128         nop
1129         fstod   %f0,%f40                ! (5_1) y0 = (double)fy0;
1130 
1131         faddd   %f22,K5,%f14            ! (2_1) dtmp0 += K5;
1132         add     %o2,stridez,%o1         ! pz += stridez
1133         fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;
1134 
1135         sll     %l7,3,%l7               ! (0_1) cmul0_ind = ldiff0 << 3;
1136         add     %i3,stridex,%i3         ! px += stridex
1137         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
1138 .den0:
1139         sra     %l3,28,%o4              ! (5_1) signy0 = uy0 >> 28;
1140         add     %i1,stridey,%i1         ! py += stridey
1141 
1142         faddd   %f36,K7,%f36            ! (3_1) dtmp0 += K7;
1143         sra     %l4,27,%o5              ! (5_1) signx0 = ux0 >> 27;
1144 
1145         lda     [%i1]0x82,%l4           ! (0_0) uy0 = *(int*)py;
1146         add     %l6,cadd_arr,%l6        ! (5_1) ltmp0 += (char*)cadd_arr;
1147         fmuld   %f14,%f16,%f22          ! (2_1) dtmp0 *= x20;
1148         faddd   %f38,K0,%f38            ! (0_1) dtmp0 += K0;
1149 
1150         lda     [%i3]0x82,%l3           ! (0_0) ux0 = *(int*)px;
1151         and     %o5,-16,%o5             ! (5_1) signx0 &= -16;
1152         fdivd   %f40,%f2,%f14           ! (5_1) x0 = y0 / x0;
1153         faddd   %f4,K2,%f40             ! (1_1) dtmp0 += K2;
1154 
1155         fdtos   %f20,%f2                ! (5_2) ftmp0 = (float)dtmp0;
1156         st      %f2,[%o2]               ! (5_2) *pz = ftmp0;
1157         fmuld   %f62,%f62,%f4           ! (4_1) x20 = x0 * x0;
1158 
1159         ldd     [cmul_arr+%l7],%f0      ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1160         add     %l6,%o5,%o2             ! (5_1) ltmp0 += signx0;
1161         and     %o4,-8,%o4              ! (5_1) signy0 &= -8;
1162         fmuld   %f36,%f24,%f36          ! (3_1) dtmp0 *= x20;
1163 
1164         fmuld   %f38,%f12,%f12          ! (0_1) x0 = dtmp0 * x0;
1165         and     %l4,_0x7fffffff,%l7     ! (0_0) ay0 = uy0 & 0x7fffffff;
1166         sethi   %hi(0x00800000),%o5
1167         faddd   %f22,K4,%f22            ! (2_1) dtmp0 += K4;
1168 
1169         and     %l3,_0x7fffffff,%l6     ! (0_0) ax0 = ux0 & 0x7fffffff;
1170         fmuld   %f40,%f18,%f38          ! (1_1) dtmp0 *= x20;
1171 
1172         cmp     %l7,%o5
1173         bl,pn   %icc,.up4
1174         fmuld   K9,%f4,%f40             ! (4_1) dtmp0 = K9 * x20;
1175 .co4:
1176         nop
1177         cmp     %l6,%o5
1178         bl,pn   %icc,.up5
1179         faddd   %f36,K6,%f20            ! (3_1) dtmp0 += K6;
1180 .co5:
1181         ldd     [%o2+%o4],%f36          ! (5_1) cadd0 = *(double*)(ltmp0 + signy0);
1182         cmp     %l7,_0x7f800000
1183         bge,pn  %icc,.up6
1184         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1185 .co6:
1186         sub     %l6,%l7,%o2             ! (0_0) ldiff0 = ax0 - ay0;
1187         cmp     %l6,_0x7f800000
1188         bge,pn  %icc,.up7
1189 
1190         fmuld   %f0,%f12,%f12           ! (0_1) dtmp0 = cmul0 * x0;
1191 .co7:
1192         sra     %o2,31,%l7              ! (0_0) ldiff0 >>= 31;
1193         sub     %i3,%i1,%l6             ! (0_0) addrc0 = (char*)px - (char*)py;
1194         faddd   %f38,K1,%f38            ! (1_1) dtmp0 += K1;
1195 
1196         faddd   %f40,K8,%f40            ! (4_1) dtmp0 += K8;
1197         and     %l6,%l7,%o2             ! (0_0) addrc0 &= ldiff0;
1198         fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;
1199 
1200         lda     [%i1+%o2]0x82,%f0       ! (0_0) fy0 = *(float*)((char*)py + addrc0);
1201         sll     %g1,3,%g1               ! (1_1) cmul0_ind = ldiff0 << 3;
1202         sub     %i3,%o2,%o4             ! (0_0) (char*)px - addrc0
1203         faddd   %f22,K3,%f22            ! (2_1) dtmp0 += K3;
1204 
1205         lda     [%o4]0x82,%f2           ! (0_0) fx0 = *(float*)((char*)px - addrc0);
1206         sll     %l7,5,%l6               ! (0_0) ltmp0 = ldiff0 << 5;
1207         add     %o1,stridez,%o2         ! pz += stridez
1208 
1209         fmuld   %f38,%f18,%f38          ! (1_1) dtmp0 *= x20;
1210         cmp     %o5,_0x7f800000         ! (0_0) b0 ? 0x7f800000
1211         bge,pn  %icc,.update11          ! (0_0) if ( b0 > 0x7f800000 )
1212         faddd   %f34,%f12,%f18          ! (0_1) dtmp0 = cadd0 + dtmp0;
1213 .cont11:
1214         fmuld   %f40,%f4,%f34           ! (4_1) dtmp0 *= x20;
1215         sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
1216         add     %i3,stridex,%i3         ! px += stridex
1217         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
1218 
1219         faddd   %f20,K5,%f12            ! (3_1) dtmp0 += K5;
1220         add     %i1,stridey,%i1         ! py += stridey
1221         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1222 
1223         lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
1224         sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;
1225         add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;
1226         fstod   %f2,%f2                 ! (0_0) x0 = (double)fx0;
1227 .den1:
1228         lda     [%i3]0x82,%l4           ! (1_0) ux0 = *(int*)px;
1229         and     %o5,-16,%o5             ! (0_0) signx0 &= -16;
1230         faddd   %f34,K7,%f34            ! (4_1) dtmp0 += K7;
1231 
1232         fmuld   %f12,%f24,%f20          ! (3_1) dtmp0 *= x20;
1233         and     %o4,-8,%o4              ! (0_0) signy0 &= -8;
1234         faddd   %f38,K0,%f38            ! (1_1) dtmp0 += K0;
1235 
1236         fdivd   %f40,%f2,%f12           ! (0_0) x0 = y0 / x0;
1237         faddd   %f22,K2,%f40            ! (2_1) dtmp0 += K2;
1238 
1239         fdtos   %f18,%f2                ! (0_1) ftmp0 = (float)dtmp0;
1240         nop
1241         st      %f2,[%o1]               ! (0_1) *pz = ftmp0;
1242         fmuld   %f14,%f14,%f22          ! (5_1) x20 = x0 * x0;
1243 
1244         ldd     [cmul_arr+%g1],%f0      ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1245         add     %l6,%o5,%o1             ! (0_0) ltmp0 += signx0;
1246         fmuld   %f34,%f4,%f34           ! (4_1) dtmp0 *= x20;
1247 
1248         fmuld   %f38,%f10,%f10          ! (1_1) x0 = dtmp0 * x0;
1249         and     %l4,_0x7fffffff,%l6     ! (1_0) ax0 = ux0 & 0x7fffffff;
1250         sethi   %hi(0x00800000),%o5
1251         faddd   %f20,K4,%f20            ! (3_1) dtmp0 += K4;
1252 
1253         and     %l3,_0x7fffffff,%g1     ! (1_0) ay0 = uy0 & 0x7fffffff;
1254         fmuld   %f40,%f16,%f38          ! (2_1) dtmp0 *= x20;
1255 
1256         cmp     %l6,%o5
1257         bl,pn   %icc,.up8
1258         fmuld   K9,%f22,%f40            ! (5_1) dtmp0 = K9 * x20;
1259 .co8:
1260         nop
1261         cmp     %g1,%o5
1262         bl,pn   %icc,.up9
1263         faddd   %f34,K6,%f18            ! (4_1) dtmp0 += K6;
1264 .co9:
1265         ldd     [%o1+%o4],%f34          ! (0_0) cadd0 = *(double*)(ltmp0 + signy0);
1266         cmp     %l6,_0x7f800000
1267         bge,pn  %icc,.up10
1268         fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;
1269 .co10:
1270         sub     %l6,%g1,%o1             ! (1_0) ldiff0 = ax0 - ay0;
1271         cmp     %g1,_0x7f800000
1272         bge,pn  %icc,.up11
1273 
1274         fmuld   %f0,%f10,%f10           ! (1_1) dtmp0 = cmul0 * x0;
1275 .co11:
1276         sra     %o1,31,%g1              ! (1_0) ldiff0 >>= 31;
1277         sub     %i3,%i1,%l6             ! (1_0) addrc0 = (char*)px - (char*)py;
1278         faddd   %f38,K1,%f38            ! (2_1) dtmp0 += K1;
1279 
1280         faddd   %f40,K8,%f40            ! (5_1) dtmp0 += K8;
1281         and     %l6,%g1,%o1             ! (1_0) addrc0 &= ldiff0;
1282         fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;
1283 
1284         lda     [%i1+%o1]0x82,%f0       ! (1_0) fy0 = *(float*)((char*)py + addrc0);
1285         sll     %g5,3,%g5               ! (2_1) cmul0_ind = ldiff0 << 3;
1286         sub     %i3,%o1,%o4             ! (1_0) (char*)px - addrc0;
1287         faddd   %f20,K3,%f20            ! (3_1) dtmp0 += K3;
1288 
1289         lda     [%o4]0x82,%f2           ! (1_0) fx0 = *(float*)((char*)px - addrc0);
1290         sll     %g1,5,%l6               ! (1_0) ltmp0 = ldiff0 << 5;
1291         add     %o2,stridez,%o1         ! pz += stridez
1292 
1293         fmuld   %f38,%f16,%f38          ! (2_1) dtmp0 *= x20;
1294         cmp     %o5,_0x7f800000         ! (1_0) b0 ? 0x7f800000
1295         bge,pn  %icc,.update12          ! (1_0) if ( b0 >= 0x7f800000 )
1296         faddd   %f32,%f10,%f16          ! (1_1) dtmp0 = cadd0 + dtmp0;
1297 .cont12:
1298         fmuld   %f40,%f22,%f32          ! (5_1) dtmp0 *= x20;
1299         add     %i1,stridey,%i1         ! py += stridey
1300         nop
1301         fstod   %f0,%f40                ! (1_0) y0 = (double)fy0;
1302 
1303         faddd   %f18,K5,%f10            ! (4_1) dtmp0 += K5;
1304         sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;
1305         add     %i3,stridex,%i3         ! px += stridex
1306         fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;
1307 
1308         sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
1309         add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;
1310         fstod   %f2,%f2                 ! (1_0) x0 = (double)fx0;
1311 .den2:
1312         faddd   %f32,K7,%f32            ! (5_1) dtmp0 += K7;
1313         and     %o5,-16,%o5             ! (1_0) signx0 &= -16;
1314         and     %o4,-8,%o4              ! (1_0) signy0 &= -8;
1315 
1316         lda     [%i1]0x82,%l4           ! (2_0) uy0 = *(int*)py;
1317         fmuld   %f10,%f4,%f18           ! (4_1) dtmp0 *= x20;
1318         faddd   %f38,K0,%f38            ! (2_1) dtmp0 += K0;
1319 
1320         lda     [%i3]0x82,%l3           ! (2_0) ux0 = *(int*)px;
1321         fdivd   %f40,%f2,%f10           ! (1_0) x0 = y0 / x0;
1322         faddd   %f20,K2,%f40            ! (3_1) dtmp0 += K2;
1323 
1324         fdtos   %f16,%f2                ! (1_1) ftmp0 = (float)dtmp0;
1325         nop
1326         st      %f2,[%o2]               ! (1_1) *pz = ftmp0;
1327         fmuld   %f12,%f12,%f20          ! (0_0) x20 = x0 * x0;
1328 
1329         ldd     [cmul_arr+%g5],%f0      ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1330         add     %l6,%o5,%o2             ! (1_0) ltmp0 += signx0;
1331         fmuld   %f32,%f22,%f32          ! (5_1) dtmp0 *= x20;
1332 
1333         fmuld   %f38,%f8,%f8            ! (2_1) x0 = dtmp0 * x0;
1334         and     %l3,_0x7fffffff,%l6     ! (2_0) ax0 = ux0 & 0x7fffffff;
1335         sethi   %hi(0x00800000),%o5
1336         faddd   %f18,K4,%f18            ! (4_1) dtmp0 += K4;
1337 
1338         and     %l4,_0x7fffffff,%g5     ! (2_0) ay0 = uy0 & 0x7fffffff;
1339         fmuld   %f40,%f24,%f38          ! (3_1) dtmp0 *= x20;
1340 
1341         cmp     %l6,%o5
1342         bl,pn   %icc,.up12
1343         fmuld   K9,%f20,%f40            ! (0_0) dtmp0 = K9 * x20;
1344 .co12:
1345         nop
1346         cmp     %g5,%o5
1347         bl,pn   %icc,.up13
1348         faddd   %f32,K6,%f16            ! (5_1) dtmp0 += K6;
1349 .co13:
1350         ldd     [%o2+%o4],%f32          ! (1_0) cadd0 = *(double*)(ltmp0 + signy0);
1351         cmp     %l6,_0x7f800000
1352         bge,pn  %icc,.up14
1353         fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;
1354 .co14:
1355         sub     %l6,%g5,%o2             ! (2_0) ldiff0 = ax0 - ay0;
1356         cmp     %g5,_0x7f800000
1357         bge,pn  %icc,.up15
1358 
1359         fmuld   %f0,%f8,%f8             ! (2_1) dtmp0 = cmul0 * x0;
1360 .co15:
1361         sra     %o2,31,%g5              ! (2_0) ldiff0 >>= 31;
1362         sub     %i3,%i1,%l6             ! (2_0) addrc0 = (char*)px - (char*)py;
1363         faddd   %f38,K1,%f38            ! (3_1) dtmp0 += K1;
1364 
1365         faddd   %f40,K8,%f40            ! (0_0) dtmp0 += K8;
1366         and     %l6,%g5,%o2             ! (2_0) addrc0 &= ldiff0;
1367         fmuld   %f16,%f22,%f16          ! (5_1) dtmp0 *= x20;
1368 
1369         lda     [%i1+%o2]0x82,%f0       ! (2_0) fy0 = *(float*)((char*)py + addrc0);
1370         sub     %i3,%o2,%o4             ! (2_0) (char*)px - addrc0;
1371         add     %o1,stridez,%o2         ! pz += stridez
1372         faddd   %f18,K3,%f18            ! (4_1) dtmp0 += K3;
1373 
1374         lda     [%o4]0x82,%f2           ! (2_0) fx0 = *(float*)((char*)px - addrc0);
1375         sll     %o0,3,%o0               ! (3_1) cmul0_ind = ldiff0 << 3;
1376         add     %i3,stridex,%i3         ! px += stridex
1377 
1378         fmuld   %f38,%f24,%f38          ! (3_1) dtmp0 *= x20;
1379         cmp     %o5,_0x7f800000         ! (2_0) b0 ? 0x7f800000
1380         bge,pn  %icc,.update13          ! (2_0) if ( b0 >= 0x7f800000 )
1381         faddd   %f30,%f8,%f24           ! (2_1) dtmp0 = cadd0 + dtmp0;
1382 .cont13:
1383         fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;
1384         sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
1385         add     %i1,stridey,%i1         ! py += stridey
1386         fstod   %f0,%f40                ! (2_0) y0 = (double)fy0;
1387 
1388         faddd   %f16,K5,%f8             ! (5_1) dtmp0 += K5;
1389         sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;
1390         fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;
1391 
1392         fstod   %f2,%f2                 ! (2_0) x0 = (double)fx0;
1393         sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
1394         add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;
1395 .den3:
1396         lda     [%i1]0x82,%l3           ! (3_0) uy0 = *(int*)py;
1397         and     %o5,-16,%o5             ! (2_0) signx0 &= -16;
1398         faddd   %f30,K7,%f30            ! (0_0) dtmp0 += K7;
1399 
1400         lda     [%i3]0x82,%l4           ! (3_0) ux0 = *(int*)px;
1401         fmuld   %f8,%f22,%f16           ! (5_1) dtmp0 *= x20;
1402         faddd   %f38,K0,%f38            ! (3_1) dtmp0 += K0;
1403 
1404         fdivd   %f40,%f2,%f8            ! (2_0) x0 = y0 / x0;
1405         faddd   %f18,K2,%f40            ! (4_1) dtmp0 += K2;
1406 
1407         fdtos   %f24,%f1                ! (2_1) ftmp0 = (float)dtmp0;
1408         st      %f1,[%o1]               ! (2_1) *pz = ftmp0;
1409         fmuld   %f10,%f10,%f18          ! (1_0) x20 = x0 * x0;
1410 
1411         ldd     [cmul_arr+%o0],%f2      ! (3_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1412         add     %l6,%o5,%o1             ! (2_0) ltmp0 += signx0;
1413         and     %o4,-8,%o4              ! (2_0) signy0 &= -8;
1414         fmuld   %f30,%f20,%f30          ! (0_0) dtmp0 *= x20;
1415 
1416         fmuld   %f38,%f6,%f6            ! (3_1) x0 = dtmp0 * x0;
1417         and     %l4,_0x7fffffff,%l6     ! (3_0) ax0 = ux0 & 0x7fffffff;
1418         sethi   %hi(0x00800000),%o5
1419         faddd   %f16,K4,%f24            ! (5_1) dtmp0 += K4;
1420 
1421         and     %l3,_0x7fffffff,%o0     ! (3_0) ay0 = uy0 & 0x7fffffff;
1422         fmuld   %f40,%f4,%f38           ! (4_1) dtmp0 *= x20;
1423 
1424         cmp     %l6,%o5
1425         bl,pn   %icc,.up16
1426         fmuld   K9,%f18,%f40            ! (1_0) dtmp0 = K9 * x20;
1427 .co16:
1428         nop
1429         cmp     %o0,%o5
1430         bl,pn   %icc,.up17
1431         faddd   %f30,K6,%f16            ! (0_0) dtmp0 += K6;
1432 .co17:
1433         ldd     [%o1+%o4],%f30          ! (2_0) cadd0 = *(double*)(ltmp0 + signy0);
1434         cmp     %l6,_0x7f800000
1435         bge,pn  %icc,.up18
1436         fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;
1437 .co18:
1438         sub     %l6,%o0,%o1             ! (3_0) ldiff0 = ax0 - ay0;
1439         cmp     %o0,_0x7f800000
1440         bge,pn  %icc,.up19
1441 
1442         fmuld   %f2,%f6,%f6             ! (3_1) dtmp0 = cmul0 * x0;
1443 .co19:
1444         sra     %o1,31,%o0              ! (3_0) ldiff0 >>= 31;
1445         sub     %i3,%i1,%l6             ! (3_0) addrc0 = (char*)px - (char*)py;
1446         faddd   %f38,K1,%f38            ! (4_1) dtmp0 += K1;
1447 
1448         faddd   %f40,K8,%f40            ! (1_0) dtmp0 += K8;
1449         and     %l6,%o0,%o1             ! (3_0) addrc0 &= ldiff0;
1450         fmuld   %f16,%f20,%f16          ! (0_0) dtmp0 *= x20;
1451 
1452         lda     [%i1+%o1]0x82,%f0       ! (3_0) fy0 = *(float*)((char*)py + addrc0);
1453         sub     %i3,%o1,%o4             ! (3_0) (char*)px - addrc0;
1454         add     %o2,stridez,%o1         ! pz += stridez
1455         faddd   %f24,K3,%f24            ! (5_1) dtmp0 += K3;
1456 
1457         lda     [%o4]0x82,%f1           ! (3_0) fx0 = *(float*)((char*)px - addrc0);
1458         sll     %l5,3,%l5               ! (4_1) cmul0_ind = ldiff0 << 3;
1459         add     %i3,stridex,%i3         ! px += stridex
1460 
1461         fmuld   %f38,%f4,%f38           ! (4_1) dtmp0 *= x20;
1462         cmp     %o5,_0x7f800000         ! (3_0) b0 ? 0x7f800000
1463         bge,pn  %icc,.update14          ! (3_0) if ( b0 >= 0x7f800000 )
1464         faddd   %f28,%f6,%f4            ! (3_1) dtmp0 = cadd0 + dtmp0;
1465 .cont14:
1466         fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;
1467         sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
1468         add     %i1,stridey,%i1         ! py += stridey
1469         fstod   %f0,%f40                ! (3_0) y0 = (double)fy0;
1470 
1471         faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;
1472         sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;
1473         fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;
1474 
1475         sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;
1476         fstod   %f1,%f16                ! (3_0) x0 = (double)fx0;
1477 .den4:
1478         faddd   %f28,K7,%f28            ! (1_0) dtmp0 += K7;
1479         add     %l6,cadd_arr,%l6        ! (3_0) ltmp0 += (char*)cadd_arr;
1480         and     %o5,-16,%o5             ! (3_0) signx0 &= -16;
1481 
1482         lda     [%i1]0x82,%l4           ! (4_0) uy0 = *(int*)py;
1483         fmuld   %f2,%f20,%f2            ! (0_0) dtmp0 *= x20;
1484         faddd   %f38,K0,%f38            ! (4_1) dtmp0 += K0;
1485 
1486         lda     [%i3]0x82,%l3           ! (4_0) ux0 = *(int*)px;
1487         fdivd   %f40,%f16,%f6           ! (3_0) x0 = y0 / x0;
1488         faddd   %f24,K2,%f24            ! (5_1) dtmp0 += K2;
1489 
1490         fdtos   %f4,%f1                 ! (3_1) ftmp0 = (float)dtmp0;
1491         and     %o4,-8,%o4              ! (3_0) signy0 &= -8;
1492         st      %f1,[%o2]               ! (3_1) *pz = ftmp0;
1493         fmuld   %f8,%f8,%f16            ! (2_0) x20 = x0 * x0;
1494 
1495         ldd     [cmul_arr+%l5],%f0      ! (4_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1496         add     %l6,%o5,%o2             ! (3_0) ltmp0 += signx0;
1497         fmuld   %f28,%f18,%f28          ! (1_0) dtmp0 *= x20;
1498 
1499         fmuld   %f38,%f62,%f62          ! (4_1) x0 = dtmp0 * x0;
1500         and     %l3,_0x7fffffff,%l6     ! (4_0) ax0 = ux0 & 0x7fffffff;
1501         sethi   %hi(0x00800000),%o5
1502         faddd   %f2,K4,%f2              ! (0_0) dtmp0 += K4;
1503 
1504         and     %l4,_0x7fffffff,%l5     ! (4_0) ay0 = uy0 & 0x7fffffff;
1505         fmuld   %f24,%f22,%f38          ! (5_1) dtmp0 *= x20;
1506 
1507         cmp     %l6,%o5
1508         bl,pn   %icc,.up20
1509         fmuld   K9,%f16,%f40            ! (2_0) dtmp0 = K9 * x20;
1510 .co20:
1511         nop
1512         cmp     %l5,%o5
1513         bl,pn   %icc,.up21
1514         faddd   %f28,K6,%f4             ! (1_0) dtmp0 += K6;
1515 .co21:
1516         ldd     [%o2+%o4],%f28          ! (3_0) cadd0 = *(double*)(ltmp0 + signy0);
1517         cmp     %l6,_0x7f800000
1518         bge,pn  %icc,.up22
1519         fmuld   %f2,%f20,%f24           ! (0_0) dtmp0 *= x20;
1520 .co22:
1521         sub     %l6,%l5,%o2             ! (4_0) ldiff0 = ax0 - ay0;
1522         cmp     %l5,_0x7f800000
1523         bge,pn  %icc,.up23
1524 
1525         fmuld   %f0,%f62,%f62           ! (4_1) dtmp0 = cmul0 * x0;
1526 .co23:
1527         sra     %o2,31,%l5              ! (4_0) ldiff0 >>= 31;
1528         sub     %i3,%i1,%l6             ! (4_0) addrc0 = (char*)px - (char*)py;
1529         faddd   %f38,K1,%f38            ! (5_1) dtmp0 += K1;
1530 
1531         faddd   %f40,K8,%f40            ! (2_0) dtmp0 += K8;
1532         and     %l6,%l5,%o2             ! (4_0) addrc0 &= ldiff0;
1533         fmuld   %f4,%f18,%f4            ! (1_0) dtmp0 *= x20;
1534 
1535         lda     [%i1+%o2]0x82,%f0       ! (4_0) fy0 = *(float*)((char*)py + addrc0);
1536         sub     %i3,%o2,%o4             ! (4_0) (char*)px - addrc0;
1537         add     %o1,stridez,%o2         ! pz += stridez
1538         faddd   %f24,K3,%f24            ! (0_0) dtmp0 += K3;
1539 
1540         lda     [%o4]0x82,%f2           ! (4_0) fx0 = *(float*)((char*)px - addrc0);
1541         sll     %o7,3,%o7               ! (5_1) cmul0_ind = ldiff0 << 3;
1542         add     %i3,stridex,%i3         ! px += stridex
1543 
1544         fmuld   %f38,%f22,%f38          ! (5_1) dtmp0 *= x20;
1545         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
1546         bge,pn  %icc,.update15          ! (4_0) if ( b0 >= 0x7f800000 )
1547         faddd   %f26,%f62,%f22          ! (4_1) dtmp0 = cadd0 + dtmp0;
1548 .cont15:
1549         fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;
1550         sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
1551         add     %i1,stridey,%i1         ! py += stridey
1552         fstod   %f0,%f40                ! (4_0) y0 = (double)fy0;
1553 
1554         faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
1555         sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;
1556         fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;
1557 
1558         fstod   %f2,%f2                 ! (4_0) x0 = (double)fx0;
1559         sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;
1560 .den5:
1561         lda     [%i1]0x82,%l3           ! (5_0) uy0 = *(int*)py;
1562         subcc   counter,6,counter       ! counter -= 6;
1563         add     %l6,cadd_arr,%l6        ! (4_0) ltmp0 += (char*)cadd_arr;
1564         faddd   %f26,K7,%f26            ! (2_0) dtmp0 += K7;
1565 
1566         fmuld   %f62,%f18,%f4           ! (1_0) dtmp0 *= x20;
1567         and     %o5,-16,%o5             ! (4_0) signx0 &= -16;
1568         bpos,pt %icc,.main_loop
1569         faddd   %f38,K0,%f38            ! (5_1) dtmp0 += K0;
1570 
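! For reference: the per-element computation implemented by the
! six-stage software-pipelined loop above, as a C sketch.  This is
! illustrative only (vatan2f_ref is not part of libm); it assumes
! finite, nonzero, normal inputs -- zeros, subnormals, Inf and NaN
! take the .spec0/.spec1/.updateN paths.  The real code replaces
! atan() with the odd polynomial x0*(K0 + K1*x0^2 + ... + K9*x0^18)
! and replaces the branches with the cadd_arr/cmul_arr table loads.
!
!	#include <math.h>
!
!	static float
!	vatan2f_ref(float y, float x)
!	{
!		double r, cadd, cmul;
!
!		if (fabsf(x) >= fabsf(y)) {	/* ldiff0 >= 0 */
!			r = (double)y / x;	/* |r| <= 1 */
!			cadd = (x < 0.0f) ? copysign(M_PI, (double)y) :
!			    copysign(0.0, (double)y);
!			cmul = 1.0;
!		} else {			/* ldiff0 < 0 */
!			r = (double)x / y;
!			cadd = copysign(M_PI_2, (double)y);
!			cmul = -1.0;
!		}
!		return ((float)(cadd + cmul * atan(r)));
!	}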
1571 .tail:
1572         addcc   counter,5,counter
1573         bneg,a,pn       %icc,.begin
1574         or      %g0,%o1,%o4
1575 
1576         faddd   %f24,K2,%f40            ! (0_1) dtmp0 += K2;
1577 
1578         fdtos   %f22,%f22               ! (4_2) ftmp0 = (float)dtmp0;
1579         st      %f22,[%o1]              ! (4_2) *pz = ftmp0;
1580 
1581         subcc   counter,1,counter
1582         bneg,a,pn       %icc,.begin
1583         or      %g0,%o2,%o4
1584 
1585         ldd     [cmul_arr+%o7],%f0      ! (5_2) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1586         fmuld   %f26,%f16,%f26          ! (2_1) dtmp0 *= x20;
1587 
1588         fmuld   %f38,%f14,%f14          ! (5_2) x0 = dtmp0 * x0;
1589         faddd   %f4,K4,%f4              ! (1_1) dtmp0 += K4;
1590 
1591         fmuld   %f40,%f20,%f38          ! (0_1) dtmp0 *= x20;
1592 
1593 
1594         faddd   %f26,K6,%f22            ! (2_1) dtmp0 += K6;
1595 
1596         fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;
1597 
1598         fmuld   %f0,%f14,%f14           ! (5_2) dtmp0 = cmul0 * x0;
1599         faddd   %f38,K1,%f38            ! (0_1) dtmp0 += K1;
1600 
1601         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1602 
1603         faddd   %f4,K3,%f4              ! (1_1) dtmp0 += K3;
1604 
1605         fmuld   %f38,%f20,%f38          ! (0_1) dtmp0 *= x20;
1606         faddd   %f36,%f14,%f20          ! (5_2) dtmp0 = cadd0 + dtmp0;
1607 
1608         faddd   %f22,K5,%f14            ! (2_1) dtmp0 += K5;
1609         add     %o2,stridez,%o1         ! pz += stridez
1610         fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;
1611 
1612         sll     %l7,3,%l7               ! (0_1) cmul0_ind = ldiff0 << 3;
1613 
1614         fmuld   %f14,%f16,%f22          ! (2_1) dtmp0 *= x20;
1615         faddd   %f38,K0,%f38            ! (0_1) dtmp0 += K0;
1616 
1617         faddd   %f4,K2,%f40             ! (1_1) dtmp0 += K2;
1618 
1619         fdtos   %f20,%f2                ! (5_2) ftmp0 = (float)dtmp0;
1620         st      %f2,[%o2]               ! (5_2) *pz = ftmp0;
1621 
1622         subcc   counter,1,counter
1623         bneg,a,pn       %icc,.begin
1624         or      %g0,%o1,%o4
1625 
1626         ldd     [cmul_arr+%l7],%f0      ! (0_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1627 
1628         fmuld   %f38,%f12,%f12          ! (0_1) x0 = dtmp0 * x0;
1629         faddd   %f22,K4,%f22            ! (2_1) dtmp0 += K4;
1630 
1631         fmuld   %f40,%f18,%f38          ! (1_1) dtmp0 *= x20;
1632 
1633         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1634 
1635         fmuld   %f0,%f12,%f12           ! (0_1) dtmp0 = cmul0 * x0;
1636         faddd   %f38,K1,%f38            ! (1_1) dtmp0 += K1;
1637 
1638         sll     %g1,3,%g1               ! (1_1) cmul0_ind = ldiff0 << 3;
1639         faddd   %f22,K3,%f22            ! (2_1) dtmp0 += K3;
1640 
1641         add     %o1,stridez,%o2         ! pz += stridez
1642 
1643         fmuld   %f38,%f18,%f38          ! (1_1) dtmp0 *= x20;
1644         faddd   %f34,%f12,%f18          ! (0_1) dtmp0 = cadd0 + dtmp0;
1645 
1646         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
1647 
1648         faddd   %f38,K0,%f38            ! (1_1) dtmp0 += K0;
1649 
1650         faddd   %f22,K2,%f40            ! (2_1) dtmp0 += K2;
1651 
1652         fdtos   %f18,%f2                ! (0_1) ftmp0 = (float)dtmp0;
1653         st      %f2,[%o1]               ! (0_1) *pz = ftmp0;
1654 
1655         subcc   counter,1,counter
1656         bneg,a,pn       %icc,.begin
1657         or      %g0,%o2,%o4
1658 
1659         ldd     [cmul_arr+%g1],%f0      ! (1_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1660 
1661         fmuld   %f38,%f10,%f10          ! (1_1) x0 = dtmp0 * x0;
1662 
1663         fmuld   %f40,%f16,%f38          ! (2_1) dtmp0 *= x20;
1664 
1665         fmuld   %f0,%f10,%f10           ! (1_1) dtmp0 = cmul0 * x0;
1666         faddd   %f38,K1,%f38            ! (2_1) dtmp0 += K1;
1667 
1668         sll     %g5,3,%g5               ! (2_1) cmul0_ind = ldiff0 << 3;
1669 
1670         add     %o2,stridez,%o1         ! pz += stridez
1671 
1672         fmuld   %f38,%f16,%f38          ! (2_1) dtmp0 *= x20;
1673         faddd   %f32,%f10,%f16          ! (1_1) dtmp0 = cadd0 + dtmp0;
1674 
1675         faddd   %f38,K0,%f38            ! (2_1) dtmp0 += K0;
1676 
1677         fdtos   %f16,%f2                ! (1_1) ftmp0 = (float)dtmp0;
1678         st      %f2,[%o2]               ! (1_1) *pz = ftmp0;
1679 
1680         subcc   counter,1,counter
1681         bneg,a,pn       %icc,.begin
1682         or      %g0,%o1,%o4
1683 
1684         ldd     [cmul_arr+%g5],%f0      ! (2_1) cmul0 = *(double*)((char*)cmul_arr + cmul0_ind);
1685 
1686         fmuld   %f38,%f8,%f8            ! (2_1) x0 = dtmp0 * x0;
1687 
1688         fmuld   %f0,%f8,%f8             ! (2_1) dtmp0 = cmul0 * x0;
1689 
1690         add     %o1,stridez,%o2         ! pz += stridez
1691 
1692         faddd   %f30,%f8,%f24           ! (2_1) dtmp0 = cadd0 + dtmp0;
1693 
1694         fdtos   %f24,%f1                ! (2_1) ftmp0 = (float)dtmp0;
1695         st      %f1,[%o1]               ! (2_1) *pz = ftmp0;
1696 
1697         ba      .begin
1698         or      %g0,%o2,%o4
1699 
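! Control shape of the .tail drain above, as a C sketch: when
! .main_loop exits, up to five elements are still mid-pipeline, and
! each is finished and stored until the element count runs out.
! finish_stage() is a hypothetical stand-in for the remaining
! polynomial/table steps of one stage; counter is the value left
! after the addcc at .tail.
!
!	static void
!	tail_ref(int counter, float *pz, long stridez,
!	    float (*finish_stage)(int))
!	{
!		int s;
!
!		for (s = 0; s < 5 && counter >= 0; s++, counter--) {
!			*pz = finish_stage(s);
!			pz = (float *)((char *)pz + stridez);
!		}
!	}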
1700         .align  16
1701 .spec0:
1702         cmp     %l6,_0x7f800000         ! ax0 ? 0x7f800000
1703         bg      2f                      ! if ( ax0 > 0x7f800000 )
1704         srl     %l3,30,%l3              ! signx0 = (unsigned)ux0 >> 30;
1705 
1706         cmp     %l7,_0x7f800000         ! ay0 ? 0x7f800000
1707         bg      2f                      ! if ( ay0 > 0x7f800000 )
1708         and     %l3,2,%l3               ! signx0 &= 2;
1709 
1710         sra     %l4,31,%l4              ! signy0 = uy0 >> 31;
1711         bne,a   1f                      ! if (ay0 != 0x7f800000)
1712         add     %l3,%l3,%l3             ! signx0 += signx0;
1713 
1714         cmp     %l6,_0x7f800000         ! ax0 ? 0x7f800000
1715         bne,a   1f                      ! if ( ax0 != 0x7f800000 )
1716         add     %g0,2,%l3               ! signx0 = 2
1717 
1718         add     %l3,1,%l3               ! signx0 ++;
1719 1:
1720         sll     %l4,3,%l4               ! signy0 <<= 3;
1721         st      %l3,[%fp+tmp_pz]        ! STORE signx0
1722 
1723         ldd     [cmul_arr+88],%f0       ! LOAD M_PI_4
1724 
1725         ld      [%fp+tmp_pz],%f2        ! LOAD signx0
1726 
1727         ldd     [cmul_arr+%l4],%f4      ! dtmp0 = *(double*)((char*)(cmul_arr + 1) + signy0);
1728 
1729         add     %i1,stridey,%i1         ! py += stridey;
1730         fitod   %f2,%f2                 ! dtmp1 = (double)signx0;
1731 
1732         add     %i3,stridex,%i3         ! px += stridex;
1733 
1734         fmuld   %f2,%f0,%f0             ! res = signx0 * M_PI_4;
1735 
1736         fmuld   %f0,%f4,%f0             ! res *= dtmp0;
1737         fdtos   %f0,%f0                 ! ftmp0 = (float) res;
1738         st      %f0,[%o4]               ! *pz = ftmp0;
1739 
1740         ba      .begin1
1741         add     %o4,stridez,%o4         ! pz += stridez;
1742 2:
1743         std     %l6,[%fp+tmp_pz]        ! *(float*)&ax0, *(float*)&ay0
1744         ldd     [%fp+tmp_pz],%f0        ! *(float*)&ax0, *(float*)&ay0
1745 
1746         add     %i1,stridey,%i1         ! py += stridey;
1747 
1748         fmuls   %f0,%f1,%f0             ! ftmp0 = *(float*)&ax0 * *(float*)&ay0;
1749         add     %i3,stridex,%i3         ! px += stridex;
1750         st      %f0,[%o4]               ! *pz = ftmp0;
1751 
1752         ba      .begin1
1753         add     %o4,stridez,%o4         ! pz += stridez;
1754 
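! What .spec0 above computes when |x| or |y| is Inf or NaN, as a C
! sketch (spec0_ref is illustrative only, not part of libm): NaNs
! propagate through the float multiply, and any infinity collapses
! the result to a signed multiple of M_PI_4 -- the multiple is the
! signx0 value assembled above from the sign and exponent bits.
!
!	#include <math.h>
!
!	static float
!	spec0_ref(float y, float x)
!	{
!		int n;
!
!		if (isnan(x) || isnan(y))
!			return (x * y);		/* as the fmuls at 2: */
!		if (isinf(x) && isinf(y))
!			n = (x > 0.0f) ? 1 : 3;	/* +-pi/4 or +-3pi/4 */
!		else if (isinf(y))
!			n = 2;			/* +-pi/2 */
!		else
!			n = (x > 0.0f) ? 0 : 4;	/* +-0 or +-pi */
!		return ((float)copysign(n * M_PI_4, (double)y));
!	}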
1755         .align  16
1756 .spec1:
1757         cmp     %l6,0
1758         bne,pn  %icc,1f
1759         nop
1760 
1761         cmp     %l7,0
1762         bne,pn  %icc,1f
1763         nop
1764 
1765         sra     %l4,28,%l4              ! signy0 = uy0 >> 28;
1766 
1767         sra     %l3,27,%l3              ! signx0 = ux0 >> 27;
1768         and     %l4,-8,%l4              ! signy0 &= -8;
1769 
1770         sra     %o2,31,%o2              ! ldiff0 >>= 31;
1771         and     %l3,-16,%l3             ! signx0 &= -16;
1772 
1773         sll     %o2,5,%o2               ! ldiff0 <<= 5;
1774         add     %l4,%l3,%l3             ! signx0 += signy0;
1775 
1776         add     %o2,%l3,%l3             ! signx0 += ldiff0;
1777         add     %i1,stridey,%i1         ! py += stridey;
1778 
1779         ldd     [cadd_arr+%l3],%f0      ! res = *(double*)((char*)(cadd_arr + 7) + signx0);
1780         add     %i3,stridex,%i3         ! px += stridex;
1781 
1782         fdtos   %f0,%f0                 ! ftmp0 = (float) res;
1783         st      %f0,[%o4]               ! *pz = ftmp0;
1784 
1785         ba      .begin1
1786         add     %o4,stridez,%o4         ! pz += stridez;
1787 1:
1788         stx     %o4,[%fp+tmp_pz]
1789         sra     %o2,31,%l7              ! (0_0) ldiff0 >>= 31;
1790         sub     %i3,%i1,%l6             ! (0_0) addrc0 = (char*)px - (char*)py;
1791 
1792         and     %l6,%l7,%o2             ! (0_0) addrc0 &= ldiff0;
1793 
1794         lda     [%i1+%o2]0x82,%f0       ! (0_0) fy0 = *(float*)((char*)py + addrc0);
1795         sub     %i3,%o2,%o4             ! (0_0) (char*)px - addrc0
1796 
1797         lda     [%i1+%o2]0x82,%l5       ! (0_0) itmp0 = *(int*)((char*)py + addrc0);
1798 
1799         lda     [%o4]0x82,%f2           ! (0_0) fx0 = *(float*)((char*)px - addrc0);
1800         sll     %l7,5,%l6               ! (0_0) ltmp0 = ldiff0 << 5;
1801 
1802         lda     [%o4]0x82,%g5           ! (0_0) itmp0 = *(int*)((char*)px - addrc0);
1803 
1804         sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
1805         add     %i1,stridey,%i1         ! py += stridey
1806 
1807         add     %i3,stridex,%i3         ! px += stridex
1808 
1809         lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
1810         sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;
1811 
1812         add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;
1813 
1814         and     %l5,_0x7fffffff,%l4
1815         sethi   %hi(0x00800000),%g1
1816 
1817         cmp     %l4,%g1
1818         bge,a   %icc,1f
1819         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
1820 
1821         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
1822         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
1823         sra     %l5,28,%l4              ! itmp0 >>= 28;
1824 
1825         and     %l4,-8,%l4              ! itmp0 &= -8;
1826         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
1827 
1828         fmuld   %f40,%f0,%f40           ! dtmp0 *= C2ONM149;
1829         ldd     [cmul_arr+%l4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
1830 
1831         fmuld   %f40,%f0,%f40           ! dtmp0 *= dsign;
1832 1:
1833         and     %g5,_0x7fffffff,%l4
1834         cmp     %l4,%g1
1835         bge,a   %icc,.spec1_cont
1836         fstod   %f2,%f2                 ! (0_0) x0 = (double)fx0;
1837 
1838         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
1839         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
1840         sra     %g5,28,%l4              ! itmp0 >>= 28;
1841 
1842         and     %l4,-8,%l4              ! itmp0 &= -8;
1843         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
1844 
1845         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
1846         ldd     [cmul_arr+%l4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
1847 
1848         ba      .spec1_cont
1849         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1850 
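! The subnormal rescaling used just above (and repeated in the
! .updateN blocks): a subnormal float is its raw 23-bit significand
! times 2^-149, so the exact double value is rebuilt from the integer
! image of the float and C2ONM149, the 2^(-149) constant at
! cmul_arr+96.  A C sketch, assuming a 32-bit int and IEEE-754 float:
!
!	#include <string.h>
!
!	static double
!	subnormal_to_double(float f)
!	{
!		int bits;
!		double d;
!
!		(void) memcpy(&bits, &f, sizeof (bits));
!		d = (double)(bits & 0x007fffff);	/* fabss + fitod */
!		d *= 0x1p-149;				/* C2ONM149 */
!		return ((bits < 0) ? -d : d);		/* dsign */
!	}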
1851         .align  16
1852 .update0:
1853         cmp     counter,0
1854         bg,pn   %icc,1f
1855         nop
1856 
1857         ld      [cmul_arr],%f2
1858         ba      .cont0
1859         fzero   %f0
1860 1:
1861         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
1862         bg,pt   %icc,1f
1863         nop
1864 2:
1865         sub     counter,0,counter
1866         st      counter,[%fp+tmp_counter]
1867         stx     %i1,[%fp+tmp_py]
1868         stx     %i3,[%fp+tmp_px]
1869 
1870         ld      [cmul_arr],%f2
1871         or      %g0,0,counter
1872         ba      .cont0
1873         fzero   %f0
1874 1:
1875         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
1876         bne,pn  %icc,1f
1877         sethi   %hi(0x00800000),%o5
1878 
1879         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
1880         be,pn   %icc,2b
1881         nop
1882 1:
1883         st      %f0,[%fp+tmp_px]
1884         st      %f2,[%fp+tmp_px+4]
1885         ld      [%fp+tmp_px],%o4
1886 
1887         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
1888         cmp     %l5,%o5
1889         bge,a   1f
1890         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
1891 
1892         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
1893         sra     %o4,28,%o4              ! itmp0 >>= 28;
1894         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
1895 
1896         and     %o4,-8,%o4              ! itmp0 &= -8;
1897         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
1898 
1899         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
1900         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
1901 
1902         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1903 1:
1904         add     %i3,stridex,%i3         ! px += stridex
1905         add     %i1,stridey,%i1         ! py += stridey
1906 
1907         ld      [%fp+tmp_px+4],%o4
1908         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
1909         cmp     %l5,%o5
1910         bge,a   1f
1911         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
1912 
1913         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
1914         sra     %o4,28,%o4              ! itmp0 >>= 28;
1915         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
1916 
1917         and     %o4,-8,%o4              ! itmp0 &= -8;
1918         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
1919 
1920         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
1921         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
1922 
1923         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1924 1:
1925         sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;
1926 
1927         sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
1928         ba      .d0
1929         add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;
1930 
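! The signx0/signy0 arithmetic recomputed above before resuming at
! .d0 (and used throughout the loop) packs three selectors -- sign of
! x, sign of y, and the |x| < |y| ordering -- into a single byte
! offset from cadd_arr.  A C sketch, assuming arithmetic >> on
! negative ints, as the sra instructions provide:
!
!	static int
!	cadd_offset(int ux, int uy, int ldiff)
!	{
!		int sx = (ux >> 27) & -16;	/* 0 or -16: x < 0 */
!		int sy = (uy >> 28) & -8;	/* 0 or  -8: y < 0 */
!		int ld = (ldiff < 0) ? -32 : 0;	/* (ldiff0 >> 31) << 5 */
!
!		return (ld + sx + sy);
!	}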
1931         .align  16
1932 .update1:
1933         cmp     counter,1
1934         bg,pn   %icc,1f
1935         nop
1936 
1937         fzero   %f0
1938         ba      .cont1
1939         ld      [cmul_arr],%f2
1940 1:
1941         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
1942         bg,pt   %icc,1f
1943         nop
1944 2:
1945         sub     counter,1,counter
1946         st      counter,[%fp+tmp_counter]
1947         stx     %i1,[%fp+tmp_py]
1948         stx     %i3,[%fp+tmp_px]
1949 
1950         ld      [cmul_arr],%f2
1951         or      %g0,1,counter
1952         ba      .cont1
1953         fzero   %f0
1954 1:
1955         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
1956         bne,pn  %icc,1f
1957         sethi   %hi(0x00800000),%o5
1958 
1959         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
1960         be,pn   %icc,2b
1961         nop
1962 1:
1963         st      %f0,[%fp+tmp_px]
1964         st      %f2,[%fp+tmp_px+4]
1965         ld      [%fp+tmp_px],%o4
1966         fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;
1967 
1968         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
1969         cmp     %l6,%o5
1970         bge,a   1f
1971         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
1972 
1973         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
1974         sra     %o4,28,%o4              ! itmp0 >>= 28;
1975         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
1976 
1977         and     %o4,-8,%o4              ! itmp0 &= -8;
1978         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
1979 
1980         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
1981         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
1982 
1983         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1984 1:
1985 
1986         add     %i1,stridey,%i1         ! py += stridey
1987 
1988         ld      [%fp+tmp_px+4],%o4
1989         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
1990         cmp     %l6,%o5
1991         bge,a   1f
1992         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
1993 
1994         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
1995         sra     %o4,28,%o4              ! itmp0 >>= 28;
1996         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
1997 
1998         and     %o4,-8,%o4              ! itmp0 &= -8;
1999         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
2000 
2001         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
2002         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2003 
2004         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
2005 1:
2006         sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
2007         sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;
2008         add     %i3,stridex,%i3         ! px += stridex
2009 
2010         sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
2011         ba      .d1
2012         add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;
2013 
2014         .align  16
2015 .update2:
2016         cmp     counter,2
2017         bg,pn   %icc,1f
2018         nop
2019 
2020         ld      [cmul_arr],%f1
2021         ba      .cont2
2022         fzeros  %f0
2023 1:
2024         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2025         bg,pt   %icc,1f
2026         nop
2027 2:
2028         sub     counter,2,counter
2029         st      counter,[%fp+tmp_counter]
2030         stx     %i1,[%fp+tmp_py]
2031         stx     %i3,[%fp+tmp_px]
2032 
2033         ld      [cmul_arr],%f1
2034         or      %g0,2,counter
2035         ba      .cont2
2036         fzeros  %f0
2037 1:
2038         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2039         bne,pn  %icc,1f
2040         sethi   %hi(0x00800000),%o5
2041 
2042         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2043         be,pn   %icc,2b
2044         nop
2045 1:
2046         std     %f0,[%fp+tmp_px]
2047         ld      [%fp+tmp_px],%o4
2048         fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;
2049 
2050         faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;
2051 
2052         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2053         cmp     %l6,%o5
2054         bge,a   1f
2055         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2056 
2057         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2058         sra     %o4,28,%o4              ! itmp0 >>= 28;
2059         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2060 
2061         and     %o4,-8,%o4              ! itmp0 &= -8;
2062         fitod   %f0,%f16                ! dtmp0 = (double) *(int*)&fy0;
2063 
2064         fmuld   %f16,%f40,%f40          ! dtmp0 *= C2ONM149;
2065         ldd     [cmul_arr+%o4],%f16     ! dsign = *(double*)((char*)cmul_arr + itmp0);
2066 
2067         fmuld   %f16,%f40,%f40          ! dtmp0 *= dsign;
2068 1:
2069         add     %i1,stridey,%i1         ! py += stridey
2070 
2071         ld      [%fp+tmp_px+4],%o4
2072         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2073         cmp     %l6,%o5
2074         bge,a   1f
2075         fstod   %f1,%f16                ! (5_1) x0 = (double)fx0;
2076 
2077         fabss   %f1,%f16                ! fx0 = fabsf(fx0);
2078         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2079         sra     %o4,28,%o4              ! itmp0 >>= 28;
2080 
2081         and     %o4,-8,%o4              ! itmp0 &= -8;
2082         fitod   %f16,%f16               ! dtmp0 = (double) *(int*)&fx0;
2083 
2084         fmuld   %f16,%f0,%f16           ! dtmp0 *= C2ONM149;
2085         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2086 
2087         fmuld   %f16,%f0,%f16           ! dtmp0 *= dsign;
2088 1:
2089         sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
2090         sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;
2091 
2092         add     %i3,stridex,%i3         ! px += stridex
2093         ba      .d2
2094         sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;
2095 
2096         .align  16
2097 .update3:
2098         cmp     counter,3
2099         bg,pn   %icc,1f
2100         nop
2101 
2102         fzero   %f0
2103         ba      .cont3
2104         ld      [cmul_arr],%f2
2105 1:
2106         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2107         bg,pt   %icc,1f
2108         nop
2109 2:
2110         sub     counter,3,counter
2111         st      counter,[%fp+tmp_counter]
2112         stx     %i1,[%fp+tmp_py]
2113         stx     %i3,[%fp+tmp_px]
2114 
2115         ld      [cmul_arr],%f2
2116         or      %g0,3,counter
2117         ba      .cont3
2118         fzero   %f0
2119 1:
2120         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2121         bne,pn  %icc,1f
2122         sethi   %hi(0x00800000),%o5
2123 
2124         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2125         be,pn   %icc,2b
2126         nop
2127 1:
2128         st      %f0,[%fp+tmp_px]
2129         st      %f2,[%fp+tmp_px+4]
2130         ld      [%fp+tmp_px],%o4
2131         fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;
2132 
2133         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2134         cmp     %l6,%o5
2135         bge,a   1f
2136         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2137 
2138         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2139         sra     %o4,28,%o4              ! itmp0 >>= 28;
2140         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2141 
2142         and     %o4,-8,%o4              ! itmp0 &= -8;
2143         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
2144 
2145         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
2146         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2147 
2148         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
2149 1:
2150         add     %i1,stridey,%i1         ! py += stridey
2151         faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
2152         fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;
2153 
2154         ld      [%fp+tmp_px+4],%o4
2155         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2156         cmp     %l6,%o5
2157         bge,a   1f
2158         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
2159 
2160         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
2161         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2162         sra     %o4,28,%o4              ! itmp0 >>= 28;
2163 
2164         and     %o4,-8,%o4              ! itmp0 &= -8;
2165         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
2166 
2167         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
2168         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2169 
2170         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
2171 1:
2172         sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
2173         sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;
2174 
2175         add     %i3,stridex,%i3         ! px += stridex
2176         ba      .d3
2177         sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;
2178 
2179         .align  16
2180 .update4:
2181         cmp     counter,4
2182         bg,pn   %icc,1f
2183         nop
2184 
2185         ld      [cmul_arr],%f1
2186         ba      .cont4
2187         fzeros  %f0
2188 1:
2189         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2190         bg,pt   %icc,1f
2191         nop
2192 2:
2193         sub     counter,4,counter
2194         st      counter,[%fp+tmp_counter]
2195         stx     %i1,[%fp+tmp_py]
2196         stx     %i3,[%fp+tmp_px]
2197 
2198         ld      [cmul_arr],%f1
2199         or      %g0,4,counter
2200         ba      .cont4
2201         fzeros  %f0
2202 1:
2203         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2204         bne,pn  %icc,1f
2205         sethi   %hi(0x00800000),%o5
2206 
2207         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2208         be,pn   %icc,2b
2209         nop
2210 1:
2211         std     %f0,[%fp+tmp_px]
2212         ld      [%fp+tmp_px],%o4
2213         fmuld   %f40,%f24,%f36          ! (3_1) dtmp0 *= x20;
2214 
2215         and     %o4,_0x7fffffff,%o1     ! itmp0 & 0x7fffffff
2216         cmp     %o1,%o5
2217         bge,a   1f
2218         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2219 
2220         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2221         sra     %o4,28,%o4              ! itmp0 >>= 28;
2222         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2223 
2224         and     %o4,-8,%o4              ! itmp0 &= -8;
2225         fitod   %f0,%f14                ! dtmp0 = (double) *(int*)&fy0;
2226 
2227         fmuld   %f14,%f40,%f40          ! dtmp0 *= C2ONM149;
2228         ldd     [cmul_arr+%o4],%f14     ! dsign = *(double*)((char*)cmul_arr + itmp0);
2229 
2230         fmuld   %f14,%f40,%f40          ! dtmp0 *= dsign;
2231 1:
2232         faddd   %f22,K5,%f14            ! (2_1) dtmp0 += K5;
2233         fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;
2234 
2235         ld      [%fp+tmp_px+4],%o4
2236         and     %o4,_0x7fffffff,%o1     ! itmp0 & 0x7fffffff
2237         cmp     %o1,%o5
2238         bge,a   1f
2239         fstod   %f1,%f2                 ! (5_1) x0 = (double)fx0;
2240 
2241         fabss   %f1,%f22                ! fx0 = fabsf(fx0);
2242         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2243         sra     %o4,28,%o4              ! itmp0 >>= 28;
2244 
2245         and     %o4,-8,%o4              ! itmp0 &= -8;
2246         fitod   %f22,%f22               ! dtmp0 = (double) *(int*)&fx0;
2247 
2248         fmuld   %f22,%f0,%f22           ! dtmp0 *= C2ONM149;
2249         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2250 
2251         fmuld   %f22,%f0,%f2            ! dtmp0 *= dsign;
2252 1:
2253         sll     %l7,3,%l7               ! (0_1) cmul0_ind = ldiff0 << 3;
2254         ba      .d4
2255         add     %i3,stridex,%i3         ! px += stridex
2256 
2257         .align  16
2258 .update5:
2259         cmp     counter,5
2260         bg,pn   %icc,1f
2261         nop
2262 
2263         ld      [cmul_arr],%f2
2264         ba      .cont5
2265         fzero   %f0
2266 1:
2267         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2268         bg,pt   %icc,1f
2269         nop
2270 2:
2271         sub     counter,5,counter
2272         st      counter,[%fp+tmp_counter]
2273         stx     %i1,[%fp+tmp_py]
2274         stx     %i3,[%fp+tmp_px]
2275 
2276         ld      [cmul_arr],%f2
2277         or      %g0,5,counter
2278         ba      .cont5
2279         fzero   %f0
2280 1:
2281         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2282         bne,pn  %icc,1f
2283         sethi   %hi(0x00800000),%o5
2284 
2285         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2286         be,pn   %icc,2b
2287         nop
2288 1:
2289         st      %f0,[%fp+tmp_px]
2290         st      %f2,[%fp+tmp_px+4]
2291         ld      [%fp+tmp_px],%o4
2292         fmuld   %f40,%f4,%f34           ! (4_1) dtmp0 *= x20;
2293 
2294         stx     %l5,[%fp+tmp_py]
2295         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
2296         cmp     %l5,%o5
2297         bge,a   1f
2298         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2299 
2300         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2301         sra     %o4,28,%o4              ! itmp0 >>= 28;
2302         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2303 
2304         and     %o4,-8,%o4              ! itmp0 &= -8;
2305         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
2306 
2307         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
2308         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2309 
2310         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
2311 1:
2312         faddd   %f20,K5,%f12            ! (3_1) dtmp0 += K5;
2313         add     %i1,stridey,%i1         ! py += stridey
2314         fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;
2315 
2316         ld      [%fp+tmp_px+4],%o4
2317         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
2318         cmp     %l5,%o5
2319         bge,a   1f
2320         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
2321 
2322         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2323         sra     %o4,28,%o4              ! itmp0 >>= 28;
2324         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
2325 
2326         and     %o4,-8,%o4              ! itmp0 &= -8;
2327         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
2328 
2329         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
2330         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2331 
2332         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
2333 1:
2334         ldx     [%fp+tmp_py],%l5
2335         sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
2336         add     %i3,stridex,%i3         ! px += stridex
2337 
2338         lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
2339         sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;
2340         ba      .d5
2341         add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;
2342 
2343         .align  16
2344 .update6:
2345         cmp     counter,5
2346         bg,pn   %icc,1f
2347         nop
2348 
2349         ld      [cmul_arr],%f2
2350         ba      .cont6
2351         fzero   %f0
2352 1:
2353         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2354         bg,pt   %icc,1f
2355         nop
2356 2:
2357         sub     counter,5,counter
2358         st      counter,[%fp+tmp_counter]
2359         stx     %i1,[%fp+tmp_py]
2360         stx     %i3,[%fp+tmp_px]
2361 
2362         ld      [cmul_arr],%f2
2363         or      %g0,5,counter
2364         ba      .cont6
2365         fzero   %f0
2366 1:
2367         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2368         bne,pn  %icc,1f
2369         sethi   %hi(0x00800000),%o5
2370 
2371         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2372         be,pn   %icc,2b
2373         nop
2374 1:
2375         st      %f0,[%fp+tmp_pz]
2376         st      %f2,[%fp+tmp_pz+4]
2377         ld      [%fp+tmp_pz],%o4
2378         fmuld   %f40,%f22,%f32          ! (5_1) dtmp0 *= x20;
2379 
2380         stx     %l5,[%fp+tmp_px]
2381         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
2382         cmp     %l5,%o5
2383         bge,a   1f
2384         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2385 
2386         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2387         sra     %o4,28,%o4              ! itmp0 >>= 28;
2388         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2389 
2390         and     %o4,-8,%o4              ! itmp0 &= -8;
2391         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
2392 
2393         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
2394         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2395 
2396         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
2397 1:
2398         faddd   %f18,K5,%f10            ! (4_1) dtmp0 += K5;
2399         add     %i3,stridex,%i3         ! px += stridex
2400         add     %i1,stridey,%i1         ! py += stridey
2401         fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;
2402 
2403         ld      [%fp+tmp_pz+4],%o4
2404         and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
2405         cmp     %l5,%o5
2406         bge,a   1f
2407         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
2408 
2409         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2410         sra     %o4,28,%o4              ! itmp0 >>= 28;
2411         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
2412 
2413         and     %o4,-8,%o4              ! itmp0 &= -8;
2414         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
2415 
2416         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
2417         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2418 
2419         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
2420 1:
2421         ldx     [%fp+tmp_px],%l5
2422 
2423         sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;
2424 
2425         sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
2426         ba      .d6
2427         add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;
2428 
2429         .align  16
2430 .update7:
2431         cmp     counter,5
2432         bg,pn   %icc,1f
2433         nop
2434 
2435         ld      [cmul_arr],%f2
2436         ba      .cont7
2437         fzero   %f0
2438 1:
2439         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2440         bg,pt   %icc,1f
2441         nop
2442 2:
2443         sub     counter,5,counter
2444         st      counter,[%fp+tmp_counter]
2445         stx     %i1,[%fp+tmp_py]
2446         stx     %i3,[%fp+tmp_px]
2447 
2448         ld      [cmul_arr],%f2
2449         or      %g0,5,counter
2450         ba      .cont7
2451         fzero   %f0
2452 1:
2453         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2454         bne,pn  %icc,1f
2455         sethi   %hi(0x00800000),%o5
2456 
2457         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2458         be,pn   %icc,2b
2459         nop
2460 1:
2461         st      %f0,[%fp+tmp_pz]
2462         st      %f2,[%fp+tmp_pz+4]
2463         ld      [%fp+tmp_pz],%o4
2464         fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;
2465 
2466         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2467         cmp     %l6,%o5
2468         bge,a   1f
2469         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2470 
2471         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2472         sra     %o4,28,%o4              ! itmp0 >>= 28;
2473         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2474 
2475         and     %o4,-8,%o4              ! itmp0 &= -8;
2476         fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;
2477 
2478         fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
2479         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2480 
2481         fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
2482 1:
2483         faddd   %f16,K5,%f8             ! (5_1) dtmp0 += K5;
2484         add     %i1,stridey,%i1         ! py += stridey
2485         fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;
2486 
2487         ld      [%fp+tmp_pz+4],%o4
2488         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2489         cmp     %l6,%o5
2490         bge,a   1f
2491         fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;
2492 
2493         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2494         sra     %o4,28,%o4              ! itmp0 >>= 28;
2495         fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
2496 
2497         and     %o4,-8,%o4              ! itmp0 &= -8;
2498         fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;
2499 
2500         fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
2501         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2502 
2503         fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
2504 1:
2505         sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
2506         sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;
2507         add     %i3,stridex,%i3         ! px += stridex
2508 
2509         sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
2510         ba      .d7
2511         add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;
2512 
2513         .align  16
2514 .update8:
2515         cmp     counter,5
2516         bg,pn   %icc,1f
2517         nop
2518 
2519         ld      [cmul_arr],%f1
2520         ba      .cont8
2521         fzeros  %f0
2522 1:
2523         cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
2524         bg,pt   %icc,1f
2525         nop
2526 2:
2527         sub     counter,5,counter
2528         st      counter,[%fp+tmp_counter]
2529         stx     %i1,[%fp+tmp_py]
2530         stx     %i3,[%fp+tmp_px]
2531 
2532         ld      [cmul_arr],%f1
2533         or      %g0,5,counter
2534         ba      .cont8
2535         fzeros  %f0
2536 1:
2537         andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2538         bne,pn  %icc,1f
2539         sethi   %hi(0x00800000),%o5
2540 
2541         andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
2542         be,pn   %icc,2b
2543         nop
2544 1:
2545         std     %f0,[%fp+tmp_pz]
2546         ld      [%fp+tmp_pz],%o4
2547         fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;
2548 
2549         faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;
2550 
2551         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2552         cmp     %l6,%o5
2553         bge,a   1f
2554         fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;
2555 
2556         ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
2557         sra     %o4,28,%o4              ! itmp0 >>= 28;
2558         fabss   %f0,%f0                 ! fy0 = fabsf(fy0);
2559 
2560         and     %o4,-8,%o4              ! itmp0 &= -8;
2561         fitod   %f0,%f16                ! dtmp0 = (double) *(int*)&fy0;
2562 
2563         fmuld   %f16,%f40,%f40          ! dtmp0 *= C2ONM149;
2564         ldd     [cmul_arr+%o4],%f16     ! dsign = *(double*)((char*)cmul_arr + itmp0);
2565 
2566         fmuld   %f16,%f40,%f40          ! dtmp0 *= dsign;
2567 1:
2568         add     %i1,stridey,%i1         ! py += stridey
2569         fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;
2570 
2571         ld      [%fp+tmp_pz+4],%o4
2572         and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
2573         cmp     %l6,%o5
2574         bge,a   1f
2575         fstod   %f1,%f16                ! (5_1) x0 = (double)fx0;
2576 
2577         fabss   %f1,%f16                ! fx0 = fabsf(fx0);
2578         ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
2579         sra     %o4,28,%o4              ! itmp0 >>= 28;
2580 
2581         and     %o4,-8,%o4              ! itmp0 &= -8;
2582         fitod   %f16,%f16               ! dtmp0 = (double) *(int*)&fx0;
2583 
2584         fmuld   %f16,%f0,%f16           ! dtmp0 *= C2ONM149;
2585         ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);
2586 
2587         fmuld   %f16,%f0,%f16           ! dtmp0 *= dsign;
2588 1:
2589         sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
2590         sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;
2591 
2592         add     %i3,stridex,%i3         ! px += stridex
2593         ba      .d8
2594         sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;
2595 
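! .update9 -- same fixup pattern as .update8 for the next pipeline
! stage; resumes at .cont9 or .d9.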
        .align  16
.update9:
        cmp     counter,5
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont9
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        stx     %i3,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,5,counter
        ba      .cont9
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;

        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        add     %i1,stridey,%i1         ! py += stridey
        faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
        fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
        sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;

        add     %i3,stridex,%i3         ! px += stridex
        ba      .d9
        sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;

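! .update10 -- same fixup pattern, pending-result threshold 1; resumes
! at .cont10 or .den0.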
        .align  16
.update10:
        cmp     counter,1
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont10
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        stx     %i3,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,1,counter
        ba      .cont10
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o1
        fmuld   %f40,%f24,%f36          ! (3_1) dtmp0 *= x20;

        and     %o1,_0x7fffffff,%o4     ! itmp0 & 0x7fffffff
        cmp     %o4,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (5_1) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o1,28,%o1              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o1,-8,%o1              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o1],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        faddd   %f22,K5,%f14            ! (2_1) dtmp0 += K5;
        fmuld   %f4,%f18,%f4            ! (1_1) dtmp0 *= x20;

        sll     %l7,3,%l7               ! (0_1) cmul0_ind = ldiff0 << 3;
        add     %i3,stridex,%i3         ! px += stridex

        ld      [%fp+tmp_pz+4],%o1
        and     %o1,_0x7fffffff,%o4     ! itmp0 & 0x7fffffff
        cmp     %o4,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o1,28,%o1              ! itmp0 >>= 28;
        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);

        and     %o1,-8,%o1              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o1],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        ba      .den0
        add     %o2,stridez,%o1         ! pz += stridez

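! .update11 -- same fixup pattern, threshold 2.  %l5 is parked in the
! tmp_px slot while it serves as scratch for the magnitude tests, then
! reloaded; resumes at .cont11 or .den1.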
        .align  16
.update11:
        cmp     counter,2
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont11
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        stx     %i3,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,2,counter
        ba      .cont11
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f4,%f34           ! (4_1) dtmp0 *= x20;

        stx     %l5,[%fp+tmp_px]
        and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
        cmp     %l5,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        faddd   %f20,K5,%f12            ! (3_1) dtmp0 += K5;
        add     %i1,stridey,%i1         ! py += stridey
        fmuld   %f22,%f16,%f22          ! (2_1) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
        cmp     %l5,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        ldx     [%fp+tmp_px],%l5
        sra     %l3,27,%o5              ! (0_0) signx0 = ux0 >> 27;
        add     %i3,stridex,%i3         ! px += stridex

        lda     [%i1]0x82,%l3           ! (1_0) uy0 = *(int*)py;
        sra     %l4,28,%o4              ! (0_0) signy0 = uy0 >> 28;
        ba      .den1
        add     %l6,cadd_arr,%l6        ! (0_0) ltmp0 += (char*)cadd_arr;

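! .update12 -- same fixup pattern, threshold 3, with the same %l5
! parking in tmp_px; resumes at .cont12 or .den2.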
        .align  16
.update12:
        cmp     counter,3
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont12
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        stx     %i3,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,3,counter
        ba      .cont12
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f22,%f32          ! (5_1) dtmp0 *= x20;

        stx     %l5,[%fp+tmp_px]
        and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
        cmp     %l5,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        faddd   %f18,K5,%f10            ! (4_1) dtmp0 += K5;
        add     %i3,stridex,%i3         ! px += stridex
        add     %i1,stridey,%i1         ! py += stridey
        fmuld   %f20,%f24,%f20          ! (3_1) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l5     ! itmp0 & 0x7fffffff
        cmp     %l5,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        ldx     [%fp+tmp_px],%l5

        sra     %l4,27,%o5              ! (1_0) signx0 = ux0 >> 27;

        sra     %l3,28,%o4              ! (1_0) signy0 = uy0 >> 28;
        ba      .den2
        add     %l6,cadd_arr,%l6        ! (1_0) ltmp0 += (char*)cadd_arr;

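! .update13 -- same fixup pattern, threshold 4; the saved px is rewound
! by one stridex before being recorded.  Resumes at .cont13 or .den3.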
        .align  16
.update13:
        cmp     counter,4
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont13
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        sub     %i3,stridex,%o5
        stx     %o5,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,4,counter
        ba      .cont13
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f20,%f30          ! (0_0) dtmp0 *= x20;

        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        faddd   %f16,K5,%f8             ! (5_1) dtmp0 += K5;
        add     %i1,stridey,%i1         ! py += stridey
        fmuld   %f18,%f4,%f18           ! (4_1) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        sll     %g5,5,%l6               ! (2_0) ltmp0 = ldiff0 << 5;
        sra     %l3,27,%o5              ! (2_0) signx0 = ux0 >> 27;

        sra     %l4,28,%o4              ! (2_0) signy0 = uy0 >> 28;
        ba      .den3
        add     %l6,cadd_arr,%l6        ! (2_0) ltmp0 += (char*)cadd_arr;

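! .update14 -- same fixup pattern, threshold 5, saving px rewound by
! one stridex; resumes at .cont14 or .den4.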
        .align  16
.update14:
        cmp     counter,5
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f1
        ba      .cont14
        fzeros  %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        sub     %i3,stridex,%o5
        stx     %o5,[%fp+tmp_px]

        ld      [cmul_arr],%f1
        or      %g0,5,counter
        ba      .cont14
        fzeros  %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        std     %f0,[%fp+tmp_pz]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f18,%f28          ! (1_0) dtmp0 *= x20;

        faddd   %f16,K5,%f2             ! (0_0) dtmp0 += K5;

        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f16                ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f16,%f40,%f40          ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f16     ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f16,%f40,%f40          ! dtmp0 *= dsign;
1:
        add     %i1,stridey,%i1         ! py += stridey
        fmuld   %f24,%f22,%f24          ! (5_1) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f1,%f16                ! (5_1) x0 = (double)fx0;

        fabss   %f1,%f16                ! fx0 = fabsf(fx0);
        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f16,%f16               ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f16,%f0,%f16           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f16,%f0,%f16           ! dtmp0 *= dsign;
1:
        sll     %o0,5,%l6               ! (3_0) ltmp0 = ldiff0 << 5;
        sra     %l4,27,%o5              ! (3_0) signx0 = ux0 >> 27;

        ba      .den4
        sra     %l3,28,%o4              ! (3_0) signy0 = uy0 >> 28;

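! .update15 -- same fixup pattern, threshold 6, saving px rewound by
! one stridex; resumes at .cont15 or .den5.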
        .align  16
.update15:
        cmp     counter,6
        bg,pn   %icc,1f
        nop

        ld      [cmul_arr],%f2
        ba      .cont15
        fzero   %f0
1:
        cmp     %o5,_0x7f800000         ! (4_0) b0 ? 0x7f800000
        bg,pt   %icc,1f
        nop
2:
        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]
        stx     %i1,[%fp+tmp_py]
        sub     %i3,stridex,%o5
        stx     %o5,[%fp+tmp_px]

        ld      [cmul_arr],%f2
        or      %g0,6,counter
        ba      .cont15
        fzero   %f0
1:
        andcc   %l3,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        bne,pn  %icc,1f
        sethi   %hi(0x00800000),%o5

        andcc   %l4,_0x7fffffff,%g0     ! itmp0 & 0x7fffffff
        be,pn   %icc,2b
        nop
1:
        st      %f0,[%fp+tmp_pz]
        st      %f2,[%fp+tmp_pz+4]
        ld      [%fp+tmp_pz],%o4
        fmuld   %f40,%f16,%f26          ! (2_0) dtmp0 *= x20;

        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f0,%f40                ! (0_0) y0 = (double)fy0;

        ldd     [cmul_arr+96],%f40      ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;
        fabss   %f0,%f0                 ! fy0 = fabsf(fy0);

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f0,%f0                 ! dtmp0 = (double) *(int*)&fy0;

        fmuld   %f0,%f40,%f40           ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f0,%f40,%f40           ! dtmp0 *= dsign;
1:
        add     %i1,stridey,%i1         ! py += stridey
        faddd   %f4,K5,%f62             ! (1_0) dtmp0 += K5;
        fmuld   %f24,%f20,%f24          ! (0_0) dtmp0 *= x20;

        ld      [%fp+tmp_pz+4],%o4
        and     %o4,_0x7fffffff,%l6     ! itmp0 & 0x7fffffff
        cmp     %l6,%o5
        bge,a   1f
        fstod   %f2,%f2                 ! (5_1) x0 = (double)fx0;

        fabss   %f2,%f2                 ! fx0 = fabsf(fx0);
        ldd     [cmul_arr+96],%f0       ! LOAD C2ONM149
        sra     %o4,28,%o4              ! itmp0 >>= 28;

        and     %o4,-8,%o4              ! itmp0 &= -8;
        fitod   %f2,%f2                 ! dtmp0 = (double) *(int*)&fx0;

        fmuld   %f2,%f0,%f2             ! dtmp0 *= C2ONM149;
        ldd     [cmul_arr+%o4],%f0      ! dsign = *(double*)((char*)cmul_arr + itmp0);

        fmuld   %f2,%f0,%f2             ! dtmp0 *= dsign;
1:
        sll     %l5,5,%l6               ! (4_0) ltmp0 = ldiff0 << 5;
        sra     %l3,27,%o5              ! (4_0) signx0 = ux0 >> 27;

        ba      .den5
        sra     %l4,28,%o4              ! (4_0) signy0 = uy0 >> 28;

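! .u0 .. .u39: out-of-line stubs for the main loop's operand checks.
! Each loads %o5 with the classification constant the resumed code
! expects -- 0x7fffffff (sign-stripping mask) or 0x7f800000 (the
! infinity/NaN exponent pattern) -- in the branch delay slot and
! branches back to the matching .cN continuation label.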
        .align  16
.u0:
        ba      .c0
        or      %g0,_0x7fffffff,%o5
.u1:
        ba      .c1
        or      %g0,_0x7fffffff,%o5
.u2:
        ba      .c2
        or      %g0,_0x7f800000,%o5
.u3:
        ba      .c3
        or      %g0,_0x7f800000,%o5
.u4:
        ba      .c4
        or      %g0,_0x7fffffff,%o5
.u5:
        ba      .c5
        or      %g0,_0x7fffffff,%o5
.u6:
        ba      .c6
        or      %g0,_0x7f800000,%o5
.u7:
        ba      .c7
        or      %g0,_0x7f800000,%o5
.u8:
        ba      .c8
        or      %g0,_0x7fffffff,%o5
.u9:
        ba      .c9
        or      %g0,_0x7fffffff,%o5
.u10:
        ba      .c10
        or      %g0,_0x7f800000,%o5
.u11:
        ba      .c11
        or      %g0,_0x7f800000,%o5
.u12:
        ba      .c12
        or      %g0,_0x7fffffff,%o5
.u13:
        ba      .c13
        or      %g0,_0x7fffffff,%o5
.u14:
        ba      .c14
        or      %g0,_0x7f800000,%o5
.u15:
        ba      .c15
        or      %g0,_0x7f800000,%o5
.u16:
        ba      .c16
        or      %g0,_0x7fffffff,%o5
.u17:
        ba      .c17
        or      %g0,_0x7fffffff,%o5
.u18:
        ba      .c18
        or      %g0,_0x7f800000,%o5
.u19:
        ba      .c19
        or      %g0,_0x7f800000,%o5
.u20:
        ba      .c20
        or      %g0,_0x7fffffff,%o5
.u21:
        ba      .c21
        or      %g0,_0x7fffffff,%o5
.u22:
        ba      .c22
        or      %g0,_0x7f800000,%o5
.u23:
        ba      .c23
        or      %g0,_0x7f800000,%o5
.u24:
        ba      .c24
        or      %g0,_0x7fffffff,%o5
.u25:
        ba      .c25
        or      %g0,_0x7fffffff,%o5
.u26:
        ba      .c26
        or      %g0,_0x7f800000,%o5
.u27:
        ba      .c27
        or      %g0,_0x7f800000,%o5
.u28:
        ba      .c28
        or      %g0,_0x7fffffff,%o5
.u29:
        ba      .c29
        or      %g0,_0x7fffffff,%o5
.u30:
        ba      .c30
        or      %g0,_0x7f800000,%o5
.u31:
        ba      .c31
        or      %g0,_0x7f800000,%o5
.u32:
        ba      .c32
        or      %g0,_0x7fffffff,%o5
.u33:
        ba      .c33
        or      %g0,_0x7fffffff,%o5
.u34:
        ba      .c34
        or      %g0,_0x7f800000,%o5
.u35:
        ba      .c35
        or      %g0,_0x7f800000,%o5
.u36:
        ba      .c36
        or      %g0,_0x7fffffff,%o5
.u37:
        ba      .c37
        or      %g0,_0x7fffffff,%o5
.u38:
        ba      .c38
        or      %g0,_0x7f800000,%o5
.u39:
        ba      .c39
        or      %g0,_0x7f800000,%o5
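! .up0 .. .up23: the same constant-reload stubs serving the .coN
! continuation labels.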
.up0:
        ba      .co0
        or      %g0,_0x7fffffff,%o5
.up1:
        ba      .co1
        or      %g0,_0x7fffffff,%o5
.up2:
        ba      .co2
        or      %g0,_0x7f800000,%o5
.up3:
        ba      .co3
        or      %g0,_0x7f800000,%o5
.up4:
        ba      .co4
        or      %g0,_0x7fffffff,%o5
.up5:
        ba      .co5
        or      %g0,_0x7fffffff,%o5
.up6:
        ba      .co6
        or      %g0,_0x7f800000,%o5
.up7:
        ba      .co7
        or      %g0,_0x7f800000,%o5
.up8:
        ba      .co8
        or      %g0,_0x7fffffff,%o5
.up9:
        ba      .co9
        or      %g0,_0x7fffffff,%o5
.up10:
        ba      .co10
        or      %g0,_0x7f800000,%o5
.up11:
        ba      .co11
        or      %g0,_0x7f800000,%o5
.up12:
        ba      .co12
        or      %g0,_0x7fffffff,%o5
.up13:
        ba      .co13
        or      %g0,_0x7fffffff,%o5
.up14:
        ba      .co14
        or      %g0,_0x7f800000,%o5
.up15:
        ba      .co15
        or      %g0,_0x7f800000,%o5
.up16:
        ba      .co16
        or      %g0,_0x7fffffff,%o5
.up17:
        ba      .co17
        or      %g0,_0x7fffffff,%o5
.up18:
        ba      .co18
        or      %g0,_0x7f800000,%o5
.up19:
        ba      .co19
        or      %g0,_0x7f800000,%o5
.up20:
        ba      .co20
        or      %g0,_0x7fffffff,%o5
.up21:
        ba      .co21
        or      %g0,_0x7fffffff,%o5
.up22:
        ba      .co22
        or      %g0,_0x7f800000,%o5
.up23:
        ba      .co23
        or      %g0,_0x7f800000,%o5
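! Common exit: restore the caller's register window and return.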
.exit:
        ret
        restore
        SET_SIZE(__vatan2f)