1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vatanf.S"
  30 
  31 #include "libm.h"
  32 
	RO_DATA
	.align	64

.CONST_TBL:
! Minimax polynomial coefficients: atan(r) ~= r*(K0 + r^2*(K1 + r^2*K2))
! for the reduced argument r (see the algorithm notes below).
	.word	0x3fefffff, 0xfffccbbc	! K0 =  9.99999999976686608841e-01
	.word	0xbfd55554, 0x51c6b90f	! K1 = -3.33333091601972730504e-01
	.word	0x3fc98d6d, 0x926596cc	! K2 =  1.99628540499523379702e-01
! Bit-pattern constants for the VIS partitioned ops (fpadd32/fand):
! fpadd32(x,DC1) then fand(.,DC2) rounds x to the nearest double that
! keeps only the top 2 mantissa bits (DC1 = half of the DC2 truncation
! step, giving round-to-nearest).  DC3 (0x7ff00000:00000000) is the
! exponent-field mask of the high word of a double.
	.word	0x00020000, 0x00000000	! DC1
	.word	0xfffc0000, 0x00000000	! DC2
	.word	0x7ff00000, 0x00000000	! DC3
	.word	0x3ff00000, 0x00000000	! DONE = 1.0
	.word	0x40000000, 0x00000000	! DTWO = 2.0

! parr0 = *(int*)&(1.0 / *(double*)&(((long long)i << 45) | 0x3ff0100000000000ULL)) + 0x3ff00000, i = [0, 127]
  47 
  48         .word   0x7fdfe01f, 0x7fdfa11c, 0x7fdf6310, 0x7fdf25f6
  49         .word   0x7fdee9c7, 0x7fdeae80, 0x7fde741a, 0x7fde3a91
  50         .word   0x7fde01e0, 0x7fddca01, 0x7fdd92f2, 0x7fdd5cac
  51         .word   0x7fdd272c, 0x7fdcf26e, 0x7fdcbe6d, 0x7fdc8b26
  52         .word   0x7fdc5894, 0x7fdc26b5, 0x7fdbf583, 0x7fdbc4fd
  53         .word   0x7fdb951e, 0x7fdb65e2, 0x7fdb3748, 0x7fdb094b
  54         .word   0x7fdadbe8, 0x7fdaaf1d, 0x7fda82e6, 0x7fda5741
  55         .word   0x7fda2c2a, 0x7fda01a0, 0x7fd9d79f, 0x7fd9ae24
  56         .word   0x7fd9852f, 0x7fd95cbb, 0x7fd934c6, 0x7fd90d4f
  57         .word   0x7fd8e652, 0x7fd8bfce, 0x7fd899c0, 0x7fd87427
  58         .word   0x7fd84f00, 0x7fd82a4a, 0x7fd80601, 0x7fd7e225
  59         .word   0x7fd7beb3, 0x7fd79baa, 0x7fd77908, 0x7fd756ca
  60         .word   0x7fd734f0, 0x7fd71378, 0x7fd6f260, 0x7fd6d1a6
  61         .word   0x7fd6b149, 0x7fd69147, 0x7fd6719f, 0x7fd6524f
  62         .word   0x7fd63356, 0x7fd614b3, 0x7fd5f664, 0x7fd5d867
  63         .word   0x7fd5babc, 0x7fd59d61, 0x7fd58056, 0x7fd56397
  64         .word   0x7fd54725, 0x7fd52aff, 0x7fd50f22, 0x7fd4f38f
  65         .word   0x7fd4d843, 0x7fd4bd3e, 0x7fd4a27f, 0x7fd48805
  66         .word   0x7fd46dce, 0x7fd453d9, 0x7fd43a27, 0x7fd420b5
  67         .word   0x7fd40782, 0x7fd3ee8f, 0x7fd3d5d9, 0x7fd3bd60
  68         .word   0x7fd3a524, 0x7fd38d22, 0x7fd3755b, 0x7fd35dce
  69         .word   0x7fd34679, 0x7fd32f5c, 0x7fd31877, 0x7fd301c8
  70         .word   0x7fd2eb4e, 0x7fd2d50a, 0x7fd2bef9, 0x7fd2a91c
  71         .word   0x7fd29372, 0x7fd27dfa, 0x7fd268b3, 0x7fd2539d
  72         .word   0x7fd23eb7, 0x7fd22a01, 0x7fd21579, 0x7fd20120
  73         .word   0x7fd1ecf4, 0x7fd1d8f5, 0x7fd1c522, 0x7fd1b17c
  74         .word   0x7fd19e01, 0x7fd18ab0, 0x7fd1778a, 0x7fd1648d
  75         .word   0x7fd151b9, 0x7fd13f0e, 0x7fd12c8b, 0x7fd11a30
  76         .word   0x7fd107fb, 0x7fd0f5ed, 0x7fd0e406, 0x7fd0d244
  77         .word   0x7fd0c0a7, 0x7fd0af2f, 0x7fd09ddb, 0x7fd08cab
  78         .word   0x7fd07b9f, 0x7fd06ab5, 0x7fd059ee, 0x7fd04949
  79         .word   0x7fd038c6, 0x7fd02864, 0x7fd01824, 0x7fd00804
  80 
! sign_arr: result-sign multipliers, indexed by (ux >> 28) & -8
! (byte offset 0 -> +1.0 for positive inputs, 8 -> -1.0 for negative).
	.word	0x3ff00000, 0x00000000	!  1.0
	.word	0xbff00000, 0x00000000	! -1.0

! parr1[i] = atan((double)*(float*)&((i + 460) << 21)), i = [0, 155]
  85 
  86         .word   0x3f2fffff, 0xf555555c, 0x3f33ffff, 0xf595555f
  87         .word   0x3f37ffff, 0xee000018, 0x3f3bffff, 0xe36aaadf
  88         .word   0x3f3fffff, 0xd55555bc, 0x3f43ffff, 0xd65555f2
  89         .word   0x3f47ffff, 0xb8000185, 0x3f4bffff, 0x8daaadf3
  90         .word   0x3f4fffff, 0x55555bbc, 0x3f53ffff, 0x59555f19
  91         .word   0x3f57fffe, 0xe000184d, 0x3f5bfffe, 0x36aadf30
  92         .word   0x3f5ffffd, 0x5555bbbc, 0x3f63fffd, 0x6555f195
  93         .word   0x3f67fffb, 0x800184cc, 0x3f6bfff8, 0xdaadf302
  94         .word   0x3f6ffff5, 0x555bbbb7, 0x3f73fff5, 0x955f194a
  95         .word   0x3f77ffee, 0x00184ca6, 0x3f7bffe3, 0x6adf2fd1
  96         .word   0x3f7fffd5, 0x55bbba97, 0x3f83ffd6, 0x55f1929c
  97         .word   0x3f87ffb8, 0x0184c30a, 0x3f8bff8d, 0xadf2e78c
  98         .word   0x3f8fff55, 0x5bbb729b, 0x3f93ff59, 0x5f18a700
  99         .word   0x3f97fee0, 0x184a5c36, 0x3f9bfe36, 0xdf291712
 100         .word   0x3f9ffd55, 0xbba97625, 0x3fa3fd65, 0xf169c9d9
 101         .word   0x3fa7fb81, 0x8430da2a, 0x3fabf8dd, 0xf139c444
 102         .word   0x3faff55b, 0xb72cfdea, 0x3fb3f59f, 0x0e7c559d
 103         .word   0x3fb7ee18, 0x2602f10f, 0x3fbbe39e, 0xbe6f07c4
 104         .word   0x3fbfd5ba, 0x9aac2f6e, 0x3fc3d6ee, 0xe8c6626c
 105         .word   0x3fc7b97b, 0x4bce5b02, 0x3fcb90d7, 0x529260a2
 106         .word   0x3fcf5b75, 0xf92c80dd, 0x3fd36277, 0x3707ebcc
 107         .word   0x3fd6f619, 0x41e4def1, 0x3fda64ee, 0xc3cc23fd
 108         .word   0x3fddac67, 0x0561bb4f, 0x3fe1e00b, 0xabdefeb4
 109         .word   0x3fe4978f, 0xa3269ee1, 0x3fe700a7, 0xc5784634
 110         .word   0x3fe921fb, 0x54442d18, 0x3fecac7c, 0x57846f9e
 111         .word   0x3fef730b, 0xd281f69b, 0x3ff0d38f, 0x2c5ba09f
 112         .word   0x3ff1b6e1, 0x92ebbe44, 0x3ff30b6d, 0x796a4da8
 113         .word   0x3ff3fc17, 0x6b7a8560, 0x3ff4ae10, 0xfc6589a5
 114         .word   0x3ff5368c, 0x951e9cfd, 0x3ff5f973, 0x15254857
 115         .word   0x3ff67d88, 0x63bc99bd, 0x3ff6dcc5, 0x7bb565fd
 116         .word   0x3ff7249f, 0xaa996a21, 0x3ff789bd, 0x2c160054
 117         .word   0x3ff7cd6f, 0x6dc59db4, 0x3ff7fde8, 0x0870c2a0
 118         .word   0x3ff82250, 0x768ac529, 0x3ff8555a, 0x2787981f
 119         .word   0x3ff87769, 0xeb8e956b, 0x3ff88fc2, 0x18ace9dc
 120         .word   0x3ff8a205, 0xfd558740, 0x3ff8bb9a, 0x63718f45
 121         .word   0x3ff8cca9, 0x27cf0b3d, 0x3ff8d8d8, 0xbf65316f
 122         .word   0x3ff8e1fc, 0xa98cb633, 0x3ff8eec8, 0xcfd00665
 123         .word   0x3ff8f751, 0x0eba96e6, 0x3ff8fd69, 0x4acf36b0
 124         .word   0x3ff901fb, 0x7eee715e, 0x3ff90861, 0xd082d9b5
 125         .word   0x3ff90ca6, 0x0b9322c5, 0x3ff90fb2, 0x37a7ea27
 126         .word   0x3ff911fb, 0x59997f3a, 0x3ff9152e, 0x8a326c38
 127         .word   0x3ff91750, 0xab2e0d12, 0x3ff918d6, 0xc2f9c9e2
 128         .word   0x3ff919fb, 0x54eed7a9, 0x3ff91b94, 0xee352849
 129         .word   0x3ff91ca5, 0xff216922, 0x3ff91d69, 0x0b3f72ff
 130         .word   0x3ff91dfb, 0x5459826d, 0x3ff91ec8, 0x211be619
 131         .word   0x3ff91f50, 0xa99fd49a, 0x3ff91fb2, 0x2fb5defa
 132         .word   0x3ff91ffb, 0x5446d7c3, 0x3ff92061, 0xbaabf105
 133         .word   0x3ff920a5, 0xfeefa208, 0x3ff920d6, 0xc1fb87e7
 134         .word   0x3ff920fb, 0x5444826e, 0x3ff9212e, 0x87778bfc
 135         .word   0x3ff92150, 0xa9999bb6, 0x3ff92169, 0x0b1faabb
 136         .word   0x3ff9217b, 0x544437c3, 0x3ff92194, 0xedddcc28
 137         .word   0x3ff921a5, 0xfeeedaec, 0x3ff921b2, 0x2fb1e5f1
 138         .word   0x3ff921bb, 0x54442e6e, 0x3ff921c8, 0x2110fa94
 139         .word   0x3ff921d0, 0xa99982d3, 0x3ff921d6, 0xc1fb08c6
 140         .word   0x3ff921db, 0x54442d43, 0x3ff921e1, 0xbaaa9395
 141         .word   0x3ff921e5, 0xfeeed7d0, 0x3ff921e9, 0x0b1f9ad7
 142         .word   0x3ff921eb, 0x54442d1e, 0x3ff921ee, 0x8777604e
 143         .word   0x3ff921f0, 0xa999826f, 0x3ff921f2, 0x2fb1e3f5
 144         .word   0x3ff921f3, 0x54442d19, 0x3ff921f4, 0xedddc6b2
 145         .word   0x3ff921f5, 0xfeeed7c3, 0x3ff921f6, 0xc1fb0886
 146         .word   0x3ff921f7, 0x54442d18, 0x3ff921f8, 0x2110f9e5
 147         .word   0x3ff921f8, 0xa999826e, 0x3ff921f9, 0x0b1f9acf
 148         .word   0x3ff921f9, 0x54442d18, 0x3ff921f9, 0xbaaa937f
 149         .word   0x3ff921f9, 0xfeeed7c3, 0x3ff921fa, 0x2fb1e3f4
 150         .word   0x3ff921fa, 0x54442d18, 0x3ff921fa, 0x8777604b
 151         .word   0x3ff921fa, 0xa999826e, 0x3ff921fa, 0xc1fb0886
 152         .word   0x3ff921fa, 0xd4442d18, 0x3ff921fa, 0xedddc6b2
 153         .word   0x3ff921fa, 0xfeeed7c3, 0x3ff921fb, 0x0b1f9acf
 154         .word   0x3ff921fb, 0x14442d18, 0x3ff921fb, 0x2110f9e5
 155         .word   0x3ff921fb, 0x2999826e, 0x3ff921fb, 0x2fb1e3f4
 156         .word   0x3ff921fb, 0x34442d18, 0x3ff921fb, 0x3aaa937f
 157         .word   0x3ff921fb, 0x3eeed7c3, 0x3ff921fb, 0x41fb0886
 158         .word   0x3ff921fb, 0x44442d18, 0x3ff921fb, 0x4777604b
 159         .word   0x3ff921fb, 0x4999826e, 0x3ff921fb, 0x4b1f9acf
 160         .word   0x3ff921fb, 0x4c442d18, 0x3ff921fb, 0x4dddc6b2
 161         .word   0x3ff921fb, 0x4eeed7c3, 0x3ff921fb, 0x4fb1e3f4
 162         .word   0x3ff921fb, 0x50442d18, 0x3ff921fb, 0x5110f9e5
 163         .word   0x3ff921fb, 0x5199826e, 0x3ff921fb, 0x51fb0886
 164 
! Double-precision constants kept resident in FP registers for the
! whole routine (loaded once from .CONST_TBL in the prologue).
! NOTE: comments must stay on their own lines here; trailing text on a
! #define would be captured into the macro expansion.
#define DC2		%f2
#define DTWO		%f6
#define DONE		%f52
#define K0		%f54
#define K1		%f56
#define K2		%f58
#define DC1		%f60
#define DC3		%f62

! Integer register roles.
#define stridex		%o2
#define stridey		%o3
#define MASK_0x7fffffff	%i1
#define MASK_0x100000	%i5

! Temporary stack slots, addressed relative to %fp (V9 STACK_BIAS).
#define tmp_px		STACK_BIAS-32
#define tmp_counter	STACK_BIAS-24
#define tmp0		STACK_BIAS-16
#define tmp1		STACK_BIAS-8

! Elements remaining in the current pass over the input vector.
#define counter		%l1

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20
 188 
 189 !--------------------------------------------------------------------
 190 !               !!!!!   vatanf algorithm        !!!!!
 191 !  ux = ((int*)px)[0];
 192 !  ax = ux & 0x7fffffff;
 193 !
 194 !  if ( ax < 0x39b89c55 )
 195 !  {
 196 !    *(int*)py = ux;
 197 !    goto next;
 198 !  }
 199 !
 200 !  if ( ax > 0x4c700518 )
 201 !  {
 202 !    if ( ax > 0x7f800000 )
 203 !    {
 204 !      float fpx = fabsf(*px);
 205 !      fpx *= fpx;
 206 !      *py = fpx;
 207 !      goto next;
 208 !    }
 209 !
 210 !    sign = ux & 0x80000000;
 211 !    sign |= pi_2;
 212 !    *(int*)py = sign;
 213 !    goto next;
 214 !  }
 215 !
 216 !  ftmp0 = *px;
 217 !  x = (double)ftmp0;
 218 !  px += stridex;
 219 !  y = vis_fpadd32(x,DC1);
 220 !  y = vis_fand(y,DC2);
 221 !  div = x * y;
 222 !  xx = x - y;
 223 !  div += DONE;
 224 !  i = ((unsigned long long*)&div)[0];
 225 !  y0 = vis_fand(div,DC3);
 226 !  i >>= 43;
 227 !  i &= 508;
 228 !  *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 229 !  y0 = vis_fpsub32(dtmp0, y0);
!  dtmp0 = div * y0;
!  dtmp0 = DTWO - dtmp0;
!  y0 *= dtmp0;
!  dtmp1 = div * y0;
!  dtmp1 = DTWO - dtmp1;
!  y0 *= dtmp1;
 236 !  ax = ux & 0x7fffffff;
 237 !  ax += 0x00100000;
 238 !  ax >>= 18;
 239 !  ax &= -8;
 240 !  res = *(double*)((char*)parr1 + ax);
 241 !  ux >>= 28;
 242 !  ux &= -8;
 243 !  dtmp0 = *(double*)((char*)sign_arr + ux);
 244 !  res *= dtmp0;
 245 !  xx *= y0;
 246 !  x2 = xx * xx;
 247 !  dtmp0 = K2 * x2;
 248 !  dtmp0 += K1;
 249 !  dtmp0 *= x2;
 250 !  dtmp0 += K0;
 251 !  dtmp0 *= xx;
 252 !  res += dtmp0;
 253 !  ftmp0 = (float)res;
 254 !  py[0] = ftmp0;
 255 !  py += stridey;
 256 !--------------------------------------------------------------------
 257 
! __vatanf: vectorized single-precision arctangent.
! Inferred argument registers (from the uses below -- TODO confirm
! against the C prototype): %i0 = element count, %i1 = px,
! %i2 = x stride (in floats), %i3 = py, %i4 = y stride (in floats).
	ENTRY(__vatanf)
	save	%sp,-SA(MINFRAME)-tmps,%sp	! new window + temp slots
	PIC_SETUP(l7)
	PIC_SET(l7,.CONST_TBL,l2)	! %l2 = base of constant/table data

	st	%i0,[%fp+tmp_counter]	! stash element count for .begin

	sllx	%i2,2,stridex		! element strides -> byte strides
	sllx	%i4,2,stridey		! (<< 2 == * sizeof (float))

	or	%g0,%i3,%o1		! %o1 = py
	stx	%i1,[%fp+tmp_px]	! stash px for .begin

! Load the eight doubles that stay resident in FP registers.
	ldd	[%l2],K0
	ldd	[%l2+8],K1
	ldd	[%l2+16],K2
	ldd	[%l2+24],DC1
	ldd	[%l2+32],DC2
	ldd	[%l2+40],DC3
	ldd	[%l2+48],DONE
	ldd	[%l2+56],DTWO

! Table pointers.  parr0 follows the 8 constants (64 bytes); the two
! sign doubles follow the 128-word parr0 (512 bytes); %l7 is the parr1
! base pre-biased by 0x1cc*8 so it can be indexed directly with
! ((ax + 0x00100000) >> 18) & -8 (see the algorithm notes above).
! %i4 is free to reuse here: stridey was already computed from it.
	add	%l2,64,%i4
	add	%l2,64+512,%l0
	add	%l2,64+512+16-0x1cc*8,%l7

! sethi sets only the upper 22 bits, so build each constant in two
! steps, adding the low bits separately.
	sethi	%hi(0x100000),MASK_0x100000
	sethi	%hi(0x7ffffc00),MASK_0x7fffffff
	add	MASK_0x7fffffff,1023,MASK_0x7fffffff	! = 0x7fffffff

! Range-check thresholds, compared against ax = ux & 0x7fffffff:
	sethi	%hi(0x39b89c00),%o4
	add	%o4,0x55,%o4		! %o4 = 0x39b89c55 (tiny-arg cutoff)
	sethi	%hi(0x4c700400),%o5
	add	%o5,0x118,%o5		! %o5 = 0x4c700518 (huge-arg cutoff)
 292 
 293 .begin:
 294         ld      [%fp+tmp_counter],counter
 295         ldx     [%fp+tmp_px],%i3
 296         st      %g0,[%fp+tmp_counter]
 297 .begin1:
 298         cmp     counter,0
 299         ble,pn  %icc,.exit
 300         nop
 301 
 302         lda     [%i3]0x82,%l6           ! (0_0) ux = ((int*)px)[0];
 303 
 304         and     %l6,MASK_0x7fffffff,%l5 ! (0_0) ax = ux & 0x7fffffff;
 305         lda     [%i3]0x82,%f0           ! (0_0) ftmp0 = *px;
 306 
 307         cmp     %l5,%o4                 ! (0_0) ax ? 0x39b89c55
 308         bl,pn   %icc,.spec0             ! (0_0) if ( ax < 0x39b89c55 )
 309         nop
 310 
 311         cmp     %l5,%o5                 ! (0_0) ax ? 0x4c700518
 312         bg,pn   %icc,.spec1             ! (0_0) if ( ax > 0x4c700518 )
 313         nop
 314 
 315         add     %i3,stridex,%l5         ! px += stridex;
 316         fstod   %f0,%f22                ! (0_0) ftmp0 = *px;
 317         mov     %l6,%i3
 318 
 319         lda     [%l5]0x82,%l6           ! (1_0) ux = ((int*)px)[0];
 320 
 321         and     %l6,MASK_0x7fffffff,%o7 ! (1_0) ax = ux & 0x7fffffff;
 322         lda     [%l5]0x82,%f0           ! (1_0) ftmp0 = *px;
 323         add     %l5,stridex,%l4         ! px += stridex;
 324         fpadd32 %f22,DC1,%f24           ! (0_0) y = vis_fpadd32(x,dconst1);
 325 
 326         cmp     %o7,%o4                 ! (1_0) ax ? 0x39b89c55
 327         bl,pn   %icc,.update0           ! (1_0) if ( ax < 0x39b89c55 )
 328         nop
 329 .cont0:
 330         cmp     %o7,%o5                 ! (1_0) ax ? 0x4c700518
 331         bg,pn   %icc,.update1           ! (1_0) if ( ax > 0x4c700518 )
 332         nop
 333 .cont1:
 334         fstod   %f0,%f20                ! (1_0) x = (double)ftmp0;
 335         mov     %l6,%l5
 336 
 337         fand    %f24,DC2,%f26           ! (0_0) y = vis_fand(y,dconst2);
 338 
 339         fmuld   %f22,%f26,%f32          ! (0_0) div = x * y;
 340 
 341         lda     [%l4]0x82,%l6           ! (2_0) ux = ((int*)px)[0];
 342         fsubd   %f22,%f26,%f22          ! (0_0) xx = x - y;
 343 
 344         and     %l6,MASK_0x7fffffff,%o7 ! (2_0) ax = ux & 0x7fffffff;
 345         lda     [%l4]0x82,%f0           ! (2_0) ftmp0 = *px;
 346         add     %l4,stridex,%l3         ! px += stridex;
 347         fpadd32 %f20,DC1,%f24           ! (1_0) y = vis_fpadd32(x,dconst1);
 348 
 349         cmp     %o7,%o4                 ! (2_0) ax ? 0x39b89c55
 350         bl,pn   %icc,.update2           ! (2_0) if ( ax < 0x39b89c55 )
 351         faddd   DONE,%f32,%f32          ! (0_0) div += done;
 352 .cont2:
 353         cmp     %o7,%o5                 ! (2_0) ax ? 0x4c700518
 354         bg,pn   %icc,.update3           ! (2_0) if ( ax > 0x4c700518 )
 355         nop
 356 .cont3:
 357         std     %f32,[%fp+tmp0]         ! (0_0) i = ((unsigned long long*)&div)[0];
 358         mov     %l6,%l4
 359         fstod   %f0,%f18                ! (2_0) x = (double)ftmp0;
 360 
 361         fand    %f24,DC2,%f26           ! (1_0) y = vis_fand(y,dconst2);
 362 
 363         fmuld   %f20,%f26,%f30          ! (1_0) div = x * y;
 364 
 365         lda     [%l3]0x82,%l6           ! (3_0) ux = ((int*)px)[0];
 366         fsubd   %f20,%f26,%f20          ! (1_0) xx = x - y;
 367 
 368         and     %l6,MASK_0x7fffffff,%o7 ! (3_0) ax = ux & 0x7fffffff;
 369         lda     [%l3]0x82,%f0           ! (3_0) ftmp0 = *px;
 370         add     %l3,stridex,%i0         ! px += stridex;
 371         fpadd32 %f18,DC1,%f24           ! (2_0) y = vis_fpadd32(x,dconst1);
 372 
 373         cmp     %o7,%o4                 ! (3_0) ax ? 0x39b89c55
 374         bl,pn   %icc,.update4           ! (3_0) if ( ax < 0x39b89c55 )
 375         faddd   DONE,%f30,%f30          ! (1_0) div += done;
 376 .cont4:
 377         cmp     %o7,%o5                 ! (3_0) ax ? 0x4c700518
 378         bg,pn   %icc,.update5           ! (3_0) if ( ax > 0x4c700518 )
 379         nop
 380 .cont5:
 381         std     %f30,[%fp+tmp1]         ! (1_0) i = ((unsigned long long*)&div)[0];
 382         mov     %l6,%l3
 383         fstod   %f0,%f16                ! (3_0) x = (double)ftmp0;
 384 
 385         ldx     [%fp+tmp0],%o0          ! (0_0) i = ((unsigned long long*)&div)[0];
 386         fand    %f24,DC2,%f26           ! (2_0) y = vis_fand(y,dconst2);
 387 
 388         fand    %f32,DC3,%f24           ! (0_0) y0 = vis_fand(div,dconst3);
 389 
 390         srlx    %o0,43,%o0              ! (0_0) i >>= 43;
 391 
 392         and     %o0,508,%l6             ! (0_0) i &= 508;
 393 
 394         ld      [%i4+%l6],%f0           ! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 395 
 396         fmuld   %f18,%f26,%f28          ! (2_0) div = x * y;
 397 
 398         lda     [%i0]0x82,%l6           ! (4_0) ux = ((int*)px)[0];
 399         fsubd   %f18,%f26,%f18          ! (2_0) xx = x - y;
 400 
 401         fpsub32 %f0,%f24,%f40           ! (0_0) y0 = vis_fpsub32(dtmp0, y0);
 402 
 403         and     %l6,MASK_0x7fffffff,%o7 ! (4_0) ax = ux & 0x7fffffff;
 404         lda     [%i0]0x82,%f0           ! (4_0) ftmp0 = *px;
 405         add     %i0,stridex,%i2         ! px += stridex;
 406         fpadd32 %f16,DC1,%f24           ! (3_0) y = vis_fpadd32(x,dconst1);
 407 
 408         cmp     %o7,%o4                 ! (4_0) ax ? 0x39b89c55
 409         bl,pn   %icc,.update6           ! (4_0) if ( ax < 0x39b89c55 )
 410         faddd   DONE,%f28,%f28          ! (2_0) div += done;
 411 .cont6:
 412         fmuld   %f32,%f40,%f42          ! (0_0) dtmp0 = div0 * y0;
 413         cmp     %o7,%o5                 ! (4_0) ax ? 0x4c700518
 414         bg,pn   %icc,.update7           ! (4_0) if ( ax > 0x4c700518 )
 415         nop
 416 .cont7:
 417         std     %f28,[%fp+tmp0]         ! (2_0) i = ((unsigned long long*)&div)[0];
 418         mov     %l6,%i0
 419         fstod   %f0,%f14                ! (4_0) x = (double)ftmp0;
 420 
 421         ldx     [%fp+tmp1],%g1          ! (1_0) i = ((unsigned long long*)&div)[0];
 422         fand    %f24,DC2,%f26           ! (3_0) y = vis_fand(y,dconst2);
 423 
 424         fand    %f30,DC3,%f24           ! (1_0) y0 = vis_fand(div,dconst3);
 425 
 426         fsubd   DTWO,%f42,%f44          ! (0_0) dtmp0 = dtwo - dtmp0;
 427         srlx    %g1,43,%g1              ! (1_0) i >>= 43;
 428 
 429         and     %g1,508,%l6             ! (1_0) i &= 508;
 430 
 431         ld      [%i4+%l6],%f0           ! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 432 
 433         fmuld   %f16,%f26,%f34          ! (3_0) div = x * y;
 434 
 435         lda     [%i2]0x82,%l6           ! (5_0) ux = ((int*)px)[0];
 436         fsubd   %f16,%f26,%f16          ! (3_0) xx = x - y;
 437 
 438         fpsub32 %f0,%f24,%f38           ! (1_0) y0 = vis_fpsub32(dtmp0, y0);
 439         add     %i2,stridex,%l2         ! px += stridex;
 440 
 441         fmuld   %f40,%f44,%f40          ! (0_0) y0 *= dtmp0;
 442         and     %l6,MASK_0x7fffffff,%o7 ! (5_0) ax = ux & 0x7fffffff;
 443         lda     [%i2]0x82,%f0           ! (5_0) ftmp0 = *px;
 444         fpadd32 %f14,DC1,%f24           ! (4_0) y = vis_fpadd32(x,dconst1);
 445 
 446         cmp     %o7,%o4                 ! (5_0) ax ? 0x39b89c55
 447         bl,pn   %icc,.update8           ! (5_0) if ( ax < 0x39b89c55 )
 448         faddd   DONE,%f34,%f34          ! (3_0) div += done;
 449 .cont8:
 450         fmuld   %f30,%f38,%f42          ! (1_0) dtmp0 = div0 * y0;
 451         cmp     %o7,%o5                 ! (5_0) ax ? 0x4c700518
 452         bg,pn   %icc,.update9           ! (5_0) if ( ax > 0x4c700518 )
 453         nop
 454 .cont9:
 455         std     %f34,[%fp+tmp1]         ! (3_0) i = ((unsigned long long*)&div)[0];
 456         mov     %l6,%i2
 457         fstod   %f0,%f36                ! (5_0) x = (double)ftmp0;
 458 
 459         fmuld   %f32,%f40,%f32          ! (0_0) dtmp1 = div0 * y0;
 460         ldx     [%fp+tmp0],%o0          ! (2_0) i = ((unsigned long long*)&div)[0];
 461         fand    %f24,DC2,%f26           ! (4_0) y = vis_fand(y,dconst2);
 462 
 463         fand    %f28,DC3,%f24           ! (2_0) y0 = vis_fand(div,dconst3);
 464 
 465         fsubd   DTWO,%f42,%f44          ! (1_0) dtmp0 = dtwo - dtmp0;
 466         srlx    %o0,43,%o0              ! (2_0) i >>= 43;
 467 
 468         and     %o0,508,%l6             ! (2_0) i &= 508;
 469         fsubd   DTWO,%f32,%f46          ! (0_0) dtmp1 = dtwo - dtmp1;
 470 
 471         ld      [%i4+%l6],%f0           ! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 472 
 473         fmuld   %f14,%f26,%f32          ! (4_0) div = x * y;
 474 
 475         lda     [%l2]0x82,%l6           ! (6_0) ux = ((int*)px)[0];
 476         fsubd   %f14,%f26,%f14          ! (4_0) xx = x - y;
 477 
 478         fmuld   %f40,%f46,%f26          ! (0_0) y0 *= dtmp1;
 479         add     %l2,stridex,%g5         ! px += stridex;
 480         fpsub32 %f0,%f24,%f40           ! (2_0) y0 = vis_fpsub32(dtmp0, y0);
 481 
 482         fmuld   %f38,%f44,%f38          ! (1_0) y0 *= dtmp0;
 483         and     %l6,MASK_0x7fffffff,%o7 ! (6_0) ax = ux & 0x7fffffff;
 484         lda     [%l2]0x82,%f0           ! (6_0) ftmp0 = *px;
 485         fpadd32 %f36,DC1,%f24           ! (5_0) y = vis_fpadd32(x,dconst1);
 486 
 487         cmp     %o7,%o4                 ! (6_0) ax ? 0x39b89c55
 488         bl,pn   %icc,.update10          ! (6_0) if ( ax < 0x39b89c55 )
 489         faddd   DONE,%f32,%f32          ! (4_0) div += done;
 490 .cont10:
 491         fmuld   %f28,%f40,%f42          ! (2_0) dtmp0 = div0 * y0;
 492         cmp     %o7,%o5                 ! (6_0) ax ? 0x4c700518
 493         bg,pn   %icc,.update11          ! (6_0) if ( ax > 0x4c700518 )
 494         nop
 495 .cont11:
 496         fmuld   %f22,%f26,%f22          ! (0_0) xx *= y0;
 497         mov     %l6,%l2
 498         std     %f32,[%fp+tmp0]         ! (4_0) i = ((unsigned long long*)&div)[0];
 499         fstod   %f0,%f10                ! (6_0) x = (double)ftmp0;
 500 
 501         fmuld   %f30,%f38,%f30          ! (1_0) dtmp1 = div0 * y0;
 502         ldx     [%fp+tmp1],%g1          ! (3_0) i = ((unsigned long long*)&div)[0];
 503         fand    %f24,DC2,%f26           ! (5_0) y = vis_fand(y,dconst2);
 504 
 505         fand    %f34,DC3,%f24           ! (3_0) y0 = vis_fand(div,dconst3);
 506 
 507         fmuld   %f22,%f22,%f50          ! (0_0) x2 = xx * xx;
 508         srlx    %g1,43,%g1              ! (3_0) i >>= 43;
 509         fsubd   DTWO,%f42,%f44          ! (2_0) dtmp0 = dtwo - dtmp0;
 510 
 511         and     %g1,508,%l6             ! (3_0) i &= 508;
 512         mov     %i3,%o7
 513         fsubd   DTWO,%f30,%f46          ! (1_0) dtmp1 = dtwo - dtmp1;
 514 
 515         ld      [%i4+%l6],%f0           ! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 516 
 517         fmuld   %f36,%f26,%f30          ! (5_0) div = x * y;
 518         srl     %o7,28,%g1              ! (0_0) ux >>= 28;
 519         add     %g5,stridex,%i3         ! px += stridex;
 520 
 521         fmuld   K2,%f50,%f4             ! (0_0) dtmp0 = K2 * x2;
 522         and     %o7,MASK_0x7fffffff,%o0 ! (0_0) ax = ux & 0x7fffffff;
 523         lda     [%g5]0x82,%l6           ! (7_0) ux = ((int*)px)[0];
 524         fsubd   %f36,%f26,%f36          ! (5_0) xx = x - y;
 525 
 526         fmuld   %f38,%f46,%f26          ! (1_0) y0 *= dtmp1;
 527         add     %o0,MASK_0x100000,%o0   ! (0_0) ax += 0x00100000;
 528         and     %g1,-8,%g1              ! (0_0) ux &= -8;
 529         fpsub32 %f0,%f24,%f38           ! (3_0) y0 = vis_fpsub32(dtmp0, y0);
 530 
 531         fmuld   %f40,%f44,%f40          ! (2_0) y0 *= dtmp0;
 532         and     %l6,MASK_0x7fffffff,%o7 ! (7_0) ax = ux & 0x7fffffff;
 533         lda     [%g5]0x82,%f0           ! (7_0) ftmp0 = *px;
 534         fpadd32 %f10,DC1,%f24           ! (6_0) y = vis_fpadd32(x,dconst1);
 535 
 536         cmp     %o7,%o4                 ! (7_0) ax ? 0x39b89c55
 537         bl,pn   %icc,.update12          ! (7_0) if ( ax < 0x39b89c55 )
 538         faddd   DONE,%f30,%f30          ! (5_0) div += done;
 539 .cont12:
 540         fmuld   %f34,%f38,%f42          ! (3_0) dtmp0 = div0 * y0;
 541         cmp     %o7,%o5                 ! (7_0) ax ? 0x4c700518
 542         bg,pn   %icc,.update13          ! (7_0) if ( ax > 0x4c700518 )
 543         faddd   %f4,K1,%f4              ! (0_0) dtmp0 += K1;
 544 .cont13:
 545         fmuld   %f20,%f26,%f20          ! (1_0) xx *= y0;
 546         srl     %o0,18,%o7              ! (0_0) ax >>= 18;
 547         std     %f30,[%fp+tmp1]         ! (5_0) i = ((unsigned long long*)&div)[0];
 548         fstod   %f0,%f8                 ! (7_0) x = (double)ftmp0;
 549 
 550         fmuld   %f28,%f40,%f28          ! (2_0) dtmp1 = div0 * y0;
 551         and     %o7,-8,%o7              ! (0_0) ux &= -8;
 552         ldx     [%fp+tmp0],%o0          ! (4_0) i = ((unsigned long long*)&div)[0];
 553         fand    %f24,DC2,%f26           ! (6_0) y = vis_fand(y,dconst2);
 554 
 555         add     %o7,%l7,%o7             ! (0_0) (char*)parr1 + ax;
 556         mov     %l6,%g5
 557         ldd     [%l0+%g1],%f48          ! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
 558 
 559         fmuld   %f4,%f50,%f4            ! (0_0) dtmp0 *= x2;
 560         srlx    %o0,43,%o0              ! (4_0) i >>= 43;
 561         ldd     [%o7],%f0               ! (0_0) res = *(double*)((char*)parr1 + ax);
 562         fand    %f32,DC3,%f24           ! (4_0) y0 = vis_fand(div,dconst3);
 563 
 564         fmuld   %f20,%f20,%f50          ! (1_0) x2 = xx * xx;
 565         and     %o0,508,%l6             ! (4_0) i &= 508;
 566         mov     %l5,%o7
 567         fsubd   DTWO,%f42,%f44          ! (3_0) dtmp0 = dtwo - dtmp0;
 568 
 569         fsubd   DTWO,%f28,%f46          ! (2_0) dtmp1 = dtwo - dtmp1;
 570 
 571         fmuld   %f0,%f48,%f48           ! (0_0) res *= dtmp0;
 572         srl     %o7,28,%l5              ! (1_0) ux >>= 28;
 573         ld      [%i4+%l6],%f0           ! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
 574 
 575         fmuld   %f10,%f26,%f28          ! (6_0) div = x * y;
 576         faddd   %f4,K0,%f42             ! (0_0) dtmp0 += K0;
 577 
 578         subcc   counter,8,counter
 579         bneg,pn %icc,.tail
 580         or      %g0,%o1,%o0
 581 
 582         add     %fp,tmp0,%g1
 583         lda     [%i3]0x82,%l6           ! (0_0) ux = ((int*)px)[0];
 584 
 585         ba      .main_loop
 586         add     %i3,stridex,%l5         ! px += stridex;
 587 
 588         .align  16
 589 .main_loop:
 590         fsubd   %f10,%f26,%f10          ! (6_1) xx = x - y;
 591         and     %o7,MASK_0x7fffffff,%o1 ! (1_1) ax = ux & 0x7fffffff;
 592         st      %f12,[%g1]              ! (7_1) py[0] = ftmp0;
 593         fmuld   K2,%f50,%f4             ! (1_1) dtmp0 = K2 * x2;
 594 
 595         fmuld   %f40,%f46,%f26          ! (2_1) y0 *= dtmp1;
 596         srl     %o7,28,%o7              ! (1_0) ux >>= 28;
 597         add     %o1,MASK_0x100000,%g1   ! (1_1) ax += 0x00100000;
 598         fpsub32 %f0,%f24,%f40           ! (4_1) y0 = vis_fpsub32(dtmp0, y0);
 599 
 600         fmuld   %f38,%f44,%f38          ! (3_1) y0 *= dtmp0;
 601         and     %l6,MASK_0x7fffffff,%o1 ! (0_0) ax = ux & 0x7fffffff;
 602         lda     [%i3]0x82,%f0           ! (0_0) ftmp0 = *px;
 603         fpadd32 %f8,DC1,%f24            ! (7_1) y = vis_fpadd32(x,dconst1);
 604 
 605         fmuld   %f42,%f22,%f44          ! (0_1) dtmp0 *= xx;
 606         cmp     %o1,%o4                 ! (0_0) ax ? 0x39b89c55
 607         bl,pn   %icc,.update14          ! (0_0) if ( ax < 0x39b89c55 )
 608         faddd   DONE,%f28,%f28          ! (6_1) div += done;
 609 .cont14:
 610         fmuld   %f32,%f40,%f42          ! (4_1) dtmp0 = div0 * y0;
 611         cmp     %o1,%o5                 ! (0_0) ax ? 0x4c700518
 612         bg,pn   %icc,.update15          ! (0_0) if ( ax > 0x4c700518 )
 613         faddd   %f4,K1,%f4              ! (1_1) dtmp0 += K1;
 614 .cont15:
 615         fmuld   %f18,%f26,%f18          ! (2_1) xx *= y0;
 616         srl     %g1,18,%o1              ! (1_1) ax >>= 18;
 617         std     %f28,[%fp+tmp0]         ! (6_1) i = ((unsigned long long*)&div)[0];
 618         fstod   %f0,%f22                ! (0_0) ftmp0 = *px;
 619 
 620         fmuld   %f34,%f38,%f34          ! (3_1) dtmp1 = div0 * y0;
 621         and     %o1,-8,%o1              ! (1_1) ax &= -8;
 622         ldx     [%fp+tmp1],%g1          ! (5_1) i = ((unsigned long long*)&div)[0];
 623         fand    %f24,DC2,%f26           ! (7_1) y = vis_fand(y,dconst2);
 624 
 625         ldd     [%o1+%l7],%f0           ! (1_1) res = *(double*)((char*)parr1 + ax);
 626         and     %o7,-8,%o7              ! (1_1) ux &= -8;
 627         mov     %l6,%i3
! --------------------------------------------------------------------
! Continuation of the software-pipelined main loop.  The "(k_j)" tags
! in the per-line comments identify the unrolled iteration k (0..7)
! and the pipeline generation j that each instruction belongs to;
! instructions from several iterations are interleaved to hide the
! floating-point latencies.  Each iteration computes
! res = sign * parr1[ax] * (1/dtmp0) refined by a Newton step, with
! the atan polynomial (K0..K2) applied to the reduced argument xx.
!
! NOTE(review): the "bn,pn %icc,.exit" instructions below are
! branch-never -- they can never be taken and appear to serve only as
! instruction-group fillers for the hand scheduling; confirm against
! the original build's scheduling notes.
! NOTE(review): "lda [...]0x82" looks like a load from ASI 0x82
! (ASI_PRIMARY_NOFAULT), so the speculative read of the next input
! element cannot trap -- confirm the ASI constant.
! --------------------------------------------------------------------
	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;

	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
	nop
	ldd	[%l0+%o7],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);

	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (5_1) i >>= 43;
	mov	%l4,%o7
	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (5_1) i &= 508;
	nop
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;

	fmuld	%f8,%f26,%f34		! (7_1) div = x * y;
	srl	%o7,28,%o1		! (2_1) ux >>= 28;
	lda	[%l5]0x82,%l6		! (1_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (2_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
	fsubd	%f8,%f26,%f8		! (7_1) xx = x - y;

	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
	add	%l5,stridex,%l4		! px += stridex;
	add	%o7,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (1_0) ax = ux & 0x7fffffff;
	lda	[%l5]0x82,%f0		! (1_0) ftmp0 = *px;
	fpadd32	%f22,DC1,%f24		! (0_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (1_0) ax ? 0x39b89c55
	bl,pn	%icc,.update16		! (1_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f34,%f34		! (7_1) div += done;
.cont16:
	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (1_0) ax ? 0x4c700518
	bg,pn	%icc,.update17		! (1_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;
.cont17:
	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
	srl	%o0,18,%o7		! (2_1) ax >>= 18;
	std	%f34,[%fp+tmp1]		! (7_1) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f20		! (1_0) x = (double)ftmp0;

	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];
	and	%o1,-8,%o1		! (2_1) ux &= -8;
	fand	%f24,DC2,%f26		! (0_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
	and	%o7,-8,%o7		! (2_1) ax &= -8;
	ldd	[%l0+%o1],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	ldd	[%o7+%l7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);
	mov	%l6,%l5
	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);

	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (6_1) i >>= 43;
	mov	%l3,%o7
	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (6_1) i &= 508;
	add	%l4,stridex,%l3		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;

	fmuld	%f22,%f26,%f32		! (0_0) div = x * y;
	srl	%o7,28,%o1		! (3_1) ux >>= 28;
	lda	[%l4]0x82,%l6		! (2_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (3_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
	fsubd	%f22,%f26,%f22		! (0_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (3_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (2_0) ax = ux & 0x7fffffff;
	lda	[%l4]0x82,%f0		! (2_0) ftmp0 = *px;
	fpadd32	%f20,DC1,%f24		! (1_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (2_0) ax ? 0x39b89c55
	bl,pn	%icc,.update18		! (2_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (0_0) div += done;
.cont18:
	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (2_0) ax ? 0x4c700518
	bg,pn	%icc,.update19		! (2_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;
.cont19:
	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
	srl	%g1,18,%o7		! (3_1) ax >>= 18;
	std	%f32,[%fp+tmp0]		! (0_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f18		! (2_0) x = (double)ftmp0;

	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (3_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (7_1) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (1_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
	mov	%l6,%l4
	ldd	[%l0+%o1],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
	ldd	[%o7+%l7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax)
	nop
	fand	%f34,DC3,%f24		! (7_1) y0 = vis_fand(div,dconst3);

	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (7_1) i >>= 43;
	mov	%i0,%o7
	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (7_1) i &= 508;
	add	%l3,stridex,%i0		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (7_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;

	fmuld	%f20,%f26,%f30		! (1_0) div = x * y;
	srl	%o7,28,%o1		! (4_1) ux >>= 28;
	lda	[%l3]0x82,%l6		! (3_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (4_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
	fsubd	%f20,%f26,%f20		! (1_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (4_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (7_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (3_0) ax = ux & 0x7fffffff;
	lda	[%l3]0x82,%f0		! (3_0) ftmp0 = *px;
	fpadd32	%f18,DC1,%f24		! (2_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (3_0) ax ? 0x39b89c55
	bl,pn	%icc,.update20		! (3_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (1_0) div += done;
.cont20:
	fmuld	%f34,%f38,%f42		! (7_1) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (3_0) ax ? 0x4c700518
	bg,pn	%icc,.update21		! (3_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;
.cont21:
	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
	srl	%o0,18,%o7		! (4_1) ax >>= 18;
	std	%f30,[%fp+tmp1]		! (1_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f16		! (3_0) x = (double)ftmp0;

	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (4_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (0_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (2_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
	nop
	ldd	[%l0+%o1],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	ldd	[%o7+%l7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);
	mov	%l6,%l3
	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
	fand	%f32,DC3,%f24		! (0_0) y0 = vis_fand(div,dconst3);

	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (0_0) i >>= 43;
	mov	%i2,%o7
	fsubd	DTWO,%f42,%f44		! (7_1) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (0_0) i &= 508;
	add	%i0,stridex,%i2		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (0_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;

	fmuld	%f18,%f26,%f28		! (2_0) div = x * y;
	srl	%o7,28,%o1		! (5_1) ux >>= 28;
	lda	[%i0]0x82,%l6		! (4_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (4_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (5_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (5_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (3_1) py[0] = ftmp0;
	fsubd	%f18,%f26,%f18		! (2_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (6_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (5_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (5_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (0_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (7_1) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (4_0) ax = ux & 0x7fffffff;
	lda	[%i0]0x82,%f0		! (4_0) ftmp0 = *px;
	fpadd32	%f16,DC1,%f24		! (3_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f14,%f44		! (4_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (4_0) ax ? 0x39b89c55
	bl,pn	%icc,.update22		! (4_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f28,%f28		! (2_0) div += done;
.cont22:
	fmuld	%f32,%f40,%f42		! (0_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (4_0) ax ? 0x4c700518
	bg,pn	%icc,.update23		! (4_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (5_1) dtmp0 += K1;
.cont23:
	fmuld	%f10,%f26,%f10		! (6_1) xx *= y0;
	srl	%g1,18,%o7		! (5_1) ax >>= 18;
	std	%f28,[%fp+tmp0]		! (2_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f14		! (4_0) x = (double)ftmp0;

	fmuld	%f34,%f38,%f34		! (7_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (5_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (1_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (3_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (4_1) res += dtmp0;
	mov	%l6,%i0
	ldd	[%l0+%o1],%f48		! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	ldd	[%o7+%l7],%f0		! (5_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (5_1) dtmp0 *= x2;
	fand	%f30,DC3,%f24		! (1_0) y0 = vis_fand(div,dconst3);

	fmuld	%f10,%f10,%f50		! (6_1) x2 = xx * xx;
	srlx	%g1,43,%g1		! (1_0) i >>= 43;
	mov	%l2,%o7
	fsubd	DTWO,%f42,%f44		! (0_0) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (1_0) i &= 508;
	add	%i2,stridex,%l2		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f34,%f46		! (7_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (5_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (1_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (4_1) ftmp0 = (float)res;

	fmuld	%f16,%f26,%f34		! (3_0) div = x * y;
	srl	%o7,28,%o1		! (6_1) ux >>= 28;
	lda	[%i2]0x82,%l6		! (5_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (5_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (6_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (6_1) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (4_1) py[0] = ftmp0;
	fsubd	%f16,%f26,%f16		! (3_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (7_1) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (6_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (6_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (1_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (0_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (5_0) ax = ux & 0x7fffffff;
	lda	[%i2]0x82,%f0		! (5_0) ftmp0 = *px;
	fpadd32	%f14,DC1,%f24		! (4_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f36,%f44		! (5_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (5_0) ax ? 0x39b89c55
	bl,pn	%icc,.update24		! (5_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f34,%f34		! (3_0) div += done;
.cont24:
	fmuld	%f30,%f38,%f42		! (1_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (5_0) ax ? 0x4c700518
	bg,pn	%icc,.update25		! (5_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (6_1) dtmp0 += K1;
.cont25:
	fmuld	%f8,%f26,%f8		! (7_1) xx *= y0;
	srl	%o0,18,%o7		! (6_1) ax >>= 18;
	std	%f34,[%fp+tmp1]		! (3_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f36		! (5_0) x = (double)ftmp0;

	fmuld	%f32,%f40,%f32		! (0_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (6_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (2_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (4_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (5_1) res += dtmp0;
	mov	%l6,%i2
	ldd	[%l0+%o1],%f48		! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	ldd	[%o7+%l7],%f0		! (6_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (6_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (2_0) y0 = vis_fand(div,dconst3);

	fmuld	%f8,%f8,%f50		! (7_1) x2 = xx * xx;
	srlx	%o0,43,%o0		! (2_0) i >>= 43;
	mov	%g5,%o7
	fsubd	DTWO,%f42,%f44		! (1_0) dtmp0 = dtwo - dtmp0;

	and	%o0,508,%l6		! (2_0) i &= 508;
	add	%l2,stridex,%g5		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f32,%f46		! (0_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (6_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (2_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (5_1) ftmp0 = (float)res;

	fmuld	%f14,%f26,%f32		! (4_0) div = x * y;
	srl	%o7,28,%o1		! (7_1) ux >>= 28;
	lda	[%l2]0x82,%l6		! (6_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (6_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (7_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (7_1) ax = ux & 0x7fffffff;
	st	%f12,[%g1]		! (5_1) py[0] = ftmp0;
	fsubd	%f14,%f26,%f14		! (4_0) xx = x - y;

	fmuld	%f40,%f46,%f26		! (0_0) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%g1	! (7_1) ax += 0x00100000;
	and	%o1,-8,%o1		! (7_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (2_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (1_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (6_0) ax = ux & 0x7fffffff;
	lda	[%l2]0x82,%f0		! (6_0) ftmp0 = *px;
	fpadd32	%f36,DC1,%f24		! (5_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f10,%f44		! (6_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (6_0) ax ? 0x39b89c55
	bl,pn	%icc,.update26		! (6_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f32,%f32		! (4_0) div += done;
.cont26:
	fmuld	%f28,%f40,%f42		! (2_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (6_0) ax ? 0x4c700518
	bg,pn	%icc,.update27		! (6_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (7_1) dtmp0 += K1;
.cont27:
	fmuld	%f22,%f26,%f22		! (0_0) xx *= y0;
	srl	%g1,18,%o7		! (7_1) ax >>= 18;
	std	%f32,[%fp+tmp0]		! (4_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f10		! (6_0) x = (double)ftmp0;

	fmuld	%f30,%f38,%f30		! (1_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (7_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (3_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (5_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (6_1) res += dtmp0;
	mov	%l6,%l2
	ldd	[%l0+%o1],%f48		! (7_1) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	ldd	[%o7+%l7],%f0		! (7_1) res = *(double*)((char*)parr1 + ax);
	nop
	fmuld	%f4,%f50,%f4		! (7_1) dtmp0 *= x2;
	fand	%f34,DC3,%f24		! (3_0) y0 = vis_fand(div,dconst3);

	fmuld	%f22,%f22,%f50		! (0_0) x2 = xx * xx;
	srlx	%g1,43,%g1		! (3_0) i >>= 43;
	mov	%i3,%o7
	fsubd	DTWO,%f42,%f44		! (2_0) dtmp0 = dtwo - dtmp0;

	and	%g1,508,%l6		! (3_0) i &= 508;
	add	%g5,stridex,%i3		! px += stridex;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f30,%f46		! (1_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (7_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (3_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (6_1) ftmp0 = (float)res;

	fmuld	%f36,%f26,%f30		! (5_0) div = x * y;
	srl	%o7,28,%o1		! (0_0) ux >>= 28;
	lda	[%g5]0x82,%l6		! (7_0) ux = ((int*)px)[0];
	faddd	%f4,K0,%f42		! (7_1) dtmp0 += K0;

	fmuld	K2,%f50,%f4		! (0_0) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o7	! (0_0) ax = ux & 0x7fffffff;
	st	%f12,[%o0]		! (6_1) py[0] = ftmp0;
	fsubd	%f36,%f26,%f36		! (5_0) xx = x - y;

	fmuld	%f38,%f46,%f26		! (1_0) y0 *= dtmp1;
	add	%o7,MASK_0x100000,%o0	! (0_0) ax += 0x00100000;
	and	%o1,-8,%o1		! (0_0) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (3_0) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (2_0) y0 *= dtmp0;
	and	%l6,MASK_0x7fffffff,%o7	! (7_0) ax = ux & 0x7fffffff;
	lda	[%g5]0x82,%f0		! (7_0) ftmp0 = *px;
	fpadd32	%f10,DC1,%f24		! (6_0) y = vis_fpadd32(x,dconst1);

	fmuld	%f42,%f8,%f44		! (7_1) dtmp0 *= xx;
	cmp	%o7,%o4			! (7_0) ax ? 0x39b89c55
	bl,pn	%icc,.update28		! (7_0) if ( ax < 0x39b89c55 )
	faddd	DONE,%f30,%f30		! (5_0) div += done;
.cont28:
	fmuld	%f34,%f38,%f42		! (3_0) dtmp0 = div0 * y0;
	cmp	%o7,%o5			! (7_0) ax ? 0x4c700518
	bg,pn	%icc,.update29		! (7_0) if ( ax > 0x4c700518 )
	faddd	%f4,K1,%f4		! (0_0) dtmp0 += K1;
.cont29:
	fmuld	%f20,%f26,%f20		! (1_0) xx *= y0;
	srl	%o0,18,%o7		! (0_0) ax >>= 18;
	std	%f30,[%fp+tmp1]		! (5_0) i = ((unsigned long long*)&div)[0];
	fstod	%f0,%f8			! (7_0) x = (double)ftmp0;

	fmuld	%f28,%f40,%f28		! (2_0) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (0_0) ux &= -8;
	ldx	[%fp+tmp0],%o0		! (4_0) i = ((unsigned long long*)&div)[0];
	fand	%f24,DC2,%f26		! (6_0) y = vis_fand(y,dconst2);

	faddd	%f48,%f44,%f12		! (7_1) res += dtmp0;
	subcc	counter,8,counter	! eight elements retired per pass
	ldd	[%l0+%o1],%f48		! (0_0) dtmp0 = *(double*)((char*)sign_arr + ux);
	bn,pn	%icc,.exit		! never taken (scheduling filler)

	fmuld	%f4,%f50,%f4		! (0_0) dtmp0 *= x2;
	mov	%l6,%g5
	ldd	[%o7+%l7],%f0		! (0_0) res = *(double*)((char*)parr1 + ax);
	fand	%f32,DC3,%f24		! (4_0) y0 = vis_fand(div,dconst3);

	fmuld	%f20,%f20,%f50		! (1_0) x2 = xx * xx;
	srlx	%o0,43,%l6		! (4_0) i >>= 43;
	mov	%l5,%o7
	fsubd	DTWO,%f42,%f44		! (3_0) dtmp0 = dtwo - dtmp0;

	add	%g1,stridey,%o0		! py += stridey;
	and	%l6,508,%l6		! (4_0) i &= 508;
	bn,pn	%icc,.exit		! never taken (scheduling filler)
	fsubd	DTWO,%f28,%f46		! (2_0) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (0_0) res *= dtmp0;
	ld	[%i4+%l6],%f0		! (4_0) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	add	%i3,stridex,%l5		! px += stridex;
	fdtos	%f12,%f12		! (7_1) ftmp0 = (float)res;

	lda	[%i3]0x82,%l6		! (0_0) ux = ((int*)px)[0];
	fmuld	%f10,%f26,%f28		! (6_0) div = x * y;
	! loop back while counter >= 0 (flags from "subcc counter,8" above)
	bpos,pt	%icc,.main_loop
	faddd	%f4,K0,%f42		! (0_0) dtmp0 += K0;

	srl	%o7,28,%l5		! (1_0) ux >>= 28;
	st	%f12,[%g1]		! (7_1) py[0] = ftmp0;

.tail:
! --------------------------------------------------------------------
! Pipeline drain.  The main loop exits with counter negative; adding 7
! back recovers the number of elements still in flight.  Each section
! below retires one pipeline stage (stores one result, decrements
! counter) and falls back to .begin as soon as counter goes negative.
! The per-line "(k_j)" stage tags have the same meaning as in the
! main loop above.
! --------------------------------------------------------------------
	addcc	counter,7,counter
	bneg,pn	%icc,.begin		! nothing left in flight
	or	%g0,%o0,%o1

	fsubd	%f10,%f26,%f10		! (6_1) xx = x - y;
	and	%o7,MASK_0x7fffffff,%g1	! (1_1) ax = ux & 0x7fffffff;
	fmuld	K2,%f50,%f4		! (1_1) dtmp0 = K2 * x2;

	fmuld	%f40,%f46,%f26		! (2_1) y0 *= dtmp1;
	add	%g1,MASK_0x100000,%g1	! (1_1) ax += 0x00100000;
	and	%l5,-8,%l5		! (1_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (4_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (3_1) y0 *= dtmp0;

	fmuld	%f42,%f22,%f44		! (0_1) dtmp0 *= xx;
	faddd	DONE,%f28,%f28		! (6_1) div += done;

	fmuld	%f32,%f40,%f42		! (4_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (1_1) dtmp0 += K1;

	fmuld	%f18,%f26,%f18		! (2_1) xx *= y0;
	srl	%g1,18,%o7		! (1_1) ax >>= 18;
	std	%f28,[%fp+tmp0]		! (6_1) i = ((unsigned long long*)&div)[0];

	fmuld	%f34,%f38,%f34		! (3_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (1_1) ax &= -8;
	ldx	[%fp+tmp1],%g1		! (5_1) i = ((unsigned long long*)&div)[0];

	faddd	%f48,%f44,%f12		! (0_1) res += dtmp0;
	add	%o7,%l7,%o7		! (1_1) (char*)parr1 + ax;
	ldd	[%l0+%l5],%f48		! (1_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (1_1) dtmp0 *= x2;
	fand	%f30,DC3,%f24		! (5_1) y0 = vis_fand(div,dconst3);
	ldd	[%o7],%f0		! (1_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f18,%f18,%f50		! (2_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (4_1) dtmp0 = dtwo - dtmp0;
	srlx	%g1,43,%g1		! (5_1) i >>= 43;

	and	%g1,508,%l6		! (5_1) i &= 508;
	mov	%l4,%o7
	fsubd	DTWO,%f34,%f46		! (3_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (1_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	ld	[%i4+%l6],%f0		! (5_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (0_1) ftmp0 = (float)res;

	srl	%o7,28,%l4		! (2_1) ux >>= 28;
	st	%f12,[%o0]		! (0_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (1_1) dtmp0 += K0;

	! one element retired; stop if that was the last in-flight one
	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%g1,%o1

	fmuld	K2,%f50,%f4		! (2_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (2_1) ax = ux & 0x7fffffff;

	fmuld	%f38,%f46,%f26		! (3_1) y0 *= dtmp1;
	add	%o0,MASK_0x100000,%o0	! (2_1) ax += 0x00100000;
	and	%l4,-8,%l4		! (2_1) ux &= -8;
	fpsub32	%f0,%f24,%f38		! (5_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f40,%f44,%f40		! (4_1) y0 *= dtmp0;

	fmuld	%f42,%f20,%f44		! (1_1) dtmp0 *= xx;

	fmuld	%f30,%f38,%f42		! (5_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (2_1) dtmp0 += K1;

	fmuld	%f16,%f26,%f16		! (3_1) xx *= y0;
	srl	%o0,18,%o7		! (2_1) ax >>= 18;

	fmuld	%f32,%f40,%f32		! (4_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (2_1) ax &= -8;
	ldx	[%fp+tmp0],%o0		! (6_1) i = ((unsigned long long*)&div)[0];

	faddd	%f48,%f44,%f12		! (1_1) res += dtmp0;
	add	%o7,%l7,%o7		! (2_1) (char*)parr1 + ax;
	ldd	[%l0+%l4],%f48		! (2_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (2_1) dtmp0 *= x2;
	fand	%f28,DC3,%f24		! (6_1) y0 = vis_fand(div,dconst3);
	ldd	[%o7],%f0		! (2_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f16,%f16,%f50		! (3_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (5_1) dtmp0 = dtwo - dtmp0;
	srlx	%o0,43,%o0		! (6_1) i >>= 43;

	and	%o0,508,%l6		! (6_1) i &= 508;
	mov	%l3,%o7
	fsubd	DTWO,%f32,%f46		! (4_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (2_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	ld	[%i4+%l6],%f0		! (6_1) *(float*)&dtmp0 = *(float*)((char*)parr0 + i);
	fdtos	%f12,%f12		! (1_1) ftmp0 = (float)res;

	srl	%o7,28,%l3		! (3_1) ux >>= 28;
	st	%f12,[%g1]		! (1_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (2_1) dtmp0 += K0;

	! one element retired; stop if that was the last in-flight one
	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%o0,%o1

	fmuld	K2,%f50,%f4		! (3_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%g1	! (3_1) ax = ux & 0x7fffffff;

	fmuld	%f40,%f46,%f26		! (4_1) y0 *= dtmp1;
	add	%g1,MASK_0x100000,%g1	! (3_1) ax += 0x00100000;
	and	%l3,-8,%l3		! (3_1) ux &= -8;
	fpsub32	%f0,%f24,%f40		! (6_1) y0 = vis_fpsub32(dtmp0, y0);

	fmuld	%f38,%f44,%f38		! (5_1) y0 *= dtmp0;

	fmuld	%f42,%f18,%f44		! (2_1) dtmp0 *= xx;

	fmuld	%f28,%f40,%f42		! (6_1) dtmp0 = div0 * y0;
	faddd	%f4,K1,%f4		! (3_1) dtmp0 += K1;

	fmuld	%f14,%f26,%f14		! (4_1) xx *= y0;
	srl	%g1,18,%o7		! (3_1) ax >>= 18;

	fmuld	%f30,%f38,%f30		! (5_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (3_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (2_1) res += dtmp0;
	add	%o7,%l7,%o7		! (3_1) (char*)parr1 + ax;
	ldd	[%l0+%l3],%f48		! (3_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (3_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (3_1) res = *(double*)((char*)parr1 + ax)

	fmuld	%f14,%f14,%f50		! (4_1) x2 = xx * xx;
	fsubd	DTWO,%f42,%f44		! (6_1) dtmp0 = dtwo - dtmp0;

	mov	%i0,%o7
	fsubd	DTWO,%f30,%f46		! (5_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (3_1) res *= dtmp0;
	add	%o0,stridey,%g1		! py += stridey;
	fdtos	%f12,%f12		! (2_1) ftmp0 = (float)res;

	srl	%o7,28,%i0		! (4_1) ux >>= 28;
	st	%f12,[%o0]		! (2_1) py[0] = ftmp0;
	faddd	%f4,K0,%f42		! (3_1) dtmp0 += K0;

	! one element retired; stop if that was the last in-flight one
	subcc	counter,1,counter
	bneg,pn	%icc,.begin
	or	%g0,%g1,%o1

	fmuld	K2,%f50,%f4		! (4_1) dtmp0 = K2 * x2;
	and	%o7,MASK_0x7fffffff,%o0	! (4_1) ax = ux & 0x7fffffff;

	fmuld	%f38,%f46,%f26		! (5_1) y0 *= dtmp1;
	add	%o0,MASK_0x100000,%o0	! (4_1) ax += 0x00100000;
	and	%i0,-8,%i0		! (4_1) ux &= -8;

	fmuld	%f40,%f44,%f40		! (6_1) y0 *= dtmp0;

	fmuld	%f42,%f16,%f44		! (3_1) dtmp0 *= xx;

	faddd	%f4,K1,%f4		! (4_1) dtmp0 += K1;

	fmuld	%f36,%f26,%f36		! (5_1) xx *= y0;
	srl	%o0,18,%o7		! (4_1) ax >>= 18;

	fmuld	%f28,%f40,%f28		! (6_1) dtmp1 = div0 * y0;
	and	%o7,-8,%o7		! (4_1) ax &= -8;

	faddd	%f48,%f44,%f12		! (3_1) res += dtmp0;
	add	%o7,%l7,%o7		! (4_1) (char*)parr1 + ax;
	ldd	[%l0+%i0],%f48		! (4_1) dtmp0 = *(double*)((char*)sign_arr + ux);

	fmuld	%f4,%f50,%f4		! (4_1) dtmp0 *= x2;
	ldd	[%o7],%f0		! (4_1) res = *(double*)((char*)parr1 + ax);

	fmuld	%f36,%f36,%f50		! (5_1) x2 = xx * xx;

	mov	%i2,%o7
	fsubd	DTWO,%f28,%f46		! (6_1) dtmp1 = dtwo - dtmp1;

	fmuld	%f0,%f48,%f48		! (4_1) res *= dtmp0;
	add	%g1,stridey,%o0		! py += stridey;
	fdtos	%f12,%f12		! (3_1) ftmp0 = (float)res;

	srl	%o7,28,%i2		! (5_1) ux >>= 28;
1305         st      %f12,[%g1]              ! (3_1) py[0] = ftmp0;
1306         faddd   %f4,K0,%f42             ! (4_1) dtmp0 += K0;
1307 
1308         subcc   counter,1,counter
1309         bneg,pn %icc,.begin
1310         or      %g0,%o0,%o1
1311 
1312         fmuld   K2,%f50,%f4             ! (5_1) dtmp0 = K2 * x2;
1313         and     %o7,MASK_0x7fffffff,%g1 ! (5_1) ax = ux & 0x7fffffff;
1314 
1315         fmuld   %f40,%f46,%f26          ! (6_1) y0 *= dtmp1;
1316         add     %g1,MASK_0x100000,%g1   ! (5_1) ax += 0x00100000;
1317         and     %i2,-8,%i2              ! (5_1) ux &= -8;
1318 
1319         fmuld   %f42,%f14,%f44          ! (4_1) dtmp0 *= xx;
1320 
1321         faddd   %f4,K1,%f4              ! (5_1) dtmp0 += K1;
1322 
1323         fmuld   %f10,%f26,%f10          ! (6_1) xx *= y0;
1324         srl     %g1,18,%o7              ! (5_1) ax >>= 18;
1325 
1326         and     %o7,-8,%o7              ! (5_1) ax &= -8;
1327 
1328         faddd   %f48,%f44,%f12          ! (4_1) res += dtmp0;
1329         add     %o7,%l7,%o7             ! (5_1) (char*)parr1 + ax;
1330         ldd     [%l0+%i2],%f48          ! (5_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1331 
1332         fmuld   %f4,%f50,%f4            ! (5_1) dtmp0 *= x2;
1333         ldd     [%o7],%f0               ! (5_1) res = *(double*)((char*)parr1 + ax);
1334 
1335         fmuld   %f10,%f10,%f50          ! (6_1) x2 = xx * xx;
1336 
1337         mov     %l2,%o7
1338 
1339         fmuld   %f0,%f48,%f48           ! (5_1) res *= dtmp0;
1340         add     %o0,stridey,%g1         ! py += stridey;
1341         fdtos   %f12,%f12               ! (4_1) ftmp0 = (float)res;
1342 
1343         srl     %o7,28,%l2              ! (6_1) ux >>= 28;
1344         st      %f12,[%o0]              ! (4_1) py[0] = ftmp0;
1345         faddd   %f4,K0,%f42             ! (5_1) dtmp0 += K0;
1346 
1347         subcc   counter,1,counter
1348         bneg,pn %icc,.begin
1349         or      %g0,%g1,%o1
1350 
1351         fmuld   K2,%f50,%f4             ! (6_1) dtmp0 = K2 * x2;
1352         and     %o7,MASK_0x7fffffff,%o0 ! (6_1) ax = ux & 0x7fffffff;
1353 
1354         add     %o0,MASK_0x100000,%o0   ! (6_1) ax += 0x00100000;
1355         and     %l2,-8,%l2              ! (6_1) ux &= -8;
1356 
1357         fmuld   %f42,%f36,%f44          ! (5_1) dtmp0 *= xx;
1358 
1359         faddd   %f4,K1,%f4              ! (6_1) dtmp0 += K1;
1360 
1361         srl     %o0,18,%o7              ! (6_1) ax >>= 18;
1362 
1363         and     %o7,-8,%o7              ! (6_1) ax &= -8;
1364 
1365         faddd   %f48,%f44,%f12          ! (5_1) res += dtmp0;
1366         add     %o7,%l7,%o7             ! (6_1) (char*)parr1 + ax;
1367         ldd     [%l0+%l2],%f48          ! (6_1) dtmp0 = *(double*)((char*)sign_arr + ux);
1368 
1369         fmuld   %f4,%f50,%f4            ! (6_1) dtmp0 *= x2;
1370         ldd     [%o7],%f0               ! (6_1) res = *(double*)((char*)parr1 + ax);
1371 
1372         fmuld   %f0,%f48,%f48           ! (6_1) res *= dtmp0;
1373         add     %g1,stridey,%o0         ! py += stridey;
1374         fdtos   %f12,%f12               ! (5_1) ftmp0 = (float)res;
1375 
1376         st      %f12,[%g1]              ! (5_1) py[0] = ftmp0;
1377         faddd   %f4,K0,%f42             ! (6_1) dtmp0 += K0;
1378 
1379         subcc   counter,1,counter
1380         bneg,pn %icc,.begin
1381         or      %g0,%o0,%o1
1382 
1383         fmuld   %f42,%f10,%f44          ! (6_1) dtmp0 *= xx;
1384 
1385         faddd   %f48,%f44,%f12          ! (6_1) res += dtmp0;
1386 
1387         add     %o0,stridey,%g1         ! py += stridey;
1388         fdtos   %f12,%f12               ! (6_1) ftmp0 = (float)res;
1389 
1390         st      %f12,[%o0]              ! (6_1) py[0] = ftmp0;
1391 
1392         ba      .begin
1393         or      %g0,%g1,%o1             ! py += stridey;
1394 
.exit:
        ret                             ! return to caller
        restore %g0,%g0,%g0             ! (delay slot) pop the register window
1398 
        .align  16
! .spec0: special-case path that stores the argument's bit pattern unchanged
! as the result (atanf(x) == x to float precision for such inputs — the
! trigger test lives in the main loop, outside this section), then advances
! both pointers and rejoins the loop at .begin1.
.spec0:
        add     %i3,stridex,%i3         ! px += stridex;
        sub     counter,1,counter
        st      %l6,[%o1]               ! *(int*)py = ux;

        ba      .begin1
        add     %o1,stridey,%o1         ! py += stridey;
1407 
        .align  16
! .spec1: special-case path for huge/Inf/NaN arguments (the dispatch test is
! in the main loop, outside this section).
!   - ax >  0x7f800000 (NaN): branch to 1f; the annulled delay-slot fabss
!     runs only when taken.  Result is |x|*|x|, which propagates the NaN
!     (and, presumably, raises invalid on a signaling NaN — inherited from
!     the fmuls semantics).
!   - otherwise (|x| = Inf or very large): result is sign | 0x3fc90fdb,
!     i.e. +/- pi/2 in single precision (0x3fc90c00 + 0x3db).
! Both arms then step px/py and rejoin the loop at .begin1.
.spec1:
        sethi   %hi(0x7f800000),%l3
        sethi   %hi(0x3fc90c00),%l4     ! pi_2

        sethi   %hi(0x80000000),%o0
        add     %l4,0x3db,%l4           ! pi_2

        cmp     %l5,%l3                 ! if ( ax > 0x7f800000 )
        bg,a,pn %icc,1f
        fabss   %f0,%f0                 ! fpx = fabsf(*px);

        and     %l6,%o0,%l6             ! sign = ux & 0x80000000;

        or      %l6,%l4,%l6             ! sign |= pi_2;

        add     %i3,stridex,%i3         ! px += stridex;
        sub     counter,1,counter
        st      %l6,[%o1]               ! *(int*)py = sign;

        ba      .begin1
        add     %o1,stridey,%o1         ! py += stridey;

1:
        fmuls   %f0,%f0,%f0             ! fpx *= fpx;

        add     %i3,stridex,%i3         ! px += stridex
        sub     counter,1,counter
        st      %f0,[%o1]               ! *py = fpx;

        ba      .begin1
        add     %o1,stridey,%o1         ! py += stridey;
1440 
        .align  16
! ------------------------------------------------------------------
! .update0 .. .update13: fix-up stubs entered from the pipelined loop
! when a lane holds an argument needing special treatment (the trigger
! tests are in the loop body, outside this section).  Every stub has
! the same shape, differing only in the threshold K (the cmp constant),
! the register saved as the faulting px, and the .contK re-entry label:
!   - substitute 0.0f for the offending lane (fzeros %f0) and the safe
!     bit pattern hi(0x3fffffff) for %l6, then
!   - if counter <= K, resume at .contK (the ble,a branch annuls its
!     delay slot, so the sethi executes only when taken); otherwise
!     record counter-K in tmp_counter and the lane's px in tmp_px
!     (presumably re-processed after this pass — the consumer is
!     outside this view), and shorten the pass with counter = K.
! .update0 below is annotated as the exemplar; the rest are identical
! in structure.
! ------------------------------------------------------------------
.update0:
        cmp     counter,1               ! K = 1 results still wanted?
        fzeros  %f0                     ! neutralize the bad lane
        ble,a   .cont0                  ! few enough: just resume
        sethi   %hi(0x3fffffff),%l6     ! (annulled delay slot, taken only)

        sub     counter,1,counter       ! save work remaining after this pass
        st      counter,[%fp+tmp_counter]

        stx     %l5,[%fp+tmp_px]        ! remember the faulting px
        sethi   %hi(0x3fffffff),%l6
        ba      .cont0
        or      %g0,1,counter           ! truncate this pass to K results

        .align  16
.update1:
        cmp     counter,1
        fzeros  %f0
        ble,a   .cont1
        sethi   %hi(0x3fffffff),%l6

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont1
        or      %g0,1,counter

        .align  16
.update2:
        cmp     counter,2
        fzeros  %f0
        ble,a   .cont2
        sethi   %hi(0x3fffffff),%l6

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %l4,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont2
        or      %g0,2,counter

        .align  16
.update3:
        cmp     counter,2
        fzeros  %f0
        ble,a   .cont3
        sethi   %hi(0x3fffffff),%l6

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %l4,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont3
        or      %g0,2,counter

        .align  16
.update4:
        cmp     counter,3
        fzeros  %f0
        ble,a   .cont4
        sethi   %hi(0x3fffffff),%l6

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %l3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont4
        or      %g0,3,counter

        .align  16
.update5:
        cmp     counter,3
        fzeros  %f0
        ble,a   .cont5
        sethi   %hi(0x3fffffff),%l6

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %l3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont5
        or      %g0,3,counter

        .align  16
.update6:
        cmp     counter,4
        fzeros  %f0
        ble,a   .cont6
        sethi   %hi(0x3fffffff),%l6

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i0,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont6
        or      %g0,4,counter

        .align  16
.update7:
        cmp     counter,4
        fzeros  %f0
        ble,a   .cont7
        sethi   %hi(0x3fffffff),%l6

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i0,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont7
        or      %g0,4,counter

        .align  16
.update8:
        cmp     counter,5
        fzeros  %f0
        ble,a   .cont8
        sethi   %hi(0x3fffffff),%l6

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont8
        or      %g0,5,counter

        .align  16
.update9:
        cmp     counter,5
        fzeros  %f0
        ble,a   .cont9
        sethi   %hi(0x3fffffff),%l6

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont9
        or      %g0,5,counter

        .align  16
.update10:
        cmp     counter,6
        fzeros  %f0
        ble,a   .cont10
        sethi   %hi(0x3fffffff),%l6

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont10
        or      %g0,6,counter

        .align  16
.update11:
        cmp     counter,6
        fzeros  %f0
        ble,a   .cont11
        sethi   %hi(0x3fffffff),%l6

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont11
        or      %g0,6,counter

        .align  16
.update12:
        cmp     counter,7
        fzeros  %f0
        ble,a   .cont12
        sethi   %hi(0x3fffffff),%l6

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont12
        or      %g0,7,counter

        .align  16
.update13:
        cmp     counter,7
        fzeros  %f0
        ble,a   .cont13
        sethi   %hi(0x3fffffff),%l6

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont13
        or      %g0,7,counter
1650 
        .align  16
! ------------------------------------------------------------------
! .update14 .. .update29: fix-up stubs entered from the pipelined loop
! when a lane holds an argument needing special treatment (the trigger
! tests are in the loop body, outside this section).  Each stub:
!   - substitutes 0.0f for the offending lane (fzeros %f0) and the
!     safe bit pattern hi(0x3fffffff) for %l6, then
!   - if counter <= K (K = the cmp constant), resumes at the matching
!     .contK label (the ble,a branch annuls its delay slot, so the
!     sethi executes only when taken); otherwise records counter-K in
!     tmp_counter and the lane's px in tmp_px (presumably re-processed
!     after this pass — the consumer is outside this view), and
!     shortens the pass by setting counter = K.
! Note .update14/.update15 use K = 0, so their `sub counter,0,counter`
! is a no-op kept only for pattern uniformity.
! ------------------------------------------------------------------
.update14:
        cmp     counter,0
        fzeros  %f0
        ble,a   .cont14
        sethi   %hi(0x3fffffff),%l6

        sub     counter,0,counter       ! no-op (K = 0); keeps stub shape
        st      counter,[%fp+tmp_counter]

        stx     %i3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont14
        or      %g0,0,counter

        .align  16
.update15:
        cmp     counter,0
        fzeros  %f0
        ble,a   .cont15
        sethi   %hi(0x3fffffff),%l6

        sub     counter,0,counter       ! no-op (K = 0); keeps stub shape
        st      counter,[%fp+tmp_counter]

        stx     %i3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont15
        or      %g0,0,counter

        .align  16
.update16:
        cmp     counter,1
        fzeros  %f0
        ble,a   .cont16
        sethi   %hi(0x3fffffff),%l6

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont16
        or      %g0,1,counter

        .align  16
.update17:
        cmp     counter,1
        fzeros  %f0
        ble,a   .cont17
        sethi   %hi(0x3fffffff),%l6

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont17
        or      %g0,1,counter

        .align  16
.update18:
        cmp     counter,2
        fzeros  %f0
        ble,a   .cont18
        sethi   %hi(0x3fffffff),%l6

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %l4,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont18
        or      %g0,2,counter

        .align  16
.update19:
        cmp     counter,2
        fzeros  %f0
        ble,a   .cont19
        sethi   %hi(0x3fffffff),%l6

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %l4,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont19
        or      %g0,2,counter

        .align  16
.update20:
        cmp     counter,3
        fzeros  %f0
        ble,a   .cont20
        sethi   %hi(0x3fffffff),%l6

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %l3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont20
        or      %g0,3,counter

        .align  16
.update21:
        cmp     counter,3
        fzeros  %f0
        ble,a   .cont21
        sethi   %hi(0x3fffffff),%l6

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %l3,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont21
        or      %g0,3,counter

        .align  16
.update22:
        cmp     counter,4
        fzeros  %f0
        ble,a   .cont22
        sethi   %hi(0x3fffffff),%l6

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i0,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont22
        or      %g0,4,counter

        .align  16
.update23:
        cmp     counter,4
        fzeros  %f0
        ble,a   .cont23
        sethi   %hi(0x3fffffff),%l6

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i0,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont23
        or      %g0,4,counter

        .align  16
.update24:
        cmp     counter,5
        fzeros  %f0
        ble,a   .cont24
        sethi   %hi(0x3fffffff),%l6

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont24
        or      %g0,5,counter

        .align  16
.update25:
        cmp     counter,5
        fzeros  %f0
        ble,a   .cont25
        sethi   %hi(0x3fffffff),%l6

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %i2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont25
        or      %g0,5,counter

        .align  16
.update26:
        cmp     counter,6
        fzeros  %f0
        ble,a   .cont26
        sethi   %hi(0x3fffffff),%l6

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont26
        or      %g0,6,counter

        .align  16
.update27:
        cmp     counter,6
        fzeros  %f0
        ble,a   .cont27
        sethi   %hi(0x3fffffff),%l6

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l2,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont27
        or      %g0,6,counter

        .align  16
.update28:
        cmp     counter,7
        fzeros  %f0
        ble,a   .cont28
        sethi   %hi(0x3fffffff),%l6

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont28
        or      %g0,7,counter

        .align  16
.update29:
        cmp     counter,7
        fzeros  %f0
        ble,a   .cont29
        sethi   %hi(0x3fffffff),%l6

        sub     counter,7,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]
        sethi   %hi(0x3fffffff),%l6
        ba      .cont29
        or      %g0,7,counter

! record the ELF symbol size of __vatanf (function entry is above this view)
        SET_SIZE(__vatanf)
1892