1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vsqrtf_ultra3.S"
  30 
  31 #include "libm.h"
  32 #if defined(LIBMVEC_SO_BUILD)
     /*
      * Only when LIBMVEC_SO_BUILD is defined: declare __vsqrtf as a weak
      * function symbol and make it an alias of __vsqrtf_ultra3.
      */
  33         .weak   __vsqrtf
  34         .type   __vsqrtf,#function
  35         __vsqrtf = __vsqrtf_ultra3
  36 #endif
  37 
  38         RO_DATA
  39         .align  64
  40 
  41 .CONST_TBL:
     /*
      * Five 8-byte constants, each emitted as two 32-bit words with the
      * high word first.  __vsqrtf_ultra3 reads them with ldd at byte
      * offsets 0, 8, 16, 24 and 32 into the registers aliased as
      * K1, K2, DC0, DC1 and DC2 respectively.
      */
  42         .word   0x3fe00001, 0x80007e00  ! K1  =  5.00000715259318464227e-01
  43         .word   0xbfc00003, 0xc0017a01  ! K2  = -1.25000447037521686593e-01
  44         .word   0x000fffff, 0xffffffff  ! DC0 = 0x000fffffffffffff
  45         .word   0x3ff00000, 0x00000000  ! DC1 = 0x3ff0000000000000
  46         .word   0x7ffff000, 0x00000000  ! DC2 = 0x7ffff00000000000
  47 
  48 #define DC0             %f6     /* .CONST_TBL+16; ANDed with db0 (vis_fand) */
  49 #define DC1             %f4     /* .CONST_TBL+24; ORed into db0 and added to res0 */
  50 #define DC2             %f2     /* .CONST_TBL+32; mask that extracts hi0 from db0 */
  51 #define K2              %f38    /* .CONST_TBL+8;  res0 = K2 * xx0 */
  52 #define K1              %f36    /* .CONST_TBL+0;  res0 += K1 */
  53 #define TBL             %l2     /* address of __vlibm_TBL_sqrtf (set by PIC_SET) */
  54 #define stridex         %l3     /* %i2 shifted left by 2; added to px per element */
  55 #define stridey         %l4     /* %i4 shifted left by 2; added to py per element */
  56 #define _0x1ff0         %l5     /* 0xff8 << 1; mask applied to (ax >> 11) for si0 */
  57 #define counter         %l6     /* element count for the current pass, loaded at .begin */
  58 #define _0x00800000     %l7     /* lower bound used by the ax range checks */
  59 #define _0x7f800000     %o0     /* upper bound used by the ax range checks */
  60 
  61 #define tmp_px          STACK_BIAS-0x40
     /*
      * %fp-relative offsets of the 8-byte scratch slots used by
      * __vsqrtf_ultra3 (the definition above is the first of them):
      *   tmp_px       input pointer at which .begin (re)starts a pass
      *   tmp_counter  element count that .begin loads for that pass
      *                (written with st, read with ld: a 32-bit value)
      *   tmp0..tmp4   one slot per pipeline lane; the 64-bit lexp0 bit
      *                pattern is written with stx and read back with ldd
      *                as the double dtmp1
      */
  62 #define tmp_counter     STACK_BIAS-0x38
  63 #define tmp0            STACK_BIAS-0x30
  64 #define tmp1            STACK_BIAS-0x28
  65 #define tmp2            STACK_BIAS-0x20
  66 #define tmp3            STACK_BIAS-0x18
  67 #define tmp4            STACK_BIAS-0x10
  68 
  69 ! sizeof temp storage - must be a multiple of 16 for V9
     /*
      * Subtracted from %sp, together with SA(MINFRAME), by the save at
      * function entry.  The seven 8-byte slots tmp_px..tmp4 occupy
      * offsets -0x40 through -0x09 below STACK_BIAS, so 0x40 covers them.
      */
  70 #define tmps            0x40
  71 
  72 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  73 !      !!!!!   algorithm   !!!!!
  74 !
  75 !  x0 = *px;
  76 !  ax = *(int*)px;
  77 !  px += stridex;
  78 !
  79 !  if( ax >= 0x7f800000 )
  80 !  {
  81 !    *py = sqrtf(x0);
  82 !    py += stridey;
  83 !    continue;
  84 !  }
  85 !  if( ax < 0x00800000 )
  86 !  {
  87 !    *py = sqrtf(x0);
  88 !    py += stridey;
  89 !    continue;
  90 !  }
  91 !
  92 !  db0 = (double)x0;
  93 !  iexp0 = ax >> 24;
  94 !  iexp0 += 0x3c0;
  95 !  lexp0 = (long long)iexp0 << 52;
  96 !
  97 !  db0 = vis_fand(db0,DC0);
  98 !  db0 = vis_for(db0,DC1);
  99 !  hi0 = vis_fand(db0,DC2);
 100 !
 101 !  ax >>= 11;
 102 !  si0 = ax & 0x1ff0;
 103 !  dtmp0 = ((double*)((char*)TBL + si0))[0];
 104 !  xx0 = (db0 - hi0);
 105 !  xx0 *= dtmp0;
 106 !  dtmp0 = ((double*)((char*)TBL + si0))[1];
 107 !  res0 = K2 * xx0;
 108 !  res0 += K1;
 109 !  res0 *= xx0;
 110 !  res0 += DC1;
 111 !  res0 = dtmp0 * res0;
 112 !  dtmp1 = *((double*)&lexp0);
 113 !  res0 *= dtmp1;
 114 !  fres0 = (float)res0;
 115 !  *py = fres0;
 116 !  py += stridey;
 117 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 118 
 119         ENTRY(__vsqrtf_ultra3)
 120         save    %sp,-SA(MINFRAME)-tmps,%sp
 121         PIC_SETUP(l7)
 122         PIC_SET(l7,.CONST_TBL,o2)
 123         PIC_SET(l7,__vlibm_TBL_sqrtf,l2)
 124 
 125         st      %i0,[%fp+tmp_counter]
 126         sll     %i2,2,stridex
 127         or      %g0,0xff8,%l5
 128 
 129         stx     %i1,[%fp+tmp_px]
 130         sll     %l5,1,_0x1ff0
 131 
 132         ldd     [%o2],K1
 133         sll     %i4,2,stridey
 134 
 135         ldd     [%o2+8],K2
 136         or      %g0,%i3,%g5
 137 
 138         ldd     [%o2+16],DC0
 139         sethi   %hi(0x7f800000),%o0
 140 
 141         ldd     [%o2+24],DC1
 142         sethi   %hi(0x00800000),%l7
 143 
 144         ldd     [%o2+32],DC2
 145 
 146 .begin:
 147         ld      [%fp+tmp_counter],counter
 148         ldx     [%fp+tmp_px],%i1
 149         st      %g0,[%fp+tmp_counter]
 150 .begin1:
 151         cmp     counter,0
 152         ble,pn  %icc,.exit
 153 
 154         lda     [%i1]0x82,%o2           ! (2_0) ax = *(int*)px;
 155 
 156         or      %g0,%i1,%o7
 157         lda     [%i1]0x82,%f25          ! (2_0) x0 = *px;
 158 
 159         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 160         bge,pn  %icc,.spec              ! (2_0) if( ax >= 0x7f800000 )
 161         nop
 162 
 163         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 164         bl,pn   %icc,.spec              ! (2_0) if( ax < 0x00800000 )
 165         nop
 166 
 167         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 168 
 169         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 170 
 171         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 172 
 173         add     %o7,stridex,%i1         ! px += stridex
 174         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 175         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 176         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 177 
 178         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 179         bge,pn  %icc,.update0           ! (3_0) if( ax >= 0x7f800000 )
 180         nop
 181 .cont0:
 182         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 183 
 184         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 185         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 186         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 187 
 188         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 189         bl,pn   %icc,.update1           ! (3_0) if( ax < 0x00800000 )
 190         nop
 191 .cont1:
 192         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 193 
 194         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 195         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 196 
 197         add     %i1,stridex,%i1         ! px += stridex
 198         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 199         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 200 
 201         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 202 
 203         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 204         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 205 
 206         add     %o4,960,%i0             ! (3_0) iexp0 += 0x3c0;
 207 
 208         cmp     %o2,_0x7f800000         ! (4_1) ax ? 0x7f800000
 209         bge,pn  %icc,.update2           ! (4_1) if( ax >= 0x7f800000 )
 210         nop
 211 .cont2:
 212         fsubd   %f40,%f46,%f44          ! (2_1) xx0 = (db0 - hi0);
 213         sllx    %i0,52,%g1              ! (3_1) lexp0 = (long long)iexp0 << 52;
 214         ldd     [%i2],%f40              ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 215 
 216         sra     %o1,11,%l0              ! (3_1) ax >>= 11;
 217         stx     %g1,[%fp+tmp1]          ! (3_1) dtmp1 = *((double*)&lexp0);
 218         for     %f58,DC1,%f48           ! (3_1) db0 = vis_for(db0,DC1);
 219 
 220         cmp     %o2,_0x00800000         ! (4_1) ax ? 0x00800000
 221         bl,pn   %icc,.update3           ! (4_1) if( ax < 0x00800000 )
 222         nop
 223 .cont3:
 224         fstod   %f13,%f50               ! (4_1) db0 = (double)x0;
 225 
 226         fmuld   %f44,%f40,%f46          ! (2_1) xx0 *= dtmp0;
 227         and     %l0,_0x1ff0,%i0         ! (3_1) si0 = ax & 0x1ff0;
 228         lda     [%i1+stridex]0x82,%l1   ! (0_0) ax = *(int*)px;
 229 
 230         add     %i0,TBL,%l0             ! (3_1) (char*)TBL + si0
 231         fand    %f48,DC2,%f62           ! (3_1) hi0 = vis_fand(db0,DC2);
 232 
 233         sra     %o2,24,%o7              ! (4_1) iexp0 = ax >> 24;
 234 
 235         add     %i1,stridex,%o4         ! px += stridex
 236         add     %o7,960,%o7             ! (4_1) iexp0 += 0x3c0;
 237         lda     [%i1+stridex]0x82,%f17  ! (0_0) x0 = *px;
 238         fand    %f50,DC0,%f54           ! (4_1) db0 = vis_fand(db0,DC0);
 239 
 240         fmuld   K2,%f46,%f52            ! (2_1) res0 = K2 * xx0;
 241         cmp     %l1,_0x7f800000         ! (0_0) ax ? 0x7f800000
 242         bge,pn  %icc,.update4           ! (0_0) if( ax >= 0x7f800000 )
 243         fsubd   %f48,%f62,%f42          ! (3_1) xx0 = (db0 - hi0);
 244 .cont4:
 245         sllx    %o7,52,%o1              ! (4_1) lexp0 = (long long)iexp0 << 52;
 246         ldd     [%i0+TBL],%f40          ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 247 
 248         sra     %o2,11,%i5              ! (4_1) ax >>= 11;
 249         stx     %o1,[%fp+tmp2]          ! (4_1) dtmp1 = *((double*)&lexp0);
 250         for     %f54,DC1,%f34           ! (4_1) db0 = vis_for(db0,DC1);
 251 
 252         cmp     %l1,_0x00800000         ! (0_0) ax ? 0x00800000
 253         bl,pn   %icc,.update5           ! (0_0) if( ax < 0x00800000 )
 254         nop
 255 .cont5:
 256         fstod   %f17,%f56               ! (0_0) db0 = (double)x0;
 257 
 258         fmuld   %f42,%f40,%f42          ! (3_1) xx0 *= dtmp0;
 259         lda     [stridex+%o4]0x82,%i0   ! (1_0) ax = *(int*)px;
 260         faddd   %f52,K1,%f52            ! (2_1) res0 += K1;
 261 
 262         sra     %l1,24,%g1              ! (0_0) iexp0 = ax >> 24;
 263         and     %i5,_0x1ff0,%i5         ! (4_1) si0 = ax & 0x1ff0;
 264         fand    %f34,DC2,%f62           ! (4_1) hi0 = vis_fand(db0,DC2);
 265 
 266         add     %o4,stridex,%i1         ! px += stridex
 267 
 268         add     %g1,960,%o5             ! (0_0) iexp0 += 0x3c0;
 269         add     %i5,TBL,%i3             ! (4_1) (char*)TBL + si0
 270         lda     [stridex+%o4]0x82,%f21  ! (1_0) x0 = *px;
 271         fand    %f56,DC0,%f32           ! (0_0) db0 = vis_fand(db0,DC0);
 272 
 273         fmuld   K2,%f42,%f50            ! (3_1) res0 = K2 * xx0;
 274         cmp     %i0,_0x7f800000         ! (1_0) ax ? 0x7f800000
 275         bge,pn  %icc,.update6           ! (1_0) if( ax >= 0x7f800000 )
 276         fsubd   %f34,%f62,%f54          ! (4_1) xx0 = (db0 - hi0);
 277 .cont6:
 278         fmuld   %f52,%f46,%f52          ! (2_1) res0 *= xx0;
 279         sllx    %o5,52,%o7              ! (0_0) lexp0 = (long long)iexp0 << 52;
 280         ldd     [TBL+%i5],%f62          ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 281 
 282         sra     %l1,11,%i4              ! (0_0) ax >>= 11;
 283         stx     %o7,[%fp+tmp3]          ! (0_0) dtmp1 = *((double*)&lexp0);
 284         for     %f32,DC1,%f48           ! (0_0) db0 = vis_for(db0,DC1);
 285 
 286         cmp     %i0,_0x00800000         ! (1_0) ax ? 0x00800000
 287         bl,pn   %icc,.update7           ! (1_0) if( ax < 0x00800000 )
 288         nop
 289 .cont7:
 290         fstod   %f21,%f56               ! (1_0) db0 = (double)x0;
 291 
 292         fmuld   %f54,%f62,%f46          ! (4_1) xx0 *= dtmp0;
 293         and     %i4,_0x1ff0,%g1         ! (0_0) si0 = ax & 0x1ff0;
 294         lda     [%i1+stridex]0x82,%o2   ! (2_0) ax = *(int*)px;
 295         faddd   %f50,K1,%f62            ! (3_1) res0 += K1;
 296 
 297         add     %g1,TBL,%i5             ! (0_0) (double*)((char*)TBL + si0
 298         fand    %f48,DC2,%f32           ! (0_0) hi0 = vis_fand(db0,DC2);
 299 
 300         sra     %i0,24,%o4              ! (1_0) iexp0 = ax >> 24;
 301         ldd     [%i2+8],%f60            ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 302         faddd   %f52,DC1,%f58           ! (2_1) res0 += DC1;
 303 
 304         add     %i1,stridex,%o7         ! px += stridex
 305         add     %o4,960,%i2             ! (1_0) iexp0 += 0x3c0;
 306         lda     [%i1+stridex]0x82,%f25  ! (2_0) x0 = *px;
 307         fand    %f56,DC0,%f34           ! (1_0) db0 = vis_fand(db0,DC0);
 308 
 309         fmuld   K2,%f46,%f50            ! (4_1) res0 = K2 * xx0;
 310         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 311         bge,pn  %icc,.update8           ! (2_0) if( ax >= 0x7f800000 )
 312         fsubd   %f48,%f32,%f52          ! (0_0) xx0 = (db0 - hi0);
 313 .cont8:
 314         fmuld   %f62,%f42,%f54          ! (3_1) res0 *= xx0;
 315         sllx    %i2,52,%o4              ! (1_0) lexp0 = (long long)iexp0 << 52;
 316         ldd     [TBL+%g1],%f32          ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 317 
 318         fmuld   %f60,%f58,%f60          ! (2_1) res0 = dtmp0 * res0;
 319         sra     %i0,11,%g1              ! (1_0) ax >>= 11;
 320         stx     %o4,[%fp+tmp4]          ! (1_0) dtmp1 = *((double*)&lexp0);
 321         for     %f34,DC1,%f48           ! (1_0) db0 = vis_for(db0,DC1);
 322 
 323         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 324         bl,pn   %icc,.update9           ! (2_0) if( ax < 0x00800000 )
 325         ldd     [%fp+tmp0],%f40         ! (2_1) dtmp1 = *((double*)&lexp0);
 326         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 327 .cont9:
 328         fmuld   %f52,%f32,%f42          ! (0_0) xx0 *= dtmp0;
 329         and     %g1,_0x1ff0,%o5         ! (1_0) si0 = ax & 0x1ff0;
 330         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 331         faddd   %f50,K1,%f34            ! (4_1) res0 += K1;
 332 
 333         add     %o5,TBL,%i4             ! (1_0) (char*)TBL + si0
 334         fand    %f48,DC2,%f62           ! (1_0) hi0 = vis_fand(db0,DC2);
 335 
 336         fmuld   %f60,%f40,%f32          ! (2_1) res0 *= dtmp1;
 337         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 338         ldd     [%l0+8],%f40            ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 339         faddd   %f54,DC1,%f58           ! (3_1) res0 += DC1;
 340 
 341         add     %o7,stridex,%i1         ! px += stridex
 342         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 343         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 344         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 345 
 346         fmuld   K2,%f42,%f50            ! (0_0) res0 = K2 * xx0;
 347         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 348         bge,pn  %icc,.update10          ! (3_0) if( ax >= 0x7f800000 )
 349         fsubd   %f48,%f62,%f54          ! (1_0) xx0 = (db0 - hi0);
 350 .cont10:
 351         fmuld   %f34,%f46,%f52          ! (4_1) res0 *= xx0;
 352         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 353         ldd     [TBL+%o5],%f56          ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 354 
 355         fmuld   %f40,%f58,%f34          ! (3_1) res0 = dtmp0 * res0;
 356         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 357         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 358         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 359 
 360         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 361         bl,pn   %icc,.update11          ! (3_0) if( ax < 0x00800000 )
 362         ldd     [%fp+tmp1],%f62         ! (3_1) dtmp1 = *((double*)&lexp0);
 363         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 364 .cont11:
 365         fmuld   %f54,%f56,%f30          ! (1_0) xx0 *= dtmp0;
 366         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 367         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 368         faddd   %f50,K1,%f56            ! (0_0) res0 += K1;
 369 
 370         add     %i1,stridex,%i1         ! px += stridex
 371         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 372         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 373 
 374         fmuld   %f34,%f62,%f28          ! (3_1) res0 *= dtmp1;
 375         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 376         ldd     [%i3+8],%f50            ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 377         faddd   %f52,DC1,%f54           ! (4_1) res0 += DC1;
 378 
 379         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 380         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 381 
 382         or      %g0,%g5,%i3
 383         cmp     counter,5
 384         bl,pn   %icc,.tail
 385         add     %o4,960,%g5             ! (3_0) iexp0 += 0x3c0;
 386 
 387         ba      .main_loop
 388         sub     counter,5,counter       ! counter
 389 
 390         .align  16
 391 .main_loop:
 392         fmuld   K2,%f30,%f60            ! (1_1) res0 = K2 * xx0;
 393         cmp     %o2,_0x7f800000         ! (4_1) ax ? 0x7f800000
 394         bge,pn  %icc,.update12          ! (4_1) if( ax >= 0x7f800000 )
 395         fsubd   %f40,%f46,%f44          ! (2_1) xx0 = (db0 - hi0);
 396 .cont12:
 397         fmuld   %f56,%f42,%f52          ! (0_1) res0 *= xx0;
 398         sllx    %g5,52,%g5              ! (3_1) lexp0 = (long long)iexp0 << 52;
 399         ldd     [%i2],%f40              ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 400         fdtos   %f32,%f15               ! (2_2) fres0 = (float)res0;
 401 
 402         fmuld   %f50,%f54,%f42          ! (4_2) res0 = dtmp0 * res0;
 403         sra     %o1,11,%l0              ! (3_1) ax >>= 11;
 404         stx     %g5,[%fp+tmp1]          ! (3_1) dtmp1 = *((double*)&lexp0);
 405         for     %f58,DC1,%f48           ! (3_1) db0 = vis_for(db0,DC1);
 406 
 407         cmp     %o2,_0x00800000         ! (4_1) ax ? 0x00800000
 408         bl,pn   %icc,.update13          ! (4_1) if( ax < 0x00800000 )
 409         ldd     [%fp+tmp2],%f56         ! (4_2) dtmp1 = *((double*)&lexp0);
 410         fstod   %f13,%f50               ! (4_1) db0 = (double)x0;
 411 .cont13:
 412         fmuld   %f44,%f40,%f46          ! (2_1) xx0 *= dtmp0;
 413         and     %l0,_0x1ff0,%i0         ! (3_1) si0 = ax & 0x1ff0;
 414         lda     [%i1+stridex]0x82,%l1   ! (0_0) ax = *(int*)px;
 415         faddd   %f60,K1,%f32            ! (1_1) res0 += K1;
 416 
 417         add     %i0,TBL,%l0             ! (3_1) (char*)TBL + si0
 418         add     %i3,stridey,%o3         ! py += stridey
 419         st      %f15,[%i3]              ! (2_2) *py = fres0;
 420         fand    %f48,DC2,%f62           ! (3_1) hi0 = vis_fand(db0,DC2);
 421 
 422         fmuld   %f42,%f56,%f44          ! (4_2) res0 *= dtmp1;
 423         sra     %o2,24,%o7              ! (4_1) iexp0 = ax >> 24;
 424         ldd     [%i5+8],%f58            ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 425         faddd   %f52,DC1,%f34           ! (0_1) res0 += DC1;
 426 
 427         add     %i1,stridex,%o4         ! px += stridex
 428         add     %o7,960,%o7             ! (4_1) iexp0 += 0x3c0;
 429         lda     [%i1+stridex]0x82,%f17  ! (0_0) x0 = *px;
 430         fand    %f50,DC0,%f54           ! (4_1) db0 = vis_fand(db0,DC0);
 431 
 432         fmuld   K2,%f46,%f52            ! (2_1) res0 = K2 * xx0;
 433         cmp     %l1,_0x7f800000         ! (0_0) ax ? 0x7f800000
 434         bge,pn  %icc,.update14          ! (0_0) if( ax >= 0x7f800000 )
 435         fsubd   %f48,%f62,%f42          ! (3_1) xx0 = (db0 - hi0);
 436 .cont14:
 437         fmuld   %f32,%f30,%f48          ! (1_1) res0 *= xx0;
 438         sllx    %o7,52,%o1              ! (4_1) lexp0 = (long long)iexp0 << 52;
 439         ldd     [%i0+TBL],%f40          ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 440         fdtos   %f28,%f19               ! (3_2) fres0 = (float)res0;
 441 
 442         fmuld   %f58,%f34,%f32          ! (0_1) res0 = dtmp0 * res0;
 443         sra     %o2,11,%i5              ! (4_1) ax >>= 11;
 444         stx     %o1,[%fp+tmp2]          ! (4_1) dtmp1 = *((double*)&lexp0);
 445         for     %f54,DC1,%f34           ! (4_1) db0 = vis_for(db0,DC1);
 446 
 447         cmp     %l1,_0x00800000         ! (0_0) ax ? 0x00800000
 448         bl,pn   %icc,.update15          ! (0_0) if( ax < 0x00800000 )
 449         ldd     [%fp+tmp3],%f60         ! (0_1) dtmp1 = *((double*)&lexp0);
 450         fstod   %f17,%f56               ! (0_0) db0 = (double)x0;
 451 .cont15:
 452         fmuld   %f42,%f40,%f42          ! (3_1) xx0 *= dtmp0;
 453         add     %o3,stridey,%g5         ! py += stridey
 454         lda     [stridex+%o4]0x82,%i0   ! (1_0) ax = *(int*)px;
 455         faddd   %f52,K1,%f52            ! (2_1) res0 += K1;
 456 
 457         sra     %l1,24,%g1              ! (0_0) iexp0 = ax >> 24;
 458         and     %i5,_0x1ff0,%i5         ! (4_1) si0 = ax & 0x1ff0;
 459         st      %f19,[%o3]              ! (3_2) *py = fres0;
 460         fand    %f34,DC2,%f62           ! (4_1) hi0 = vis_fand(db0,DC2);
 461 
 462         fmuld   %f32,%f60,%f40          ! (0_1) res0 *= dtmp1;
 463         add     %o4,stridex,%i1         ! px += stridex
 464         ldd     [%i4+8],%f60            ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 465         faddd   %f48,DC1,%f58           ! (1_1) res0 += DC1;
 466 
 467         add     %g1,960,%o5             ! (0_0) iexp0 += 0x3c0;
 468         add     %i5,TBL,%i3             ! (4_1) (char*)TBL + si0
 469         lda     [stridex+%o4]0x82,%f21  ! (1_0) x0 = *px;
 470         fand    %f56,DC0,%f32           ! (0_0) db0 = vis_fand(db0,DC0);
 471 
 472         fmuld   K2,%f42,%f50            ! (3_1) res0 = K2 * xx0;
 473         cmp     %i0,_0x7f800000         ! (1_0) ax ? 0x7f800000
 474         bge,pn  %icc,.update16          ! (1_0) if( ax >= 0x7f800000 )
 475         fsubd   %f34,%f62,%f54          ! (4_1) xx0 = (db0 - hi0);
 476 .cont16:
 477         fmuld   %f52,%f46,%f52          ! (2_1) res0 *= xx0;
 478         sllx    %o5,52,%o7              ! (0_0) lexp0 = (long long)iexp0 << 52;
 479         ldd     [TBL+%i5],%f62          ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 480         fdtos   %f44,%f23               ! (4_2) fres0 = (float)res0;
 481 
 482         fmuld   %f60,%f58,%f44          ! (1_1) res0 = dtmp0 * res0;
 483         sra     %l1,11,%i4              ! (0_0) ax >>= 11;
 484         stx     %o7,[%fp+tmp3]          ! (0_0) dtmp1 = *((double*)&lexp0);
 485         for     %f32,DC1,%f48           ! (0_0) db0 = vis_for(db0,DC1);
 486 
 487         cmp     %i0,_0x00800000         ! (1_0) ax ? 0x00800000
 488         bl,pn   %icc,.update17          ! (1_0) if( ax < 0x00800000 )
 489         ldd     [%fp+tmp4],%f34         ! (1_1) dtmp1 = *((double*)&lexp0);
 490         fstod   %f21,%f56               ! (1_0) db0 = (double)x0;
 491 .cont17:
 492         fmuld   %f54,%f62,%f46          ! (4_1) xx0 *= dtmp0;
 493         and     %i4,_0x1ff0,%g1         ! (0_0) si0 = ax & 0x1ff0;
 494         lda     [%i1+stridex]0x82,%o2   ! (2_0) ax = *(int*)px;
 495         faddd   %f50,K1,%f62            ! (3_1) res0 += K1;
 496 
 497         add     %g1,TBL,%i5             ! (0_0) (double*)((char*)TBL + si0
 498         add     %g5,stridey,%g5         ! py += stridey
 499         st      %f23,[stridey+%o3]      ! (4_2) *py = fres0;
 500         fand    %f48,DC2,%f32           ! (0_0) hi0 = vis_fand(db0,DC2);
 501 
 502         fmuld   %f44,%f34,%f44          ! (1_1) res0 *= dtmp1;
 503         sra     %i0,24,%o4              ! (1_0) iexp0 = ax >> 24;
 504         ldd     [%i2+8],%f60            ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 505         faddd   %f52,DC1,%f58           ! (2_1) res0 += DC1;
 506 
 507         add     %i1,stridex,%o7         ! px += stridex
 508         add     %o4,960,%i2             ! (1_0) iexp0 += 0x3c0;
 509         lda     [%i1+stridex]0x82,%f25  ! (2_0) x0 = *px;
 510         fand    %f56,DC0,%f34           ! (1_0) db0 = vis_fand(db0,DC0);
 511 
 512         fmuld   K2,%f46,%f50            ! (4_1) res0 = K2 * xx0;
 513         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 514         bge,pn  %icc,.update18          ! (2_0) if( ax >= 0x7f800000 )
 515         fsubd   %f48,%f32,%f52          ! (0_0) xx0 = (db0 - hi0);
 516 .cont18:
 517         fmuld   %f62,%f42,%f54          ! (3_1) res0 *= xx0;
 518         sllx    %i2,52,%o4              ! (1_0) lexp0 = (long long)iexp0 << 52;
 519         ldd     [TBL+%g1],%f32          ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 520         fdtos   %f40,%f27               ! (0_1) fres0 = (float)res0;
 521 
 522         fmuld   %f60,%f58,%f60          ! (2_1) res0 = dtmp0 * res0;
 523         sra     %i0,11,%g1              ! (1_0) ax >>= 11;
 524         stx     %o4,[%fp+tmp4]          ! (1_0) dtmp1 = *((double*)&lexp0);
 525         for     %f34,DC1,%f48           ! (1_0) db0 = vis_for(db0,DC1);
 526 
 527         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 528         bl,pn   %icc,.update19          ! (2_0) if( ax < 0x00800000 )
 529         ldd     [%fp+tmp0],%f40         ! (2_1) dtmp1 = *((double*)&lexp0);
 530         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 531 .cont19:
 532         fmuld   %f52,%f32,%f42          ! (0_0) xx0 *= dtmp0;
 533         and     %g1,_0x1ff0,%o5         ! (1_0) si0 = ax & 0x1ff0;
 534         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 535         faddd   %f50,K1,%f34            ! (4_1) res0 += K1;
 536 
 537         add     %o5,TBL,%i4             ! (1_0) (char*)TBL + si0
 538         add     %g5,stridey,%g1         ! py += stridey
 539         st      %f27,[%g5]              ! (0_1) *py = fres0;
 540         fand    %f48,DC2,%f62           ! (1_0) hi0 = vis_fand(db0,DC2);
 541 
 542         fmuld   %f60,%f40,%f32          ! (2_1) res0 *= dtmp1;
 543         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 544         ldd     [%l0+8],%f40            ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 545         faddd   %f54,DC1,%f58           ! (3_1) res0 += DC1;
 546 
 547         add     %o7,stridex,%i1         ! px += stridex
 548         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 549         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 550         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 551 
 552         fmuld   K2,%f42,%f50            ! (0_0) res0 = K2 * xx0;
 553         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 554         bge,pn  %icc,.update20          ! (3_0) if( ax >= 0x7f800000 )
 555         fsubd   %f48,%f62,%f54          ! (1_0) xx0 = (db0 - hi0);
 556 .cont20:
 557         fmuld   %f34,%f46,%f52          ! (4_1) res0 *= xx0;
 558         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 559         ldd     [TBL+%o5],%f56          ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 560         fdtos   %f44,%f8                ! (1_1) fres0 = (float)res0;
 561 
 562         fmuld   %f40,%f58,%f34          ! (3_1) res0 = dtmp0 * res0;
 563         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 564         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 565         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 566 
 567         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 568         bl,pn   %icc,.update21          ! (3_0) if( ax < 0x00800000 )
 569         ldd     [%fp+tmp1],%f62         ! (3_1) dtmp1 = *((double*)&lexp0);
 570         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 571 .cont21:
 572         fmuld   %f54,%f56,%f30          ! (1_0) xx0 *= dtmp0;
 573         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 574         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 575         faddd   %f50,K1,%f56            ! (0_0) res0 += K1;
 576 
 577         add     %i1,stridex,%i1         ! px += stridex
 578         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 579         st      %f8,[stridey+%g5]       ! (1_1) *py = fres0;
 580         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 581 
 582         fmuld   %f34,%f62,%f28          ! (3_1) res0 *= dtmp1;
 583         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 584         ldd     [%i3+8],%f50            ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 585         faddd   %f52,DC1,%f54           ! (4_1) res0 += DC1;
 586 
 587         add     %g1,stridey,%i3         ! py += stridey
 588         subcc   counter,5,counter       ! counter
 589         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 590         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 591 
 592         bpos,pt %icc,.main_loop
 593         add     %o4,960,%g5             ! (3_0) iexp0 += 0x3c0;
 594 
 595         add     counter,5,counter
 596 .tail:
 597         subcc   counter,1,counter
 598         bneg,a  .begin
 599         or      %g0,%i3,%g5
 600 
 601         fmuld   %f56,%f42,%f52          ! (0_1) res0 *= xx0;
 602         fdtos   %f32,%f15               ! (2_2) fres0 = (float)res0;
 603 
 604         fmuld   %f50,%f54,%f42          ! (4_2) res0 = dtmp0 * res0;
 605 
 606         ldd     [%fp+tmp2],%f56         ! (4_2) dtmp1 = *((double*)&lexp0);
 607 
 608         add     %i3,stridey,%o3         ! py += stridey
 609         st      %f15,[%i3]              ! (2_2) *py = fres0;
 610 
 611         subcc   counter,1,counter
 612         bneg,a  .begin
 613         or      %g0,%o3,%g5
 614 
 615         fmuld   %f42,%f56,%f44          ! (4_2) res0 *= dtmp1;
 616         ldd     [%i5+8],%f58            ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 617         faddd   %f52,DC1,%f34           ! (0_1) res0 += DC1;
 618 
 619         fdtos   %f28,%f19               ! (3_2) fres0 = (float)res0;
 620 
 621         fmuld   %f58,%f34,%f32          ! (0_1) res0 = dtmp0 * res0;
 622 
 623         ldd     [%fp+tmp3],%f60         ! (0_1) dtmp1 = *((double*)&lexp0);
 624 
 625         add     %o3,stridey,%g5         ! py += stridey
 626 
 627         st      %f19,[%o3]              ! (3_2) *py = fres0;
 628 
 629         subcc   counter,1,counter
 630         bneg,a  .begin
 631         nop
 632 
 633         fmuld   %f32,%f60,%f40          ! (0_1) res0 *= dtmp1;
 634 
 635         fdtos   %f44,%f23               ! (4_2) fres0 = (float)res0;
 636 
 637         add     %g5,stridey,%g5         ! py += stridey
 638         st      %f23,[stridey+%o3]      ! (4_2) *py = fres0;
 639 
 640         subcc   counter,1,counter
 641         bneg,a  .begin
 642         nop
 643 
 644         fdtos   %f40,%f27               ! (0_1) fres0 = (float)res0;
 645 
 646         st      %f27,[%g5]              ! (0_1) *py = fres0;
 647 
 648         ba      .begin
 649         add     %g5,stridey,%g5
 650 
 651         .align  16
 652 .spec:
     /*
      * Scalar path for the first element of a pass.  Entered from the
      * checks after .begin1 when its bit pattern ax satisfies
      * ax >= 0x7f800000 or ax < 0x00800000 (signed compares), with the
      * element already loaded in %f25 and its address in %i1.
      *
      * The element is computed with the fsqrts instruction and stored
      * through %g5 (the output pointer).  counter is decremented, the
      * input pointer advances by stridex and, in the delay slot of the
      * branch, the output pointer advances by stridey.  Control then
      * returns to .begin1, which re-tests counter.
      */
 653         fsqrts  %f25,%f25
 654         sub     counter,1,counter
 655         add     %i1,stridex,%i1
 656         st      %f25,[%g5]
 657         ba      .begin1
 658         add     %g5,stridey,%g5
 659 
 660         .align  16
 661 .update0:
     /*
      * Reached from the prologue when the second element of the pass
      * (ax in %o1, x0 in %f0, address in %i1) has ax >= 0x7f800000.
      * fzeros sits in the delay slot of a non-annulled branch, so %f0 is
      * zeroed on both paths below.
      *
      * counter <= 1: the element is past the last one this pass has to
      *   produce; continue at .cont0 with nothing else changed.
      * otherwise: save the element address in tmp_px and the count from
      *   that element onward (counter - 1) in tmp_counter, both of which
      *   .begin reloads, then cut counter to 1 so that this pass stores
      *   only the element ahead of it.  %o1 is overwritten with the fixed
      *   pattern 0x7f800000.
      *   NOTE(review): the lane's result is not stored in this pass, so
      *   the exact placeholder value looks immaterial - confirm.
      */
 662         cmp     counter,1
 663         ble     .cont0
 664         fzeros  %f0
 665 
 666         stx     %i1,[%fp+tmp_px]
 667         sethi   %hi(0x7f800000),%o1
 668 
 669         sub     counter,1,counter
 670         st      counter,[%fp+tmp_counter]
 671 
 672         ba      .cont0
 673         or      %g0,1,counter
 674 
 675         .align  16
 676 .update1:
     /*
      * Reached from the prologue when the second element of the pass
      * (ax in %o1, x0 in %f0, address in %i1) has ax < 0x00800000
      * (signed compare).  fzeros sits in the delay slot of a non-annulled
      * branch, so %f0 is zeroed on both paths below.
      *
      * counter <= 1: the element is past the last one this pass has to
      *   produce; continue at .cont1 with nothing else changed.
      * otherwise: save the element address in tmp_px and the count from
      *   that element onward (counter - 1) in tmp_counter, both of which
      *   .begin reloads, then cut counter to 1 so that this pass stores
      *   only the element ahead of it.  %o1 is cleared, so the integer
      *   exponent/index arithmetic for this lane runs on ax = 0.
      */
 677         cmp     counter,1
 678         ble     .cont1
 679         fzeros  %f0
 680 
 681         stx     %i1,[%fp+tmp_px]
 682         clr     %o1
 683 
 684         sub     counter,1,counter
 685         st      counter,[%fp+tmp_counter]
 686 
 687         ba      .cont1
 688         or      %g0,1,counter
 689 
 690         .align  16
 691 .update2:
     /*
      * .update2: out-of-line stub that limits the current pass to 2 elements.
      * If counter <= 2, only %f13 is zeroed and control goes to .cont2.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 2 is saved in
      * frame slot tmp_counter, %o2 is set to 0x7f800000, counter becomes 2,
      * and control goes to .cont2.
      * NOTE(review): the branch into this stub and the code at .cont2 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 692         cmp     counter,2                   /* compare element count with 2 */
 693         ble     .cont2                      /* counter <= 2 (signed): nothing to save */
 694         fzeros  %f13                        /* delay slot, runs on both paths: %f13 = 0 */
 695
 696         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 697         sethi   %hi(0x7f800000),%o2         /* %o2 = 0x7f800000 (low 10 bits are zero) */
 698
 699         sub     counter,2,counter           /* counter -= 2 */
 700         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 701
 702         ba      .cont2                      /* rejoin at .cont2 */
 703         or      %g0,2,counter               /* delay slot: counter = 2 */
 704 
 705         .align  16
 706 .update3:
     /*
      * .update3: out-of-line stub that limits the current pass to 2 elements.
      * If counter <= 2, only %f13 is zeroed and control goes to .cont3.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 2 is saved in
      * frame slot tmp_counter, %o2 is cleared, counter becomes 2, and
      * control goes to .cont3.
      * NOTE(review): the branch into this stub and the code at .cont3 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 707         cmp     counter,2                   /* compare element count with 2 */
 708         ble     .cont3                      /* counter <= 2 (signed): nothing to save */
 709         fzeros  %f13                        /* delay slot, runs on both paths: %f13 = 0 */
 710
 711         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 712         clr     %o2                         /* %o2 = 0 */
 713
 714         sub     counter,2,counter           /* counter -= 2 */
 715         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 716
 717         ba      .cont3                      /* rejoin at .cont3 */
 718         or      %g0,2,counter               /* delay slot: counter = 2 */
 719 
 720         .align  16
 721 .update4:
     /*
      * .update4: out-of-line stub that limits the current pass to 3 elements.
      * If counter <= 3, only %f17 is zeroed and control goes to .cont4.
      * Otherwise %o4 is saved in frame slot tmp_px, counter - 3 is saved in
      * frame slot tmp_counter, %l1 is set to 0x7f800000, counter becomes 3,
      * and control goes to .cont4.
      * NOTE(review): the branch into this stub and the code at .cont4 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 722         cmp     counter,3                   /* compare element count with 3 */
 723         ble     .cont4                      /* counter <= 3 (signed): nothing to save */
 724         fzeros  %f17                        /* delay slot, runs on both paths: %f17 = 0 */
 725
 726         stx     %o4,[%fp+tmp_px]            /* 64-bit store of %o4 to tmp_px */
 727         sethi   %hi(0x7f800000),%l1         /* %l1 = 0x7f800000 (low 10 bits are zero) */
 728
 729         sub     counter,3,counter           /* counter -= 3 */
 730         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 731
 732         ba      .cont4                      /* rejoin at .cont4 */
 733         or      %g0,3,counter               /* delay slot: counter = 3 */
 734 
 735         .align  16
 736 .update5:
     /*
      * .update5: out-of-line stub that limits the current pass to 3 elements.
      * If counter <= 3, only %f17 is zeroed and control goes to .cont5.
      * Otherwise %o4 is saved in frame slot tmp_px, counter - 3 is saved in
      * frame slot tmp_counter, %l1 is cleared, counter becomes 3, and
      * control goes to .cont5.
      * NOTE(review): the branch into this stub and the code at .cont5 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 737         cmp     counter,3                   /* compare element count with 3 */
 738         ble     .cont5                      /* counter <= 3 (signed): nothing to save */
 739         fzeros  %f17                        /* delay slot, runs on both paths: %f17 = 0 */
 740
 741         stx     %o4,[%fp+tmp_px]            /* 64-bit store of %o4 to tmp_px */
 742         clr     %l1                         /* %l1 = 0 */
 743
 744         sub     counter,3,counter           /* counter -= 3 */
 745         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 746
 747         ba      .cont5                      /* rejoin at .cont5 */
 748         or      %g0,3,counter               /* delay slot: counter = 3 */
 749 
 750         .align  16
 751 .update6:
     /*
      * .update6: out-of-line stub that limits the current pass to 4 elements.
      * If counter <= 4, only %f21 is zeroed and control goes to .cont6.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 4 is saved in
      * frame slot tmp_counter, %i0 is set to 0x7f800000, counter becomes 4,
      * and control goes to .cont6.
      * NOTE(review): the branch into this stub and the code at .cont6 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 752         cmp     counter,4                   /* compare element count with 4 */
 753         ble     .cont6                      /* counter <= 4 (signed): nothing to save */
 754         fzeros  %f21                        /* delay slot, runs on both paths: %f21 = 0 */
 755
 756         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 757         sethi   %hi(0x7f800000),%i0         /* %i0 = 0x7f800000 (low 10 bits are zero) */
 758
 759         sub     counter,4,counter           /* counter -= 4 */
 760         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 761
 762         ba      .cont6                      /* rejoin at .cont6 */
 763         or      %g0,4,counter               /* delay slot: counter = 4 */
 764 
 765         .align  16
 766 .update7:
     /*
      * .update7: out-of-line stub that limits the current pass to 4 elements.
      * If counter <= 4, only %f21 is zeroed and control goes to .cont7.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 4 is saved in
      * frame slot tmp_counter, %i0 is cleared, counter becomes 4, and
      * control goes to .cont7.
      * NOTE(review): the branch into this stub and the code at .cont7 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 767         cmp     counter,4                   /* compare element count with 4 */
 768         ble     .cont7                      /* counter <= 4 (signed): nothing to save */
 769         fzeros  %f21                        /* delay slot, runs on both paths: %f21 = 0 */
 770
 771         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 772         clr     %i0                         /* %i0 = 0 */
 773
 774         sub     counter,4,counter           /* counter -= 4 */
 775         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 776
 777         ba      .cont7                      /* rejoin at .cont7 */
 778         or      %g0,4,counter               /* delay slot: counter = 4 */
 779 
 780         .align  16
 781 .update8:
     /*
      * .update8: out-of-line stub that limits the current pass to 5 elements.
      * If counter <= 5, only %f25 is zeroed and control goes to .cont8.
      * Otherwise %o7 is saved in frame slot tmp_px, counter - 5 is saved in
      * frame slot tmp_counter, %o2 is set to 0x7f800000, counter becomes 5,
      * and control goes to .cont8.
      * NOTE(review): the branch into this stub and the code at .cont8 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 782         cmp     counter,5                   /* compare element count with 5 */
 783         ble     .cont8                      /* counter <= 5 (signed): nothing to save */
 784         fzeros  %f25                        /* delay slot, runs on both paths: %f25 = 0 */
 785
 786         stx     %o7,[%fp+tmp_px]            /* 64-bit store of %o7 to tmp_px */
 787         sethi   %hi(0x7f800000),%o2         /* %o2 = 0x7f800000 (low 10 bits are zero) */
 788
 789         sub     counter,5,counter           /* counter -= 5 */
 790         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 791
 792         ba      .cont8                      /* rejoin at .cont8 */
 793         or      %g0,5,counter               /* delay slot: counter = 5 */
 794 
 795         .align  16
 796 .update9:
     /*
      * .update9: out-of-line stub that limits the current pass to 5 elements.
      * If counter <= 5, only %f25 is zeroed and control goes to .cont9.
      * Otherwise %o7 is saved in frame slot tmp_px, counter - 5 is saved in
      * frame slot tmp_counter, %o2 is cleared, counter becomes 5, and
      * control goes to .cont9.
      * NOTE(review): the branch into this stub and the code at .cont9 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 797         cmp     counter,5                   /* compare element count with 5 */
 798         ble     .cont9                      /* counter <= 5 (signed): nothing to save */
 799         fzeros  %f25                        /* delay slot, runs on both paths: %f25 = 0 */
 800
 801         stx     %o7,[%fp+tmp_px]            /* 64-bit store of %o7 to tmp_px */
 802         clr     %o2                         /* %o2 = 0 */
 803
 804         sub     counter,5,counter           /* counter -= 5 */
 805         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 806
 807         ba      .cont9                      /* rejoin at .cont9 */
 808         or      %g0,5,counter               /* delay slot: counter = 5 */
 809 
 810         .align  16
 811 .update10:
     /*
      * .update10: out-of-line stub that limits the current pass to 6 elements.
      * If counter <= 6, only %f0 is zeroed and control goes to .cont10.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 6 is saved in
      * frame slot tmp_counter, %o1 is set to 0x7f800000, counter becomes 6,
      * and control goes to .cont10.
      * NOTE(review): the branch into this stub and the code at .cont10 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 812         cmp     counter,6                   /* compare element count with 6 */
 813         ble     .cont10                     /* counter <= 6 (signed): nothing to save */
 814         fzeros  %f0                         /* delay slot, runs on both paths: %f0 = 0 */
 815
 816         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 817         sethi   %hi(0x7f800000),%o1         /* %o1 = 0x7f800000 (low 10 bits are zero) */
 818
 819         sub     counter,6,counter           /* counter -= 6 */
 820         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 821
 822         ba      .cont10                     /* rejoin at .cont10 */
 823         or      %g0,6,counter               /* delay slot: counter = 6 */
 824 
 825         .align  16
 826 .update11:
     /*
      * .update11: out-of-line stub that limits the current pass to 6 elements.
      * If counter <= 6, only %f0 is zeroed and control goes to .cont11.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 6 is saved in
      * frame slot tmp_counter, %o1 is cleared, counter becomes 6, and
      * control goes to .cont11.
      * NOTE(review): the branch into this stub and the code at .cont11 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 827         cmp     counter,6                   /* compare element count with 6 */
 828         ble     .cont11                     /* counter <= 6 (signed): nothing to save */
 829         fzeros  %f0                         /* delay slot, runs on both paths: %f0 = 0 */
 830
 831         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 832         clr     %o1                         /* %o1 = 0 */
 833
 834         sub     counter,6,counter           /* counter -= 6 */
 835         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 836
 837         ba      .cont11                     /* rejoin at .cont11 */
 838         or      %g0,6,counter               /* delay slot: counter = 6 */
 839 
 840         .align  16
 841 .update12:
     /*
      * .update12: out-of-line stub that limits the current pass to 2 elements.
      * If counter <= 2, only %f13 is zeroed and control goes to .cont12.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 2 is saved in
      * frame slot tmp_counter, %o2 is set to 0x7f800000, counter becomes 2,
      * and control goes to .cont12.
      * NOTE(review): the branch into this stub and the code at .cont12 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 842         cmp     counter,2                   /* compare element count with 2 */
 843         ble     .cont12                     /* counter <= 2 (signed): nothing to save */
 844         fzeros  %f13                        /* delay slot, runs on both paths: %f13 = 0 */
 845
 846         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 847         sethi   %hi(0x7f800000),%o2         /* %o2 = 0x7f800000 (low 10 bits are zero) */
 848
 849         sub     counter,2,counter           /* counter -= 2 */
 850         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 851
 852         ba      .cont12                     /* rejoin at .cont12 */
 853         or      %g0,2,counter               /* delay slot: counter = 2 */
 854 
 855         .align  16
 856 .update13:
     /*
      * .update13: out-of-line stub that limits the current pass to 2 elements.
      * If counter <= 2, only %f13 is zeroed and control goes to .cont13.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 2 is saved in
      * frame slot tmp_counter, %o2 is cleared, counter becomes 2, and
      * control goes to .cont13.
      * NOTE(review): the branch into this stub and the code at .cont13 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 857         cmp     counter,2                   /* compare element count with 2 */
 858         ble     .cont13                     /* counter <= 2 (signed): nothing to save */
 859         fzeros  %f13                        /* delay slot, runs on both paths: %f13 = 0 */
 860
 861         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 862         clr     %o2                         /* %o2 = 0 */
 863
 864         sub     counter,2,counter           /* counter -= 2 */
 865         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 866
 867         ba      .cont13                     /* rejoin at .cont13 */
 868         or      %g0,2,counter               /* delay slot: counter = 2 */
 869 
 870         .align  16
 871 .update14:
     /*
      * .update14: out-of-line stub that limits the current pass to 3 elements.
      * If counter <= 3, only %f17 is zeroed and control goes to .cont14.
      * Otherwise %o4 is saved in frame slot tmp_px, counter - 3 is saved in
      * frame slot tmp_counter, %l1 is set to 0x7f800000, counter becomes 3,
      * and control goes to .cont14.
      * NOTE(review): the branch into this stub and the code at .cont14 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 872         cmp     counter,3                   /* compare element count with 3 */
 873         ble     .cont14                     /* counter <= 3 (signed): nothing to save */
 874         fzeros  %f17                        /* delay slot, runs on both paths: %f17 = 0 */
 875
 876         stx     %o4,[%fp+tmp_px]            /* 64-bit store of %o4 to tmp_px */
 877         sethi   %hi(0x7f800000),%l1         /* %l1 = 0x7f800000 (low 10 bits are zero) */
 878
 879         sub     counter,3,counter           /* counter -= 3 */
 880         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 881
 882         ba      .cont14                     /* rejoin at .cont14 */
 883         or      %g0,3,counter               /* delay slot: counter = 3 */
 884 
 885         .align  16
 886 .update15:
     /*
      * .update15: out-of-line stub that limits the current pass to 3 elements.
      * If counter <= 3, only %f17 is zeroed and control goes to .cont15.
      * Otherwise %o4 is saved in frame slot tmp_px, counter - 3 is saved in
      * frame slot tmp_counter, %l1 is cleared, counter becomes 3, and
      * control goes to .cont15.
      * NOTE(review): the branch into this stub and the code at .cont15 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 887         cmp     counter,3                   /* compare element count with 3 */
 888         ble     .cont15                     /* counter <= 3 (signed): nothing to save */
 889         fzeros  %f17                        /* delay slot, runs on both paths: %f17 = 0 */
 890
 891         stx     %o4,[%fp+tmp_px]            /* 64-bit store of %o4 to tmp_px */
 892         clr     %l1                         /* %l1 = 0 */
 893
 894         sub     counter,3,counter           /* counter -= 3 */
 895         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 896
 897         ba      .cont15                     /* rejoin at .cont15 */
 898         or      %g0,3,counter               /* delay slot: counter = 3 */
 899 
 900         .align  16
 901 .update16:
     /*
      * .update16: out-of-line stub that limits the current pass to 4 elements.
      * If counter <= 4, only %f21 is zeroed and control goes to .cont16.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 4 is saved in
      * frame slot tmp_counter, %i0 is set to 0x7f800000, counter becomes 4,
      * and control goes to .cont16.
      * NOTE(review): the branch into this stub and the code at .cont16 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 902         cmp     counter,4                   /* compare element count with 4 */
 903         ble     .cont16                     /* counter <= 4 (signed): nothing to save */
 904         fzeros  %f21                        /* delay slot, runs on both paths: %f21 = 0 */
 905
 906         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 907         sethi   %hi(0x7f800000),%i0         /* %i0 = 0x7f800000 (low 10 bits are zero) */
 908
 909         sub     counter,4,counter           /* counter -= 4 */
 910         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 911
 912         ba      .cont16                     /* rejoin at .cont16 */
 913         or      %g0,4,counter               /* delay slot: counter = 4 */
 914 
 915         .align  16
 916 .update17:
     /*
      * .update17: out-of-line stub that limits the current pass to 4 elements.
      * If counter <= 4, only %f21 is zeroed and control goes to .cont17.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 4 is saved in
      * frame slot tmp_counter, %i0 is cleared, counter becomes 4, and
      * control goes to .cont17.
      * NOTE(review): the branch into this stub and the code at .cont17 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 917         cmp     counter,4                   /* compare element count with 4 */
 918         ble     .cont17                     /* counter <= 4 (signed): nothing to save */
 919         fzeros  %f21                        /* delay slot, runs on both paths: %f21 = 0 */
 920
 921         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 922         clr     %i0                         /* %i0 = 0 */
 923
 924         sub     counter,4,counter           /* counter -= 4 */
 925         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 926
 927         ba      .cont17                     /* rejoin at .cont17 */
 928         or      %g0,4,counter               /* delay slot: counter = 4 */
 929 
 930         .align  16
 931 .update18:
     /*
      * .update18: out-of-line stub that limits the current pass to 5 elements.
      * If counter <= 5, only %f25 is zeroed and control goes to .cont18.
      * Otherwise %o7 is saved in frame slot tmp_px, counter - 5 is saved in
      * frame slot tmp_counter, %o2 is set to 0x7f800000, counter becomes 5,
      * and control goes to .cont18.
      * NOTE(review): the branch into this stub and the code at .cont18 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 932         cmp     counter,5                   /* compare element count with 5 */
 933         ble     .cont18                     /* counter <= 5 (signed): nothing to save */
 934         fzeros  %f25                        /* delay slot, runs on both paths: %f25 = 0 */
 935
 936         stx     %o7,[%fp+tmp_px]            /* 64-bit store of %o7 to tmp_px */
 937         sethi   %hi(0x7f800000),%o2         /* %o2 = 0x7f800000 (low 10 bits are zero) */
 938
 939         sub     counter,5,counter           /* counter -= 5 */
 940         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 941
 942         ba      .cont18                     /* rejoin at .cont18 */
 943         or      %g0,5,counter               /* delay slot: counter = 5 */
 944 
 945         .align  16
 946 .update19:
     /*
      * .update19: out-of-line stub that limits the current pass to 5 elements.
      * If counter <= 5, only %f25 is zeroed and control goes to .cont19.
      * Otherwise %o7 is saved in frame slot tmp_px, counter - 5 is saved in
      * frame slot tmp_counter, %o2 is cleared, counter becomes 5, and
      * control goes to .cont19.
      * NOTE(review): the branch into this stub and the code at .cont19 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 947         cmp     counter,5                   /* compare element count with 5 */
 948         ble     .cont19                     /* counter <= 5 (signed): nothing to save */
 949         fzeros  %f25                        /* delay slot, runs on both paths: %f25 = 0 */
 950
 951         stx     %o7,[%fp+tmp_px]            /* 64-bit store of %o7 to tmp_px */
 952         clr     %o2                         /* %o2 = 0 */
 953
 954         sub     counter,5,counter           /* counter -= 5 */
 955         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 956
 957         ba      .cont19                     /* rejoin at .cont19 */
 958         or      %g0,5,counter               /* delay slot: counter = 5 */
 959 
 960         .align  16
 961 .update20:
     /*
      * .update20: out-of-line stub that limits the current pass to 6 elements.
      * If counter <= 6, only %f0 is zeroed and control goes to .cont20.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 6 is saved in
      * frame slot tmp_counter, %o1 is set to 0x7f800000, counter becomes 6,
      * and control goes to .cont20.
      * NOTE(review): the branch into this stub and the code at .cont20 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 962         cmp     counter,6                   /* compare element count with 6 */
 963         ble     .cont20                     /* counter <= 6 (signed): nothing to save */
 964         fzeros  %f0                         /* delay slot, runs on both paths: %f0 = 0 */
 965
 966         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 967         sethi   %hi(0x7f800000),%o1         /* %o1 = 0x7f800000 (low 10 bits are zero) */
 968
 969         sub     counter,6,counter           /* counter -= 6 */
 970         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 971
 972         ba      .cont20                     /* rejoin at .cont20 */
 973         or      %g0,6,counter               /* delay slot: counter = 6 */
 974 
 975         .align  16
 976 .update21:
     /*
      * .update21: out-of-line stub that limits the current pass to 6 elements.
      * If counter <= 6, only %f0 is zeroed and control goes to .cont21.
      * Otherwise %i1 is saved in frame slot tmp_px, counter - 6 is saved in
      * frame slot tmp_counter, %o1 is cleared, counter becomes 6, and
      * control goes to .cont21.
      * NOTE(review): the branch into this stub and the code at .cont21 are
      * outside this excerpt; presumably the saved pointer and count let the
      * main loop resume later with the remaining elements -- confirm.
      */
 977         cmp     counter,6                   /* compare element count with 6 */
 978         ble     .cont21                     /* counter <= 6 (signed): nothing to save */
 979         fzeros  %f0                         /* delay slot, runs on both paths: %f0 = 0 */
 980
 981         stx     %i1,[%fp+tmp_px]            /* 64-bit store of %i1 to tmp_px */
 982         clr     %o1                         /* %o1 = 0 */
 983
 984         sub     counter,6,counter           /* counter -= 6 */
 985         st      counter,[%fp+tmp_counter]   /* 32-bit store of the reduced count */
 986
 987         ba      .cont21                     /* rejoin at .cont21 */
 988         or      %g0,6,counter               /* delay slot: counter = 6 */
 989 
 990 .exit:
     /*
      * .exit: common return point of __vsqrtf_ultra3. Returns to the
      * caller and, in the delay slot, restores the caller's register window.
      */
 991         ret                                 /* return to the caller */
 992         restore                             /* delay slot: restore the caller's register window */
 993         SET_SIZE(__vsqrtf_ultra3)           /* macro defined elsewhere; presumably records the symbol size -- confirm */
 994