1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vsqrtf_ultra3.S"
  30 
  31 #include "libm.h"
  32         .weak   __vsqrtf
  33         .type   __vsqrtf,#function
     ! __vsqrtf is exported as a weak function symbol and is set equal to
     ! the __vsqrtf_ultra3 entry point of this file.
  34         __vsqrtf = __vsqrtf_ultra3
  35 
  36         RO_DATA
  37         .align  64
  38 
     ! Constant pool.  Each entry is one 64-bit value written as two
     ! 32-bit words, most significant word first.  The function prologue
     ! loads the five entries with ldd from byte offsets 0, 8, 16, 24, 32.
     !   first two entries : coefficients of the polynomial
     !                       1 + c1*xx + c2*xx*xx used by the algorithm
     !                       (numerically close to 1/2 and -1/8)
     !   third entry       : mask selecting the low 52 bits of a double
     !   fourth entry      : bit pattern of the double value 1.0
     !   fifth entry       : mask selecting bits 62..44 of a double
  39 .CONST_TBL:
  40         .word   0x3fe00001, 0x80007e00  ! K1  =  5.00000715259318464227e-01
  41         .word   0xbfc00003, 0xc0017a01  ! K2  = -1.25000447037521686593e-01
  42         .word   0x000fffff, 0xffffffff  ! DC0 = 0x000fffffffffffff
  43         .word   0x3ff00000, 0x00000000  ! DC1 = 0x3ff0000000000000
  44         .word   0x7ffff000, 0x00000000  ! DC2 = 0x7ffff00000000000
  45 
     ! Floating-point registers that hold the pool constants for the whole
     ! call.  Keep comments off the define lines themselves: anything after
     ! the register name would become part of the macro body.
  46 #define DC0             %f6
  47 #define DC1             %f4
  48 #define DC2             %f2
  49 #define K2              %f38
  50 #define K1              %f36
     ! Integer registers with a fixed role: lookup table base, input and
     ! output strides in bytes, the table index mask, the number of
     ! elements still to process, and the two range-check bounds.
  51 #define TBL             %l2
  52 #define stridex         %l3
  53 #define stridey         %l4
  54 #define _0x1ff0         %l5
  55 #define counter         %l6
  56 #define _0x00800000     %l7
  57 #define _0x7f800000     %o0
  58 
     ! Scratch slots addressed relative to %fp.  The first two save the
     ! input pointer (stx/ldx) and the remaining element count (st/ld) when
     ! a run has to stop early.  The other five move the exponent bit
     ! patterns from integer registers to FP registers through memory
     ! (written with stx, read back with ldd).
  59 #define tmp_px          STACK_BIAS-0x40
  60 #define tmp_counter     STACK_BIAS-0x38
  61 #define tmp0            STACK_BIAS-0x30
  62 #define tmp1            STACK_BIAS-0x28
  63 #define tmp2            STACK_BIAS-0x20
  64 #define tmp3            STACK_BIAS-0x18
  65 #define tmp4            STACK_BIAS-0x10
  66 
  67 ! sizeof temp storage - must be a multiple of 16 for V9
  68 #define tmps            0x40
  69 
  70 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
  71 !      !!!!!   algorithm   !!!!!
  72 !
  73 !  x0 = *px;
  74 !  ax = *(int*)px;
  75 !  px += stridex;
  76 !
  77 !  if( ax >= 0x7f800000 )
  78 !  {
  79 !    *py = sqrtf(x0);
  80 !    py += stridey;
  81 !    continue;
  82 !  }
  83 !  if( ax < 0x00800000 )
  84 !  {
  85 !    *py = sqrtf(x0);
  86 !    py += stridey;
  87 !    continue;
  88 !  }
  89 !
  90 !  db0 = (double)x0;
  91 !  iexp0 = ax >> 24;
  92 !  iexp0 += 0x3c0;
  93 !  lexp0 = (long long)iexp0 << 52;
  94 !
  95 !  db0 = vis_fand(db0,DC0);
  96 !  db0 = vis_for(db0,DC1);
  97 !  hi0 = vis_fand(db0,DC2);
  98 !
  99 !  ax >>= 11;
 100 !  si0 = ax & 0x1ff0;
 101 !  dtmp0 = ((double*)((char*)TBL + si0))[0];
 102 !  xx0 = (db0 - hi0);
 103 !  xx0 *= dtmp0;
 104 !  dtmp0 = ((double*)((char*)TBL + si0))[1];
 105 !  res0 = K2 * xx0;
 106 !  res0 += K1;
 107 !  res0 *= xx0;
 108 !  res0 += DC1;
 109 !  res0 = dtmp0 * res0;
 110 !  dtmp1 = *((double*)&lexp0);
 111 !  res0 *= dtmp1;
 112 !  fres0 = (float)res0;
 113 !  *py = fres0;
 114 !  py += stridey;
 115 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 116 
 117         ENTRY(__vsqrtf_ultra3)
     ! Entry point.  Incoming registers as they are consumed below:
     !   %i0 = element count      (stored as a 32-bit value)
     !   %i1 = px, input pointer  (stored as a 64-bit value)
     !   %i2 = input stride       (shifted left by 2 before use)
     !   %i3 = py, output pointer (kept in %g5)
     !   %i4 = output stride      (shifted left by 2 before use)
     ! The shifted strides are used directly as byte offsets, which
     ! presumably converts a stride given in 4-byte float elements.
     ! The frame gets 0x40 extra bytes for the scratch slots.
     ! PIC_SETUP and PIC_SET come from libm.h (not shown here); they are
     ! expected to leave the address of .CONST_TBL in %o2 and the address
     ! of __vlibm_TBL_sqrtf in %l2, which is how both are used afterwards.
 118         save    %sp,-SA(MINFRAME)-tmps,%sp
 119         PIC_SETUP(l7)
 120         PIC_SET(l7,.CONST_TBL,o2)
 121         PIC_SET(l7,__vlibm_TBL_sqrtf,l2)
 122 
     ! Park the element count; .begin reloads it from this slot.
 123         st      %i0,[%fp+tmp_counter]
 124         sll     %i2,2,stridex
     ! 0x1ff0 does not fit a 13-bit signed immediate, so it is built as
     ! 0xff8 shifted left by one (see the second sll below).
 125         or      %g0,0xff8,%l5
 126 
     ! Park the input pointer; .begin reloads it from this slot.
 127         stx     %i1,[%fp+tmp_px]
 128         sll     %l5,1,_0x1ff0
 129 
     ! Load the five pool constants, interleaved with integer setup.
 130         ldd     [%o2],K1
 131         sll     %i4,2,stridey
 132 
 133         ldd     [%o2+8],K2
 134         or      %g0,%i3,%g5
 135 
 136         ldd     [%o2+16],DC0
 137         sethi   %hi(0x7f800000),%o0
 138 
 139         ldd     [%o2+24],DC1
     ! %l7 was the scratch register of the PIC macros above; from here on
     ! it holds the lower range-check bound.
 140         sethi   %hi(0x00800000),%l7
 141 
 142         ldd     [%o2+32],DC2
 143 
 144 .begin:
     ! (Re)start point.  Reloads the element count and the input pointer
     ! from their stack slots and then clears the saved count.  Unless an
     ! .updateN handler stores a new count in the meantime, the next pass
     ! through here therefore finds zero and leaves through .exit.
 145         ld      [%fp+tmp_counter],counter
 146         ldx     [%fp+tmp_px],%i1
 147         st      %g0,[%fp+tmp_counter]
 148 .begin1:
     ! counter <= 0 means nothing is left.  The lda below sits in the
     ! branch delay slot and executes on both paths.
     ! All input loads use ASI 0x82 (ASI_PRIMARY_NOFAULT in SPARC V9).
 149         cmp     counter,0
 150         ble,pn  %icc,.exit
 151 
 152         lda     [%i1]0x82,%o2           ! (2_0) ax = *(int*)px;
 153 
 154         or      %g0,%i1,%o7
 155         lda     [%i1]0x82,%f25          ! (2_0) x0 = *px;
 156 
     ! The range checks are signed integer compares on the raw float bits.
     ! Bits >= 0x7f800000 (positive infinity and NaNs with a clear sign
     ! bit) and bits < 0x00800000 (zeros, subnormals and every value with
     ! the sign bit set) are handed to .spec one element at a time.
 157         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 158         bge,pn  %icc,.spec              ! (2_0) if( ax >= 0x7f800000 )
 159         nop
 160 
 161         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 162         bl,pn   %icc,.spec              ! (2_0) if( ax < 0x00800000 )
 163         nop
 164 
     ! Pipeline fill.  From here to .main_loop the first elements of a run
     ! are started one after another.  The (a_b) tags in the comments
     ! appear to name the in-flight element an instruction belongs to.
     ! Inputs for later elements are loaded ahead of time and range
     ! checked; a value that fails a check diverts to the matching
     ! .updateN handler, which comes back to the .contN label placed
     ! right after that check.
 165         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 166 
 167         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 168 
 169         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 170 
 171         add     %o7,stridex,%i1         ! px += stridex
 172         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 173         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 174         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 175 
 176         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 177         bge,pn  %icc,.update0           ! (3_0) if( ax >= 0x7f800000 )
 178         nop
 179 .cont0:
 180         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 181 
 182         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 183         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 184         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 185 
 186         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 187         bl,pn   %icc,.update1           ! (3_0) if( ax < 0x00800000 )
 188         nop
 189 .cont1:
 190         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 191 
 192         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 193         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 194 
 195         add     %i1,stridex,%i1         ! px += stridex
 196         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 197         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 198 
 199         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 200 
 201         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 202         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 203 
 204         add     %o4,960,%i0             ! (3_0) iexp0 += 0x3c0;
 205 
 206         cmp     %o2,_0x7f800000         ! (4_1) ax ? 0x7f800000
 207         bge,pn  %icc,.update2           ! (4_1) if( ax >= 0x7f800000 )
 208         nop
 209 .cont2:
 210         fsubd   %f40,%f46,%f44          ! (2_1) xx0 = (db0 - hi0);
 211         sllx    %i0,52,%g1              ! (3_1) lexp0 = (long long)iexp0 << 52;
 212         ldd     [%i2],%f40              ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 213 
 214         sra     %o1,11,%l0              ! (3_1) ax >>= 11;
 215         stx     %g1,[%fp+tmp1]          ! (3_1) dtmp1 = *((double*)&lexp0);
 216         for     %f58,DC1,%f48           ! (3_1) db0 = vis_for(db0,DC1);
 217 
 218         cmp     %o2,_0x00800000         ! (4_1) ax ? 0x00800000
 219         bl,pn   %icc,.update3           ! (4_1) if( ax < 0x00800000 )
 220         nop
 221 .cont3:
 222         fstod   %f13,%f50               ! (4_1) db0 = (double)x0;
 223 
 224         fmuld   %f44,%f40,%f46          ! (2_1) xx0 *= dtmp0;
 225         and     %l0,_0x1ff0,%i0         ! (3_1) si0 = ax & 0x1ff0;
 226         lda     [%i1+stridex]0x82,%l1   ! (0_0) ax = *(int*)px;
 227 
 228         add     %i0,TBL,%l0             ! (3_1) (char*)TBL + si0
 229         fand    %f48,DC2,%f62           ! (3_1) hi0 = vis_fand(db0,DC2);
 230 
 231         sra     %o2,24,%o7              ! (4_1) iexp0 = ax >> 24;
 232 
 233         add     %i1,stridex,%o4         ! px += stridex
 234         add     %o7,960,%o7             ! (4_1) iexp0 += 0x3c0;
 235         lda     [%i1+stridex]0x82,%f17  ! (0_0) x0 = *px;
 236         fand    %f50,DC0,%f54           ! (4_1) db0 = vis_fand(db0,DC0);
 237 
 238         fmuld   K2,%f46,%f52            ! (2_1) res0 = K2 * xx0;
 239         cmp     %l1,_0x7f800000         ! (0_0) ax ? 0x7f800000
 240         bge,pn  %icc,.update4           ! (0_0) if( ax >= 0x7f800000 )
 241         fsubd   %f48,%f62,%f42          ! (3_1) xx0 = (db0 - hi0);
 242 .cont4:
 243         sllx    %o7,52,%o1              ! (4_1) lexp0 = (long long)iexp0 << 52;
 244         ldd     [%i0+TBL],%f40          ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 245 
 246         sra     %o2,11,%i5              ! (4_1) ax >>= 11;
 247         stx     %o1,[%fp+tmp2]          ! (4_1) dtmp1 = *((double*)&lexp0);
 248         for     %f54,DC1,%f34           ! (4_1) db0 = vis_for(db0,DC1);
 249 
 250         cmp     %l1,_0x00800000         ! (0_0) ax ? 0x00800000
 251         bl,pn   %icc,.update5           ! (0_0) if( ax < 0x00800000 )
 252         nop
 253 .cont5:
 254         fstod   %f17,%f56               ! (0_0) db0 = (double)x0;
 255 
 256         fmuld   %f42,%f40,%f42          ! (3_1) xx0 *= dtmp0;
 257         lda     [stridex+%o4]0x82,%i0   ! (1_0) ax = *(int*)px;
 258         faddd   %f52,K1,%f52            ! (2_1) res0 += K1;
 259 
 260         sra     %l1,24,%g1              ! (0_0) iexp0 = ax >> 24;
 261         and     %i5,_0x1ff0,%i5         ! (4_1) si0 = ax & 0x1ff0;
 262         fand    %f34,DC2,%f62           ! (4_1) hi0 = vis_fand(db0,DC2);
 263 
 264         add     %o4,stridex,%i1         ! px += stridex
 265 
 266         add     %g1,960,%o5             ! (0_0) iexp0 += 0x3c0;
 267         add     %i5,TBL,%i3             ! (4_1) (char*)TBL + si0
 268         lda     [stridex+%o4]0x82,%f21  ! (1_0) x0 = *px;
 269         fand    %f56,DC0,%f32           ! (0_0) db0 = vis_fand(db0,DC0);
 270 
 271         fmuld   K2,%f42,%f50            ! (3_1) res0 = K2 * xx0;
 272         cmp     %i0,_0x7f800000         ! (1_0) ax ? 0x7f800000
 273         bge,pn  %icc,.update6           ! (1_0) if( ax >= 0x7f800000 )
 274         fsubd   %f34,%f62,%f54          ! (4_1) xx0 = (db0 - hi0);
 275 .cont6:
 276         fmuld   %f52,%f46,%f52          ! (2_1) res0 *= xx0;
 277         sllx    %o5,52,%o7              ! (0_0) lexp0 = (long long)iexp0 << 52;
 278         ldd     [TBL+%i5],%f62          ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 279 
 280         sra     %l1,11,%i4              ! (0_0) ax >>= 11;
 281         stx     %o7,[%fp+tmp3]          ! (0_0) dtmp1 = *((double*)&lexp0);
 282         for     %f32,DC1,%f48           ! (0_0) db0 = vis_for(db0,DC1);
 283 
 284         cmp     %i0,_0x00800000         ! (1_0) ax ? 0x00800000
 285         bl,pn   %icc,.update7           ! (1_0) if( ax < 0x00800000 )
 286         nop
 287 .cont7:
 288         fstod   %f21,%f56               ! (1_0) db0 = (double)x0;
 289 
 290         fmuld   %f54,%f62,%f46          ! (4_1) xx0 *= dtmp0;
 291         and     %i4,_0x1ff0,%g1         ! (0_0) si0 = ax & 0x1ff0;
 292         lda     [%i1+stridex]0x82,%o2   ! (2_0) ax = *(int*)px;
 293         faddd   %f50,K1,%f62            ! (3_1) res0 += K1;
 294 
 295         add     %g1,TBL,%i5             ! (0_0) (double*)((char*)TBL + si0
 296         fand    %f48,DC2,%f32           ! (0_0) hi0 = vis_fand(db0,DC2);
 297 
 298         sra     %i0,24,%o4              ! (1_0) iexp0 = ax >> 24;
 299         ldd     [%i2+8],%f60            ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 300         faddd   %f52,DC1,%f58           ! (2_1) res0 += DC1;
 301 
 302         add     %i1,stridex,%o7         ! px += stridex
 303         add     %o4,960,%i2             ! (1_0) iexp0 += 0x3c0;
 304         lda     [%i1+stridex]0x82,%f25  ! (2_0) x0 = *px;
 305         fand    %f56,DC0,%f34           ! (1_0) db0 = vis_fand(db0,DC0);
 306 
 307         fmuld   K2,%f46,%f50            ! (4_1) res0 = K2 * xx0;
 308         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 309         bge,pn  %icc,.update8           ! (2_0) if( ax >= 0x7f800000 )
 310         fsubd   %f48,%f32,%f52          ! (0_0) xx0 = (db0 - hi0);
 311 .cont8:
 312         fmuld   %f62,%f42,%f54          ! (3_1) res0 *= xx0;
 313         sllx    %i2,52,%o4              ! (1_0) lexp0 = (long long)iexp0 << 52;
 314         ldd     [TBL+%g1],%f32          ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 315 
 316         fmuld   %f60,%f58,%f60          ! (2_1) res0 = dtmp0 * res0;
 317         sra     %i0,11,%g1              ! (1_0) ax >>= 11;
 318         stx     %o4,[%fp+tmp4]          ! (1_0) dtmp1 = *((double*)&lexp0);
 319         for     %f34,DC1,%f48           ! (1_0) db0 = vis_for(db0,DC1);
 320 
 321         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 322         bl,pn   %icc,.update9           ! (2_0) if( ax < 0x00800000 )
 323         ldd     [%fp+tmp0],%f40         ! (2_1) dtmp1 = *((double*)&lexp0);
 324         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 325 .cont9:
 326         fmuld   %f52,%f32,%f42          ! (0_0) xx0 *= dtmp0;
 327         and     %g1,_0x1ff0,%o5         ! (1_0) si0 = ax & 0x1ff0;
 328         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 329         faddd   %f50,K1,%f34            ! (4_1) res0 += K1;
 330 
 331         add     %o5,TBL,%i4             ! (1_0) (char*)TBL + si0
 332         fand    %f48,DC2,%f62           ! (1_0) hi0 = vis_fand(db0,DC2);
 333 
 334         fmuld   %f60,%f40,%f32          ! (2_1) res0 *= dtmp1;
 335         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 336         ldd     [%l0+8],%f40            ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 337         faddd   %f54,DC1,%f58           ! (3_1) res0 += DC1;
 338 
 339         add     %o7,stridex,%i1         ! px += stridex
 340         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 341         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 342         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 343 
 344         fmuld   K2,%f42,%f50            ! (0_0) res0 = K2 * xx0;
 345         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 346         bge,pn  %icc,.update10          ! (3_0) if( ax >= 0x7f800000 )
 347         fsubd   %f48,%f62,%f54          ! (1_0) xx0 = (db0 - hi0);
 348 .cont10:
 349         fmuld   %f34,%f46,%f52          ! (4_1) res0 *= xx0;
 350         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 351         ldd     [TBL+%o5],%f56          ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 352 
 353         fmuld   %f40,%f58,%f34          ! (3_1) res0 = dtmp0 * res0;
 354         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 355         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 356         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 357 
 358         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 359         bl,pn   %icc,.update11          ! (3_0) if( ax < 0x00800000 )
 360         ldd     [%fp+tmp1],%f62         ! (3_1) dtmp1 = *((double*)&lexp0);
 361         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 362 .cont11:
 363         fmuld   %f54,%f56,%f30          ! (1_0) xx0 *= dtmp0;
 364         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 365         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 366         faddd   %f50,K1,%f56            ! (0_0) res0 += K1;
 367 
 368         add     %i1,stridex,%i1         ! px += stridex
 369         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 370         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 371 
 372         fmuld   %f34,%f62,%f28          ! (3_1) res0 *= dtmp1;
 373         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 374         ldd     [%i3+8],%f50            ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 375         faddd   %f52,DC1,%f54           ! (4_1) res0 += DC1;
 376 
 377         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 378         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 379 
     ! The output pointer moves from %g5 to %i3 because %g5 is reused for
     ! the exponent value in the delay slot below.  With fewer than 5
     ! results to produce the loop is skipped and .tail drains the
     ! pipeline; otherwise 5 is subtracted up front and the loop entered.
 380         or      %g0,%g5,%i3
 381         cmp     counter,5
 382         bl,pn   %icc,.tail
 383         add     %o4,960,%g5             ! (3_0) iexp0 += 0x3c0;
 384 
 385         ba      .main_loop
 386         sub     counter,5,counter       ! counter
 387 
 388         .align  16
     ! Steady state of the software pipeline.  Each trip converts and
     ! stores five results (the st of %f15, %f19, %f23, %f27 and %f8),
     ! issues the loads and range checks for five further inputs, and
     ! subtracts 5 from the element counter; the loop repeats while that
     ! result is not negative.  The output pointer enters in %i3 and is
     ! handed on through %o3, %g5 and %g1 back into %i3.
     ! A look-ahead value that fails a range check branches to one of
     ! .update12 - .update21 (expected further down in this file), which
     ! presumably come back to the .contN label right after the check.
 389 .main_loop:
 390         fmuld   K2,%f30,%f60            ! (1_1) res0 = K2 * xx0;
 391         cmp     %o2,_0x7f800000         ! (4_1) ax ? 0x7f800000
 392         bge,pn  %icc,.update12          ! (4_1) if( ax >= 0x7f800000 )
 393         fsubd   %f40,%f46,%f44          ! (2_1) xx0 = (db0 - hi0);
 394 .cont12:
 395         fmuld   %f56,%f42,%f52          ! (0_1) res0 *= xx0;
 396         sllx    %g5,52,%g5              ! (3_1) lexp0 = (long long)iexp0 << 52;
 397         ldd     [%i2],%f40              ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 398         fdtos   %f32,%f15               ! (2_2) fres0 = (float)res0;
 399 
 400         fmuld   %f50,%f54,%f42          ! (4_2) res0 = dtmp0 * res0;
 401         sra     %o1,11,%l0              ! (3_1) ax >>= 11;
 402         stx     %g5,[%fp+tmp1]          ! (3_1) dtmp1 = *((double*)&lexp0);
 403         for     %f58,DC1,%f48           ! (3_1) db0 = vis_for(db0,DC1);
 404 
 405         cmp     %o2,_0x00800000         ! (4_1) ax ? 0x00800000
 406         bl,pn   %icc,.update13          ! (4_1) if( ax < 0x00800000 )
 407         ldd     [%fp+tmp2],%f56         ! (4_2) dtmp1 = *((double*)&lexp0);
 408         fstod   %f13,%f50               ! (4_1) db0 = (double)x0;
 409 .cont13:
 410         fmuld   %f44,%f40,%f46          ! (2_1) xx0 *= dtmp0;
 411         and     %l0,_0x1ff0,%i0         ! (3_1) si0 = ax & 0x1ff0;
 412         lda     [%i1+stridex]0x82,%l1   ! (0_0) ax = *(int*)px;
 413         faddd   %f60,K1,%f32            ! (1_1) res0 += K1;
 414 
 415         add     %i0,TBL,%l0             ! (3_1) (char*)TBL + si0
 416         add     %i3,stridey,%o3         ! py += stridey
 417         st      %f15,[%i3]              ! (2_2) *py = fres0;
 418         fand    %f48,DC2,%f62           ! (3_1) hi0 = vis_fand(db0,DC2);
 419 
 420         fmuld   %f42,%f56,%f44          ! (4_2) res0 *= dtmp1;
 421         sra     %o2,24,%o7              ! (4_1) iexp0 = ax >> 24;
 422         ldd     [%i5+8],%f58            ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 423         faddd   %f52,DC1,%f34           ! (0_1) res0 += DC1;
 424 
 425         add     %i1,stridex,%o4         ! px += stridex
 426         add     %o7,960,%o7             ! (4_1) iexp0 += 0x3c0;
 427         lda     [%i1+stridex]0x82,%f17  ! (0_0) x0 = *px;
 428         fand    %f50,DC0,%f54           ! (4_1) db0 = vis_fand(db0,DC0);
 429 
 430         fmuld   K2,%f46,%f52            ! (2_1) res0 = K2 * xx0;
 431         cmp     %l1,_0x7f800000         ! (0_0) ax ? 0x7f800000
 432         bge,pn  %icc,.update14          ! (0_0) if( ax >= 0x7f800000 )
 433         fsubd   %f48,%f62,%f42          ! (3_1) xx0 = (db0 - hi0);
 434 .cont14:
 435         fmuld   %f32,%f30,%f48          ! (1_1) res0 *= xx0;
 436         sllx    %o7,52,%o1              ! (4_1) lexp0 = (long long)iexp0 << 52;
 437         ldd     [%i0+TBL],%f40          ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 438         fdtos   %f28,%f19               ! (3_2) fres0 = (float)res0;
 439 
 440         fmuld   %f58,%f34,%f32          ! (0_1) res0 = dtmp0 * res0;
 441         sra     %o2,11,%i5              ! (4_1) ax >>= 11;
 442         stx     %o1,[%fp+tmp2]          ! (4_1) dtmp1 = *((double*)&lexp0);
 443         for     %f54,DC1,%f34           ! (4_1) db0 = vis_for(db0,DC1);
 444 
 445         cmp     %l1,_0x00800000         ! (0_0) ax ? 0x00800000
 446         bl,pn   %icc,.update15          ! (0_0) if( ax < 0x00800000 )
 447         ldd     [%fp+tmp3],%f60         ! (0_1) dtmp1 = *((double*)&lexp0);
 448         fstod   %f17,%f56               ! (0_0) db0 = (double)x0;
 449 .cont15:
 450         fmuld   %f42,%f40,%f42          ! (3_1) xx0 *= dtmp0;
 451         add     %o3,stridey,%g5         ! py += stridey
 452         lda     [stridex+%o4]0x82,%i0   ! (1_0) ax = *(int*)px;
 453         faddd   %f52,K1,%f52            ! (2_1) res0 += K1;
 454 
 455         sra     %l1,24,%g1              ! (0_0) iexp0 = ax >> 24;
 456         and     %i5,_0x1ff0,%i5         ! (4_1) si0 = ax & 0x1ff0;
 457         st      %f19,[%o3]              ! (3_2) *py = fres0;
 458         fand    %f34,DC2,%f62           ! (4_1) hi0 = vis_fand(db0,DC2);
 459 
 460         fmuld   %f32,%f60,%f40          ! (0_1) res0 *= dtmp1;
 461         add     %o4,stridex,%i1         ! px += stridex
 462         ldd     [%i4+8],%f60            ! (1_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 463         faddd   %f48,DC1,%f58           ! (1_1) res0 += DC1;
 464 
 465         add     %g1,960,%o5             ! (0_0) iexp0 += 0x3c0;
 466         add     %i5,TBL,%i3             ! (4_1) (char*)TBL + si0
 467         lda     [stridex+%o4]0x82,%f21  ! (1_0) x0 = *px;
 468         fand    %f56,DC0,%f32           ! (0_0) db0 = vis_fand(db0,DC0);
 469 
 470         fmuld   K2,%f42,%f50            ! (3_1) res0 = K2 * xx0;
 471         cmp     %i0,_0x7f800000         ! (1_0) ax ? 0x7f800000
 472         bge,pn  %icc,.update16          ! (1_0) if( ax >= 0x7f800000 )
 473         fsubd   %f34,%f62,%f54          ! (4_1) xx0 = (db0 - hi0);
 474 .cont16:
 475         fmuld   %f52,%f46,%f52          ! (2_1) res0 *= xx0;
 476         sllx    %o5,52,%o7              ! (0_0) lexp0 = (long long)iexp0 << 52;
 477         ldd     [TBL+%i5],%f62          ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[0];
 478         fdtos   %f44,%f23               ! (4_2) fres0 = (float)res0;
 479 
 480         fmuld   %f60,%f58,%f44          ! (1_1) res0 = dtmp0 * res0;
 481         sra     %l1,11,%i4              ! (0_0) ax >>= 11;
 482         stx     %o7,[%fp+tmp3]          ! (0_0) dtmp1 = *((double*)&lexp0);
 483         for     %f32,DC1,%f48           ! (0_0) db0 = vis_for(db0,DC1);
 484 
 485         cmp     %i0,_0x00800000         ! (1_0) ax ? 0x00800000
 486         bl,pn   %icc,.update17          ! (1_0) if( ax < 0x00800000 )
 487         ldd     [%fp+tmp4],%f34         ! (1_1) dtmp1 = *((double*)&lexp0);
 488         fstod   %f21,%f56               ! (1_0) db0 = (double)x0;
 489 .cont17:
 490         fmuld   %f54,%f62,%f46          ! (4_1) xx0 *= dtmp0;
 491         and     %i4,_0x1ff0,%g1         ! (0_0) si0 = ax & 0x1ff0;
 492         lda     [%i1+stridex]0x82,%o2   ! (2_0) ax = *(int*)px;
 493         faddd   %f50,K1,%f62            ! (3_1) res0 += K1;
 494 
 495         add     %g1,TBL,%i5             ! (0_0) (double*)((char*)TBL + si0
 496         add     %g5,stridey,%g5         ! py += stridey
 497         st      %f23,[stridey+%o3]      ! (4_2) *py = fres0;
 498         fand    %f48,DC2,%f32           ! (0_0) hi0 = vis_fand(db0,DC2);
 499 
 500         fmuld   %f44,%f34,%f44          ! (1_1) res0 *= dtmp1;
 501         sra     %i0,24,%o4              ! (1_0) iexp0 = ax >> 24;
 502         ldd     [%i2+8],%f60            ! (2_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 503         faddd   %f52,DC1,%f58           ! (2_1) res0 += DC1;
 504 
 505         add     %i1,stridex,%o7         ! px += stridex
 506         add     %o4,960,%i2             ! (1_0) iexp0 += 0x3c0;
 507         lda     [%i1+stridex]0x82,%f25  ! (2_0) x0 = *px;
 508         fand    %f56,DC0,%f34           ! (1_0) db0 = vis_fand(db0,DC0);
 509 
 510         fmuld   K2,%f46,%f50            ! (4_1) res0 = K2 * xx0;
 511         cmp     %o2,_0x7f800000         ! (2_0) ax ? 0x7f800000
 512         bge,pn  %icc,.update18          ! (2_0) if( ax >= 0x7f800000 )
 513         fsubd   %f48,%f32,%f52          ! (0_0) xx0 = (db0 - hi0);
 514 .cont18:
 515         fmuld   %f62,%f42,%f54          ! (3_1) res0 *= xx0;
 516         sllx    %i2,52,%o4              ! (1_0) lexp0 = (long long)iexp0 << 52;
 517         ldd     [TBL+%g1],%f32          ! (0_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 518         fdtos   %f40,%f27               ! (0_1) fres0 = (float)res0;
 519 
 520         fmuld   %f60,%f58,%f60          ! (2_1) res0 = dtmp0 * res0;
 521         sra     %i0,11,%g1              ! (1_0) ax >>= 11;
 522         stx     %o4,[%fp+tmp4]          ! (1_0) dtmp1 = *((double*)&lexp0);
 523         for     %f34,DC1,%f48           ! (1_0) db0 = vis_for(db0,DC1);
 524 
 525         cmp     %o2,_0x00800000         ! (2_0) ax ? 0x00800000
 526         bl,pn   %icc,.update19          ! (2_0) if( ax < 0x00800000 )
 527         ldd     [%fp+tmp0],%f40         ! (2_1) dtmp1 = *((double*)&lexp0);
 528         fstod   %f25,%f56               ! (2_0) db0 = (double)x0;
 529 .cont19:
 530         fmuld   %f52,%f32,%f42          ! (0_0) xx0 *= dtmp0;
 531         and     %g1,_0x1ff0,%o5         ! (1_0) si0 = ax & 0x1ff0;
 532         lda     [stridex+%o7]0x82,%o1   ! (3_0) ax = *(int*)px;
 533         faddd   %f50,K1,%f34            ! (4_1) res0 += K1;
 534 
 535         add     %o5,TBL,%i4             ! (1_0) (char*)TBL + si0
 536         add     %g5,stridey,%g1         ! py += stridey
 537         st      %f27,[%g5]              ! (0_1) *py = fres0;
 538         fand    %f48,DC2,%f62           ! (1_0) hi0 = vis_fand(db0,DC2);
 539 
 540         fmuld   %f60,%f40,%f32          ! (2_1) res0 *= dtmp1;
 541         sra     %o2,24,%l1              ! (2_0) iexp0 = ax >> 24;
 542         ldd     [%l0+8],%f40            ! (3_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 543         faddd   %f54,DC1,%f58           ! (3_1) res0 += DC1;
 544 
 545         add     %o7,stridex,%i1         ! px += stridex
 546         add     %l1,960,%l0             ! (2_0) iexp0 += 0x3c0;
 547         lda     [stridex+%o7]0x82,%f0   ! (3_0) x0 = *px;
 548         fand    %f56,DC0,%f60           ! (2_0) db0 = vis_fand(db0,DC0);
 549 
 550         fmuld   K2,%f42,%f50            ! (0_0) res0 = K2 * xx0;
 551         cmp     %o1,_0x7f800000         ! (3_0) ax ? 0x7f800000
 552         bge,pn  %icc,.update20          ! (3_0) if( ax >= 0x7f800000 )
 553         fsubd   %f48,%f62,%f54          ! (1_0) xx0 = (db0 - hi0);
 554 .cont20:
 555         fmuld   %f34,%f46,%f52          ! (4_1) res0 *= xx0;
 556         sllx    %l0,52,%o3              ! (2_0) lexp0 = (long long)iexp0 << 52;
 557         ldd     [TBL+%o5],%f56          ! (1_0) dtmp0 = ((double*)((char*)TBL + si0))[0];
 558         fdtos   %f44,%f8                ! (1_1) fres0 = (float)res0;
 559 
 560         fmuld   %f40,%f58,%f34          ! (3_1) res0 = dtmp0 * res0;
 561         sra     %o2,11,%i2              ! (2_0) ax >>= 11;
 562         stx     %o3,[%fp+tmp0]          ! (2_0) dtmp1 = *((double*)&lexp0);
 563         for     %f60,DC1,%f40           ! (2_0) db0 = vis_for(db0,DC1);
 564 
 565         cmp     %o1,_0x00800000         ! (3_0) ax ? 0x00800000
 566         bl,pn   %icc,.update21          ! (3_0) if( ax < 0x00800000 )
 567         ldd     [%fp+tmp1],%f62         ! (3_1) dtmp1 = *((double*)&lexp0);
 568         fstod   %f0,%f48                ! (3_0) db0 = (double)x0;
 569 .cont21:
 570         fmuld   %f54,%f56,%f30          ! (1_0) xx0 *= dtmp0;
 571         and     %i2,_0x1ff0,%o3         ! (2_0) si0 = ax & 0x1ff0;
 572         lda     [%i1+stridex]0x82,%o2   ! (4_0) ax = *(int*)px;
 573         faddd   %f50,K1,%f56            ! (0_0) res0 += K1;
 574 
 575         add     %i1,stridex,%i1         ! px += stridex
 576         add     %o3,TBL,%i2             ! (2_0) (char*)TBL + si0
 577         st      %f8,[stridey+%g5]       ! (1_1) *py = fres0;
 578         fand    %f40,DC2,%f46           ! (2_0) hi0 = vis_fand(db0,DC2);
 579 
 580         fmuld   %f34,%f62,%f28          ! (3_1) res0 *= dtmp1;
 581         sra     %o1,24,%o4              ! (3_0) iexp0 = ax >> 24;
 582         ldd     [%i3+8],%f50            ! (4_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 583         faddd   %f52,DC1,%f54           ! (4_1) res0 += DC1;
 584 
 585         add     %g1,stridey,%i3         ! py += stridey
 586         subcc   counter,5,counter       ! counter
 587         lda     [%i1]0x82,%f13          ! (4_0) x0 = *px;
 588         fand    %f48,DC0,%f58           ! (3_0) db0 = vis_fand(db0,DC0);
 589 
 590         bpos,pt %icc,.main_loop
 591         add     %o4,960,%g5             ! (3_0) iexp0 += 0x3c0;
 592 
     ! Loop exit: undo the last subtraction so that the element counter
     ! again holds the number of results still owed (0 to 4) for the
     ! drain code that follows.
 593         add     counter,5,counter
 594 .tail:
     ! Pipeline drain.  On entry the element counter is the number of
     ! results still to be stored and %i3 is the output pointer.  Each
     ! stage below decrements the counter and, once it goes negative,
     ! returns to .begin with the current output pointer in %g5;
     ! otherwise it finishes and stores one more in-flight result.  At
     ! most four results are stored here.  The annulled (,a) branches
     ! execute their delay slot only when the branch is taken.
 595         subcc   counter,1,counter
 596         bneg,a  .begin
 597         or      %g0,%i3,%g5
 598 
 599         fmuld   %f56,%f42,%f52          ! (0_1) res0 *= xx0;
 600         fdtos   %f32,%f15               ! (2_2) fres0 = (float)res0;
 601 
 602         fmuld   %f50,%f54,%f42          ! (4_2) res0 = dtmp0 * res0;
 603 
 604         ldd     [%fp+tmp2],%f56         ! (4_2) dtmp1 = *((double*)&lexp0);
 605 
 606         add     %i3,stridey,%o3         ! py += stridey
 607         st      %f15,[%i3]              ! (2_2) *py = fres0;
 608 
 609         subcc   counter,1,counter
 610         bneg,a  .begin
 611         or      %g0,%o3,%g5
 612 
 613         fmuld   %f42,%f56,%f44          ! (4_2) res0 *= dtmp1;
 614         ldd     [%i5+8],%f58            ! (0_1) dtmp0 = ((double*)((char*)TBL + si0))[1]
 615         faddd   %f52,DC1,%f34           ! (0_1) res0 += DC1;
 616 
 617         fdtos   %f28,%f19               ! (3_2) fres0 = (float)res0;
 618 
 619         fmuld   %f58,%f34,%f32          ! (0_1) res0 = dtmp0 * res0;
 620 
 621         ldd     [%fp+tmp3],%f60         ! (0_1) dtmp1 = *((double*)&lexp0);
 622 
     ! From here on %g5 already tracks the output pointer, so the later
     ! early-exit branches need nothing in their delay slots.
 623         add     %o3,stridey,%g5         ! py += stridey
 624 
 625         st      %f19,[%o3]              ! (3_2) *py = fres0;
 626 
 627         subcc   counter,1,counter
 628         bneg,a  .begin
 629         nop
 630 
 631         fmuld   %f32,%f60,%f40          ! (0_1) res0 *= dtmp1;
 632 
 633         fdtos   %f44,%f23               ! (4_2) fres0 = (float)res0;
 634 
 635         add     %g5,stridey,%g5         ! py += stridey
 636         st      %f23,[stridey+%o3]      ! (4_2) *py = fres0;
 637 
 638         subcc   counter,1,counter
 639         bneg,a  .begin
 640         nop
 641 
 642         fdtos   %f40,%f27               ! (0_1) fres0 = (float)res0;
 643 
 644         st      %f27,[%g5]              ! (0_1) *py = fres0;
 645 
     ! Fourth and last result stored: advance the output pointer in the
     ! delay slot and restart.
 646         ba      .begin
 647         add     %g5,stridey,%g5
 648 
 649         .align  16
     ! Slow path for a single element whose raw bits failed one of the
     ! range checks at .begin1.  The value already loaded into %f25 is
     ! handed to the fsqrts instruction and the result stored through
     ! the output pointer in %g5.  The element counter drops by one, the
     ! input pointer in %i1 and the output pointer each advance by one
     ! stride (the latter in the branch delay slot), and control returns
     ! to .begin1 for the next element.
 650 .spec:
 651         fsqrts  %f25,%f25
 652         sub     counter,1,counter
 653         add     %i1,stridex,%i1
 654         st      %f25,[%g5]
 655         ba      .begin1
 656         add     %g5,stridey,%g5
 657 
 658         .align  16
     ! Handler for the range check just before .cont0: the look-ahead
     ! input one element after the one loaded at .begin1 has raw bits
     ! >= 0x7f800000.
     ! fzeros sits in the delay slot of a non-annulled branch, so the
     ! look-ahead value in %f0 is replaced by zero on both paths
     ! (presumably so later FP work on it stays harmless).
     ! If the element counter is <= 1 the look-ahead element lies beyond
     ! the remaining work and nothing else is done.  Otherwise the run
     ! is cut short in front of it: its address (%i1) and the count from
     ! it onward (counter - 1) are saved for the restart at .begin, the
     ! counter becomes 1, and %o1 is set to 0x7f800000 so that the
     ! lower-bound check after .cont0 falls through.
 659 .update0:
 660         cmp     counter,1
 661         ble     .cont0
 662         fzeros  %f0
 663 
 664         stx     %i1,[%fp+tmp_px]
 665         sethi   %hi(0x7f800000),%o1
 666 
 667         sub     counter,1,counter
 668         st      counter,[%fp+tmp_counter]
 669 
 670         ba      .cont0
 671         or      %g0,1,counter
 672 
 673         .align  16
     ! Handler for the range check just before .cont1: the look-ahead
     ! input one element after the one loaded at .begin1 has raw bits
     ! below 0x00800000 (signed compare).
     ! fzeros sits in the delay slot of a non-annulled branch, so the
     ! look-ahead value in %f0 is replaced by zero on both paths.
     ! If the element counter is <= 1 the look-ahead element lies beyond
     ! the remaining work and nothing else is done.  Otherwise the run
     ! is cut short in front of it: its address (%i1) and the count from
     ! it onward (counter - 1) are saved for the restart at .begin, the
     ! counter becomes 1, and the raw bits in %o1 are cleared.
 674 .update1:
 675         cmp     counter,1
 676         ble     .cont1
 677         fzeros  %f0
 678 
 679         stx     %i1,[%fp+tmp_px]
 680         clr     %o1
 681 
 682         sub     counter,1,counter
 683         st      counter,[%fp+tmp_counter]
 684 
 685         ba      .cont1
 686         or      %g0,1,counter
 687 
 688         .align  16
 689 .update2:                                  ! Batch-split stub: cap counter at 2, stash the rest, rejoin at .cont2
 690         cmp     counter,2                  ! set flags from counter - 2
 691         ble     .cont2                     ! counter <= 2 (signed): nothing to split, rejoin at .cont2
 692         fzeros  %f13                       ! delay slot, runs whether or not ble is taken: %f13 = all-zero bits
 693 
 694         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 695         sethi   %hi(0x7f800000),%o2        ! %o2 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 696 
 697         sub     counter,2,counter          ! counter = elements left over after the first 2
 698         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 699 
 700         ba      .cont2                     ! rejoin the main path at .cont2
 701         or      %g0,2,counter              ! delay slot: counter = 2
 702 
 703         .align  16
 704 .update3:                                  ! Batch-split stub: cap counter at 2, stash the rest, rejoin at .cont3
 705         cmp     counter,2                  ! set flags from counter - 2
 706         ble     .cont3                     ! counter <= 2 (signed): nothing to split, rejoin at .cont3
 707         fzeros  %f13                       ! delay slot, runs whether or not ble is taken: %f13 = all-zero bits
 708 
 709         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 710         clr     %o2                        ! %o2 = 0; role not visible here - TODO confirm
 711 
 712         sub     counter,2,counter          ! counter = elements left over after the first 2
 713         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 714 
 715         ba      .cont3                     ! rejoin the main path at .cont3
 716         or      %g0,2,counter              ! delay slot: counter = 2
 717 
 718         .align  16
 719 .update4:                                  ! Batch-split stub: cap counter at 3, stash the rest, rejoin at .cont4
 720         cmp     counter,3                  ! set flags from counter - 3
 721         ble     .cont4                     ! counter <= 3 (signed): nothing to split, rejoin at .cont4
 722         fzeros  %f17                       ! delay slot, runs whether or not ble is taken: %f17 = all-zero bits
 723 
 724         stx     %o4,[%fp+tmp_px]           ! tmp_px = %o4 (64-bit store); presumably the restart pointer - TODO confirm
 725         sethi   %hi(0x7f800000),%l1        ! %l1 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 726 
 727         sub     counter,3,counter          ! counter = elements left over after the first 3
 728         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 729 
 730         ba      .cont4                     ! rejoin the main path at .cont4
 731         or      %g0,3,counter              ! delay slot: counter = 3
 732 
 733         .align  16
 734 .update5:                                  ! Batch-split stub: cap counter at 3, stash the rest, rejoin at .cont5
 735         cmp     counter,3                  ! set flags from counter - 3
 736         ble     .cont5                     ! counter <= 3 (signed): nothing to split, rejoin at .cont5
 737         fzeros  %f17                       ! delay slot, runs whether or not ble is taken: %f17 = all-zero bits
 738 
 739         stx     %o4,[%fp+tmp_px]           ! tmp_px = %o4 (64-bit store); presumably the restart pointer - TODO confirm
 740         clr     %l1                        ! %l1 = 0; role not visible here - TODO confirm
 741 
 742         sub     counter,3,counter          ! counter = elements left over after the first 3
 743         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 744 
 745         ba      .cont5                     ! rejoin the main path at .cont5
 746         or      %g0,3,counter              ! delay slot: counter = 3
 747 
 748         .align  16
 749 .update6:                                  ! Batch-split stub: cap counter at 4, stash the rest, rejoin at .cont6
 750         cmp     counter,4                  ! set flags from counter - 4
 751         ble     .cont6                     ! counter <= 4 (signed): nothing to split, rejoin at .cont6
 752         fzeros  %f21                       ! delay slot, runs whether or not ble is taken: %f21 = all-zero bits
 753 
 754         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 755         sethi   %hi(0x7f800000),%i0        ! %i0 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 756 
 757         sub     counter,4,counter          ! counter = elements left over after the first 4
 758         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 759 
 760         ba      .cont6                     ! rejoin the main path at .cont6
 761         or      %g0,4,counter              ! delay slot: counter = 4
 762 
 763         .align  16
 764 .update7:                                  ! Batch-split stub: cap counter at 4, stash the rest, rejoin at .cont7
 765         cmp     counter,4                  ! set flags from counter - 4
 766         ble     .cont7                     ! counter <= 4 (signed): nothing to split, rejoin at .cont7
 767         fzeros  %f21                       ! delay slot, runs whether or not ble is taken: %f21 = all-zero bits
 768 
 769         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 770         clr     %i0                        ! %i0 = 0; role not visible here - TODO confirm
 771 
 772         sub     counter,4,counter          ! counter = elements left over after the first 4
 773         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 774 
 775         ba      .cont7                     ! rejoin the main path at .cont7
 776         or      %g0,4,counter              ! delay slot: counter = 4
 777 
 778         .align  16
 779 .update8:                                  ! Batch-split stub: cap counter at 5, stash the rest, rejoin at .cont8
 780         cmp     counter,5                  ! set flags from counter - 5
 781         ble     .cont8                     ! counter <= 5 (signed): nothing to split, rejoin at .cont8
 782         fzeros  %f25                       ! delay slot, runs whether or not ble is taken: %f25 = all-zero bits
 783 
 784         stx     %o7,[%fp+tmp_px]           ! tmp_px = %o7 (64-bit store); presumably the restart pointer - TODO confirm
 785         sethi   %hi(0x7f800000),%o2        ! %o2 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 786 
 787         sub     counter,5,counter          ! counter = elements left over after the first 5
 788         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 789 
 790         ba      .cont8                     ! rejoin the main path at .cont8
 791         or      %g0,5,counter              ! delay slot: counter = 5
 792 
 793         .align  16
 794 .update9:                                  ! Batch-split stub: cap counter at 5, stash the rest, rejoin at .cont9
 795         cmp     counter,5                  ! set flags from counter - 5
 796         ble     .cont9                     ! counter <= 5 (signed): nothing to split, rejoin at .cont9
 797         fzeros  %f25                       ! delay slot, runs whether or not ble is taken: %f25 = all-zero bits
 798 
 799         stx     %o7,[%fp+tmp_px]           ! tmp_px = %o7 (64-bit store); presumably the restart pointer - TODO confirm
 800         clr     %o2                        ! %o2 = 0; role not visible here - TODO confirm
 801 
 802         sub     counter,5,counter          ! counter = elements left over after the first 5
 803         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 804 
 805         ba      .cont9                     ! rejoin the main path at .cont9
 806         or      %g0,5,counter              ! delay slot: counter = 5
 807 
 808         .align  16
 809 .update10:                                 ! Batch-split stub: cap counter at 6, stash the rest, rejoin at .cont10
 810         cmp     counter,6                  ! set flags from counter - 6
 811         ble     .cont10                    ! counter <= 6 (signed): nothing to split, rejoin at .cont10
 812         fzeros  %f0                        ! delay slot, runs whether or not ble is taken: %f0 = all-zero bits
 813 
 814         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 815         sethi   %hi(0x7f800000),%o1        ! %o1 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 816 
 817         sub     counter,6,counter          ! counter = elements left over after the first 6
 818         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 819 
 820         ba      .cont10                    ! rejoin the main path at .cont10
 821         or      %g0,6,counter              ! delay slot: counter = 6
 822 
 823         .align  16
 824 .update11:                                 ! Batch-split stub: cap counter at 6, stash the rest, rejoin at .cont11
 825         cmp     counter,6                  ! set flags from counter - 6
 826         ble     .cont11                    ! counter <= 6 (signed): nothing to split, rejoin at .cont11
 827         fzeros  %f0                        ! delay slot, runs whether or not ble is taken: %f0 = all-zero bits
 828 
 829         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 830         clr     %o1                        ! %o1 = 0; role not visible here - TODO confirm
 831 
 832         sub     counter,6,counter          ! counter = elements left over after the first 6
 833         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 834 
 835         ba      .cont11                    ! rejoin the main path at .cont11
 836         or      %g0,6,counter              ! delay slot: counter = 6
 837 
 838         .align  16
 839 .update12:                                 ! Batch-split stub: cap counter at 2, stash the rest, rejoin at .cont12
 840         cmp     counter,2                  ! set flags from counter - 2
 841         ble     .cont12                    ! counter <= 2 (signed): nothing to split, rejoin at .cont12
 842         fzeros  %f13                       ! delay slot, runs whether or not ble is taken: %f13 = all-zero bits
 843 
 844         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 845         sethi   %hi(0x7f800000),%o2        ! %o2 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 846 
 847         sub     counter,2,counter          ! counter = elements left over after the first 2
 848         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 849 
 850         ba      .cont12                    ! rejoin the main path at .cont12
 851         or      %g0,2,counter              ! delay slot: counter = 2
 852 
 853         .align  16
 854 .update13:                                 ! Batch-split stub: cap counter at 2, stash the rest, rejoin at .cont13
 855         cmp     counter,2                  ! set flags from counter - 2
 856         ble     .cont13                    ! counter <= 2 (signed): nothing to split, rejoin at .cont13
 857         fzeros  %f13                       ! delay slot, runs whether or not ble is taken: %f13 = all-zero bits
 858 
 859         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 860         clr     %o2                        ! %o2 = 0; role not visible here - TODO confirm
 861 
 862         sub     counter,2,counter          ! counter = elements left over after the first 2
 863         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 864 
 865         ba      .cont13                    ! rejoin the main path at .cont13
 866         or      %g0,2,counter              ! delay slot: counter = 2
 867 
 868         .align  16
 869 .update14:                                 ! Batch-split stub: cap counter at 3, stash the rest, rejoin at .cont14
 870         cmp     counter,3                  ! set flags from counter - 3
 871         ble     .cont14                    ! counter <= 3 (signed): nothing to split, rejoin at .cont14
 872         fzeros  %f17                       ! delay slot, runs whether or not ble is taken: %f17 = all-zero bits
 873 
 874         stx     %o4,[%fp+tmp_px]           ! tmp_px = %o4 (64-bit store); presumably the restart pointer - TODO confirm
 875         sethi   %hi(0x7f800000),%l1        ! %l1 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 876 
 877         sub     counter,3,counter          ! counter = elements left over after the first 3
 878         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 879 
 880         ba      .cont14                    ! rejoin the main path at .cont14
 881         or      %g0,3,counter              ! delay slot: counter = 3
 882 
 883         .align  16
 884 .update15:                                 ! Batch-split stub: cap counter at 3, stash the rest, rejoin at .cont15
 885         cmp     counter,3                  ! set flags from counter - 3
 886         ble     .cont15                    ! counter <= 3 (signed): nothing to split, rejoin at .cont15
 887         fzeros  %f17                       ! delay slot, runs whether or not ble is taken: %f17 = all-zero bits
 888 
 889         stx     %o4,[%fp+tmp_px]           ! tmp_px = %o4 (64-bit store); presumably the restart pointer - TODO confirm
 890         clr     %l1                        ! %l1 = 0; role not visible here - TODO confirm
 891 
 892         sub     counter,3,counter          ! counter = elements left over after the first 3
 893         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 894 
 895         ba      .cont15                    ! rejoin the main path at .cont15
 896         or      %g0,3,counter              ! delay slot: counter = 3
 897 
 898         .align  16
 899 .update16:                                 ! Batch-split stub: cap counter at 4, stash the rest, rejoin at .cont16
 900         cmp     counter,4                  ! set flags from counter - 4
 901         ble     .cont16                    ! counter <= 4 (signed): nothing to split, rejoin at .cont16
 902         fzeros  %f21                       ! delay slot, runs whether or not ble is taken: %f21 = all-zero bits
 903 
 904         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 905         sethi   %hi(0x7f800000),%i0        ! %i0 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 906 
 907         sub     counter,4,counter          ! counter = elements left over after the first 4
 908         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 909 
 910         ba      .cont16                    ! rejoin the main path at .cont16
 911         or      %g0,4,counter              ! delay slot: counter = 4
 912 
 913         .align  16
 914 .update17:                                 ! Batch-split stub: cap counter at 4, stash the rest, rejoin at .cont17
 915         cmp     counter,4                  ! set flags from counter - 4
 916         ble     .cont17                    ! counter <= 4 (signed): nothing to split, rejoin at .cont17
 917         fzeros  %f21                       ! delay slot, runs whether or not ble is taken: %f21 = all-zero bits
 918 
 919         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 920         clr     %i0                        ! %i0 = 0; role not visible here - TODO confirm
 921 
 922         sub     counter,4,counter          ! counter = elements left over after the first 4
 923         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 924 
 925         ba      .cont17                    ! rejoin the main path at .cont17
 926         or      %g0,4,counter              ! delay slot: counter = 4
 927 
 928         .align  16
 929 .update18:                                 ! Batch-split stub: cap counter at 5, stash the rest, rejoin at .cont18
 930         cmp     counter,5                  ! set flags from counter - 5
 931         ble     .cont18                    ! counter <= 5 (signed): nothing to split, rejoin at .cont18
 932         fzeros  %f25                       ! delay slot, runs whether or not ble is taken: %f25 = all-zero bits
 933 
 934         stx     %o7,[%fp+tmp_px]           ! tmp_px = %o7 (64-bit store); presumably the restart pointer - TODO confirm
 935         sethi   %hi(0x7f800000),%o2        ! %o2 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 936 
 937         sub     counter,5,counter          ! counter = elements left over after the first 5
 938         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 939 
 940         ba      .cont18                    ! rejoin the main path at .cont18
 941         or      %g0,5,counter              ! delay slot: counter = 5
 942 
 943         .align  16
 944 .update19:                                 ! Batch-split stub: cap counter at 5, stash the rest, rejoin at .cont19
 945         cmp     counter,5                  ! set flags from counter - 5
 946         ble     .cont19                    ! counter <= 5 (signed): nothing to split, rejoin at .cont19
 947         fzeros  %f25                       ! delay slot, runs whether or not ble is taken: %f25 = all-zero bits
 948 
 949         stx     %o7,[%fp+tmp_px]           ! tmp_px = %o7 (64-bit store); presumably the restart pointer - TODO confirm
 950         clr     %o2                        ! %o2 = 0; role not visible here - TODO confirm
 951 
 952         sub     counter,5,counter          ! counter = elements left over after the first 5
 953         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 954 
 955         ba      .cont19                    ! rejoin the main path at .cont19
 956         or      %g0,5,counter              ! delay slot: counter = 5
 957 
 958         .align  16
 959 .update20:                                 ! Batch-split stub: cap counter at 6, stash the rest, rejoin at .cont20
 960         cmp     counter,6                  ! set flags from counter - 6
 961         ble     .cont20                    ! counter <= 6 (signed): nothing to split, rejoin at .cont20
 962         fzeros  %f0                        ! delay slot, runs whether or not ble is taken: %f0 = all-zero bits
 963 
 964         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 965         sethi   %hi(0x7f800000),%o1        ! %o1 = 0x7f800000 (low 10 bits are 0, so sethi alone builds it); role not visible here - TODO confirm
 966 
 967         sub     counter,6,counter          ! counter = elements left over after the first 6
 968         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 969 
 970         ba      .cont20                    ! rejoin the main path at .cont20
 971         or      %g0,6,counter              ! delay slot: counter = 6
 972 
 973         .align  16
 974 .update21:                                 ! Batch-split stub: cap counter at 6, stash the rest, rejoin at .cont21
 975         cmp     counter,6                  ! set flags from counter - 6
 976         ble     .cont21                    ! counter <= 6 (signed): nothing to split, rejoin at .cont21
 977         fzeros  %f0                        ! delay slot, runs whether or not ble is taken: %f0 = all-zero bits
 978 
 979         stx     %i1,[%fp+tmp_px]           ! tmp_px = %i1 (64-bit store); presumably the restart pointer - TODO confirm
 980         clr     %o1                        ! %o1 = 0; role not visible here - TODO confirm
 981 
 982         sub     counter,6,counter          ! counter = elements left over after the first 6
 983         st      counter,[%fp+tmp_counter]  ! tmp_counter = leftover count (32-bit store)
 984 
 985         ba      .cont21                    ! rejoin the main path at .cont21
 986         or      %g0,6,counter              ! delay slot: counter = 6
 987 
 988 .exit:                                     ! Function epilogue
 989         ret                                ! return to the caller
 990         restore                            ! delay slot: restore the caller register window
 991         SET_SIZE(__vsqrtf_ultra3)          ! macro defined outside this file; presumably records the symbol size - TODO confirm
 992