1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vrhypotf.S"
  30 
  31 #include "libm.h"
  32 
        RO_DATA
        .align  64
.CONST_TBL:
! Lookup table for the reciprocal-sqrt refinement, generated as follows:
! i = [0,63]
! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46)));
! TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46))));
!
! Each row below is one entry pair (two doubles, big-endian word pairs):
! the reciprocal of the mantissa value m_i, followed by a scaled 1/sqrt(m_i).
! First half, TBL[0..127]: 1/sqrt entries scaled by 0.5/sqrt(2).

        .word   0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd,
        .word   0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03,
        .word   0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2,
        .word   0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671,
        .word   0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911,
        .word   0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342,
        .word   0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a,
        .word   0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9,
        .word   0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555,
        .word   0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54,
        .word   0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70,
        .word   0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032,
        .word   0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74,
        .word   0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92,
        .word   0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f,
        .word   0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3,
        .word   0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f,
        .word   0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199,
        .word   0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577,
        .word   0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58,
        .word   0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03,
        .word   0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37,
        .word   0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e,
        .word   0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92,
        .word   0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826,
        .word   0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0,
        .word   0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91,
        .word   0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50,
        .word   0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e,
        .word   0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428,
        .word   0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4,
        .word   0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5,
        .word   0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c,
        .word   0x3fe51d07, 0xeae2f815, 0x3fd26135, 0xe91daf55,
        .word   0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492,
        .word   0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a,
        .word   0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a,
        .word   0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d,
        .word   0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9,
        .word   0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3,
        .word   0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896,
        .word   0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f,
        .word   0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9,
        .word   0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee,
        .word   0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4,
        .word   0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62,
        .word   0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db,
        .word   0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253,
        .word   0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a,
        .word   0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26,
        .word   0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad,
        .word   0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c,
        .word   0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc,
        .word   0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412,
        .word   0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488,
        .word   0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499,
        .word   0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db,
        .word   0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438,
        .word   0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a,
        .word   0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa,
        .word   0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d,
        .word   0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72,
        .word   0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a,
        .word   0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9,
! Second half, TBL[128..255]: same reciprocals, 1/sqrt entries scaled by 0.25.
        .word   0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000,
        .word   0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9,
        .word   0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b,
        .word   0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc,
        .word   0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c,
        .word   0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957,
        .word   0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2,
        .word   0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc,
        .word   0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66,
        .word   0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350,
        .word   0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549,
        .word   0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d,
        .word   0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937,
        .word   0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86,
        .word   0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213,
        .word   0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358,
        .word   0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9,
        .word   0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c,
        .word   0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2,
        .word   0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b,
        .word   0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39,
        .word   0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118,
        .word   0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347,
        .word   0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11,
        .word   0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550,
        .word   0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e,
        .word   0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169,
        .word   0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394,
        .word   0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a,
        .word   0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c,
        .word   0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7,
        .word   0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899,
        .word   0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e,
        .word   0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee,
        .word   0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458,
        .word   0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588,
        .word   0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a,
        .word   0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54,
        .word   0x3fe41414, 0x14141414, 0x3fc95907, 0xeb87ab44,
        .word   0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31,
        .word   0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c,
        .word   0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96,
        .word   0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009,
        .word   0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3,
        .word   0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426,
        .word   0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6,
        .word   0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d,
        .word   0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2,
        .word   0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7,
        .word   0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d,
        .word   0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1,
        .word   0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5,
        .word   0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88,
        .word   0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72,
        .word   0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729,
        .word   0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea,
        .word   0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098,
        .word   0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746,
        .word   0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5,
        .word   0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f,
        .word   0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467,
        .word   0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1,
        .word   0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d,
        .word   0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6,

! Auxiliary constants, loaded in the prologue from [TBL + TBL_SHIFT + n]
! (TBL_SHIFT = 2048 bytes = the 256 doubles above).
        .word   0x000fffff, 0xffffffff  ! DC0
        .word   0x3ff00000, 0           ! DC1
        .word   0x7fffc000, 0           ! DC2
        .word   0x7fe00000, 0           ! DA0
        .word   0x60000000, 0           ! DA1
        .word   0x80808080, 0x3f800000  ! SCALE , FONE = 1.0f
! Minimax polynomial coefficients for the correction term (see algorithm
! comments below: dtmp2 = ((KA3*xx + KA2)*xx + KA1)*xx + KA0).
        .word   0x3fefffff, 0xfee7f18f  ! KA0 =  9.99999997962321453275e-01
        .word   0xbfdfffff, 0xfe07e52f  ! KA1 = -4.99999998166077580600e-01
        .word   0x3fd80118, 0x0ca296d9  ! KA2 = 3.75066768969515586277e-01
        .word   0xbfd400fc, 0x0bbb8e78  ! KA3 = -3.12560092408808548438e-01
 181 
! Integer-register aliases for the special-value thresholds.
! Both are materialized once in the prologue (sethi/add) and then
! compared against on every element.  Comments must stay on their own
! lines here: cpp would paste trailing text into every macro expansion.
#define _0x7f800000     %o0
#define _0x7fffffff     %o7
! Base pointer to .CONST_TBL, set up via PIC_SETUP/PIC_SET in the prologue.
#define TBL             %l2

! Byte offset of the auxiliary constants (DC0..KA3) from .CONST_TBL:
! 256 table doubles * 8 bytes = 2048.
#define TBL_SHIFT       2048

! Per-element strides; the incoming int strides are scaled to byte
! strides (<< 2, i.e. sizeof (float)) in the prologue.
#define stridex         %l3
#define stridey         %l4
#define stridez         %l5
! Remaining element count; spilled to/reloaded from tmp_counter.
#define counter         %i0

! FP-register aliases for the auxiliary constants loaded from the table.
#define DA0             %f52
#define DA1             %f44
#define SCALE           %f6

#define DC0             %f46
#define DC1             %f8
#define FZERO           %f9
#define DC2             %f50

! Polynomial coefficients (values documented at the table above).
#define KA3             %f56
#define KA2             %f58
#define KA1             %f60
#define KA0             %f54

! Stack-frame temporaries, addressed as [%fp + offset] (V9 stack bias
! included via STACK_BIAS).
#define tmp_counter     STACK_BIAS-0x04
#define tmp_px          STACK_BIAS-0x20
#define tmp_py          STACK_BIAS-0x18

! Scratch slots used to shuttle hyp0 between FP and integer registers
! (st %fN -> ld %iN).
! NOTE(review): ftmp0/ftmp2 and ftmp1/ftmp3 alias the same offsets; the
! pipelined stores/loads are interleaved so this looks like deliberate
! slot reuse -- confirm before changing any of these offsets.
#define ftmp0           STACK_BIAS-0x10
#define ftmp1           STACK_BIAS-0x0c
#define ftmp2           STACK_BIAS-0x10
#define ftmp3           STACK_BIAS-0x0c
#define ftmp4           STACK_BIAS-0x08

! sizeof temp storage - must be a multiple of 16 for V9
#define tmps            0x20
 219 
 220 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 221 !      !!!!!   algorithm   !!!!!
 222 !  x0 = *px;
 223 !  ax = *(int*)px;
 224 !
 225 !  y0 = *py;
 226 !  ay = *(int*)py;
 227 !
 228 !  ax &= 0x7fffffff;
 229 !  ay &= 0x7fffffff;
 230 !
 231 !  px += stridex;
 232 !  py += stridey;
 233 !
 234 !  if ( ax >= 0x7f800000 || ay >= 0x7f800000 )
 235 !  {
 236 !    *pz = fabsf(x0) * fabsf(y0);
 237 !    if( ax == 0x7f800000 ) *pz = 0.0f;
 238 !    else if( ay == 0x7f800000 ) *pz = 0.0f;
 239 !    pz += stridez;
 240 !    continue;
 241 !  }
 242 !
 243 !  if ( ay == 0 )
 244 !  {
 245 !    if ( ax == 0 )
 246 !    {
 247 !      *pz = 1.0f / 0.0f;
 248 !      pz += stridez;
 249 !      continue;
 250 !    }
 251 !  }
 252 !
 253 !  hyp0 = x0 * (double)x0;
 254 !  dtmp0 = y0 * (double)y0;
 255 !  hyp0 += dtmp0;
 256 !
 257 !  ibase0 = ((int*)&hyp0)[0];
 258 !
 259 !  dbase0 = vis_fand(hyp0,DA0);
 260 !  dbase0 = vis_fmul8x16(SCALE, dbase0);
 261 !  dbase0 = vis_fpsub32(DA1,dbase0);
 262 !
 263 !  hyp0 = vis_fand(hyp0,DC0);
 264 !  hyp0 = vis_for(hyp0,DC1);
 265 !  h_hi0 = vis_fand(hyp0,DC2);
 266 !
 267 !  ibase0 >>= 10;
 268 !  si0 = ibase0 & 0x7f0;
 269 !  xx0 = ((double*)((char*)TBL + si0))[0];
 270 !
 271 !  dtmp1 = hyp0 - h_hi0;
 272 !  xx0 = dtmp1 * xx0;
!  res0 = ((double*)((char*)TBL + si0))[1];
 274 !  dtmp2 = KA3 * xx0;
 275 !  dtmp2 += KA2;
 276 !  dtmp2 *= xx0;
 277 !  dtmp2 += KA1;
 278 !  dtmp2 *= xx0;
 279 !  dtmp2 += KA0;
 280 !  res0 *= dtmp2;
 281 !  res0 *= dbase0;
 282 !  ftmp0 = (float)res0;
 283 !  *pz = ftmp0;
 284 !  pz += stridez;
 285 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
 286 
 287         ENTRY(__vrhypotf)
 288         save    %sp,-SA(MINFRAME)-tmps,%sp
 289         PIC_SETUP(l7)
 290         PIC_SET(l7,.CONST_TBL,l2)
 291         wr      %g0,0x82,%asi
 292 
 293 #ifdef __sparcv9
 294         ldx     [%fp+STACK_BIAS+176],stridez
 295 #else
 296         ld      [%fp+STACK_BIAS+92],stridez
 297 #endif
 298 
 299         stx     %i1,[%fp+tmp_px]
 300         sll     %i2,2,stridex
 301 
 302         stx     %i3,[%fp+tmp_py]
 303         sll     %i4,2,stridey
 304 
 305         st      %i0,[%fp+tmp_counter]
 306         sll     stridez,2,stridez
 307         mov     %i5,%o1
 308 
 309         ldd     [TBL+TBL_SHIFT],DC0
 310         ldd     [TBL+TBL_SHIFT+8],DC1
 311         ldd     [TBL+TBL_SHIFT+16],DC2
 312         ldd     [TBL+TBL_SHIFT+24],DA0
 313         ldd     [TBL+TBL_SHIFT+32],DA1
 314         ldd     [TBL+TBL_SHIFT+40],SCALE
 315         ldd     [TBL+TBL_SHIFT+48],KA0
 316 
 317         ldd     [TBL+TBL_SHIFT+56],KA1
 318         sethi   %hi(0x7f800000),%o0
 319 
 320         ldd     [TBL+TBL_SHIFT+64],KA2
 321         sethi   %hi(0x7ffffc00),%o7
 322 
 323         ldd     [TBL+TBL_SHIFT+72],KA3
 324         add     %o7,1023,%o7
 325 
 326 .begin:
 327         ld      [%fp+tmp_counter],counter
 328         ldx     [%fp+tmp_px],%o4
 329         ldx     [%fp+tmp_py],%i2
 330         st      %g0,[%fp+tmp_counter]
 331 .begin1:
 332         cmp     counter,0
 333         ble,pn  %icc,.exit
 334         nop
 335 
 336         lda     [%i2]0x82,%l6           ! (3_0) ay = *(int*)py;
 337 
 338         lda     [%o4]0x82,%i5           ! (3_0) ax = *(int*)px;
 339 
 340         lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;
 341         and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;
 342 
 343         and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
 344         cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
 345         bge,pn  %icc,.spec0             ! (3_0) if ( ay >= 0x7f800000 )
 346         lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;
 347 
 348         cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
 349         bge,pn  %icc,.spec0             ! (3_0) if ( ax >= 0x7f800000 )
 350         nop
 351 
 352         cmp     %l6,0                   ! (3_0)
 353         be,pn   %icc,.spec1             ! (3_0) if ( ay == 0 )
 354         fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
 355 .cont_spec1:
 356         lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;
 357 
 358         fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
 359         lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;
 360 
 361         add     %o4,stridex,%l0         ! px += stridex
 362 
 363         add     %i2,stridey,%i2         ! py += stridey
 364         and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;
 365 
 366         and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
 367         lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;
 368 
 369         faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
 370         cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000
 371 
 372         bge,pn  %icc,.update0           ! (4_0) if ( ay >= 0x7f800000 )
 373         lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
 374 .cont0:
 375         cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
 376         bge,pn  %icc,.update1           ! (4_0) if ( ax >= 0x7f800000 )
 377         st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
 378 .cont1:
 379         cmp     %l6,0                   ! (4_1) ay ? 0
 380         be,pn   %icc,.update2           ! (4_1) if ( ay == 0 )
 381         fsmuld  %f4,%f4,%f38            ! (4_1) hyp0 = x0 * (double)x0;
 382 .cont2:
 383         lda     [%i2+stridey]0x82,%l6   ! (0_0) ay = *(int*)py;
 384 
 385         fsmuld  %f2,%f2,%f62            ! (4_1) dtmp0 = y0 * (double)y0;
 386         lda     [%l0+stridex]0x82,%i5   ! (0_0) ax = *(int*)px;
 387 
 388         add     %l0,stridex,%i1         ! px += stridex
 389 
 390         add     %i2,stridey,%i2         ! py += stridey
 391         and     %l6,_0x7fffffff,%l6     ! (0_0) ay &= 0x7fffffff;
 392 
 393         and     %i5,_0x7fffffff,%i5     ! (0_0) ax &= 0x7fffffff;
 394         lda     [%i2]0x82,%f2           ! (0_0) y0 = *py;
 395 
 396         cmp     %l6,_0x7f800000         ! (0_0) ay ? 0x7f800000
 397         bge,pn  %icc,.update3           ! (0_0) if ( ay >= 0x7f800000 )
 398         faddd   %f38,%f62,%f12          ! (4_1) hyp0 += dtmp0;
 399 .cont3:
 400         lda     [%i1]0x82,%f4           ! (0_0) x0 = *px;
 401 
 402         cmp     %i5,_0x7f800000         ! (0_0) ax ? 0x7f800000
 403         bge,pn  %icc,.update4           ! (0_0) if ( ax >= 0x7f800000 )
 404         st      %f12,[%fp+ftmp0]        ! (4_1) ibase0 = ((int*)&hyp0)[0];
 405 .cont4:
 406         cmp     %l6,0                   ! (0_0) ay ? 0
 407         be,pn   %icc,.update5           ! (0_0) if ( ay == 0 )
 408         fsmuld  %f4,%f4,%f38            ! (0_0) hyp0 = x0 * (double)x0;
 409 .cont5:
 410         lda     [%i2+stridey]0x82,%l6   ! (1_0) ay = *(int*)py;
 411 
 412         fsmuld  %f2,%f2,%f62            ! (0_0) dtmp0 = y0 * (double)y0;
 413         lda     [%i1+stridex]0x82,%i5   ! (1_0) ax = *(int*)px;
 414 
 415         add     %i1,stridex,%g5         ! px += stridex
 416 
 417         add     %i2,stridey,%o3         ! py += stridey
 418         and     %l6,_0x7fffffff,%l6     ! (1_0) ay &= 0x7fffffff;
 419         fand    %f20,DC0,%f30           ! (3_1) hyp0 = vis_fand(hyp0,DC0);
 420 
 421         and     %i5,_0x7fffffff,%i5     ! (1_0) ax &= 0x7fffffff;
 422         lda     [%o3]0x82,%f2           ! (1_0) y0 = *py;
 423 
 424         faddd   %f38,%f62,%f14          ! (0_0) hyp0 += dtmp0;
 425         cmp     %l6,_0x7f800000         ! (1_0) ay ? 0x7f800000
 426 
 427         lda     [%g5]0x82,%f4           ! (1_0) x0 = *px;
 428         bge,pn  %icc,.update6           ! (1_0) if ( ay >= 0x7f800000 )
 429         for     %f30,DC1,%f28           ! (3_1) hyp0 = vis_for(hyp0,DC1);
 430 .cont6:
 431         cmp     %i5,_0x7f800000         ! (1_0) ax ? 0x7f800000
 432         bge,pn  %icc,.update7           ! (1_0) if ( ax >= 0x7f800000 )
 433         ld      [%fp+ftmp4],%l1         ! (3_1) ibase0 = ((int*)&hyp0)[0];
 434 .cont7:
 435         st      %f14,[%fp+ftmp1]        ! (0_0) ibase0 = ((int*)&hyp0)[0];
 436 
 437         cmp     %l6,0                   ! (1_0) ay ? 0
 438         be,pn   %icc,.update8           ! (1_0) if ( ay == 0 )
 439         fand    %f28,DC2,%f30           ! (3_1) h_hi0 = vis_fand(hyp0,DC2);
 440 .cont8:
 441         fsmuld  %f4,%f4,%f38            ! (1_0) hyp0 = x0 * (double)x0;
 442         sra     %l1,10,%o5              ! (3_1) ibase0 >>= 10;
 443 
 444         and     %o5,2032,%o4            ! (3_1) si0 = ibase0 & 0x7f0;
 445         lda     [%o3+stridey]0x82,%l6   ! (2_0) ay = *(int*)py;
 446 
 447         fsmuld  %f2,%f2,%f62            ! (1_0) dtmp0 = y0 * (double)y0;
 448         add     %o4,TBL,%l7             ! (3_1) (char*)TBL + si0
 449         lda     [stridex+%g5]0x82,%i5   ! (2_0) ax = *(int*)px;
 450         fsubd   %f28,%f30,%f28          ! (3_1) dtmp1 = hyp0 - h_hi0;
 451 
 452         add     %g5,stridex,%i4         ! px += stridex
 453         ldd     [TBL+%o4],%f42          ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
 454 
 455         and     %l6,_0x7fffffff,%l6     ! (2_0) ay &= 0x7fffffff;
 456         add     %o3,stridey,%i2         ! py += stridey
 457         fand    %f12,DC0,%f30           ! (4_1) hyp0 = vis_fand(hyp0,DC0);
 458 
 459         and     %i5,_0x7fffffff,%i5     ! (2_0) ax &= 0x7fffffff;
 460         lda     [%i2]0x82,%f2           ! (2_0) y0 = *py;
 461 
 462         faddd   %f38,%f62,%f16          ! (1_0) hyp0 += dtmp0;
 463         cmp     %l6,_0x7f800000         ! (2_0) ay ? 0x7f800000
 464         fmuld   %f28,%f42,%f26          ! (3_1) xx0 = dtmp1 * xx0;
 465 
 466         lda     [stridex+%g5]0x82,%f4   ! (2_0) x0 = *px;
 467         bge,pn  %icc,.update9           ! (2_0) if ( ay >= 0x7f800000
 468         for     %f30,DC1,%f28           ! (4_1) hyp0 = vis_for(hyp0,DC1);
 469 .cont9:
 470         cmp     %i5,_0x7f800000         ! (2_0) ax ? 0x7f800000
 471         bge,pn  %icc,.update10          ! (2_0) if ( ax >= 0x7f800000 )
 472         ld      [%fp+ftmp0],%i3         ! (4_1) ibase0 = ((int*)&hyp0)[0];
 473 .cont10:
 474         st      %f16,[%fp+ftmp2]        ! (1_0) ibase0 = ((int*)&hyp0)[0];
 475 
 476         fmuld   KA3,%f26,%f34           ! (3_1) dtmp2 = KA3 * xx0;
 477         cmp     %l6,0                   ! (2_0) ay ? 0
 478         be,pn   %icc,.update11          ! (2_0) if ( ay == 0 )
 479         fand    %f28,DC2,%f30           ! (4_1) h_hi0 = vis_fand(hyp0,DC2);
 480 .cont11:
 481         fsmuld  %f4,%f4,%f36            ! (2_0) hyp0 = x0 * (double)x0;
 482         sra     %i3,10,%i3              ! (4_1) ibase0 >>= 10;
 483 
 484         and     %i3,2032,%i3            ! (4_1) si0 = ibase0 & 0x7f0;
 485         lda     [%i2+stridey]0x82,%l6   ! (3_0) ay = *(int*)py;
 486 
 487         fsmuld  %f2,%f2,%f62            ! (2_0) dtmp0 = y0 * (double)y0;
 488         add     %i3,TBL,%i3             ! (4_1) (char*)TBL + si0
 489         lda     [%i4+stridex]0x82,%i5   ! (3_0) ax = *(int*)px;
 490         fsubd   %f28,%f30,%f28          ! (4_1) dtmp1 = hyp0 - h_hi0;
 491 
 492         add     %i4,stridex,%o4         ! px += stridex
 493         ldd     [%i3],%f42              ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
 494         faddd   %f34,KA2,%f10           ! (3_1) dtmp2 += KA2;
 495 
 496         add     %i2,stridey,%i2         ! py += stridey
 497         and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;
 498         fand    %f14,DC0,%f30           ! (0_0) hyp0 = vis_fand(hyp0,DC0);
 499 
 500         and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
 501         lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;
 502 
 503         faddd   %f36,%f62,%f18          ! (2_0) hyp0 += dtmp0;
 504         cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
 505         fmuld   %f28,%f42,%f32          ! (4_1) xx0 = dtmp1 * xx0;
 506 
 507         fmuld   %f10,%f26,%f10          ! (3_1) dtmp2 *= xx0;
 508         lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;
 509         bge,pn  %icc,.update12          ! (3_0) if ( ay >= 0x7f800000 )
 510         for     %f30,DC1,%f28           ! (0_0) hyp0 = vis_for(hyp0,DC1);
 511 .cont12:
 512         cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
 513         bge,pn  %icc,.update13          ! (3_0) if ( ax >= 0x7f800000 )
 514         ld      [%fp+ftmp1],%i1         ! (0_0) ibase0 = ((int*)&hyp0)[0];
 515 .cont13:
 516         st      %f18,[%fp+ftmp3]        ! (2_0) ibase0 = ((int*)&hyp0)[0];
 517 
 518         fmuld   KA3,%f32,%f34           ! (4_1) dtmp2 = KA3 * xx0;
 519         cmp     %l6,0                   ! (3_0)
 520         be,pn   %icc,.update14          ! (3_0) if ( ay == 0 )
 521         fand    %f28,DC2,%f30           ! (0_0) h_hi0 = vis_fand(hyp0,DC2);
 522 .cont14:
 523         fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
 524         sra     %i1,10,%l1              ! (0_0) ibase0 >>= 10;
 525         faddd   %f10,KA1,%f40           ! (3_1) dtmp2 += KA1;
 526 
 527         and     %l1,2032,%o5            ! (0_0) si0 = ibase0 & 0x7f0;
 528         lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;
 529 
 530         fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
 531         add     %o5,TBL,%l1             ! (0_0) (char*)TBL + si0
 532         lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;
 533         fsubd   %f28,%f30,%f28          ! (0_0) dtmp1 = hyp0 - h_hi0;
 534 
 535         add     %o4,stridex,%l0         ! px += stridex
 536         ldd     [TBL+%o5],%f42          ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
 537         faddd   %f34,KA2,%f10           ! (4_1) dtmp2 += KA2;
 538 
 539         fmuld   %f40,%f26,%f40          ! (3_1) dtmp2 *= xx0;
 540         add     %i2,stridey,%i2         ! py += stridey
 541         and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;
 542         fand    %f16,DC0,%f30           ! (1_0) hyp0 = vis_fand(hyp0,DC0);
 543 
 544         and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
 545         lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;
 546         fand    %f20,DA0,%f24           ! (3_1) dbase0 = vis_fand(hyp0,DA0);
 547 
 548         faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
 549         cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000
 550         ldd     [%l7+8],%f36            ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
 551         fmuld   %f28,%f42,%f26          ! (0_0) xx0 = dtmp1 * xx0;
 552 
 553         fmuld   %f10,%f32,%f10          ! (4_1) dtmp2 *= xx0;
 554         lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
 555         bge,pn  %icc,.update15          ! (4_0) if ( ay >= 0x7f800000 )
 556         for     %f30,DC1,%f28           ! (1_0) hyp0 = vis_for(hyp0,DC1);
 557 .cont15:
 558         fmul8x16        SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
 559         cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
 560         ld      [%fp+ftmp2],%i1         ! (1_0) ibase0 = ((int*)&hyp0)[0];
 561         faddd   %f40,KA0,%f62           ! (3_1) dtmp2 += KA0;
 562 
 563         bge,pn  %icc,.update16          ! (4_0) if ( ax >= 0x7f800000 )
 564         st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
 565 .cont16:
 566         fmuld   KA3,%f26,%f34           ! (0_0) dtmp2 = KA3 * xx0;
 567         fand    %f28,DC2,%f30           ! (1_0) h_hi0 = vis_fand(hyp0,DC2);
 568 
 569         mov     %o1,%i4
 570         cmp     counter,5
 571         bl,pn   %icc,.tail
 572         nop
 573 
 574         ba      .main_loop
 575         sub     counter,5,counter
 576 
 577         .align  16
 578 .main_loop:
 579         fsmuld  %f4,%f4,%f38            ! (4_1) hyp0 = x0 * (double)x0;
 580         sra     %i1,10,%o2              ! (1_1) ibase0 >>= 10;
 581         cmp     %l6,0                   ! (4_1) ay ? 0
 582         faddd   %f10,KA1,%f40           ! (4_2) dtmp2 += KA1;
 583 
 584         fmuld   %f36,%f62,%f36          ! (3_2) res0 *= dtmp2;
 585         and     %o2,2032,%o2            ! (1_1) si0 = ibase0 & 0x7f0;
 586         lda     [%i2+stridey]0x82,%l6   ! (0_0) ay = *(int*)py;
 587         fpsub32 DA1,%f24,%f24           ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);
 588 
 589         fsmuld  %f2,%f2,%f62            ! (4_1) dtmp0 = y0 * (double)y0;
 590         add     %o2,TBL,%o2             ! (1_1) (char*)TBL + si0
 591         lda     [%l0+stridex]0x82,%o1   ! (0_0) ax = *(int*)px;
 592         fsubd   %f28,%f30,%f28          ! (1_1) dtmp1 = hyp0 - h_hi0;
 593 
 594         add     %l0,stridex,%i1         ! px += stridex
 595         ldd     [%o2],%f42              ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
 596         be,pn   %icc,.update17          ! (4_1) if ( ay == 0 )
 597         faddd   %f34,KA2,%f10           ! (0_1) dtmp2 += KA2;
 598 .cont17:
 599         fmuld   %f40,%f32,%f40          ! (4_2) dtmp2 *= xx0;
 600         add     %i2,stridey,%i2         ! py += stridey
 601         and     %l6,_0x7fffffff,%l6     ! (0_0) ay &= 0x7fffffff;
 602         fand    %f18,DC0,%f30           ! (2_1) hyp0 = vis_fand(hyp0,DC0);
 603 
 604         fmuld   %f36,%f24,%f32          ! (3_2) res0 *= dbase0;
 605         and     %o1,_0x7fffffff,%o1     ! (0_0) ax &= 0x7fffffff;
 606         lda     [%i2]0x82,%f2           ! (0_0) y0 = *py;
 607         fand    %f12,DA0,%f24           ! (4_2) dbase0 = vis_fand(hyp0,DA0);
 608 
 609         faddd   %f38,%f62,%f12          ! (4_1) hyp0 += dtmp0;
 610         cmp     %l6,_0x7f800000         ! (0_0) ay ? 0x7f800000
 611         ldd     [%i3+8],%f62            ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
! ------------------------------------------------------------------------
! Body of the 5-stage software-pipelined main loop of __vrhypotf:
!     z[i] = rhypotf(x[i], y[i]) = 1 / sqrtf(x*x + y*y)
! Tags "(i_j)" on each line mean: pipeline stage i (0..4), iteration j
! relative to the current pass.  Per element the algorithm is:
!   hyp = x*(double)x + y*(double)y
!   clamp/normalize hyp with DC0/DC1, split off the top mantissa+exponent
!   bits as a table index si (ibase >> 10, masked to 0x7f0) and keep the
!   residual dtmp1 = hyp - h_hi
!   res  = TBL[si+1]  (~1/sqrt seed), xx = dtmp1 * TBL[si] (scaled residual)
!   res *= (((KA3*xx + KA2)*xx + KA1)*xx + KA0)   (cubic correction)
!   res *= dbase      (exponent rescale built with fand/fpsub32/fmul8x16)
!   *pz  = (float)res
! In parallel the next elements' words are screened: ay/ax >= 0x7f800000
! (Inf/NaN) or ay == 0 branch to the .updateNN fix-up stubs, which rejoin
! at the matching .contNN.
! NOTE(review): do not reschedule or "clean up" this code -- the
! conditional branches use their delay slots (the FP op following each
! bge/be executes regardless of the branch), and FP register lifetimes
! are deliberately interleaved across pipeline stages.
! ------------------------------------------------------------------------
        fmuld   %f28,%f42,%f36          ! (1_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (0_1) dtmp2 *= xx0;
        lda     [%i1]0x82,%f4           ! (0_0) x0 = *px;
        bge,pn  %icc,.update18          ! (0_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (2_1) hyp0 = vis_for(hyp0,DC1);
.cont18:
        fmul8x16        SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %o1,_0x7f800000         ! (0_0) ax ? 0x7f800000
        ld      [%fp+ftmp3],%l0         ! (2_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (4_2) dtmp2 += KA0;

        add     %i4,stridez,%i3         ! pz += stridez
        st      %f12,[%fp+ftmp0]        ! (4_1) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update19          ! (0_0) if ( ax >= 0x7f800000 )
        fdtos   %f32,%f1                ! (3_2) ftmp0 = (float)res0;
.cont19:
        fmuld   KA3,%f36,%f34           ! (1_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (0_0) ay ? 0
        st      %f1,[%i4]               ! (3_2) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (2_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f38            ! (0_0) hyp0 = x0 * (double)x0;
        sra     %l0,10,%i4              ! (2_1) ibase0 >>= 10;
        be,pn   %icc,.update20          ! (0_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (0_1) dtmp2 += KA1;
.cont20:
        fmuld   %f62,%f42,%f32          ! (4_2) res0 *= dtmp2;
        and     %i4,2032,%g1            ! (2_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (1_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (0_0) dtmp0 = y0 * (double)y0;
        add     %g1,TBL,%l0             ! (2_1) (char*)TBL + si0
        lda     [%i1+stridex]0x82,%i5   ! (1_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (2_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %i1,stridex,%g5         ! px += stridex
        ldd     [TBL+%g1],%f42          ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (1_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (0_1) dtmp2 *= xx0;
        add     %i2,stridey,%o3         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (1_0) ay &= 0x7fffffff;
        fand    %f20,DC0,%f30           ! (3_1) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f32,%f24,%f26          ! (4_2) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (1_0) ax &= 0x7fffffff;
        lda     [%o3]0x82,%f2           ! (1_0) y0 = *py;
        fand    %f14,DA0,%f24           ! (0_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f38,%f62,%f14          ! (0_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (1_0) ay ? 0x7f800000
        ldd     [%l1+8],%f62            ! (0_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f32          ! (2_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f36,%f10          ! (1_1) dtmp2 *= xx0;
        lda     [%g5]0x82,%f4           ! (1_0) x0 = *px;
        bge,pn  %icc,.update21          ! (1_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (3_1) hyp0 = vis_for(hyp0,DC1);
.cont21:
        fmul8x16        SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (1_0) ax ? 0x7f800000
        ld      [%fp+ftmp4],%l1         ! (3_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (0_1) dtmp2 += KA0;

        add     %i3,stridez,%o1         ! pz += stridez
        st      %f14,[%fp+ftmp1]        ! (0_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update22          ! (1_0) if ( ax >= 0x7f800000 )
        fdtos   %f26,%f1                ! (4_2) ftmp0 = (float)res0;
.cont22:
        fmuld   KA3,%f32,%f34           ! (2_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (1_0) ay ? 0
        st      %f1,[%i3]               ! (4_2) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (3_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f38            ! (1_0) hyp0 = x0 * (double)x0;
        sra     %l1,10,%o5              ! (3_1) ibase0 >>= 10;
        be,pn   %icc,.update23          ! (1_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (1_1) dtmp2 += KA1;
.cont23:
        fmuld   %f62,%f42,%f26          ! (0_1) res0 *= dtmp2;
        and     %o5,2032,%o4            ! (3_1) si0 = ibase0 & 0x7f0;
        lda     [%o3+stridey]0x82,%l6   ! (2_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (1_0) dtmp0 = y0 * (double)y0;
        add     %o4,TBL,%l7             ! (3_1) (char*)TBL + si0
        lda     [stridex+%g5]0x82,%i5   ! (2_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (3_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %g5,stridex,%i4         ! px += stridex
        ldd     [TBL+%o4],%f42          ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (2_1) dtmp2 += KA2;

        fmuld   %f40,%f36,%f40          ! (1_1) dtmp2 *= xx0;
        and     %l6,_0x7fffffff,%l6     ! (2_0) ay &= 0x7fffffff;
        add     %o3,stridey,%i2         ! py += stridey
        fand    %f12,DC0,%f30           ! (4_1) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f26,%f24,%f36          ! (0_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (2_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (2_0) y0 = *py;
        fand    %f16,DA0,%f24           ! (1_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f38,%f62,%f16          ! (1_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (2_0) ay ? 0x7f800000
        ldd     [%o2+8],%f38            ! (1_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f26          ! (3_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f32,%f10          ! (2_1) dtmp2 *= xx0;
        lda     [stridex+%g5]0x82,%f4   ! (2_0) x0 = *px;
        bge,pn  %icc,.update24          ! (2_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (4_1) hyp0 = vis_for(hyp0,DC1);
.cont24:
        fmul8x16        SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (2_0) ax ? 0x7f800000
        ld      [%fp+ftmp0],%i3         ! (4_1) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f62           ! (1_1) dtmp2 += KA0;

        add     %o1,stridez,%g1         ! pz += stridez
        st      %f16,[%fp+ftmp2]        ! (1_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update25          ! (2_0) if ( ax >= 0x7f800000 )
        fdtos   %f36,%f1                ! (0_1) ftmp0 = (float)res0;
.cont25:
        fmuld   KA3,%f26,%f34           ! (3_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (2_0) ay ? 0
        st      %f1,[%o1]               ! (0_1) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (4_1) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f36            ! (2_0) hyp0 = x0 * (double)x0;
        sra     %i3,10,%i3              ! (4_1) ibase0 >>= 10;
        be,pn   %icc,.update26          ! (2_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (2_1) dtmp2 += KA1;
.cont26:
        fmuld   %f38,%f62,%f38          ! (1_1) res0 *= dtmp2;
        and     %i3,2032,%i3            ! (4_1) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (3_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (2_0) dtmp0 = y0 * (double)y0;
        add     %i3,TBL,%i3             ! (4_1) (char*)TBL + si0
        lda     [%i4+stridex]0x82,%i5   ! (3_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (4_1) dtmp1 = hyp0 - h_hi0;

        nop
        add     %i4,stridex,%o4         ! px += stridex
        ldd     [%i3],%f42              ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (3_1) dtmp2 += KA2;

        fmuld   %f40,%f32,%f40          ! (2_1) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (3_0) ay &= 0x7fffffff;
        fand    %f14,DC0,%f30           ! (0_0) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f38,%f24,%f38          ! (1_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (3_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (3_0) y0 = *py;
        fand    %f18,DA0,%f24           ! (2_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f36,%f62,%f18          ! (2_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (3_0) ay ? 0x7f800000
        ldd     [%l0+8],%f62            ! (2_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f32          ! (4_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (3_1) dtmp2 *= xx0;
        lda     [%o4]0x82,%f4           ! (3_0) x0 = *px;
        bge,pn  %icc,.update27          ! (3_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (0_0) hyp0 = vis_for(hyp0,DC1);
.cont27:
        fmul8x16        SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (3_0) ax ? 0x7f800000
        ld      [%fp+ftmp1],%i1         ! (0_0) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f42           ! (2_1) dtmp2 += KA0;

        add     %g1,stridez,%o3         ! pz += stridez
        st      %f18,[%fp+ftmp3]        ! (2_0) ibase0 = ((int*)&hyp0)[0];
        bge,pn  %icc,.update28          ! (3_0) if ( ax >= 0x7f800000 )
        fdtos   %f38,%f1                ! (1_1) ftmp0 = (float)res0;
.cont28:
        fmuld   KA3,%f32,%f34           ! (4_1) dtmp2 = KA3 * xx0;
        cmp     %l6,0                   ! (3_0) ay ? 0
        st      %f1,[%g1]               ! (1_1) *pz = ftmp0;
        fand    %f28,DC2,%f30           ! (0_0) h_hi0 = vis_fand(hyp0,DC2);

        fsmuld  %f4,%f4,%f36            ! (3_0) hyp0 = x0 * (double)x0;
        sra     %i1,10,%l1              ! (0_0) ibase0 >>= 10;
        be,pn   %icc,.update29          ! (3_0) if ( ay == 0 )
        faddd   %f10,KA1,%f40           ! (3_1) dtmp2 += KA1;
.cont29:
        fmuld   %f62,%f42,%f38          ! (2_1) res0 *= dtmp2;
        and     %l1,2032,%o5            ! (0_0) si0 = ibase0 & 0x7f0;
        lda     [%i2+stridey]0x82,%l6   ! (4_0) ay = *(int*)py;
        fpsub32 DA1,%f24,%f24           ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fsmuld  %f2,%f2,%f62            ! (3_0) dtmp0 = y0 * (double)y0;
        add     %o5,TBL,%l1             ! (0_0) (char*)TBL + si0
        lda     [stridex+%o4]0x82,%i5   ! (4_0) ax = *(int*)px;
        fsubd   %f28,%f30,%f28          ! (0_0) dtmp1 = hyp0 - h_hi0;

        add     %o3,stridez,%i4         ! pz += stridez
        add     %o4,stridex,%l0         ! px += stridex
        ldd     [TBL+%o5],%f42          ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (4_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (3_1) dtmp2 *= xx0;
        add     %i2,stridey,%i2         ! py += stridey
        and     %l6,_0x7fffffff,%l6     ! (4_0) ay &= 0x7fffffff;
        fand    %f16,DC0,%f30           ! (1_0) hyp0 = vis_fand(hyp0,DC0);

        fmuld   %f38,%f24,%f38          ! (2_1) res0 *= dbase0;
        and     %i5,_0x7fffffff,%i5     ! (4_0) ax &= 0x7fffffff;
        lda     [%i2]0x82,%f2           ! (4_0) y0 = *py;
        fand    %f20,DA0,%f24           ! (3_1) dbase0 = vis_fand(hyp0,DA0);

        faddd   %f36,%f62,%f20          ! (3_0) hyp0 += dtmp0;
        cmp     %l6,_0x7f800000         ! (4_0) ay ? 0x7f800000
        ldd     [%l7+8],%f36            ! (3_1) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f26          ! (0_0) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f32,%f10          ! (4_1) dtmp2 *= xx0;
        lda     [stridex+%o4]0x82,%f4   ! (4_0) x0 = *px;
        bge,pn  %icc,.update30          ! (4_0) if ( ay >= 0x7f800000 )
        for     %f30,DC1,%f28           ! (1_0) hyp0 = vis_for(hyp0,DC1);
.cont30:
        fmul8x16        SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        cmp     %i5,_0x7f800000         ! (4_0) ax ? 0x7f800000
        ld      [%fp+ftmp2],%i1         ! (1_0) ibase0 = ((int*)&hyp0)[0];
        faddd   %f40,KA0,%f62           ! (3_1) dtmp2 += KA0;

        bge,pn  %icc,.update31          ! (4_0) if ( ax >= 0x7f800000 )
        st      %f20,[%fp+ftmp4]        ! (3_0) ibase0 = ((int*)&hyp0)[0];
.cont31:
        subcc   counter,5,counter       ! counter -= 5;
        fdtos   %f38,%f1                ! (2_1) ftmp0 = (float)res0;

        fmuld   KA3,%f26,%f34           ! (0_0) dtmp2 = KA3 * xx0;
        st      %f1,[%o3]               ! (2_1) *pz = ftmp0;
        bpos,pt %icc,.main_loop         ! loop while >= 5 elements remain
        fand    %f28,DC2,%f30           ! (1_0) h_hi0 = vis_fand(hyp0,DC2);

        add     counter,5,counter       ! undo the subcc; fall through to .tail
 856 
! ------------------------------------------------------------------------
! .tail: drain the software pipeline after the main loop exits.  Up to
! four results are still in flight (stages 0_1 .. 4_2); each "subcc/bneg"
! pair retires one result and stores it, decrementing counter until it
! goes negative, then control returns to .begin for any remaining
! (non-pipelined) elements.  Same per-element math as the main loop.
! NOTE(review): code is byte-for-byte the drained loop schedule -- do not
! reorder; branch delay slots are in use.
! ------------------------------------------------------------------------
.tail:
        subcc   counter,1,counter       ! one fewer element to retire
        bneg    .begin                  ! nothing left in flight
        mov     %i4,%o1

        sra     %i1,10,%o2              ! (1_1) ibase0 >>= 10;
        faddd   %f10,KA1,%f40           ! (4_2) dtmp2 += KA1;

        fmuld   %f36,%f62,%f36          ! (3_2) res0 *= dtmp2;
        and     %o2,2032,%o2            ! (1_1) si0 = ibase0 & 0x7f0;
        fpsub32 DA1,%f24,%f24           ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0);

        add     %o2,TBL,%o2             ! (1_1) (char*)TBL + si0
        fsubd   %f28,%f30,%f28          ! (1_1) dtmp1 = hyp0 - h_hi0;

        ldd     [%o2],%f42              ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0];
        faddd   %f34,KA2,%f10           ! (0_1) dtmp2 += KA2;

        fmuld   %f40,%f32,%f40          ! (4_2) dtmp2 *= xx0;

        fmuld   %f36,%f24,%f32          ! (3_2) res0 *= dbase0;
        fand    %f12,DA0,%f24           ! (4_2) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%i3+8],%f62            ! (4_2) res0 = ((double*)((char*)arr + si0))[1];
        fmuld   %f28,%f42,%f36          ! (1_1) xx0 = dtmp1 * xx0;

        fmuld   %f10,%f26,%f10          ! (0_1) dtmp2 *= xx0;

        fmul8x16        SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f42           ! (4_2) dtmp2 += KA0;

        add     %i4,stridez,%i3         ! pz += stridez
        fdtos   %f32,%f1                ! (3_2) ftmp0 = (float)res0;

        fmuld   KA3,%f36,%f34           ! (1_1) dtmp2 = KA3 * xx0;
        st      %f1,[%i4]               ! (3_2) *pz = ftmp0;

        subcc   counter,1,counter       ! retire next in-flight result?
        bneg    .begin
        mov     %i3,%o1

        faddd   %f10,KA1,%f40           ! (0_1) dtmp2 += KA1;

        fmuld   %f62,%f42,%f32          ! (4_2) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0);


        faddd   %f34,KA2,%f10           ! (1_1) dtmp2 += KA2;

        fmuld   %f40,%f26,%f40          ! (0_1) dtmp2 *= xx0;

        fmuld   %f32,%f24,%f26          ! (4_2) res0 *= dbase0;
        fand    %f14,DA0,%f24           ! (0_1) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%l1+8],%f62            ! (0_1) res0 = ((double*)((char*)arr + si0))[1];

        fmuld   %f10,%f36,%f10          ! (1_1) dtmp2 *= xx0;

        fmul8x16        SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f42           ! (0_1) dtmp2 += KA0;

        add     %i3,stridez,%o1         ! pz += stridez
        fdtos   %f26,%f1                ! (4_2) ftmp0 = (float)res0;

        st      %f1,[%i3]               ! (4_2) *pz = ftmp0;

        subcc   counter,1,counter       ! retire next in-flight result?
        bneg    .begin
        nop

        faddd   %f10,KA1,%f40           ! (1_1) dtmp2 += KA1;

        fmuld   %f62,%f42,%f26          ! (0_1) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fmuld   %f40,%f36,%f40          ! (1_1) dtmp2 *= xx0;

        fmuld   %f26,%f24,%f36          ! (0_1) res0 *= dbase0;
        fand    %f16,DA0,%f24           ! (1_1) dbase0 = vis_fand(hyp0,DA0);

        ldd     [%o2+8],%f38            ! (1_1) res0 = ((double*)((char*)arr + si0))[1];

        fmul8x16        SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
        faddd   %f40,KA0,%f62           ! (1_1) dtmp2 += KA0;

        add     %o1,stridez,%g1         ! pz += stridez
        fdtos   %f36,%f1                ! (0_1) ftmp0 = (float)res0;

        st      %f1,[%o1]               ! (0_1) *pz = ftmp0;

        subcc   counter,1,counter       ! retire last in-flight result?
        bneg    .begin
        mov     %g1,%o1

        fmuld   %f38,%f62,%f38          ! (1_1) res0 *= dtmp2;
        fpsub32 DA1,%f24,%f24           ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

        fmuld   %f38,%f24,%f38          ! (1_1) res0 *= dbase0;

        fdtos   %f38,%f1                ! (1_1) ftmp0 = (float)res0;
        st      %f1,[%g1]               ! (1_1) *pz = ftmp0;

        ba      .begin                  ! pipeline fully drained
        add     %g1,stridez,%o1         ! pz += stridez
 961 
! ------------------------------------------------------------------------
! .spec0: scalar special-case path, reached when ay >= 0x7f800000, i.e.
! |y| is Inf or NaN (ax may also be Inf/NaN).  Per IEEE/C99 hypot rules,
! hypot(+-Inf, anything) = +Inf even when the other argument is NaN, so
! rhypotf = 1/hypotf is 0.0f whenever either input is infinite; otherwise
! at least one input is NaN and fmuls |x|*|y| propagates a quiet NaN to
! the result.  Afterwards advance all three pointers, consume one element
! and re-enter the scalar screening loop at .begin1.
! ------------------------------------------------------------------------
        .align  16
.spec0:
        fabss   %f2,%f2                 ! fabsf(y0);

        fabss   %f4,%f4                 ! fabsf(x0);

        fcmps   %f2,%f4                 ! result unused; NOTE(review): presumably
                                        ! here to raise invalid on signaling NaNs
                                        ! -- confirm against scalar rhypotf

        cmp     %l6,_0x7f800000         ! ay ? 0x7f800000
        be,a    1f                      ! if( ay == 0x7f800000 )  (|y| == Inf)
        st      %g0,[%o1]               ! *pz = 0.0f;  (annulled unless taken)

        cmp     %i5,_0x7f800000         ! ax ? 0x7f800000
        be,a    1f                      ! if( ax == 0x7f800000 )  (|x| == Inf)
        st      %g0,[%o1]               ! *pz = 0.0f;  (annulled unless taken)

        fmuls   %f2,%f4,%f2             ! fabsf(x0) * fabsf(y0);  NaN propagates
        st      %f2,[%o1]               ! *pz = fabsf(x0) * fabsf(y0);
1:
        add     %o4,stridex,%o4         ! px += stridex;
        add     %i2,stridey,%i2         ! py += stridey;

        add     %o1,stridez,%o1         ! pz += stridez;
        ba      .begin1
        sub     counter,1,counter       ! counter--;
 987 
! ------------------------------------------------------------------------
! .spec1: scalar special-case path, reached when ay == 0 (y0 == +-0).
! If x0 is nonzero the element is computable normally: resume at
! .cont_spec1.  Otherwise both inputs are zero and rhypotf(0,0) is
! computed as 1.0f/0.0f via fdivs, producing +Inf and raising the IEEE
! division-by-zero exception.
! NOTE(review): %f7/%f9 are presumed preloaded with 1.0f and 0.0f by the
! function prologue (outside this view) -- confirm.
! ------------------------------------------------------------------------
        .align  16
.spec1:
        cmp     %i5,0                   ! ax ? 0
        bne,pt  %icc,.cont_spec1        ! if ( ax != 0 )
        nop

        add     %o4,stridex,%o4         ! px += stridex;
        add     %i2,stridey,%i2         ! py += stridey;

        fdivs   %f7,%f9,%f2             ! 1.0f / 0.0f -> +Inf, raises div-by-zero
        st      %f2,[%o1]               ! *pz = 1.0f / 0.0f;

        add     %o1,stridez,%o1         ! pz += stridez;
        ba      .begin1
        sub     counter,1,counter       ! counter--;
1003 
! ------------------------------------------------------------------------
! .update0 .. .update17: fix-up stubs for the pipelined pre-loop/loop when
! an element's word fails screening (Inf/NaN, or y == 0) at a point where
! K results are already committed ahead of it (K encoded in the "cmp
! counter,K" below).  Common pattern:
!   * if counter <= K nothing past the committed work remains: substitute
!     a harmless finite dummy operand (loaded from TBL[TBL_SHIFT+44] into
!     %f2 for y or %f4 for x) and rejoin at the matching .contNN;
!   * otherwise save the not-yet-processed remainder (count into
!     tmp_counter, current px/py into tmp_px/tmp_py -- re-run later via
!     .begin), clamp counter to K, substitute the dummy and rejoin.
! The ay==0 stubs (.update2/5/8/11/14/17) first re-check ax: if ax != 0
! the element is computable and control rejoins immediately.
! NOTE(review): in those stubs the instruction in the bne delay slot (a
! cmp) executes even when the branch is taken; icc is re-established by a
! cmp before the next conditional branch on the rejoin path -- confirm.
! ------------------------------------------------------------------------
        .align  16
.update0:                               ! bad y, 0 results committed
        cmp     counter,1
        ble     .cont0
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont0
        mov     1,counter

        .align  16
.update1:                               ! bad x, 0 results committed
        cmp     counter,1
        ble     .cont1
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont1
        mov     1,counter

        .align  16
.update2:                               ! y == 0, 0 results committed
        cmp     %i5,0
        bne     .cont2

        cmp     counter,1
        ble     .cont2
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont2
        mov     1,counter

        .align  16
.update3:                               ! bad y, 1 result committed
        cmp     counter,2
        ble     .cont3
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont3
        mov     2,counter

        .align  16
.update4:                               ! bad x, 1 result committed
        cmp     counter,2
        ble     .cont4
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont4
        mov     2,counter

        .align  16
.update5:                               ! y == 0, 1 result committed
        cmp     %i5,0
        bne     .cont5

        cmp     counter,2
        ble     .cont5
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont5
        mov     2,counter

        .align  16
.update6:                               ! bad y, 2 results committed
        cmp     counter,3
        ble     .cont6
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont6
        mov     3,counter

        .align  16
.update7:                               ! bad x, 2 results committed
        cmp     counter,3
        ble     .cont7
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont7
        mov     3,counter

        .align  16
.update8:                               ! y == 0, 2 results committed
        cmp     %i5,0
        bne     .cont8

        cmp     counter,3
        ble     .cont8
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont8
        mov     3,counter

        .align  16
.update9:                               ! bad y, 3 results committed
        cmp     counter,4
        ble     .cont9
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont9
        mov     4,counter

        .align  16
.update10:                              ! bad x, 3 results committed
        cmp     counter,4
        ble     .cont10
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont10
        mov     4,counter

        .align  16
.update11:                              ! y == 0, 3 results committed
        cmp     %i5,0
        bne     .cont11

        cmp     counter,4
        ble     .cont11
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont11
        mov     4,counter

        .align  16
.update12:                              ! bad y, 4 results committed
        cmp     counter,5
        ble     .cont12
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont12
        mov     5,counter

        .align  16
.update13:                              ! bad x, 4 results committed
        cmp     counter,5
        ble     .cont13
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont13
        mov     5,counter

        .align  16
.update14:                              ! y == 0, 4 results committed
        cmp     %i5,0
        bne     .cont14

        cmp     counter,5
        ble     .cont14
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont14
        mov     5,counter

        .align  16
.update15:                              ! bad y, 5 results committed
        cmp     counter,6
        ble     .cont15
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont15
        mov     6,counter

        .align  16
.update16:                              ! bad x, 5 results committed
        cmp     counter,6
        ble     .cont16
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont16
        mov     6,counter

        .align  16
.update17:                              ! y == 0; dummy here is DC1 into %f62
        cmp     %i5,0
        bne     .cont17

        cmp     counter,1
        ble     .cont17
        fmovd   DC1,%f62

        sub     counter,1,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont17
        mov     1,counter
! ------------------------------------------------------------------------
! .update18 .. .update31: fix-up stubs for the main loop proper; same
! pattern as .update0-.update17 (see above): if at most K results remain
! (K in the "cmp counter,K"), just substitute a harmless dummy operand
! from TBL[TBL_SHIFT+44] (%f2 = y, %f4 = x) and rejoin at .contNN;
! otherwise stash the unprocessed remainder in tmp_counter/tmp_px/tmp_py
! for a later pass from .begin, clamp counter to K, substitute the dummy
! and rejoin.  .update20/23/26/29 are the ay==0 variants that first
! re-check ax and resume directly when x alone is nonzero.
! ------------------------------------------------------------------------
        .align  16
.update18:                              ! bad y, stage (0_0), K = 2
        cmp     counter,2
        ble     .cont18
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont18
        mov     2,counter

        .align  16
.update19:                              ! bad x, stage (0_0), K = 2
        cmp     counter,2
        ble     .cont19
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont19
        mov     2,counter

        .align  16
.update20:                              ! y == 0, stage (0_0), K = 2
        cmp     %o1,0                   ! ax held in %o1 at this stage
        bne     .cont20

        cmp     counter,2
        ble     .cont20
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,2,counter
        st      counter,[%fp+tmp_counter]

        stx     %i1,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont20
        mov     2,counter

        .align  16
.update21:                              ! bad y, stage (1_0), K = 3
        cmp     counter,3
        ble     .cont21
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont21
        mov     3,counter

        .align  16
.update22:                              ! bad x, stage (1_0), K = 3
        cmp     counter,3
        ble     .cont22
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont22
        mov     3,counter

        .align  16
.update23:                              ! y == 0, stage (1_0), K = 3
        cmp     %i5,0
        bne     .cont23

        cmp     counter,3
        ble     .cont23
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,3,counter
        st      counter,[%fp+tmp_counter]

        stx     %g5,[%fp+tmp_px]

        stx     %o3,[%fp+tmp_py]
        ba      .cont23
        mov     3,counter

        .align  16
.update24:                              ! bad y, stage (2_0), K = 4
        cmp     counter,4
        ble     .cont24
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont24
        mov     4,counter

        .align  16
.update25:                              ! bad x, stage (2_0), K = 4
        cmp     counter,4
        ble     .cont25
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont25
        mov     4,counter

        .align  16
.update26:                              ! y == 0, stage (2_0), K = 4
        cmp     %i5,0
        bne     .cont26

        cmp     counter,4
        ble     .cont26
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,4,counter
        st      counter,[%fp+tmp_counter]

        stx     %i4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont26
        mov     4,counter

        .align  16
.update27:                              ! bad y, stage (3_0), K = 5
        cmp     counter,5
        ble     .cont27
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont27
        mov     5,counter

        .align  16
.update28:                              ! bad x, stage (3_0), K = 5
        cmp     counter,5
        ble     .cont28
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont28
        mov     5,counter

        .align  16
.update29:                              ! y == 0, stage (3_0), K = 5
        cmp     %i5,0
        bne     .cont29

        cmp     counter,5
        ble     .cont29
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,5,counter
        st      counter,[%fp+tmp_counter]

        stx     %o4,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont29
        mov     5,counter

        .align  16
.update30:                              ! bad y, stage (4_0), K = 6
        cmp     counter,6
        ble     .cont30
        ld      [TBL+TBL_SHIFT+44],%f2

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont30
        mov     6,counter

        .align  16
.update31:                              ! bad x, stage (4_0), K = 6
        cmp     counter,6
        ble     .cont31
        ld      [TBL+TBL_SHIFT+44],%f4

        sub     counter,6,counter
        st      counter,[%fp+tmp_counter]

        stx     %l0,[%fp+tmp_px]

        stx     %i2,[%fp+tmp_py]
        ba      .cont31
        mov     6,counter
1513 
! ------------------------------------------------------------------------
! .exit: function epilogue -- restore the caller's register window (in the
! ret delay slot) and return.  SET_SIZE records the ELF symbol size for
! __vrhypotf.
! ------------------------------------------------------------------------
        .align  16
.exit:
        ret
        restore                         ! executes in the ret delay slot
        SET_SIZE(__vrhypotf)
1519