1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "__vrhypotf.S" 30 31 #include "libm.h" 32 33 RO_DATA 34 .align 64 35 .CONST_TBL: 36 ! i = [0,63] 37 ! TBL[2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); 38 ! TBL[2*i+1] = (double)(0.5/sqrtl(2) / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); 39 ! TBL[128+2*i+0] = 1.0 / (*(double*)&(0x3ff0000000000000LL + (i << 46))); 40 ! 
TBL[128+2*i+1] = (double)(0.25 / sqrtl(*(double*)&(0x3ff0000000000000LL + (i << 46)))); 41 42 .word 0x3ff00000, 0x00000000, 0x3fd6a09e, 0x667f3bcd, 43 .word 0x3fef81f8, 0x1f81f820, 0x3fd673e3, 0x2ef63a03, 44 .word 0x3fef07c1, 0xf07c1f08, 0x3fd6482d, 0x37a5a3d2, 45 .word 0x3fee9131, 0xabf0b767, 0x3fd61d72, 0xb7978671, 46 .word 0x3fee1e1e, 0x1e1e1e1e, 0x3fd5f3aa, 0x673fa911, 47 .word 0x3fedae60, 0x76b981db, 0x3fd5cacb, 0x7802f342, 48 .word 0x3fed41d4, 0x1d41d41d, 0x3fd5a2cd, 0x8c69d61a, 49 .word 0x3fecd856, 0x89039b0b, 0x3fd57ba8, 0xb0ee01b9, 50 .word 0x3fec71c7, 0x1c71c71c, 0x3fd55555, 0x55555555, 51 .word 0x3fec0e07, 0x0381c0e0, 0x3fd52fcc, 0x468d6b54, 52 .word 0x3febacf9, 0x14c1bad0, 0x3fd50b06, 0xa8fc6b70, 53 .word 0x3feb4e81, 0xb4e81b4f, 0x3fd4e6fd, 0xf33cf032, 54 .word 0x3feaf286, 0xbca1af28, 0x3fd4c3ab, 0xe93bcf74, 55 .word 0x3fea98ef, 0x606a63be, 0x3fd4a10a, 0x97af7b92, 56 .word 0x3fea41a4, 0x1a41a41a, 0x3fd47f14, 0x4fe17f9f, 57 .word 0x3fe9ec8e, 0x951033d9, 0x3fd45dc3, 0xa3c34fa3, 58 .word 0x3fe99999, 0x9999999a, 0x3fd43d13, 0x6248490f, 59 .word 0x3fe948b0, 0xfcd6e9e0, 0x3fd41cfe, 0x93ff5199, 60 .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fd3fd80, 0x77e70577, 61 .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fd3de94, 0x8077db58, 62 .word 0x3fe86186, 0x18618618, 0x3fd3c036, 0x50e00e03, 63 .word 0x3fe81818, 0x18181818, 0x3fd3a261, 0xba6d7a37, 64 .word 0x3fe7d05f, 0x417d05f4, 0x3fd38512, 0xba21f51e, 65 .word 0x3fe78a4c, 0x8178a4c8, 0x3fd36845, 0x766eec92, 66 .word 0x3fe745d1, 0x745d1746, 0x3fd34bf6, 0x3d156826, 67 .word 0x3fe702e0, 0x5c0b8170, 0x3fd33021, 0x8127c0e0, 68 .word 0x3fe6c16c, 0x16c16c17, 0x3fd314c3, 0xd92a9e91, 69 .word 0x3fe68168, 0x16816817, 0x3fd2f9d9, 0xfd52fd50, 70 .word 0x3fe642c8, 0x590b2164, 0x3fd2df60, 0xc5df2c9e, 71 .word 0x3fe60581, 0x60581606, 0x3fd2c555, 0x2988e428, 72 .word 0x3fe5c988, 0x2b931057, 0x3fd2abb4, 0x3c0eb0f4, 73 .word 0x3fe58ed2, 0x308158ed, 0x3fd2927b, 0x2cd320f5, 74 .word 0x3fe55555, 0x55555555, 0x3fd279a7, 0x4590331c, 75 .word 0x3fe51d07, 
0xeae2f815, 0x3fd26135, 0xe91daf55, 76 .word 0x3fe4e5e0, 0xa72f0539, 0x3fd24924, 0x92492492, 77 .word 0x3fe4afd6, 0xa052bf5b, 0x3fd23170, 0xd2be638a, 78 .word 0x3fe47ae1, 0x47ae147b, 0x3fd21a18, 0x51ff630a, 79 .word 0x3fe446f8, 0x6562d9fb, 0x3fd20318, 0xcc6a8f5d, 80 .word 0x3fe41414, 0x14141414, 0x3fd1ec70, 0x124e98f9, 81 .word 0x3fe3e22c, 0xbce4a902, 0x3fd1d61c, 0x070ae7d3, 82 .word 0x3fe3b13b, 0x13b13b14, 0x3fd1c01a, 0xa03be896, 83 .word 0x3fe38138, 0x13813814, 0x3fd1aa69, 0xe4f2777f, 84 .word 0x3fe3521c, 0xfb2b78c1, 0x3fd19507, 0xecf5b9e9, 85 .word 0x3fe323e3, 0x4a2b10bf, 0x3fd17ff2, 0xe00ec3ee, 86 .word 0x3fe2f684, 0xbda12f68, 0x3fd16b28, 0xf55d72d4, 87 .word 0x3fe2c9fb, 0x4d812ca0, 0x3fd156a8, 0x72b5ef62, 88 .word 0x3fe29e41, 0x29e4129e, 0x3fd1426f, 0xac0654db, 89 .word 0x3fe27350, 0xb8812735, 0x3fd12e7d, 0x02c40253, 90 .word 0x3fe24924, 0x92492492, 0x3fd11ace, 0xe560242a, 91 .word 0x3fe21fb7, 0x8121fb78, 0x3fd10763, 0xcec30b26, 92 .word 0x3fe1f704, 0x7dc11f70, 0x3fd0f43a, 0x45cdedad, 93 .word 0x3fe1cf06, 0xada2811d, 0x3fd0e150, 0xdce2b60c, 94 .word 0x3fe1a7b9, 0x611a7b96, 0x3fd0cea6, 0x317186dc, 95 .word 0x3fe18118, 0x11811812, 0x3fd0bc38, 0xeb8ba412, 96 .word 0x3fe15b1e, 0x5f75270d, 0x3fd0aa07, 0xbd7b7488, 97 .word 0x3fe135c8, 0x1135c811, 0x3fd09811, 0x63615499, 98 .word 0x3fe11111, 0x11111111, 0x3fd08654, 0xa2d4f6db, 99 .word 0x3fe0ecf5, 0x6be69c90, 0x3fd074d0, 0x4a8b1438, 100 .word 0x3fe0c971, 0x4fbcda3b, 0x3fd06383, 0x31ff307a, 101 .word 0x3fe0a681, 0x0a6810a7, 0x3fd0526c, 0x39213bfa, 102 .word 0x3fe08421, 0x08421084, 0x3fd0418a, 0x4806de7d, 103 .word 0x3fe0624d, 0xd2f1a9fc, 0x3fd030dc, 0x4ea03a72, 104 .word 0x3fe04104, 0x10410410, 0x3fd02061, 0x446ffa9a, 105 .word 0x3fe02040, 0x81020408, 0x3fd01018, 0x28467ee9, 106 .word 0x3ff00000, 0x00000000, 0x3fd00000, 0x00000000, 107 .word 0x3fef81f8, 0x1f81f820, 0x3fcfc0bd, 0x88a0f1d9, 108 .word 0x3fef07c1, 0xf07c1f08, 0x3fcf82ec, 0x882c0f9b, 109 .word 0x3fee9131, 0xabf0b767, 0x3fcf467f, 0x2814b0cc, 110 .word 
0x3fee1e1e, 0x1e1e1e1e, 0x3fcf0b68, 0x48d2af1c, 111 .word 0x3fedae60, 0x76b981db, 0x3fced19b, 0x75e78957, 112 .word 0x3fed41d4, 0x1d41d41d, 0x3fce990c, 0xdad55ed2, 113 .word 0x3fecd856, 0x89039b0b, 0x3fce61b1, 0x38f18adc, 114 .word 0x3fec71c7, 0x1c71c71c, 0x3fce2b7d, 0xddfefa66, 115 .word 0x3fec0e07, 0x0381c0e0, 0x3fcdf668, 0x9b7e6350, 116 .word 0x3febacf9, 0x14c1bad0, 0x3fcdc267, 0xbea45549, 117 .word 0x3feb4e81, 0xb4e81b4f, 0x3fcd8f72, 0x08e6b82d, 118 .word 0x3feaf286, 0xbca1af28, 0x3fcd5d7e, 0xa914b937, 119 .word 0x3fea98ef, 0x606a63be, 0x3fcd2c85, 0x34ed6d86, 120 .word 0x3fea41a4, 0x1a41a41a, 0x3fccfc7d, 0xa32a9213, 121 .word 0x3fe9ec8e, 0x951033d9, 0x3fcccd60, 0x45f5d358, 122 .word 0x3fe99999, 0x9999999a, 0x3fcc9f25, 0xc5bfedd9, 123 .word 0x3fe948b0, 0xfcd6e9e0, 0x3fcc71c7, 0x1c71c71c, 124 .word 0x3fe8f9c1, 0x8f9c18fa, 0x3fcc453d, 0x90f057a2, 125 .word 0x3fe8acb9, 0x0f6bf3aa, 0x3fcc1982, 0xb2ece47b, 126 .word 0x3fe86186, 0x18618618, 0x3fcbee90, 0x56fb9c39, 127 .word 0x3fe81818, 0x18181818, 0x3fcbc460, 0x92eb3118, 128 .word 0x3fe7d05f, 0x417d05f4, 0x3fcb9aed, 0xba588347, 129 .word 0x3fe78a4c, 0x8178a4c8, 0x3fcb7232, 0x5b79db11, 130 .word 0x3fe745d1, 0x745d1746, 0x3fcb4a29, 0x3c1d9550, 131 .word 0x3fe702e0, 0x5c0b8170, 0x3fcb22cd, 0x56d87d7e, 132 .word 0x3fe6c16c, 0x16c16c17, 0x3fcafc19, 0xd8606169, 133 .word 0x3fe68168, 0x16816817, 0x3fcad60a, 0x1d0fb394, 134 .word 0x3fe642c8, 0x590b2164, 0x3fcab099, 0xae8f539a, 135 .word 0x3fe60581, 0x60581606, 0x3fca8bc4, 0x41a3d02c, 136 .word 0x3fe5c988, 0x2b931057, 0x3fca6785, 0xb41bacf7, 137 .word 0x3fe58ed2, 0x308158ed, 0x3fca43da, 0x0adc6899, 138 .word 0x3fe55555, 0x55555555, 0x3fca20bd, 0x700c2c3e, 139 .word 0x3fe51d07, 0xeae2f815, 0x3fc9fe2c, 0x315637ee, 140 .word 0x3fe4e5e0, 0xa72f0539, 0x3fc9dc22, 0xbe484458, 141 .word 0x3fe4afd6, 0xa052bf5b, 0x3fc9ba9d, 0xa6c73588, 142 .word 0x3fe47ae1, 0x47ae147b, 0x3fc99999, 0x9999999a, 143 .word 0x3fe446f8, 0x6562d9fb, 0x3fc97913, 0x63068b54, 144 .word 0x3fe41414, 0x14141414, 
0x3fc95907, 0xeb87ab44, 145 .word 0x3fe3e22c, 0xbce4a902, 0x3fc93974, 0x368cfa31, 146 .word 0x3fe3b13b, 0x13b13b14, 0x3fc91a55, 0x6151761c, 147 .word 0x3fe38138, 0x13813814, 0x3fc8fba8, 0xa1bf6f96, 148 .word 0x3fe3521c, 0xfb2b78c1, 0x3fc8dd6b, 0x4563a009, 149 .word 0x3fe323e3, 0x4a2b10bf, 0x3fc8bf9a, 0xb06e1af3, 150 .word 0x3fe2f684, 0xbda12f68, 0x3fc8a234, 0x5cc04426, 151 .word 0x3fe2c9fb, 0x4d812ca0, 0x3fc88535, 0xd90703c6, 152 .word 0x3fe29e41, 0x29e4129e, 0x3fc8689c, 0xc7e07e7d, 153 .word 0x3fe27350, 0xb8812735, 0x3fc84c66, 0xdf0ca4c2, 154 .word 0x3fe24924, 0x92492492, 0x3fc83091, 0xe6a7f7e7, 155 .word 0x3fe21fb7, 0x8121fb78, 0x3fc8151b, 0xb86fee1d, 156 .word 0x3fe1f704, 0x7dc11f70, 0x3fc7fa02, 0x3f1068d1, 157 .word 0x3fe1cf06, 0xada2811d, 0x3fc7df43, 0x7579b9b5, 158 .word 0x3fe1a7b9, 0x611a7b96, 0x3fc7c4dd, 0x663ebb88, 159 .word 0x3fe18118, 0x11811812, 0x3fc7aace, 0x2afa8b72, 160 .word 0x3fe15b1e, 0x5f75270d, 0x3fc79113, 0xebbd7729, 161 .word 0x3fe135c8, 0x1135c811, 0x3fc777ac, 0xde80baea, 162 .word 0x3fe11111, 0x11111111, 0x3fc75e97, 0x46a0b098, 163 .word 0x3fe0ecf5, 0x6be69c90, 0x3fc745d1, 0x745d1746, 164 .word 0x3fe0c971, 0x4fbcda3b, 0x3fc72d59, 0xc45f1fc5, 165 .word 0x3fe0a681, 0x0a6810a7, 0x3fc7152e, 0x9f44f01f, 166 .word 0x3fe08421, 0x08421084, 0x3fc6fd4e, 0x79325467, 167 .word 0x3fe0624d, 0xd2f1a9fc, 0x3fc6e5b7, 0xd16657e1, 168 .word 0x3fe04104, 0x10410410, 0x3fc6ce69, 0x31d5858d, 169 .word 0x3fe02040, 0x81020408, 0x3fc6b761, 0x2ec892f6, 170 171 .word 0x000fffff, 0xffffffff ! DC0 172 .word 0x3ff00000, 0 ! DC1 173 .word 0x7fffc000, 0 ! DC2 174 .word 0x7fe00000, 0 ! DA0 175 .word 0x60000000, 0 ! DA1 176 .word 0x80808080, 0x3f800000 ! SCALE , FONE = 1.0f 177 .word 0x3fefffff, 0xfee7f18f ! KA0 = 9.99999997962321453275e-01 178 .word 0xbfdfffff, 0xfe07e52f ! KA1 = -4.99999998166077580600e-01 179 .word 0x3fd80118, 0x0ca296d9 ! KA2 = 3.75066768969515586277e-01 180 .word 0xbfd400fc, 0x0bbb8e78 ! 
KA3 = -3.12560092408808548438e-01 181 182 #define _0x7f800000 %o0 183 #define _0x7fffffff %o7 184 #define TBL %l2 185 186 #define TBL_SHIFT 2048 187 188 #define stridex %l3 189 #define stridey %l4 190 #define stridez %l5 191 #define counter %i0 192 193 #define DA0 %f52 194 #define DA1 %f44 195 #define SCALE %f6 196 197 #define DC0 %f46 198 #define DC1 %f8 199 #define FZERO %f9 200 #define DC2 %f50 201 202 #define KA3 %f56 203 #define KA2 %f58 204 #define KA1 %f60 205 #define KA0 %f54 206 207 #define tmp_counter STACK_BIAS-0x04 208 #define tmp_px STACK_BIAS-0x20 209 #define tmp_py STACK_BIAS-0x18 210 211 #define ftmp0 STACK_BIAS-0x10 212 #define ftmp1 STACK_BIAS-0x0c 213 #define ftmp2 STACK_BIAS-0x10 214 #define ftmp3 STACK_BIAS-0x0c 215 #define ftmp4 STACK_BIAS-0x08 216 217 ! sizeof temp storage - must be a multiple of 16 for V9 218 #define tmps 0x20 /* Notes on the names above: _0x7f800000 and _0x7fffffff name the integer registers that the entry code fills with those two mask values. TBL is the base of .CONST_TBL, and TBL_SHIFT (2048 = 128 rows of 16 bytes) is the byte offset of the DC0..KA3 constants that follow the lookup rows. FZERO (%f9) is the odd half of the %f8:%f9 pair named DC1, whose low word in the table is 0. ftmp2 shares its stack slot with ftmp0 (STACK_BIAS-0x10) and ftmp3 shares its slot with ftmp1 (STACK_BIAS-0x0c), so each aliased pair can hold only one live value at a time. Every tmp_ and ftmp offset lies inside the tmps (0x20) bytes directly below %fp+STACK_BIAS. */ 219 220 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 221 ! !!!!! algorithm !!!!! 222 ! x0 = *px; 223 ! ax = *(int*)px; 224 ! 225 ! y0 = *py; 226 ! ay = *(int*)py; 227 ! 228 ! ax &= 0x7fffffff; 229 ! ay &= 0x7fffffff; 230 ! 231 ! px += stridex; 232 ! py += stridey; 233 ! 234 ! if ( ax >= 0x7f800000 || ay >= 0x7f800000 ) 235 ! { 236 ! *pz = fabsf(x0) * fabsf(y0); 237 ! if( ax == 0x7f800000 ) *pz = 0.0f; 238 ! else if( ay == 0x7f800000 ) *pz = 0.0f; 239 ! pz += stridez; 240 ! continue; 241 ! } 242 ! 243 ! if ( ay == 0 ) 244 ! { 245 ! if ( ax == 0 ) 246 ! { 247 ! *pz = 1.0f / 0.0f; 248 ! pz += stridez; 249 ! continue; 250 ! } 251 ! } 252 ! 253 ! hyp0 = x0 * (double)x0; 254 ! dtmp0 = y0 * (double)y0; 255 ! hyp0 += dtmp0; 256 ! 257 ! ibase0 = ((int*)&hyp0)[0]; 258 ! 259 ! dbase0 = vis_fand(hyp0,DA0); 260 ! dbase0 = vis_fmul8x16(SCALE, dbase0); 261 ! dbase0 = vis_fpsub32(DA1,dbase0); 262 ! 263 ! hyp0 = vis_fand(hyp0,DC0); 264 ! hyp0 = vis_for(hyp0,DC1); 265 ! h_hi0 = vis_fand(hyp0,DC2); 266 ! 267 ! ibase0 >>= 10; 268 ! si0 = ibase0 & 0x7f0; 269 ! xx0 = ((double*)((char*)TBL + si0))[0]; 270 ! 271 !
dtmp1 = hyp0 - h_hi0; 272 ! xx0 = dtmp1 * xx0; 273 ! res0 = ((double*)((char*)arr + si0))[1]; 274 ! dtmp2 = KA3 * xx0; 275 ! dtmp2 += KA2; 276 ! dtmp2 *= xx0; 277 ! dtmp2 += KA1; 278 ! dtmp2 *= xx0; 279 ! dtmp2 += KA0; 280 ! res0 *= dtmp2; 281 ! res0 *= dbase0; 282 ! ftmp0 = (float)res0; 283 ! *pz = ftmp0; 284 ! pz += stridez; 285 !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!! 286 287 ENTRY(__vrhypotf) 288 save %sp,-SA(MINFRAME)-tmps,%sp 289 PIC_SETUP(l7) 290 PIC_SET(l7,.CONST_TBL,l2) 291 wr %g0,0x82,%asi 292 293 #ifdef __sparcv9 294 ldx [%fp+STACK_BIAS+176],stridez 295 #else 296 ld [%fp+STACK_BIAS+92],stridez 297 #endif 298 299 stx %i1,[%fp+tmp_px] 300 sll %i2,2,stridex 301 302 stx %i3,[%fp+tmp_py] 303 sll %i4,2,stridey 304 305 st %i0,[%fp+tmp_counter] 306 sll stridez,2,stridez 307 mov %i5,%o1 308 309 ldd [TBL+TBL_SHIFT],DC0 310 ldd [TBL+TBL_SHIFT+8],DC1 311 ldd [TBL+TBL_SHIFT+16],DC2 312 ldd [TBL+TBL_SHIFT+24],DA0 313 ldd [TBL+TBL_SHIFT+32],DA1 314 ldd [TBL+TBL_SHIFT+40],SCALE 315 ldd [TBL+TBL_SHIFT+48],KA0 316 317 ldd [TBL+TBL_SHIFT+56],KA1 318 sethi %hi(0x7f800000),%o0 319 320 ldd [TBL+TBL_SHIFT+64],KA2 321 sethi %hi(0x7ffffc00),%o7 322 323 ldd [TBL+TBL_SHIFT+72],KA3 324 add %o7,1023,%o7 325 326 .begin: 327 ld [%fp+tmp_counter],counter 328 ldx [%fp+tmp_px],%o4 329 ldx [%fp+tmp_py],%i2 330 st %g0,[%fp+tmp_counter] 331 .begin1: 332 cmp counter,0 333 ble,pn %icc,.exit 334 nop 335 336 lda [%i2]0x82,%l6 ! (3_0) ay = *(int*)py; 337 338 lda [%o4]0x82,%i5 ! (3_0) ax = *(int*)px; 339 340 lda [%i2]0x82,%f2 ! (3_0) y0 = *py; 341 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; 342 343 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; 344 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 345 bge,pn %icc,.spec0 ! (3_0) if ( ay >= 0x7f800000 ) 346 lda [%o4]0x82,%f4 ! (3_0) x0 = *px; 347 348 cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 349 bge,pn %icc,.spec0 ! (3_0) if ( ax >= 0x7f800000 ) 350 nop 351 352 cmp %l6,0 ! (3_0) 353 be,pn %icc,.spec1 ! 
(3_0) if ( ay == 0 ) 354 fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; 355 .cont_spec1: 356 lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; 357 358 fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; 359 lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; 360 361 add %o4,stridex,%l0 ! px += stridex 362 363 add %i2,stridey,%i2 ! py += stridey 364 and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; 365 366 and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; 367 lda [%i2]0x82,%f2 ! (4_0) y0 = *py; 368 369 faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; 370 cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 371 372 bge,pn %icc,.update0 ! (4_0) if ( ay >= 0x7f800000 ) 373 lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; 374 .cont0: 375 cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 376 bge,pn %icc,.update1 ! (4_0) if ( ax >= 0x7f800000 ) 377 st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; 378 .cont1: 379 cmp %l6,0 ! (4_1) ay ? 0 380 be,pn %icc,.update2 ! (4_1) if ( ay == 0 ) 381 fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; 382 .cont2: 383 lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; 384 385 fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0; 386 lda [%l0+stridex]0x82,%i5 ! (0_0) ax = *(int*)px; 387 388 add %l0,stridex,%i1 ! px += stridex 389 390 add %i2,stridey,%i2 ! py += stridey 391 and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; 392 393 and %i5,_0x7fffffff,%i5 ! (0_0) ax &= 0x7fffffff; 394 lda [%i2]0x82,%f2 ! (0_0) y0 = *py; 395 396 cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 397 bge,pn %icc,.update3 ! (0_0) if ( ay >= 0x7f800000 ) 398 faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; 399 .cont3: 400 lda [%i1]0x82,%f4 ! (0_0) x0 = *px; 401 402 cmp %i5,_0x7f800000 ! (0_0) ax ? 0x7f800000 403 bge,pn %icc,.update4 ! (0_0) if ( ax >= 0x7f800000 ) 404 st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; 405 .cont4: 406 cmp %l6,0 ! (0_0) ay ? 0 407 be,pn %icc,.update5 ! (0_0) if ( ay == 0 ) 408 fsmuld %f4,%f4,%f38 ! 
(0_0) hyp0 = x0 * (double)x0; 409 .cont5: 410 lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; 411 412 fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; 413 lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; 414 415 add %i1,stridex,%g5 ! px += stridex 416 417 add %i2,stridey,%o3 ! py += stridey 418 and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; 419 fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); 420 421 and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; 422 lda [%o3]0x82,%f2 ! (1_0) y0 = *py; 423 424 faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; 425 cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000 426 427 lda [%g5]0x82,%f4 ! (1_0) x0 = *px; 428 bge,pn %icc,.update6 ! (1_0) if ( ay >= 0x7f800000 ) 429 for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); 430 .cont6: 431 cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 432 bge,pn %icc,.update7 ! (1_0) if ( ax >= 0x7f800000 ) 433 ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; 434 .cont7: 435 st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; 436 437 cmp %l6,0 ! (1_0) ay ? 0 438 be,pn %icc,.update8 ! (1_0) if ( ay == 0 ) 439 fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2); 440 .cont8: 441 fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; 442 sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; 443 444 and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; 445 lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; 446 447 fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; 448 add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 449 lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; 450 fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; 451 452 add %g5,stridex,%i4 ! px += stridex 453 ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; 454 455 and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; 456 add %o3,stridey,%i2 ! py += stridey 457 fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); 458 459 and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; 460 lda [%i2]0x82,%f2 ! (2_0) y0 = *py; 461 462 faddd %f38,%f62,%f16 ! 
(1_0) hyp0 += dtmp0; 463 cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 464 fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; 465 466 lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; 467 bge,pn %icc,.update9 ! (2_0) if ( ay >= 0x7f800000 468 for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); 469 .cont9: 470 cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 471 bge,pn %icc,.update10 ! (2_0) if ( ax >= 0x7f800000 ) 472 ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; 473 .cont10: 474 st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; 475 476 fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; 477 cmp %l6,0 ! (2_0) ay ? 0 478 be,pn %icc,.update11 ! (2_0) if ( ay == 0 ) 479 fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2); 480 .cont11: 481 fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; 482 sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; 483 484 and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; 485 lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; 486 487 fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; 488 add %i3,TBL,%i3 ! (4_1) (char*)TBL + si0 489 lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; 490 fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; 491 492 add %i4,stridex,%o4 ! px += stridex 493 ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; 494 faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; 495 496 add %i2,stridey,%i2 ! py += stridey 497 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; 498 fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); 499 500 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; 501 lda [%i2]0x82,%f2 ! (3_0) y0 = *py; 502 503 faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; 504 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 505 fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; 506 507 fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; 508 lda [%o4]0x82,%f4 ! (3_0) x0 = *px; 509 bge,pn %icc,.update12 ! (3_0) if ( ay >= 0x7f800000 ) 510 for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); 511 .cont12: 512 cmp %i5,_0x7f800000 ! (3_0) ax ? 
0x7f800000 513 bge,pn %icc,.update13 ! (3_0) if ( ax >= 0x7f800000 ) 514 ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0]; 515 .cont13: 516 st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; 517 518 fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; 519 cmp %l6,0 ! (3_0) 520 be,pn %icc,.update14 ! (3_0) if ( ay == 0 ) 521 fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); 522 .cont14: 523 fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; 524 sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; 525 faddd %f10,KA1,%f40 ! (3_1) dtmp2 += KA1; 526 527 and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; 528 lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; 529 530 fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; 531 add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 532 lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; 533 fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; 534 535 add %o4,stridex,%l0 ! px += stridex 536 ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; 537 faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; 538 539 fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; 540 add %i2,stridey,%i2 ! py += stridey 541 and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; 542 fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); 543 544 and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; 545 lda [%i2]0x82,%f2 ! (4_0) y0 = *py; 546 fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); 547 548 faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; 549 cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 550 ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; 551 fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; 552 553 fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; 554 lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; 555 bge,pn %icc,.update15 ! (4_0) if ( ay >= 0x7f800000 ) 556 for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); 557 .cont15: 558 fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 559 cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 560 ld [%fp+ftmp2],%i1 ! 
(1_0) ibase0 = ((int*)&hyp0)[0]; 561 faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; 562 563 bge,pn %icc,.update16 ! (4_0) if ( ax >= 0x7f800000 ) 564 st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; 565 .cont16: 566 fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; 567 fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); 568 569 mov %o1,%i4 570 cmp counter,5 571 bl,pn %icc,.tail 572 nop 573 574 ba .main_loop 575 sub counter,5,counter 576 577 .align 16 578 .main_loop: 579 fsmuld %f4,%f4,%f38 ! (4_1) hyp0 = x0 * (double)x0; 580 sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; 581 cmp %l6,0 ! (4_1) ay ? 0 582 faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; 583 584 fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; 585 and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; 586 lda [%i2+stridey]0x82,%l6 ! (0_0) ay = *(int*)py; 587 fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); 588 589 fsmuld %f2,%f2,%f62 ! (4_1) dtmp0 = y0 * (double)y0; 590 add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 591 lda [%l0+stridex]0x82,%o1 ! (0_0) ax = *(int*)px; 592 fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; 593 594 add %l0,stridex,%i1 ! px += stridex 595 ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; 596 be,pn %icc,.update17 ! (4_1) if ( ay == 0 ) 597 faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; 598 .cont17: 599 fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; 600 add %i2,stridey,%i2 ! py += stridey 601 and %l6,_0x7fffffff,%l6 ! (0_0) ay &= 0x7fffffff; 602 fand %f18,DC0,%f30 ! (2_1) hyp0 = vis_fand(hyp0,DC0); 603 604 fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; 605 and %o1,_0x7fffffff,%o1 ! (0_0) ax &= 0x7fffffff; 606 lda [%i2]0x82,%f2 ! (0_0) y0 = *py; 607 fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); 608 609 faddd %f38,%f62,%f12 ! (4_1) hyp0 += dtmp0; 610 cmp %l6,_0x7f800000 ! (0_0) ay ? 0x7f800000 611 ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; 612 fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; 613 614 fmuld %f10,%f26,%f10 ! 
(0_1) dtmp2 *= xx0; 615 lda [%i1]0x82,%f4 ! (0_0) x0 = *px; 616 bge,pn %icc,.update18 ! (0_0) if ( ay >= 0x7f800000 ) 617 for %f30,DC1,%f28 ! (2_1) hyp0 = vis_for(hyp0,DC1); 618 .cont18: 619 fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); 620 cmp %o1,_0x7f800000 ! (0_0) ax ? 0x7f800000 621 ld [%fp+ftmp3],%l0 ! (2_1) ibase0 = ((int*)&hyp0)[0]; 622 faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; 623 624 add %i4,stridez,%i3 ! pz += stridez 625 st %f12,[%fp+ftmp0] ! (4_1) ibase0 = ((int*)&hyp0)[0]; 626 bge,pn %icc,.update19 ! (0_0) if ( ax >= 0x7f800000 ) 627 fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; 628 .cont19: 629 fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; 630 cmp %l6,0 ! (0_0) ay ? 0 631 st %f1,[%i4] ! (3_2) *pz = ftmp0; 632 fand %f28,DC2,%f30 ! (2_1) h_hi0 = vis_fand(hyp0,DC2); 633 634 fsmuld %f4,%f4,%f38 ! (0_0) hyp0 = x0 * (double)x0; 635 sra %l0,10,%i4 ! (2_1) ibase0 >>= 10; 636 be,pn %icc,.update20 ! (0_0) if ( ay == 0 ) 637 faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1; 638 .cont20: 639 fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2; 640 and %i4,2032,%g1 ! (2_1) si0 = ibase0 & 0x7f0; 641 lda [%i2+stridey]0x82,%l6 ! (1_0) ay = *(int*)py; 642 fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); 643 644 fsmuld %f2,%f2,%f62 ! (0_0) dtmp0 = y0 * (double)y0; 645 add %g1,TBL,%l0 ! (2_1) (char*)TBL + si0 646 lda [%i1+stridex]0x82,%i5 ! (1_0) ax = *(int*)px; 647 fsubd %f28,%f30,%f28 ! (2_1) dtmp1 = hyp0 - h_hi0; 648 649 nop 650 add %i1,stridex,%g5 ! px += stridex 651 ldd [TBL+%g1],%f42 ! (2_1) xx0 = ((double*)((char*)TBL + si0))[0]; 652 faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; 653 654 fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; 655 add %i2,stridey,%o3 ! py += stridey 656 and %l6,_0x7fffffff,%l6 ! (1_0) ay &= 0x7fffffff; 657 fand %f20,DC0,%f30 ! (3_1) hyp0 = vis_fand(hyp0,DC0); 658 659 fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; 660 and %i5,_0x7fffffff,%i5 ! (1_0) ax &= 0x7fffffff; 661 lda [%o3]0x82,%f2 ! 
(1_0) y0 = *py; 662 fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); 663 664 faddd %f38,%f62,%f14 ! (0_0) hyp0 += dtmp0; 665 cmp %l6,_0x7f800000 ! (1_0) ay ? 0x7f800000 666 ldd [%l1+8],%f62 ! (0_1) res0 = ((double*)((char*)arr + si0))[1]; 667 fmuld %f28,%f42,%f32 ! (2_1) xx0 = dtmp1 * xx0; 668 669 fmuld %f10,%f36,%f10 ! (1_1) dtmp2 *= xx0; 670 lda [%g5]0x82,%f4 ! (1_0) x0 = *px; 671 bge,pn %icc,.update21 ! (1_0) if ( ay >= 0x7f800000 ) 672 for %f30,DC1,%f28 ! (3_1) hyp0 = vis_for(hyp0,DC1); 673 .cont21: 674 fmul8x16 SCALE,%f24,%f24 ! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 675 cmp %i5,_0x7f800000 ! (1_0) ax ? 0x7f800000 676 ld [%fp+ftmp4],%l1 ! (3_1) ibase0 = ((int*)&hyp0)[0]; 677 faddd %f40,KA0,%f42 ! (0_1) dtmp2 += KA0 678 679 add %i3,stridez,%o1 ! pz += stridez 680 st %f14,[%fp+ftmp1] ! (0_0) ibase0 = ((int*)&hyp0)[0]; 681 bge,pn %icc,.update22 ! (1_0) if ( ax >= 0x7f800000 ) 682 fdtos %f26,%f1 ! (4_2) ftmp0 = (float)res0; 683 .cont22: 684 fmuld KA3,%f32,%f34 ! (2_1) dtmp2 = KA3 * xx0; 685 cmp %l6,0 ! (1_0) ay ? 0 686 st %f1,[%i3] ! (4_2) *pz = ftmp0; 687 fand %f28,DC2,%f30 ! (3_1) h_hi0 = vis_fand(hyp0,DC2); 688 689 fsmuld %f4,%f4,%f38 ! (1_0) hyp0 = x0 * (double)x0; 690 sra %l1,10,%o5 ! (3_1) ibase0 >>= 10; 691 be,pn %icc,.update23 ! (1_0) if ( ay == 0 ) 692 faddd %f10,KA1,%f40 ! (1_1) dtmp2 += KA1; 693 .cont23: 694 fmuld %f62,%f42,%f26 ! (0_1) res0 *= dtmp2; 695 and %o5,2032,%o4 ! (3_1) si0 = ibase0 & 0x7f0; 696 lda [%o3+stridey]0x82,%l6 ! (2_0) ay = *(int*)py; 697 fpsub32 DA1,%f24,%f24 ! (0_1) dbase0 = vis_fpsub32(DA1,dbase0); 698 699 fsmuld %f2,%f2,%f62 ! (1_0) dtmp0 = y0 * (double)y0; 700 add %o4,TBL,%l7 ! (3_1) (char*)TBL + si0 701 lda [stridex+%g5]0x82,%i5 ! (2_0) ax = *(int*)px; 702 fsubd %f28,%f30,%f28 ! (3_1) dtmp1 = hyp0 - h_hi0; 703 704 nop 705 add %g5,stridex,%i4 ! px += stridex 706 ldd [TBL+%o4],%f42 ! (3_1) xx0 = ((double*)((char*)TBL + si0))[0]; 707 faddd %f34,KA2,%f10 ! (2_1) dtmp2 += KA2; 708 709 fmuld %f40,%f36,%f40 ! 
(1_1) dtmp2 *= xx0; 710 and %l6,_0x7fffffff,%l6 ! (2_0) ay &= 0x7fffffff; 711 add %o3,stridey,%i2 ! py += stridey 712 fand %f12,DC0,%f30 ! (4_1) hyp0 = vis_fand(hyp0,DC0); 713 714 fmuld %f26,%f24,%f36 ! (0_1) res0 *= dbase0; 715 and %i5,_0x7fffffff,%i5 ! (2_0) ax &= 0x7fffffff; 716 lda [%i2]0x82,%f2 ! (2_0) y0 = *py; 717 fand %f16,DA0,%f24 ! (1_1) dbase0 = vis_fand(hyp0,DA0); 718 719 faddd %f38,%f62,%f16 ! (1_0) hyp0 += dtmp0; 720 cmp %l6,_0x7f800000 ! (2_0) ay ? 0x7f800000 721 ldd [%o2+8],%f38 ! (1_1) res0 = ((double*)((char*)arr + si0))[1]; 722 fmuld %f28,%f42,%f26 ! (3_1) xx0 = dtmp1 * xx0; 723 724 fmuld %f10,%f32,%f10 ! (2_1) dtmp2 *= xx0; 725 lda [stridex+%g5]0x82,%f4 ! (2_0) x0 = *px; 726 bge,pn %icc,.update24 ! (2_0) if ( ay >= 0x7f800000 727 for %f30,DC1,%f28 ! (4_1) hyp0 = vis_for(hyp0,DC1); 728 .cont24: 729 fmul8x16 SCALE,%f24,%f24 ! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 730 cmp %i5,_0x7f800000 ! (2_0) ax ? 0x7f800000 731 ld [%fp+ftmp0],%i3 ! (4_1) ibase0 = ((int*)&hyp0)[0]; 732 faddd %f40,KA0,%f62 ! (1_1) dtmp2 += KA0; 733 734 add %o1,stridez,%g1 ! pz += stridez 735 st %f16,[%fp+ftmp2] ! (1_0) ibase0 = ((int*)&hyp0)[0]; 736 bge,pn %icc,.update25 ! (2_0) if ( ax >= 0x7f800000 ) 737 fdtos %f36,%f1 ! (0_1) ftmp0 = (float)res0; 738 .cont25: 739 fmuld KA3,%f26,%f34 ! (3_1) dtmp2 = KA3 * xx0; 740 cmp %l6,0 ! (2_0) ay ? 0 741 st %f1,[%o1] ! (0_1) *pz = ftmp0; 742 fand %f28,DC2,%f30 ! (4_1) h_hi0 = vis_fand(hyp0,DC2); 743 744 fsmuld %f4,%f4,%f36 ! (2_0) hyp0 = x0 * (double)x0; 745 sra %i3,10,%i3 ! (4_1) ibase0 >>= 10; 746 be,pn %icc,.update26 ! (2_0) if ( ay == 0 ) 747 faddd %f10,KA1,%f40 ! (2_1) dtmp2 += KA1; 748 .cont26: 749 fmuld %f38,%f62,%f38 ! (1_1) res0 *= dtmp2; 750 and %i3,2032,%i3 ! (4_1) si0 = ibase0 & 0x7f0; 751 lda [%i2+stridey]0x82,%l6 ! (3_0) ay = *(int*)py; 752 fpsub32 DA1,%f24,%f24 ! (1_1) dbase0 = vis_fpsub32(DA1,dbase0); 753 754 fsmuld %f2,%f2,%f62 ! (2_0) dtmp0 = y0 * (double)y0; 755 add %i3,TBL,%i3 ! 
(4_1) (char*)TBL + si0 756 lda [%i4+stridex]0x82,%i5 ! (3_0) ax = *(int*)px; 757 fsubd %f28,%f30,%f28 ! (4_1) dtmp1 = hyp0 - h_hi0; 758 759 nop 760 add %i4,stridex,%o4 ! px += stridex 761 ldd [%i3],%f42 ! (4_1) xx0 = ((double*)((char*)TBL + si0))[0]; 762 faddd %f34,KA2,%f10 ! (3_1) dtmp2 += KA2; 763 764 fmuld %f40,%f32,%f40 ! (2_1) dtmp2 *= xx0; 765 add %i2,stridey,%i2 ! py += stridey 766 and %l6,_0x7fffffff,%l6 ! (3_0) ay &= 0x7fffffff; 767 fand %f14,DC0,%f30 ! (0_0) hyp0 = vis_fand(hyp0,DC0); 768 769 fmuld %f38,%f24,%f38 ! (1_1) res0 *= dbase0; 770 and %i5,_0x7fffffff,%i5 ! (3_0) ax &= 0x7fffffff; 771 lda [%i2]0x82,%f2 ! (3_0) y0 = *py; 772 fand %f18,DA0,%f24 ! (2_1) dbase0 = vis_fand(hyp0,DA0); 773 774 faddd %f36,%f62,%f18 ! (2_0) hyp0 += dtmp0; 775 cmp %l6,_0x7f800000 ! (3_0) ay ? 0x7f800000 776 ldd [%l0+8],%f62 ! (2_1) res0 = ((double*)((char*)arr + si0))[1]; 777 fmuld %f28,%f42,%f32 ! (4_1) xx0 = dtmp1 * xx0; 778 779 fmuld %f10,%f26,%f10 ! (3_1) dtmp2 *= xx0; 780 lda [%o4]0x82,%f4 ! (3_0) x0 = *px; 781 bge,pn %icc,.update27 ! (3_0) if ( ay >= 0x7f800000 ) 782 for %f30,DC1,%f28 ! (0_0) hyp0 = vis_for(hyp0,DC1); 783 .cont27: 784 fmul8x16 SCALE,%f24,%f24 ! (2_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 785 cmp %i5,_0x7f800000 ! (3_0) ax ? 0x7f800000 786 ld [%fp+ftmp1],%i1 ! (0_0) ibase0 = ((int*)&hyp0)[0]; 787 faddd %f40,KA0,%f42 ! (2_1) dtmp2 += KA0; 788 789 add %g1,stridez,%o3 ! pz += stridez 790 st %f18,[%fp+ftmp3] ! (2_0) ibase0 = ((int*)&hyp0)[0]; 791 bge,pn %icc,.update28 ! (3_0) if ( ax >= 0x7f800000 ) 792 fdtos %f38,%f1 ! (1_1) ftmp0 = (float)res0; 793 .cont28: 794 fmuld KA3,%f32,%f34 ! (4_1) dtmp2 = KA3 * xx0; 795 cmp %l6,0 ! (3_0) 796 st %f1,[%g1] ! (1_1) *pz = ftmp0; 797 fand %f28,DC2,%f30 ! (0_0) h_hi0 = vis_fand(hyp0,DC2); 798 799 fsmuld %f4,%f4,%f36 ! (3_0) hyp0 = x0 * (double)x0; 800 sra %i1,10,%l1 ! (0_0) ibase0 >>= 10; 801 be,pn %icc,.update29 ! (3_0) if ( ay == 0 ) 802 faddd %f10,KA1,%f40 ! 
(3_1) dtmp2 += KA1; 803 .cont29: 804 fmuld %f62,%f42,%f38 ! (2_1) res0 *= dtmp2; 805 and %l1,2032,%o5 ! (0_0) si0 = ibase0 & 0x7f0; 806 lda [%i2+stridey]0x82,%l6 ! (4_0) ay = *(int*)py; 807 fpsub32 DA1,%f24,%f24 ! (2_1) dbase0 = vis_fpsub32(DA1,dbase0); 808 809 fsmuld %f2,%f2,%f62 ! (3_0) dtmp0 = y0 * (double)y0; 810 add %o5,TBL,%l1 ! (0_0) (char*)TBL + si0 811 lda [stridex+%o4]0x82,%i5 ! (4_0) ax = *(int*)px; 812 fsubd %f28,%f30,%f28 ! (0_0) dtmp1 = hyp0 - h_hi0; 813 814 add %o3,stridez,%i4 ! pz += stridez 815 add %o4,stridex,%l0 ! px += stridex 816 ldd [TBL+%o5],%f42 ! (0_0) xx0 = ((double*)((char*)TBL + si0))[0]; 817 faddd %f34,KA2,%f10 ! (4_1) dtmp2 += KA2; 818 819 fmuld %f40,%f26,%f40 ! (3_1) dtmp2 *= xx0; 820 add %i2,stridey,%i2 ! py += stridey 821 and %l6,_0x7fffffff,%l6 ! (4_0) ay &= 0x7fffffff; 822 fand %f16,DC0,%f30 ! (1_0) hyp0 = vis_fand(hyp0,DC0); 823 824 fmuld %f38,%f24,%f38 ! (2_1) res0 *= dbase0; 825 and %i5,_0x7fffffff,%i5 ! (4_0) ax &= 0x7fffffff; 826 lda [%i2]0x82,%f2 ! (4_0) y0 = *py; 827 fand %f20,DA0,%f24 ! (3_1) dbase0 = vis_fand(hyp0,DA0); 828 829 faddd %f36,%f62,%f20 ! (3_0) hyp0 += dtmp0; 830 cmp %l6,_0x7f800000 ! (4_0) ay ? 0x7f800000 831 ldd [%l7+8],%f36 ! (3_1) res0 = ((double*)((char*)arr + si0))[1]; 832 fmuld %f28,%f42,%f26 ! (0_0) xx0 = dtmp1 * xx0; 833 834 fmuld %f10,%f32,%f10 ! (4_1) dtmp2 *= xx0; 835 lda [stridex+%o4]0x82,%f4 ! (4_0) x0 = *px; 836 bge,pn %icc,.update30 ! (4_0) if ( ay >= 0x7f800000 ) 837 for %f30,DC1,%f28 ! (1_0) hyp0 = vis_for(hyp0,DC1); 838 .cont30: 839 fmul8x16 SCALE,%f24,%f24 ! (3_1) dbase0 = vis_fmul8x16(SCALE, dbase0); 840 cmp %i5,_0x7f800000 ! (4_0) ax ? 0x7f800000 841 ld [%fp+ftmp2],%i1 ! (1_0) ibase0 = ((int*)&hyp0)[0]; 842 faddd %f40,KA0,%f62 ! (3_1) dtmp2 += KA0; 843 844 bge,pn %icc,.update31 ! (4_0) if ( ax >= 0x7f800000 ) 845 st %f20,[%fp+ftmp4] ! (3_0) ibase0 = ((int*)&hyp0)[0]; 846 .cont31: 847 subcc counter,5,counter ! counter -= 5; 848 fdtos %f38,%f1 ! 
(2_1) ftmp0 = (float)res0; 849 850 fmuld KA3,%f26,%f34 ! (0_0) dtmp2 = KA3 * xx0; 851 st %f1,[%o3] ! (2_1) *pz = ftmp0; 852 bpos,pt %icc,.main_loop 853 fand %f28,DC2,%f30 ! (1_0) h_hi0 = vis_fand(hyp0,DC2); 854 855 add counter,5,counter 856 857 .tail: 858 subcc counter,1,counter 859 bneg .begin 860 mov %i4,%o1 861 862 sra %i1,10,%o2 ! (1_1) ibase0 >>= 10; 863 faddd %f10,KA1,%f40 ! (4_2) dtmp2 += KA1; 864 865 fmuld %f36,%f62,%f36 ! (3_2) res0 *= dtmp2; 866 and %o2,2032,%o2 ! (1_1) si0 = ibase0 & 0x7f0; 867 fpsub32 DA1,%f24,%f24 ! (3_2) dbase0 = vis_fpsub32(DA1,dbase0); 868 869 add %o2,TBL,%o2 ! (1_1) (char*)TBL + si0 870 fsubd %f28,%f30,%f28 ! (1_1) dtmp1 = hyp0 - h_hi0; 871 872 ldd [%o2],%f42 ! (1_1) xx0 = ((double*)((char*)TBL + si0))[0]; 873 faddd %f34,KA2,%f10 ! (0_1) dtmp2 += KA2; 874 875 fmuld %f40,%f32,%f40 ! (4_2) dtmp2 *= xx0; 876 877 fmuld %f36,%f24,%f32 ! (3_2) res0 *= dbase0; 878 fand %f12,DA0,%f24 ! (4_2) dbase0 = vis_fand(hyp0,DA0); 879 880 ldd [%i3+8],%f62 ! (4_2) res0 = ((double*)((char*)arr + si0))[1]; 881 fmuld %f28,%f42,%f36 ! (1_1) xx0 = dtmp1 * xx0; 882 883 fmuld %f10,%f26,%f10 ! (0_1) dtmp2 *= xx0; 884 885 fmul8x16 SCALE,%f24,%f24 ! (4_2) dbase0 = vis_fmul8x16(SCALE, dbase0); 886 faddd %f40,KA0,%f42 ! (4_2) dtmp2 += KA0; 887 888 add %i4,stridez,%i3 ! pz += stridez 889 fdtos %f32,%f1 ! (3_2) ftmp0 = (float)res0; 890 891 fmuld KA3,%f36,%f34 ! (1_1) dtmp2 = KA3 * xx0; 892 st %f1,[%i4] ! (3_2) *pz = ftmp0; 893 894 subcc counter,1,counter 895 bneg .begin 896 mov %i3,%o1 897 898 faddd %f10,KA1,%f40 ! (0_1) dtmp2 += KA1; 899 900 fmuld %f62,%f42,%f32 ! (4_2) res0 *= dtmp2; 901 fpsub32 DA1,%f24,%f24 ! (4_2) dbase0 = vis_fpsub32(DA1,dbase0); 902 903 904 faddd %f34,KA2,%f10 ! (1_1) dtmp2 += KA2; 905 906 fmuld %f40,%f26,%f40 ! (0_1) dtmp2 *= xx0; 907 908 fmuld %f32,%f24,%f26 ! (4_2) res0 *= dbase0; 909 fand %f14,DA0,%f24 ! (0_1) dbase0 = vis_fand(hyp0,DA0); 910 911 ldd [%l1+8],%f62 ! 
! (0_1) res0 = ((double*)((char*)arr + si0))[1];
! (the line above is the trailing comment of the preceding
!  "ldd [%l1+8],%f62")
!
! Remainder of the .tail pipeline drain.  Each "subcc counter,1,counter /
! bneg .begin" pair below leaves for .begin as soon as counter becomes
! negative; on every exit to .begin %o1 holds the next output address
! (pz).

	fmuld	%f10,%f36,%f10		! (1_1) dtmp2 *= xx0;

	fmul8x16	SCALE,%f24,%f24	! (0_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f42		! (0_1) dtmp2 += KA0

	add	%i3,stridez,%o1		! pz += stridez
	fdtos	%f26,%f1		! (4_2) ftmp0 = (float)res0;

	st	%f1,[%i3]		! (4_2) *pz = ftmp0;

	subcc	counter,1,counter
	bneg	.begin			! counter < 0: drain finished
	nop				! delay slot; %o1 was set by the add above

	faddd	%f10,KA1,%f40		! (1_1) dtmp2 += KA1;

	fmuld	%f62,%f42,%f26		! (0_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24		! (0_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f40,%f36,%f40		! (1_1) dtmp2 *= xx0;

	fmuld	%f26,%f24,%f36		! (0_1) res0 *= dbase0;
	fand	%f16,DA0,%f24		! (1_1) dbase0 = vis_fand(hyp0,DA0);

	ldd	[%o2+8],%f38		! (1_1) res0 = ((double*)((char*)arr + si0))[1];

	fmul8x16	SCALE,%f24,%f24	! (1_1) dbase0 = vis_fmul8x16(SCALE, dbase0);
	faddd	%f40,KA0,%f62		! (1_1) dtmp2 += KA0;

	add	%o1,stridez,%g1		! pz += stridez
	fdtos	%f36,%f1		! (0_1) ftmp0 = (float)res0;

	st	%f1,[%o1]		! (0_1) *pz = ftmp0;

	subcc	counter,1,counter
	bneg	.begin			! counter < 0: drain finished
	mov	%g1,%o1			! delay slot (always executed): %o1 = next pz

	fmuld	%f38,%f62,%f38		! (1_1) res0 *= dtmp2;
	fpsub32	DA1,%f24,%f24		! (1_1) dbase0 = vis_fpsub32(DA1,dbase0);

	fmuld	%f38,%f24,%f38		! (1_1) res0 *= dbase0;

	fdtos	%f38,%f1		! (1_1) ftmp0 = (float)res0;
	st	%f1,[%g1]		! (1_1) *pz = ftmp0;

	ba	.begin
	add	%g1,stridez,%o1		! pz += stridez (delay slot)

! ---------------------------------------------------------------------
! .spec0: stores the result for one element directly, then advances
! px/py/pz by their strides, decrements counter and continues at
! .begin1.
!   ay == 0x7f800000 or ax == 0x7f800000  ->  *pz = 0.0f
!   otherwise                             ->  *pz = fabsf(x0) * fabsf(y0)
! On entry %f2 = y0, %f4 = x0, %l6 = ay, %i5 = ax (per the original
! comments); both FP registers are overwritten.
! NOTE(review): the branch that reaches .spec0 is outside this chunk;
! presumably it is taken when ax or ay >= 0x7f800000 - confirm.
! ---------------------------------------------------------------------
	.align	16
.spec0:
	fabss	%f2,%f2			! fabsf(y0);

	fabss	%f4,%f4			! fabsf(x0);

	fcmps	%f2,%f4			! NOTE(review): no visible branch consumes
					! this compare - presumably executed only
					! for its FP exception side effect; confirm

	cmp	%l6,_0x7f800000		! ay ? 0x7f800000
	be,a	1f			! if( ay == 0x7f800000 )
	st	%g0,[%o1]		! *pz = 0.0f; (annulled slot: runs only if taken)

	cmp	%i5,_0x7f800000		! ax ? 0x7f800000
	be,a	1f			! if( ax == 0x7f800000 )
	st	%g0,[%o1]		! *pz = 0.0f; (annulled slot: runs only if taken)

	fmuls	%f2,%f4,%f2		! fabsf(x0) * fabsf(y0);
	st	%f2,[%o1]		! *pz = fabsf(x0) * fabsf(y0);
1:
	add	%o4,stridex,%o4		! px += stridex;
	add	%i2,stridey,%i2		! py += stridey;

	add	%o1,stridez,%o1		! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--; (delay slot)

! ---------------------------------------------------------------------
! .spec1: if ax (%i5) is non-zero, return to the normal path at
! .cont_spec1.  Otherwise store %f7 / %f9 as the result for this
! element, advance px/py/pz, decrement counter and continue at .begin1.
! NOTE(review): %f7 and %f9 are set up outside this chunk; the original
! comment states they hold 1.0f and 0.0f - confirm.
! ---------------------------------------------------------------------
	.align	16
.spec1:
	cmp	%i5,0			! ax ? 0
	bne,pt	%icc,.cont_spec1	! if ( ax != 0 )
	nop

	add	%o4,stridex,%o4		! px += stridex;
	add	%i2,stridey,%i2		! py += stridey;

	fdivs	%f7,%f9,%f2		! 1.0f / 0.0f
	st	%f2,[%o1]		! *pz = 1.0f / 0.0f;

	add	%o1,stridez,%o1		! pz += stridez;
	ba	.begin1
	sub	counter,1,counter	! counter--; (delay slot)

! ---------------------------------------------------------------------
! .update0 - .update31: out-of-line handlers branched to from the
! pipelined code; each one returns to its matching .contN label.
! All handlers share one template, with K the immediate used by the
! cmp/sub/mov instructions:
!
!	cmp	counter,K
!	ble	.contN		! counter <= K: return without saving
!	ld / fmovd ...		! delay slot, not annulled: always runs
!				! and overwrites the operand register
!	sub	counter,K,counter
!	st	counter,[%fp+tmp_counter]	! save counter - K
!	stx	<px reg>,[%fp+tmp_px]		! save an x pointer
!	stx	<py reg>,[%fp+tmp_py]		! save a y pointer
!	ba	.contN
!	mov	K,counter	! delay slot: counter = K
!
! Handlers that start with "cmp <reg>,0 / bne .contN" return at once
! when <reg> is non-zero.  The "cmp counter,K" that follows sits in
! the delay slot of that bne, so it executes on both paths.
!
! NOTE(review): tmp_counter/tmp_px/tmp_py are presumably reloaded at
! .begin so that processing restarts at the saved element, and the
! word at TBL+TBL_SHIFT+44 is presumably a harmless substitute operand;
! neither is visible in this chunk - confirm.
! ---------------------------------------------------------------------

! .update0: K = 1; overwrites %f2; saves px from %l0, py from %i2.
	.align	16
.update0:
	cmp	counter,1
	ble	.cont0
	ld	[TBL+TBL_SHIFT+44],%f2	! delay slot: always executed

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont0
	mov	1,counter		! delay slot: counter = 1

! .update1: K = 1; overwrites %f4; saves px from %l0, py from %i2.
	.align	16
.update1:
	cmp	counter,1
	ble	.cont1
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont1
	mov	1,counter

! .update2: acts only when %i5 == 0; K = 1; overwrites %f2;
! saves px from %l0, py from %i2.
	.align	16
.update2:
	cmp	%i5,0
	bne	.cont2

	cmp	counter,1		! delay slot of the bne above
	ble	.cont2
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont2
	mov	1,counter

! .update3: K = 2; overwrites %f2; saves px from %i1, py from %i2.
	.align	16
.update3:
	cmp	counter,2
	ble	.cont3
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont3
	mov	2,counter

! .update4: K = 2; overwrites %f4; saves px from %i1, py from %i2.
	.align	16
.update4:
	cmp	counter,2
	ble	.cont4
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont4
	mov	2,counter

! .update5: acts only when %i5 == 0; K = 2; overwrites %f2;
! saves px from %i1, py from %i2.
	.align	16
.update5:
	cmp	%i5,0
	bne	.cont5

	cmp	counter,2		! delay slot of the bne above
	ble	.cont5
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont5
	mov	2,counter

! .update6: K = 3; overwrites %f2; saves px from %g5, py from %o3.
	.align	16
.update6:
	cmp	counter,3
	ble	.cont6
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont6
	mov	3,counter

! .update7: K = 3; overwrites %f4; saves px from %g5, py from %o3.
	.align	16
.update7:
	cmp	counter,3
	ble	.cont7
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont7
	mov	3,counter

! .update8: acts only when %i5 == 0; K = 3; overwrites %f2;
! saves px from %g5, py from %o3.
	.align	16
.update8:
	cmp	%i5,0
	bne	.cont8

	cmp	counter,3		! delay slot of the bne above
	ble	.cont8
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont8
	mov	3,counter

! .update9: K = 4; overwrites %f2; saves px from %i4, py from %i2.
	.align	16
.update9:
	cmp	counter,4
	ble	.cont9
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont9
	mov	4,counter

! .update10: K = 4; overwrites %f4; saves px from %i4, py from %i2.
	.align	16
.update10:
	cmp	counter,4
	ble	.cont10
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont10
	mov	4,counter

! .update11: acts only when %i5 == 0; K = 4; overwrites %f2;
! saves px from %i4, py from %i2.
	.align	16
.update11:
	cmp	%i5,0
	bne	.cont11

	cmp	counter,4		! delay slot of the bne above
	ble	.cont11
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont11
	mov	4,counter

! .update12: K = 5; overwrites %f2; saves px from %o4, py from %i2.
	.align	16
.update12:
	cmp	counter,5
	ble	.cont12
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont12
	mov	5,counter

! .update13: K = 5; overwrites %f4; saves px from %o4, py from %i2.
	.align	16
.update13:
	cmp	counter,5
	ble	.cont13
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont13
	mov	5,counter

! .update14: acts only when %i5 == 0; K = 5; overwrites %f2;
! saves px from %o4, py from %i2.
	.align	16
.update14:
	cmp	%i5,0
	bne	.cont14

	cmp	counter,5		! delay slot of the bne above
	ble	.cont14
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont14
	mov	5,counter

! .update15: K = 6; overwrites %f2; saves px from %l0, py from %i2.
	.align	16
.update15:
	cmp	counter,6
	ble	.cont15
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont15
	mov	6,counter

! .update16: K = 6; overwrites %f4; saves px from %l0, py from %i2.
	.align	16
.update16:
	cmp	counter,6
	ble	.cont16
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont16
	mov	6,counter

! .update17: acts only when %i5 == 0; K = 1; saves px from %l0,
! py from %i2.
! NOTE(review): this is the only handler whose delay slot is
! "fmovd DC1,%f62" rather than a reload of %f2/%f4 from TBL; its branch
! site is outside this chunk - confirm the difference is intentional.
	.align	16
.update17:
	cmp	%i5,0
	bne	.cont17

	cmp	counter,1		! delay slot of the bne above
	ble	.cont17
	fmovd	DC1,%f62		! delay slot: always executed

	sub	counter,1,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont17
	mov	1,counter

! .update18: K = 2; overwrites %f2; saves px from %i1, py from %i2.
	.align	16
.update18:
	cmp	counter,2
	ble	.cont18
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont18
	mov	2,counter

! .update19: K = 2; overwrites %f4; saves px from %i1, py from %i2.
	.align	16
.update19:
	cmp	counter,2
	ble	.cont19
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont19
	mov	2,counter

! .update20: acts only when %o1 == 0; K = 2; overwrites %f2;
! saves px from %i1, py from %i2.
! NOTE(review): the other zero-check handlers test %i5; here the test
! is on %o1.  The branch site (.cont20) is outside this chunk, so
! whether %o1 carries ax at that point cannot be verified - confirm.
	.align	16
.update20:
	cmp	%o1,0
	bne	.cont20

	cmp	counter,2		! delay slot of the bne above
	ble	.cont20
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,2,counter
	st	counter,[%fp+tmp_counter]

	stx	%i1,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont20
	mov	2,counter

! .update21: K = 3; overwrites %f2; saves px from %g5, py from %o3.
	.align	16
.update21:
	cmp	counter,3
	ble	.cont21
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont21
	mov	3,counter

! .update22: K = 3; overwrites %f4; saves px from %g5, py from %o3.
	.align	16
.update22:
	cmp	counter,3
	ble	.cont22
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont22
	mov	3,counter

! .update23: acts only when %i5 == 0; K = 3; overwrites %f2;
! saves px from %g5, py from %o3.
	.align	16
.update23:
	cmp	%i5,0
	bne	.cont23

	cmp	counter,3		! delay slot of the bne above
	ble	.cont23
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,3,counter
	st	counter,[%fp+tmp_counter]

	stx	%g5,[%fp+tmp_px]

	stx	%o3,[%fp+tmp_py]
	ba	.cont23
	mov	3,counter

! .update24: K = 4; overwrites %f2; saves px from %i4, py from %i2.
	.align	16
.update24:
	cmp	counter,4
	ble	.cont24
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont24
	mov	4,counter

! .update25: K = 4; overwrites %f4; saves px from %i4, py from %i2.
	.align	16
.update25:
	cmp	counter,4
	ble	.cont25
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont25
	mov	4,counter

! .update26: acts only when %i5 == 0; K = 4; overwrites %f2;
! saves px from %i4, py from %i2.
	.align	16
.update26:
	cmp	%i5,0
	bne	.cont26

	cmp	counter,4		! delay slot of the bne above
	ble	.cont26
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,4,counter
	st	counter,[%fp+tmp_counter]

	stx	%i4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont26
	mov	4,counter

! .update27: K = 5; overwrites %f2; saves px from %o4, py from %i2.
	.align	16
.update27:
	cmp	counter,5
	ble	.cont27
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont27
	mov	5,counter

! .update28: K = 5; overwrites %f4; saves px from %o4, py from %i2.
	.align	16
.update28:
	cmp	counter,5
	ble	.cont28
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont28
	mov	5,counter

! .update29: acts only when %i5 == 0; K = 5; overwrites %f2;
! saves px from %o4, py from %i2.
	.align	16
.update29:
	cmp	%i5,0
	bne	.cont29

	cmp	counter,5		! delay slot of the bne above
	ble	.cont29
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,5,counter
	st	counter,[%fp+tmp_counter]

	stx	%o4,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont29
	mov	5,counter

! .update30: K = 6; overwrites %f2; saves px from %l0, py from %i2.
	.align	16
.update30:
	cmp	counter,6
	ble	.cont30
	ld	[TBL+TBL_SHIFT+44],%f2

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont30
	mov	6,counter

! .update31: K = 6; overwrites %f4; saves px from %l0, py from %i2.
	.align	16
.update31:
	cmp	counter,6
	ble	.cont31
	ld	[TBL+TBL_SHIFT+44],%f4

	sub	counter,6,counter
	st	counter,[%fp+tmp_counter]

	stx	%l0,[%fp+tmp_px]

	stx	%i2,[%fp+tmp_py]
	ba	.cont31
	mov	6,counter

! .exit: return to the caller; the restore executes in the delay slot
! of ret.
	.align	16
.exit:
	ret
	restore
	! SET_SIZE is a macro defined outside this chunk; presumably it
	! records the symbol size of __vrhypotf - confirm.
	SET_SIZE(__vrhypotf)
