1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "__vexpf.S" 30 31 #include "libm.h" 32 33 RO_DATA 34 .align 64 35 !! 
2^(i/256) - ((i & 0xf0) << 44), i = [0, 255] 36 .CONST_TBL: 37 .word 0x3ff00000, 0x00000000, 0x3ff00b1a, 0xfa5abcbf 38 .word 0x3ff0163d, 0xa9fb3335, 0x3ff02168, 0x143b0281 39 .word 0x3ff02c9a, 0x3e778061, 0x3ff037d4, 0x2e11bbcc 40 .word 0x3ff04315, 0xe86e7f85, 0x3ff04e5f, 0x72f654b1 41 .word 0x3ff059b0, 0xd3158574, 0x3ff0650a, 0x0e3c1f89 42 .word 0x3ff0706b, 0x29ddf6de, 0x3ff07bd4, 0x2b72a836 43 .word 0x3ff08745, 0x18759bc8, 0x3ff092bd, 0xf66607e0 44 .word 0x3ff09e3e, 0xcac6f383, 0x3ff0a9c7, 0x9b1f3919 45 .word 0x3fefb558, 0x6cf9890f, 0x3fefc0f1, 0x45e46c85 46 .word 0x3fefcc92, 0x2b7247f7, 0x3fefd83b, 0x23395dec 47 .word 0x3fefe3ec, 0x32d3d1a2, 0x3fefefa5, 0x5fdfa9c5 48 .word 0x3feffb66, 0xaffed31b, 0x3ff00730, 0x28d7233e 49 .word 0x3ff01301, 0xd0125b51, 0x3ff01edb, 0xab5e2ab6 50 .word 0x3ff02abd, 0xc06c31cc, 0x3ff036a8, 0x14f204ab 51 .word 0x3ff0429a, 0xaea92de0, 0x3ff04e95, 0x934f312e 52 .word 0x3ff05a98, 0xc8a58e51, 0x3ff066a4, 0x5471c3c2 53 .word 0x3fef72b8, 0x3c7d517b, 0x3fef7ed4, 0x8695bbc0 54 .word 0x3fef8af9, 0x388c8dea, 0x3fef9726, 0x58375d2f 55 .word 0x3fefa35b, 0xeb6fcb75, 0x3fefaf99, 0xf8138a1c 56 .word 0x3fefbbe0, 0x84045cd4, 0x3fefc82f, 0x95281c6b 57 .word 0x3fefd487, 0x3168b9aa, 0x3fefe0e7, 0x5eb44027 58 .word 0x3fefed50, 0x22fcd91d, 0x3feff9c1, 0x8438ce4d 59 .word 0x3ff0063b, 0x88628cd6, 0x3ff012be, 0x3578a819 60 .word 0x3ff01f49, 0x917ddc96, 0x3ff02bdd, 0xa27912d1 61 .word 0x3fef387a, 0x6e756238, 0x3fef451f, 0xfb82140a 62 .word 0x3fef51ce, 0x4fb2a63f, 0x3fef5e85, 0x711ece75 63 .word 0x3fef6b45, 0x65e27cdd, 0x3fef780e, 0x341ddf29 64 .word 0x3fef84df, 0xe1f56381, 0x3fef91ba, 0x7591bb70 65 .word 0x3fef9e9d, 0xf51fdee1, 0x3fefab8a, 0x66d10f13 66 .word 0x3fefb87f, 0xd0dad990, 0x3fefc57e, 0x39771b2f 67 .word 0x3fefd285, 0xa6e4030b, 0x3fefdf96, 0x1f641589 68 .word 0x3fefecaf, 0xa93e2f56, 0x3feff9d2, 0x4abd886b 69 .word 0x3fef06fe, 0x0a31b715, 0x3fef1432, 0xedeeb2fd 70 .word 0x3fef2170, 0xfc4cd831, 0x3fef2eb8, 0x3ba8ea32 71 .word 0x3fef3c08, 0xb26416ff, 
0x3fef4962, 0x66e3fa2d 72 .word 0x3fef56c5, 0x5f929ff1, 0x3fef6431, 0xa2de883b 73 .word 0x3fef71a7, 0x373aa9cb, 0x3fef7f26, 0x231e754a 74 .word 0x3fef8cae, 0x6d05d866, 0x3fef9a40, 0x1b7140ef 75 .word 0x3fefa7db, 0x34e59ff7, 0x3fefb57f, 0xbfec6cf4 76 .word 0x3fefc32d, 0xc313a8e5, 0x3fefd0e5, 0x44ede173 77 .word 0x3feedea6, 0x4c123422, 0x3feeec70, 0xdf1c5175 78 .word 0x3feefa45, 0x04ac801c, 0x3fef0822, 0xc367a024 79 .word 0x3fef160a, 0x21f72e2a, 0x3fef23fb, 0x2709468a 80 .word 0x3fef31f5, 0xd950a897, 0x3fef3ffa, 0x3f84b9d4 81 .word 0x3fef4e08, 0x6061892d, 0x3fef5c20, 0x42a7d232 82 .word 0x3fef6a41, 0xed1d0057, 0x3fef786d, 0x668b3237 83 .word 0x3fef86a2, 0xb5c13cd0, 0x3fef94e1, 0xe192aed2 84 .word 0x3fefa32a, 0xf0d7d3de, 0x3fefb17d, 0xea6db7d7 85 .word 0x3feebfda, 0xd5362a27, 0x3feece41, 0xb817c114 86 .word 0x3feedcb2, 0x99fddd0d, 0x3feeeb2d, 0x81d8abff 87 .word 0x3feef9b2, 0x769d2ca7, 0x3fef0841, 0x7f4531ee 88 .word 0x3fef16da, 0xa2cf6642, 0x3fef257d, 0xe83f4eef 89 .word 0x3fef342b, 0x569d4f82, 0x3fef42e2, 0xf4f6ad27 90 .word 0x3fef51a4, 0xca5d920f, 0x3fef6070, 0xdde910d2 91 .word 0x3fef6f47, 0x36b527da, 0x3fef7e27, 0xdbe2c4cf 92 .word 0x3fef8d12, 0xd497c7fd, 0x3fef9c08, 0x27ff07cc 93 .word 0x3feeab07, 0xdd485429, 0x3feeba11, 0xfba87a03 94 .word 0x3feec926, 0x8a5946b7, 0x3feed845, 0x90998b93 95 .word 0x3feee76f, 0x15ad2148, 0x3feef6a3, 0x20dceb71 96 .word 0x3fef05e1, 0xb976dc09, 0x3fef152a, 0xe6cdf6f4 97 .word 0x3fef247e, 0xb03a5585, 0x3fef33dd, 0x1d1929fd 98 .word 0x3fef4346, 0x34ccc320, 0x3fef52b9, 0xfebc8fb7 99 .word 0x3fef6238, 0x82552225, 0x3fef71c1, 0xc70833f6 100 .word 0x3fef8155, 0xd44ca973, 0x3fef90f4, 0xb19e9538 101 .word 0x3feea09e, 0x667f3bcd, 0x3feeb052, 0xfa75173e 102 .word 0x3feec012, 0x750bdabf, 0x3feecfdc, 0xddd47645 103 .word 0x3feedfb2, 0x3c651a2f, 0x3feeef92, 0x98593ae5 104 .word 0x3feeff7d, 0xf9519484, 0x3fef0f74, 0x66f42e87 105 .word 0x3fef1f75, 0xe8ec5f74, 0x3fef2f82, 0x86ead08a 106 .word 0x3fef3f9a, 0x48a58174, 0x3fef4fbd, 0x35d7cbfd 107 .word 
0x3fef5feb, 0x564267c9, 0x3fef7024, 0xb1ab6e09 108 .word 0x3fef8069, 0x4fde5d3f, 0x3fef90b9, 0x38ac1cf6 109 .word 0x3feea114, 0x73eb0187, 0x3feeb17b, 0x0976cfdb 110 .word 0x3feec1ed, 0x0130c132, 0x3feed26a, 0x62ff86f0 111 .word 0x3feee2f3, 0x36cf4e62, 0x3feef387, 0x8491c491 112 .word 0x3fef0427, 0x543e1a12, 0x3fef14d2, 0xadd106d9 113 .word 0x3fef2589, 0x994cce13, 0x3fef364c, 0x1eb941f7 114 .word 0x3fef471a, 0x4623c7ad, 0x3fef57f4, 0x179f5b21 115 .word 0x3fef68d9, 0x9b4492ed, 0x3fef79ca, 0xd931a436 116 .word 0x3fef8ac7, 0xd98a6699, 0x3fef9bd0, 0xa478580f 117 .word 0x3feeace5, 0x422aa0db, 0x3feebe05, 0xbad61778 118 .word 0x3feecf32, 0x16b5448c, 0x3feee06a, 0x5e0866d9 119 .word 0x3feef1ae, 0x99157736, 0x3fef02fe, 0xd0282c8a 120 .word 0x3fef145b, 0x0b91ffc6, 0x3fef25c3, 0x53aa2fe2 121 .word 0x3fef3737, 0xb0cdc5e5, 0x3fef48b8, 0x2b5f98e5 122 .word 0x3fef5a44, 0xcbc8520f, 0x3fef6bdd, 0x9a7670b3 123 .word 0x3fef7d82, 0x9fde4e50, 0x3fef8f33, 0xe47a22a2 124 .word 0x3fefa0f1, 0x70ca07ba, 0x3fefb2bb, 0x4d53fe0d 125 .word 0x3feec491, 0x82a3f090, 0x3feed674, 0x194bb8d5 126 .word 0x3feee863, 0x19e32323, 0x3feefa5e, 0x8d07f29e 127 .word 0x3fef0c66, 0x7b5de565, 0x3fef1e7a, 0xed8eb8bb 128 .word 0x3fef309b, 0xec4a2d33, 0x3fef42c9, 0x80460ad8 129 .word 0x3fef5503, 0xb23e255d, 0x3fef674a, 0x8af46052 130 .word 0x3fef799e, 0x1330b358, 0x3fef8bfe, 0x53c12e59 131 .word 0x3fef9e6b, 0x5579fdbf, 0x3fefb0e5, 0x21356eba 132 .word 0x3fefc36b, 0xbfd3f37a, 0x3fefd5ff, 0x3a3c2774 133 .word 0x3feee89f, 0x995ad3ad, 0x3feefb4c, 0xe622f2ff 134 .word 0x3fef0e07, 0x298db666, 0x3fef20ce, 0x6c9a8952 135 .word 0x3fef33a2, 0xb84f15fb, 0x3fef4684, 0x15b749b1 136 .word 0x3fef5972, 0x8de5593a, 0x3fef6c6e, 0x29f1c52a 137 .word 0x3fef7f76, 0xf2fb5e47, 0x3fef928c, 0xf22749e4 138 .word 0x3fefa5b0, 0x30a1064a, 0x3fefb8e0, 0xb79a6f1f 139 .word 0x3fefcc1e, 0x904bc1d2, 0x3fefdf69, 0xc3f3a207 140 .word 0x3feff2c2, 0x5bd71e09, 0x3ff00628, 0x6141b33d 141 .word 0x3fef199b, 0xdd85529c, 0x3fef2d1c, 0xd9fa652c 142 .word 
0x3fef40ab, 0x5fffd07a, 0x3fef5447, 0x78fafb22 143 .word 0x3fef67f1, 0x2e57d14b, 0x3fef7ba8, 0x8988c933 144 .word 0x3fef8f6d, 0x9406e7b5, 0x3fefa340, 0x5751c4db 145 .word 0x3fefb720, 0xdcef9069, 0x3fefcb0f, 0x2e6d1675 146 .word 0x3fefdf0b, 0x555dc3fa, 0x3feff315, 0x5b5bab74 147 .word 0x3ff0072d, 0x4a07897c, 0x3ff01b53, 0x2b08c968 148 .word 0x3ff02f87, 0x080d89f2, 0x3ff043c8, 0xeacaa1d6 149 .word 0x3fef5818, 0xdcfba487, 0x3fef6c76, 0xe862e6d3 150 .word 0x3fef80e3, 0x16c98398, 0x3fef955d, 0x71ff6075 151 .word 0x3fefa9e6, 0x03db3285, 0x3fefbe7c, 0xd63a8315 152 .word 0x3fefd321, 0xf301b460, 0x3fefe7d5, 0x641c0658 153 .word 0x3feffc97, 0x337b9b5f, 0x3ff01167, 0x6b197d17 154 .word 0x3ff02646, 0x14f5a129, 0x3ff03b33, 0x3b16ee12 155 .word 0x3ff0502e, 0xe78b3ff6, 0x3ff06539, 0x24676d76 156 .word 0x3ff07a51, 0xfbc74c83, 0x3ff08f79, 0x77cdb740 157 .word 0x3fefa4af, 0xa2a490da, 0x3fefb9f4, 0x867cca6e 158 .word 0x3fefcf48, 0x2d8e67f1, 0x3fefe4aa, 0xa2188510 159 .word 0x3feffa1b, 0xee615a27, 0x3ff00f9c, 0x1cb6412a 160 .word 0x3ff0252b, 0x376bba97, 0x3ff03ac9, 0x48dd7274 161 .word 0x3ff05076, 0x5b6e4540, 0x3ff06632, 0x798844f8 162 .word 0x3ff07bfd, 0xad9cbe14, 0x3ff091d8, 0x02243c89 163 .word 0x3ff0a7c1, 0x819e90d8, 0x3ff0bdba, 0x3692d514 164 .word 0x3ff0d3c2, 0x2b8f71f1, 0x3ff0e9d9, 0x6b2a23d9 165 166 .word 0x7149f2ca, 0x0da24260 ! 1.0e30f, 1.0e-30f 167 .word 0x3ecebfbe, 0x9d182250 ! KA2 = 3.66556671660783833261e-06 168 .word 0x3f662e43, 0xe2528362 ! KA1 = 2.70760782821392980564e-03 169 .word 0x40771547, 0x652b82fe ! K256ONLN2 = 369.3299304675746271 170 .word 0x42aeac4f, 0x42b17218 ! THRESHOLD = 87.3365402f 171 ! THRESHOLDL = 88.7228394f 172 ! local storage indices 173 174 #define tmp0 STACK_BIAS-32 175 #define tmp1 STACK_BIAS-28 176 #define tmp2 STACK_BIAS-24 177 #define tmp3 STACK_BIAS-20 178 #define tmp4 STACK_BIAS-16 179 #define tmp5 STACK_BIAS-12 180 #define tmp6 STACK_BIAS-8 181 #define tmp7 STACK_BIAS-4 182 183 ! 
sizeof temp storage - must be a multiple of 16 for V9 184 #define tmps 0x20 185 186 #define I5_THRESHOLD %i5 187 #define G1_CONST_TBL %g5 188 #define G5_CONST %g1 189 190 #define F62_K256ONLN2 %f62 191 #define F60_KA2 %f60 192 #define F58_KA1 %f58 193 194 #define THRESHOLDL %f0 195 196 ! register use (note: the G1_/G5_ macro prefixes do not match the registers they name) 197 ! i0 n 198 ! i1 x 199 ! i2 stridex 200 ! i3 y 201 ! i4 stridey 202 203 ! i5 0x42aeac4f (87.3365402f) 204 205 ! g5 CONST_TBL (macro G1_CONST_TBL) 206 ! g1 0x7fffffff (macro G5_CONST) 207 208 ! f62 K256ONLN2 = 369.3299304675746271 209 ! f60 KA2 = 3.66556671660783833261e-06 210 ! f58 KA1 = 2.70760782821392980564e-03 ! f0 THRESHOLDL = 88.7228394f ! f3 0.0f (cleared with fzeros, stored as the -Inf result) 211 212 213 ! !!!!! Algorithm !!!!! 214 ! 215 ! double y, dtmp, drez; 216 ! int k, sign, Xi, ax; 217 ! float X, Y; 218 ! int THRESHOLD = 0x42aeac4f; /* 87.3365402f */ 219 ! float THRESHOLDL = 88.7228394f; 220 ! double KA2 = 3.66556671660783833261e-06; 221 ! double KA1 = 2.70760782821392980564e-03; 222 ! double K256ONLN2 = 369.3299304675746271; 223 ! char *CONST_TBL; 224 ! 225 ! X = px[0]; 226 ! Xi = ((int*)px)[0]; 227 ! ax = Xi & 0x7fffffff; 228 ! 229 ! if (ax > THRESHOLD) { 230 ! sign = ((unsigned)Xi >> 29) & 4; 231 ! if (ax >= 0x7f800000) { /* Inf or NaN */ 232 ! if (ax > 0x7f800000) { /* NaN */ 233 ! Y = X * X; /* NaN -> NaN */ 234 ! return Y; 235 ! } 236 ! Y = (sign) ? zero : X; /* +Inf -> +Inf , -Inf -> zero */ 237 ! return Y; 238 ! } 239 ! 240 ! if ( X < 0.0f || X >= THRESHOLDL ) { 241 ! Y = ((float*)(CONST_TBL + 2048 + sign))[0]; 242 ! /* Xi >= THRESHOLDL : Y = 1.0e+30f */ 243 ! /* Xi < -THRESHOLD : Y = 1.0e-30f */ 244 ! Y = Y * Y; 245 ! /* Xi >= THRESHOLDL : +Inf + overflow */ 246 ! /* Xi < -THRESHOLD : +0 + underflow */ 247 ! return Y; 248 ! } 249 ! } 250 ! vis_write_gsr(12 << 3); 251 ! y = (double) X; 252 ! y = K256ONLN2 * y; 253 ! k = (int) y; 254 ! dtmp = (double) k; 255 ! y -= dtmp; 256 ! dtmp = y * KA2; 257 ! dtmp += KA1; 258 ! y *= dtmp; 259 ! /* the three statements above amount to y = (y * KA2 + KA1) * y, evaluated once */ 260 ! ((int*)&drez)[0] = k; 261 ! ((int*)&drez)[1] = 0; 262 ! ((float*)&drez)[0] = vis_fpackfix(drez); 263 !
k &= 255; 264 ! k <<= 3; 265 ! dtmp = ((double*)(CONST_TBL + k))[0]; 266 ! drez = vis_fpadd32(drez,dtmp); 267 ! y *= drez; 268 ! y += drez; 269 ! Y = (float) y; 270 ! 271 ! 272 ! fstod %f16,%f40 ! y = (double) X 273 ! fmuld F62_K256ONLN2,%f40,%f40 ! y *= K256ONLN2 274 ! fdtoi %f40,%f16 ! k = (int) y 275 ! st %f16,[%fp+tmp0] ! store k 276 ! fitod %f16,%f34 ! dtmp = (double) k 277 ! fpackfix %f16,%f16 ! ((float*)&drez)[0] = vis_fpackfix(drez) 278 ! fsubd %f40,%f34,%f40 ! y -= dtmp 279 ! fmuld F60_KA2,%f40,%f34 ! dtmp = y * KA2 280 ! faddd F58_KA1,%f34,%f34 ! dtmp += KA1 281 ! ld [%fp+tmp0],%o0 ! load k 282 ! fmuld %f34,%f40,%f40 ! y *= dtmp 283 ! and %o0,255,%o0 ! k &= 255 284 ! sll %o0,3,%o0 ! k <<= 3 285 ! ldd [G1_CONST_TBL+%o0],%f34 ! dtmp = ((double*)(CONST_TBL + k))[0] 286 ! fpadd32 %f16,%f34,%f34 ! drez = vis_fpadd32(drez,dtmp) 287 ! fmuld %f34,%f40,%f40 ! y *= drez 288 ! faddd %f34,%f40,%f40 ! y += drez 289 ! fdtos %f40,%f26 ! (float) y 290 !-------------------------------------------------------------------- 291 292 ENTRY(__vexpf) 293 save %sp,-SA(MINFRAME)-tmps,%sp 294 PIC_SETUP(l7) 295 PIC_SET(l7,.CONST_TBL,g5) 296 297 wr %g0,0x82,%asi ! set %asi for non-faulting loads 298 wr %g0,0x60,%gsr 299 300 sll %i2,2,%i2 301 sll %i4,2,%i4 302 303 ldd [G1_CONST_TBL+2056],F60_KA2 304 sethi %hi(0x7ffffc00),G5_CONST 305 ldd [G1_CONST_TBL+2064],F58_KA1 306 add G5_CONST,1023,G5_CONST 307 ldd [G1_CONST_TBL+2072],F62_K256ONLN2 308 ld [G1_CONST_TBL+2080],I5_THRESHOLD 309 ld [G1_CONST_TBL+2084],THRESHOLDL 310 311 subcc %i0,8,%i0 312 bneg,pn %icc,.tail 313 fzeros %f3 314 315 .main_loop_preload: 316 317 ! preload 8 elements and get absolute values 318 ld [%i1],%l0 ! (0) Xi = ((int*)px)[0] 319 fzeros %f5 320 ld [%i1],%f16 ! (0) X = px[0] 321 fzeros %f7 322 add %i1,%i2,%o5 ! px += stridex 323 ld [%o5],%l1 ! (1) Xi = ((int*)px)[0] 324 and %l0,G5_CONST,%l0 ! (0) ax = Xi & 0x7fffffff 325 fzeros %f9 326 ld [%o5],%f2 ! (1) X = px[0] 327 fzeros %f11 328 add %o5,%i2,%i1 ! 
px += stridex 329 ld [%i1],%l2 ! (2) Xi = ((int*)px)[0] 330 and %l1,G5_CONST,%l1 ! (1) ax = Xi & 0x7fffffff 331 fzeros %f13 332 ld [%i1],%f4 ! (2) X = px[0] 333 fzeros %f15 334 add %i1,%i2,%o5 ! px += stridex 335 ld [%o5],%l3 ! (3) Xi = ((int*)px)[0] 336 and %l2,G5_CONST,%l2 ! (2) ax = Xi & 0x7fffffff 337 fzeros %f17 338 ld [%o5],%f6 ! (3) X = px[0] 339 add %o5,%i2,%o0 ! px += stridex 340 ld [%o0],%l4 ! (4) Xi = ((int*)px)[0] 341 and %l3,G5_CONST,%l3 ! (3) ax = Xi & 0x7fffffff 342 add %o0,%i2,%o1 ! px += stridex 343 ld [%o1],%l5 ! (5) Xi = ((int*)px)[0] 344 add %o1,%i2,%o2 ! px += stridex 345 ld [%o2],%l6 ! (6) Xi = ((int*)px)[0] 346 and %l4,G5_CONST,%l4 ! (4) ax = Xi & 0x7fffffff 347 add %o2,%i2,%o3 ! px += stridex 348 ld [%o3],%l7 ! (7) Xi = ((int*)px)[0] 349 add %o3,%i2,%i1 ! px += stridex 350 and %l5,G5_CONST,%l5 ! (5) ax = Xi & 0x7fffffff 351 and %l6,G5_CONST,%l6 ! (6) ax = Xi & 0x7fffffff 352 ba .main_loop 353 and %l7,G5_CONST,%l7 ! (7) ax = Xi & 0x7fffffff 354 355 .align 16 356 .main_loop: 357 cmp %l0,I5_THRESHOLD 358 bg,pn %icc,.spec0 ! (0) if (ax > THRESHOLD) 359 lda [%o0]%asi,%f8 ! (4) X = px[0] 360 fstod %f16,%f40 ! (0) y = (double) X 361 .spec0_cont: 362 cmp %l1,I5_THRESHOLD 363 bg,pn %icc,.spec1 ! (1) if (ax > THRESHOLD) 364 lda [%o1]%asi,%f10 ! (5) X = px[0] 365 fstod %f2,%f42 ! (1) y = (double) X 366 .spec1_cont: 367 cmp %l2,I5_THRESHOLD 368 bg,pn %icc,.spec2 ! (2) if (ax > THRESHOLD) 369 lda [%o2]%asi,%f12 ! (6) X = px[0] 370 fstod %f4,%f44 ! (2) y = (double) X 371 .spec2_cont: 372 cmp %l3,I5_THRESHOLD 373 bg,pn %icc,.spec3 ! (3) if (ax > THRESHOLD) 374 lda [%o3]%asi,%f14 ! (7) X = px[0] 375 fstod %f6,%f46 ! (3) y = (double) X 376 .spec3_cont: 377 cmp %l4,I5_THRESHOLD 378 bg,pn %icc,.spec4 ! (4) if (ax > THRESHOLD) 379 fmuld F62_K256ONLN2,%f40,%f40 ! (0) y *= K256ONLN2 380 fstod %f8,%f48 ! (4) y = (double) X 381 .spec4_cont: 382 cmp %l5,I5_THRESHOLD 383 bg,pn %icc,.spec5 ! (5) if (ax > THRESHOLD) 384 fmuld F62_K256ONLN2,%f42,%f42 ! 
(1) y *= K256ONLN2 385 fstod %f10,%f50 ! (5) y = (double) X 386 .spec5_cont: 387 cmp %l6,I5_THRESHOLD 388 bg,pn %icc,.spec6 ! (6) if (ax > THRESHOLD) 389 fmuld F62_K256ONLN2,%f44,%f44 ! (2) y *= K256ONLN2 390 fstod %f12,%f52 ! (6) y = (double) X 391 .spec6_cont: 392 cmp %l7,I5_THRESHOLD 393 bg,pn %icc,.spec7 ! (7) if (ax > THRESHOLD) 394 fmuld F62_K256ONLN2,%f46,%f46 ! (3) y *= K256ONLN2 395 fstod %f14,%f54 ! (7) y = (double) X 396 .spec7_cont: 397 fdtoi %f40,%f16 ! (0) k = (int) y 398 st %f16,[%fp+tmp0] 399 fmuld F62_K256ONLN2,%f48,%f48 ! (4) y *= K256ONLN2 400 401 fdtoi %f42,%f2 ! (1) k = (int) y 402 st %f2,[%fp+tmp1] 403 fmuld F62_K256ONLN2,%f50,%f50 ! (5) y *= K256ONLN2 404 405 fdtoi %f44,%f4 ! (2) k = (int) y 406 st %f4,[%fp+tmp2] 407 fmuld F62_K256ONLN2,%f52,%f52 ! (6) y *= K256ONLN2 408 409 fdtoi %f46,%f6 ! (3) k = (int) y 410 st %f6,[%fp+tmp3] 411 fmuld F62_K256ONLN2,%f54,%f54 ! (7) y *= K256ONLN2 412 413 fdtoi %f48,%f8 ! (4) k = (int) y 414 st %f8,[%fp+tmp4] 415 416 fdtoi %f50,%f10 ! (5) k = (int) y 417 st %f10,[%fp+tmp5] 418 419 fitod %f16,%f34 ! (0) dtmp = (double) k 420 fpackfix %f16,%f16 ! (0) ((float*)&drez)[0] = vis_fpackfix(drez) 421 nop 422 nop 423 424 fdtoi %f52,%f12 ! (6) k = (int) y 425 st %f12,[%fp+tmp6] 426 427 fdtoi %f54,%f14 ! (7) k = (int) y 428 st %f14,[%fp+tmp7] 429 430 lda [%i1]%asi,%l0 ! (8) Xi = ((int*)px)[0] 431 add %i1,%i2,%o5 ! px += stridex 432 fitod %f2,%f18 ! (1) dtmp = (double) k 433 fpackfix %f2,%f2 ! (1) ((float*)&drez)[0] = vis_fpackfix(drez) 434 435 lda [%o5]%asi,%l1 ! (9) Xi = ((int*)px)[0] 436 add %o5,%i2,%i1 ! px += stridex 437 fitod %f4,%f20 ! (2) dtmp = (double) k 438 fpackfix %f4,%f4 ! (2) ((float*)&drez)[0] = vis_fpackfix(drez) 439 440 lda [%i1]%asi,%l2 ! (10) Xi = ((int*)px)[0] 441 add %i1,%i2,%o5 ! px += stridex 442 fitod %f6,%f22 ! (3) dtmp = (double) k 443 fpackfix %f6,%f6 ! (3) ((float*)&drez)[0] = vis_fpackfix(drez) 444 445 lda [%o5]%asi,%l3 ! (11) Xi = ((int*)px)[0] 446 add %o5,%i2,%i1 ! 
px += stridex 447 fitod %f8,%f24 ! (4) dtmp = (double) k 448 fpackfix %f8,%f8 ! (4) ((float*)&drez)[0] = vis_fpackfix(drez) 449 450 fitod %f10,%f26 ! (5) dtmp = (double) k 451 fpackfix %f10,%f10 ! (5) ((float*)&drez)[0] = vis_fpackfix(drez) 452 453 fitod %f12,%f28 ! (6) dtmp = (double) k 454 fpackfix %f12,%f12 ! (6) ((float*)&drez)[0] = vis_fpackfix(drez) 455 456 fitod %f14,%f30 ! (7) dtmp = (double) k 457 fpackfix %f14,%f14 ! (7) ((float*)&drez)[0] = vis_fpackfix(drez) 458 459 ld [%fp+tmp0],%o0 ! (0) load k 460 and %l0,G5_CONST,%l0 ! (8) ax = Xi & 0x7fffffff 461 fsubd %f40,%f34,%f40 ! (0) y -= dtmp 462 463 ld [%fp+tmp1],%o1 ! (1) load k 464 and %l1,G5_CONST,%l1 ! (9) ax = Xi & 0x7fffffff 465 fsubd %f42,%f18,%f42 ! (1) y -= dtmp 466 467 ld [%fp+tmp2],%o2 ! (2) load k 468 and %l2,G5_CONST,%l2 ! (10) ax = Xi & 0x7fffffff 469 and %o0,255,%o0 ! (0) k &= 255 470 fsubd %f44,%f20,%f44 ! (2) y -= dtmp 471 472 ld [%fp+tmp3],%o3 ! (3) load k 473 and %o1,255,%o1 ! (1) k &= 255 474 fsubd %f46,%f22,%f46 ! (3) y -= dtmp 475 476 sll %o0,3,%o0 ! (0) k <<= 3 477 sll %o1,3,%o1 ! (1) k <<= 3 478 fmuld F60_KA2,%f40,%f34 ! (0) dtmp = y * KA2 479 fsubd %f48,%f24,%f48 ! (4) y -= dtmp 480 481 and %l3,G5_CONST,%l3 ! (11) ax = Xi & 0x7fffffff 482 and %o2,255,%o2 ! (2) k &= 255 483 fmuld F60_KA2,%f42,%f18 ! (1) dtmp = y * KA2 484 fsubd %f50,%f26,%f50 ! (5) y -= dtmp 485 486 sll %o2,3,%o2 ! (2) k <<= 3 487 fmuld F60_KA2,%f44,%f20 ! (2) dtmp = y * KA2 488 fsubd %f52,%f28,%f52 ! (6) y -= dtmp 489 490 ld [%fp+tmp4],%o4 ! (4) load k 491 and %o3,255,%o3 ! (3) k &= 255 492 fmuld F60_KA2,%f46,%f22 ! (3) dtmp = y * KA2 493 fsubd %f54,%f30,%f54 ! (7) y -= dtmp 494 495 ld [%fp+tmp5],%o5 ! (5) load k 496 sll %o3,3,%o3 ! (3) k <<= 3 497 fmuld F60_KA2,%f48,%f24 ! (4) dtmp = y * KA2 498 faddd F58_KA1,%f34,%f34 ! (0) dtmp += KA1 499 500 ld [%fp+tmp6],%o7 ! (6) load k 501 and %o4,255,%o4 ! (4) k &= 255 502 fmuld F60_KA2,%f50,%f26 ! (5) dtmp = y * KA2 503 faddd F58_KA1,%f18,%f18 ! 
(1) dtmp += KA1 504 505 ld [%fp+tmp7],%l4 ! (7) load k 506 and %o5,255,%o5 ! (5) k &= 255 507 fmuld F60_KA2,%f52,%f28 ! (6) dtmp = y * KA2 508 faddd F58_KA1,%f20,%f20 ! (2) dtmp += KA1 509 510 sll %o5,3,%o5 ! (5) k <<= 3 511 fmuld F60_KA2,%f54,%f30 ! (7) dtmp = y * KA2 512 faddd F58_KA1,%f22,%f22 ! (3) dtmp += KA1 513 514 fmuld %f34,%f40,%f40 ! (0) y *= dtmp 515 ldd [G1_CONST_TBL+%o0],%f34 ! (0) dtmp = ((double*)(CONST_TBL + k))[0] 516 and %l4,255,%l4 ! (7) k &= 255 517 faddd F58_KA1,%f24,%f24 ! (4) dtmp += KA1 518 519 fmuld %f18,%f42,%f42 ! (1) y *= dtmp 520 ldd [G1_CONST_TBL+%o1],%f18 ! (1) dtmp = ((double*)(CONST_TBL + k))[0] 521 sll %l4,3,%l4 ! (7) k <<= 3 522 faddd F58_KA1,%f26,%f26 ! (5) dtmp += KA1 523 524 fmuld %f20,%f44,%f44 ! (2) y *= dtmp 525 ldd [G1_CONST_TBL+%o2],%f20 ! (2) dtmp = ((double*)(CONST_TBL + k))[0] 526 faddd F58_KA1,%f28,%f28 ! (6) dtmp += KA1 527 528 fmuld %f22,%f46,%f46 ! (3) y *= dtmp 529 ldd [G1_CONST_TBL+%o3],%f22 ! (3) dtmp = ((double*)(CONST_TBL + k))[0] 530 sll %o4,3,%o4 ! (4) k <<= 3 531 faddd F58_KA1,%f30,%f30 ! (7) dtmp += KA1 532 533 fmuld %f24,%f48,%f48 ! (4) y *= dtmp 534 ldd [G1_CONST_TBL+%o4],%f24 ! (4) dtmp = ((double*)(CONST_TBL + k))[0] 535 and %o7,255,%o7 ! (6) k &= 255 536 fpadd32 %f16,%f34,%f34 ! (0) drez = vis_fpadd32(drez,dtmp) 537 538 fmuld %f26,%f50,%f50 ! (5) y *= dtmp 539 ldd [G1_CONST_TBL+%o5],%f26 ! (5) dtmp = ((double*)(CONST_TBL + k))[0] 540 sll %o7,3,%o7 ! (6) k <<= 3 541 fpadd32 %f2,%f18,%f18 ! (1) drez = vis_fpadd32(drez,dtmp) 542 543 fmuld %f28,%f52,%f52 ! (6) y *= dtmp 544 ldd [G1_CONST_TBL+%o7],%f28 ! (6) dtmp = ((double*)(CONST_TBL + k))[0] 545 sll %i2,2,%o0 546 fpadd32 %f4,%f20,%f20 ! (2) drez = vis_fpadd32(drez,dtmp) 547 548 fmuld %f30,%f54,%f54 ! (7) y *= dtmp 549 ldd [G1_CONST_TBL+%l4],%f30 ! (7) dtmp = ((double*)(CONST_TBL + k))[0] 550 sub %i1,%o0,%o0 551 fpadd32 %f6,%f22,%f22 ! (3) drez = vis_fpadd32(drez,dtmp) 552 553 lda [%i1]%asi,%l4 ! (12) Xi = ((int*)px)[0] 554 add %i1,%i2,%o1 ! 
px += stridex 555 fpadd32 %f8,%f24,%f24 ! (4) drez = vis_fpadd32(drez,dtmp) 556 fmuld %f34,%f40,%f40 ! (0) y *= drez 557 558 lda [%o1]%asi,%l5 ! (13) Xi = ((int*)px)[0] 559 add %o1,%i2,%o2 ! px += stridex 560 fpadd32 %f10,%f26,%f26 ! (5) drez = vis_fpadd32(drez,dtmp) 561 fmuld %f18,%f42,%f42 ! (1) y *= drez 562 563 lda [%o2]%asi,%l6 ! (14) Xi = ((int*)px)[0] 564 add %o2,%i2,%o3 ! px += stridex 565 fpadd32 %f12,%f28,%f28 ! (6) drez = vis_fpadd32(drez,dtmp) 566 fmuld %f20,%f44,%f44 ! (2) y *= drez 567 568 lda [%o3]%asi,%l7 ! (15) Xi = ((int*)px)[0] 569 add %o3,%i2,%i1 ! px += stridex 570 fpadd32 %f14,%f30,%f30 ! (7) drez = vis_fpadd32(drez,dtmp) 571 fmuld %f22,%f46,%f46 ! (3) y *= drez 572 573 lda [%o0]%asi,%f16 ! (8) X = px[0] 574 add %o0,%i2,%o5 575 fmuld %f24,%f48,%f48 ! (4) y *= drez 576 faddd %f34,%f40,%f40 ! (0) y += drez 577 578 lda [%o5]%asi,%f2 ! (9) X = px[0] 579 add %o5,%i2,%o0 580 fmuld %f26,%f50,%f50 ! (5) y *= drez 581 faddd %f18,%f42,%f42 ! (1) y += drez 582 583 lda [%o0]%asi,%f4 ! (10) X = px[0] 584 add %o0,%i2,%o5 585 fmuld %f28,%f52,%f52 ! (6) y *= drez 586 faddd %f20,%f44,%f44 ! (2) y += drez 587 588 lda [%o5]%asi,%f6 ! (11) X = px[0] 589 add %o5,%i2,%o0 590 fmuld %f30,%f54,%f54 ! (7) y *= drez 591 faddd %f22,%f46,%f46 ! (3) y += drez 592 593 and %l4,G5_CONST,%l4 ! (12) ax = Xi & 0x7fffffff 594 faddd %f24,%f48,%f48 ! (4) y += drez 595 596 and %l5,G5_CONST,%l5 ! (13) ax = Xi & 0x7fffffff 597 faddd %f26,%f50,%f50 ! (5) y += drez 598 599 and %l6,G5_CONST,%l6 ! (14) ax = Xi & 0x7fffffff 600 faddd %f28,%f52,%f52 ! (6) y += drez 601 602 and %l7,G5_CONST,%l7 ! (15) ax = Xi & 0x7fffffff 603 faddd %f30,%f54,%f54 ! (7) y += drez 604 605 fdtos %f40,%f26 ! (0) (float) y 606 st %f26,[%i3] 607 add %i3,%i4,%o4 ! py += stridey 608 609 fdtos %f42,%f18 ! (1) (float) y 610 st %f18,[%o4] 611 add %o4,%i4,%i3 ! py += stridey 612 613 fdtos %f44,%f20 ! (2) (float) y 614 st %f20,[%i3] 615 add %i3,%i4,%o4 ! py += stridey 616 617 fdtos %f46,%f22 ! 
(3) (float) y 618 st %f22,[%o4] 619 add %o4,%i4,%i3 ! py += stridey 620 621 fdtos %f48,%f24 ! (4) (float) y 622 st %f24,[%i3] 623 subcc %i0,8,%i0 624 add %i3,%i4,%o4 ! py += stridey 625 626 fdtos %f50,%f26 ! (5) (float) y 627 st %f26,[%o4] 628 add %o4,%i4,%o5 ! py += stridey 629 add %i4,%i4,%o7 630 631 fdtos %f52,%f28 ! (6) (float) y 632 st %f28,[%o5] 633 add %o5,%i4,%o4 ! py += stridey 634 add %o5,%o7,%i3 ! py += stridey 635 636 fdtos %f54,%f30 ! (7) (float) y 637 st %f30,[%o4] 638 bpos,pt %icc,.main_loop 639 nop 640 .after_main_loop: 641 sll %i2,3,%o2 642 sub %i1,%o2,%i1 643 644 .tail: 645 add %i0,8,%i0 646 subcc %i0,1,%i0 647 bneg,pn %icc,.exit 648 649 ld [%i1],%l0 650 ld [%i1],%f2 651 add %i1,%i2,%i1 652 653 .tail_loop: 654 and %l0,G5_CONST,%l1 655 cmp %l1,I5_THRESHOLD 656 bg,pn %icc,.tail_spec 657 nop 658 .tail_spec_cont: 659 fstod %f2,%f40 660 fmuld F62_K256ONLN2,%f40,%f40 661 fdtoi %f40,%f2 662 st %f2,[%fp+tmp0] 663 fitod %f2,%f16 664 fpackfix %f2,%f2 665 fsubd %f40,%f16,%f40 666 fmuld F60_KA2,%f40,%f16 667 faddd F58_KA1,%f16,%f16 668 ld [%fp+tmp0],%o0 669 fmuld %f16,%f40,%f40 670 and %o0,255,%o0 671 sll %o0,3,%o0 672 ldd [G1_CONST_TBL+%o0],%f16 673 fpadd32 %f2,%f16,%f16 674 lda [%i1]%asi,%l0 675 fmuld %f16,%f40,%f40 676 lda [%i1]%asi,%f2 677 faddd %f16,%f40,%f40 678 add %i1,%i2,%i1 679 fdtos %f40,%f16 680 st %f16,[%i3] 681 add %i3,%i4,%i3 682 subcc %i0,1,%i0 683 bpos,pt %icc,.tail_loop 684 nop 685 686 .exit: 687 ret 688 restore 689 690 .tail_spec: 691 sethi %hi(0x7f800000),%o4 692 cmp %l1,%o4 693 bl,pt %icc,.tail_spec_out_of_range 694 nop 695 696 srl %l0,29,%l0 697 ble,pn %icc,.tail_spec_inf 698 andcc %l0,4,%g0 699 700 ! NaN -> NaN 701 702 fmuls %f2,%f2,%f2 703 ba .tail_spec_exit 704 st %f2,[%i3] 705 706 .tail_spec_inf: 707 be,a,pn %icc,.tail_spec_exit 708 st %f2,[%i3] 709 710 ba .tail_spec_exit 711 st %f3,[%i3] 712 713 .tail_spec_out_of_range: 714 fcmpes %fcc0,%f2,%f3 715 fcmpes %fcc1,%f2,THRESHOLDL 716 fbl,pn %fcc0,1f ! 
if ( X < 0.0f ) 717 nop 718 fbl,pt %fcc1,.tail_spec_cont ! if ( X < THRESHOLDL ) 719 nop 720 1: 721 srl %l0,29,%l0 722 and %l0,4,%l0 723 add %l0,2048,%l0 724 ld [G1_CONST_TBL+%l0],%f2 725 fmuls %f2,%f2,%f2 726 st %f2,[%i3] 727 728 .tail_spec_exit: 729 lda [%i1]%asi,%l0 730 lda [%i1]%asi,%f2 731 add %i1,%i2,%i1 732 733 subcc %i0,1,%i0 734 bpos,pt %icc,.tail_loop 735 add %i3,%i4,%i3 736 ba .exit 737 nop 738 739 .align 16 740 .spec0: 741 sethi %hi(0x7f800000),%o5 742 cmp %l0,%o5 743 bl,pt %icc,.spec0_out_of_range 744 sll %i2,3,%o4 745 746 ble,pn %icc,.spec0_inf 747 sub %i1,%o4,%o4 748 749 ! NaN -> NaN 750 751 fmuls %f16,%f16,%f16 752 ba .spec0_exit 753 st %f16,[%i3] 754 755 .spec0_inf: 756 ld [%o4],%l0 757 srl %l0,29,%l0 758 andcc %l0,4,%l0 759 be,a,pn %icc,.spec0_exit 760 st %f16,[%i3] 761 762 ba .spec0_exit 763 st %f3,[%i3] 764 765 .spec0_out_of_range: 766 fcmpes %fcc0,%f16,%f3 767 fcmpes %fcc1,%f16,THRESHOLDL 768 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 769 fstod %f16,%f40 ! (0) y = (double) X 770 fbl,a,pt %fcc1,.spec0_cont ! if ( X < THRESHOLDL ) 771 fstod %f16,%f40 ! (0) y = (double) X 772 1: 773 sub %i1,%o4,%o4 774 ld [%o4],%l0 775 srl %l0,29,%l0 776 and %l0,4,%l0 777 add %l0,2048,%l0 778 ld [G1_CONST_TBL+%l0],%f16 779 fmuls %f16,%f16,%f16 780 st %f16,[%i3] 781 782 .spec0_exit: 783 fmovs %f2,%f16 784 mov %l1,%l0 785 fmovs %f4,%f2 786 mov %l2,%l1 787 fmovs %f6,%f4 788 mov %l3,%l2 789 fmovs %f8,%f6 790 mov %l4,%l3 791 mov %l5,%l4 792 mov %l6,%l5 793 mov %l7,%l6 794 lda [%i1]%asi,%l7 795 add %i1,%i2,%i1 796 mov %o1,%o0 797 mov %o2,%o1 798 mov %o3,%o2 799 and %l7,G5_CONST,%l7 800 add %o2,%i2,%o3 801 802 subcc %i0,1,%i0 803 bpos,pt %icc,.main_loop 804 add %i3,%i4,%i3 805 ba .after_main_loop 806 nop 807 808 .align 16 809 .spec1: 810 sethi %hi(0x7f800000),%o5 811 cmp %l1,%o5 812 bge,pn %icc,1f 813 nop 814 fcmpes %fcc0,%f2,%f3 815 fcmpes %fcc1,%f2,THRESHOLDL 816 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 817 fstod %f2,%f42 ! (1) y = (double) X 818 fbl,a,pt %fcc1,.spec1_cont ! 
if ( X < THRESHOLDL ) 819 fstod %f2,%f42 ! (1) y = (double) X 820 1: 821 fmuld F62_K256ONLN2,%f40,%f40 822 fdtoi %f40,%f16 823 st %f16,[%fp+tmp0] 824 fitod %f16,%f34 825 fpackfix %f16,%f16 826 fsubd %f40,%f34,%f40 827 fmuld F60_KA2,%f40,%f34 828 faddd F58_KA1,%f34,%f34 829 ld [%fp+tmp0],%o0 830 fmuld %f34,%f40,%f40 831 and %o0,255,%o0 832 sll %o0,3,%o0 833 ldd [G1_CONST_TBL+%o0],%f34 834 fpadd32 %f16,%f34,%f34 835 fmuld %f34,%f40,%f40 836 faddd %f34,%f40,%f40 837 fdtos %f40,%f26 838 st %f26,[%i3] 839 add %i3,%i4,%i3 840 841 cmp %l1,%o5 842 bl,pt %icc,.spec1_out_of_range 843 sll %i2,3,%o4 844 845 ble,pn %icc,.spec1_inf 846 sub %i1,%o4,%o4 847 848 ! NaN -> NaN 849 850 fmuls %f2,%f2,%f2 851 ba .spec1_exit 852 st %f2,[%i3] 853 854 .spec1_inf: 855 add %o4,%i2,%o4 856 ld [%o4],%l0 857 srl %l0,29,%l0 858 andcc %l0,4,%l0 859 be,a,pn %icc,.spec1_exit 860 st %f2,[%i3] 861 862 ba .spec1_exit 863 st %f3,[%i3] 864 865 .spec1_out_of_range: 866 sub %i1,%o4,%o4 867 add %o4,%i2,%o4 868 ld [%o4],%l0 869 srl %l0,29,%l0 870 and %l0,4,%l0 871 add %l0,2048,%l0 872 ld [G1_CONST_TBL+%l0],%f2 873 fmuls %f2,%f2,%f2 874 st %f2,[%i3] 875 876 .spec1_exit: 877 fmovs %f4,%f16 878 mov %l2,%l0 879 fmovs %f6,%f2 880 mov %l3,%l1 881 fmovs %f8,%f4 882 mov %l4,%l2 883 fmovs %f10,%f6 884 mov %l5,%l3 885 mov %l6,%l4 886 mov %l7,%l5 887 lda [%i1]%asi,%l6 888 add %i1,%i2,%i1 889 lda [%i1]%asi,%l7 890 add %i1,%i2,%i1 891 and %l6,G5_CONST,%l6 892 and %l7,G5_CONST,%l7 893 mov %o2,%o0 894 mov %o3,%o1 895 add %o1,%i2,%o2 896 add %o2,%i2,%o3 897 898 subcc %i0,2,%i0 899 bpos,pt %icc,.main_loop 900 add %i3,%i4,%i3 901 ba .after_main_loop 902 nop 903 904 .align 16 905 .spec2: 906 sethi %hi(0x7f800000),%o5 907 cmp %l2,%o5 908 bge,pn %icc,1f 909 nop 910 fcmpes %fcc0,%f4,%f3 911 fcmpes %fcc1,%f4,THRESHOLDL 912 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 913 fstod %f4,%f44 ! (2) y = (double) X 914 fbl,a,pt %fcc1,.spec2_cont ! if ( X < THRESHOLDL ) 915 fstod %f4,%f44 ! 
(2) y = (double) X 916 1: 917 fmuld F62_K256ONLN2,%f40,%f40 918 919 fmuld F62_K256ONLN2,%f42,%f42 920 921 fdtoi %f40,%f16 922 st %f16,[%fp+tmp0] 923 924 fdtoi %f42,%f2 925 st %f2,[%fp+tmp1] 926 927 fitod %f16,%f34 928 fpackfix %f16,%f16 929 930 fitod %f2,%f18 931 fpackfix %f2,%f2 932 933 fsubd %f40,%f34,%f40 934 935 fsubd %f42,%f18,%f42 936 937 fmuld F60_KA2,%f40,%f34 938 939 fmuld F60_KA2,%f42,%f18 940 941 faddd F58_KA1,%f34,%f34 942 943 faddd F58_KA1,%f18,%f18 944 945 ld [%fp+tmp0],%o0 946 fmuld %f34,%f40,%f40 947 948 ld [%fp+tmp1],%o1 949 fmuld %f18,%f42,%f42 950 951 and %o0,255,%o0 952 953 and %o1,255,%o1 954 955 sll %o0,3,%o0 956 957 sll %o1,3,%o1 958 959 ldd [G1_CONST_TBL+%o0],%f34 960 961 ldd [G1_CONST_TBL+%o1],%f18 962 963 fpadd32 %f16,%f34,%f34 964 965 fpadd32 %f2,%f18,%f18 966 967 fmuld %f34,%f40,%f40 968 969 fmuld %f18,%f42,%f42 970 971 faddd %f34,%f40,%f40 972 973 faddd %f18,%f42,%f42 974 975 fdtos %f40,%f26 976 st %f26,[%i3] 977 add %i3,%i4,%o4 978 979 fdtos %f42,%f18 980 st %f18,[%o4] 981 add %o4,%i4,%i3 982 983 cmp %l2,%o5 984 sll %i2,1,%o5 985 bl,pt %icc,.spec2_out_of_range 986 sll %i2,2,%o4 987 988 ble,pn %icc,.spec2_inf 989 add %o4,%o5,%o4 990 991 ! 
NaN -> NaN 992 993 fmuls %f4,%f4,%f4 994 ba .spec2_exit 995 st %f4,[%i3] 996 997 .spec2_inf: 998 sub %i1,%o4,%o4 999 ld [%o4],%l0 1000 srl %l0,29,%l0 1001 andcc %l0,4,%l0 1002 be,a,pn %icc,.spec2_exit 1003 st %f4,[%i3] 1004 1005 ba .spec2_exit 1006 st %f3,[%i3] 1007 1008 .spec2_out_of_range: 1009 add %o4,%o5,%o4 1010 sub %i1,%o4,%o4 1011 ld [%o4],%l0 1012 srl %l0,29,%l0 1013 and %l0,4,%l0 1014 add %l0,2048,%l0 1015 ld [G1_CONST_TBL+%l0],%f2 1016 fmuls %f2,%f2,%f2 1017 st %f2,[%i3] 1018 1019 .spec2_exit: 1020 fmovs %f6,%f16 1021 mov %l3,%l0 1022 mov %o3,%o0 1023 fmovs %f8,%f2 1024 mov %l4,%l1 1025 add %o0,%i2,%o1 1026 fmovs %f10,%f4 1027 mov %l5,%l2 1028 add %o1,%i2,%o2 1029 fmovs %f12,%f6 1030 mov %l6,%l3 1031 mov %l7,%l4 1032 lda [%i1]%asi,%l5 1033 add %i1,%i2,%i1 1034 add %o2,%i2,%o3 1035 lda [%i1]%asi,%l6 1036 add %i1,%i2,%i1 1037 lda [%i1]%asi,%l7 1038 add %i1,%i2,%i1 1039 and %l5,G5_CONST,%l5 1040 and %l6,G5_CONST,%l6 1041 and %l7,G5_CONST,%l7 1042 1043 subcc %i0,3,%i0 1044 bpos,pt %icc,.main_loop 1045 add %i3,%i4,%i3 1046 ba .after_main_loop 1047 nop 1048 .spec3: 1049 sethi %hi(0x7f800000),%o5 1050 cmp %l3,%o5 1051 bge,pn %icc,1f 1052 nop 1053 fcmpes %fcc0,%f6,%f3 1054 fcmpes %fcc1,%f6,THRESHOLDL 1055 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 1056 fstod %f6,%f46 ! (3) y = (double) X 1057 fbl,a,pt %fcc1,.spec3_cont ! if ( X < THRESHOLDL ) 1058 fstod %f6,%f46 ! 
(3) y = (double) X 1059 1: 1060 fmuld F62_K256ONLN2,%f40,%f40 1061 1062 fmuld F62_K256ONLN2,%f42,%f42 1063 1064 fmuld F62_K256ONLN2,%f44,%f44 1065 1066 fdtoi %f40,%f16 1067 st %f16,[%fp+tmp0] 1068 1069 fdtoi %f42,%f2 1070 st %f2,[%fp+tmp1] 1071 1072 fdtoi %f44,%f4 1073 st %f4,[%fp+tmp2] 1074 1075 fitod %f16,%f34 1076 fpackfix %f16,%f16 1077 1078 fitod %f2,%f18 1079 fpackfix %f2,%f2 1080 1081 fitod %f4,%f20 1082 fpackfix %f4,%f4 1083 1084 fsubd %f40,%f34,%f40 1085 1086 fsubd %f42,%f18,%f42 1087 1088 fsubd %f44,%f20,%f44 1089 1090 fmuld F60_KA2,%f40,%f34 1091 1092 fmuld F60_KA2,%f42,%f18 1093 1094 fmuld F60_KA2,%f44,%f20 1095 1096 faddd F58_KA1,%f34,%f34 1097 1098 faddd F58_KA1,%f18,%f18 1099 1100 faddd F58_KA1,%f20,%f20 1101 1102 ld [%fp+tmp0],%o0 1103 fmuld %f34,%f40,%f40 1104 1105 ld [%fp+tmp1],%o1 1106 fmuld %f18,%f42,%f42 1107 1108 ld [%fp+tmp2],%o2 1109 fmuld %f20,%f44,%f44 1110 1111 and %o0,255,%o0 1112 and %o1,255,%o1 1113 1114 and %o2,255,%o2 1115 sll %o0,3,%o0 1116 1117 sll %o1,3,%o1 1118 sll %o2,3,%o2 1119 1120 ldd [G1_CONST_TBL+%o0],%f34 1121 1122 ldd [G1_CONST_TBL+%o1],%f18 1123 1124 ldd [G1_CONST_TBL+%o2],%f20 1125 1126 fpadd32 %f16,%f34,%f34 1127 1128 fpadd32 %f2,%f18,%f18 1129 1130 fpadd32 %f4,%f20,%f20 1131 1132 fmuld %f34,%f40,%f40 1133 1134 fmuld %f18,%f42,%f42 1135 1136 fmuld %f20,%f44,%f44 1137 1138 faddd %f34,%f40,%f40 1139 1140 faddd %f18,%f42,%f42 1141 1142 faddd %f20,%f44,%f44 1143 1144 fdtos %f40,%f26 1145 st %f26,[%i3] 1146 add %i3,%i4,%o4 1147 1148 fdtos %f42,%f18 1149 st %f18,[%o4] 1150 add %o4,%i4,%i3 1151 1152 fdtos %f44,%f20 1153 st %f20,[%i3] 1154 add %i3,%i4,%i3 1155 1156 cmp %l3,%o5 1157 bl,pt %icc,.spec3_out_of_range 1158 sll %i2,2,%o4 1159 1160 ble,pn %icc,.spec3_inf 1161 add %o4,%i2,%o4 1162 1163 ! 
NaN -> NaN 1164 1165 fmuls %f6,%f6,%f6 1166 ba .spec3_exit 1167 st %f6,[%i3] 1168 1169 .spec3_inf: 1170 sub %i1,%o4,%o4 1171 ld [%o4],%l0 1172 srl %l0,29,%l0 1173 andcc %l0,4,%l0 1174 be,a,pn %icc,.spec3_exit 1175 st %f6,[%i3] 1176 1177 ba .spec3_exit 1178 st %f3,[%i3] 1179 1180 .spec3_out_of_range: 1181 add %o4,%i2,%o4 1182 sub %i1,%o4,%o4 1183 ld [%o4],%l0 1184 srl %l0,29,%l0 1185 and %l0,4,%l0 1186 add %l0,2048,%l0 1187 ld [G1_CONST_TBL+%l0],%f2 1188 fmuls %f2,%f2,%f2 1189 st %f2,[%i3] 1190 1191 .spec3_exit: 1192 fmovs %f8,%f16 1193 mov %l4,%l0 1194 fmovs %f10,%f2 1195 mov %l5,%l1 1196 fmovs %f12,%f4 1197 mov %l6,%l2 1198 fmovs %f14,%f6 1199 mov %l7,%l3 1200 mov %i1,%o0 1201 lda [%o0]%asi,%l4 1202 add %o0,%i2,%o1 1203 lda [%o1]%asi,%l5 1204 add %o1,%i2,%o2 1205 lda [%o2]%asi,%l6 1206 add %o2,%i2,%o3 1207 lda [%o3]%asi,%l7 1208 add %o3,%i2,%i1 1209 and %l4,G5_CONST,%l4 1210 and %l5,G5_CONST,%l5 1211 and %l6,G5_CONST,%l6 1212 and %l7,G5_CONST,%l7 1213 1214 subcc %i0,4,%i0 1215 bpos,pt %icc,.main_loop 1216 add %i3,%i4,%i3 1217 ba .after_main_loop 1218 nop 1219 1220 .align 16 1221 .spec4: 1222 sethi %hi(0x7f800000),%o5 1223 cmp %l4,%o5 1224 bge,pn %icc,1f 1225 nop 1226 fcmpes %fcc0,%f8,%f3 1227 fcmpes %fcc1,%f8,THRESHOLDL 1228 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 1229 fstod %f8,%f48 ! (4) y = (double) X 1230 fbl,a,pt %fcc1,.spec4_cont ! if ( X < THRESHOLDL ) 1231 fstod %f8,%f48 ! 
(4) y = (double) X 1232 1: 1233 fmuld F62_K256ONLN2,%f42,%f42 1234 1235 fmuld F62_K256ONLN2,%f44,%f44 1236 1237 fmuld F62_K256ONLN2,%f46,%f46 1238 1239 fdtoi %f40,%f16 1240 st %f16,[%fp+tmp0] 1241 1242 fdtoi %f42,%f2 1243 st %f2,[%fp+tmp1] 1244 1245 fdtoi %f44,%f4 1246 st %f4,[%fp+tmp2] 1247 1248 fdtoi %f46,%f6 1249 st %f6,[%fp+tmp3] 1250 1251 fitod %f16,%f34 1252 fpackfix %f16,%f16 1253 1254 fitod %f2,%f18 1255 fpackfix %f2,%f2 1256 1257 fitod %f4,%f20 1258 fpackfix %f4,%f4 1259 1260 fitod %f6,%f22 1261 fpackfix %f6,%f6 1262 1263 fsubd %f40,%f34,%f40 1264 1265 fsubd %f42,%f18,%f42 1266 1267 fsubd %f44,%f20,%f44 1268 1269 fsubd %f46,%f22,%f46 1270 1271 fmuld F60_KA2,%f40,%f34 1272 1273 fmuld F60_KA2,%f42,%f18 1274 1275 fmuld F60_KA2,%f44,%f20 1276 1277 fmuld F60_KA2,%f46,%f22 1278 1279 faddd F58_KA1,%f34,%f34 1280 1281 faddd F58_KA1,%f18,%f18 1282 1283 faddd F58_KA1,%f20,%f20 1284 1285 faddd F58_KA1,%f22,%f22 1286 1287 ld [%fp+tmp0],%o0 1288 fmuld %f34,%f40,%f40 1289 1290 ld [%fp+tmp1],%o1 1291 fmuld %f18,%f42,%f42 1292 1293 ld [%fp+tmp2],%o2 1294 fmuld %f20,%f44,%f44 1295 1296 ld [%fp+tmp3],%o3 1297 fmuld %f22,%f46,%f46 1298 1299 and %o0,255,%o0 1300 and %o1,255,%o1 1301 1302 and %o2,255,%o2 1303 and %o3,255,%o3 1304 1305 sll %o0,3,%o0 1306 sll %o1,3,%o1 1307 1308 sll %o2,3,%o2 1309 sll %o3,3,%o3 1310 1311 ldd [G1_CONST_TBL+%o0],%f34 1312 1313 ldd [G1_CONST_TBL+%o1],%f18 1314 1315 ldd [G1_CONST_TBL+%o2],%f20 1316 1317 ldd [G1_CONST_TBL+%o3],%f22 1318 1319 fpadd32 %f16,%f34,%f34 1320 1321 fpadd32 %f2,%f18,%f18 1322 1323 fpadd32 %f4,%f20,%f20 1324 1325 fpadd32 %f6,%f22,%f22 1326 1327 fmuld %f34,%f40,%f40 1328 1329 fmuld %f18,%f42,%f42 1330 1331 fmuld %f20,%f44,%f44 1332 1333 fmuld %f22,%f46,%f46 1334 1335 faddd %f34,%f40,%f40 1336 1337 faddd %f18,%f42,%f42 1338 1339 faddd %f20,%f44,%f44 1340 1341 faddd %f22,%f46,%f46 1342 1343 fdtos %f40,%f26 1344 st %f26,[%i3] 1345 add %i3,%i4,%o4 1346 1347 fdtos %f42,%f18 1348 st %f18,[%o4] 1349 add %o4,%i4,%i3 1350 1351 fdtos 
%f44,%f20 1352 st %f20,[%i3] 1353 add %i3,%i4,%o4 1354 1355 fdtos %f46,%f22 1356 st %f22,[%o4] 1357 add %o4,%i4,%i3 1358 1359 cmp %l4,%o5 1360 bl,pt %icc,.spec4_out_of_range 1361 sll %i2,2,%o4 1362 1363 ble,pn %icc,.spec4_inf 1364 sub %i1,%o4,%o4 1365 1366 ! NaN -> NaN 1367 1368 fmuls %f8,%f8,%f8 1369 ba .spec4_exit 1370 st %f8,[%i3] 1371 1372 .spec4_inf: 1373 ld [%o4],%l0 1374 srl %l0,29,%l0 1375 andcc %l0,4,%l0 1376 be,a,pn %icc,.spec4_exit 1377 st %f8,[%i3] 1378 1379 ba .spec4_exit 1380 st %f3,[%i3] 1381 1382 .spec4_out_of_range: 1383 sub %i1,%o4,%o4 1384 ld [%o4],%l0 1385 srl %l0,29,%l0 1386 and %l0,4,%l0 1387 add %l0,2048,%l0 1388 ld [G1_CONST_TBL+%l0],%f2 1389 fmuls %f2,%f2,%f2 1390 st %f2,[%i3] 1391 1392 .spec4_exit: 1393 fmovs %f10,%f16 1394 mov %l5,%l0 1395 fmovs %f12,%f2 1396 mov %l6,%l1 1397 fmovs %f14,%f4 1398 mov %l7,%l2 1399 lda [%i1]%asi,%l3 1400 lda [%i1]%asi,%f6 1401 add %i1,%i2,%o0 1402 lda [%o0]%asi,%l4 1403 add %o0,%i2,%o1 1404 lda [%o1]%asi,%l5 1405 add %o1,%i2,%o2 1406 lda [%o2]%asi,%l6 1407 add %o2,%i2,%o3 1408 lda [%o3]%asi,%l7 1409 add %o3,%i2,%i1 1410 and %l3,G5_CONST,%l3 1411 and %l4,G5_CONST,%l4 1412 and %l5,G5_CONST,%l5 1413 and %l6,G5_CONST,%l6 1414 and %l7,G5_CONST,%l7 1415 1416 subcc %i0,5,%i0 1417 bpos,pt %icc,.main_loop 1418 add %i3,%i4,%i3 1419 ba .after_main_loop 1420 nop 1421 1422 .align 16 1423 .spec5: 1424 sethi %hi(0x7f800000),%o5 1425 cmp %l5,%o5 1426 bge,pn %icc,1f 1427 nop 1428 fcmpes %fcc0,%f10,%f3 1429 fcmpes %fcc1,%f10,THRESHOLDL 1430 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 1431 fstod %f10,%f50 ! (5) y = (double) X 1432 fbl,a,pt %fcc1,.spec5_cont ! if ( X < THRESHOLDL ) 1433 fstod %f10,%f50 ! 
(5) y = (double) X 1434 1: 1435 fmuld F62_K256ONLN2,%f44,%f44 1436 1437 fmuld F62_K256ONLN2,%f46,%f46 1438 1439 fdtoi %f40,%f16 1440 st %f16,[%fp+tmp0] 1441 fmuld F62_K256ONLN2,%f48,%f48 1442 1443 fdtoi %f42,%f2 1444 st %f2,[%fp+tmp1] 1445 1446 fdtoi %f44,%f4 1447 st %f4,[%fp+tmp2] 1448 1449 fdtoi %f46,%f6 1450 st %f6,[%fp+tmp3] 1451 1452 fdtoi %f48,%f8 1453 st %f8,[%fp+tmp4] 1454 1455 fitod %f16,%f34 1456 fpackfix %f16,%f16 1457 1458 fitod %f2,%f18 1459 fpackfix %f2,%f2 1460 1461 fitod %f4,%f20 1462 fpackfix %f4,%f4 1463 1464 fitod %f6,%f22 1465 fpackfix %f6,%f6 1466 1467 fitod %f8,%f24 1468 fpackfix %f8,%f8 1469 1470 ld [%fp+tmp0],%o0 1471 fsubd %f40,%f34,%f40 1472 1473 ld [%fp+tmp1],%o1 1474 fsubd %f42,%f18,%f42 1475 1476 ld [%fp+tmp2],%o2 1477 and %o0,255,%o0 1478 fsubd %f44,%f20,%f44 1479 1480 ld [%fp+tmp3],%o3 1481 and %o1,255,%o1 1482 fsubd %f46,%f22,%f46 1483 1484 sll %o0,3,%o0 1485 sll %o1,3,%o1 1486 fmuld F60_KA2,%f40,%f34 1487 fsubd %f48,%f24,%f48 1488 1489 and %o2,255,%o2 1490 fmuld F60_KA2,%f42,%f18 1491 1492 sll %o2,3,%o2 1493 fmuld F60_KA2,%f44,%f20 1494 1495 ld [%fp+tmp4],%o4 1496 and %o3,255,%o3 1497 fmuld F60_KA2,%f46,%f22 1498 1499 sll %o3,3,%o3 1500 fmuld F60_KA2,%f48,%f24 1501 faddd F58_KA1,%f34,%f34 1502 1503 and %o4,255,%o4 1504 faddd F58_KA1,%f18,%f18 1505 1506 faddd F58_KA1,%f20,%f20 1507 1508 faddd F58_KA1,%f22,%f22 1509 1510 fmuld %f34,%f40,%f40 1511 ldd [G1_CONST_TBL+%o0],%f34 1512 faddd F58_KA1,%f24,%f24 1513 1514 fmuld %f18,%f42,%f42 1515 ldd [G1_CONST_TBL+%o1],%f18 1516 1517 fmuld %f20,%f44,%f44 1518 ldd [G1_CONST_TBL+%o2],%f20 1519 1520 fmuld %f22,%f46,%f46 1521 ldd [G1_CONST_TBL+%o3],%f22 1522 sll %o4,3,%o4 1523 1524 fmuld %f24,%f48,%f48 1525 ldd [G1_CONST_TBL+%o4],%f24 1526 fpadd32 %f16,%f34,%f34 1527 1528 fpadd32 %f2,%f18,%f18 1529 1530 fpadd32 %f4,%f20,%f20 1531 1532 fpadd32 %f6,%f22,%f22 1533 1534 fpadd32 %f8,%f24,%f24 1535 fmuld %f34,%f40,%f40 1536 1537 fmuld %f18,%f42,%f42 1538 1539 fmuld %f20,%f44,%f44 1540 1541 fmuld 
%f22,%f46,%f46 1542 1543 fmuld %f24,%f48,%f48 1544 faddd %f34,%f40,%f40 1545 1546 faddd %f18,%f42,%f42 1547 1548 faddd %f20,%f44,%f44 1549 1550 faddd %f22,%f46,%f46 1551 1552 faddd %f24,%f48,%f48 1553 1554 fdtos %f40,%f26 1555 st %f26,[%i3] 1556 add %i3,%i4,%o4 1557 1558 fdtos %f42,%f18 1559 st %f18,[%o4] 1560 add %o4,%i4,%i3 1561 1562 fdtos %f44,%f20 1563 st %f20,[%i3] 1564 add %i3,%i4,%o4 1565 1566 fdtos %f46,%f22 1567 st %f22,[%o4] 1568 add %o4,%i4,%i3 1569 1570 fdtos %f48,%f24 1571 st %f24,[%i3] 1572 add %i3,%i4,%i3 1573 1574 cmp %l5,%o5 1575 bl,pt %icc,.spec5_out_of_range 1576 sll %i2,2,%o4 1577 1578 ble,pn %icc,.spec5_inf 1579 sub %o4,%i2,%o4 1580 1581 ! NaN -> NaN 1582 1583 fmuls %f10,%f10,%f10 1584 ba .spec5_exit 1585 st %f10,[%i3] 1586 1587 .spec5_inf: 1588 sub %i1,%o4,%o4 1589 ld [%o4],%l0 1590 srl %l0,29,%l0 1591 andcc %l0,4,%l0 1592 be,a,pn %icc,.spec5_exit 1593 st %f10,[%i3] 1594 1595 ba .spec5_exit 1596 st %f3,[%i3] 1597 1598 .spec5_out_of_range: 1599 sub %o4,%i2,%o4 1600 sub %i1,%o4,%o4 1601 ld [%o4],%l0 1602 srl %l0,29,%l0 1603 and %l0,4,%l0 1604 add %l0,2048,%l0 1605 ld [G1_CONST_TBL+%l0],%f2 1606 fmuls %f2,%f2,%f2 1607 st %f2,[%i3] 1608 1609 .spec5_exit: 1610 fmovs %f12,%f16 1611 mov %l6,%l0 1612 fmovs %f14,%f2 1613 mov %l7,%l1 1614 lda [%i1]%asi,%l2 1615 lda [%i1]%asi,%f4 1616 add %i1,%i2,%i1 1617 lda [%i1]%asi,%l3 1618 lda [%i1]%asi,%f6 1619 add %i1,%i2,%o0 1620 lda [%o0]%asi,%l4 1621 add %o0,%i2,%o1 1622 lda [%o1]%asi,%l5 1623 add %o1,%i2,%o2 1624 lda [%o2]%asi,%l6 1625 add %o2,%i2,%o3 1626 lda [%o3]%asi,%l7 1627 add %o3,%i2,%i1 1628 and %l2,G5_CONST,%l2 1629 and %l3,G5_CONST,%l3 1630 and %l4,G5_CONST,%l4 1631 and %l5,G5_CONST,%l5 1632 and %l6,G5_CONST,%l6 1633 and %l7,G5_CONST,%l7 1634 1635 subcc %i0,6,%i0 1636 bpos,pt %icc,.main_loop 1637 add %i3,%i4,%i3 1638 ba .after_main_loop 1639 nop 1640 .spec6: 1641 sethi %hi(0x7f800000),%o5 1642 cmp %l6,%o5 1643 bge,pn %icc,1f 1644 nop 1645 fcmpes %fcc0,%f12,%f3 1646 fcmpes %fcc1,%f12,THRESHOLDL 1647 
fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 1648 fstod %f12,%f52 ! (6) y = (double) X 1649 fbl,a,pt %fcc1,.spec6_cont ! if ( X < THRESHOLDL ) 1650 fstod %f12,%f52 ! (6) y = (double) X 1651 1: 1652 fmuld F62_K256ONLN2,%f46,%f46 1653 1654 fdtoi %f40,%f16 1655 st %f16,[%fp+tmp0] 1656 fmuld F62_K256ONLN2,%f48,%f48 1657 1658 fdtoi %f42,%f2 1659 st %f2,[%fp+tmp1] 1660 fmuld F62_K256ONLN2,%f50,%f50 1661 1662 fdtoi %f44,%f4 1663 st %f4,[%fp+tmp2] 1664 1665 fdtoi %f46,%f6 1666 st %f6,[%fp+tmp3] 1667 1668 fdtoi %f48,%f8 1669 st %f8,[%fp+tmp4] 1670 1671 fdtoi %f50,%f10 1672 st %f10,[%fp+tmp5] 1673 1674 fitod %f16,%f34 1675 fpackfix %f16,%f16 1676 1677 fitod %f2,%f18 1678 fpackfix %f2,%f2 1679 1680 fitod %f4,%f20 1681 fpackfix %f4,%f4 1682 1683 fitod %f6,%f22 1684 fpackfix %f6,%f6 1685 1686 fitod %f8,%f24 1687 fpackfix %f8,%f8 1688 1689 fitod %f10,%f26 1690 fpackfix %f10,%f10 1691 1692 ld [%fp+tmp0],%o0 1693 fsubd %f40,%f34,%f40 1694 1695 ld [%fp+tmp1],%o1 1696 fsubd %f42,%f18,%f42 1697 1698 ld [%fp+tmp2],%o2 1699 and %o0,255,%o0 1700 fsubd %f44,%f20,%f44 1701 1702 ld [%fp+tmp3],%o3 1703 and %o1,255,%o1 1704 fsubd %f46,%f22,%f46 1705 1706 sll %o0,3,%o0 1707 sll %o1,3,%o1 1708 fmuld F60_KA2,%f40,%f34 1709 fsubd %f48,%f24,%f48 1710 1711 and %o2,255,%o2 1712 fmuld F60_KA2,%f42,%f18 1713 fsubd %f50,%f26,%f50 1714 1715 sll %o2,3,%o2 1716 fmuld F60_KA2,%f44,%f20 1717 1718 ld [%fp+tmp4],%o4 1719 and %o3,255,%o3 1720 fmuld F60_KA2,%f46,%f22 1721 1722 ld [%fp+tmp5],%o5 1723 sll %o3,3,%o3 1724 fmuld F60_KA2,%f48,%f24 1725 faddd F58_KA1,%f34,%f34 1726 1727 and %o4,255,%o4 1728 fmuld F60_KA2,%f50,%f26 1729 faddd F58_KA1,%f18,%f18 1730 1731 and %o5,255,%o5 1732 faddd F58_KA1,%f20,%f20 1733 1734 sll %o5,3,%o5 1735 faddd F58_KA1,%f22,%f22 1736 1737 fmuld %f34,%f40,%f40 1738 ldd [G1_CONST_TBL+%o0],%f34 1739 faddd F58_KA1,%f24,%f24 1740 1741 fmuld %f18,%f42,%f42 1742 ldd [G1_CONST_TBL+%o1],%f18 1743 faddd F58_KA1,%f26,%f26 1744 1745 fmuld %f20,%f44,%f44 1746 ldd [G1_CONST_TBL+%o2],%f20 1747 1748 fmuld 
%f22,%f46,%f46 1749 ldd [G1_CONST_TBL+%o3],%f22 1750 sll %o4,3,%o4 1751 1752 fmuld %f24,%f48,%f48 1753 ldd [G1_CONST_TBL+%o4],%f24 1754 fpadd32 %f16,%f34,%f34 1755 1756 fmuld %f26,%f50,%f50 1757 ldd [G1_CONST_TBL+%o5],%f26 1758 fpadd32 %f2,%f18,%f18 1759 1760 fpadd32 %f4,%f20,%f20 1761 1762 fpadd32 %f6,%f22,%f22 1763 1764 fpadd32 %f8,%f24,%f24 1765 fmuld %f34,%f40,%f40 1766 1767 fpadd32 %f10,%f26,%f26 1768 fmuld %f18,%f42,%f42 1769 1770 fmuld %f20,%f44,%f44 1771 1772 fmuld %f22,%f46,%f46 1773 1774 fmuld %f24,%f48,%f48 1775 faddd %f34,%f40,%f40 1776 1777 fmuld %f26,%f50,%f50 1778 faddd %f18,%f42,%f42 1779 1780 faddd %f20,%f44,%f44 1781 1782 faddd %f22,%f46,%f46 1783 1784 faddd %f24,%f48,%f48 1785 1786 faddd %f26,%f50,%f50 1787 1788 fdtos %f40,%f26 1789 st %f26,[%i3] 1790 add %i3,%i4,%o4 1791 1792 fdtos %f42,%f18 1793 st %f18,[%o4] 1794 add %o4,%i4,%i3 1795 1796 fdtos %f44,%f20 1797 st %f20,[%i3] 1798 add %i3,%i4,%o4 1799 1800 fdtos %f46,%f22 1801 st %f22,[%o4] 1802 add %o4,%i4,%i3 1803 1804 fdtos %f48,%f24 1805 st %f24,[%i3] 1806 add %i3,%i4,%o4 1807 1808 fdtos %f50,%f26 1809 st %f26,[%o4] 1810 add %o4,%i4,%i3 1811 1812 sethi %hi(0x7f800000),%o5 1813 cmp %l6,%o5 1814 bl,pt %icc,.spec6_out_of_range 1815 sll %i2,1,%o4 1816 1817 ble,pn %icc,.spec6_inf 1818 sub %i1,%o4,%o4 1819 1820 ! 
NaN -> NaN 1821 1822 fmuls %f12,%f12,%f12 1823 ba .spec6_exit 1824 st %f12,[%i3] 1825 1826 .spec6_inf: 1827 ld [%o4],%l0 1828 srl %l0,29,%l0 1829 andcc %l0,4,%l0 1830 be,a,pn %icc,.spec6_exit 1831 st %f12,[%i3] 1832 1833 ba .spec6_exit 1834 st %f3,[%i3] 1835 1836 .spec6_out_of_range: 1837 sub %i1,%o4,%o4 1838 ld [%o4],%l0 1839 srl %l0,29,%l0 1840 and %l0,4,%l0 1841 add %l0,2048,%l0 1842 ld [G1_CONST_TBL+%l0],%f2 1843 fmuls %f2,%f2,%f2 1844 st %f2,[%i3] 1845 1846 .spec6_exit: 1847 fmovs %f14,%f16 1848 mov %l7,%l0 1849 lda [%i1]%asi,%l1 1850 lda [%i1]%asi,%f2 1851 add %i1,%i2,%i1 1852 lda [%i1]%asi,%l2 1853 lda [%i1]%asi,%f4 1854 add %i1,%i2,%i1 1855 lda [%i1]%asi,%l3 1856 lda [%i1]%asi,%f6 1857 add %i1,%i2,%o0 1858 lda [%o0]%asi,%l4 1859 add %o0,%i2,%o1 1860 lda [%o1]%asi,%l5 1861 add %o1,%i2,%o2 1862 lda [%o2]%asi,%l6 1863 add %o2,%i2,%o3 1864 lda [%o3]%asi,%l7 1865 add %o3,%i2,%i1 1866 and %l1,G5_CONST,%l1 1867 and %l2,G5_CONST,%l2 1868 and %l3,G5_CONST,%l3 1869 and %l4,G5_CONST,%l4 1870 and %l5,G5_CONST,%l5 1871 and %l6,G5_CONST,%l6 1872 and %l7,G5_CONST,%l7 1873 1874 subcc %i0,7,%i0 1875 bpos,pt %icc,.main_loop 1876 add %i3,%i4,%i3 1877 ba .after_main_loop 1878 nop 1879 1880 .align 16 1881 .spec7: 1882 sethi %hi(0x7f800000),%o5 1883 cmp %l7,%o5 1884 bge,pn %icc,1f 1885 nop 1886 fcmpes %fcc0,%f14,%f3 1887 fcmpes %fcc1,%f14,THRESHOLDL 1888 fbl,a,pn %fcc0,1f ! if ( X < 0.0f ) 1889 fstod %f14,%f54 ! (7) y = (double) X 1890 fbl,a,pt %fcc1,.spec7_cont ! if ( X < THRESHOLDL ) 1891 fstod %f14,%f54 ! 
(7) y = (double) X 1892 1: 1893 fdtoi %f40,%f16 1894 st %f16,[%fp+tmp0] 1895 fmuld F62_K256ONLN2,%f48,%f48 1896 1897 fdtoi %f42,%f2 1898 st %f2,[%fp+tmp1] 1899 fmuld F62_K256ONLN2,%f50,%f50 1900 1901 fdtoi %f44,%f4 1902 st %f4,[%fp+tmp2] 1903 fmuld F62_K256ONLN2,%f52,%f52 1904 1905 fdtoi %f46,%f6 1906 st %f6,[%fp+tmp3] 1907 1908 fdtoi %f48,%f8 1909 st %f8,[%fp+tmp4] 1910 1911 fdtoi %f50,%f10 1912 st %f10,[%fp+tmp5] 1913 1914 fdtoi %f52,%f12 1915 st %f12,[%fp+tmp6] 1916 1917 fitod %f16,%f34 1918 fpackfix %f16,%f16 1919 1920 fitod %f2,%f18 1921 fpackfix %f2,%f2 1922 1923 fitod %f4,%f20 1924 fpackfix %f4,%f4 1925 1926 fitod %f6,%f22 1927 fpackfix %f6,%f6 1928 1929 fitod %f8,%f24 1930 fpackfix %f8,%f8 1931 1932 fitod %f10,%f26 1933 fpackfix %f10,%f10 1934 1935 fitod %f12,%f28 1936 fpackfix %f12,%f12 1937 1938 ld [%fp+tmp0],%o0 1939 fsubd %f40,%f34,%f40 1940 1941 ld [%fp+tmp1],%o1 1942 fsubd %f42,%f18,%f42 1943 1944 ld [%fp+tmp2],%o2 1945 and %o0,255,%o0 1946 fsubd %f44,%f20,%f44 1947 1948 ld [%fp+tmp3],%o3 1949 and %o1,255,%o1 1950 fsubd %f46,%f22,%f46 1951 1952 sll %o0,3,%o0 1953 sll %o1,3,%o1 1954 fmuld F60_KA2,%f40,%f34 1955 fsubd %f48,%f24,%f48 1956 1957 and %o2,255,%o2 1958 fmuld F60_KA2,%f42,%f18 1959 fsubd %f50,%f26,%f50 1960 1961 sll %o2,3,%o2 1962 fmuld F60_KA2,%f44,%f20 1963 fsubd %f52,%f28,%f52 1964 1965 ld [%fp+tmp4],%o4 1966 and %o3,255,%o3 1967 fmuld F60_KA2,%f46,%f22 1968 1969 ld [%fp+tmp5],%o5 1970 sll %o3,3,%o3 1971 fmuld F60_KA2,%f48,%f24 1972 faddd F58_KA1,%f34,%f34 1973 1974 ld [%fp+tmp6],%o7 1975 and %o4,255,%o4 1976 fmuld F60_KA2,%f50,%f26 1977 faddd F58_KA1,%f18,%f18 1978 1979 and %o5,255,%o5 1980 fmuld F60_KA2,%f52,%f28 1981 faddd F58_KA1,%f20,%f20 1982 1983 sll %o5,3,%o5 1984 faddd F58_KA1,%f22,%f22 1985 1986 fmuld %f34,%f40,%f40 1987 ldd [G1_CONST_TBL+%o0],%f34 1988 faddd F58_KA1,%f24,%f24 1989 1990 fmuld %f18,%f42,%f42 1991 ldd [G1_CONST_TBL+%o1],%f18 1992 faddd F58_KA1,%f26,%f26 1993 1994 fmuld %f20,%f44,%f44 1995 ldd [G1_CONST_TBL+%o2],%f20 
1996 faddd F58_KA1,%f28,%f28 1997 1998 fmuld %f22,%f46,%f46 1999 ldd [G1_CONST_TBL+%o3],%f22 2000 sll %o4,3,%o4 2001 2002 fmuld %f24,%f48,%f48 2003 ldd [G1_CONST_TBL+%o4],%f24 2004 and %o7,255,%o7 2005 fpadd32 %f16,%f34,%f34 2006 2007 fmuld %f26,%f50,%f50 2008 ldd [G1_CONST_TBL+%o5],%f26 2009 sll %o7,3,%o7 2010 fpadd32 %f2,%f18,%f18 2011 2012 fmuld %f28,%f52,%f52 2013 ldd [G1_CONST_TBL+%o7],%f28 2014 fpadd32 %f4,%f20,%f20 2015 2016 fpadd32 %f6,%f22,%f22 2017 2018 fpadd32 %f8,%f24,%f24 2019 fmuld %f34,%f40,%f40 2020 2021 fpadd32 %f10,%f26,%f26 2022 fmuld %f18,%f42,%f42 2023 2024 fpadd32 %f12,%f28,%f28 2025 fmuld %f20,%f44,%f44 2026 2027 fmuld %f22,%f46,%f46 2028 2029 fmuld %f24,%f48,%f48 2030 faddd %f34,%f40,%f40 2031 2032 fmuld %f26,%f50,%f50 2033 faddd %f18,%f42,%f42 2034 2035 fmuld %f28,%f52,%f52 2036 faddd %f20,%f44,%f44 2037 2038 faddd %f22,%f46,%f46 2039 2040 faddd %f24,%f48,%f48 2041 2042 faddd %f26,%f50,%f50 2043 2044 faddd %f28,%f52,%f52 2045 2046 fdtos %f40,%f26 2047 st %f26,[%i3] 2048 add %i3,%i4,%o4 2049 2050 fdtos %f42,%f18 2051 st %f18,[%o4] 2052 add %o4,%i4,%i3 2053 2054 fdtos %f44,%f20 2055 st %f20,[%i3] 2056 add %i3,%i4,%o4 2057 2058 fdtos %f46,%f22 2059 st %f22,[%o4] 2060 add %o4,%i4,%i3 2061 2062 fdtos %f48,%f24 2063 st %f24,[%i3] 2064 add %i3,%i4,%o4 2065 2066 fdtos %f50,%f26 2067 st %f26,[%o4] 2068 add %o4,%i4,%i3 2069 2070 fdtos %f52,%f28 2071 st %f28,[%i3] 2072 add %i3,%i4,%i3 2073 2074 sethi %hi(0x7f800000),%o5 2075 cmp %l7,%o5 2076 bl,pt %icc,.spec7_out_of_range 2077 sub %i1,%i2,%o4 2078 2079 ble,pn %icc,.spec7_inf 2080 ld [%o4],%l0 2081 2082 ! 
NaN -> NaN 2083 2084 fmuls %f14,%f14,%f14 2085 ba .spec7_exit 2086 st %f14,[%i3] 2087 2088 .spec7_inf: 2089 srl %l0,29,%l0 2090 andcc %l0,4,%l0 2091 be,a,pn %icc,.spec7_exit 2092 st %f14,[%i3] 2093 2094 ba .spec7_exit 2095 st %f3,[%i3] 2096 2097 .spec7_out_of_range: 2098 ld [%o4],%l0 2099 srl %l0,29,%l0 2100 and %l0,4,%l0 2101 add %l0,2048,%l0 2102 ld [G1_CONST_TBL+%l0],%f2 2103 fmuls %f2,%f2,%f2 2104 st %f2,[%i3] 2105 2106 .spec7_exit: 2107 subcc %i0,8,%i0 2108 bpos,pt %icc,.main_loop_preload 2109 add %i3,%i4,%i3 2110 2111 ba .tail 2112 nop 2113 SET_SIZE(__vexpf) 2114