/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"__vexp.S"

#include "libm.h"

	RO_DATA

/********************************************************************
 * vexp() algorithm is from mopt:f_exp.c. Basics are included here
 * to supplement comments within this file. vexp() has been unrolled
 * to a depth of 3. Only element 0 is documented.
 *
 * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
 *	2^44 to allow *2^k w/o shifting within the FP registers. These
 *	had to be removed for CHEETAH to avoid the fdtox of a very large
 *	number, which would trap to kernel (2^52).
 *
 * Let x = (k + j/256)ln2 + r
 *	then exp(x) = exp(ln2*(k+j/256)) * exp(r)
 *		    = 2^k * 2^(j/256) * exp(r)
 *	where exp(r) is replaced by the polynomial approximation
 *		exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
 *		       = 1 + r*(1+r*(B1+r*(B2+r*B3)))
 *	let
 *		p = r*(1+r*(B1+r*(B2+r*B3)))	! notice, not quite exp(r)
 *		q = 2^(j/256) (high 64 bits)
 *		t = 2^(j/256) (extra precision)	! both from _TBL_exp_z[]
 *	then
 *		2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
 *	then actual computation is 2^k * ( q + ( t + q*p ) )
 *
 ********************************************************************/

! TBL: 256 entries of 16 bytes, indexed by j (0..255).  Each entry is a
! pair of doubles: the first is q, the leading part of 2^(j/256), and the
! second is t, the small correction that supplies the extra precision.
! The code below forms the byte offset j*16 with "and ...,0xff0" and
! fetches q at that offset and t at offset+8.

	.align	16
TBL:
	.word	0x3ff00000,0x00000000
	.word	0x00000000,0x00000000
	.word	0x3ff00b1a,0xfa5abcbf
	.word	0xbc84f6b2,0xa7609f71
	.word	0x3ff0163d,0xa9fb3335
	.word	0x3c9b6129,0x9ab8cdb7
	.word	0x3ff02168,0x143b0281
	.word	0xbc82bf31,0x0fc54eb6
	.word	0x3ff02c9a,0x3e778061
	.word	0xbc719083,0x535b085d
	.word	0x3ff037d4,0x2e11bbcc
	.word	0x3c656811,0xeeade11a
	.word	0x3ff04315,0xe86e7f85
	.word	0xbc90a31c,0x1977c96e
	.word	0x3ff04e5f,0x72f654b1
	.word	0x3c84c379,0x3aa0d08c
	.word	0x3ff059b0,0xd3158574
	.word	0x3c8d73e2,0xa475b465
	.word	0x3ff0650a,0x0e3c1f89
	.word	0xbc95cb7b,0x5799c396
	.word	0x3ff0706b,0x29ddf6de
	.word	0xbc8c91df,0xe2b13c26
	.word	0x3ff07bd4,0x2b72a836
	.word	0x3c832334,0x54458700
	.word	0x3ff08745,0x18759bc8
	.word	0x3c6186be,0x4bb284ff
	.word	0x3ff092bd,0xf66607e0
	.word	0xbc968063,0x800a3fd1
	.word	0x3ff09e3e,0xcac6f383
	.word	0x3c914878,0x18316136
	.word	0x3ff0a9c7,0x9b1f3919
	.word	0x3c85d16c,0x873d1d38
	.word	0x3ff0b558,0x6cf9890f
	.word	0x3c98a62e,0x4adc610a
	.word	0x3ff0c0f1,0x45e46c85
	.word	0x3c94f989,0x06d21cef
	.word	0x3ff0cc92,0x2b7247f7
	.word	0x3c901edc,0x16e24f71
	.word	0x3ff0d83b,0x23395dec
	.word	0xbc9bc14d,0xe43f316a
	.word	0x3ff0e3ec,0x32d3d1a2
	.word	0x3c403a17,0x27c57b53
	.word	0x3ff0efa5,0x5fdfa9c5
	.word	0xbc949db9,0xbc54021b
	.word	0x3ff0fb66,0xaffed31b
	.word	0xbc6b9bed,0xc44ebd7b
	.word	0x3ff10730,0x28d7233e
	.word	0x3c8d46eb,0x1692fdd5
	.word	0x3ff11301,0xd0125b51
	.word	0xbc96c510,0x39449b3a
	.word	0x3ff11edb,0xab5e2ab6
	.word	0xbc9ca454,0xf703fb72
	.word	0x3ff12abd,0xc06c31cc
	.word	0xbc51b514,0xb36ca5c7
	.word	0x3ff136a8,0x14f204ab
	.word	0xbc67108f,0xba48dcf0
	.word	0x3ff1429a,0xaea92de0
	.word	0xbc932fbf,0x9af1369e
	.word	0x3ff14e95,0x934f312e
	.word	0xbc8b91e8,0x39bf44ab
	.word	0x3ff15a98,0xc8a58e51
	.word	0x3c82406a,0xb9eeab0a
	.word	0x3ff166a4,0x5471c3c2
	.word	0x3c58f23b,0x82ea1a32
	.word	0x3ff172b8,0x3c7d517b
	.word	0xbc819041,0xb9d78a76
	.word	0x3ff17ed4,0x8695bbc0
	.word	0x3c709e3f,0xe2ac5a64
	.word	0x3ff18af9,0x388c8dea
	.word	0xbc911023,0xd1970f6c
	.word	0x3ff19726,0x58375d2f
	.word	0x3c94aadd,0x85f17e08
	.word	0x3ff1a35b,0xeb6fcb75
	.word	0x3c8e5b4c,0x7b4968e4
	.word	0x3ff1af99,0xf8138a1c
	.word	0x3c97bf85,0xa4b69280
	.word	0x3ff1bbe0,0x84045cd4
	.word	0xbc995386,0x352ef607
	.word	0x3ff1c82f,0x95281c6b
	.word	0x3c900977,0x8010f8c9
	.word	0x3ff1d487,0x3168b9aa
	.word	0x3c9e016e,0x00a2643c
	.word	0x3ff1e0e7,0x5eb44027
	.word	0xbc96fdd8,0x088cb6de
	.word	0x3ff1ed50,0x22fcd91d
	.word	0xbc91df98,0x027bb78c
	.word	0x3ff1f9c1,0x8438ce4d
	.word	0xbc9bf524,0xa097af5c
	.word	0x3ff2063b,0x88628cd6
	.word	0x3c8dc775,0x814a8494
	.word	0x3ff212be,0x3578a819
	.word	0x3c93592d,0x2cfcaac9
	.word	0x3ff21f49,0x917ddc96
	.word	0x3c82a97e,0x9494a5ee
	.word	0x3ff22bdd,0xa27912d1
	.word	0x3c8d34fb,0x5577d69e
	.word	0x3ff2387a,0x6e756238
	.word	0x3c99b07e,0xb6c70573
	.word	0x3ff2451f,0xfb82140a
	.word	0x3c8acfcc,0x911ca996
	.word	0x3ff251ce,0x4fb2a63f
	.word	0x3c8ac155,0xbef4f4a4
	.word	0x3ff25e85,0x711ece75
	.word	0x3c93e1a2,0x4ac31b2c
	.word	0x3ff26b45,0x65e27cdd
	.word	0x3c82bd33,0x9940e9d9
	.word	0x3ff2780e,0x341ddf29
	.word	0x3c9e067c,0x05f9e76c
	.word	0x3ff284df,0xe1f56381
	.word	0xbc9a4c3a,0x8c3f0d7e
	.word	0x3ff291ba,0x7591bb70
	.word	0xbc82cc72,0x28401cbc
	.word	0x3ff29e9d,0xf51fdee1
	.word	0x3c8612e8,0xafad1255
	.word	0x3ff2ab8a,0x66d10f13
	.word	0xbc995743,0x191690a7
	.word	0x3ff2b87f,0xd0dad990
	.word	0xbc410adc,0xd6381aa4
	.word	0x3ff2c57e,0x39771b2f
	.word	0xbc950145,0xa6eb5124
	.word	0x3ff2d285,0xa6e4030b
	.word	0x3c900247,0x54db41d5
	.word	0x3ff2df96,0x1f641589
	.word	0x3c9d16cf,0xfbbce198
	.word	0x3ff2ecaf,0xa93e2f56
	.word	0x3c71ca0f,0x45d52383
	.word	0x3ff2f9d2,0x4abd886b
	.word	0xbc653c55,0x532bda93
	.word	0x3ff306fe,0x0a31b715
	.word	0x3c86f46a,0xd23182e4
	.word	0x3ff31432,0xedeeb2fd
	.word	0x3c8959a3,0xf3f3fcd0
	.word	0x3ff32170,0xfc4cd831
	.word	0x3c8a9ce7,0x8e18047c
	.word	0x3ff32eb8,0x3ba8ea32
	.word	0xbc9c45e8,0x3cb4f318
	.word	0x3ff33c08,0xb26416ff
	.word	0x3c932721,0x843659a6
	.word	0x3ff34962,0x66e3fa2d
	.word	0xbc835a75,0x930881a4
	.word	0x3ff356c5,0x5f929ff1
	.word	0xbc8b5cee,0x5c4e4628
	.word	0x3ff36431,0xa2de883b
	.word	0xbc8c3144,0xa06cb85e
	.word	0x3ff371a7,0x373aa9cb
	.word	0xbc963aea,0xbf42eae2
	.word	0x3ff37f26,0x231e754a
	.word	0xbc99f5ca,0x9eceb23c
	.word	0x3ff38cae,0x6d05d866
	.word	0xbc9e958d,0x3c9904bd
	.word	0x3ff39a40,0x1b7140ef
	.word	0xbc99a9a5,0xfc8e2934
	.word	0x3ff3a7db,0x34e59ff7
	.word	0xbc75e436,0xd661f5e3
	.word	0x3ff3b57f,0xbfec6cf4
	.word	0x3c954c66,0xe26fff18
	.word	0x3ff3c32d,0xc313a8e5
	.word	0xbc9efff8,0x375d29c3
	.word	0x3ff3d0e5,0x44ede173
	.word	0x3c7fe8d0,0x8c284c71
	.word	0x3ff3dea6,0x4c123422
	.word	0x3c8ada09,0x11f09ebc
	.word	0x3ff3ec70,0xdf1c5175
	.word	0xbc8af663,0x7b8c9bca
	.word	0x3ff3fa45,0x04ac801c
	.word	0xbc97d023,0xf956f9f3
	.word	0x3ff40822,0xc367a024
	.word	0x3c8bddf8,0xb6f4d048
	.word	0x3ff4160a,0x21f72e2a
	.word	0xbc5ef369,0x1c309278
	.word	0x3ff423fb,0x2709468a
	.word	0xbc98462d,0xc0b314dd
	.word	0x3ff431f5,0xd950a897
	.word	0xbc81c7dd,0xe35f7998
	.word	0x3ff43ffa,0x3f84b9d4
	.word	0x3c8880be,0x9704c002
	.word	0x3ff44e08,0x6061892d
	.word	0x3c489b7a,0x04ef80d0
	.word	0x3ff45c20,0x42a7d232
	.word	0xbc686419,0x82fb1f8e
	.word	0x3ff46a41,0xed1d0057
	.word	0x3c9c944b,0xd1648a76
	.word	0x3ff4786d,0x668b3237
	.word	0xbc9c20f0,0xed445733
	.word	0x3ff486a2,0xb5c13cd0
	.word	0x3c73c1a3,0xb69062f0
	.word	0x3ff494e1,0xe192aed2
	.word	0xbc83b289,0x5e499ea0
	.word	0x3ff4a32a,0xf0d7d3de
	.word	0x3c99cb62,0xf3d1be56
	.word	0x3ff4b17d,0xea6db7d7
	.word	0xbc8125b8,0x7f2897f0
	.word	0x3ff4bfda,0xd5362a27
	.word	0x3c7d4397,0xafec42e2
	.word	0x3ff4ce41,0xb817c114
	.word	0x3c905e29,0x690abd5d
	.word	0x3ff4dcb2,0x99fddd0d
	.word	0x3c98ecdb,0xbc6a7833
	.word	0x3ff4eb2d,0x81d8abff
	.word	0xbc95257d,0x2e5d7a52
	.word	0x3ff4f9b2,0x769d2ca7
	.word	0xbc94b309,0xd25957e3
	.word	0x3ff50841,0x7f4531ee
	.word	0x3c7a249b,0x49b7465f
	.word	0x3ff516da,0xa2cf6642
	.word	0xbc8f7685,0x69bd93ee
	.word	0x3ff5257d,0xe83f4eef
	.word	0xbc7c998d,0x43efef71
	.word	0x3ff5342b,0x569d4f82
	.word	0xbc807abe,0x1db13cac
	.word	0x3ff542e2,0xf4f6ad27
	.word	0x3c87926d,0x192d5f7e
	.word	0x3ff551a4,0xca5d920f
	.word	0xbc8d689c,0xefede59a
	.word	0x3ff56070,0xdde910d2
	.word	0xbc90fb6e,0x168eebf0
	.word	0x3ff56f47,0x36b527da
	.word	0x3c99bb2c,0x011d93ad
	.word	0x3ff57e27,0xdbe2c4cf
	.word	0xbc90b98c,0x8a57b9c4
	.word	0x3ff58d12,0xd497c7fd
	.word	0x3c8295e1,0x5b9a1de8
	.word	0x3ff59c08,0x27ff07cc
	.word	0xbc97e2ce,0xe467e60f
	.word	0x3ff5ab07,0xdd485429
	.word	0x3c96324c,0x054647ad
	.word	0x3ff5ba11,0xfba87a03
	.word	0xbc9b77a1,0x4c233e1a
	.word	0x3ff5c926,0x8a5946b7
	.word	0x3c3c4b1b,0x816986a2
	.word	0x3ff5d845,0x90998b93
	.word	0xbc9cd6a7,0xa8b45642
	.word	0x3ff5e76f,0x15ad2148
	.word	0x3c9ba6f9,0x3080e65e
	.word	0x3ff5f6a3,0x20dceb71
	.word	0xbc89eadd,0xe3cdcf92
	.word	0x3ff605e1,0xb976dc09
	.word	0xbc93e242,0x9b56de47
	.word	0x3ff6152a,0xe6cdf6f4
	.word	0x3c9e4b3e,0x4ab84c27
	.word	0x3ff6247e,0xb03a5585
	.word	0xbc9383c1,0x7e40b497
	.word	0x3ff633dd,0x1d1929fd
	.word	0x3c984710,0xbeb964e5
	.word	0x3ff64346,0x34ccc320
	.word	0xbc8c483c,0x759d8932
	.word	0x3ff652b9,0xfebc8fb7
	.word	0xbc9ae3d5,0xc9a73e08
	.word	0x3ff66238,0x82552225
	.word	0xbc9bb609,0x87591c34
	.word	0x3ff671c1,0xc70833f6
	.word	0xbc8e8732,0x586c6134
	.word	0x3ff68155,0xd44ca973
	.word	0x3c6038ae,0x44f73e65
	.word	0x3ff690f4,0xb19e9538
	.word	0x3c8804bd,0x9aeb445c
	.word	0x3ff6a09e,0x667f3bcd
	.word	0xbc9bdd34,0x13b26456
	.word	0x3ff6b052,0xfa75173e
	.word	0x3c7a38f5,0x2c9a9d0e
	.word	0x3ff6c012,0x750bdabf
	.word	0xbc728956,0x67ff0b0d
	.word	0x3ff6cfdc,0xddd47645
	.word	0x3c9c7aa9,0xb6f17309
	.word	0x3ff6dfb2,0x3c651a2f
	.word	0xbc6bbe3a,0x683c88ab
	.word	0x3ff6ef92,0x98593ae5
	.word	0xbc90b974,0x9e1ac8b2
	.word	0x3ff6ff7d,0xf9519484
	.word	0xbc883c0f,0x25860ef6
	.word	0x3ff70f74,0x66f42e87
	.word	0x3c59d644,0xd45aa65f
	.word	0x3ff71f75,0xe8ec5f74
	.word	0xbc816e47,0x86887a99
	.word	0x3ff72f82,0x86ead08a
	.word	0xbc920aa0,0x2cd62c72
	.word	0x3ff73f9a,0x48a58174
	.word	0xbc90a8d9,0x6c65d53c
	.word	0x3ff74fbd,0x35d7cbfd
	.word	0x3c9047fd,0x618a6e1c
	.word	0x3ff75feb,0x564267c9
	.word	0xbc902459,0x57316dd3
	.word	0x3ff77024,0xb1ab6e09
	.word	0x3c9b7877,0x169147f8
	.word	0x3ff78069,0x4fde5d3f
	.word	0x3c9866b8,0x0a02162c
	.word	0x3ff790b9,0x38ac1cf6
	.word	0x3c9349a8,0x62aadd3e
	.word	0x3ff7a114,0x73eb0187
	.word	0xbc841577,0xee04992f
	.word	0x3ff7b17b,0x0976cfdb
	.word	0xbc9bebb5,0x8468dc88
	.word	0x3ff7c1ed,0x0130c132
	.word	0x3c9f124c,0xd1164dd6
	.word	0x3ff7d26a,0x62ff86f0
	.word	0x3c91bddb,0xfb72b8b4
	.word	0x3ff7e2f3,0x36cf4e62
	.word	0x3c705d02,0xba15797e
	.word	0x3ff7f387,0x8491c491
	.word	0xbc807f11,0xcf9311ae
	.word	0x3ff80427,0x543e1a12
	.word	0xbc927c86,0x626d972b
	.word	0x3ff814d2,0xadd106d9
	.word	0x3c946437,0x0d151d4d
	.word	0x3ff82589,0x994cce13
	.word	0xbc9d4c1d,0xd41532d8
	.word	0x3ff8364c,0x1eb941f7
	.word	0x3c999b9a,0x31df2bd5
	.word	0x3ff8471a,0x4623c7ad
	.word	0xbc88d684,0xa341cdfb
	.word	0x3ff857f4,0x179f5b21
	.word	0xbc5ba748,0xf8b216d0
	.word	0x3ff868d9,0x9b4492ec
	.word	0x3ca01c83,0xb21584a3
	.word	0x3ff879ca,0xd931a436
	.word	0x3c85d2d7,0xd2db47bc
	.word	0x3ff88ac7,0xd98a6699
	.word	0x3c9994c2,0xf37cb53a
	.word	0x3ff89bd0,0xa478580f
	.word	0x3c9d5395,0x4475202a
	.word	0x3ff8ace5,0x422aa0db
	.word	0x3c96e9f1,0x56864b27
	.word	0x3ff8be05,0xbad61778
	.word	0x3c9ecb5e,0xfc43446e
	.word	0x3ff8cf32,0x16b5448c
	.word	0xbc70d55e,0x32e9e3aa
	.word	0x3ff8e06a,0x5e0866d9
	.word	0xbc97114a,0x6fc9b2e6
	.word	0x3ff8f1ae,0x99157736
	.word	0x3c85cc13,0xa2e3976c
	.word	0x3ff902fe,0xd0282c8a
	.word	0x3c9592ca,0x85fe3fd2
	.word	0x3ff9145b,0x0b91ffc6
	.word	0xbc9dd679,0x2e582524
	.word	0x3ff925c3,0x53aa2fe2
	.word	0xbc83455f,0xa639db7f
	.word	0x3ff93737,0xb0cdc5e5
	.word	0xbc675fc7,0x81b57ebc
	.word	0x3ff948b8,0x2b5f98e5
	.word	0xbc8dc3d6,0x797d2d99
	.word	0x3ff95a44,0xcbc8520f
	.word	0xbc764b7c,0x96a5f039
	.word	0x3ff96bdd,0x9a7670b3
	.word	0xbc5ba596,0x7f19c896
	.word	0x3ff97d82,0x9fde4e50
	.word	0xbc9d185b,0x7c1b85d0
	.word	0x3ff98f33,0xe47a22a2
	.word	0x3c7cabda,0xa24c78ed
	.word	0x3ff9a0f1,0x70ca07ba
	.word	0xbc9173bd,0x91cee632
	.word	0x3ff9b2bb,0x4d53fe0d
	.word	0xbc9dd84e,0x4df6d518
	.word	0x3ff9c491,0x82a3f090
	.word	0x3c7c7c46,0xb071f2be
	.word	0x3ff9d674,0x194bb8d5
	.word	0xbc9516be,0xa3dd8233
	.word	0x3ff9e863,0x19e32323
	.word	0x3c7824ca,0x78e64c6e
	.word	0x3ff9fa5e,0x8d07f29e
	.word	0xbc84a9ce,0xaaf1face
	.word	0x3ffa0c66,0x7b5de565
	.word	0xbc935949,0x5d1cd533
	.word	0x3ffa1e7a,0xed8eb8bb
	.word	0x3c9c6618,0xee8be70e
	.word	0x3ffa309b,0xec4a2d33
	.word	0x3c96305c,0x7ddc36ab
	.word	0x3ffa42c9,0x80460ad8
	.word	0xbc9aa780,0x589fb120
	.word	0x3ffa5503,0xb23e255d
	.word	0xbc9d2f6e,0xdb8d41e1
	.word	0x3ffa674a,0x8af46052
	.word	0x3c650f56,0x30670366
	.word	0x3ffa799e,0x1330b358
	.word	0x3c9bcb7e,0xcac563c6
	.word	0x3ffa8bfe,0x53c12e59
	.word	0xbc94f867,0xb2ba15a8
	.word	0x3ffa9e6b,0x5579fdbf
	.word	0x3c90fac9,0x0ef7fd31
	.word	0x3ffab0e5,0x21356eba
	.word	0x3c889c31,0xdae94544
	.word	0x3ffac36b,0xbfd3f37a
	.word	0xbc8f9234,0xcae76cd0
	.word	0x3ffad5ff,0x3a3c2774
	.word	0x3c97ef3b,0xb6b1b8e4
	.word	0x3ffae89f,0x995ad3ad
	.word	0x3c97a1cd,0x345dcc81
	.word	0x3ffafb4c,0xe622f2ff
	.word	0xbc94b2fc,0x0f315ecc
	.word	0x3ffb0e07,0x298db666
	.word	0xbc9bdef5,0x4c80e425
	.word	0x3ffb20ce,0x6c9a8952
	.word	0x3c94dd02,0x4a0756cc
	.word	0x3ffb33a2,0xb84f15fb
	.word	0xbc62805e,0x3084d708
	.word	0x3ffb4684,0x15b749b1
	.word	0xbc7f763d,0xe9df7c90
	.word	0x3ffb5972,0x8de5593a
	.word	0xbc9c71df,0xbbba6de3
	.word	0x3ffb6c6e,0x29f1c52a
	.word	0x3c92a8f3,0x52883f6e
	.word	0x3ffb7f76,0xf2fb5e47
	.word	0xbc75584f,0x7e54ac3b
	.word	0x3ffb928c,0xf22749e4
	.word	0xbc9b7216,0x54cb65c6
	.word	0x3ffba5b0,0x30a1064a
	.word	0xbc9efcd3,0x0e54292e
	.word	0x3ffbb8e0,0xb79a6f1f
	.word	0xbc3f52d1,0xc9696205
	.word	0x3ffbcc1e,0x904bc1d2
	.word	0x3c823dd0,0x7a2d9e84
	.word	0x3ffbdf69,0xc3f3a207
	.word	0xbc3c2623,0x60ea5b52
	.word	0x3ffbf2c2,0x5bd71e09
	.word	0xbc9efdca,0x3f6b9c73
	.word	0x3ffc0628,0x6141b33d
	.word	0xbc8d8a5a,0xa1fbca34
	.word	0x3ffc199b,0xdd85529c
	.word	0x3c811065,0x895048dd
	.word	0x3ffc2d1c,0xd9fa652c
	.word	0xbc96e516,0x17c8a5d7
	.word	0x3ffc40ab,0x5fffd07a
	.word	0x3c9b4537,0xe083c60a
	.word	0x3ffc5447,0x78fafb22
	.word	0x3c912f07,0x2493b5af
	.word	0x3ffc67f1,0x2e57d14b
	.word	0x3c92884d,0xff483cad
	.word	0x3ffc7ba8,0x8988c933
	.word	0xbc8e76bb,0xbe255559
	.word	0x3ffc8f6d,0x9406e7b5
	.word	0x3c71acbc,0x48805c44
	.word	0x3ffca340,0x5751c4db
	.word	0xbc87f2be,0xd10d08f4
	.word	0x3ffcb720,0xdcef9069
	.word	0x3c7503cb,0xd1e949db
	.word	0x3ffccb0f,0x2e6d1675
	.word	0xbc7d220f,0x86009093
	.word	0x3ffcdf0b,0x555dc3fa
	.word	0xbc8dd83b,0x53829d72
	.word	0x3ffcf315,0x5b5bab74
	.word	0xbc9a08e9,0xb86dff57
	.word	0x3ffd072d,0x4a07897c
	.word	0xbc9cbc37,0x43797a9c
	.word	0x3ffd1b53,0x2b08c968
	.word	0x3c955636,0x219a36ee
	.word	0x3ffd2f87,0x080d89f2
	.word	0xbc9d487b,0x719d8578
	.word	0x3ffd43c8,0xeacaa1d6
	.word	0x3c93db53,0xbf5a1614
	.word	0x3ffd5818,0xdcfba487
	.word	0x3c82ed02,0xd75b3706
	.word	0x3ffd6c76,0xe862e6d3
	.word	0x3c5fe87a,0x4a8165a0
	.word	0x3ffd80e3,0x16c98398
	.word	0xbc911ec1,0x8beddfe8
	.word	0x3ffd955d,0x71ff6075
	.word	0x3c9a052d,0xbb9af6be
	.word	0x3ffda9e6,0x03db3285
	.word	0x3c9c2300,0x696db532
	.word	0x3ffdbe7c,0xd63a8315
	.word	0xbc9b76f1,0x926b8be4
	.word	0x3ffdd321,0xf301b460
	.word	0x3c92da57,0x78f018c2
	.word	0x3ffde7d5,0x641c0658
	.word	0xbc9ca552,0x8e79ba8f
	.word	0x3ffdfc97,0x337b9b5f
	.word	0xbc91a5cd,0x4f184b5c
	.word	0x3ffe1167,0x6b197d17
	.word	0xbc72b529,0xbd5c7f44
	.word	0x3ffe2646,0x14f5a129
	.word	0xbc97b627,0x817a1496
	.word	0x3ffe3b33,0x3b16ee12
	.word	0xbc99f4a4,0x31fdc68a
	.word	0x3ffe502e,0xe78b3ff6
	.word	0x3c839e89,0x80a9cc8f
	.word	0x3ffe6539,0x24676d76
	.word	0xbc863ff8,0x7522b734
	.word	0x3ffe7a51,0xfbc74c83
	.word	0x3c92d522,0xca0c8de2
	.word	0x3ffe8f79,0x77cdb740
	.word	0xbc910894,0x80b054b1
	.word	0x3ffea4af,0xa2a490da
	.word	0xbc9e9c23,0x179c2893
	.word	0x3ffeb9f4,0x867cca6e
	.word	0x3c94832f,0x2293e4f2
	.word	0x3ffecf48,0x2d8e67f1
	.word	0xbc9c93f3,0xb411ad8c
	.word	0x3ffee4aa,0xa2188510
	.word	0x3c91c68d,0xa487568d
	.word	0x3ffefa1b,0xee615a27
	.word	0x3c9dc7f4,0x86a4b6b0
	.word	0x3fff0f9c,0x1cb6412a
	.word	0xbc932200,0x65181d45
	.word	0x3fff252b,0x376bba97
	.word	0x3c93a1a5,0xbf0d8e43
	.word	0x3fff3ac9,0x48dd7274
	.word	0xbc795a5a,0x3ed837de
	.word	0x3fff5076,0x5b6e4540
	.word	0x3c99d3e1,0x2dd8a18b
	.word	0x3fff6632,0x798844f8
	.word	0x3c9fa37b,0x3539343e
	.word	0x3fff7bfd,0xad9cbe14
	.word	0xbc9dbb12,0xd006350a
	.word	0x3fff91d8,0x02243c89
	.word	0xbc612ea8,0xa779f689
	.word	0x3fffa7c1,0x819e90d8
	.word	0x3c874853,0xf3a5931e
	.word	0x3fffbdba,0x3692d514
	.word	0xbc796773,0x15098eb6
	.word	0x3fffd3c2,0x2b8f71f1
	.word	0x3c62eb74,0x966579e7
	.word	0x3fffe9d9,0x6b2a23d9
	.word	0x3c74a603,0x7442fde3

! constants: one double per 8-byte slot; the byte offsets are named by
! the #defines that follow, and every slot is loaded into a fixed FP
! register at function entry.

	.align	16
constants:
	.word	0x3ef00000,0x00000000	! ox3ef: 32-bit exponent addend used by .small
	.word	0x40862e42,0xfefa39ef	! thresh: x above this overflows
	.word	0x01000000,0x00000000	! tiny
	.word	0x7f000000,0x00000000	! huge
	.word	0x80000000,0x00000000	! signbit mask
	.word	0x43f00000,0x00000000	! scaling 2^12 two96
	.word	0xfff00000,0x00000000	! neginf (also the sign+exponent mask)
	.word	0x3ff00000,0x00000000	! one
	.word	0x3fdfffff,0xfffffff6	! B1
	.word	0x3fc55555,0x721a1d14	! B2
	.word	0x3fa55555,0x6e0896af	! B3
	.word	0x41371547,0x652b82fe	! scaling 2^12 invln2_256
	.word	0x3ea62e42,0xfee00000	! scaling 2^(-12) ln2_256h
	.word	0x3caa39ef,0x35793c76	! scaling 2^(-12) ln2_256l

	! base set w/o scaling
	! .word	0x43300000,0x00000000	! scaling two96
	! .word	0x40771547,0x652b82fe	! scaling invln2_256
	! .word	0x3f662e42,0xfee00000	! scaling ln2_256h
	! .word	0x3d6a39ef,0x35793c76	! scaling ln2_256l

! byte offsets into constants[]

#define ox3ef		0x0
#define thresh		0x8
#define tiny		0x10
#define huge		0x18
#define signbit		0x20
#define two96		0x28
#define neginf		0x30
#define one		0x38
#define B1OFF		0x40
#define B2OFF		0x48
#define B3OFF		0x50
#define invln2_256	0x58
#define ln2_256h	0x60
#define ln2_256l	0x68

! local storage indices
! m0..m2 hold the fdtoi results so they can be reloaded into integer
! registers; jnk is an 8-byte scratch slot that absorbs the stores of
! "previous results" made before any real result exists.

#define m2		STACK_BIAS-0x4
#define m1		STACK_BIAS-0x8
#define m0		STACK_BIAS-0xc
#define jnk		STACK_BIAS-0x20
! sizeof temp storage - must be a multiple of 16 for V9
#define tmps		0x20

! register use

! i0  n
! i1  x
! i2  stridex
! i3  y
! i4  stridey
! i5  0x80000000

! g1  TBL

! l0  m0
! l1  m1
! l2  m2
! l3  j0,oy0
! l4  j1,oy1
! l5  j2,oy2
! l6  0x3e300000
! l7  0x40862e41

! o0  py0
! o1  py1
! o2  py2
! o3  scratch
! o4  scratch
! o5  0x40874910
! o7  0x7ff00000

! f0  x0
! f2
! f4
! f6
! f8
! f10 x1
! f12
! f14
! f16
! f18
! f20 x2
! f22
! f24
! f26
! f28
! f30
! f32
! f34
! f36 0x3ef0...
! f38 thresh
! f40 tiny
! f42 huge
! f44 signbit
! f46 two96
! f48 neginf
! f50 one
! f52 B1
! f54 B2
! f56 B3
! f58 invln2_256
! f60 ln2_256h
! f62 ln2_256l
#define BOUNDRY		%f36
#define THRESH		%f38
#define TINY		%f40
#define HUGE		%f42
#define SIGNBIT		%f44
#define TWO96		%f46
#define NEGINF		%f48
#define ONE		%f50
#define B1		%f52
#define B2		%f54
#define B3		%f56
#define INVLN2_256	%f58
#define LN2_256H	%f60
#define LN2_256L	%f62

!-----------------------------------------------------------------------
! __vexp: y[i*stridey] = exp(x[i*stridex]) for i = 0 .. n-1
!
! In:	%i0 = n, %i1 = x, %i2 = stridex, %i3 = y, %i4 = stridey
!	(strides are multiplied by 8 below, i.e. they count doubles)
!	NOTE(review): presumably the C prototype is
!	void __vexp(int n, double *x, int stridex, double *y, int stridey)
!	-- confirm against the caller.
!	NOTE(review): the first element is loaded before n is examined,
!	so the caller is expected to pass n >= 1 -- confirm.
!
! Structure:
!   * The loop is unrolled three deep (.loop0/.loop1/.loop2 -> .cont).
!   * Results of one pass are written to memory during the next pass (or
!     at .endloop0); on the first pass those stores land in the jnk slot.
!   * The "next argument" preloads go through %asi (set up for
!     non-faulting loads), so touching one element past the end of x
!     does not fault.
!   * An element whose high word (sign cleared) lies outside
!     [0x3e300000, 0x40862e41] is diverted to .rangeN, which either
!     produces the special-case result itself or, when x is still
!     within range, rejoins the main path.
!   * .small rescales results whose k is below -1021.
!-----------------------------------------------------------------------

	ENTRY(__vexp)
	save	%sp,-SA(MINFRAME)-tmps,%sp
	PIC_SETUP(l7)
	PIC_SET(l7,constants,o3)
	PIC_SET(l7,TBL,o0)
	mov	%o0,%g1
	wr	%g0,0x82,%asi		! set %asi for non-faulting loads

	sethi	%hi(0x80000000),%i5
	sethi	%hi(0x3e300000),%l6
	sethi	%hi(0x40862e41),%l7
	or	%l7,%lo(0x40862e41),%l7
	sethi	%hi(0x40874910),%o5
	or	%o5,%lo(0x40874910),%o5
	sethi	%hi(0x7ff00000),%o7
	ldd	[%o3+ox3ef],BOUNDRY
	ldd	[%o3+thresh],THRESH
	ldd	[%o3+tiny],TINY
	ldd	[%o3+huge],HUGE
	ldd	[%o3+signbit],SIGNBIT
	ldd	[%o3+two96],TWO96
	ldd	[%o3+neginf],NEGINF
	ldd	[%o3+one],ONE
	ldd	[%o3+B1OFF],B1
	ldd	[%o3+B2OFF],B2
	ldd	[%o3+B3OFF],B3
	ldd	[%o3+invln2_256],INVLN2_256
	ldd	[%o3+ln2_256h],LN2_256H
	ldd	[%o3+ln2_256l],LN2_256L
	sll	%i2,3,%i2		! scale strides
	sll	%i4,3,%i4
	add	%fp,jnk,%l3		! precondition loop
	add	%fp,jnk,%l4
	add	%fp,jnk,%l5
	ld	[%i1],%l0		! hx = *x
	ld	[%i1],%f0
	ld	[%i1+4],%f1
	andn	%l0,%i5,%l0		! hx &= ~0x80000000
	ba	.loop0
	add	%i1,%i2,%i1		! x += stridex

	.align	16
! -- 16 byte aligned
.loop0:
	lda	[%i1]%asi,%l1		! preload next argument
	sub	%l0,%l6,%o3
	sub	%l7,%l0,%o4
	fand	%f0,SIGNBIT,%f2		! get sign bit

	lda	[%i1]%asi,%f10
	orcc	%o3,%o4,%g0
	mov	%i3,%o0			! py0 = y
	bl,pn	%icc,.range0		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f11
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1

! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2		! used to strip least sig bits
	fmuld	%f0,INVLN2_256,%f4	! x/ (ln2/256) , creating k

.loop1:
	lda	[%i1]%asi,%l2		! preload next argument
	sub	%l1,%l6,%o3
	sub	%l7,%l1,%o4
	fand	%f10,SIGNBIT,%f12

	lda	[%i1]%asi,%f20
	orcc	%o3,%o4,%g0
	mov	%i3,%o1			! py1 = y
	bl,pn	%icc,.range1		! if hx < 0x3e300000 or > 0x40862e41

! delay slot
	lda	[%i1+4]%asi,%f21
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2

! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12
	fmuld	%f10,INVLN2_256,%f14

.loop2:
	sub	%l2,%l6,%o3
	sub	%l7,%l2,%o4
	fand	%f20,SIGNBIT,%f22
	fmuld	%f20,INVLN2_256,%f24	! okay to put this here; for alignment

	orcc	%o3,%o4,%g0
	bl,pn	%icc,.range2		! if hx < 0x3e300000 or > 0x40862e41
! delay slot
	for	%f22,TWO96,%f22
	faddd	%f4,%f2,%f4		! creating k+j/256, sra to zero bits

.cont:
	faddd	%f14,%f12,%f14
	mov	%i3,%o2			! py2 = y

	faddd	%f24,%f22,%f24
	add	%i3,%i4,%i3		! y += stridey

! BUBBLE USIII

	fsubd	%f4,%f2,%f8		! creating k+j/256: sll
	st	%f6,[%l3]		! store previous pass's result 0 (jnk slot on first pass)

	fsubd	%f14,%f12,%f18
	st	%f7,[%l3+4]		! low word of previous result 0

	fsubd	%f24,%f22,%f28
	st	%f16,[%l4]

! BUBBLE USIII

	fmuld	%f8,LN2_256H,%f2	! closest LN2_256 to x
	st	%f17,[%l4+4]

	fmuld	%f18,LN2_256H,%f12
	st	%f26,[%l5]

	fmuld	%f28,LN2_256H,%f22
	st	%f27,[%l5+4]

! BUBBLE USIII

	fsubd	%f0,%f2,%f0		! r = x - p*LN2_256H
	fmuld	%f8,LN2_256L,%f4	! closest LN2_256 to x , added prec

	fsubd	%f10,%f12,%f10
	fmuld	%f18,LN2_256L,%f14

	fsubd	%f20,%f22,%f20
	fmuld	%f28,LN2_256L,%f24

! BUBBLE USIII

	fsubd	%f0,%f4,%f0		! r -= p*LN2_256L

	fsubd	%f10,%f14,%f10

	fsubd	%f20,%f24,%f20

!!!!!!!!!!!!!!!!!!! New polynomial reorder starts here

! Alternate polynomial grouping allowing non-sequential calc of p
! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
!
! let SLi Ri SRi be accumulators

	fmuld	%f0,B3,%f2		! SR1 = r1 * B3
	fdtoi	%f8,%f8			! convert k+j/256 to int
	st	%f8,[%fp+m0]		! store k, to shift return/use

	fmuld	%f10,B3,%f12		! SR2 = r2 * B3
	fdtoi	%f18,%f18		! convert k+j/256 to int
	st	%f18,[%fp+m1]		! store k, to shift return/use

	fmuld	%f20,B3,%f22		! SR3 = r3 * B3
	fdtoi	%f28,%f28		! convert k+j/256 to int
	st	%f28,[%fp+m2]		! store k, to shift return/use

	fmuld	%f0,%f0,%f4		! R1 = r1 * r1

	fmuld	%f10,%f10,%f14		! R2 = r2 * r2
	faddd	%f2,B2,%f2		! SR1 += B2

	fmuld	%f20,%f20,%f24		! R3 = r3 * r3
	faddd	%f12,B2,%f12		! SR2 += B2

	faddd	%f22,B2,%f22		! SR3 += B2
	fmuld	%f0,B1,%f6		! SL1 = r1 * B1

	fmuld	%f10,B1,%f32		! SL2 = r2 * B1
	fand	%f8,NEGINF,%f8		! keep only k, aligned with the exponent field
! best here for RAW BYPASS
	ld	[%fp+m0],%l0		! get nonshifted k into intreg

	fmuld	%f20,B1,%f34		! SL3 = r3 * B1
	fand	%f18,NEGINF,%f18
	ld	[%fp+m1],%l1		! get nonshifted k into intreg

	fmuld	%f4,%f2,%f4		! R1 = R1 * SR1
	fand	%f28,NEGINF,%f28
	ld	[%fp+m2],%l2		! get nonshifted k into intreg

	fmuld	%f14,%f12,%f14		! R2 = R2 * SR2
	faddd	%f6,ONE,%f6		! SL1 += 1

	fmuld	%f24,%f22,%f24		! R3 = R3 * SR3
	faddd	%f32,ONE,%f32		! SL2 += 1
	sra	%l0,8,%l3		! shift k tobe offset 256-8byte

	faddd	%f34,ONE,%f34		! SL3 += 1
	sra	%l1,8,%l4		! shift k tobe offset 256-8byte
	sra	%l2,8,%l5		! shift k tobe offset 256-8byte

! BUBBLE in USIII
	and	%l3,0xff0,%l3		! byte offset of TBL entry j (j*16)
	and	%l4,0xff0,%l4



	faddd	%f6,%f4,%f6		! R1 = SL1 + R1
	ldd	[%g1+%l3],%f4		! q: first double of TBL entry j
	add	%l3,8,%l3		! step to the second double of the entry
	and	%l5,0xff0,%l5


	faddd	%f32,%f14,%f32		! R2 = SL2 + R2
	ldd	[%g1+%l4],%f14		! q: first double of TBL entry j
	add	%l4,8,%l4		! step to the second double of the entry
	sra	%l0,20,%o3		! k for element 0

	faddd	%f34,%f24,%f34		! R3 = SL3 + R3
	ldd	[%g1+%l5],%f24		! q: first double of TBL entry j
	add	%l5,8,%l5		! step to the second double of the entry
	sra	%l1,20,%l1		! k for element 1

! BUBBLE in USIII
	ldd	[%g1+%l4],%f16		! t: second double of TBL entry j
	add	%o3,1021,%o3		! negative iff k < -1021 (tested at .small)

	fmuld	%f0,%f6,%f0		! p1 = r1 * R1
	ldd	[%g1+%l3],%f6		! t: second double of TBL entry j
	add	%l1,1021,%l1		! negative iff k < -1021 (tested at .small)
	sra	%l2,20,%l2		! k for element 2

	fmuld	%f10,%f32,%f10		! p2 = r2 * R2
	ldd	[%g1+%l5],%f26		! t: second double of TBL entry j
	add	%l2,1021,%l2		! negative iff k < -1021 (tested at .small)

	fmuld	%f20,%f34,%f20		! p3 = r3 * R3





!!!!!!!!!!!!!!!!!!! poly-reorder - ends here

	fmuld	%f0,%f4,%f0		! start exp(x): q*p
	mov	%o0,%l3			! remember py0 for the deferred store

	fmuld	%f10,%f14,%f10
	mov	%o1,%l4

	fmuld	%f20,%f24,%f20
	mov	%o2,%l5

	faddd	%f0,%f6,%f6		! cont exp(x) : t + q*p (low part of table entry)
	lda	[%i1]%asi,%l0		! preload next argument

	faddd	%f10,%f16,%f16
	lda	[%i1]%asi,%f0

	faddd	%f20,%f26,%f26
	lda	[%i1+4]%asi,%f1

	faddd	%f6,%f4,%f6		! cont exp(x) : q + (t + q*p) (high part of table entry)
	add	%i1,%i2,%i1		! x += stridex

	faddd	%f16,%f14,%f16
	andn	%l0,%i5,%l0
	or	%o3,%l1,%o4

! -- 16 byte aligned
	orcc	%o4,%l2,%o4		! sign set if any of the three k+1021 is negative
	bl,pn	%icc,.small
! delay slot
	faddd	%f26,%f24,%f26

	fpadd32	%f6,%f8,%f6		! done exp(x) : apply 2^k
	fpadd32	%f16,%f18,%f16


	addcc	%i0,-1,%i0
	bg,pn	%icc,.loop0
! delay slot
	fpadd32	%f26,%f28,%f26

	ba,pt	%icc,.endloop0
! delay slot
	nop


! .small: at least one element has k+1021 < 0.  Each element still gets
! k added to its exponent field (the fpadd32 in the delay slot runs on
! both paths); the flagged ones are then given the BOUNDRY exponent
! addend as well and multiplied by TINY.
	.align	16
.small:
	tst	%o3
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f6,%f8,%f6
	fpadd32	%f6,BOUNDRY,%f6
	fmuld	%f6,TINY,%f6
1:
	tst	%l1
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f16,%f18,%f16
	fpadd32	%f16,BOUNDRY,%f16
	fmuld	%f16,TINY,%f16
1:
	tst	%l2
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f26,%f28,%f26
	fpadd32	%f26,BOUNDRY,%f26
	fmuld	%f26,TINY,%f26
1:
	addcc	%i0,-1,%i0
	bg,pn	%icc,.loop0
! delay slot
	nop
	ba,pt	%icc,.endloop0
! delay slot
	nop


! .endloop2: the input ran out after element 1 was fetched.  Finish
! element 1 on its own (same steps as the main path, result in %f30),
! store it through py1, then fall into .endloop1.
.endloop2:
	for	%f12,TWO96,%f12
	fmuld	%f10,INVLN2_256,%f14
	faddd	%f14,%f12,%f14
	fsubd	%f14,%f12,%f18
	fmuld	%f18,LN2_256H,%f12
	fsubd	%f10,%f12,%f10
	fmuld	%f18,LN2_256L,%f14
	fsubd	%f10,%f14,%f10
	fmuld	%f10,B3,%f12
	fdtoi	%f18,%f18
	st	%f18,[%fp+m1]
	fmuld	%f10,%f10,%f14
	faddd	%f12,B2,%f12
	fmuld	%f10,B1,%f32
	fand	%f18,NEGINF,%f18
	ld	[%fp+m1],%l1
	fmuld	%f14,%f12,%f14
	faddd	%f32,ONE,%f32
	sra	%l1,8,%o4
	and	%o4,0xff0,%o4
	faddd	%f32,%f14,%f32
	ldd	[%g1+%o4],%f14
	add	%o4,8,%o4
	sra	%l1,20,%l1
	ldd	[%g1+%o4],%f30
	addcc	%l1,1021,%l1		! sets icc for the bge below
	fmuld	%f10,%f32,%f10
	fmuld	%f10,%f14,%f10
	faddd	%f10,%f30,%f30
	faddd	%f30,%f14,%f30
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f30,%f18,%f30
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o1]
	st	%f31,[%o1+4]

! .endloop1: finish element 0 on its own, store it through py0, then
! fall into .endloop0.
.endloop1:
	for	%f2,TWO96,%f2
	fmuld	%f0,INVLN2_256,%f4
	faddd	%f4,%f2,%f4
	fsubd	%f4,%f2,%f8
	fmuld	%f8,LN2_256H,%f2
	fsubd	%f0,%f2,%f0
	fmuld	%f8,LN2_256L,%f4
	fsubd	%f0,%f4,%f0
	fmuld	%f0,B3,%f2
	fdtoi	%f8,%f8
	st	%f8,[%fp+m0]
	fmuld	%f0,%f0,%f4
	faddd	%f2,B2,%f2
	fmuld	%f0,B1,%f32
	fand	%f8,NEGINF,%f8
	ld	[%fp+m0],%l0
	fmuld	%f4,%f2,%f4
	faddd	%f32,ONE,%f32
	sra	%l0,8,%o4
	and	%o4,0xff0,%o4
	faddd	%f32,%f4,%f32
	ldd	[%g1+%o4],%f4
	add	%o4,8,%o4
	sra	%l0,20,%o3
	ldd	[%g1+%o4],%f30
	addcc	%o3,1021,%o3		! sets icc for the bge below
	fmuld	%f0,%f32,%f0
	fmuld	%f0,%f4,%f0
	faddd	%f0,%f30,%f30
	faddd	%f30,%f4,%f30
	bge,pt	%icc,1f
! delay slot
	fpadd32	%f30,%f8,%f30
	fpadd32	%f30,BOUNDRY,%f30
	fmuld	%f30,TINY,%f30
1:
	st	%f30,[%o0]
	st	%f31,[%o0+4]

! .endloop0: flush the three results still pending from the last full
! pass (or into the jnk slot if there was none) and return.
.endloop0:
	st	%f6,[%l3]
	st	%f7,[%l3+4]
	st	%f16,[%l4]
	st	%f17,[%l4+4]
	st	%f26,[%l5]
	st	%f27,[%l5+4]
	ret
	restore


! .range0: element 0 failed the quick range test.
!   hx < 0x3e300000		-> result is 1 + x
!   hx > 0x40874910		-> label 1: huge, inf or nan
!   x > THRESH			-> HUGE*HUGE
!   otherwise			-> rejoin the main path at .loop1
.range0:
	cmp	%l0,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f0,ONE,%f4

	cmp	%l0,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f0,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f4

! x is near the extremes but within range; return to the loop
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop1
! delay slot
	andn	%l1,%i5,%l1
	add	%i1,%i2,%i1		! x += stridex
	for	%f2,TWO96,%f2
	ba,pt	%icc,.loop1
! delay slot
	fmuld	%f0,INVLN2_256,%f4

1:
	cmp	%l0,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f4
	fcmpd	%fcc0,%f0,NEGINF
	fmovdne	%fcc0,%f0,%f4		! keep x unless x == -inf
	ba,pt	%icc,3f
	fmuld	%f4,%f4,%f4		! x*x or zero*zero
2:
	fmovd	HUGE,%f4
	fcmpd	%fcc0,%f0,ONE
	fmovdl	%fcc0,TINY,%f4
	fmuld	%f4,%f4,%f4		! huge*huge or tiny*tiny
3:
	st	%f4,[%o0]
	andn	%l1,%i5,%l0		! the preloaded argument becomes element 0
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f10,%f0
	st	%f5,[%o0+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop0
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop0
! delay slot
	nop


! .range1: same classification as .range0, for element 1.
.range1:
	cmp	%l1,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f10,ONE,%f14

	cmp	%l1,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f10,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f14

! x is near the extremes but within range; return to the loop
	addcc	%i0,-1,%i0
	add	%i3,%i4,%i3		! y += stridey
	ble,pn	%icc,.endloop2
! delay slot
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	for	%f12,TWO96,%f12
	ba,pt	%icc,.loop2
! delay slot
	fmuld	%f10,INVLN2_256,%f14

1:
	cmp	%l1,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f14
	fcmpd	%fcc0,%f10,NEGINF
	fmovdne	%fcc0,%f10,%f14		! keep x unless x == -inf
	ba,pt	%icc,3f
	fmuld	%f14,%f14,%f14		! x*x or zero*zero
2:
	fmovd	HUGE,%f14
	fcmpd	%fcc0,%f10,ONE
	fmovdl	%fcc0,TINY,%f14
	fmuld	%f14,%f14,%f14		! huge*huge or tiny*tiny
3:
	st	%f14,[%o1]
	andn	%l2,%i5,%l1		! the preloaded argument becomes element 1
	add	%i1,%i2,%i1		! x += stridex
	fmovd	%f20,%f10
	st	%f15,[%o1+4]
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop1
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop1
! delay slot
	nop


! .range2: same classification, for element 2.  py2 has not been set
! yet at this point, so the special-case result is stored through %i3
! and the next argument is fetched here.
.range2:
	cmp	%l2,%l6
	bl,a,pt	%icc,3f			! if x is tiny
! delay slot, annulled if branch not taken
	faddd	%f20,ONE,%f24

	cmp	%l2,%o5
	bg,pt	%icc,1f			! if x is huge, inf, nan
! delay slot
	nop

	fcmpd	%fcc0,%f20,THRESH
	fbg,a,pt %fcc0,3f		! if x is huge and positive
! delay slot, annulled if branch not taken
	fmuld	HUGE,HUGE,%f24

! x is near the extremes but within range; return to the loop
	ba,pt	%icc,.cont
! delay slot
	faddd	%f4,%f2,%f4

1:
	cmp	%l2,%o7
	bl,pn	%icc,2f			! if x is finite
! delay slot
	nop
	fzero	%f24
	fcmpd	%fcc0,%f20,NEGINF
	fmovdne	%fcc0,%f20,%f24		! keep x unless x == -inf
	ba,pt	%icc,3f
	fmuld	%f24,%f24,%f24		! x*x or zero*zero
2:
	fmovd	HUGE,%f24
	fcmpd	%fcc0,%f20,ONE
	fmovdl	%fcc0,TINY,%f24
	fmuld	%f24,%f24,%f24		! huge*huge or tiny*tiny
3:
	st	%f24,[%i3]
	st	%f25,[%i3+4]
	lda	[%i1]%asi,%l2		! preload next argument
	lda	[%i1]%asi,%f20
	lda	[%i1+4]%asi,%f21
	andn	%l2,%i5,%l2
	add	%i1,%i2,%i1		! x += stridex
	addcc	%i0,-1,%i0
	bg,pt	%icc,.loop2
! delay slot
	add	%i3,%i4,%i3		! y += stridey
	ba,pt	%icc,.endloop2
! delay slot
	nop

	SET_SIZE(__vexp)