1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file   "__vexp.S"
  30 
  31 #include "libm.h"
  32 
  33         RO_DATA
  34 
  35 /********************************************************************
  36  * vexp() algorithm is from mopt:f_exp.c.  Basics are included here
  37  * to supplement comments within this file.  vexp() has been unrolled
  38  * to a depth of 3.  Only element 0 is documented.
  39  *
  40  * Note 1: INVLN2_256, LN2_256H, and LN2_256L were originally scaled by
  41  *      2^44 to allow *2^k w/o shifting within the FP registers.  These
  42  *      had to be removed for CHEETAH to avoid the fdtox of a very large
  43  *      number, which would trap to kernel (2^52).
  44  *
  45  * Let  x = (k + j/256)ln2 + r
  46  * then exp(x) = exp((k+j/256)*ln2) * exp(r)
  47  *             = 2^k * 2^(j/256) * exp(r)
  48  * where exp(r) is approximated by the polynomial
  49  *      exp(r) = 1 + r + r^2*B1 + r^3*B2 + r^4*B3
  50  *             = 1 + r*(1+r*(B1+r*(B2+r*B3)))
  51  *      let
  52  *      p = r*(1+r*(B1+r*(B2+r*B3)))    ! notice, not quite exp(r)
  53  *      q = 2^(j/256) (high 64 bits)
  54  *      t = 2^(j/256) (extra precision) ! both from _TBL_exp_z[]
  55  *      then
  56  *      2^(j/256) * exp(r) = (q+t)(1+p) ~ q + ( t + q*p )
  57  *      then actual computation is 2^k * ( q + ( t + q*p ) )
  58  * 
  59  ********************************************************************/
  60 
  61         .align  16
  62 TBL: ! 256 entries of 16 bytes (two doubles each); per the header comment these are q and t for 2^(j/256) - presumably indexed by j, lookup code not visible here
  63         .word   0x3ff00000,0x00000000 ! entry 0, first double: 1.0 = 2^(0/256)
  64         .word   0x00000000,0x00000000 ! entry 0, second double: 0.0 (extra-precision part)
  65         .word   0x3ff00b1a,0xfa5abcbf ! entry 1, first double: ~1.0027113 = 2^(1/256)
  66         .word   0xbc84f6b2,0xa7609f71 ! entry 1, second double: small signed extra-precision part
  67         .word   0x3ff0163d,0xa9fb3335
  68         .word   0x3c9b6129,0x9ab8cdb7
  69         .word   0x3ff02168,0x143b0281
  70         .word   0xbc82bf31,0x0fc54eb6
  71         .word   0x3ff02c9a,0x3e778061
  72         .word   0xbc719083,0x535b085d
  73         .word   0x3ff037d4,0x2e11bbcc
  74         .word   0x3c656811,0xeeade11a
  75         .word   0x3ff04315,0xe86e7f85
  76         .word   0xbc90a31c,0x1977c96e
  77         .word   0x3ff04e5f,0x72f654b1
  78         .word   0x3c84c379,0x3aa0d08c
  79         .word   0x3ff059b0,0xd3158574
  80         .word   0x3c8d73e2,0xa475b465
  81         .word   0x3ff0650a,0x0e3c1f89
  82         .word   0xbc95cb7b,0x5799c396
  83         .word   0x3ff0706b,0x29ddf6de
  84         .word   0xbc8c91df,0xe2b13c26
  85         .word   0x3ff07bd4,0x2b72a836
  86         .word   0x3c832334,0x54458700
  87         .word   0x3ff08745,0x18759bc8
  88         .word   0x3c6186be,0x4bb284ff
  89         .word   0x3ff092bd,0xf66607e0
  90         .word   0xbc968063,0x800a3fd1
  91         .word   0x3ff09e3e,0xcac6f383
  92         .word   0x3c914878,0x18316136
  93         .word   0x3ff0a9c7,0x9b1f3919
  94         .word   0x3c85d16c,0x873d1d38
  95         .word   0x3ff0b558,0x6cf9890f
  96         .word   0x3c98a62e,0x4adc610a
  97         .word   0x3ff0c0f1,0x45e46c85
  98         .word   0x3c94f989,0x06d21cef
  99         .word   0x3ff0cc92,0x2b7247f7
 100         .word   0x3c901edc,0x16e24f71
 101         .word   0x3ff0d83b,0x23395dec
 102         .word   0xbc9bc14d,0xe43f316a
 103         .word   0x3ff0e3ec,0x32d3d1a2
 104         .word   0x3c403a17,0x27c57b53
 105         .word   0x3ff0efa5,0x5fdfa9c5
 106         .word   0xbc949db9,0xbc54021b
 107         .word   0x3ff0fb66,0xaffed31b
 108         .word   0xbc6b9bed,0xc44ebd7b
 109         .word   0x3ff10730,0x28d7233e
 110         .word   0x3c8d46eb,0x1692fdd5
 111         .word   0x3ff11301,0xd0125b51
 112         .word   0xbc96c510,0x39449b3a
 113         .word   0x3ff11edb,0xab5e2ab6
 114         .word   0xbc9ca454,0xf703fb72
 115         .word   0x3ff12abd,0xc06c31cc
 116         .word   0xbc51b514,0xb36ca5c7
 117         .word   0x3ff136a8,0x14f204ab
 118         .word   0xbc67108f,0xba48dcf0
 119         .word   0x3ff1429a,0xaea92de0
 120         .word   0xbc932fbf,0x9af1369e
 121         .word   0x3ff14e95,0x934f312e
 122         .word   0xbc8b91e8,0x39bf44ab
 123         .word   0x3ff15a98,0xc8a58e51
 124         .word   0x3c82406a,0xb9eeab0a
 125         .word   0x3ff166a4,0x5471c3c2
 126         .word   0x3c58f23b,0x82ea1a32
 127         .word   0x3ff172b8,0x3c7d517b
 128         .word   0xbc819041,0xb9d78a76
 129         .word   0x3ff17ed4,0x8695bbc0
 130         .word   0x3c709e3f,0xe2ac5a64
 131         .word   0x3ff18af9,0x388c8dea
 132         .word   0xbc911023,0xd1970f6c
 133         .word   0x3ff19726,0x58375d2f
 134         .word   0x3c94aadd,0x85f17e08
 135         .word   0x3ff1a35b,0xeb6fcb75
 136         .word   0x3c8e5b4c,0x7b4968e4
 137         .word   0x3ff1af99,0xf8138a1c
 138         .word   0x3c97bf85,0xa4b69280
 139         .word   0x3ff1bbe0,0x84045cd4
 140         .word   0xbc995386,0x352ef607
 141         .word   0x3ff1c82f,0x95281c6b
 142         .word   0x3c900977,0x8010f8c9
 143         .word   0x3ff1d487,0x3168b9aa
 144         .word   0x3c9e016e,0x00a2643c
 145         .word   0x3ff1e0e7,0x5eb44027
 146         .word   0xbc96fdd8,0x088cb6de
 147         .word   0x3ff1ed50,0x22fcd91d
 148         .word   0xbc91df98,0x027bb78c
 149         .word   0x3ff1f9c1,0x8438ce4d
 150         .word   0xbc9bf524,0xa097af5c
 151         .word   0x3ff2063b,0x88628cd6
 152         .word   0x3c8dc775,0x814a8494
 153         .word   0x3ff212be,0x3578a819
 154         .word   0x3c93592d,0x2cfcaac9
 155         .word   0x3ff21f49,0x917ddc96
 156         .word   0x3c82a97e,0x9494a5ee
 157         .word   0x3ff22bdd,0xa27912d1
 158         .word   0x3c8d34fb,0x5577d69e
 159         .word   0x3ff2387a,0x6e756238
 160         .word   0x3c99b07e,0xb6c70573
 161         .word   0x3ff2451f,0xfb82140a
 162         .word   0x3c8acfcc,0x911ca996
 163         .word   0x3ff251ce,0x4fb2a63f
 164         .word   0x3c8ac155,0xbef4f4a4
 165         .word   0x3ff25e85,0x711ece75
 166         .word   0x3c93e1a2,0x4ac31b2c
 167         .word   0x3ff26b45,0x65e27cdd
 168         .word   0x3c82bd33,0x9940e9d9
 169         .word   0x3ff2780e,0x341ddf29
 170         .word   0x3c9e067c,0x05f9e76c
 171         .word   0x3ff284df,0xe1f56381
 172         .word   0xbc9a4c3a,0x8c3f0d7e
 173         .word   0x3ff291ba,0x7591bb70
 174         .word   0xbc82cc72,0x28401cbc
 175         .word   0x3ff29e9d,0xf51fdee1
 176         .word   0x3c8612e8,0xafad1255
 177         .word   0x3ff2ab8a,0x66d10f13
 178         .word   0xbc995743,0x191690a7
 179         .word   0x3ff2b87f,0xd0dad990
 180         .word   0xbc410adc,0xd6381aa4
 181         .word   0x3ff2c57e,0x39771b2f
 182         .word   0xbc950145,0xa6eb5124
 183         .word   0x3ff2d285,0xa6e4030b
 184         .word   0x3c900247,0x54db41d5
 185         .word   0x3ff2df96,0x1f641589
 186         .word   0x3c9d16cf,0xfbbce198
 187         .word   0x3ff2ecaf,0xa93e2f56
 188         .word   0x3c71ca0f,0x45d52383
 189         .word   0x3ff2f9d2,0x4abd886b
 190         .word   0xbc653c55,0x532bda93
 191         .word   0x3ff306fe,0x0a31b715
 192         .word   0x3c86f46a,0xd23182e4
 193         .word   0x3ff31432,0xedeeb2fd
 194         .word   0x3c8959a3,0xf3f3fcd0
 195         .word   0x3ff32170,0xfc4cd831
 196         .word   0x3c8a9ce7,0x8e18047c
 197         .word   0x3ff32eb8,0x3ba8ea32
 198         .word   0xbc9c45e8,0x3cb4f318
 199         .word   0x3ff33c08,0xb26416ff
 200         .word   0x3c932721,0x843659a6
 201         .word   0x3ff34962,0x66e3fa2d
 202         .word   0xbc835a75,0x930881a4
 203         .word   0x3ff356c5,0x5f929ff1
 204         .word   0xbc8b5cee,0x5c4e4628
 205         .word   0x3ff36431,0xa2de883b
 206         .word   0xbc8c3144,0xa06cb85e
 207         .word   0x3ff371a7,0x373aa9cb
 208         .word   0xbc963aea,0xbf42eae2
 209         .word   0x3ff37f26,0x231e754a
 210         .word   0xbc99f5ca,0x9eceb23c
 211         .word   0x3ff38cae,0x6d05d866
 212         .word   0xbc9e958d,0x3c9904bd
 213         .word   0x3ff39a40,0x1b7140ef
 214         .word   0xbc99a9a5,0xfc8e2934
 215         .word   0x3ff3a7db,0x34e59ff7
 216         .word   0xbc75e436,0xd661f5e3
 217         .word   0x3ff3b57f,0xbfec6cf4
 218         .word   0x3c954c66,0xe26fff18
 219         .word   0x3ff3c32d,0xc313a8e5
 220         .word   0xbc9efff8,0x375d29c3
 221         .word   0x3ff3d0e5,0x44ede173
 222         .word   0x3c7fe8d0,0x8c284c71
 223         .word   0x3ff3dea6,0x4c123422
 224         .word   0x3c8ada09,0x11f09ebc
 225         .word   0x3ff3ec70,0xdf1c5175
 226         .word   0xbc8af663,0x7b8c9bca
 227         .word   0x3ff3fa45,0x04ac801c
 228         .word   0xbc97d023,0xf956f9f3
 229         .word   0x3ff40822,0xc367a024
 230         .word   0x3c8bddf8,0xb6f4d048
 231         .word   0x3ff4160a,0x21f72e2a
 232         .word   0xbc5ef369,0x1c309278
 233         .word   0x3ff423fb,0x2709468a
 234         .word   0xbc98462d,0xc0b314dd
 235         .word   0x3ff431f5,0xd950a897
 236         .word   0xbc81c7dd,0xe35f7998
 237         .word   0x3ff43ffa,0x3f84b9d4
 238         .word   0x3c8880be,0x9704c002
 239         .word   0x3ff44e08,0x6061892d
 240         .word   0x3c489b7a,0x04ef80d0
 241         .word   0x3ff45c20,0x42a7d232
 242         .word   0xbc686419,0x82fb1f8e
 243         .word   0x3ff46a41,0xed1d0057
 244         .word   0x3c9c944b,0xd1648a76
 245         .word   0x3ff4786d,0x668b3237
 246         .word   0xbc9c20f0,0xed445733
 247         .word   0x3ff486a2,0xb5c13cd0
 248         .word   0x3c73c1a3,0xb69062f0
 249         .word   0x3ff494e1,0xe192aed2
 250         .word   0xbc83b289,0x5e499ea0
 251         .word   0x3ff4a32a,0xf0d7d3de
 252         .word   0x3c99cb62,0xf3d1be56
 253         .word   0x3ff4b17d,0xea6db7d7
 254         .word   0xbc8125b8,0x7f2897f0
 255         .word   0x3ff4bfda,0xd5362a27
 256         .word   0x3c7d4397,0xafec42e2
 257         .word   0x3ff4ce41,0xb817c114
 258         .word   0x3c905e29,0x690abd5d
 259         .word   0x3ff4dcb2,0x99fddd0d
 260         .word   0x3c98ecdb,0xbc6a7833
 261         .word   0x3ff4eb2d,0x81d8abff
 262         .word   0xbc95257d,0x2e5d7a52
 263         .word   0x3ff4f9b2,0x769d2ca7
 264         .word   0xbc94b309,0xd25957e3
 265         .word   0x3ff50841,0x7f4531ee
 266         .word   0x3c7a249b,0x49b7465f
 267         .word   0x3ff516da,0xa2cf6642
 268         .word   0xbc8f7685,0x69bd93ee
 269         .word   0x3ff5257d,0xe83f4eef
 270         .word   0xbc7c998d,0x43efef71
 271         .word   0x3ff5342b,0x569d4f82
 272         .word   0xbc807abe,0x1db13cac
 273         .word   0x3ff542e2,0xf4f6ad27
 274         .word   0x3c87926d,0x192d5f7e
 275         .word   0x3ff551a4,0xca5d920f
 276         .word   0xbc8d689c,0xefede59a
 277         .word   0x3ff56070,0xdde910d2
 278         .word   0xbc90fb6e,0x168eebf0
 279         .word   0x3ff56f47,0x36b527da
 280         .word   0x3c99bb2c,0x011d93ad
 281         .word   0x3ff57e27,0xdbe2c4cf
 282         .word   0xbc90b98c,0x8a57b9c4
 283         .word   0x3ff58d12,0xd497c7fd
 284         .word   0x3c8295e1,0x5b9a1de8
 285         .word   0x3ff59c08,0x27ff07cc
 286         .word   0xbc97e2ce,0xe467e60f
 287         .word   0x3ff5ab07,0xdd485429
 288         .word   0x3c96324c,0x054647ad
 289         .word   0x3ff5ba11,0xfba87a03
 290         .word   0xbc9b77a1,0x4c233e1a
 291         .word   0x3ff5c926,0x8a5946b7
 292         .word   0x3c3c4b1b,0x816986a2
 293         .word   0x3ff5d845,0x90998b93
 294         .word   0xbc9cd6a7,0xa8b45642
 295         .word   0x3ff5e76f,0x15ad2148
 296         .word   0x3c9ba6f9,0x3080e65e
 297         .word   0x3ff5f6a3,0x20dceb71
 298         .word   0xbc89eadd,0xe3cdcf92
 299         .word   0x3ff605e1,0xb976dc09
 300         .word   0xbc93e242,0x9b56de47
 301         .word   0x3ff6152a,0xe6cdf6f4
 302         .word   0x3c9e4b3e,0x4ab84c27
 303         .word   0x3ff6247e,0xb03a5585
 304         .word   0xbc9383c1,0x7e40b497
 305         .word   0x3ff633dd,0x1d1929fd
 306         .word   0x3c984710,0xbeb964e5
 307         .word   0x3ff64346,0x34ccc320
 308         .word   0xbc8c483c,0x759d8932
 309         .word   0x3ff652b9,0xfebc8fb7
 310         .word   0xbc9ae3d5,0xc9a73e08
 311         .word   0x3ff66238,0x82552225
 312         .word   0xbc9bb609,0x87591c34
 313         .word   0x3ff671c1,0xc70833f6
 314         .word   0xbc8e8732,0x586c6134
 315         .word   0x3ff68155,0xd44ca973
 316         .word   0x3c6038ae,0x44f73e65
 317         .word   0x3ff690f4,0xb19e9538
 318         .word   0x3c8804bd,0x9aeb445c
 319         .word   0x3ff6a09e,0x667f3bcd
 320         .word   0xbc9bdd34,0x13b26456
 321         .word   0x3ff6b052,0xfa75173e
 322         .word   0x3c7a38f5,0x2c9a9d0e
 323         .word   0x3ff6c012,0x750bdabf
 324         .word   0xbc728956,0x67ff0b0d
 325         .word   0x3ff6cfdc,0xddd47645
 326         .word   0x3c9c7aa9,0xb6f17309
 327         .word   0x3ff6dfb2,0x3c651a2f
 328         .word   0xbc6bbe3a,0x683c88ab
 329         .word   0x3ff6ef92,0x98593ae5
 330         .word   0xbc90b974,0x9e1ac8b2
 331         .word   0x3ff6ff7d,0xf9519484
 332         .word   0xbc883c0f,0x25860ef6
 333         .word   0x3ff70f74,0x66f42e87
 334         .word   0x3c59d644,0xd45aa65f
 335         .word   0x3ff71f75,0xe8ec5f74
 336         .word   0xbc816e47,0x86887a99
 337         .word   0x3ff72f82,0x86ead08a
 338         .word   0xbc920aa0,0x2cd62c72
 339         .word   0x3ff73f9a,0x48a58174
 340         .word   0xbc90a8d9,0x6c65d53c
 341         .word   0x3ff74fbd,0x35d7cbfd
 342         .word   0x3c9047fd,0x618a6e1c
 343         .word   0x3ff75feb,0x564267c9
 344         .word   0xbc902459,0x57316dd3
 345         .word   0x3ff77024,0xb1ab6e09
 346         .word   0x3c9b7877,0x169147f8
 347         .word   0x3ff78069,0x4fde5d3f
 348         .word   0x3c9866b8,0x0a02162c
 349         .word   0x3ff790b9,0x38ac1cf6
 350         .word   0x3c9349a8,0x62aadd3e
 351         .word   0x3ff7a114,0x73eb0187
 352         .word   0xbc841577,0xee04992f
 353         .word   0x3ff7b17b,0x0976cfdb
 354         .word   0xbc9bebb5,0x8468dc88
 355         .word   0x3ff7c1ed,0x0130c132
 356         .word   0x3c9f124c,0xd1164dd6
 357         .word   0x3ff7d26a,0x62ff86f0
 358         .word   0x3c91bddb,0xfb72b8b4
 359         .word   0x3ff7e2f3,0x36cf4e62
 360         .word   0x3c705d02,0xba15797e
 361         .word   0x3ff7f387,0x8491c491
 362         .word   0xbc807f11,0xcf9311ae
 363         .word   0x3ff80427,0x543e1a12
 364         .word   0xbc927c86,0x626d972b
 365         .word   0x3ff814d2,0xadd106d9
 366         .word   0x3c946437,0x0d151d4d
 367         .word   0x3ff82589,0x994cce13
 368         .word   0xbc9d4c1d,0xd41532d8
 369         .word   0x3ff8364c,0x1eb941f7
 370         .word   0x3c999b9a,0x31df2bd5
 371         .word   0x3ff8471a,0x4623c7ad
 372         .word   0xbc88d684,0xa341cdfb
 373         .word   0x3ff857f4,0x179f5b21
 374         .word   0xbc5ba748,0xf8b216d0
 375         .word   0x3ff868d9,0x9b4492ec
 376         .word   0x3ca01c83,0xb21584a3
 377         .word   0x3ff879ca,0xd931a436
 378         .word   0x3c85d2d7,0xd2db47bc
 379         .word   0x3ff88ac7,0xd98a6699
 380         .word   0x3c9994c2,0xf37cb53a
 381         .word   0x3ff89bd0,0xa478580f
 382         .word   0x3c9d5395,0x4475202a
 383         .word   0x3ff8ace5,0x422aa0db
 384         .word   0x3c96e9f1,0x56864b27
 385         .word   0x3ff8be05,0xbad61778
 386         .word   0x3c9ecb5e,0xfc43446e
 387         .word   0x3ff8cf32,0x16b5448c
 388         .word   0xbc70d55e,0x32e9e3aa
 389         .word   0x3ff8e06a,0x5e0866d9
 390         .word   0xbc97114a,0x6fc9b2e6
 391         .word   0x3ff8f1ae,0x99157736
 392         .word   0x3c85cc13,0xa2e3976c
 393         .word   0x3ff902fe,0xd0282c8a
 394         .word   0x3c9592ca,0x85fe3fd2
 395         .word   0x3ff9145b,0x0b91ffc6
 396         .word   0xbc9dd679,0x2e582524
 397         .word   0x3ff925c3,0x53aa2fe2
 398         .word   0xbc83455f,0xa639db7f
 399         .word   0x3ff93737,0xb0cdc5e5
 400         .word   0xbc675fc7,0x81b57ebc
 401         .word   0x3ff948b8,0x2b5f98e5
 402         .word   0xbc8dc3d6,0x797d2d99
 403         .word   0x3ff95a44,0xcbc8520f
 404         .word   0xbc764b7c,0x96a5f039
 405         .word   0x3ff96bdd,0x9a7670b3
 406         .word   0xbc5ba596,0x7f19c896
 407         .word   0x3ff97d82,0x9fde4e50
 408         .word   0xbc9d185b,0x7c1b85d0
 409         .word   0x3ff98f33,0xe47a22a2
 410         .word   0x3c7cabda,0xa24c78ed
 411         .word   0x3ff9a0f1,0x70ca07ba
 412         .word   0xbc9173bd,0x91cee632
 413         .word   0x3ff9b2bb,0x4d53fe0d
 414         .word   0xbc9dd84e,0x4df6d518
 415         .word   0x3ff9c491,0x82a3f090
 416         .word   0x3c7c7c46,0xb071f2be
 417         .word   0x3ff9d674,0x194bb8d5
 418         .word   0xbc9516be,0xa3dd8233
 419         .word   0x3ff9e863,0x19e32323
 420         .word   0x3c7824ca,0x78e64c6e
 421         .word   0x3ff9fa5e,0x8d07f29e
 422         .word   0xbc84a9ce,0xaaf1face
 423         .word   0x3ffa0c66,0x7b5de565
 424         .word   0xbc935949,0x5d1cd533
 425         .word   0x3ffa1e7a,0xed8eb8bb
 426         .word   0x3c9c6618,0xee8be70e
 427         .word   0x3ffa309b,0xec4a2d33
 428         .word   0x3c96305c,0x7ddc36ab
 429         .word   0x3ffa42c9,0x80460ad8
 430         .word   0xbc9aa780,0x589fb120
 431         .word   0x3ffa5503,0xb23e255d
 432         .word   0xbc9d2f6e,0xdb8d41e1
 433         .word   0x3ffa674a,0x8af46052
 434         .word   0x3c650f56,0x30670366
 435         .word   0x3ffa799e,0x1330b358
 436         .word   0x3c9bcb7e,0xcac563c6
 437         .word   0x3ffa8bfe,0x53c12e59
 438         .word   0xbc94f867,0xb2ba15a8
 439         .word   0x3ffa9e6b,0x5579fdbf
 440         .word   0x3c90fac9,0x0ef7fd31
 441         .word   0x3ffab0e5,0x21356eba
 442         .word   0x3c889c31,0xdae94544
 443         .word   0x3ffac36b,0xbfd3f37a
 444         .word   0xbc8f9234,0xcae76cd0
 445         .word   0x3ffad5ff,0x3a3c2774
 446         .word   0x3c97ef3b,0xb6b1b8e4
 447         .word   0x3ffae89f,0x995ad3ad
 448         .word   0x3c97a1cd,0x345dcc81
 449         .word   0x3ffafb4c,0xe622f2ff
 450         .word   0xbc94b2fc,0x0f315ecc
 451         .word   0x3ffb0e07,0x298db666
 452         .word   0xbc9bdef5,0x4c80e425
 453         .word   0x3ffb20ce,0x6c9a8952
 454         .word   0x3c94dd02,0x4a0756cc
 455         .word   0x3ffb33a2,0xb84f15fb
 456         .word   0xbc62805e,0x3084d708
 457         .word   0x3ffb4684,0x15b749b1
 458         .word   0xbc7f763d,0xe9df7c90
 459         .word   0x3ffb5972,0x8de5593a
 460         .word   0xbc9c71df,0xbbba6de3
 461         .word   0x3ffb6c6e,0x29f1c52a
 462         .word   0x3c92a8f3,0x52883f6e
 463         .word   0x3ffb7f76,0xf2fb5e47
 464         .word   0xbc75584f,0x7e54ac3b
 465         .word   0x3ffb928c,0xf22749e4
 466         .word   0xbc9b7216,0x54cb65c6
 467         .word   0x3ffba5b0,0x30a1064a
 468         .word   0xbc9efcd3,0x0e54292e
 469         .word   0x3ffbb8e0,0xb79a6f1f
 470         .word   0xbc3f52d1,0xc9696205
 471         .word   0x3ffbcc1e,0x904bc1d2
 472         .word   0x3c823dd0,0x7a2d9e84
 473         .word   0x3ffbdf69,0xc3f3a207
 474         .word   0xbc3c2623,0x60ea5b52
 475         .word   0x3ffbf2c2,0x5bd71e09
 476         .word   0xbc9efdca,0x3f6b9c73
 477         .word   0x3ffc0628,0x6141b33d
 478         .word   0xbc8d8a5a,0xa1fbca34
 479         .word   0x3ffc199b,0xdd85529c
 480         .word   0x3c811065,0x895048dd
 481         .word   0x3ffc2d1c,0xd9fa652c
 482         .word   0xbc96e516,0x17c8a5d7
 483         .word   0x3ffc40ab,0x5fffd07a
 484         .word   0x3c9b4537,0xe083c60a
 485         .word   0x3ffc5447,0x78fafb22
 486         .word   0x3c912f07,0x2493b5af
 487         .word   0x3ffc67f1,0x2e57d14b
 488         .word   0x3c92884d,0xff483cad
 489         .word   0x3ffc7ba8,0x8988c933
 490         .word   0xbc8e76bb,0xbe255559
 491         .word   0x3ffc8f6d,0x9406e7b5
 492         .word   0x3c71acbc,0x48805c44
 493         .word   0x3ffca340,0x5751c4db
 494         .word   0xbc87f2be,0xd10d08f4
 495         .word   0x3ffcb720,0xdcef9069
 496         .word   0x3c7503cb,0xd1e949db
 497         .word   0x3ffccb0f,0x2e6d1675
 498         .word   0xbc7d220f,0x86009093
 499         .word   0x3ffcdf0b,0x555dc3fa
 500         .word   0xbc8dd83b,0x53829d72
 501         .word   0x3ffcf315,0x5b5bab74
 502         .word   0xbc9a08e9,0xb86dff57
 503         .word   0x3ffd072d,0x4a07897c
 504         .word   0xbc9cbc37,0x43797a9c
 505         .word   0x3ffd1b53,0x2b08c968
 506         .word   0x3c955636,0x219a36ee
 507         .word   0x3ffd2f87,0x080d89f2
 508         .word   0xbc9d487b,0x719d8578
 509         .word   0x3ffd43c8,0xeacaa1d6
 510         .word   0x3c93db53,0xbf5a1614
 511         .word   0x3ffd5818,0xdcfba487
 512         .word   0x3c82ed02,0xd75b3706
 513         .word   0x3ffd6c76,0xe862e6d3
 514         .word   0x3c5fe87a,0x4a8165a0
 515         .word   0x3ffd80e3,0x16c98398
 516         .word   0xbc911ec1,0x8beddfe8
 517         .word   0x3ffd955d,0x71ff6075
 518         .word   0x3c9a052d,0xbb9af6be
 519         .word   0x3ffda9e6,0x03db3285
 520         .word   0x3c9c2300,0x696db532
 521         .word   0x3ffdbe7c,0xd63a8315
 522         .word   0xbc9b76f1,0x926b8be4
 523         .word   0x3ffdd321,0xf301b460
 524         .word   0x3c92da57,0x78f018c2
 525         .word   0x3ffde7d5,0x641c0658
 526         .word   0xbc9ca552,0x8e79ba8f
 527         .word   0x3ffdfc97,0x337b9b5f
 528         .word   0xbc91a5cd,0x4f184b5c
 529         .word   0x3ffe1167,0x6b197d17
 530         .word   0xbc72b529,0xbd5c7f44
 531         .word   0x3ffe2646,0x14f5a129
 532         .word   0xbc97b627,0x817a1496
 533         .word   0x3ffe3b33,0x3b16ee12
 534         .word   0xbc99f4a4,0x31fdc68a
 535         .word   0x3ffe502e,0xe78b3ff6
 536         .word   0x3c839e89,0x80a9cc8f
 537         .word   0x3ffe6539,0x24676d76
 538         .word   0xbc863ff8,0x7522b734
 539         .word   0x3ffe7a51,0xfbc74c83
 540         .word   0x3c92d522,0xca0c8de2
 541         .word   0x3ffe8f79,0x77cdb740
 542         .word   0xbc910894,0x80b054b1
 543         .word   0x3ffea4af,0xa2a490da
 544         .word   0xbc9e9c23,0x179c2893
 545         .word   0x3ffeb9f4,0x867cca6e
 546         .word   0x3c94832f,0x2293e4f2
 547         .word   0x3ffecf48,0x2d8e67f1
 548         .word   0xbc9c93f3,0xb411ad8c
 549         .word   0x3ffee4aa,0xa2188510
 550         .word   0x3c91c68d,0xa487568d
 551         .word   0x3ffefa1b,0xee615a27
 552         .word   0x3c9dc7f4,0x86a4b6b0
 553         .word   0x3fff0f9c,0x1cb6412a
 554         .word   0xbc932200,0x65181d45
 555         .word   0x3fff252b,0x376bba97
 556         .word   0x3c93a1a5,0xbf0d8e43
 557         .word   0x3fff3ac9,0x48dd7274
 558         .word   0xbc795a5a,0x3ed837de
 559         .word   0x3fff5076,0x5b6e4540
 560         .word   0x3c99d3e1,0x2dd8a18b
 561         .word   0x3fff6632,0x798844f8
 562         .word   0x3c9fa37b,0x3539343e
 563         .word   0x3fff7bfd,0xad9cbe14
 564         .word   0xbc9dbb12,0xd006350a
 565         .word   0x3fff91d8,0x02243c89
 566         .word   0xbc612ea8,0xa779f689
 567         .word   0x3fffa7c1,0x819e90d8
 568         .word   0x3c874853,0xf3a5931e
 569         .word   0x3fffbdba,0x3692d514
 570         .word   0xbc796773,0x15098eb6
 571         .word   0x3fffd3c2,0x2b8f71f1
 572         .word   0x3c62eb74,0x966579e7
 573         .word   0x3fffe9d9,0x6b2a23d9
 574         .word   0x3c74a603,0x7442fde3
 575 
 576         .align  16
 577 constants: ! one 8-byte double per line, high word first; order must match the byte offsets defined right after this table
 578         .word   0x3ef00000,0x00000000 ! offset 0x00, ox3ef: 2^-16
 579         .word   0x40862e42,0xfefa39ef ! offset 0x08, thresh: ~709.78 (about ln of the largest finite double)
 580         .word   0x01000000,0x00000000 ! offset 0x10, tiny: 2^-1007
 581         .word   0x7f000000,0x00000000 ! offset 0x18, huge: 2^1009
 582         .word   0x80000000,0x00000000 ! offset 0x20, signbit: only the sign bit set
 583         .word   0x43f00000,0x00000000 ! offset 0x28, two96: 2^64, i.e. the unscaled 2^52 listed below times the 2^12 scaling
 584         .word   0xfff00000,0x00000000 ! offset 0x30, neginf: -infinity
 585         .word   0x3ff00000,0x00000000 ! offset 0x38, one: 1.0
 586         .word   0x3fdfffff,0xfffffff6 ! offset 0x40, B1: ~1/2 (coefficient of r^2)
 587         .word   0x3fc55555,0x721a1d14 ! offset 0x48, B2: ~1/6 (coefficient of r^3)
 588         .word   0x3fa55555,0x6e0896af ! offset 0x50, B3: ~1/24 (coefficient of r^4)
 589         .word   0x41371547,0x652b82fe ! offset 0x58, invln2_256: 256/ln2 with scaling 2^12
 590         .word   0x3ea62e42,0xfee00000 ! offset 0x60, ln2_256h: high part of ln2/256 with scaling 2^(-12)
 591         .word   0x3caa39ef,0x35793c76 ! offset 0x68, ln2_256l: low part of ln2/256 with scaling 2^(-12)
 592 
 593         ! base set w/o scaling
 594         ! .word 0x43300000,0x00000000 ! scaling  two96
 595         ! .word 0x40771547,0x652b82fe ! scaling  invln2_256
 596         ! .word 0x3f662e42,0xfee00000 ! scaling  ln2_256h
 597         ! .word 0x3d6a39ef,0x35793c76 ! scaling  ln2_256l
 598 
 599 #define ox3ef           0x0 /* byte offsets from the constants label, 8 bytes per double; this one is loaded into BOUNDRY */
 600 #define thresh          0x8 /* loaded into THRESH */
 601 #define tiny            0x10 /* loaded into TINY */
 602 #define huge            0x18 /* loaded into HUGE */
 603 #define signbit         0x20 /* loaded into SIGNBIT */
 604 #define two96           0x28 /* loaded into TWO96 */
 605 #define neginf          0x30 /* loaded into NEGINF */
 606 #define one             0x38 /* loaded into ONE */
 607 #define B1OFF           0x40 /* loaded into B1 */
 608 #define B2OFF           0x48 /* loaded into B2 */
 609 #define B3OFF           0x50 /* loaded into B3 */
 610 #define invln2_256      0x58 /* loaded into INVLN2_256 */
 611 #define ln2_256h        0x60 /* loaded into LN2_256H */
 612 #define ln2_256l        0x68 /* loaded into LN2_256L */
 613 
 614 ! local storage indices
 615 
 616 #define m2              STACK_BIAS-0x4 /* %fp-relative word slot; receives the fdtoi result for element 2 */
 617 #define m1              STACK_BIAS-0x8 /* %fp-relative word slot; receives the fdtoi result for element 1 */
 618 #define m0              STACK_BIAS-0xc /* %fp-relative word slot; receives the fdtoi result for element 0 */
 619 #define jnk             STACK_BIAS-0x20 /* scratch slot; %l3-%l5 start out pointing here so the first "previous loop" stores land in scratch */
 620 ! sizeof temp storage - must be a multiple of 16 for V9; added to the frame size in the save at function entry
 621 #define tmps            0x20
 622 
 623 ! register use
 624 
 625 ! i0  n
 626 ! i1  x
 627 ! i2  stridex
 628 ! i3  y
 629 ! i4  stridey
 630 ! i5  0x80000000
 631 
 632 ! g1  TBL
 633 
 634 ! l0  m0
 635 ! l1  m1
 636 ! l2  m2
 637 ! l3  j0,oy0
 638 ! l4  j1,oy1
 639 ! l5  j2,oy2
 640 ! l6  0x3e300000
 641 ! l7  0x40862e41
 642 
 643 ! o0  py0
 644 ! o1  py1
 645 ! o2  py2
 646 ! o3  scratch
 647 ! o4  scratch
 648 ! o5  0x40874910
 649 ! o7  0x7ff00000
 650 
 651 ! f0  x0
 652 ! f2  
 653 ! f4  
 654 ! f6  
 655 ! f8  
 656 ! f10 x1
 657 ! f12 
 658 ! f14 
 659 ! f16 
 660 ! f18 
 661 ! f20 x2
 662 ! f22 
 663 ! f24 
 664 ! f26 
 665 ! f28 
 666 ! f30 
 667 ! f32 
 668 ! f34 
 669 ! f36 0x3ef0...
 670 ! f38 thresh
 671 ! f40 tiny
 672 ! f42 huge
 673 ! f44 signbit
 674 ! f46 two96
 675 ! f48 neginf
 676 ! f50 one
 677 ! f52 B1
 678 ! f54 B2
 679 ! f56 B3
 680 ! f58 invln2_256
 681 ! f60 ln2_256h
 682 ! f62 ln2_256l
 683 #define BOUNDRY %f36 /* FP register aliases, each loaded once at entry from the constants table; this one from offset ox3ef */
 684 #define THRESH %f38 /* from offset thresh */
 685 #define TINY %f40 /* from offset tiny */
 686 #define HUGE %f42 /* from offset huge */
 687 #define SIGNBIT %f44 /* from offset signbit; fand with x extracts the sign */
 688 #define TWO96 %f46 /* from offset two96; or-ed with the sign, then added and subtracted to strip low bits */
 689 #define NEGINF %f48 /* from offset neginf */
 690 #define ONE %f50 /* from offset one */
 691 #define B1 %f52 /* polynomial coefficient, from offset B1OFF */
 692 #define B2 %f54 /* polynomial coefficient, from offset B2OFF */
 693 #define B3 %f56 /* polynomial coefficient, from offset B3OFF */
 694 #define INVLN2_256 %f58 /* from offset invln2_256; multiplies x to form k+j/256 */
 695 #define LN2_256H %f60 /* from offset ln2_256h; high part used in r = x - n*LN2_256H */
 696 #define LN2_256L %f62 /* from offset ln2_256l; low part subtracted afterwards for added precision */
 697 
 698         ENTRY(__vexp)
 699         save    %sp,-SA(MINFRAME)-tmps,%sp
 700         PIC_SETUP(l7)
 701         PIC_SET(l7,constants,o3)
 702         PIC_SET(l7,TBL,o0)
 703         mov     %o0,%g1
 704         wr      %g0,0x82,%asi           ! set %asi for non-faulting loads
 705 
 706         sethi   %hi(0x80000000),%i5
 707         sethi   %hi(0x3e300000),%l6
 708         sethi   %hi(0x40862e41),%l7
 709         or      %l7,%lo(0x40862e41),%l7
 710         sethi   %hi(0x40874910),%o5
 711         or      %o5,%lo(0x40874910),%o5
 712         sethi   %hi(0x7ff00000),%o7
 713         ldd     [%o3+ox3ef],BOUNDRY
 714         ldd     [%o3+thresh],THRESH
 715         ldd     [%o3+tiny],TINY
 716         ldd     [%o3+huge],HUGE
 717         ldd     [%o3+signbit],SIGNBIT
 718         ldd     [%o3+two96],TWO96
 719         ldd     [%o3+neginf],NEGINF
 720         ldd     [%o3+one],ONE
 721         ldd     [%o3+B1OFF],B1
 722         ldd     [%o3+B2OFF],B2
 723         ldd     [%o3+B3OFF],B3
 724         ldd     [%o3+invln2_256],INVLN2_256
 725         ldd     [%o3+ln2_256h],LN2_256H
 726         ldd     [%o3+ln2_256l],LN2_256L
 727         sll     %i2,3,%i2               ! scale strides
 728         sll     %i4,3,%i4
 729         add     %fp,jnk,%l3             ! precondition loop
 730         add     %fp,jnk,%l4
 731         add     %fp,jnk,%l5
 732         ld      [%i1],%l0               ! hx = *x
 733         ld      [%i1],%f0
 734         ld      [%i1+4],%f1
 735         andn    %l0,%i5,%l0             ! hx &= ~0x80000000
 736         ba      .loop0
 737         add     %i1,%i2,%i1             ! x += stridex
 738 
 739         .align  16
 740 ! -- 16 byte aligned
 741 .loop0:
 742         lda     [%i1]%asi,%l1           ! preload next argument
 743         sub     %l0,%l6,%o3
 744         sub     %l7,%l0,%o4
 745         fand    %f0,SIGNBIT,%f2         ! get sign bit
 746 
 747         lda     [%i1]%asi,%f10
 748         orcc    %o3,%o4,%g0
 749         mov     %i3,%o0                 ! py0 = y
 750         bl,pn   %icc,.range0            ! if hx < 0x3e300000 or > 0x40862e41
 751 
 752 ! delay slot
 753         lda     [%i1+4]%asi,%f11
 754         addcc   %i0,-1,%i0
 755         add     %i3,%i4,%i3             ! y += stridey
 756         ble,pn  %icc,.endloop1
 757 
 758 ! delay slot
 759         andn    %l1,%i5,%l1
 760         add     %i1,%i2,%i1             ! x += stridex
 761         for     %f2,TWO96,%f2           ! used to strip least sig bits
 762         fmuld   %f0,INVLN2_256,%f4      ! x/ (ln2/256)  , creating k
 763 
 764 .loop1:
 765         lda     [%i1]%asi,%l2           ! preload next argument
 766         sub     %l1,%l6,%o3
 767         sub     %l7,%l1,%o4
 768         fand    %f10,SIGNBIT,%f12
 769 
 770         lda     [%i1]%asi,%f20
 771         orcc    %o3,%o4,%g0
 772         mov     %i3,%o1                 ! py1 = y
 773         bl,pn   %icc,.range1            ! if hx < 0x3e300000 or > 0x40862e41
 774 
 775 ! delay slot
 776         lda     [%i1+4]%asi,%f21
 777         addcc   %i0,-1,%i0
 778         add     %i3,%i4,%i3             ! y += stridey
 779         ble,pn  %icc,.endloop2
 780 
 781 ! delay slot
 782         andn    %l2,%i5,%l2
 783         add     %i1,%i2,%i1             ! x += stridex
 784         for     %f12,TWO96,%f12
 785         fmuld   %f10,INVLN2_256,%f14
 786 
 787 .loop2:
 788         sub     %l2,%l6,%o3
 789         sub     %l7,%l2,%o4
 790         fand    %f20,SIGNBIT,%f22
 791         fmuld   %f20,INVLN2_256,%f24            ! okay to put this here; for alignment
 792 
 793         orcc    %o3,%o4,%g0
 794         bl,pn   %icc,.range2            ! if hx < 0x3e300000 or > 0x40862e41
 795 ! delay slot
 796         for     %f22,TWO96,%f22
 797         faddd   %f4,%f2,%f4             ! creating k+j/256, sra to zero bits
 798 
 799 .cont:
     /*
      * Main pipelined body: three elements are evaluated side by side
      * (element 0 in %f0.., element 1 in %f10.., element 2 in %f20..).
      * Mostly only the element 0 instruction of each group of three is
      * commented; the two that follow repeat it for elements 1 and 2.
      *
      * Interleaved with the arithmetic are stores of %f6, %f16 and %f26
      * through %l3, %l4 and %l5.  Those registers hold the results and
      * output pointers left behind by the previous pass through this
      * body (see the mov of %o0/%o1/%o2 into %l3/%l4/%l5 further down).
      */
 800         faddd   %f14,%f12,%f14
 801         mov     %i3,%o2                 ! py2 = y
 802 
 803         faddd   %f24,%f22,%f24
 804         add     %i3,%i4,%i3             ! y += stridey
 805 
 806         ! BUBBLE USIII
 807 
 808         fsubd   %f4,%f2,%f8             ! creating k+j/256: sll 
 809         st      %f6,[%l3]               ! store previous loop x0
 810 
 811         fsubd   %f14,%f12,%f18
 812         st      %f7,[%l3+4]             ! store previous loop x0
 813 
 814         fsubd   %f24,%f22,%f28
 815         st      %f16,[%l4]
 816 
 817         ! BUBBLE USIII
 818 
 819         fmuld   %f8,LN2_256H,%f2        ! closest LN2_256 to x
 820         st      %f17,[%l4+4]
 821 
 822         fmuld   %f18,LN2_256H,%f12
 823         st      %f26,[%l5]
 824 
 825         fmuld   %f28,LN2_256H,%f22
 826         st      %f27,[%l5+4]
 827 
 828         ! BUBBLE USIII
 829 
 830         fsubd   %f0,%f2,%f0             ! r = x - p*LN2_256H
 831         fmuld   %f8,LN2_256L,%f4        ! closest LN2_256 to x , added prec
 832 
 833         fsubd   %f10,%f12,%f10
 834         fmuld   %f18,LN2_256L,%f14
 835 
 836         fsubd   %f20,%f22,%f20
 837         fmuld   %f28,LN2_256L,%f24
 838 
 839         ! BUBBLE USIII
 840 
 841         fsubd   %f0,%f4,%f0             ! r -= p*LN2_256L
 842 
 843         fsubd   %f10,%f14,%f10
 844 
 845         fsubd   %f20,%f24,%f20
 846 
 847 !!!!!!!!!!!!!!!!!!! New polynomial reorder starts here
 848 
 849         ! Alternate polynomial grouping allowing non-sequential calc of p
 850         ! OLD : p = r * ( 1 + r * ( B1 + r * ( B2 + r * B3) ) )
 851         ! NEW : p = r * [ (1+r*B1) + (r*r) * ( B2 + r * B3) ) ]
 852         !
 853         ! let               SLi        Ri           SRi         be accumulators
 854 
             /*
              * fdtoi leaves a 32-bit integer in %f8 (the upper half of
              * the double register).  It is spilled to the stack slot m0
              * so that the integer unit can reload it into %l0.  From
              * that word the code below derives
              *   (word >> 8) & 0xff0   byte offset of a 16-byte table entry
              *   word >> 20            the power-of-two adjustment k
              * The fand with NEGINF keeps only the bits of %f8 selected
              * by that mask; the result is what the final fpadd32 adds.
              */
 855         fmuld   %f0,B3,%f2      ! SR1 = r1 * B3
 856         fdtoi   %f8,%f8                         ! convert k+j/256 to int
 857         st      %f8,[%fp+m0]                    ! store k, to shift return/use
 858 
 859         fmuld   %f10,B3,%f12    ! SR2 = r2 * B3
 860         fdtoi   %f18,%f18                       ! convert k+j/256 to int
 861         st      %f18,[%fp+m1]                   ! store k, to shift return/use
 862 
 863         fmuld   %f20,B3,%f22    ! SR3 = r3 * B3
 864         fdtoi   %f28,%f28                       ! convert k+j/256 to int
 865         st      %f28,[%fp+m2]                   ! store k, to shift return/use
 866 
 867         fmuld   %f0,%f0,%f4     ! R1 = r1 * r1
 868 
 869         fmuld   %f10,%f10,%f14  ! R2 = r2 * r2
 870         faddd   %f2,B2,%f2      ! SR1 += B2
 871 
 872         fmuld   %f20,%f20,%f24  ! R3 = r3 * r3
 873         faddd   %f12,B2,%f12    ! SR2 += B2
 874 
 875         faddd   %f22,B2,%f22    ! SR3 += B2
 876         fmuld   %f0,B1,%f6      ! SL1 = r1 * B1
 877 
 878         fmuld   %f10,B1,%f32    ! SL2 = r2 * B1
 879         fand    %f8,NEGINF,%f8
 880         ! best here for RAW BYPASS
 881         ld      [%fp+m0],%l0                    ! get nonshifted k into intreg
 882 
 883         fmuld   %f20,B1,%f34    ! SL3 = r3 * B1
 884         fand    %f18,NEGINF,%f18
 885         ld      [%fp+m1],%l1                    ! get nonshifted k into intreg
 886 
 887         fmuld   %f4,%f2,%f4     ! R1 = R1 * SR1
 888         fand    %f28,NEGINF,%f28
 889         ld      [%fp+m2],%l2                    ! get nonshifted k into intreg
 890 
 891         fmuld   %f14,%f12,%f14  ! R2 = R2 * SR2
 892         faddd   %f6,ONE,%f6     ! SL1 += 1
 893 
 894         fmuld   %f24,%f22,%f24  ! R3 = R3 * SR3
 895         faddd   %f32,ONE,%f32   ! SL2 += 1
 896         sra     %l0,8,%l3                       ! shift k tobe offset 256-8byte
 897 
 898         faddd   %f34,ONE,%f34   ! SL3 += 1
 899         sra     %l1,8,%l4                       ! shift k tobe offset 256-8byte
 900         sra     %l2,8,%l5                       ! shift k tobe offset 256-8byte
 901 
 902         ! BUBBLE in USIII
 903         and     %l3,0xff0,%l3
 904         and     %l4,0xff0,%l4
 905 
 906 
 907 
 908         faddd   %f6,%f4,%f6     ! R1 = SL1 + R1
 909         ldd     [%g1+%l3],%f4                   ! tbl[j]
 910         add     %l3,8,%l3                       ! inc j
 911         and     %l5,0xff0,%l5
 912 
 913 
 914         faddd   %f32,%f14,%f32  ! R2 = SL2 + R2
 915         ldd     [%g1+%l4],%f14                  ! tbl[j]
 916         add     %l4,8,%l4                       ! inc j
 917         sra     %l0,20,%o3
 918 
 919         faddd   %f34,%f24,%f34  ! R3 = SL3 + R3
 920         ldd     [%g1+%l5],%f24                  ! tbl[j]
 921         add     %l5,8,%l5                       ! inc j
 922         sra     %l1,20,%l1
 923 
 924         ! BUBBLE in USIII
             /*
              * NOTE(review): the three "add ...,1021" instructions below
              * are annotated "inc j", but they operate on the shifted k
              * values in %o3/%l1/%l2, not on a table index.  The three
              * sums are OR-ed together and sign-tested before .small,
              * so a negative k + 1021 is what selects the fix-up path.
              */
 925         ldd     [%g1+%l4],%f16          ! tbl[j+1]
 926         add     %o3,1021,%o3            ! inc j
 927 
 928         fmuld   %f0,%f6,%f0     ! p1 = r1 * R1
 929         ldd     [%g1+%l3],%f6           ! tbl[j+1]
 930         add     %l1,1021,%l1            ! inc j
 931         sra     %l2,20,%l2
 932 
 933         fmuld   %f10,%f32,%f10  ! p2 = r2 * R2
 934         ldd     [%g1+%l5],%f26          ! tbl[j+1]
 935         add     %l2,1021,%l2            ! inc j
 936 
 937         fmuld   %f20,%f34,%f20  ! p3 = r3 * R3
 938 
 939  
 940  
 941 
 942 
 943 !!!!!!!!!!!!!!!!!!! poly-reorder - ends here
 944 
             /*
              * Combine with the table: result = p * T0 + T1 + T0, where
              * T0 is the double loaded at the entry offset (%f4) and T1
              * the double 8 bytes after it (%f6).
              * %l3/%l4/%l5 are no longer needed as table offsets and
              * take over the output pointers for the deferred stores.
              * The next element 0 is preloaded into %l0 and %f0/%f1.
              */
 945         fmuld   %f0,%f4,%f0             ! start exp(x) = exp(r) * tbl[j]
 946         mov     %o0,%l3
 947 
 948         fmuld   %f10,%f14,%f10
 949         mov     %o1,%l4
 950 
 951         fmuld   %f20,%f24,%f20
 952         mov     %o2,%l5
 953 
 954         faddd   %f0,%f6,%f6             ! cont exp(x) : apply tbl[j] high bits
 955         lda     [%i1]%asi,%l0           ! preload next argument
 956 
 957         faddd   %f10,%f16,%f16
 958         lda     [%i1]%asi,%f0
 959 
 960         faddd   %f20,%f26,%f26
 961         lda     [%i1+4]%asi,%f1
 962 
 963         faddd   %f6,%f4,%f6             ! cont exp(x) : apply tbl[j+1] low bits
 964         add     %i1,%i2,%i1             ! x += stridex
 965 
 966         faddd   %f16,%f14,%f16
 967         andn    %l0,%i5,%l0
             /* If any of the three k + 1021 values is negative, go to .small. */
 968         or      %o3,%l1,%o4
 969 
 970 ! -- 16 byte aligned
 971         orcc    %o4,%l2,%o4
 972         bl,pn   %icc,.small
 973 ! delay slot
 974         faddd   %f26,%f24,%f26
 975 
             /*
              * fpadd32 adds the masked integer from %f8 to the result as
              * two independent 32-bit lanes (the "apply 2^k" step).  The
              * third fpadd32 rides in the delay slot of the loop branch,
              * so it is executed on both the looping and the exit path.
              */
 976         fpadd32 %f6,%f8,%f6             ! done exp(x) : apply 2^k
 977         fpadd32 %f16,%f18,%f16
 978 
 979 
 980         addcc   %i0,-1,%i0
 981         bg,pn   %icc,.loop0
 982 ! delay slot
 983         fpadd32 %f26,%f28,%f26
 984 
 985         ba,pt   %icc,.endloop0
 986 ! delay slot
 987         nop
 988 
 989 
 990         .align  16
 991 .small:
     /*
      * Fix-up path, entered from the main body when at least one of the
      * three k + 1021 values (%o3, %l1, %l2) is negative.
      *
      * For every element the usual fpadd32 of the masked k is done in
      * the delay slot of the bge, so it happens on both outcomes.  An
      * element whose own value is negative falls through instead of
      * branching and additionally has BOUNDRY added with fpadd32 and is
      * then multiplied by TINY.
      * NOTE(review): presumably this re-biases the exponent so that the
      * multiply can deliver a subnormal result -- confirm against the
      * definitions of BOUNDRY and TINY, which are outside this section.
      *
      * The loop control at the end mirrors the end of the main body; its
      * delay slot is a nop because all three fpadd32 are already done.
      */
 992         tst     %o3
 993         bge,pt  %icc,1f
 994 ! delay slot
 995         fpadd32 %f6,%f8,%f6
 996         fpadd32 %f6,BOUNDRY,%f6
 997         fmuld   %f6,TINY,%f6
 998 1:
 999         tst     %l1
1000         bge,pt  %icc,1f
1001 ! delay slot
1002         fpadd32 %f16,%f18,%f16
1003         fpadd32 %f16,BOUNDRY,%f16
1004         fmuld   %f16,TINY,%f16
1005 1:
1006         tst     %l2
1007         bge,pt  %icc,1f
1008 ! delay slot
1009         fpadd32 %f26,%f28,%f26
1010         fpadd32 %f26,BOUNDRY,%f26
1011         fmuld   %f26,TINY,%f26
1012 1:
1013         addcc   %i0,-1,%i0
1014         bg,pn   %icc,.loop0
1015 ! delay slot
1016         nop
1017         ba,pt   %icc,.endloop0
1018 ! delay slot
1019         nop
1020 
1021 
1022 .endloop2:
     /*
      * Tail: finishes element 1 on its own (argument in %f10, output
      * pointer in %o1) using the same steps as one lane of the main
      * body: reduction, polynomial, table lookup, scaling by 2^k, and
      * the BOUNDRY/TINY fix-up when k + 1021 is negative.  The result is
      * built in %f30 and stored as two 32-bit words.
      *
      * The "for" and the first fmuld are performed here because the ble
      * in .loop1 jumps to this label before that stage has executed
      * them.
      *
      * The integer condition codes set by the addcc are consumed by the
      * bge several instructions later; only floating-point instructions
      * lie in between.
      *
      * Falls through into .endloop1.
      */
1023         for     %f12,TWO96,%f12
1024         fmuld   %f10,INVLN2_256,%f14
1025         faddd   %f14,%f12,%f14
1026         fsubd   %f14,%f12,%f18
1027         fmuld   %f18,LN2_256H,%f12
1028         fsubd   %f10,%f12,%f10
1029         fmuld   %f18,LN2_256L,%f14
1030         fsubd   %f10,%f14,%f10
1031         fmuld   %f10,B3,%f12
1032         fdtoi   %f18,%f18
1033         st      %f18,[%fp+m1]
1034         fmuld   %f10,%f10,%f14
1035         faddd   %f12,B2,%f12
1036         fmuld   %f10,B1,%f32
1037         fand    %f18,NEGINF,%f18
1038         ld      [%fp+m1],%l1
1039         fmuld   %f14,%f12,%f14
1040         faddd   %f32,ONE,%f32
1041         sra     %l1,8,%o4       
1042         and     %o4,0xff0,%o4
1043         faddd   %f32,%f14,%f32
1044         ldd     [%g1+%o4],%f14
1045         add     %o4,8,%o4
1046         sra     %l1,20,%l1
1047         ldd     [%g1+%o4],%f30
1048         addcc   %l1,1021,%l1
1049         fmuld   %f10,%f32,%f10
1050         fmuld   %f10,%f14,%f10
1051         faddd   %f10,%f30,%f30
1052         faddd   %f30,%f14,%f30
1053         bge,pt  %icc,1f
1054 ! delay slot
1055         fpadd32 %f30,%f18,%f30
1056         fpadd32 %f30,BOUNDRY,%f30
1057         fmuld   %f30,TINY,%f30
1058 1:
1059         st      %f30,[%o1]
1060         st      %f31,[%o1+4]
1061 
1062 .endloop1:
     /*
      * Tail: finishes element 0 on its own (argument in %f0, output
      * pointer in %o0).  Same sequence as .endloop2 with the element 0
      * registers: reduction into %f8, polynomial in %f0/%f2/%f4/%f32,
      * table entry at %g1 + ((word >> 8) & 0xff0), scaling with fpadd32,
      * and the BOUNDRY/TINY fix-up when k + 1021 (in %o3) is negative.
      * The result is built in %f30 and stored as two 32-bit words.
      *
      * Falls through into .endloop0.
      */
1063         for     %f2,TWO96,%f2
1064         fmuld   %f0,INVLN2_256,%f4
1065         faddd   %f4,%f2,%f4
1066         fsubd   %f4,%f2,%f8
1067         fmuld   %f8,LN2_256H,%f2
1068         fsubd   %f0,%f2,%f0
1069         fmuld   %f8,LN2_256L,%f4
1070         fsubd   %f0,%f4,%f0
1071         fmuld   %f0,B3,%f2
1072         fdtoi   %f8,%f8
1073         st      %f8,[%fp+m0]
1074         fmuld   %f0,%f0,%f4
1075         faddd   %f2,B2,%f2
1076         fmuld   %f0,B1,%f32
1077         fand    %f8,NEGINF,%f8
1078         ld      [%fp+m0],%l0
1079         fmuld   %f4,%f2,%f4
1080         faddd   %f32,ONE,%f32
1081         sra     %l0,8,%o4       
1082         and     %o4,0xff0,%o4
1083         faddd   %f32,%f4,%f32
1084         ldd     [%g1+%o4],%f4
1085         add     %o4,8,%o4
1086         sra     %l0,20,%o3
1087         ldd     [%g1+%o4],%f30
1088         addcc   %o3,1021,%o3
1089         fmuld   %f0,%f32,%f0
1090         fmuld   %f0,%f4,%f0
1091         faddd   %f0,%f30,%f30
1092         faddd   %f30,%f4,%f30
1093         bge,pt  %icc,1f
1094 ! delay slot
1095         fpadd32 %f30,%f8,%f30
1096         fpadd32 %f30,BOUNDRY,%f30
1097         fmuld   %f30,TINY,%f30
1098 1:
1099         st      %f30,[%o0]
1100         st      %f31,[%o0+4]
1101 
1102 .endloop0:
     /*
      * Common exit: writes out the three results still held in %f6,
      * %f16 and %f26 through the pointers in %l3, %l4 and %l5 (the same
      * deferred stores the main body performs at the start of its next
      * pass), then returns.  The restore executes in the delay slot of
      * the ret.
      */
1103         st      %f6,[%l3]
1104         st      %f7,[%l3+4]
1105         st      %f16,[%l4]
1106         st      %f17,[%l4+4]
1107         st      %f26,[%l5]
1108         st      %f27,[%l5+4]
1109         ret
1110         restore
1111 
1112 
1113 .range0:
     /*
      * Element 0 failed the range test (argument in %f0, high word in
      * %l0).  Cases, in the order tested:
      *
      *   %l0 <  %l6   result = x + ONE
      *   %l0 <= %o5   if x > THRESH: result = HUGE * HUGE.  Otherwise x
      *                is usable after all: perform the count and pointer
      *                updates, start the reduction (for / fmuld) and
      *                rejoin the pipeline at .loop1, or go to .endloop1
      *                when the count is exhausted.
      *   %l0 <  %o7   (labels 1: and 2:) result = HUGE * HUGE, or
      *                TINY * TINY when x < ONE
      *   otherwise    (label 1:) result = x * x, or zero * zero when x
      *                compares equal to NEGINF
      *
      * Branches written with ",a" are annulling: their delay slot is
      * executed only when the branch is taken.
      *
      * At 3: the result is stored through py0 (%o0) and the element that
      * was already preloaded as element 1 (%l1, %f10) is moved down to
      * become the new element 0 before looping back to .loop0 or
      * leaving through .endloop0.
      */
1114         cmp     %l0,%l6
1115         bl,a,pt %icc,3f                 ! if x is tiny
1116 ! delay slot, annulled if branch not taken
1117         faddd   %f0,ONE,%f4
1118 
1119         cmp     %l0,%o5
1120         bg,pt   %icc,1f                 ! if x is huge, inf, nan
1121 ! delay slot
1122         nop
1123 
1124         fcmpd   %fcc0,%f0,THRESH
1125         fbg,a,pt %fcc0,3f               ! if x is huge and positive
1126 ! delay slot, annulled if branch not taken
1127         fmuld   HUGE,HUGE,%f4
1128 
1129 ! x is near the extremes but within range; return to the loop
1130         addcc   %i0,-1,%i0
1131         add     %i3,%i4,%i3             ! y += stridey
1132         ble,pn  %icc,.endloop1
1133 ! delay slot
1134         andn    %l1,%i5,%l1
1135         add     %i1,%i2,%i1             ! x += stridex
1136         for     %f2,TWO96,%f2
1137         ba,pt   %icc,.loop1
1138 ! delay slot
1139         fmuld   %f0,INVLN2_256,%f4
1140 
1141 1:
1142         cmp     %l0,%o7
1143         bl,pn   %icc,2f                 ! if x is finite
1144 ! delay slot
1145         nop
1146         fzero   %f4
1147         fcmpd   %fcc0,%f0,NEGINF
1148         fmovdne %fcc0,%f0,%f4
1149         ba,pt   %icc,3f
1150         fmuld   %f4,%f4,%f4             ! x*x or zero*zero
1151 2:
1152         fmovd   HUGE,%f4
1153         fcmpd   %fcc0,%f0,ONE
1154         fmovdl  %fcc0,TINY,%f4
1155         fmuld   %f4,%f4,%f4             ! huge*huge or tiny*tiny
1156 3:
1157         st      %f4,[%o0]
1158         andn    %l1,%i5,%l0
1159         add     %i1,%i2,%i1             ! x += stridex
1160         fmovd   %f10,%f0
1161         st      %f5,[%o0+4]
1162         addcc   %i0,-1,%i0
1163         bg,pt   %icc,.loop0
1164 ! delay slot
1165         add     %i3,%i4,%i3             ! y += stridey
1166         ba,pt   %icc,.endloop0
1167 ! delay slot
1168         nop
1169 
1170 
1171 .range1:
     /*
      * Element 1 failed the range test in .loop1 (argument in %f10,
      * high word in %l1).  Cases, in the order tested:
      *
      *   %l1 <  %l6   result = x + ONE
      *   %l1 <= %o5   if x > THRESH: result = HUGE * HUGE.  Otherwise x
      *                is usable after all: repeat the instructions that
      *                follow the range branch in .loop1 (count and
      *                pointer updates, andn, for, fmuld) and rejoin at
      *                .loop2, or go to .endloop2 when the count is
      *                exhausted.
      *   %l1 <  %o7   (labels 1: and 2:) result = HUGE * HUGE, or
      *                TINY * TINY when x < ONE
      *   otherwise    (label 1:) result = x * x, or zero * zero when x
      *                compares equal to NEGINF
      *
      * Branches written with ",a" are annulling: their delay slot is
      * executed only when the branch is taken.
      *
      * At 3: the result is stored through py1 (%o1) and the preloaded
      * element 2 (%l2, %f20) is moved down to become the new element 1
      * before retrying .loop1, or leaving through .endloop1 so that the
      * pending element 0 is still completed.
      */
1172         cmp     %l1,%l6
1173         bl,a,pt %icc,3f                 ! if x is tiny
1174 ! delay slot, annulled if branch not taken
1175         faddd   %f10,ONE,%f14
1176 
1177         cmp     %l1,%o5
1178         bg,pt   %icc,1f                 ! if x is huge, inf, nan
1179 ! delay slot
1180         nop
1181 
1182         fcmpd   %fcc0,%f10,THRESH
1183         fbg,a,pt %fcc0,3f               ! if x is huge and positive
1184 ! delay slot, annulled if branch not taken
1185         fmuld   HUGE,HUGE,%f14
1186 
1187 ! x is near the extremes but within range; return to the loop
1188         addcc   %i0,-1,%i0
1189         add     %i3,%i4,%i3             ! y += stridey
1190         ble,pn  %icc,.endloop2
1191 ! delay slot
1192         andn    %l2,%i5,%l2
1193         add     %i1,%i2,%i1             ! x += stridex
1194         for     %f12,TWO96,%f12
1195         ba,pt   %icc,.loop2
1196 ! delay slot
1197         fmuld   %f10,INVLN2_256,%f14
1198 
1199 1:
1200         cmp     %l1,%o7
1201         bl,pn   %icc,2f                 ! if x is finite
1202 ! delay slot
1203         nop
1204         fzero   %f14
1205         fcmpd   %fcc0,%f10,NEGINF
1206         fmovdne %fcc0,%f10,%f14
1207         ba,pt   %icc,3f
1208         fmuld   %f14,%f14,%f14          ! x*x or zero*zero
1209 2:
1210         fmovd   HUGE,%f14
1211         fcmpd   %fcc0,%f10,ONE
1212         fmovdl  %fcc0,TINY,%f14
1213         fmuld   %f14,%f14,%f14          ! huge*huge or tiny*tiny
1214 3:
1215         st      %f14,[%o1]
1216         andn    %l2,%i5,%l1
1217         add     %i1,%i2,%i1             ! x += stridex
1218         fmovd   %f20,%f10
1219         st      %f15,[%o1+4]
1220         addcc   %i0,-1,%i0
1221         bg,pt   %icc,.loop1
1222 ! delay slot
1223         add     %i3,%i4,%i3             ! y += stridey
1224         ba,pt   %icc,.endloop1
1225 ! delay slot
1226         nop
1227 
1228 
1229 .range2:
     /*
      * Element 2 failed the range test in .loop2 (argument in %f20,
      * high word in %l2).  Cases, in the order tested:
      *
      *   %l2 <  %l6   result = x + ONE
      *   %l2 <= %o5   if x > THRESH: result = HUGE * HUGE.  Otherwise x
      *                is usable after all: jump to .cont, performing in
      *                the delay slot the element 0 faddd that the taken
      *                branch in .loop2 skipped.
      *   %l2 <  %o7   (labels 1: and 2:) result = HUGE * HUGE, or
      *                TINY * TINY when x < ONE
      *   otherwise    (label 1:) result = x * x, or zero * zero when x
      *                compares equal to NEGINF
      *
      * Branches written with ",a" are annulling: their delay slot is
      * executed only when the branch is taken.
      *
      * At 3: py2 has not been assigned yet (that happens in .cont), so
      * the result is stored directly through %i3.  A fresh element 2 is
      * then loaded and .loop2 is retried; when the count is exhausted
      * control goes to .endloop2 so that the pending elements 1 and 0
      * are still completed.
      */
1230         cmp     %l2,%l6
1231         bl,a,pt %icc,3f                 ! if x is tiny
1232 ! delay slot, annulled if branch not taken
1233         faddd   %f20,ONE,%f24
1234 
1235         cmp     %l2,%o5
1236         bg,pt   %icc,1f                 ! if x is huge, inf, nan
1237 ! delay slot
1238         nop
1239 
1240         fcmpd   %fcc0,%f20,THRESH
1241         fbg,a,pt %fcc0,3f               ! if x is huge and positive
1242 ! delay slot, annulled if branch not taken
1243         fmuld   HUGE,HUGE,%f24
1244 
1245 ! x is near the extremes but within range; return to the loop
1246         ba,pt   %icc,.cont
1247 ! delay slot
1248         faddd   %f4,%f2,%f4
1249 
1250 1:
1251         cmp     %l2,%o7
1252         bl,pn   %icc,2f                 ! if x is finite
1253 ! delay slot
1254         nop
1255         fzero   %f24
1256         fcmpd   %fcc0,%f20,NEGINF
1257         fmovdne %fcc0,%f20,%f24
1258         ba,pt   %icc,3f
1259         fmuld   %f24,%f24,%f24          ! x*x or zero*zero
1260 2:
1261         fmovd   HUGE,%f24
1262         fcmpd   %fcc0,%f20,ONE
1263         fmovdl  %fcc0,TINY,%f24
1264         fmuld   %f24,%f24,%f24          ! huge*huge or tiny*tiny
1265 3:
1266         st      %f24,[%i3]
1267         st      %f25,[%i3+4]
1268         lda     [%i1]%asi,%l2           ! preload next argument
1269         lda     [%i1]%asi,%f20
1270         lda     [%i1+4]%asi,%f21
1271         andn    %l2,%i5,%l2
1272         add     %i1,%i2,%i1             ! x += stridex
1273         addcc   %i0,-1,%i0
1274         bg,pt   %icc,.loop2
1275 ! delay slot
1276         add     %i3,%i4,%i3             ! y += stridey
1277         ba,pt   %icc,.endloop2
1278 ! delay slot
1279         nop
1280 
1281         SET_SIZE(__vexp)
1282