/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"exp.s"

#include "libm.h"
/*
 * NOTE(review): LIBM_ANSI_PRAGMA_WEAK, ENTRY, SET_SIZE, PIC_SETUP, PIC_F and
 * PIC_WRAPUP are not defined in this file; presumably they come from the
 * headers included here -- confirm their expansions there.
 */
LIBM_ANSI_PRAGMA_WEAK(exp,function)
#include "libm_synonyms.h"
#include "libm_protos.h"

/*
 * exp(x): returns e**x, computed on the x87 FPU as 2**(x * log2(e)).
 *
 * Input:   x is the 8-byte double on the stack; on entry its low word is
 *	    at 4(%esp) and its high word at 8(%esp).
 * Output:  the result is left in %st(0).
 * Scratch: %eax, %ecx and %edx are overwritten.  The main path also takes
 *	    8 bytes of stack and gives them back before returning.
 *
 * Dispatch is on the high word of |x|, using unsigned compares:
 *
 *   hi < 0x3fe62e42, or hi == 0x3fe62e42 and lo <= 0xfefa39ef
 *	.shortcut: |x| < ln(2), so z = x*log2(e) is handed straight to
 *	f2xm1.  This path does not touch the control word, does not round
 *	the result to double, and makes no overflow/underflow check.
 *
 *   hi >= 0x7ff00000
 *	.not_finite: NaN and +Inf are loaded and returned; -Inf returns
 *	the constant loaded by fldz.
 *
 *   otherwise
 *	.finite_non_special: the two precision-control bits (0x0300) of
 *	the x87 control word are set for the duration of the computation.
 *	z is split into [z] (frndint) and z-[z]; 2**(z-[z]) comes from
 *	f2xm1 plus 1 and is scaled by 2**[z] with fscale.  When z is
 *	already integral, f2xm1 is skipped and 1 is scaled by 2**z.  The
 *	old precision-control bits are put back, the result is rounded to
 *	double by a store/reload through the stack, and fxam classifies
 *	it: infinity goes to .overflow, zero goes to .underflow.  Both
 *	push an error code (6 or 7) followed by two copies of x and call
 *	_SVID_libm_err.
 *
 * In the x87 stack comments the first item is %st(0): "a,b" means
 * %st(0) = a and %st(1) = b.  [z] is z after frndint.
 */
	ENTRY(exp)
	movl	8(%esp),%ecx		/ ecx <-- hi_32(x)
	andl	$0x7fffffff,%ecx	/ ecx <-- hi_32(|x|)
	cmpl	$0x3fe62e42,%ecx	/ Is |x| < ln(2)?
	jb	.shortcut		/ If so, take a shortcut.
	je	.check_tail		/ high words equal: decide on lo_32(x)
	cmpl	$0x7ff00000,%ecx	/ hi_32(|x|) >= hi_32(INF)?
	jae	.not_finite		/ if so, x is not finite; the flags
					/ from this cmpl are used again there
.finite_non_special:			/ Here, ln(2) < |x| < INF
	fldl	4(%esp)			/ push x
	subl	$8,%esp			/ (%esp): scratch, 4(%esp): saved cw
					/// overhead of RP save/restore; 63/15
	fstcw	(%esp)			/// ; 15/3
	movw	(%esp),%ax		/// ; 4/1
	movw	%ax,4(%esp)		/// save old RP; 2/1
	orw	$0x0300,%ax		/// force 64-bit RP; 2/1
	movw	%ax,(%esp)		/// ; 2/1
	fldcw	(%esp)			/// ; 19/4
	fldl2e				/ push log2e   }not for xtndd_dbl
	fmulp	%st,%st(1)		/ z = x*log2e  }not for xtndd_dbl
	fld	%st(0)			/ duplicate stack top
	frndint				/ [z],z
	fucom				/ This and the next 3 instructions
	fstsw	%ax			/ add 10 clocks to runtime of the
	sahf				/ main branch, but save about 265
	je	.z_integral		/ upon detection of integral z.
	/ [z] != z, compute exp(x)
	fxch				/ z,[z]
	fsub	%st(1),%st		/ z-[z],[z]
	f2xm1				/ 2**(z-[z])-1,[z]
	fld1				/ 1,2**(z-[z])-1,[z]
	faddp	%st,%st(1)		/ 2**(z-[z])  ,[z]
.merge:					/ here: value to scale, scale exponent
	fscale				/ exp(x)      ,[z]
	fstp	%st(1)			/ drop the exponent: exp(x)
	fstcw	(%esp)			/ fetch the control word as it is now
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ current cw with the RP bits cleared
	movw	4(%esp),%cx		/ cw saved before RP was forced
	andw	$0x0300,%cx		/ keep only its RP bits
	orw	%dx,%cx			/ current cw + old RP bits
	movw	%cx,(%esp)
	fldcw	(%esp)			/// restore old RP; 19/4
	fstpl	(%esp)			/ round to double (reuses all 8 bytes;
					/ the saved cw is no longer needed)
	fldl	(%esp)			/ exp(x) rounded to double
	fxam				/ determine class of exp(x)
	add	$8,%esp			/ release the scratch area
	fstsw	%ax			/ store status in ax
	andw	$0x4500,%ax		/ keep C3 (0x4000), C2 (0x0400), C0 (0x0100)
	cmpw	$0x0500,%ax		/ C2 and C0 set, C3 clear: infinity
	je	.overflow
	cmpw	$0x4000,%ax		/ C3 alone: zero
	je	.underflow
	ret				/ finite nonzero result in st(0)

.overflow:				/ rounded result is infinite
	fstp	%st(0)			/ stack empty
	push	%ebp
	mov	%esp,%ebp		/ now 8(%ebp) = low x, 12(%ebp) = high x
	PIC_SETUP(1)
	pushl	$6			/ error code used for overflow
	jmp	.error

.underflow:				/ rounded result is zero
	fstp	%st(0)			/ stack empty
	push	%ebp
	mov	%esp,%ebp		/ now 8(%ebp) = low x, 12(%ebp) = high x
	PIC_SETUP(2)
	pushl	$7			/ error code used for underflow

.error:					/ error code is already on the stack
	pushl	12(%ebp)		/ high x
	pushl	8(%ebp)			/ low x
	pushl	12(%ebp)		/ high x
	pushl	8(%ebp)			/ low x
	call	PIC_F(_SVID_libm_err)	/ NOTE(review): presumably leaves the
					/ value to return in st(0) -- confirm
	addl	$20,%esp		/ drop the five 4-byte words pushed
	PIC_WRAPUP
	leave
	ret

.z_integral:				/ here, z is integral
	fstp	%st(0)			/ ,z
	fld1				/ 1,z
	jmp	.merge			/ fscale there yields 1 * 2**z

.check_tail:				/ here, hi_32(|x|) = hi_32(ln(2))
	movl	4(%esp),%edx		/ edx <-- lo_32(x)
	cmpl	$0xfefa39ef,%edx	/ Is |x| slightly < ln(2)?
	ja	.finite_non_special	/ branch if |x| slightly > ln(2)
.shortcut:
	/ Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
	/ whence z is in f2xm1's domain.
	fldl	4(%esp)			/ push x
	fldl2e				/ push log2e   }not for xtndd_dbl
	fmulp	%st,%st(1)		/ z = x*log2e  }not for xtndd_dbl
	f2xm1				/ 2**(x*log2(e))-1 = e**x - 1
	fld1				/ 1,2**(z)-1
	faddp	%st,%st(1)		/ 2**(z) = e**x
	ret				/ returned as is: no store/reload
					/ rounding and no fxam check here

.not_finite:
	/ Here, flags still have settings from execution of
	/	cmpl	$0x7ff00000,%ecx	/ hi_32(|x|) > hi_32(INF)?
	ja	.NaN_or_pinf		/ if so, x is NaN; otherwise the high
					/ word matches INF and x may be +/- INF
	movl	4(%esp),%edx		/ edx <-- lo_32(x)
	cmpl	$0,%edx			/ lo_32(x) = 0?
	jne	.NaN_or_pinf		/ if not, x is NaN
	movl	8(%esp),%eax		/ eax <-- hi_32(x)
	andl	$0x80000000,%eax	/ here, x is infinite, but +/-?
	jz	.NaN_or_pinf		/ branch if x = +INF
	fldz				/ Here, x = -inf, so return 0
	ret

.NaN_or_pinf:
	/ Here, x = NaN or +inf, so load x and return immediately.
	fldl	4(%esp)
	fwait
	ret
	.align	4
	SET_SIZE(exp)