/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"expm1f.s"

#include "libm.h"
LIBM_ANSI_PRAGMA_WEAK(expm1f,function)
#include "libm_synonyms.h"

/*
 * Single-precision e**x - 1 on the x87 unit.
 *
 * Input:   the float argument x is read from 4(%esp) at entry.
 * Output:  the result is left in %st(0).
 * Scratch: %eax, %ecx, %edx, the condition flags and the x87 stack are
 *          written here; anything the PIC_SETUP/PIC_WRAPUP macros touch
 *          is decided by their definitions in the included headers.
 *
 * The argument is classified with integer compares on its bit pattern:
 *
 *   |x| below ln(2)	z = x*log2(e) has magnitude below 1, so a single
 *			f2xm1 on z gives the answer directly.
 *   finite, larger	z is split into a rounded part [z] and a remainder
 *			z-[z]; 2**(z-[z]) comes from f2xm1 plus one, fscale
 *			applies 2**[z], and one is subtracted at the end.
 *			[z] is clamped from below at -100 so that the
 *			scaling step does not raise a spurious underflow.
 *   -infinity		returns -1.
 *   +infinity or NaN	returns x itself.
 *
 * On the general path the precision-control field of the x87 control
 * word is forced to 11b (bits 0x0300) for the computation, and the
 * field found at entry is put back before returning.  Eight bytes of
 * stack hold the working control word at 0(%esp) and the control word
 * found at entry at 4(%esp).
 */

	.data
	.align	4
.minus100:	.float	-100.0		/ lower clamp for the fscale exponent

	ENTRY(expm1f)
	movl	4(%esp),%ecx		/ ecx = raw bits of x
	andl	$0x7fffffff,%ecx	/ drop the sign: ecx = bits of |x|
	cmpl	$0x3f317217,%ecx	/ |x| below ln(2)?
	jbe	.below_ln2		/ yes: one f2xm1 is enough
	cmpl	$0x7f800000,%ecx	/ exponent field all ones?
	jae	.inf_or_nan		/ yes: x is infinite or NaN
	flds	4(%esp)			/ st: x

	/ Keep the control word found at entry in 4(%esp) and switch
	/ the precision-control field to 11b for the computation.
	subl	$8,%esp
	fstcw	(%esp)
	movw	(%esp),%ax
	movw	%ax,4(%esp)		/ 4(%esp) = control word at entry
	orw	$0x0300,%ax		/ precision-control bits := 11b
	movw	%ax,(%esp)
	fldcw	(%esp)

	fldl2e				/ st: log2(e), x
	fmulp	%st,%st(1)		/ st: z = x*log2(e)
	fld	%st(0)			/ st: z, z
	frndint				/ st: [z], z
	fucom				/ compare [z] against z; this test
	fstsw	%ax			/ costs a little on the general
	sahf				/ path and saves the f2xm1 work
	je	.int_exponent		/ whenever z is a whole number
	/ z has a fractional part: build exp(x), then subtract one.
	fxch				/ st: z, [z]
	fsub	%st(1),%st		/ st: z-[z], [z]
	f2xm1				/ st: 2**(z-[z])-1, [z]
	/ Clamp the scale factor at -100 so fscale cannot underflow.
	PIC_SETUP(1)
	flds	PIC_L(.minus100)	/ st: -100, 2**(z-[z])-1, [z]
	PIC_WRAPUP
	fucom	%st(2)			/ -100 against [z]
	fstsw	%ax
	sahf
	jb	.have_scale		/ -100 below [z]: keep [z]
	fxch	%st(2)			/ otherwise -100 takes the [z] slot
.have_scale:
	fstp	%st(0)			/ st: 2**(z-[z])-1, max([z],-100)
	fld1				/ st: 1, 2**(z-[z])-1, max([z],-100)
	faddp	%st,%st(1)		/ st: 2**(z-[z]), max([z],-100)
	fscale				/ st: exp(x), max([z],-100)
	fld1				/ st: 1, exp(x), max([z],-100)
	fsubrp	%st,%st(1)		/ st: exp(x)-1, max([z],-100)
	fstp	%st(1)			/ st: exp(x)-1

	/ Put back the precision-control field found at entry while
	/ keeping every other bit of the current control word.
	fstcw	(%esp)
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ current word minus the PC field
	movw	4(%esp),%cx
	andw	$0x0300,%cx		/ PC field found at entry
	orw	%dx,%cx
	movw	%cx,(%esp)
	fldcw	(%esp)
	add	$8,%esp			/ release the scratch slots

	ret

.int_exponent:				/ z is a whole number
	fstp	%st(0)			/ st: z
	/ Clamp the scale factor at -100 so fscale cannot underflow.
	PIC_SETUP(2)
	flds	PIC_L(.minus100)	/ st: -100, z
	PIC_WRAPUP
	fucom	%st(1)			/ -100 against z
	fstsw	%ax
	sahf
	jb	.have_int_scale		/ -100 below z: keep z
	fxch	%st(1)			/ otherwise -100 takes the z slot
.have_int_scale:
	fstp	%st(0)			/ st: max(z,-100)
	fld1				/ st: 1, max(z,-100)
	fscale				/ st: exp(x), max(z,-100)
	fld1				/ st: 1, exp(x), max(z,-100)
	fsubrp	%st,%st(1)		/ st: exp(x)-1, max(z,-100)
	fstp	%st(1)			/ st: exp(x)-1

	/ Same control-word restore as on the general path.
	fstcw	(%esp)
	movw	(%esp),%dx
	andw	$0xfcff,%dx		/ current word minus the PC field
	movw	4(%esp),%cx
	andw	$0x0300,%cx		/ PC field found at entry
	orw	%dx,%cx
	movw	%cx,(%esp)
	fldcw	(%esp)
	add	$8,%esp			/ release the scratch slots

	ret

.below_ln2:
	/ |x| is below ln(2), hence |x*log2(e)| is below 1 and z can be
	/ handed to f2xm1 as is.  The control word is left alone here.
	flds	4(%esp)			/ st: x
	fldl2e				/ st: log2(e), x
	fmulp	%st,%st(1)		/ st: z = x*log2(e)
	f2xm1				/ st: 2**z - 1 = e**x - 1
	ret

.inf_or_nan:
	/ The flags still come from the compare against 0x7f800000.
	ja	.return_x		/ magnitude above infinity: NaN
	movl	4(%esp),%eax		/ eax = raw bits of x
	andl	$0x80000000,%eax	/ isolate the sign bit
	jz	.return_x		/ sign clear: x is +infinity
	fld1				/ x is -infinity: result is -1
	fchs
	ret

.return_x:
	/ x is NaN or +infinity: hand x back unchanged.
	flds	4(%esp)
	fwait
	ret
	.align	4
	SET_SIZE(expm1f)