1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "expm1f.s" 30 31 #include "libm.h" 32 LIBM_ANSI_PRAGMA_WEAK(expm1f,function) 33 34 .data 35 .align 4 36 .mhundred: .float -100.0 37 38 ENTRY(expm1f) 39 movl 4(%esp),%ecx / ecx <-- x 40 andl $0x7fffffff,%ecx / ecx <-- |x| 41 cmpl $0x3f317217,%ecx / Is |x| < ln(2)? 42 jbe .shortcut / If so, take a shortcut. 43 cmpl $0x7f800000,%ecx / |x| >= INF? 44 jae .not_finite / if so, x is not finite 45 flds 4(%esp) / push x 46 47 subl $8,%esp / save RP and set round-to-64-bits 48 fstcw (%esp) 49 movw (%esp),%ax 50 movw %ax,4(%esp) 51 orw $0x0300,%ax 52 movw %ax,(%esp) 53 fldcw (%esp) 54 55 fldl2e / push log2e }not for xtndd_dbl 56 fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl 57 fld %st(0) / duplicate stack top 58 frndint / [z],z 59 fucom / This and the next 3 instructions 60 fstsw %ax / add 10 clocks to runtime of the 61 sahf / main branch, but save about 265 62 je .z_integral / upon detection of integral z. 63 / [z] != 0, compute exp(x) and then subtract one to get expm1(x) 64 fxch / z,[z] 65 fsub %st(1),%st / z-[z],[z] 66 f2xm1 / 2**(z-[z])-1,[z] 67 / avoid spurious underflow when scaling to compute exp(x) 68 PIC_SETUP(1) 69 flds PIC_L(.mhundred) 70 PIC_WRAPUP 71 fucom %st(2) / if -100 !< [z], then use -100 72 fstsw %ax 73 sahf 74 jb .got_int_part 75 fxch %st(2) 76 .got_int_part: 77 fstp %st(0) / 2**(z-[z])-1,max([z],-100) 78 fld1 / 1,2**(z-[z])-1,max([z],-100) 79 faddp %st,%st(1) / 2**(z-[z]) ,max([z],-100) 80 fscale / exp(x) ,max([z],-100) 81 fld1 / 1,exp(x) ,max([z],-100) 82 fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) 83 fstp %st(1) 84 85 fstcw (%esp) / restore old RP 86 movw (%esp),%dx 87 andw $0xfcff,%dx 88 movw 4(%esp),%cx 89 andw $0x0300,%cx 90 orw %dx,%cx 91 movw %cx,(%esp) 92 fldcw (%esp) 93 add $8,%esp 94 95 ret 96 97 .z_integral: / here, z is integral 98 fstp %st(0) / ,z 99 / avoid spurious underflow when scaling to compute exp(x) 100 PIC_SETUP(2) 101 flds PIC_L(.mhundred) 102 PIC_WRAPUP 103 fucom %st(1) / if -100 !< [z], then use -100 104 fstsw %ax 105 sahf 106 jb .scale_wont_ovfl 107 fxch %st(1) 108 .scale_wont_ovfl: 109 fstp %st(0) / max([z],-100) 110 fld1 / 1,max([z],-100) 111 fscale / exp(x) ,max([z],-100) 112 fld1 / 1,exp(x) ,max([z],-100) 113 fsubrp %st,%st(1) / exp(x)-1 ,max([z],-100) 114 fstp %st(1) 115 116 fstcw (%esp) / restore old RP 117 movw (%esp),%dx 118 andw $0xfcff,%dx 119 movw 4(%esp),%cx 120 andw $0x0300,%cx 121 orw %dx,%cx 122 movw %cx,(%esp) 123 fldcw (%esp) 124 add $8,%esp 125 126 ret 127 128 .shortcut: 129 / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1, 130 / whence z is in f2xm1's domain. 131 flds 4(%esp) / push x 132 fldl2e / push log2e }not for xtndd_dbl 133 fmulp %st,%st(1) / z = x*log2e }not for xtndd_dbl 134 f2xm1 / 2**(x*log2(e))-1 = e**x - 1 135 ret 136 137 .not_finite: 138 ja .NaN_or_pinf / branch if x is NaN 139 movl 4(%esp),%eax / eax <-- x 140 andl $0x80000000,%eax / here, x is infinite, but +/-? 141 jz .NaN_or_pinf / branch if x = +INF 142 fld1 / Here, x = -inf, so return -1 143 fchs 144 ret 145 146 .NaN_or_pinf: 147 / Here, x = NaN or +inf, so load x and return immediately. 148 flds 4(%esp) 149 fwait 150 ret 151 .align 4 152 SET_SIZE(expm1f)