1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file "expm1f.s"
  30 
  31 #include "libm.h"
  32 LIBM_ANSI_PRAGMA_WEAK(expm1f,function)
  33 #include "libm_synonyms.h"
  34 
  35         .data
  36         .align  4
  /*
   * Single-precision -100.0.  expm1f loads it with flds (through PIC_L)
   * and uses it as the lower bound for the integer part [z] that is
   * handed to fscale.  In the code visible in this file it is only read.
   */
  37 .mhundred:      .float  -100.0
  38 
  /*
   * expm1f: computes e**x - 1 for the single-precision argument x found
   * at 4(%esp) on entry, and returns with the result in %st(0).
   *
   * The path is chosen from the bit pattern of |x|:
   *   - bits <= 0x3f317217 (|x| below ln 2): .shortcut, a single f2xm1
   *     applied to x*log2(e).
   *   - bits >= 0x7f800000 (Inf or NaN): .not_finite; -Inf gives -1,
   *     +Inf and NaN give x itself.
   *   - anything else: z = x*log2(e) is split into [z] (frndint) and
   *     z-[z]; 2**(z-[z]) is f2xm1 plus 1, fscale multiplies it by
   *     2**max([z],-100), and 1 is subtracted at the end.
   *
   * Only the third path touches the x87 control word: it ORs in 0x0300
   * (bits 8-9) and puts the caller's bits 8-9 back before returning.
   * That path reserves 8 bytes of stack: (%esp) is scratch for
   * fstcw/fldcw and 4(%esp) keeps the control word found on entry.
   *
   * Written by this routine: %eax, %ecx, %edx, EFLAGS, the x87 stack.
   * PIC_SETUP, PIC_WRAPUP and PIC_L come from libm.h and are not visible
   * here; presumably they establish and release a PIC base register --
   * confirm in libm.h which register that is and whether it is saved.
   */
  39         ENTRY(expm1f)
  40         movl    4(%esp),%ecx            / ecx <-- x
  41         andl    $0x7fffffff,%ecx        / ecx <-- |x|
  42         cmpl    $0x3f317217,%ecx        / Is |x| < ln(2)?
  43         jbe     .shortcut               / If so, take a shortcut.
  44         cmpl    $0x7f800000,%ecx        / |x| >= INF?
  45         jae     .not_finite             / if so, x is not finite
  46         flds    4(%esp)                 / push x
  47 
             /*
              * After the subl, x is at 12(%esp).  The control word is
              * stored twice: 4(%esp) keeps the unmodified copy for the
              * restore code, (%esp) receives the copy with bits 8-9
              * (the precision-control field) forced to 11b.
              */
  48         subl    $8,%esp                 / save RP and set round-to-64-bits
  49         fstcw   (%esp)
  50         movw    (%esp),%ax
  51         movw    %ax,4(%esp)
  52         orw     $0x0300,%ax
  53         movw    %ax,(%esp)
  54         fldcw   (%esp)
  55 
  56         fldl2e                          / push log2e   }not for xtndd_dbl
  57         fmulp   %st,%st(1)              / z = x*log2e  }not for xtndd_dbl
  58         fld     %st(0)                  / duplicate stack top
  59         frndint                         / [z],z
             /*
              * fucom without an operand compares %st(0) with %st(1),
              * here [z] with z.  fstsw + sahf copy the x87 condition
              * codes into EFLAGS so that je is taken when z == [z].
              */
  60         fucom                           / This and the next 3 instructions
  61         fstsw   %ax                     / add 10 clocks to runtime of the
  62         sahf                            / main branch, but save about 265
  63         je      .z_integral             / upon detection of integral z.
             /*
              * NOTE(review): what selects this fall-through path is
              * z != [z] (z is not an integer), as tested just above.
              */
  64         / [z] != 0, compute exp(x) and then subtract one to get expm1(x)
  65         fxch                            / z,[z]
  66         fsub    %st(1),%st              / z-[z],[z]
  67         f2xm1                           / 2**(z-[z])-1,[z]
  68         / avoid spurious underflow when scaling to compute exp(x) 
             /*
              * The control-word slots at (%esp) and 4(%esp) are not
              * referenced between PIC_SETUP and PIC_WRAPUP.
              */
  69         PIC_SETUP(1)
  70         flds    PIC_L(.mhundred)
  71         PIC_WRAPUP
             /*
              * Stack here: -100, 2**(z-[z])-1, [z].  fucom %st(2)
              * compares -100 with [z]; jb is taken when -100 < [z], so
              * the fstp at .got_int_part discards -100.  Otherwise the
              * fxch swaps the two first, and it is [z] that is
              * discarded while -100 stays as the scale exponent.
              */
  72         fucom   %st(2)                  / if -100 !< [z], then use -100
  73         fstsw   %ax
  74         sahf
  75         jb      .got_int_part
  76         fxch    %st(2)
  77 .got_int_part:
  78         fstp    %st(0)                  /   2**(z-[z])-1,max([z],-100)
  79         fld1                            / 1,2**(z-[z])-1,max([z],-100)
  80         faddp   %st,%st(1)              /   2**(z-[z])  ,max([z],-100)
  81         fscale                          /   exp(x)      ,max([z],-100)
  82         fld1                            / 1,exp(x)      ,max([z],-100)
  83         fsubrp  %st,%st(1)              /   exp(x)-1    ,max([z],-100)
             /* Drop the scale exponent; only the result stays on the stack. */
  84         fstp    %st(1)
  85 
             /*
              * Merge rather than reload: keep every bit of the current
              * control word except bits 8-9 (mask 0xfcff) and take bits
              * 8-9 from the copy saved at 4(%esp) (mask 0x0300).
              */
  86         fstcw   (%esp)                  / restore old RP
  87         movw    (%esp),%dx
  88         andw    $0xfcff,%dx
  89         movw    4(%esp),%cx
  90         andw    $0x0300,%cx
  91         orw     %dx,%cx
  92         movw    %cx,(%esp)
  93         fldcw   (%esp)
  94         add     $8,%esp
  95 
  96         ret
  97 
             /*
              * z == [z]: the fractional factor 2**(z-[z]) is 1, so f2xm1
              * is skipped and fscale is applied to 1.0 directly.  On
              * entry the stack is [z], z; the first fstp leaves just z.
              */
  98 .z_integral:                            / here, z is integral
  99         fstp    %st(0)                  / ,z
 100         / avoid spurious underflow when scaling to compute exp(x) 
 101         PIC_SETUP(2)
 102         flds    PIC_L(.mhundred)
 103         PIC_WRAPUP
             /*
              * Same selection of max(z,-100) as in the non-integral
              * path, with z one stack slot closer (%st(1)).
              * NOTE(review): despite the label name .scale_wont_ovfl,
              * what is applied here is the -100 lower bound.
              */
 104         fucom   %st(1)                  / if -100 !< [z], then use -100
 105         fstsw   %ax
 106         sahf
 107         jb      .scale_wont_ovfl
 108         fxch    %st(1)
 109 .scale_wont_ovfl:
 110         fstp    %st(0)                  /   max([z],-100)
 111         fld1                            / 1,max([z],-100)
 112         fscale                          /   exp(x)      ,max([z],-100)
 113         fld1                            / 1,exp(x)      ,max([z],-100)
 114         fsubrp  %st,%st(1)              /   exp(x)-1    ,max([z],-100)
             /* Drop the scale exponent; only the result stays on the stack. */
 115         fstp    %st(1)
 116 
             /*
              * Same control-word merge as on the non-integral path:
              * current bits except 8-9, plus the saved bits 8-9.
              */
 117         fstcw   (%esp)                  / restore old RP
 118         movw    (%esp),%dx
 119         andw    $0xfcff,%dx
 120         movw    4(%esp),%cx
 121         andw    $0x0300,%cx
 122         orw     %dx,%cx
 123         movw    %cx,(%esp)
 124         fldcw   (%esp)
 125         add     $8,%esp
 126 
 127         ret
 128 
             /*
              * This path is entered before the subl at the top, so x is
              * still at 4(%esp) and the control word is left untouched.
              */
 129 .shortcut:
 130         / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
 131         / whence z is in f2xm1's domain.
 132         flds    4(%esp)                 / push x
 133         fldl2e                          / push log2e  }not for xtndd_dbl
 134         fmulp   %st,%st(1)              / z = x*log2e }not for xtndd_dbl
 135         f2xm1                           / 2**(x*log2(e))-1 = e**x - 1
 136         ret
 137 
             /*
              * Reached through the jae that follows
              * cmpl $0x7f800000,%ecx, and jae does not modify EFLAGS.
              * The ja below therefore still tests that compare: above
              * means NaN, equal means Inf.  For Inf, the sign bit of
              * the original argument selects the result.
              */
 138 .not_finite:
 139         ja      .NaN_or_pinf            / branch if x is NaN 
 140         movl    4(%esp),%eax            / eax <-- x
 141         andl    $0x80000000,%eax        / here, x is infinite, but +/-?
 142         jz      .NaN_or_pinf            / branch if x = +INF
 143         fld1                            / Here, x = -inf, so return -1
 144         fchs
 145         ret
 146 
 147 .NaN_or_pinf:
 148         / Here, x = NaN or +inf, so load x and return immediately.
 149         flds    4(%esp)
             /*
              * NOTE(review): the fwait presumably makes any x87
              * exception raised by the load above get delivered before
              * the return -- confirm the intent.
              */
 150         fwait
 151         ret
 152         .align  4
 153         SET_SIZE(expm1f)