1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file "exp.s"
  30 
  31 #include "libm.h"
  32 LIBM_ANSI_PRAGMA_WEAK(exp,function)
  33 #include "libm_synonyms.h"
  34 #include "libm_protos.h"
  35 
  36         ENTRY(exp)
             /*
              * double exp(double x) -- i386 x87 implementation.
              *
              * In:   x on the stack: low word at 4(%esp), high word at 8(%esp).
              * Out:  result in %st(0).
              * Scratch registers written directly here: %eax, %ecx, %edx.
              * The main path also uses 8 bytes of temporary stack.
              *
              * Dispatch on the bit pattern of |x|:
              *   |x| <= 0x3fe62e42fefa39ef  .shortcut: 2**(x*log2e) as f2xm1 + 1
              *   larger finite |x|          z = x*log2e is split into [z] and
              *                              z-[z] (or z is found to be integral),
              *                              combined with fscale, then stored and
              *                              reloaded as a double and classified
              *                              with fxam
              *   x = -inf                   returns +0 (fldz)
              *   x = NaN or +inf            returns x itself
              *
              * On the main path a result that fxam reports as infinity or as
              * zero is passed on to _SVID_libm_err with code 6 or 7; whatever
              * that routine leaves behind is returned to the caller.
              *
              * NOTE(review): PIC_SETUP, PIC_F and PIC_WRAPUP are macros defined
              * outside this file; presumably they set up, use and tear down
              * position-independent addressing -- confirm in libm.h.
              */
  37         movl    8(%esp),%ecx            / ecx <-- hi_32(x)
  38         andl    $0x7fffffff,%ecx        / ecx <-- hi_32(|x|)
  39         cmpl    $0x3fe62e42,%ecx        / Is |x| < ln(2)?
  40         jb      .shortcut               / If so, take a shortcut.
  41         je      .check_tail             / |x| may be only slightly < ln(2)
  42         cmpl    $0x7ff00000,%ecx        / hi_32(|x|) >= hi_32(INF)?
  43         jae     .not_finite             / if so, x is not finite
  44 .finite_non_special:                    / Here, ln(2) < |x| < INF
  45         fldl    4(%esp)                 / push x
  46         subl    $8,%esp
  47                                         /// overhead of RP save/restore; 63/15
  48         fstcw   (%esp)                  /// ; 15/3
  49         movw    (%esp),%ax              /// ; 4/1
  50         movw    %ax,4(%esp)             /// save old RP; 2/1
  51         orw     $0x0300,%ax             /// force 64-bit RP; 2/1
  52         movw    %ax,(%esp)              /// ; 2/1
  53         fldcw   (%esp)                  /// ; 19/4
  54         fldl2e                          / push log2e   }not for xtndd_dbl
  55         fmulp   %st,%st(1)              / z = x*log2e  }not for xtndd_dbl
  56         fld     %st(0)                  / duplicate stack top
  57         frndint                         / [z],z
  58         fucom                           / This and the next 3 instructions
  59         fstsw  %ax                      / add 10 clocks to runtime of the
  60         sahf                            / main branch, but save about 265
  61         je      .z_integral             / upon detection of integral z.
  62         / [z] != z, compute exp(x)
  63         fxch                            / z,[z]
  64         fsub    %st(1),%st              / z-[z],[z]
  65         f2xm1                           / 2**(z-[z])-1,[z]
  66         fld1                            / 1,2**(z-[z])-1,[z]
  67         faddp   %st,%st(1)              /   2**(z-[z])  ,[z]
  68 .merge:
  69         fscale                          /   exp(x)      ,[z]
  70         fstp    %st(1)                  / drop [z]; exp(x) left in st(0)
  71         fstcw   (%esp)                  / fetch the current control word
  72         movw    (%esp),%dx              / dx <-- current control word
  73         andw    $0xfcff,%dx             / clear its precision (0x0300) bits
  74         movw    4(%esp),%cx             / cx <-- control word saved on entry
  75         andw    $0x0300,%cx             / keep only the saved precision bits
  76         orw     %dx,%cx                 / current word with old precision
  77         movw    %cx,(%esp)
  78         fldcw   (%esp)                  /// restore old RP; 19/4
  79         fstpl   (%esp)                  / round to double
  80         fldl    (%esp)                  / exp(x) rounded to double
  81         fxam                            / determine class of exp(x)
  82         add     $8,%esp                 / release scratch; x87 status unaffected
  83         fstsw   %ax                     / store status in ax
  84         andw    $0x4500,%ax             / keep condition bits C3, C2, C0
  85         cmpw    $0x0500,%ax             / C2 and C0 only: infinity?
  86         je      .overflow
  87         cmpw    $0x4000,%ax             / C3 only: zero?
  88         je      .underflow
  89         ret
  90 
  91 .overflow:                              / exp(x) rounded to infinity
  92         fstp    %st(0)                  / stack empty
  93         push    %ebp                    / frame so x is at 8(%ebp)/12(%ebp)
  94         mov     %esp,%ebp
  95         PIC_SETUP(1)
  96         pushl   $6                      / error code, pushed first (last arg)
  97         jmp     .error
  98 
  99 .underflow:                             / exp(x) rounded to zero
 100         fstp    %st(0)                  / stack empty
 101         push    %ebp                    / frame so x is at 8(%ebp)/12(%ebp)
 102         mov     %esp,%ebp
 103         PIC_SETUP(2)
 104         pushl   $7                      / error code, pushed first (last arg)
 105 
 106 .error:                                 / common tail: pass x twice plus code
 107         pushl   12(%ebp)                / high x
 108         pushl   8(%ebp)                 / low x
 109         pushl   12(%ebp)                / high x
 110         pushl   8(%ebp)                 / low x
 111         call    PIC_F(_SVID_libm_err)
 112         addl    $20,%esp                / discard the 5 words pushed above
 113         PIC_WRAPUP
 114         leave
 115         ret
 116 
 117 .z_integral:                            / here, z is integral
 118         fstp    %st(0)                  / ,z
 119         fld1                            / 1,z
 120         jmp     .merge
 121 
 122 .check_tail:
 123         movl    4(%esp),%edx            / edx <-- lo_32(x)
 124         cmpl    $0xfefa39ef,%edx        / Is |x| slightly < ln(2)?
 125         ja      .finite_non_special     / branch if |x| slightly > ln(2)
 126 .shortcut:
 127         / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
 128         / whence z is in f2xm1's domain.
 129         fldl    4(%esp)                 / push x
 130         fldl2e                          / push log2e  }not for xtndd_dbl
 131         fmulp   %st,%st(1)              / z = x*log2e }not for xtndd_dbl
 132         f2xm1                           / 2**(x*log2(e))-1 = e**x - 1
 133         fld1                            / 1,2**(z)-1
 134         faddp   %st,%st(1)              /   2**(z) = e**x
 135         ret
 136 
 137 .not_finite:
 138         / Here, flags still have settings from execution of
 139         /       cmpl    $0x7ff00000,%ecx        / hi_32(|x|) > hi_32(INF)?
 140         ja      .NaN_or_pinf            / if so, x is NaN; else maybe +/- INF
 141         movl    4(%esp),%edx            / edx <-- lo_32(x)
 142         cmpl    $0,%edx                 / lo_32(x) = 0?
 143         jne     .NaN_or_pinf            / if not, x is NaN
 144         movl    8(%esp),%eax            / eax <-- hi_32(x)
 145         andl    $0x80000000,%eax        / here, x is infinite, but +/-?
 146         jz      .NaN_or_pinf            / branch if x = +INF
 147         fldz                            / Here, x = -inf, so return 0
 148         ret
 149 
 150 .NaN_or_pinf:
 151         / Here, x = NaN or +inf, so load x and return immediately.
 152         fldl    4(%esp)
 153         fwait                           / check for a pending x87 exception
 154         ret
 155         .align  4
 156         SET_SIZE(exp)