il_alllibm Wdiff usr/src/lib/libm/i386/src/exp.s

Print this page

5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/i386/src/exp.s
          +++ new/usr/src/lib/libm/i386/src/exp.s

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.

↓ open down ↓

22 lines elided

↑ open up ↑

  23   23   */
  24   24  /*
  25   25   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  26   26   * Use is subject to license terms.
  27   27   */
  28   28  
  29   29          .file "exp.s"
  30   30  
  31   31  #include "libm.h"
  32   32  LIBM_ANSI_PRAGMA_WEAK(exp,function)
  33      -#include "libm_synonyms.h"
  34   33  #include "libm_protos.h"
  35   34  
  36   35          ENTRY(exp)
                   / Computes e**x for the double x passed on the stack: on entry
                   / the low word of x is at 4(%esp) and the high word at 8(%esp).
                   / The result is left in %st(0).  Small arguments, |x| <= ln(2),
                   / use a single f2xm1; larger finite arguments split z = x*log2e
                   / into [z] and z-[z] and recombine with fscale; a result that
                   / rounds to INF or to zero is handed to _SVID_libm_err.
  37   36          movl    8(%esp),%ecx            / ecx <-- hi_32(x)
  38   37          andl    $0x7fffffff,%ecx        / ecx <-- hi_32(|x|)
  39   38          cmpl    $0x3fe62e42,%ecx        / Is |x| < ln(2)?
  40   39          jb      .shortcut               / If so, take a shortcut.
  41   40          je      .check_tail             / |x| may be only slightly < ln(2)
  42   41          cmpl    $0x7ff00000,%ecx        / hi_32(|x|) >= hi_32(INF)?
  43   42          jae     .not_finite             / if so, x is not finite

  44   43  .finite_non_special:                    / Here, ln(2) < |x| < INF
  45   44          fldl    4(%esp)                 / push x
  46   45          subl    $8,%esp                 / scratch: (%esp) work, 4(%esp) old cw
  47   46                                          /// overhead of RP save/restore; 63/15
  48   47          fstcw   (%esp)                  /// ; 15/3
  49   48          movw    (%esp),%ax              /// ; 4/1
  50   49          movw    %ax,4(%esp)             /// save old RP; 2/1
  51   50          orw     $0x0300,%ax             /// force 64-bit RP; 2/1
  52   51          movw    %ax,(%esp)              /// ; 2/1
  53   52          fldcw   (%esp)                  /// ; 19/4
  54   53          fldl2e                          / push log2e   }not for xtndd_dbl
  55   54          fmulp   %st,%st(1)              / z = x*log2e  }not for xtndd_dbl
  56   55          fld     %st(0)                  / duplicate stack top
  57   56          frndint                         / [z],z
  58   57          fucom                           / This and the next 3 instructions
  59   58          fstsw  %ax                      / add 10 clocks to runtime of the
  60   59          sahf                            / main branch, but save about 265
  61   60          je      .z_integral             / upon detection of integral z.
  62   61          / [z] != z, compute exp(x)
  63   62          fxch                            / z,[z]
  64   63          fsub    %st(1),%st              / z-[z],[z]
  65   64          f2xm1                           / 2**(z-[z])-1,[z]
  66   65          fld1                            / 1,2**(z-[z])-1,[z]
  67   66          faddp   %st,%st(1)              /   2**(z-[z])  ,[z]
  68   67  .merge:
  69   68          fscale                          /   exp(x)      ,[z]
  70   69          fstp    %st(1)                  /   exp(x)  ([z] overwritten, popped)
  71   70          fstcw   (%esp)                  / re-read the current control word
  72   71          movw    (%esp),%dx              / dx <-- current control word
  73   72          andw    $0xfcff,%dx             / clear its RP field (bits 8-9)
  74   73          movw    4(%esp),%cx             / cx <-- control word saved on entry
  75   74          andw    $0x0300,%cx             / keep only the saved RP field
  76   75          orw     %dx,%cx                 / current word with the old RP
  77   76          movw    %cx,(%esp)              / stage merged word for fldcw
  78   77          fldcw   (%esp)                  /// restore old RP; 19/4
  79   78          fstpl   (%esp)                  / round to double
  80   79          fldl    (%esp)                  / exp(x) rounded to double
  81   80          fxam                            / determine class of exp(x)
  82   81          add     $8,%esp                 / release the 8 scratch bytes
  83   82          fstsw   %ax                     / store status in ax
  84   83          andw    $0x4500,%ax             / keep condition bits C3, C2, C0
  85   84          cmpw    $0x0500,%ax             / C2 and C0 only: fxam saw infinity
  86   85          je      .overflow               / rounded result is INF
  87   86          cmpw    $0x4000,%ax             / C3 only: fxam saw zero
  88   87          je      .underflow              / rounded result is zero
  89   88          ret                             / otherwise return exp(x) in %st(0)
  90   89  
  91   90  .overflow:
  92   91          fstp    %st(0)                  / stack empty
  93   92          push    %ebp                    / frame, so x is at 8(%ebp)/12(%ebp)
  94   93          mov     %esp,%ebp
  95   94          PIC_SETUP(1)
  96   95          pushl   $6                      / pushed first, so last argument;
                                                   / presumably the overflow error
                                                   / code -- TODO confirm
  97   96          jmp     .error
  98   97  
  99   98  .underflow:
 100   99          fstp    %st(0)                  / stack empty
 101  100          push    %ebp                    / frame, so x is at 8(%ebp)/12(%ebp)
 102  101          mov     %esp,%ebp
 103  102          PIC_SETUP(2)
 104  103          pushl   $7                      / pushed first, so last argument;
                                                   / presumably the underflow error
                                                   / code -- TODO confirm
 105  104  
 106  105  .error:
                   / Shared tail: x is pushed twice, as two 8-byte arguments ahead
                   / of the 4-byte code pushed above, then the error routine is called.
 107  106          pushl   12(%ebp)                / high x
 108  107          pushl   8(%ebp)                 / low x
 109  108          pushl   12(%ebp)                / high x
 110  109          pushl   8(%ebp)                 / low x
 111  110          call    PIC_F(_SVID_libm_err)
 112  111          addl    $20,%esp                / drop the 20 bytes of arguments
 113  112          PIC_WRAPUP
 114  113          leave                           / undo the frame set up above
 115  114          ret
 116  115  
 117  116  .z_integral:                            / here, z is integral
 118  117          fstp    %st(0)                  / ,z
 119  118          fld1                            / 1,z
 120  119          jmp     .merge                  / fscale there yields 1 * 2**z
 121  120  
 122  121  .check_tail:
                   / Here, hi_32(|x|) equals the high word of ln(2); the low
                   / word decides which side of ln(2) the argument is on.
 123  122          movl    4(%esp),%edx            / edx <-- lo_32(x)
 124  123          cmpl    $0xfefa39ef,%edx        / Is |x| slightly < ln(2)?
 125  124          ja      .finite_non_special     / branch if |x| slightly > ln(2)
 126  125  .shortcut:
 127  126          / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
 128  127          / whence z is in f2xm1's domain.
 129  128          fldl    4(%esp)                 / push x
 130  129          fldl2e                          / push log2e  }not for xtndd_dbl
 131  130          fmulp   %st,%st(1)              / z = x*log2e }not for xtndd_dbl
 132  131          f2xm1                           / 2**(x*log2(e))-1 = e**x - 1
 133  132          fld1                            / 1,2**(z)-1
 134  133          faddp   %st,%st(1)              /   2**(z) = e**x
 135  134          ret                             / return e**x in %st(0)
 136  135  
 137  136  .not_finite:
 138  137          / Here, flags still have settings from execution of
 139  138          /       cmpl    $0x7ff00000,%ecx        / hi_32(|x|) > hi_32(INF)?
 140  139          ja      .NaN_or_pinf            / if so, x is NaN; else x may be +/- INF
 141  140          movl    4(%esp),%edx            / edx <-- lo_32(x)
 142  141          cmpl    $0,%edx                 / lo_32(x) = 0?
 143  142          jne     .NaN_or_pinf            / if not, x is NaN
 144  143          movl    8(%esp),%eax            / eax <-- hi_32(x)
 145  144          andl    $0x80000000,%eax        / here, x is infinite, but +/-?
 146  145          jz      .NaN_or_pinf            / branch if x = +INF
 147  146          fldz                            / Here, x = -inf, so return 0
 148  147          ret
 149  148  
 150  149  .NaN_or_pinf:
 151  150          / Here, x = NaN or +inf, so load x and return immediately.
 152  151          fldl    4(%esp)                 / push x; it is the return value
 153  152          fwait                           / check for a pending x87 exception
 154  153          ret
 155  154          .align  4
 156  155          SET_SIZE(exp)

↓ open down ↓

113 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX