il_5261 Wdiff usr/src/lib/libm/i386/src/expm1f.s

Print this page

5261 libm should stop using synonyms.h

Split	Close
Expand all
Collapse all

          --- old/usr/src/lib/libm/i386/src/expm1f.s
          +++ new/usr/src/lib/libm/i386/src/expm1f.s

   1    1  /*
   2    2   * CDDL HEADER START
   3    3   *
   4    4   * The contents of this file are subject to the terms of the
   5    5   * Common Development and Distribution License (the "License").
   6    6   * You may not use this file except in compliance with the License.
   7    7   *
   8    8   * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9    9   * or http://www.opensolaris.org/os/licensing.
  10   10   * See the License for the specific language governing permissions
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  /*
  22   22   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.

↓ open down ↓

22 lines elided

↑ open up ↑

  23   23   */
  24   24  /*
  25   25   * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26   26   * Use is subject to license terms.
  27   27   */
  28   28  
  29   29          .file "expm1f.s"
  30   30  
  31   31  #include "libm.h"
  32   32  LIBM_ANSI_PRAGMA_WEAK(expm1f,function)
  33      -#include "libm_synonyms.h"
  34   33  
  35   34          .data
  36   35          .align  4
  37   36  .mhundred:      .float  -100.0          / floor applied to the fscale exponent
  38   37          / expm1f(x): single-precision x is read from 4(%esp); e**x - 1 is left in %st(0).
  39   38          ENTRY(expm1f)
  40   39          movl    4(%esp),%ecx            / ecx <-- bit pattern of x
  41   40          andl    $0x7fffffff,%ecx        / ecx <-- |x| (sign bit cleared)
  42   41          cmpl    $0x3f317217,%ecx        / Is |x| < ln(2)?
  43   42          jbe     .shortcut               / If so, f2xm1 alone is enough.

  44   43          cmpl    $0x7f800000,%ecx        / |x| >= INF?
  45   44          jae     .not_finite             / if so, x is INF (equal) or NaN (above)
  46   45          flds    4(%esp)                 / push x
  47   46          / After the subl: (%esp) = scratch control word, 4(%esp) = saved control word.
  48   47          subl    $8,%esp                 / save RP and set round-to-64-bits
  49   48          fstcw   (%esp)                  / store current control word
  50   49          movw    (%esp),%ax
  51   50          movw    %ax,4(%esp)             / keep the original for the restore below
  52   51          orw     $0x0300,%ax             / set both precision-control bits
  53   52          movw    %ax,(%esp)
  54   53          fldcw   (%esp)                  / activate the modified control word
  55   54  
  56   55          fldl2e                          / push log2e   }not for xtndd_dbl
  57   56          fmulp   %st,%st(1)              / z = x*log2e  }not for xtndd_dbl
  58   57          fld     %st(0)                  / z,z
  59   58          frndint                         / [z],z
  60   59          fucom                           / This and the next 3 instructions
  61   60          fstsw   %ax                     / add 10 clocks to runtime of the
  62   61          sahf                            / main branch, but save about 265
  63   62          je      .z_integral             / upon detection of integral z.
  64   63          / z != [z]: compute exp(x) and then subtract one to get expm1(x)
  65   64          fxch                            / z,[z]
  66   65          fsub    %st(1),%st              / z-[z],[z]
  67   66          f2xm1                           / 2**(z-[z])-1,[z]
  68   67          / avoid spurious underflow when scaling to compute exp(x)
  69   68          PIC_SETUP(1)
  70   69          flds    PIC_L(.mhundred)        / -100,2**(z-[z])-1,[z]
  71   70          PIC_WRAPUP
  72   71          fucom   %st(2)                  / if -100 !< [z], then use -100
  73   72          fstsw   %ax                     / FPU status word --> ax
  74   73          sahf                            / condition codes --> CPU flags
  75   74          jb      .got_int_part           / -100 < [z]: the -100 on top is dropped
  76   75          fxch    %st(2)                  / else swap so that [z] is dropped instead
  77   76  .got_int_part:
  78   77          fstp    %st(0)                  /   2**(z-[z])-1,max([z],-100)
  79   78          fld1                            / 1,2**(z-[z])-1,max([z],-100)
  80   79          faddp   %st,%st(1)              /   2**(z-[z])  ,max([z],-100)
  81   80          fscale                          /   exp(x)      ,max([z],-100)
  82   81          fld1                            / 1,exp(x)      ,max([z],-100)
  83   82          fsubrp  %st,%st(1)              /   exp(x)-1    ,max([z],-100)
  84   83          fstp    %st(1)                  /   exp(x)-1 (exponent operand dropped)
  85   84  
  86   85          fstcw   (%esp)                  / restore old RP
  87   86          movw    (%esp),%dx
  88   87          andw    $0xfcff,%dx             / current control word minus bits 8-9
  89   88          movw    4(%esp),%cx
  90   89          andw    $0x0300,%cx             / bits 8-9 of the saved control word
  91   90          orw     %dx,%cx                 / merge: only bits 8-9 revert
  92   91          movw    %cx,(%esp)
  93   92          fldcw   (%esp)
  94   93          add     $8,%esp                 / release the 8 bytes reserved above
  95   94  
  96   95          ret                             / result is in %st(0)
  97   96  
  98   97  .z_integral:                            / here, z is integral
  99   98          fstp    %st(0)                  / ,z
 100   99          / avoid spurious underflow when scaling to compute exp(x)
 101  100          PIC_SETUP(2)
 102  101          flds    PIC_L(.mhundred)        / -100,z
 103  102          PIC_WRAPUP
 104  103          fucom   %st(1)                  / if -100 !< [z], then use -100
 105  104          fstsw   %ax                     / FPU status word --> ax
 106  105          sahf                            / condition codes --> CPU flags
 107  106          jb      .scale_wont_ovfl        / -100 < z: the -100 on top is dropped
 108  107          fxch    %st(1)                  / else swap so that z is dropped instead
 109  108  .scale_wont_ovfl:
 110  109          fstp    %st(0)                  /   max([z],-100)
 111  110          fld1                            / 1,max([z],-100)
 112  111          fscale                          /   exp(x)      ,max([z],-100)
 113  112          fld1                            / 1,exp(x)      ,max([z],-100)
 114  113          fsubrp  %st,%st(1)              /   exp(x)-1    ,max([z],-100)
 115  114          fstp    %st(1)                  /   exp(x)-1 (exponent operand dropped)
 116  115  
 117  116          fstcw   (%esp)                  / restore old RP
 118  117          movw    (%esp),%dx
 119  118          andw    $0xfcff,%dx             / current control word minus bits 8-9
 120  119          movw    4(%esp),%cx
 121  120          andw    $0x0300,%cx             / bits 8-9 of the saved control word
 122  121          orw     %dx,%cx                 / merge: only bits 8-9 revert
 123  122          movw    %cx,(%esp)
 124  123          fldcw   (%esp)
 125  124          add     $8,%esp                 / release the 8 bytes reserved above
 126  125  
 127  126          ret                             / result is in %st(0)
 128  127  
 129  128  .shortcut:
 130  129          / Here, |x| < ln(2), so |z| = |x*log2(e)| < 1,
 131  130          / whence z is in f2xm1's domain.
 132  131          flds    4(%esp)                 / push x
 133  132          fldl2e                          / push log2e  }not for xtndd_dbl
 134  133          fmulp   %st,%st(1)              / z = x*log2e }not for xtndd_dbl
 135  134          f2xm1                           / 2**(x*log2(e))-1 = e**x - 1
 136  135          ret                             / control word untouched on this path
 137  136  
 138  137  .not_finite:
 139  138          ja      .NaN_or_pinf            / branch if x is NaN (flags from cmpl with 0x7f800000)
 140  139          movl    4(%esp),%eax            / eax <-- x
 141  140          andl    $0x80000000,%eax        / here, x is infinite, but +/-?
 142  141          jz      .NaN_or_pinf            / branch if x = +INF
 143  142          fld1                            / Here, x = -inf, so return -1
 144  143          fchs                            / 1 --> -1
 145  144          ret
 146  145  
 147  146  .NaN_or_pinf:
 148  147          / Here, x = NaN or +inf, so load x and return immediately.
 149  148          flds    4(%esp)                 / push x as the return value
 150  149          fwait
 151  150          ret
 152  151          .align  4
 153  152          SET_SIZE(expm1f)

↓ open down ↓

110 lines elided

↑ open up ↑

XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX