il_alllibm New usr/src/lib/libm/i386/src/pow.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file "pow.s"
  30 
  31 / Note: 0^NaN should not signal "invalid" but this implementation
  32 / does because y is placed on the NPX stack.
  33 
  34 / Special cases:
  35 /
  36 / x ** 0 is 1                           _SVID_libm_err if x is 0 or NaN
  37 / 1 ** y is 1                           (C99)
  38 / x ** NaN is NaN
  39 / NaN ** y (except 0) is NaN
  40 / x ** 1 is x
  41 / +-(|x| > 1) **  +inf is +inf
  42 / +-(|x| > 1) **  -inf is +0
  43 / +-(|x| < 1) **  +inf is +0
  44 / +-(|x| < 1) **  -inf is +inf
  45 / (-1) ** +-inf is +1                   (C99)
  46 / +0 ** +y (except 0, NaN)              is +0
  47 / -0 ** +y (except 0, NaN, odd int)     is +0
  48 / -0 ** +y (odd int)                    is -0
  49 / +-0 ** -y (except 0, NaN)             _SVID_libm_err
  50 / +inf ** +y (except 0, NaN)            is +inf
  51 / +inf ** -y (except 0, NaN)            is +0
  52 / -inf ** +-y (except 0, NaN)           is -0 ** -+y (NO z flag)
  53 / x ** -1 is 1/x
  54 / x ** 2 is x*x
  55 / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
  56 / x ** y (x negative & y not integer)       _SVID_libm_err
  57 / if x and y are finite and x**y = 0    _SVID_libm_err (underflow)
  58 / if x and y are finite and x**y = inf  _SVID_libm_err (overflow)
  59 
  60 #include "libm.h"
  61 LIBM_ANSI_PRAGMA_WEAK(pow,function)
  62 #include "libm_protos.h"
  63 #include "xpg6.h"
  64 
  65         .data
  66         .align  4
  67 negzero:
  68         .float  -0.0
  69 one:
  70         .float  1.0
  71 negone:
  72         .float  -1.0
  73 two:
  74         .float  2.0
  75 Snan:
  76         .long   0x7f800001
  77 pinfinity:
  78         .long   0x7f800000
  79 ninfinity:
  80         .long   0xff800000
  81 
  82 
  83         ENTRY(pow)
  84         pushl   %ebp
  85         movl    %esp,%ebp
  86         PIC_SETUP(1)
  87 
  88         fldl    8(%ebp)                 / x
  89         fxam                            / determine class of x
  90         fnstsw  %ax                     / store status in %ax
  91         movb    %ah,%dh                 / %dh <- condition code of x
  92 
  93         fldl    16(%ebp)                / y , x
  94         fxam                            / determine class of y
  95         fnstsw  %ax                     / store status in %ax
  96         movb    %ah,%dl                 / %dl <- condition code of y
  97 
  98         call    .pow_main               /// LOCAL
  99         PIC_WRAPUP
 100         leave
 101         ret
 102 
 103 .pow_main:
 104         / x ** 0 is 1 unless x is 0 or a NaN
 105         movb    %dl,%cl
 106         andb    $0x45,%cl
 107         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 108         jne     1f
 109         movb    %dh,%cl
 110         andb    $0x45,%cl
 111         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 112         jne     2f
 113         / 0^0
 114         pushl   $20
 115         jmp     .SVIDerr                / SVID error handler
 116 2:
 117         cmpb    $0x01,%cl               /// C3=0 C2=0 C1=? C0=1 when +-NaN
 118         jne     2f
 119         / NaN^0
 120         pushl   $42
 121         jmp     .SVIDerr
 122 2:
 123         / (not 0 or NaN)^0
 124         fstp    %st(0)                  / x
 125         fstp    %st(0)                  / stack empty
 126         fld1                            / 1
 127         ret
 128 
 129 1:      / y is not zero
 130         PIC_G_LOAD(movzwl,__xpg6,eax)
 131         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 132         cmpl    $0,%eax
 133         je      1f
 134 
 135         / C99: 1 ** anything is 1
 136         fld1                            / 1, y, x
 137         fucomp  %st(2)                  / y, x
 138         fnstsw  %ax                     / store status in %ax
 139         sahf                            / 80387 flags in %ax to 80386 flags
 140         jp      1f                      / so that pow(NaN1,NaN2) returns NaN2
 141         jne     1f
 142         fstp    %st(0)                  / x
 143         ret
 144 
 145 1:
 146         / x ** NaN is NaN
 147         movb    %dl,%cl
 148         andb    $0x45,%cl
 149         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 150         jne     1f
 151         fstp    %st(1)                  / y
 152         ret
 153 
 154 1:      / y is not NaN
 155         / NaN ** y (except 0) is NaN
 156         movb    %dh,%cl
 157         andb    $0x45,%cl
 158         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 159         jne     1f
 160         fstp    %st(0)                  / x
 161         ret
 162 
 163 1:      / x is not NaN
 164         / x ** 1 is x
 165         fcoms   PIC_L(one)              / y , x
 166         fnstsw  %ax                     / store status in %ax
 167         sahf                            / 80387 flags in %ax to 80386 flags
 168         jne     1f
 169         fstp    %st(0)                  / x
 170         ret
 171 
 172 1:      / y is not 1
 173         / +-(|x| > 1) **  +inf is +inf
 174         / +-(|x| > 1) **  -inf is +0
 175         / +-(|x| < 1) **  +inf is +0
 176         / +-(|x| < 1) **  -inf is +inf
 177         / +-(|x| = 1) ** +-inf is NaN
 178         movb    %dl,%cl
 179         andb    $0x47,%cl
 180         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 181         je      .yispinf
 182         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 183         je      .yisninf
 184 
 185         / +0 ** +y (except 0, NaN)              is +0
 186         / -0 ** +y (except 0, NaN, odd int)     is +0
 187         / +0 ** -y (except 0, NaN)              is +inf (z flag)
 188         / -0 ** -y (except 0, NaN, odd int)     is +inf (z flag)
 189         / -0 ** y (odd int)                     is - (+0 ** x)
 190         movb    %dh,%cl
 191         andb    $0x47,%cl
 192         cmpb    $0x40,%cl               / C3=1 C2=0 C1=0 C0=0 when +0
 193         je      .xispzero
 194         cmpb    $0x42,%cl               / C3=1 C2=0 C1=1 C0=0 when -0
 195         je      .xisnzero
 196 
 197         / +inf ** +y (except 0, NaN)    is +inf
 198         / +inf ** -y (except 0, NaN)    is +0
 199         / -inf ** +-y (except 0, NaN)   is -0 ** -+y (NO z flag)
 200         movb    %dh,%cl
 201         andb    $0x47,%cl
 202         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 203         je      .xispinf
 204         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 205         je      .xisninf
 206 
 207         / x ** -1 is 1/x
 208         fcoms   PIC_L(negone)           / y , x
 209         fnstsw  %ax                     / store status in %ax
 210         sahf                            / 80387 flags in %ax to 80386 flags
 211         jne     1f
 212         fld     %st(1)                  / x , y , x
 213         fdivrs  PIC_L(one)              / 1/x , y , x
 214         jmp     .signok                 / check for over/underflow
 215 
 216 1:      / y is not -1
 217         / x ** 2 is x*x
 218         fcoms   PIC_L(two)              / y , x
 219         fnstsw  %ax                     / store status in %ax
 220         sahf                            / 80387 flags in %ax to 80386 flags
 221         jne     1f
 222         fld     %st(1)                  / x , y , x
 223         fld     %st(0)                  / x , x , y , x
 224         fmulp                           / x^2 , y , x
 225         jmp     .signok                 / check for over/underflow
 226 
 227 1:      / y is not 2
 228         / make copies of x & y
 229         fld     %st(1)                  / x , y , x
 230         fld     %st(1)                  / y , x , y , x
 231 
 232         / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
 233         / x ** y (x negative & y not integer) is  NaN
 234         movl    $0,%ecx                 / track whether to flip sign of result
 235         fld     %st(1)                  / x , y , x , y , x
 236         ftst                            / compare %st(0) with 0
 237         fnstsw  %ax                     / store status in %ax
 238         sahf                            / 80387 flags in %ax to 80386 flags
 239         fstp    %st(0)                  / y , x , y , x
 240         ja      .merge                  / x > 0
 241         / x < 0
 242         call    .y_is_int
 243         cmpl    $0,%ecx
 244         jne     1f
 245         / x < 0, y is non-integral
 246         fstp    %st(0)                  / x , y , x
 247         fstp    %st(0)                  / y , x
 248         pushl   $24
 249         jmp     .SVIDerr                / SVID error handler
 250 
 251 1:      / x < 0 & y = int
 252         fxch                            / x , y , y , x
 253         fchs                            / px = -x , y , y , x
 254         fxch                            / y , px , y , x
 255 .merge:
 256         / px > 0
 257         fxch                            / px , y , y , x
 258 
 259         / x**y   =   exp(y*ln(x))
 260         fyl2x                           / t=y*log2(px) , y , x
 261         fld     %st(0)                  / t , t , y , x
 262         frndint                         / [t] , t , y , x
 263         fxch                            / t , [t] , y , x
 264         fucom
 265         fnstsw  %ax                     / store status in %ax
 266         sahf                            / 80387 flags in %ax to 80386 flags
 267         je      1f                      / t is integral
 268         fsub    %st(1),%st              / t-[t] , [t] , y , x
 269         f2xm1                           / 2**(t-[t])-1 , [t] , y , x
 270         fadds   PIC_L(one)              / 2**(t-[t]) , [t] , y , x
 271         fscale                          / 2**t = px**y , [t] , y , x
 272         jmp     2f
 273 1:
 274         fstp    %st(0)                  / t=[t] , y , x
 275         fld1                            / 1 , t , y , x
 276         fscale                          / 1*2**t = x**y , t , y , x
 277 2:
 278         fstp    %st(1)                  / x**y , y , x
 279         cmpl    $1,%ecx
 280         jne     .signok
 281         fchs                            / change sign since x<0 & y=-int
 282 .signok:
 283         subl    $8,%esp
 284         fstpl   (%esp)                  / round to double precision
 285         fldl    (%esp)                  / place result on NPX stack
 286         addl    $8,%esp
 287 
 288         fxam                            / determine class of x**y
 289         fnstsw  %ax                     / store status in %ax
 290         andw    $0x4500,%ax
 291         / check for overflow
 292         cmpw    $0x0500,%ax             / C0=0 C1=1 C2=? C3=1 then +-inf
 293         jne     1f
 294         / x^y overflows
 295         fstp    %st(0)                  / y , x
 296         pushl   $21
 297         jmp     .SVIDerr
 298 1:
 299         / check for underflow
 300         cmpw    $0x4000,%ax             / C0=1 C1=0 C2=? C3=0 then +-0
 301         jne     1f
 302         / x^y underflows
 303         fstp    %st(0)                  / y , x
 304         pushl   $22
 305         jmp     .SVIDerr
 306 1:
 307         fstp    %st(2)                  / y , x**y
 308         fstp    %st(0)                  / x**y
 309         ret
 310 
 311 / ------------------------------------------------------------------------
 312 
 313 .xispinf:
 314         ftst                            / compare %st(0) with 0
 315         fnstsw  %ax                     / store status in %ax
 316         sahf                            / 80387 flags in %ax to 80386 flags
 317         ja      .retpinf                / y > 0
 318         jmp     .retpzero               / y < 0
 319 
 320 .xisninf:
 321         / -inf ** +-y is -0 ** -+y
 322         fchs                            / -y , x
 323         flds    PIC_L(negzero)          / -0 , -y , x
 324         fstp    %st(2)                  / -y , -0
 325         jmp     .xisnzero
 326 
 327 .yispinf:
 328         fld     %st(1)                  / x , y , x
 329         fabs                            / |x| , y , x
 330         fcomps  PIC_L(one)              / y , x
 331         fnstsw  %ax                     / store status in %ax
 332         sahf                            / 80387 flags in %ax to 80386 flags
 333         je      .retponeorinvalid       / x == -1       C99
 334         ja      .retpinf                / |x| > 1
 335         jmp     .retpzero               / |x| < 1
 336 
 337 .yisninf:
 338         fld     %st(1)                  / x , y , x
 339         fabs                            / |x| , y , x
 340         fcomps  PIC_L(one)              / y , x
 341         fnstsw  %ax                     / store status in %ax
 342         sahf                            / 80387 flags in %ax to 80386 flags
 343         je      .retponeorinvalid       / x == -1       C99
 344         ja      .retpzero               / |x| > 1
 345         jmp     .retpinf                / |x| < 1
 346 
 347 .xispzero:
 348         / y cannot be 0 or NaN ; stack has      y , x
 349         ftst                            / compare %st(0) with 0
 350         fnstsw  %ax                     / store status in %ax
 351         sahf                            / 80387 flags in %ax to 80386 flags
 352         ja      .retpzero               / y > 0
 353         / x = +0 & y < 0
 354         jmp     .SVIDzerotoneg
 355 
 356 .xisnzero:
 357         / y cannot be 0 or NaN ; stack has      y , x
 358         call    .y_is_int
 359         cmpl    $1,%ecx
 360         jne     1f                      / y is not an odd integer
 361         / y is an odd integer
 362         ftst                            / compare %st(0) with 0
 363         fnstsw  %ax                     / store status in %ax
 364         sahf                            / 80387 flags in %ax to 80386 flags
 365         ja      .retnzero               / y > 0
 366         / x = -0 & y < 0 (odd int)       return -inf (z flag)
 367         / x = -inf & y != 0 or NaN  return -inf (NO z flag)
 368         movb    %dh,%cl
 369         andb    $0x45,%cl
 370         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 371         jne     .SVIDzerotoneg
 372         fstp    %st(0)                  / x
 373         fstp    %st(0)                  / stack empty
 374         flds    PIC_L(ninfinity)        / -inf
 375         ret
 376 
 377 1:      / y is not an odd integer
 378         ftst                            / compare %st(0) with 0
 379         fnstsw  %ax                     / store status in %ax
 380         sahf                            / 80387 flags in %ax to 80386 flags
 381         ja      .retpzero               / y > 0
 382         / x = -0 & y < 0 (not odd int)   return +inf (z flag)
 383         / x = -inf & y not 0 or NaN         return +inf (NO z flag)
 384         movb    %dh,%cl
 385         andb    $0x45,%cl
 386         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 387         jne     .SVIDzerotoneg
 388         jmp     .retpinf                / return +inf (NO z flag)
 389 
 390 .retpzero:
 391         fstp    %st(0)                  / x
 392         fstp    %st(0)                  / stack empty
 393         fldz                            / +0
 394         ret
 395 
 396 .retnzero:
 397         fstp    %st(0)                  / x
 398         fstp    %st(0)                  / stack empty
 399         flds    PIC_L(negzero)          / -0
 400         ret
 401 
 402 .retponeorinvalid:
 403         PIC_G_LOAD(movzwl,__xpg6,eax)
 404         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 405         cmpl    $0,%eax
 406         je      1f
 407         fstp    %st(0)                  / x
 408         fstp    %st(0)                  / stack empty
 409         fld1                            / 1
 410         ret
 411 
 412 1:
 413         fstp    %st(0)                  / x
 414         fstp    %st(0)                  / stack empty
 415         flds    PIC_L(Snan)             / Q NaN (i flag)
 416         fwait
 417         ret
 418 
 419 .retpinf:
 420         fstp    %st(0)                  / x
 421         fstp    %st(0)                  / stack empty
 422         flds    PIC_L(pinfinity)        / +inf
 423         ret
 424 
 425 .SVIDzerotoneg:
 426         pushl   $23
 427 .SVIDerr:
 428         / At this point the fp stack contains  y , x  and the number
 429         / of the error case has been pushed on the memory stack.
 430         subl    $16,%esp
 431         fstpl   8(%esp)                 / push y
 432         fstpl   (%esp)                  / push x; NPX stack empty
 433         call    PIC_F(_SVID_libm_err)   / report result/error according to SVID
 434         addl    $20,%esp
 435         ret
 436 
 437 / Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
 438 / 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
 439 / %edx.
 440 .y_is_int:
 441         movl    20(%ebp),%eax
 442         andl    $0x7fffffff,%eax        / |y|
 443         cmpl    $0x43400000,%eax
 444         jae     1f                      / |y| >= 2^53, an even int
 445         cmpl    $0x3ff00000,%eax
 446         jb      2f                      / |y| < 1, can't be an int
 447         movl    %eax,%ecx
 448         sarl    $20,%ecx
 449         subl    $0x433,%ecx
 450         negl    %ecx                    / 52 - unbiased exponent of y
 451         movl    16(%ebp),%eax
 452         bsfl    %eax,%eax               / index of least sig. 1 bit
 453         jne     3f                      / jump if 1 bit found
 454         movl    20(%ebp),%eax
 455         bsfl    %eax,%eax
 456         addl    $32,%eax                / 32 + index of least sig. 1 bit
 457 3:
 458         cmpl    %ecx,%eax
 459         jb      2f
 460         ja      1f
 461         movl    $1,%ecx
 462         ret
 463 1:
 464         movl    $2,%ecx
 465         ret
 466 2:
 467         xorl    %ecx,%ecx
 468         ret
 469         .align  4
 470         SET_SIZE(pow)