1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file "pow.s"
  30 
  31 / Note: 0^NaN should not signal "invalid" but this implementation
  32 / does because y is placed on the NPX stack.
  33 
  34 / Special cases:
  35 /
  36 / x ** 0 is 1                           _SVID_libm_err if x is 0 or NaN
  37 / 1 ** y is 1                           (C99)
  38 / x ** NaN is NaN
  39 / NaN ** y (except 0) is NaN
  40 / x ** 1 is x
  41 / +-(|x| > 1) **  +inf is +inf
  42 / +-(|x| > 1) **  -inf is +0
  43 / +-(|x| < 1) **  +inf is +0
  44 / +-(|x| < 1) **  -inf is +inf
  45 / (-1) ** +-inf is +1                   (C99)
  46 / +0 ** +y (except 0, NaN)              is +0
  47 / -0 ** +y (except 0, NaN, odd int)     is +0
  48 / -0 ** +y (odd int)                    is -0
  49 / +-0 ** -y (except 0, NaN)             _SVID_libm_err
  50 / +inf ** +y (except 0, NaN)            is +inf
  51 / +inf ** -y (except 0, NaN)            is +0
  52 / -inf ** +-y (except 0, NaN)           is -0 ** -+y (NO z flag)
  53 / x ** -1 is 1/x
  54 / x ** 2 is x*x
  55 / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
  56 / x ** y (x negative & y not integer)       _SVID_libm_err
  57 / if x and y are finite and x**y = 0    _SVID_libm_err (underflow)
  58 / if x and y are finite and x**y = inf  _SVID_libm_err (overflow)
  59 
  60 #include "libm.h"
  61 LIBM_ANSI_PRAGMA_WEAK(pow,function)
  62 #include "libm_synonyms.h"
  63 #include "libm_protos.h"
  64 #include "xpg6.h"
  65 
  66 #undef fabs
  67 
  68         .data
  69         .align  4
  70 negzero:
  71         .float  -0.0
  72 one:
  73         .float  1.0
  74 negone:
  75         .float  -1.0
  76 two:
  77         .float  2.0
  78 Snan:
  79         .long   0x7f800001
  80 pinfinity:
  81         .long   0x7f800000
  82 ninfinity:
  83         .long   0xff800000
  84 
  85 
  86         ENTRY(pow)
  87         pushl   %ebp
  88         movl    %esp,%ebp
  89         PIC_SETUP(1)
  90 
  91         fldl    8(%ebp)                 / x
  92         fxam                            / determine class of x
  93         fnstsw  %ax                     / store status in %ax
  94         movb    %ah,%dh                 / %dh <- condition code of x
  95 
  96         fldl    16(%ebp)                / y , x
  97         fxam                            / determine class of y
  98         fnstsw  %ax                     / store status in %ax
  99         movb    %ah,%dl                 / %dl <- condition code of y
 100 
 101         call    .pow_main               /// LOCAL
 102         PIC_WRAPUP
 103         leave
 104         ret
 105 
 106 .pow_main:
 107         / x ** 0 is 1 unless x is 0 or a NaN
 108         movb    %dl,%cl
 109         andb    $0x45,%cl
 110         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 111         jne     1f
 112         movb    %dh,%cl
 113         andb    $0x45,%cl
 114         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 115         jne     2f
 116         / 0^0
 117         pushl   $20
 118         jmp     .SVIDerr                / SVID error handler
 119 2:
 120         cmpb    $0x01,%cl               /// C3=0 C2=0 C1=? C0=1 when +-NaN
 121         jne     2f
 122         / NaN^0
 123         pushl   $42
 124         jmp     .SVIDerr
 125 2:
 126         / (not 0 or NaN)^0
 127         fstp    %st(0)                  / x
 128         fstp    %st(0)                  / stack empty
 129         fld1                            / 1
 130         ret
 131 
 132 1:      / y is not zero
 133         PIC_G_LOAD(movzwl,__xpg6,eax)
 134         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 135         cmpl    $0,%eax
 136         je      1f
 137 
 138         / C99: 1 ** anything is 1
 139         fld1                            / 1, y, x
 140         fucomp  %st(2)                  / y, x
 141         fnstsw  %ax                     / store status in %ax
 142         sahf                            / 80387 flags in %ax to 80386 flags
 143         jp      1f                      / so that pow(NaN1,NaN2) returns NaN2
 144         jne     1f
 145         fstp    %st(0)                  / x
 146         ret
 147 
 148 1:
 149         / x ** NaN is NaN
 150         movb    %dl,%cl
 151         andb    $0x45,%cl
 152         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 153         jne     1f
 154         fstp    %st(1)                  / y
 155         ret
 156 
 157 1:      / y is not NaN
 158         / NaN ** y (except 0) is NaN
 159         movb    %dh,%cl
 160         andb    $0x45,%cl
 161         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 162         jne     1f
 163         fstp    %st(0)                  / x
 164         ret
 165 
 166 1:      / x is not NaN
 167         / x ** 1 is x
 168         fcoms   PIC_L(one)              / y , x
 169         fnstsw  %ax                     / store status in %ax
 170         sahf                            / 80387 flags in %ax to 80386 flags
 171         jne     1f
 172         fstp    %st(0)                  / x
 173         ret
 174 
 175 1:      / y is not 1
 176         / +-(|x| > 1) **  +inf is +inf
 177         / +-(|x| > 1) **  -inf is +0
 178         / +-(|x| < 1) **  +inf is +0
 179         / +-(|x| < 1) **  -inf is +inf
 180         / +-(|x| = 1) ** +-inf is NaN
 181         movb    %dl,%cl
 182         andb    $0x47,%cl
 183         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 184         je      .yispinf
 185         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 186         je      .yisninf
 187 
 188         / +0 ** +y (except 0, NaN)              is +0
 189         / -0 ** +y (except 0, NaN, odd int)     is +0
 190         / +0 ** -y (except 0, NaN)              is +inf (z flag)
 191         / -0 ** -y (except 0, NaN, odd int)     is +inf (z flag)
 192         / -0 ** y (odd int)                     is - (+0 ** x)
 193         movb    %dh,%cl
 194         andb    $0x47,%cl
 195         cmpb    $0x40,%cl               / C3=1 C2=0 C1=0 C0=0 when +0
 196         je      .xispzero
 197         cmpb    $0x42,%cl               / C3=1 C2=0 C1=1 C0=0 when -0
 198         je      .xisnzero
 199 
 200         / +inf ** +y (except 0, NaN)    is +inf
 201         / +inf ** -y (except 0, NaN)    is +0
 202         / -inf ** +-y (except 0, NaN)   is -0 ** -+y (NO z flag)
 203         movb    %dh,%cl
 204         andb    $0x47,%cl
 205         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 206         je      .xispinf
 207         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 208         je      .xisninf
 209 
 210         / x ** -1 is 1/x
 211         fcoms   PIC_L(negone)           / y , x
 212         fnstsw  %ax                     / store status in %ax
 213         sahf                            / 80387 flags in %ax to 80386 flags
 214         jne     1f
 215         fld     %st(1)                  / x , y , x
 216         fdivrs  PIC_L(one)              / 1/x , y , x
 217         jmp     .signok                 / check for over/underflow
 218 
 219 1:      / y is not -1
 220         / x ** 2 is x*x
 221         fcoms   PIC_L(two)              / y , x
 222         fnstsw  %ax                     / store status in %ax
 223         sahf                            / 80387 flags in %ax to 80386 flags
 224         jne     1f
 225         fld     %st(1)                  / x , y , x
 226         fld     %st(0)                  / x , x , y , x
 227         fmulp                           / x^2 , y , x
 228         jmp     .signok                 / check for over/underflow
 229 
 230 1:      / y is not 2
 231         / make copies of x & y
 232         fld     %st(1)                  / x , y , x
 233         fld     %st(1)                  / y , x , y , x
 234 
 235         / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
 236         / x ** y (x negative & y not integer) is  NaN
 237         movl    $0,%ecx                 / track whether to flip sign of result
 238         fld     %st(1)                  / x , y , x , y , x
 239         ftst                            / compare %st(0) with 0
 240         fnstsw  %ax                     / store status in %ax
 241         sahf                            / 80387 flags in %ax to 80386 flags
 242         fstp    %st(0)                  / y , x , y , x
 243         ja      .merge                  / x > 0
 244         / x < 0
 245         call    .y_is_int
 246         cmpl    $0,%ecx
 247         jne     1f
 248         / x < 0, y is non-integral
 249         fstp    %st(0)                  / x , y , x
 250         fstp    %st(0)                  / y , x
 251         pushl   $24
 252         jmp     .SVIDerr                / SVID error handler
 253 
 254 1:      / x < 0 & y = int
 255         fxch                            / x , y , y , x
 256         fchs                            / px = -x , y , y , x
 257         fxch                            / y , px , y , x
 258 .merge:
 259         / px > 0
 260         fxch                            / px , y , y , x
 261 
 262         / x**y   =   exp(y*ln(x))
 263         fyl2x                           / t=y*log2(px) , y , x
 264         fld     %st(0)                  / t , t , y , x
 265         frndint                         / [t] , t , y , x
 266         fxch                            / t , [t] , y , x
 267         fucom
 268         fnstsw  %ax                     / store status in %ax
 269         sahf                            / 80387 flags in %ax to 80386 flags
 270         je      1f                      / t is integral
 271         fsub    %st(1),%st              / t-[t] , [t] , y , x
 272         f2xm1                           / 2**(t-[t])-1 , [t] , y , x
 273         fadds   PIC_L(one)              / 2**(t-[t]) , [t] , y , x
 274         fscale                          / 2**t = px**y , [t] , y , x
 275         jmp     2f
 276 1:
 277         fstp    %st(0)                  / t=[t] , y , x
 278         fld1                            / 1 , t , y , x
 279         fscale                          / 1*2**t = x**y , t , y , x
 280 2:
 281         fstp    %st(1)                  / x**y , y , x
 282         cmpl    $1,%ecx
 283         jne     .signok
 284         fchs                            / change sign since x<0 & y=-int
 285 .signok:
 286         subl    $8,%esp
 287         fstpl   (%esp)                  / round to double precision
 288         fldl    (%esp)                  / place result on NPX stack
 289         addl    $8,%esp
 290 
 291         fxam                            / determine class of x**y
 292         fnstsw  %ax                     / store status in %ax
 293         andw    $0x4500,%ax
 294         / check for overflow
 295         cmpw    $0x0500,%ax             / C0=0 C1=1 C2=? C3=1 then +-inf
 296         jne     1f
 297         / x^y overflows
 298         fstp    %st(0)                  / y , x
 299         pushl   $21
 300         jmp     .SVIDerr
 301 1:
 302         / check for underflow
 303         cmpw    $0x4000,%ax             / C0=1 C1=0 C2=? C3=0 then +-0
 304         jne     1f
 305         / x^y underflows
 306         fstp    %st(0)                  / y , x
 307         pushl   $22
 308         jmp     .SVIDerr
 309 1:
 310         fstp    %st(2)                  / y , x**y
 311         fstp    %st(0)                  / x**y
 312         ret
 313 
 314 / ------------------------------------------------------------------------
 315 
 316 .xispinf:
 317         ftst                            / compare %st(0) with 0
 318         fnstsw  %ax                     / store status in %ax
 319         sahf                            / 80387 flags in %ax to 80386 flags
 320         ja      .retpinf                / y > 0
 321         jmp     .retpzero               / y < 0
 322 
 323 .xisninf:
 324         / -inf ** +-y is -0 ** -+y
 325         fchs                            / -y , x
 326         flds    PIC_L(negzero)          / -0 , -y , x
 327         fstp    %st(2)                  / -y , -0
 328         jmp     .xisnzero
 329 
 330 .yispinf:
 331         fld     %st(1)                  / x , y , x
 332         fabs                            / |x| , y , x
 333         fcomps  PIC_L(one)              / y , x
 334         fnstsw  %ax                     / store status in %ax
 335         sahf                            / 80387 flags in %ax to 80386 flags
 336         je      .retponeorinvalid       / x == -1       C99
 337         ja      .retpinf                / |x| > 1
 338         jmp     .retpzero               / |x| < 1
 339 
 340 .yisninf:
 341         fld     %st(1)                  / x , y , x
 342         fabs                            / |x| , y , x
 343         fcomps  PIC_L(one)              / y , x
 344         fnstsw  %ax                     / store status in %ax
 345         sahf                            / 80387 flags in %ax to 80386 flags
 346         je      .retponeorinvalid       / x == -1       C99
 347         ja      .retpzero               / |x| > 1
 348         jmp     .retpinf                / |x| < 1
 349 
 350 .xispzero:
 351         / y cannot be 0 or NaN ; stack has      y , x
 352         ftst                            / compare %st(0) with 0
 353         fnstsw  %ax                     / store status in %ax
 354         sahf                            / 80387 flags in %ax to 80386 flags
 355         ja      .retpzero               / y > 0
 356         / x = +0 & y < 0
 357         jmp     .SVIDzerotoneg
 358 
 359 .xisnzero:
 360         / y cannot be 0 or NaN ; stack has      y , x
 361         call    .y_is_int
 362         cmpl    $1,%ecx
 363         jne     1f                      / y is not an odd integer
 364         / y is an odd integer
 365         ftst                            / compare %st(0) with 0
 366         fnstsw  %ax                     / store status in %ax
 367         sahf                            / 80387 flags in %ax to 80386 flags
 368         ja      .retnzero               / y > 0
 369         / x = -0 & y < 0 (odd int)       return -inf (z flag)
 370         / x = -inf & y != 0 or NaN  return -inf (NO z flag)
 371         movb    %dh,%cl
 372         andb    $0x45,%cl
 373         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 374         jne     .SVIDzerotoneg
 375         fstp    %st(0)                  / x
 376         fstp    %st(0)                  / stack empty
 377         flds    PIC_L(ninfinity)        / -inf
 378         ret
 379 
 380 1:      / y is not an odd integer
 381         ftst                            / compare %st(0) with 0
 382         fnstsw  %ax                     / store status in %ax
 383         sahf                            / 80387 flags in %ax to 80386 flags
 384         ja      .retpzero               / y > 0
 385         / x = -0 & y < 0 (not odd int)   return +inf (z flag)
 386         / x = -inf & y not 0 or NaN         return +inf (NO z flag)
 387         movb    %dh,%cl
 388         andb    $0x45,%cl
 389         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 390         jne     .SVIDzerotoneg
 391         jmp     .retpinf                / return +inf (NO z flag)
 392 
 393 .retpzero:
 394         fstp    %st(0)                  / x
 395         fstp    %st(0)                  / stack empty
 396         fldz                            / +0
 397         ret
 398 
 399 .retnzero:
 400         fstp    %st(0)                  / x
 401         fstp    %st(0)                  / stack empty
 402         flds    PIC_L(negzero)          / -0
 403         ret
 404 
 405 .retponeorinvalid:
 406         PIC_G_LOAD(movzwl,__xpg6,eax)
 407         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 408         cmpl    $0,%eax
 409         je      1f
 410         fstp    %st(0)                  / x
 411         fstp    %st(0)                  / stack empty
 412         fld1                            / 1
 413         ret
 414 
 415 1:
 416         fstp    %st(0)                  / x
 417         fstp    %st(0)                  / stack empty
 418         flds    PIC_L(Snan)             / Q NaN (i flag)
 419         fwait
 420         ret
 421 
 422 .retpinf:
 423         fstp    %st(0)                  / x
 424         fstp    %st(0)                  / stack empty
 425         flds    PIC_L(pinfinity)        / +inf
 426         ret
 427 
 428 .SVIDzerotoneg:
 429         pushl   $23
 430 .SVIDerr:
 431         / At this point the fp stack contains  y , x  and the number
 432         / of the error case has been pushed on the memory stack.
 433         subl    $16,%esp
 434         fstpl   8(%esp)                 / push y
 435         fstpl   (%esp)                  / push x; NPX stack empty
 436         call    PIC_F(_SVID_libm_err)   / report result/error according to SVID
 437         addl    $20,%esp
 438         ret
 439 
 440 / Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
 441 / 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
 442 / %edx.
 443 .y_is_int:
 444         movl    20(%ebp),%eax
 445         andl    $0x7fffffff,%eax        / |y|
 446         cmpl    $0x43400000,%eax
 447         jae     1f                      / |y| >= 2^53, an even int
 448         cmpl    $0x3ff00000,%eax
 449         jb      2f                      / |y| < 1, can't be an int
 450         movl    %eax,%ecx
 451         sarl    $20,%ecx
 452         subl    $0x433,%ecx
 453         negl    %ecx                    / 52 - unbiased exponent of y
 454         movl    16(%ebp),%eax
 455         bsfl    %eax,%eax               / index of least sig. 1 bit
 456         jne     3f                      / jump if 1 bit found
 457         movl    20(%ebp),%eax
 458         bsfl    %eax,%eax
 459         addl    $32,%eax                / 32 + index of least sig. 1 bit
 460 3:
 461         cmpl    %ecx,%eax
 462         jb      2f
 463         ja      1f
 464         movl    $1,%ecx
 465         ret
 466 1:
 467         movl    $2,%ecx
 468         ret
 469 2:
 470         xorl    %ecx,%ecx
 471         ret
 472         .align  4
 473         SET_SIZE(pow)