il_5261 New usr/src/lib/libm/i386/src/pow.s

   1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  23  */
  24 /*
  25  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
  26  * Use is subject to license terms.
  27  */
  28 
  29         .file "pow.s"
  30 
  31 / Note: 0^NaN should not signal "invalid" but this implementation
  32 / does because y is placed on the NPX stack.
  33 
  34 / Special cases:
  35 /
  36 / x ** 0 is 1                           _SVID_libm_err if x is 0 or NaN
  37 / 1 ** y is 1                           (C99)
  38 / x ** NaN is NaN
  39 / NaN ** y (except 0) is NaN
  40 / x ** 1 is x
  41 / +-(|x| > 1) **  +inf is +inf
  42 / +-(|x| > 1) **  -inf is +0
  43 / +-(|x| < 1) **  +inf is +0
  44 / +-(|x| < 1) **  -inf is +inf
  45 / (-1) ** +-inf is +1                   (C99)
  46 / +0 ** +y (except 0, NaN)              is +0
  47 / -0 ** +y (except 0, NaN, odd int)     is +0
  48 / -0 ** +y (odd int)                    is -0
  49 / +-0 ** -y (except 0, NaN)             _SVID_libm_err
  50 / +inf ** +y (except 0, NaN)            is +inf
  51 / +inf ** -y (except 0, NaN)            is +0
  52 / -inf ** +-y (except 0, NaN)           is -0 ** -+y (NO z flag)
  53 / x ** -1 is 1/x
  54 / x ** 2 is x*x
  55 / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
  56 / x ** y (x negative & y not integer)       _SVID_libm_err
  57 / if x and y are finite and x**y = 0    _SVID_libm_err (underflow)
  58 / if x and y are finite and x**y = inf  _SVID_libm_err (overflow)
  59 
  60 #include "libm.h"
  61 LIBM_ANSI_PRAGMA_WEAK(pow,function)
  62 #include "libm_protos.h"
  63 #include "xpg6.h"
  64 
  65 #undef fabs
  66 
  67         .data
  68         .align  4
  69 negzero:
  70         .float  -0.0
  71 one:
  72         .float  1.0
  73 negone:
  74         .float  -1.0
  75 two:
  76         .float  2.0
  77 Snan:
  78         .long   0x7f800001
  79 pinfinity:
  80         .long   0x7f800000
  81 ninfinity:
  82         .long   0xff800000
  83 
  84 
  85         ENTRY(pow)
  86         pushl   %ebp
  87         movl    %esp,%ebp
  88         PIC_SETUP(1)
  89 
  90         fldl    8(%ebp)                 / x
  91         fxam                            / determine class of x
  92         fnstsw  %ax                     / store status in %ax
  93         movb    %ah,%dh                 / %dh <- condition code of x
  94 
  95         fldl    16(%ebp)                / y , x
  96         fxam                            / determine class of y
  97         fnstsw  %ax                     / store status in %ax
  98         movb    %ah,%dl                 / %dl <- condition code of y
  99 
 100         call    .pow_main               /// LOCAL
 101         PIC_WRAPUP
 102         leave
 103         ret
 104 
 105 .pow_main:
 106         / x ** 0 is 1 unless x is 0 or a NaN
 107         movb    %dl,%cl
 108         andb    $0x45,%cl
 109         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 110         jne     1f
 111         movb    %dh,%cl
 112         andb    $0x45,%cl
 113         cmpb    $0x40,%cl               / C3=1 C2=0 C1=? C0=0 when +-0
 114         jne     2f
 115         / 0^0
 116         pushl   $20
 117         jmp     .SVIDerr                / SVID error handler
 118 2:
 119         cmpb    $0x01,%cl               /// C3=0 C2=0 C1=? C0=1 when +-NaN
 120         jne     2f
 121         / NaN^0
 122         pushl   $42
 123         jmp     .SVIDerr
 124 2:
 125         / (not 0 or NaN)^0
 126         fstp    %st(0)                  / x
 127         fstp    %st(0)                  / stack empty
 128         fld1                            / 1
 129         ret
 130 
 131 1:      / y is not zero
 132         PIC_G_LOAD(movzwl,__xpg6,eax)
 133         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 134         cmpl    $0,%eax
 135         je      1f
 136 
 137         / C99: 1 ** anything is 1
 138         fld1                            / 1, y, x
 139         fucomp  %st(2)                  / y, x
 140         fnstsw  %ax                     / store status in %ax
 141         sahf                            / 80387 flags in %ax to 80386 flags
 142         jp      1f                      / so that pow(NaN1,NaN2) returns NaN2
 143         jne     1f
 144         fstp    %st(0)                  / x
 145         ret
 146 
 147 1:
 148         / x ** NaN is NaN
 149         movb    %dl,%cl
 150         andb    $0x45,%cl
 151         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 152         jne     1f
 153         fstp    %st(1)                  / y
 154         ret
 155 
 156 1:      / y is not NaN
 157         / NaN ** y (except 0) is NaN
 158         movb    %dh,%cl
 159         andb    $0x45,%cl
 160         cmpb    $0x01,%cl               / C3=0 C2=0 C1=? C0=1 when +-NaN
 161         jne     1f
 162         fstp    %st(0)                  / x
 163         ret
 164 
 165 1:      / x is not NaN
 166         / x ** 1 is x
 167         fcoms   PIC_L(one)              / y , x
 168         fnstsw  %ax                     / store status in %ax
 169         sahf                            / 80387 flags in %ax to 80386 flags
 170         jne     1f
 171         fstp    %st(0)                  / x
 172         ret
 173 
 174 1:      / y is not 1
 175         / +-(|x| > 1) **  +inf is +inf
 176         / +-(|x| > 1) **  -inf is +0
 177         / +-(|x| < 1) **  +inf is +0
 178         / +-(|x| < 1) **  -inf is +inf
 179         / +-(|x| = 1) ** +-inf is NaN
 180         movb    %dl,%cl
 181         andb    $0x47,%cl
 182         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 183         je      .yispinf
 184         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 185         je      .yisninf
 186 
 187         / +0 ** +y (except 0, NaN)              is +0
 188         / -0 ** +y (except 0, NaN, odd int)     is +0
 189         / +0 ** -y (except 0, NaN)              is +inf (z flag)
 190         / -0 ** -y (except 0, NaN, odd int)     is +inf (z flag)
 191         / -0 ** y (odd int)                     is - (+0 ** x)
 192         movb    %dh,%cl
 193         andb    $0x47,%cl
 194         cmpb    $0x40,%cl               / C3=1 C2=0 C1=0 C0=0 when +0
 195         je      .xispzero
 196         cmpb    $0x42,%cl               / C3=1 C2=0 C1=1 C0=0 when -0
 197         je      .xisnzero
 198 
 199         / +inf ** +y (except 0, NaN)    is +inf
 200         / +inf ** -y (except 0, NaN)    is +0
 201         / -inf ** +-y (except 0, NaN)   is -0 ** -+y (NO z flag)
 202         movb    %dh,%cl
 203         andb    $0x47,%cl
 204         cmpb    $0x05,%cl               / C3=0 C2=1 C1=0 C0=1 when +inf
 205         je      .xispinf
 206         cmpb    $0x07,%cl               / C3=0 C2=1 C1=1 C0=1 when -inf
 207         je      .xisninf
 208 
 209         / x ** -1 is 1/x
 210         fcoms   PIC_L(negone)           / y , x
 211         fnstsw  %ax                     / store status in %ax
 212         sahf                            / 80387 flags in %ax to 80386 flags
 213         jne     1f
 214         fld     %st(1)                  / x , y , x
 215         fdivrs  PIC_L(one)              / 1/x , y , x
 216         jmp     .signok                 / check for over/underflow
 217 
 218 1:      / y is not -1
 219         / x ** 2 is x*x
 220         fcoms   PIC_L(two)              / y , x
 221         fnstsw  %ax                     / store status in %ax
 222         sahf                            / 80387 flags in %ax to 80386 flags
 223         jne     1f
 224         fld     %st(1)                  / x , y , x
 225         fld     %st(0)                  / x , x , y , x
 226         fmulp                           / x^2 , y , x
 227         jmp     .signok                 / check for over/underflow
 228 
 229 1:      / y is not 2
 230         / make copies of x & y
 231         fld     %st(1)                  / x , y , x
 232         fld     %st(1)                  / y , x , y , x
 233 
 234         / -x ** y (an integer) is (-1)**(y) * (+x)**(y)
 235         / x ** y (x negative & y not integer) is  NaN
 236         movl    $0,%ecx                 / track whether to flip sign of result
 237         fld     %st(1)                  / x , y , x , y , x
 238         ftst                            / compare %st(0) with 0
 239         fnstsw  %ax                     / store status in %ax
 240         sahf                            / 80387 flags in %ax to 80386 flags
 241         fstp    %st(0)                  / y , x , y , x
 242         ja      .merge                  / x > 0
 243         / x < 0
 244         call    .y_is_int
 245         cmpl    $0,%ecx
 246         jne     1f
 247         / x < 0, y is non-integral
 248         fstp    %st(0)                  / x , y , x
 249         fstp    %st(0)                  / y , x
 250         pushl   $24
 251         jmp     .SVIDerr                / SVID error handler
 252 
 253 1:      / x < 0 & y = int
 254         fxch                            / x , y , y , x
 255         fchs                            / px = -x , y , y , x
 256         fxch                            / y , px , y , x
 257 .merge:
 258         / px > 0
 259         fxch                            / px , y , y , x
 260 
 261         / x**y   =   exp(y*ln(x))
 262         fyl2x                           / t=y*log2(px) , y , x
 263         fld     %st(0)                  / t , t , y , x
 264         frndint                         / [t] , t , y , x
 265         fxch                            / t , [t] , y , x
 266         fucom
 267         fnstsw  %ax                     / store status in %ax
 268         sahf                            / 80387 flags in %ax to 80386 flags
 269         je      1f                      / t is integral
 270         fsub    %st(1),%st              / t-[t] , [t] , y , x
 271         f2xm1                           / 2**(t-[t])-1 , [t] , y , x
 272         fadds   PIC_L(one)              / 2**(t-[t]) , [t] , y , x
 273         fscale                          / 2**t = px**y , [t] , y , x
 274         jmp     2f
 275 1:
 276         fstp    %st(0)                  / t=[t] , y , x
 277         fld1                            / 1 , t , y , x
 278         fscale                          / 1*2**t = x**y , t , y , x
 279 2:
 280         fstp    %st(1)                  / x**y , y , x
 281         cmpl    $1,%ecx
 282         jne     .signok
 283         fchs                            / change sign since x<0 & y=-int
 284 .signok:
 285         subl    $8,%esp
 286         fstpl   (%esp)                  / round to double precision
 287         fldl    (%esp)                  / place result on NPX stack
 288         addl    $8,%esp
 289 
 290         fxam                            / determine class of x**y
 291         fnstsw  %ax                     / store status in %ax
 292         andw    $0x4500,%ax
 293         / check for overflow
 294         cmpw    $0x0500,%ax             / C0=0 C1=1 C2=? C3=1 then +-inf
 295         jne     1f
 296         / x^y overflows
 297         fstp    %st(0)                  / y , x
 298         pushl   $21
 299         jmp     .SVIDerr
 300 1:
 301         / check for underflow
 302         cmpw    $0x4000,%ax             / C0=1 C1=0 C2=? C3=0 then +-0
 303         jne     1f
 304         / x^y underflows
 305         fstp    %st(0)                  / y , x
 306         pushl   $22
 307         jmp     .SVIDerr
 308 1:
 309         fstp    %st(2)                  / y , x**y
 310         fstp    %st(0)                  / x**y
 311         ret
 312 
 313 / ------------------------------------------------------------------------
 314 
 315 .xispinf:
 316         ftst                            / compare %st(0) with 0
 317         fnstsw  %ax                     / store status in %ax
 318         sahf                            / 80387 flags in %ax to 80386 flags
 319         ja      .retpinf                / y > 0
 320         jmp     .retpzero               / y < 0
 321 
 322 .xisninf:
 323         / -inf ** +-y is -0 ** -+y
 324         fchs                            / -y , x
 325         flds    PIC_L(negzero)          / -0 , -y , x
 326         fstp    %st(2)                  / -y , -0
 327         jmp     .xisnzero
 328 
 329 .yispinf:
 330         fld     %st(1)                  / x , y , x
 331         fabs                            / |x| , y , x
 332         fcomps  PIC_L(one)              / y , x
 333         fnstsw  %ax                     / store status in %ax
 334         sahf                            / 80387 flags in %ax to 80386 flags
 335         je      .retponeorinvalid       / x == -1       C99
 336         ja      .retpinf                / |x| > 1
 337         jmp     .retpzero               / |x| < 1
 338 
 339 .yisninf:
 340         fld     %st(1)                  / x , y , x
 341         fabs                            / |x| , y , x
 342         fcomps  PIC_L(one)              / y , x
 343         fnstsw  %ax                     / store status in %ax
 344         sahf                            / 80387 flags in %ax to 80386 flags
 345         je      .retponeorinvalid       / x == -1       C99
 346         ja      .retpzero               / |x| > 1
 347         jmp     .retpinf                / |x| < 1
 348 
 349 .xispzero:
 350         / y cannot be 0 or NaN ; stack has      y , x
 351         ftst                            / compare %st(0) with 0
 352         fnstsw  %ax                     / store status in %ax
 353         sahf                            / 80387 flags in %ax to 80386 flags
 354         ja      .retpzero               / y > 0
 355         / x = +0 & y < 0
 356         jmp     .SVIDzerotoneg
 357 
 358 .xisnzero:
 359         / y cannot be 0 or NaN ; stack has      y , x
 360         call    .y_is_int
 361         cmpl    $1,%ecx
 362         jne     1f                      / y is not an odd integer
 363         / y is an odd integer
 364         ftst                            / compare %st(0) with 0
 365         fnstsw  %ax                     / store status in %ax
 366         sahf                            / 80387 flags in %ax to 80386 flags
 367         ja      .retnzero               / y > 0
 368         / x = -0 & y < 0 (odd int)       return -inf (z flag)
 369         / x = -inf & y != 0 or NaN  return -inf (NO z flag)
 370         movb    %dh,%cl
 371         andb    $0x45,%cl
 372         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 373         jne     .SVIDzerotoneg
 374         fstp    %st(0)                  / x
 375         fstp    %st(0)                  / stack empty
 376         flds    PIC_L(ninfinity)        / -inf
 377         ret
 378 
 379 1:      / y is not an odd integer
 380         ftst                            / compare %st(0) with 0
 381         fnstsw  %ax                     / store status in %ax
 382         sahf                            / 80387 flags in %ax to 80386 flags
 383         ja      .retpzero               / y > 0
 384         / x = -0 & y < 0 (not odd int)   return +inf (z flag)
 385         / x = -inf & y not 0 or NaN         return +inf (NO z flag)
 386         movb    %dh,%cl
 387         andb    $0x45,%cl
 388         cmpb    $0x05,%cl               / C3=0 C2=1 C1=? C0=1 when +-inf
 389         jne     .SVIDzerotoneg
 390         jmp     .retpinf                / return +inf (NO z flag)
 391 
 392 .retpzero:
 393         fstp    %st(0)                  / x
 394         fstp    %st(0)                  / stack empty
 395         fldz                            / +0
 396         ret
 397 
 398 .retnzero:
 399         fstp    %st(0)                  / x
 400         fstp    %st(0)                  / stack empty
 401         flds    PIC_L(negzero)          / -0
 402         ret
 403 
 404 .retponeorinvalid:
 405         PIC_G_LOAD(movzwl,__xpg6,eax)
 406         andl    $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax
 407         cmpl    $0,%eax
 408         je      1f
 409         fstp    %st(0)                  / x
 410         fstp    %st(0)                  / stack empty
 411         fld1                            / 1
 412         ret
 413 
 414 1:
 415         fstp    %st(0)                  / x
 416         fstp    %st(0)                  / stack empty
 417         flds    PIC_L(Snan)             / Q NaN (i flag)
 418         fwait
 419         ret
 420 
 421 .retpinf:
 422         fstp    %st(0)                  / x
 423         fstp    %st(0)                  / stack empty
 424         flds    PIC_L(pinfinity)        / +inf
 425         ret
 426 
 427 .SVIDzerotoneg:
 428         pushl   $23
 429 .SVIDerr:
 430         / At this point the fp stack contains  y , x  and the number
 431         / of the error case has been pushed on the memory stack.
 432         subl    $16,%esp
 433         fstpl   8(%esp)                 / push y
 434         fstpl   (%esp)                  / push x; NPX stack empty
 435         call    PIC_F(_SVID_libm_err)   / report result/error according to SVID
 436         addl    $20,%esp
 437         ret
 438 
 439 / Set %ecx to 2 if y is an even integer, 1 if y is an odd integer,
 440 / 0 otherwise.  Assume y is not zero.  Do not raise inexact or modify
 441 / %edx.
 442 .y_is_int:
 443         movl    20(%ebp),%eax
 444         andl    $0x7fffffff,%eax        / |y|
 445         cmpl    $0x43400000,%eax
 446         jae     1f                      / |y| >= 2^53, an even int
 447         cmpl    $0x3ff00000,%eax
 448         jb      2f                      / |y| < 1, can't be an int
 449         movl    %eax,%ecx
 450         sarl    $20,%ecx
 451         subl    $0x433,%ecx
 452         negl    %ecx                    / 52 - unbiased exponent of y
 453         movl    16(%ebp),%eax
 454         bsfl    %eax,%eax               / index of least sig. 1 bit
 455         jne     3f                      / jump if 1 bit found
 456         movl    20(%ebp),%eax
 457         bsfl    %eax,%eax
 458         addl    $32,%eax                / 32 + index of least sig. 1 bit
 459 3:
 460         cmpl    %ecx,%eax
 461         jb      2f
 462         ja      1f
 463         movl    $1,%ecx
 464         ret
 465 1:
 466         movl    $2,%ecx
 467         ret
 468 2:
 469         xorl    %ecx,%ecx
 470         ret
 471         .align  4
 472         SET_SIZE(pow)