1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 /* 22 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 23 */ 24 /* 25 * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 26 * Use is subject to license terms. 27 */ 28 29 .file "powl.s" 30 31 / Special cases: 32 / 33 / x ** 0 is 1 34 / 1 ** y is 1 (C99) 35 / x ** NaN is NaN 36 / NaN ** y (except 0) is NaN 37 / x ** 1 is x 38 / +-(|x| > 1) ** +inf is +inf 39 / +-(|x| > 1) ** -inf is +0 40 / +-(|x| < 1) ** +inf is +0 41 / +-(|x| < 1) ** -inf is +inf 42 / (-1) ** +-inf is +1 (C99) 43 / +0 ** +y (except 0, NaN) is +0 44 / -0 ** +y (except 0, NaN, odd int) is +0 45 / +0 ** -y (except 0, NaN) is +inf (z flag) 46 / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) 47 / -0 ** y (odd int) is - (+0 ** x) 48 / +inf ** +y (except 0, NaN) is +inf 49 / +inf ** -y (except 0, NaN) is +0 50 / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) 51 / x ** -1 is 1/x 52 / x ** 2 is x*x 53 / -x ** y (an integer) is (-1)**(y) * (+x)**(y) 54 / x ** y (x negative & y not integer) is NaN (i flag) 55 56 #include "libm.h" 57 LIBM_ANSI_PRAGMA_WEAK(powl,function) 58 #include "libm_synonyms.h" 59 #include "xpg6.h" 60 61 #undef fabs 62 63 .data 64 .align 16 65 negzero: 66 .float -0.0 67 half: 68 .float 0.5 69 one: 70 .float 1.0 71 negone: 72 .float -1.0 73 two: 74 .float 2.0 75 Snan: 76 .4byte 0x7f800001 77 pinfinity: 78 .4byte 0x7f800000 79 ninfinity: 80 .4byte 0xff800000 81 82 83 ENTRY(powl) 84 pushq %rbp 85 movq %rsp,%rbp 86 PIC_SETUP(1) 87 88 fldt 16(%rbp) / x 89 fxam / determine class of x 90 fnstsw %ax / store status in %ax 91 movb %ah,%dh / %dh <- condition code of x 92 93 fldt 32(%rbp) / y , x 94 fxam / determine class of y 95 fnstsw %ax / store status in %ax 96 movb %ah,%dl / %dl <- condition code of y 97 98 call .pow_main /// LOCAL 99 PIC_WRAPUP 100 leave 101 ret 102 103 .pow_main: 104 / x ** 0 is 1 105 movb %dl,%cl 106 andb $0x45,%cl 107 cmpb $0x40,%cl / C3=1 C2=0 C1=? C0=0 when +-0 108 jne 1f 109 fstp %st(0) / x 110 fstp %st(0) / stack empty 111 fld1 / 1 112 ret 113 114 1: / y is not zero 115 PIC_G_LOAD(movzwq,__xpg6,rax) 116 andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax 117 cmpl $0,%eax 118 je 1f 119 120 / C99: 1 ** anything is 1 121 fld1 / 1, y, x 122 fucomip %st(2),%st / y, x 123 jp 1f / so that pow(NaN1,NaN2) returns NaN2 124 jne 1f 125 fstp %st(0) / x 126 ret 127 128 1: 129 / x ** NaN is NaN 130 movb %dl,%cl 131 andb $0x45,%cl 132 cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN 133 jne 1f 134 fstp %st(1) / y 135 ret 136 137 1: / y is not NaN 138 / NaN ** y (except 0) is NaN 139 movb %dh,%cl 140 andb $0x45,%cl 141 cmpb $0x01,%cl / C3=0 C2=0 C1=? C0=1 when +-NaN 142 jne 1f 143 fstp %st(0) / x 144 ret 145 146 1: / x is not NaN 147 / x ** 1 is x 148 fld1 / 1, y, x 149 fcomip %st(1),%st / y, x 150 jne 1f 151 fstp %st(0) / x 152 ret 153 154 1: / y is not 1 155 / +-(|x| > 1) ** +inf is +inf 156 / +-(|x| > 1) ** -inf is +0 157 / +-(|x| < 1) ** +inf is +0 158 / +-(|x| < 1) ** -inf is +inf 159 / +-(|x| = 1) ** +-inf is NaN 160 movb %dl,%cl 161 andb $0x47,%cl 162 cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf 163 je .yispinf 164 cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf 165 je .yisninf 166 167 / +0 ** +y (except 0, NaN) is +0 168 / -0 ** +y (except 0, NaN, odd int) is +0 169 / +0 ** -y (except 0, NaN) is +inf (z flag) 170 / -0 ** -y (except 0, NaN, odd int) is +inf (z flag) 171 / -0 ** y (odd int) is - (+0 ** x) 172 movb %dh,%cl 173 andb $0x47,%cl 174 cmpb $0x40,%cl / C3=1 C2=0 C1=0 C0=0 when +0 175 je .xispzero 176 cmpb $0x42,%cl / C3=1 C2=0 C1=1 C0=0 when -0 177 je .xisnzero 178 179 / +inf ** +y (except 0, NaN) is +inf 180 / +inf ** -y (except 0, NaN) is +0 181 / -inf ** +-y (except 0, NaN) is -0 ** -+y (NO z flag) 182 movb %dh,%cl 183 andb $0x47,%cl 184 cmpb $0x05,%cl / C3=0 C2=1 C1=0 C0=1 when +inf 185 je .xispinf 186 cmpb $0x07,%cl / C3=0 C2=1 C1=1 C0=1 when -inf 187 je .xisninf 188 189 / x ** -1 is 1/x 190 flds PIC_L(negone) / -1, y, x 191 fcomip %st(1),%st / y, x 192 jne 1f 193 fld %st(1) / x , y , x 194 fdivrs PIC_L(one) / 1/x , y , x 195 jmp .signok / check for over/underflow 196 197 1: / y is not -1 198 / x ** 2 is x*x 199 flds PIC_L(two) / 2, y , x 200 fcomip %st(1),%st / y, x 201 jne 1f 202 fld %st(1) / x , y , x 203 fld %st(0) / x , x , y , x 204 fmulp / x^2 , y , x 205 jmp .signok / check for over/underflow 206 207 1: / y is not 2 208 / x ** 1/2 is sqrt(x) 209 flds PIC_L(half) / 1/2, y , x 210 fcomip %st(1),%st / y, x 211 jne 1f 212 fld %st(1) / x , y , x 213 fsqrt / sqrt(x) , y , x 214 jmp .signok / check for over/underflow 215 216 1: / y is not 1/2 217 / make copies of x & y 218 fld %st(1) / x , y , x 219 fld %st(1) / y , x , y , x 220 221 / -x ** y (an integer) is (-1)**(y) * (+x)**(y) 222 / x ** y (x negative & y not integer) is NaN 223 movl $0,%ecx / track whether to flip sign of result 224 fldz / 0 , y , x , y , x 225 fcomip %st(2),%st / compare 0 with %st(2) 226 jb .merge / 0 < x 227 / x < 0 228 call .y_is_int 229 cmpl $0,%ecx 230 jne 1f 231 / x < 0 & y != int so x**y = NaN (i flag) 232 fstp %st(0) / x , y , x 233 fstp %st(0) / y , x 234 fstp %st(0) / x 235 fstp %st(0) / stack empty 236 fldz 237 fdiv %st,%st(0) / 0/0 238 ret 239 240 1: / x < 0 & y = int 241 fxch / x , y , y , x 242 fchs / px = -x , y , y , x 243 fxch / y , px , y , x 244 .merge: 245 / px > 0 246 fxch / px , y , y , x 247 248 / x**y = exp(y*ln(x)) 249 fyl2x / t=y*log2(px) , y , x 250 fld %st(0) / t , t , y , x 251 frndint / [t] , t , y , x 252 fxch / t , [t] , y , x 253 fucomi %st(1),%st 254 je 1f / t is integral 255 fsub %st(1),%st / t-[t] , [t] , y , x 256 f2xm1 / 2**(t-[t])-1 , [t] , y , x 257 fadds PIC_L(one) / 2**(t-[t]) , [t] , y , x 258 fscale / 2**t = px**y , [t] , y , x 259 jmp 2f 260 1: 261 fstp %st(0) / t=[t] , y , x 262 fld1 / 1 , t , y , x 263 fscale / 1*2**t = x**y , t , y , x 264 2: 265 fstp %st(1) / x**y , y , x 266 cmpl $1,%ecx 267 jne .signok 268 fchs / change sign since x<0 & y=-int 269 .signok: 270 fstp %st(2) / y , x**y 271 fstp %st(0) / x**y 272 ret 273 274 / ------------------------------------------------------------------------ 275 276 .xispinf: 277 fldz 278 fcomip %st(1),%st / compare 0 with %st(1) 279 jb .retpinf / 0 < y 280 jmp .retpzero / y < 0 281 282 .xisninf: 283 / -inf ** +-y is -0 ** -+y 284 fchs / -y , x 285 flds PIC_L(negzero) / -0 , -y , x 286 fstp %st(2) / -y , -0 287 jmp .xisnzero 288 289 .yispinf: 290 fld %st(1) / x , y , x 291 fabs / |x| , y , x 292 flds PIC_L(one) / 1 , |x| , y , x 293 fcomip %st(1),%st / |x| , y , x 294 fstp %st(0) / y , x 295 je .retponeorinvalid / x == -1 C99 296 jb .retpinf / 1 < |x| 297 jmp .retpzero / |x| < 1 298 299 .yisninf: 300 fld %st(1) / x , y , x 301 fabs / |x| , y , x 302 flds PIC_L(one) / 1 , |x| , y , x 303 fcomip %st(1),%st / |x| , y , x 304 fstp %st(0) / y , x 305 je .retponeorinvalid / x == -1 C99 306 jb .retpzero / 1 < |x| 307 jmp .retpinf / |x| < 1 308 309 .xispzero: 310 / y cannot be 0 or NaN ; stack has y , x 311 fldz / 0 , y , x 312 fcomip %st(1),%st / compare 0 with %st(1) 313 jb .retpzero / 0 < y 314 / x = +0 & y < 0 so x**y = +inf 315 jmp .retpinfzflag / ret +inf & z flag 316 317 .xisnzero: 318 / y cannot be 0 or NaN ; stack has y , x 319 call .y_is_int 320 cmpl $1,%ecx 321 jne 1f / y is not an odd integer 322 / y is an odd integer 323 fldz 324 fcomip %st(1),%st / compare 0 with %st(1) 325 jb .retnzero / 0 < y 326 / x = -0 & y < 0 (odd int) return -inf (z flag) 327 / x = -inf & y != 0 or NaN return -inf (NO z flag) 328 movb %dh,%cl 329 andb $0x45,%cl 330 cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf 331 je 2f 332 fdiv %st,%st(1) / y / x, x (raise z flag) 333 2: 334 fstp %st(0) / x 335 fstp %st(0) / stack empty 336 flds PIC_L(ninfinity) / -inf 337 ret 338 339 1: / y is not an odd integer 340 fldz 341 fcomip %st(1),%st / compare 0 with %st(1) 342 jb .retpzero / 0 < y 343 / x = -0 & y < 0 (not odd int) return +inf (z flag) 344 / x = -inf & y not 0 or NaN return +inf (NO z flag) 345 movb %dh,%cl 346 andb $0x45,%cl 347 cmpb $0x05,%cl / C3=0 C2=1 C1=? C0=1 when +-inf 348 jne .retpinfzflag / ret +inf & divide-by-0 flag 349 jmp .retpinf / return +inf (NO z flag) 350 351 .retpzero: 352 fstp %st(0) / x 353 fstp %st(0) / stack empty 354 fldz / +0 355 ret 356 357 .retnzero: 358 fstp %st(0) / x 359 fstp %st(0) / stack empty 360 flds PIC_L(negzero) / -0 361 ret 362 363 .retponeorinvalid: 364 PIC_G_LOAD(movzwq,__xpg6,rax) 365 andl $_C99SUSv3_pow_treats_Inf_as_an_even_int,%eax 366 cmpl $0,%eax 367 je 1f 368 fstp %st(0) / x 369 fstp %st(0) / stack empty 370 fld1 / 1 371 ret 372 373 1: 374 fstp %st(0) / x 375 fstp %st(0) / stack empty 376 flds PIC_L(Snan) / Q NaN (i flag) 377 fwait 378 ret 379 380 .retpinf: 381 fstp %st(0) / x 382 fstp %st(0) / stack empty 383 flds PIC_L(pinfinity) / +inf 384 ret 385 386 .retpinfzflag: 387 fstp %st(0) / x 388 fstp %st(0) / stack empty 389 fldz 390 fdivrs PIC_L(one) / 1/0 391 ret 392 393 / Set %ecx to 2 if y is an even integer, 1 if y is an odd integer, 394 / 0 otherwise. Assume y is not zero. Do not raise inexact or modify 395 / %edx. 396 .y_is_int: 397 movl 40(%rbp),%eax 398 andl $0x7fff,%eax / exponent of y 399 cmpl $0x403f,%eax 400 jae 1f / |y| >= 2^64, an even int 401 cmpl $0x3fff,%eax 402 jb 2f / |y| < 1, can't be an int 403 movl %eax,%ecx 404 subl $0x403e,%ecx 405 negl %ecx / 63 - unbiased exponent of y 406 movq 32(%rbp),%rax 407 bsfq %rax,%rax / index of least sig. 1 bit 408 cmpl %ecx,%eax 409 jb 2f 410 ja 1f 411 movl $1,%ecx 412 ret 413 1: 414 movl $2,%ecx 415 ret 416 2: 417 xorl %ecx,%ecx 418 ret 419 .align 16 420 SET_SIZE(powl)