/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011, Richard Lowe
 */

#ifndef _FENV_INLINES_H
#define _FENV_INLINES_H

#ifdef __GNUC__
#ifdef __cplusplus
extern "C" {
#endif

/* __GNU_INLINE (GNU extern-inline semantics) presumably comes in here. */
#include <sys/types.h>

#if defined(__x86)
/*
 * Floating point Control Word and Status Word
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 */
/*
 * NOTE(review): as used by __fenv_getcwsw()/__fenv_setcwsw() below, the
 * low half (the field named 'cw') actually carries the x87 *status* word
 * and the high half (named 'sw') the *control* word.  The two accessors
 * agree with each other, so the packed layout is internally consistent,
 * but the field names look swapped relative to their contents -- verify
 * against the libm callers before relying on the names.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};

/*
 * Read the x87 status word and control word into the packed value
 * described above (status in the low 16 bits, control in the high 16).
 */
extern __GNU_INLINE void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *u = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u->words.cw), "=m" (u->words.sw));
}

/*
 * Install a packed control/status pair (same layout as __fenv_getcwsw()).
 * There is no instruction that loads the status word directly, so we
 * store the whole x87 environment, patch its CW and SW slots, and load
 * it back.  The 16-bit environment fields sit on 4-byte boundaries, so
 * in an array of shorts fenv[0] is the control-word slot and fenv[2]
 * the status-word slot; %3/%4 are the cw/sw halves of the input, which
 * per the union note above hold status/control respectively.  fstenv
 * also masks all exceptions as a side effect, which the fldenv undoes.
 */
extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw %4,%1\n\t"
	    "movw %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}

/* Read the SSE control/status register (MXCSR). */
extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Write the SSE control/status register (MXCSR). */
extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}

/*
 * 2^x - 1 via the f2xm1 instruction (which only defines results for
 * x in [-1.0, +1.0]).
 */
extern __GNU_INLINE long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}

/*
 * y * log2(x).  The instruction pops the stack, hence the st(1)
 * clobber.
 */
extern __GNU_INLINE long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* tan(x) via the fptan instruction. */
extern __GNU_INLINE long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * fpatan computes arctan(st(1) / st(0)) and pops; with y in st(0) and
 * x in st(1) this returns arctan(x / y).
 */
extern __GNU_INLINE long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * fxtract replaces st(0) with the exponent and pushes the significand,
 * so this returns the significand.  NOTE(review): the pushed stack
 * entry leaves the exponent in st(1), which is neither consumed nor
 * declared clobbered here -- verify FP-stack balance with the callers.
 */
extern __GNU_INLINE long double
fxtract(long double x)
{
	__asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
	return (x);
}

/*
 * IEEE-style partial remainder via fprem1: st(0) <- rem(st(0), st(1)).
 * div sits in st(0) and idend in st(1), so this yields rem(div, idend).
 */
extern __GNU_INLINE long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/* Truncating (legacy x87) partial remainder; operands as for fprem1(). */
extern __GNU_INLINE long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * y * log2(x + 1), accurate for x near zero.  Pops the stack, hence
 * the st(1) clobber.
 */
extern __GNU_INLINE long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* sqrt(x). */
extern __GNU_INLINE long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}

/*
 * fsincos stores the sine then pushes the cosine, leaving cos in st(0)
 * and sin in st(1); this returns st(0) and pops the other value into
 * 'dummy'.  NOTE(review): that means the *cosine* is returned and the
 * sine discarded, despite the name -- confirm against the callers.
 */
extern __GNU_INLINE long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}

/* Round x to an integer using the current x87 rounding mode. */
extern __GNU_INLINE long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * fscale computes st(0) * 2^trunc(st(1)); with y in st(0) and x in
 * st(1) this returns y * 2^trunc(x).
 */
extern __GNU_INLINE long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}

/* sin(x). */
extern __GNU_INLINE long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/* cos(x). */
extern __GNU_INLINE long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Scalar-SSE helpers.  Each loads its operands into XMM registers,
 * performs the operation there, and stores the result through the last
 * pointer argument.  NOTE(review): the "+x" (*f1) in-out constraint
 * causes the register copy of *f1 to be written back on exit, so *f1
 * is also overwritten with the result -- verify that no caller relies
 * on *f1 surviving these calls.
 */

/* *i1 = all-ones mask if *f1 == *f2, else 0. */
extern __GNU_INLINE void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = all-ones mask if *f1 < *f2, else 0. */
extern __GNU_INLINE void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = all-ones mask if *f1 <= *f2, else 0. */
extern __GNU_INLINE void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = all-ones mask if *f1 and *f2 are unordered (either is NaN). */
extern __GNU_INLINE void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *f3 = minss(*f1, *f2). */
extern __GNU_INLINE void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = maxss(*f1, *f2). */
extern __GNU_INLINE void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 + *f2. */
extern __GNU_INLINE void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 - *f2. */
extern __GNU_INLINE void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 * *f2. */
extern __GNU_INLINE void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 / *f2. */
extern __GNU_INLINE void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/*
 * *f2 = sqrt(*f1), through a scratch register (declared double; only
 * the low float lane is used), so *f1 is not modified.
 */
extern __GNU_INLINE void
sse_sqrtss(float *f1, float *f2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}

/*
 * Unordered compare of *f1 with *f2, done for its side effect on the
 * SSE exception state (ucomiss raises invalid only for signaling
 * NaNs); the EFLAGS result is not captured.
 */
extern __GNU_INLINE void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}

/*
 * Ordered compare of *f1 with *f2, done for its side effect on the SSE
 * exception state (comiss raises invalid for any NaN operand); the
 * EFLAGS result is not captured.
 */
extern __GNU_INLINE void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}

/* *d1 = (double)*f1. */
extern __GNU_INLINE void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/* *f1 = (float)*i1. */
extern __GNU_INLINE void
sse_cvtsi2ss(int *i1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*f1, truncating toward zero. */
extern __GNU_INLINE void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

/* *i1 = (int)*f1, using the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

#if defined(__amd64)
/* *f1 = (float)*ll1 (64-bit source; amd64 only). */
extern __GNU_INLINE void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

/* *ll1 = (long long)*f1, truncating toward zero. */
extern __GNU_INLINE void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

/* *ll1 = (long long)*f1, using the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}
#endif

/*
 * Double-precision variants of the helpers above; the same "+x" (*d1)
 * write-back note applies.  The compares store a 64-bit mask to *ll1.
 */

/* *ll1 = all-ones mask if *d1 == *d2, else 0. */
extern __GNU_INLINE void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = all-ones mask if *d1 < *d2, else 0. */
extern __GNU_INLINE void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = all-ones mask if *d1 <= *d2, else 0. */
extern __GNU_INLINE void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = all-ones mask if *d1 and *d2 are unordered (either is NaN). */
extern __GNU_INLINE void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}


/* *d3 = minsd(*d1, *d2). */
extern __GNU_INLINE void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = maxsd(*d1, *d2). */
extern __GNU_INLINE void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 + *d2. */
extern __GNU_INLINE void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 - *d2. */
extern __GNU_INLINE void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 * *d2. */
extern __GNU_INLINE void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 / *d2. */
extern __GNU_INLINE void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d2 = sqrt(*d1), through a scratch register; *d1 is not modified. */
extern __GNU_INLINE void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * Unordered compare of *d1 with *d2 for its SSE-exception side effect
 * (invalid only on signaling NaNs); EFLAGS is not captured.
 */
extern __GNU_INLINE void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/*
 * Ordered compare of *d1 with *d2 for its SSE-exception side effect
 * (invalid on any NaN); EFLAGS is not captured.
 */
extern __GNU_INLINE void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/* *f1 = (float)*d1. */
extern __GNU_INLINE void
sse_cvtsd2ss(double *d1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/* *d1 = (double)*i1. */
extern __GNU_INLINE void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;
	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*d1, truncating toward zero. */
extern __GNU_INLINE void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

/* *i1 = (int)*d1, using the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

#if defined(__amd64)
/* *d1 = (double)*ll1 (64-bit source; amd64 only). */
extern __GNU_INLINE void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

/* *ll1 = (long long)*d1, truncating toward zero. */
extern __GNU_INLINE void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}

/* *ll1 = (long long)*d1, using the current MXCSR rounding mode. */
extern __GNU_INLINE void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
#endif
#elif defined(__sparc)
/*
 * Read the floating-point state register into *l.  On SPARC V9 the FSR
 * is 64 bits wide, so stx is needed; 32-bit SPARC uses st.
 */
extern __GNU_INLINE void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/*
 * Write *l to the floating-point state register.  Loading the FSR
 * rewrites its condition-code fields, hence the "cc" clobber.
 */
extern __GNU_INLINE void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}

/* Read only the low 32 bits of the FSR. */
extern __GNU_INLINE void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* Write only the low 32 bits of the FSR. */
extern __GNU_INLINE void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif

#ifdef __cplusplus
}
#endif
#endif /* __GNUC__ */
#endif /* _FENV_INLINES_H */