/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source. A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011, Richard Lowe
 */

/*
 * GCC inline-assembly accessors for the floating-point environment:
 * x87 control/status words and SSE MXCSR on x86, the FSR on SPARC,
 * plus thin wrappers around individual x87 and scalar-SSE instructions
 * used by libm.
 *
 * Constraint cheat-sheet for the asm below (GCC machine constraints):
 *   "t"  top of the x87 stack, st(0)
 *   "u"  second x87 stack slot, st(1)
 *   "x"  any SSE (XMM) register
 *   "0"  tie this input to the same register as operand 0
 *   "+"  read-write operand, "="  write-only operand, "m"  memory
 */

#ifndef _FENV_INLINES_H
#define _FENV_INLINES_H

#ifdef __GNUC__

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>

#if defined(__x86)

/*
 * Floating point Control Word and Status Word
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 */
union fp_cwsw {
	uint32_t cwsw;		/* both words viewed as one 32-bit value */
	struct {
		uint16_t cw;	/* low 16 bits of cwsw */
		uint16_t sw;	/* high 16 bits of cwsw */
	} words;
};

/*
 * Read the x87 control and status words into *value, packed as a
 * union fp_cwsw.
 *
 * NOTE(review): fstsw (status word) stores into words.cw and fstcw
 * (control word) stores into words.sw -- the field names look swapped
 * relative to the instructions.  It is at least self-consistent with
 * __fenv_setcwsw below, which treats the slots the same way, so the
 * packing may be intentional; confirm against the callers before
 * changing anything.
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *u = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u->words.cw), "=m" (u->words.sw));
}

/*
 * Install new x87 control and status words from the packed *value.
 * There is no instruction that writes the status word directly, so we
 * dump the whole FPU environment with fstenv, patch the control-word
 * slot (fenv[0]) and the status-word slot (fenv[2]) of the 28-byte
 * environment image, and reload it with fldenv.  fenv[16] (32 bytes)
 * is deliberately larger than the image needs.
 *
 * NOTE(review): the control-word slot is written from words.sw and the
 * status-word slot from words.cw -- the same swapped field naming as
 * __fenv_getcwsw above; verify before "fixing".
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw %4,%1\n\t"
	    "movw %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}

/* Read the SSE control/status register (MXCSR) into *value. */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Load MXCSR from *value. */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}

/* Returns 2^x - 1 (instruction domain is -1 <= x <= 1). */
extern __inline__ long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}

/* Returns y * log2(x): x goes to st(0), y to st(1); fyl2x pops once. */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* Returns tan(x). */
extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 * On completion st(0) holds the pushed 1.0 (soaked up by dummy) and
	 * st(1) holds tan(x).
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * fpatan computes arctan(st(1)/st(0)) and pops.  With y loaded into
 * st(0) and x into st(1) this returns arctan(x/y) -- note the argument
 * order relative to the parameter names.
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * fxtract splits st(0) into exponent and significand, pushing the
 * significand; the new st(0) (the significand) is returned.
 *
 * NOTE(review): the exponent left beneath it in st(1) is not declared
 * or popped here -- verify the compiler/callers account for the extra
 * stack slot.
 */
extern __inline__ long double
fxtract(long double x)
{
	__asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
	return (x);
}

/*
 * IEEE partial remainder: st(0) = st(0) REM st(1).
 *
 * NOTE(review): "div" is loaded into st(0) (the dividend slot) and
 * "idend" into st(1) (the divisor slot), so this computes
 * div REM idend -- the opposite of what the parameter names suggest.
 * Confirm against the callers.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * Truncating (x87-style) partial remainder; same operand layout -- and
 * the same parameter-naming caveat -- as fprem1 above.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/* Returns y * log2(x + 1) (instruction meant for x near zero). */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* Returns sqrt(x). */
extern __inline__ long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}

/*
 * fsincos replaces st(0) with sin(x) and pushes cos(x), so the value
 * returned (the new st(0)) is cos(x).
 *
 * NOTE(review): as written, only one stack slot is declared; the
 * sin(x) left in st(1) is neither consumed nor clobber-listed --
 * verify the intended result and stack balance with the callers.
 */
extern __inline__ long double
fsincos(long double x)
{
	__asm__ __volatile__("fsincos" : "+t" (x) : : "cc");
	return (x);
}

/* Rounds x to an integer using the current rounding mode. */
extern __inline__ long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * fscale computes st(0) * 2^trunc(st(1)); with y in st(0) and x in
 * st(1) this returns y * 2^trunc(x).
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}

/* Returns sin(x). */
extern __inline__ long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/* Returns cos(x). */
extern __inline__ long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Scalar single-precision SSE wrappers.  The cmp*ss comparisons store
 * the instruction's all-ones/all-zeroes 32-bit mask through *i1; the
 * arithmetic wrappers store the result through their last parameter.
 * Each runs the operation in an XMM register and spills via movss.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *f3 = min(*f1, *f2) with SSE min semantics (2nd operand on NaN). */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = max(*f1, *f2) with SSE max semantics (2nd operand on NaN). */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 + *f2 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 - *f2 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 * *f2 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 / *f2 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f2 = sqrtf(*f1); tmp is only an XMM scratch for the result. */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}

/* Unordered compare of *f1 with *f2, for its flag/exception effects. */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}

/* Ordered compare of *f1 with *f2 (signals on QNaN, unlike ucomiss). */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}

/* *d1 = (double)*f1 */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/* *f1 = (float)*i1 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*f1, truncating toward zero. */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

/* *i1 = (int)*f1, using the current rounding mode. */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

#if defined(__amd64)
/* 64-bit-integer <-> float conversions, amd64 only. */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

#endif

/*
 * Scalar double-precision counterparts of the wrappers above; the
 * cmp*sd comparisons store a 64-bit mask through *ll1.
 *
 * NOTE(review): these declare the first operand "=x" (write-only)
 * although the cmp/min/max/add/sub/mul/div instruction reads %1 as its
 * destination-and-source; the single-precision versions use "+x"
 * (read-write) for the same pattern.  The inconsistency looks like a
 * latent constraint bug -- verify against the upstream .il sources
 * before relying on the *sd wrappers.
 */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=x" (*d1)
	    : "x" (*d2));
}

extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=x" (*d1)
	    : "x" (*d2));
}

extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=x" (*d1)
	    : "x" (*d2));
}

extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "=x" (*d1)
	    : "x" (*d2));
}


extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 + *d2 */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 - *d2 */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 * *d2 */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 / *d2 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "=x" (*d1)
	    : "x" (*d2));
}

/* *d2 = sqrt(*d1); tmp is only an XMM scratch for the result. */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/* Unordered compare of *d1 with *d2, for its flag/exception effects. */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/* Ordered compare of *d1 with *d2 (signals on QNaN, unlike ucomisd). */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/* *f1 = (float)*d1 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/* *d1 = (double)*i1 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;
	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/* *i1 = (int)*d1, truncating toward zero. */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

/* *i1 = (int)*d1, using the current rounding mode. */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

#if defined(__amd64)
/* 64-bit-integer <-> double conversions, amd64 only. */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}

extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
#endif

#elif defined(__sparc)

/*
 * On SPARC the whole FP environment lives in the FSR register; v9
 * uses the 64-bit stx/ldx forms, v8 the 32-bit st/ld forms.
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}

/* 32-bit FSR accessors (lower word only), for 32-bit consumers. */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif

#ifdef __cplusplus
}
#endif

#endif /* __GNUC__ */

#endif /* _FENV_INLINES_H */