/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2011, Richard Lowe
 */

/*
 * GCC inline-assembly implementations of the low-level floating point
 * environment helpers used by libm's <fenv.h> support: x87 control/status
 * word and SSE MXCSR access, the raw x87 transcendental/arithmetic
 * instructions, scalar SSE operations, and the SPARC FSR accessors.
 */

#ifndef _FENV_INLINES_H
#define	_FENV_INLINES_H

#ifdef __GNUC__

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>

/*
 * On illumos, <sys/types.h> defines __x86/__amd64 via isa_defs.h.  Also
 * accept the compiler-predefined macros so the header selects the right
 * implementation with toolchains that do not provide the Sun macros.
 */
#if defined(__x86) || defined(__i386__) || defined(__x86_64__)

/*
 * Floating point Control Word and Status Word.
 *
 * Packing convention used by __fenv_getcwsw/__fenv_setcwsw below:
 * the combined 32-bit value is (control_word << 16) | status_word.
 * Note that this means the status word lands in the field named 'cw'
 * (the low half) and the control word in the field named 'sw' (the
 * high half) -- the field names do not match their contents, but both
 * functions apply the convention consistently.
 *
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};

/*
 * Read the x87 control and status words as (CW << 16) | SW.
 * fstsw fills the low half, fstcw the high half (see union comment).
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw ret;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (ret.words.cw), "=m" (ret.words.sw));
	*value = ret.cwsw;
}

/*
 * Install a (CW << 16) | SW pair: dump the 28-byte x87 environment,
 * patch the control word (fenv[0]) and status word (fenv[2], byte
 * offset 4), then reload it.  The high half of *value goes to the
 * control-word slot, the low half to the status-word slot, matching
 * __fenv_getcwsw above.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw %4,%1\n\t"
	    "movw %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}

/*
 * Read the SSE control/status register.
 * Fixed: the original template referenced %1 with only one operand,
 * and used "+m" for what is a pure output.
 */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Load the SSE control/status register. */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}

/*
 * Thin wrappers around individual x87 instructions.  Each routes its
 * operands through the FP stack ("t" = st(0), "u" = st(1), "0" = same
 * register as operand 0) and returns st(0).
 */

/* 2^x - 1 */
extern __inline__ long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x));
	return (ret);
}

/* y * log2(x); fyl2x pops st(1), hence the clobber. */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x" : "=t" (ret): "0" (x), "u" (y) : "st(1)");
	return (ret);
}

extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));
	return (ret);
}

/* atan2(y, x); fpatan pops st(1), hence the clobber. */
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)");
	return (ret);
}

extern __inline__ long double
fxtract(long double x)
{
	long double ret;

	__asm__ __volatile__("fxtract" : "=t" (ret) : "0" (x));
	return (ret);
}

/* IEEE partial remainder: st(0) is loaded from 'div', st(1) from 'idend'. */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	long double ret;

	__asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
	return (ret);
}

/* Truncating partial remainder; operand placement as in fprem1. */
extern __inline__ long double
fprem(long double idend, long double div)
{
	long double ret;

	__asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
	return (ret);
}

/* y * log2(x + 1); fyl2xp1 pops st(1), hence the clobber. */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)");
	return (ret);
}

extern __inline__ long double
fsqrt(long double x)
{
	long double ret;

	__asm__ __volatile__("fsqrt" : "=t" (ret) : "0" (x));
	return (ret);
}

extern __inline__ long double
fsincos(long double x)
{
	long double ret;

	__asm__ __volatile__("fsincos" : "=t" (ret) : "0" (x));
	return (ret);
}

/* Round st(0) to integer using the current rounding mode. */
extern __inline__ long double
frndint(long double x)
{
	long double ret;

	__asm__ __volatile__("frndint" : "=t" (ret) : "0" (x));
	return (ret);
}

/* y * 2^trunc(x): fscale scales st(0) (= y) by st(1) (= x). */
extern __inline__ long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
	return (ret);
}

extern __inline__ long double
fsin(long double x)
{
	long double ret;

	__asm__ __volatile__("fsin" : "=t" (ret) : "0" (x));
	return (ret);
}

extern __inline__ long double
fcos(long double x)
{
	long double ret;

	__asm__ __volatile__("fcos" : "=t" (ret) : "0" (x));
	return (ret);
}

/*
 * Scalar single-precision SSE helpers.  The two-operand pattern below
 * computes *f1 OP *f2 and stores the result (the comparisons store the
 * all-ones/all-zeroes mask into an int).
 *
 * NOTE(review): these write through input operand %1, which formally
 * violates GCC's extended-asm contract (inputs must not be modified).
 * This is the historical upstream pattern; preserved as-is.
 */

extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3)
	    : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	__asm__ __volatile__(
	    "sqrtss %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f2)
	    : "m" (*f1)
	    : "xmm0");
}

/* Unordered compare; result is left in EFLAGS only. */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
}

/* Ordered compare; result is left in EFLAGS only. */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}

extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	__asm__ __volatile__(
	    "cvtss2sd %1, %%xmm0\n\t"
	    "movsd %%xmm0, %0"
	    : "=m" (*d1)
	    : "m" (*f1)
	    : "xmm0");
}

extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	__asm__ __volatile__(
	    "cvtsi2ss %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f1)
	    : "m" (*i1)
	    : "xmm0");
}

/* Convert with truncation (round toward zero). */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	__asm__ __volatile__(
	    "cvttss2si %1, %%ecx\n\t"
	    "movl %%ecx, %0"
	    : "=m" (*i1)
	    : "m" (*f1)
	    : "ecx");
}

/* Convert using the current MXCSR rounding mode. */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	__asm__ __volatile__(
	    "cvtss2si %1, %%ecx\n\t"
	    "movl %%ecx, %0"
	    : "=m" (*i1)
	    : "m" (*f1)
	    : "ecx");
}

#if defined(__amd64) || defined(__x86_64__)
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	__asm__ __volatile__(
	    "cvtsi2ssq %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f1)
	    : "m" (*ll1)
	    : "xmm0");
}

extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	__asm__ __volatile__(
	    "cvttss2siq %1, %%rcx\n\t"
	    "movq %%rcx, %0"
	    : "=m" (*ll1)
	    : "m" (*f1)
	    : "rcx");
}

extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	__asm__ __volatile__(
	    "cvtss2siq %1, %%rcx\n\t"
	    "movq %%rcx, %0"
	    : "=m" (*ll1)
	    : "m" (*f1)
	    : "rcx");
}

#endif

/*
 * Scalar double-precision SSE helpers; same structure (and the same
 * input-operand caveat) as the single-precision family above.
 */

extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2));
}

/*
 * NOTE(review): the xmm0 clobber here is absent from the otherwise
 * identical siblings; it looks spurious but is harmless, so it is kept.
 */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3)
	    : "x" (*d1), "x" (*d2)
	    : "xmm0");
}

extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	__asm__ __volatile__(
	    "sqrtsd %1, %%xmm0\n\t"
	    "movsd %%xmm0, %0"
	    : "=m" (*d2)
	    : "m" (*d1)
	    : "xmm0");
}

/* Unordered compare; result is left in EFLAGS only. */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/* Ordered compare; result is left in EFLAGS only. */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}

extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	__asm__ __volatile__(
	    "cvtsd2ss %1,%%xmm0\n\t"
	    "movss %%xmm0,%0"
	    : "=m" (*f1)
	    : "m" (*d1)
	    : "xmm0");
}

extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	__asm__ __volatile__(
	    "cvtsi2sd %1,%%xmm0\n\t"
	    "movsd %%xmm0,%0"
	    : "=m" (*d1)
	    : "m" (*i1)
	    : "xmm0");
}

/* Convert with truncation (round toward zero). */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	__asm__ __volatile__(
	    "cvttsd2si %1,%%ecx\n\t"
	    "movl %%ecx,%0"
	    : "=m" (*i1)
	    : "m" (*d1)
	    : "ecx");
}

/* Convert using the current MXCSR rounding mode. */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	__asm__ __volatile__(
	    "cvtsd2si %1,%%ecx\n\t"
	    "movl %%ecx,%0"
	    : "=m" (*i1)
	    : "m" (*d1)
	    : "ecx");
}

#if defined(__amd64) || defined(__x86_64__)
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	__asm__ __volatile__(
	    "cvtsi2sdq %1,%%xmm0\n\t"
	    "movsd %%xmm0,%0"
	    : "=m" (*d1)
	    : "m" (*ll1)
	    : "xmm0");
}

extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	__asm__ __volatile__(
	    "cvttsd2siq %1,%%rcx\n\t"
	    "movq %%rcx,%0"
	    : "=m" (*ll1)
	    : "m" (*d1)
	    : "rcx");
}

extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	__asm__ __volatile__(
	    "cvtsd2siq %1,%%rcx\n\t"
	    "movq %%rcx,%0"
	    : "=m" (*ll1)
	    : "m" (*d1)
	    : "rcx");
}
#endif
#elif defined(__sparc)
/* Read the floating-point state register (64-bit on sparcv9). */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/* Load the floating-point state register (64-bit on sparcv9). */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l));
}

/* Read only the lower 32 bits of the FSR. */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* Load only the lower 32 bits of the FSR. */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif

#ifdef __cplusplus
}
#endif

#endif /* __GNUC__ */

#endif /* _FENV_INLINES_H */