1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011, Richard Lowe
  14  */
  15 
  16 #ifndef _FENV_INLINES_H
  17 #define _FENV_INLINES_H
  18 
  19 #ifdef __GNUC__
  20 
  21 #ifdef __cplusplus
  22 extern "C" {
  23 #endif
  24 
  25 #include <sys/types.h>
  26 
  27 #if defined(__x86)
  28 
  29 /*
  30  * Floating point Control Word and Status Word
  31  * Definition should actually be shared with x86
  32  * (much of this 'amd64' code can be, in fact.)
  33  */
  34 union fp_cwsw {
  35         uint32_t cwsw;
  36         struct {
  37                 uint16_t cw;
  38                 uint16_t sw;
  39         } words;
  40 };
  41 
  42 extern __inline__ void
  43 __fenv_getcwsw(unsigned int *value)
  44 {
  45         union fp_cwsw *u = (union fp_cwsw *)value;
  46 
  47         __asm__ __volatile__(
  48             "fstsw %0\n\t"
  49             "fstcw %1\n\t"
  50             : "=m" (u->words.cw), "=m" (u->words.sw));
  51 }
  52 
  53 extern __inline__ void
  54 __fenv_setcwsw(const unsigned int *value)
  55 {
  56         union fp_cwsw cwsw;
  57         short fenv[16];
  58 
  59         cwsw.cwsw = *value;
  60 
  61         __asm__ __volatile__(
  62             "fstenv %0\n\t"
  63             "movw   %4,%1\n\t"
  64             "movw   %3,%2\n\t"
  65             "fldenv %0\n\t"
  66             "fwait\n\t"
  67             : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
  68             : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
  69             /* For practical purposes, we clobber the whole FPU */
  70             : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
  71               "st(6)", "st(7)");
  72 }
  73 
  74 extern __inline__ void
  75 __fenv_getmxcsr(unsigned int *value)
  76 {
  77         __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
  78 }
  79 
  80 extern __inline__ void
  81 __fenv_setmxcsr(const unsigned int *value)
  82 {
  83         __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
  84 }
  85 
  86 extern __inline__ long double
  87 f2xm1(long double x)
  88 {
  89         long double ret;
  90 
  91         __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
  92         return (ret);
  93 }
  94 
  95 extern __inline__ long double
  96 fyl2x(long double y, long double x)
  97 {
  98         long double ret;
  99 
 100         __asm__ __volatile__("fyl2x"
 101             : "=t" (ret)
 102             : "0" (x), "u" (y)
 103             : "st(1)", "cc");
 104         return (ret);
 105 }
 106 
 107 extern __inline__ long double
 108 fptan(long double x)
 109 {
 110         /*
 111          * fptan pushes 1.0 then the result on completion, so we want to pop
 112          * the FP stack twice, so we need a dummy value into which to pop it.
 113          */
 114         long double ret;
 115         long double dummy;
 116 
 117         __asm__ __volatile__("fptan"
 118             : "=t" (dummy), "=u" (ret)
 119             : "0" (x)
 120             : "cc");
 121         return (ret);
 122 }
 123 
 124 extern __inline__ long double
 125 fpatan(long double x, long double y)
 126 {
 127         long double ret;
 128 
 129         __asm__ __volatile__("fpatan"
 130             : "=t" (ret)
 131             : "0" (y), "u" (x)
 132             : "st(1)", "cc");
 133         return (ret);
 134 }
 135 
 136 extern __inline__ long double
 137 fxtract(long double x)
 138 {
 139         __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
 140         return (x);
 141 }
 142 
 143 extern __inline__ long double
 144 fprem1(long double idend, long double div)
 145 {
 146         __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
 147         return (div);
 148 }
 149 
 150 extern __inline__ long double
 151 fprem(long double idend, long double div)
 152 {
 153         __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
 154         return (div);
 155 }
 156 
 157 extern __inline__ long double
 158 fyl2xp1(long double y, long double x)
 159 {
 160         long double ret;
 161 
 162         __asm__ __volatile__("fyl2xp1"
 163             : "=t" (ret)
 164             : "0" (x), "u" (y)
 165             : "st(1)", "cc");
 166         return (ret);
 167 }
 168 
 169 extern __inline__ long double
 170 fsqrt(long double x)
 171 {
 172         __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
 173         return (x);
 174 }
 175 
 176 extern __inline__ long double
 177 fsincos(long double x)
 178 {
 179         long double dummy;
 180 
 181         __asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
 182         return (x);
 183 }
 184 
 185 extern __inline__ long double
 186 frndint(long double x)
 187 {
 188         __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
 189         return (x);
 190 }
 191 
 192 extern __inline__ long double
 193 fscale(long double x, long double y)
 194 {
 195         long double ret;
 196 
 197         __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
 198         return (ret);
 199 }
 200 
 201 extern __inline__ long double
 202 fsin(long double x)
 203 {
 204         __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
 205         return (x);
 206 }
 207 
 208 extern __inline__ long double
 209 fcos(long double x)
 210 {
 211         __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
 212         return (x);
 213 }
 214 
 215 extern __inline__ void
 216 sse_cmpeqss(float *f1, float *f2, int *i1)
 217 {
 218         __asm__ __volatile__(
 219             "cmpeqss %2, %1\n\t"
 220             "movss   %1, %0"
 221             : "=m" (*i1), "+x" (*f1)
 222             : "x" (*f2)
 223             : "cc");
 224 }
 225 
 226 extern __inline__ void
 227 sse_cmpltss(float *f1, float *f2, int *i1)
 228 {
 229         __asm__ __volatile__(
 230             "cmpltss %2, %1\n\t"
 231             "movss   %1, %0"
 232             : "=m" (*i1), "+x" (*f1)
 233             : "x" (*f2)
 234             : "cc");
 235 }
 236 
 237 extern __inline__ void
 238 sse_cmpless(float *f1, float *f2, int *i1)
 239 {
 240         __asm__ __volatile__(
 241             "cmpless %2, %1\n\t"
 242             "movss   %1, %0"
 243             : "=m" (*i1), "+x" (*f1)
 244             : "x" (*f2)
 245             : "cc");
 246 }
 247 
 248 extern __inline__ void
 249 sse_cmpunordss(float *f1, float *f2, int *i1)
 250 {
 251         __asm__ __volatile__(
 252             "cmpunordss %2, %1\n\t"
 253             "movss      %1, %0"
 254             : "=m" (*i1), "+x" (*f1)
 255             : "x" (*f2)
 256             : "cc");
 257 }
 258 
 259 extern __inline__ void
 260 sse_minss(float *f1, float *f2, float *f3)
 261 {
 262         __asm__ __volatile__(
 263             "minss %2, %1\n\t"
 264             "movss %1, %0"
 265             : "=m" (*f3), "+x" (*f1)
 266             : "x" (*f2));
 267 }
 268 
 269 extern __inline__ void
 270 sse_maxss(float *f1, float *f2, float *f3)
 271 {
 272         __asm__ __volatile__(
 273             "maxss %2, %1\n\t"
 274             "movss %1, %0"
 275             : "=m" (*f3), "+x" (*f1)
 276             : "x" (*f2));
 277 }
 278 
 279 extern __inline__ void
 280 sse_addss(float *f1, float *f2, float *f3)
 281 {
 282         __asm__ __volatile__(
 283             "addss %2, %1\n\t"
 284             "movss %1, %0"
 285             : "=m" (*f3), "+x" (*f1)
 286             : "x" (*f2));
 287 }
 288 
 289 extern __inline__ void
 290 sse_subss(float *f1, float *f2, float *f3)
 291 {
 292         __asm__ __volatile__(
 293             "subss %2, %1\n\t"
 294             "movss %1, %0"
 295             : "=m" (*f3), "+x" (*f1)
 296             : "x" (*f2));
 297 }
 298 
 299 extern __inline__ void
 300 sse_mulss(float *f1, float *f2, float *f3)
 301 {
 302         __asm__ __volatile__(
 303             "mulss %2, %1\n\t"
 304             "movss %1, %0"
 305             : "=m" (*f3), "+x" (*f1)
 306             : "x" (*f2));
 307 }
 308 
 309 extern __inline__ void
 310 sse_divss(float *f1, float *f2, float *f3)
 311 {
 312         __asm__ __volatile__(
 313             "divss %2, %1\n\t"
 314             "movss %1, %0"
 315             : "=m" (*f3), "+x" (*f1)
 316             : "x" (*f2));
 317 }
 318 
 319 extern __inline__ void
 320 sse_sqrtss(float *f1, float *f2)
 321 {
 322         double tmp;
 323 
 324         __asm__ __volatile__(
 325             "sqrtss %2, %1\n\t"
 326             "movss  %1, %0"
 327             : "=m" (*f2), "=x" (tmp)
 328             : "m" (*f1));
 329 }
 330 
 331 extern __inline__ void
 332 sse_ucomiss(float *f1, float *f2)
 333 {
 334         __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
 335 
 336 }
 337 
 338 extern __inline__ void
 339 sse_comiss(float *f1, float *f2)
 340 {
 341         __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
 342 }
 343 
 344 extern __inline__ void
 345 sse_cvtss2sd(float *f1, double *d1)
 346 {
 347         double tmp;
 348 
 349         __asm__ __volatile__(
 350             "cvtss2sd %2, %1\n\t"
 351             "movsd    %1, %0"
 352             : "=m" (*d1), "=x" (tmp)
 353             : "m" (*f1));
 354 }
 355 
 356 extern __inline__ void
 357 sse_cvtsi2ss(int *i1, float *f1)
 358 {
 359         double tmp;
 360 
 361         __asm__ __volatile__(
 362             "cvtsi2ss %2, %1\n\t"
 363             "movss    %1, %0"
 364             : "=m" (*f1), "=x" (tmp)
 365             : "m" (*i1));
 366 }
 367 
 368 extern __inline__ void
 369 sse_cvttss2si(float *f1, int *i1)
 370 {
 371         int tmp;
 372 
 373         __asm__ __volatile__(
 374             "cvttss2si %2, %1\n\t"
 375             "movl      %1, %0"
 376             : "=m" (*i1), "=r" (tmp)
 377             : "m" (*f1));
 378 }
 379 
 380 extern __inline__ void
 381 sse_cvtss2si(float *f1, int *i1)
 382 {
 383         int tmp;
 384 
 385         __asm__ __volatile__(
 386             "cvtss2si %2, %1\n\t"
 387             "movl     %1, %0"
 388             : "=m" (*i1), "=r" (tmp)
 389             : "m" (*f1));
 390 }
 391 
 392 #if defined(__amd64)
 393 extern __inline__ void
 394 sse_cvtsi2ssq(long long *ll1, float *f1)
 395 {
 396         double tmp;
 397 
 398         __asm__ __volatile__(
 399             "cvtsi2ssq %2, %1\n\t"
 400             "movss     %1, %0"
 401             : "=m" (*f1), "=x" (tmp)
 402             : "m" (*ll1));
 403 }
 404 
 405 extern __inline__ void
 406 sse_cvttss2siq(float *f1, long long *ll1)
 407 {
 408         uint64_t tmp;
 409 
 410         __asm__ __volatile__(
 411             "cvttss2siq %2, %1\n\t"
 412             "movq       %1, %0"
 413             : "=m" (*ll1), "=r" (tmp)
 414             : "m" (*f1));
 415 }
 416 
 417 extern __inline__ void
 418 sse_cvtss2siq(float *f1, long long *ll1)
 419 {
 420         uint64_t tmp;
 421 
 422         __asm__ __volatile__(
 423             "cvtss2siq %2, %1\n\t"
 424             "movq      %1, %0"
 425             : "=m" (*ll1), "=r" (tmp)
 426             : "m" (*f1));
 427 }
 428 
 429 #endif
 430 
 431 extern __inline__ void
 432 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 433 {
 434         __asm__ __volatile__(
 435             "cmpeqsd %2,%1\n\t"
 436             "movsd   %1,%0"
 437             : "=m" (*ll1), "+x" (*d1)
 438             : "x" (*d2));
 439 }
 440 
 441 extern __inline__ void
 442 sse_cmpltsd(double *d1, double *d2, long long *ll1)
 443 {
 444         __asm__ __volatile__(
 445             "cmpltsd %2,%1\n\t"
 446             "movsd   %1,%0"
 447             : "=m" (*ll1), "+x" (*d1)
 448             : "x" (*d2));
 449 }
 450 
 451 extern __inline__ void
 452 sse_cmplesd(double *d1, double *d2, long long *ll1)
 453 {
 454         __asm__ __volatile__(
 455             "cmplesd %2,%1\n\t"
 456             "movsd   %1,%0"
 457             : "=m" (*ll1), "+x" (*d1)
 458             : "x" (*d2));
 459 }
 460 
 461 extern __inline__ void
 462 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 463 {
 464         __asm__ __volatile__(
 465             "cmpunordsd %2,%1\n\t"
 466             "movsd      %1,%0"
 467             : "=m" (*ll1), "+x" (*d1)
 468             : "x" (*d2));
 469 }
 470 
 471 
 472 extern __inline__ void
 473 sse_minsd(double *d1, double *d2, double *d3)
 474 {
 475         __asm__ __volatile__(
 476             "minsd %2,%1\n\t"
 477             "movsd %1,%0"
 478             : "=m" (*d3), "+x" (*d1)
 479             : "x" (*d2));
 480 }
 481 
 482 extern __inline__ void
 483 sse_maxsd(double *d1, double *d2, double *d3)
 484 {
 485         __asm__ __volatile__(
 486             "maxsd %2,%1\n\t"
 487             "movsd %1,%0"
 488             : "=m" (*d3), "+x" (*d1)
 489             : "x" (*d2));
 490 }
 491 
 492 extern __inline__ void
 493 sse_addsd(double *d1, double *d2, double *d3)
 494 {
 495         __asm__ __volatile__(
 496             "addsd %2,%1\n\t"
 497             "movsd %1,%0"
 498             : "=m" (*d3), "+x" (*d1)
 499             : "x" (*d2));
 500 }
 501 
 502 extern __inline__ void
 503 sse_subsd(double *d1, double *d2, double *d3)
 504 {
 505         __asm__ __volatile__(
 506             "subsd %2,%1\n\t"
 507             "movsd %1,%0"
 508             : "=m" (*d3), "+x" (*d1)
 509             : "x" (*d2));
 510 }
 511 
 512 extern __inline__ void
 513 sse_mulsd(double *d1, double *d2, double *d3)
 514 {
 515         __asm__ __volatile__(
 516             "mulsd %2,%1\n\t"
 517             "movsd %1,%0"
 518             : "=m" (*d3), "+x" (*d1)
 519             : "x" (*d2));
 520 }
 521 
 522 extern __inline__ void
 523 sse_divsd(double *d1, double *d2, double *d3)
 524 {
 525         __asm__ __volatile__(
 526             "divsd %2,%1\n\t"
 527             "movsd %1,%0"
 528             : "=m" (*d3), "+x" (*d1)
 529             : "x" (*d2));
 530 }
 531 
 532 extern __inline__ void
 533 sse_sqrtsd(double *d1, double *d2)
 534 {
 535         double tmp;
 536 
 537         __asm__ __volatile__(
 538             "sqrtsd %2, %1\n\t"
 539             "movsd %1, %0"
 540             : "=m" (*d2), "=x" (tmp)
 541             : "m" (*d1));
 542 }
 543 
 544 extern __inline__ void
 545 sse_ucomisd(double *d1, double *d2)
 546 {
 547         __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
 548 }
 549 
 550 extern __inline__ void
 551 sse_comisd(double *d1, double *d2)
 552 {
 553         __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
 554 }
 555 
 556 extern __inline__ void
 557 sse_cvtsd2ss(double *d1, float *f1)
 558 {
 559         double tmp;
 560 
 561         __asm__ __volatile__(
 562             "cvtsd2ss %2,%1\n\t"
 563             "movss    %1,%0"
 564             : "=m" (*f1), "=x" (tmp)
 565             : "m" (*d1));
 566 }
 567 
 568 extern __inline__ void
 569 sse_cvtsi2sd(int *i1, double *d1)
 570 {
 571         double tmp;
 572         __asm__ __volatile__(
 573             "cvtsi2sd %2,%1\n\t"
 574             "movsd    %1,%0"
 575             : "=m" (*d1), "=x" (tmp)
 576             : "m" (*i1));
 577 }
 578 
 579 extern __inline__ void
 580 sse_cvttsd2si(double *d1, int *i1)
 581 {
 582         int tmp;
 583 
 584         __asm__ __volatile__(
 585             "cvttsd2si %2,%1\n\t"
 586             "movl      %1,%0"
 587             : "=m" (*i1), "=r" (tmp)
 588             : "m" (*d1));
 589 }
 590 
 591 extern __inline__ void
 592 sse_cvtsd2si(double *d1, int *i1)
 593 {
 594         int tmp;
 595 
 596         __asm__ __volatile__(
 597             "cvtsd2si %2,%1\n\t"
 598             "movl     %1,%0"
 599             : "=m" (*i1), "=r" (tmp)
 600             : "m" (*d1));
 601 }
 602 
 603 #if defined(__amd64)
 604 extern __inline__ void
 605 sse_cvtsi2sdq(long long *ll1, double *d1)
 606 {
 607         double tmp;
 608 
 609         __asm__ __volatile__(
 610             "cvtsi2sdq %2,%1\n\t"
 611             "movsd     %1,%0"
 612             : "=m" (*d1), "=x" (tmp)
 613             : "m" (*ll1));
 614 }
 615 
 616 extern __inline__ void
 617 sse_cvttsd2siq(double *d1, long long *ll1)
 618 {
 619         uint64_t tmp;
 620 
 621         __asm__ __volatile__(
 622             "cvttsd2siq %2,%1\n\t"
 623             "movq       %1,%0"
 624             : "=m" (*ll1), "=r" (tmp)
 625             : "m" (*d1));
 626 }
 627 
 628 extern __inline__ void
 629 sse_cvtsd2siq(double *d1, long long *ll1)
 630 {
 631         uint64_t tmp;
 632 
 633         __asm__ __volatile__(
 634             "cvtsd2siq %2,%1\n\t"
 635             "movq      %1,%0"
 636             : "=m" (*ll1), "=r" (tmp)
 637             : "m" (*d1));
 638 }
 639 #endif
 640 
 641 #elif defined(__sparc)
 642 extern __inline__ void
 643 __fenv_getfsr(unsigned long *l)
 644 {
 645         __asm__ __volatile__(
 646 #if defined(__sparcv9)
 647                 "stx %%fsr,%0\n\t"
 648 #else
 649                 "st  %%fsr,%0\n\t"
 650 #endif
 651                 : "=m" (*l));
 652 }
 653 
 654 extern __inline__ void
 655 __fenv_setfsr(const unsigned long *l)
 656 {
 657         __asm__ __volatile__(
 658 #if defined(__sparcv9)
 659                 "ldx %0,%%fsr\n\t"
 660 #else
 661                 "ld %0,%%fsr\n\t"
 662 #endif
 663                 : : "m" (*l) : "cc");
 664 }
 665 
 666 extern __inline__ void
 667 __fenv_getfsr32(unsigned int *l)
 668 {
 669         __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
 670 }
 671 
 672 extern __inline__ void
 673 __fenv_setfsr32(const unsigned int *l)
 674 {
 675         __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
 676 }
 677 #else
 678 #error "GCC FENV inlines not implemented for this platform"
 679 #endif
 680 
 681 #ifdef __cplusplus
 682 }
 683 #endif
 684 
 685 #endif  /* __GNUC__ */
 686 
 687 #endif /* _FENV_INLINES_H */