1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011, Richard Lowe
  14  */
  15 
  16 #ifndef _FENV_INLINES_H
  17 #define _FENV_INLINES_H
  18 
  19 #ifdef __GNUC__
  20 #ifdef __cplusplus
  21 extern "C" {
  22 #endif
  23 
  24 #include <sys/types.h>
  25 
  26 #if defined(__x86)
  27 /*
  28  * Floating point Control Word and Status Word
  29  * Definition should actually be shared with x86
  30  * (much of this 'amd64' code can be, in fact.)
  31  */
  32 union fp_cwsw {
  33         uint32_t cwsw;
  34         struct {
  35                 uint16_t cw;
  36                 uint16_t sw;
  37         } words;
  38 };
  39 
/*
 * Read the x87 control and status words into *value as one packed
 * 32-bit quantity.
 *
 * fstsw stores the status word through operand %0 (u->words.cw, the
 * low 16 bits) and fstcw stores the control word through operand %1
 * (u->words.sw, the high 16 bits), so *value ends up as
 * (CW << 16) | SW.  See the NOTE on union fp_cwsw: this pairing looks
 * swapped relative to the field names but matches how
 * __fenv_setcwsw() below unpacks the value.
 */
extern __GNU_INLINE void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *u = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u->words.cw), "=m" (u->words.sw));
}
  50 
/*
 * Install a packed control/status word pair, in the layout produced by
 * __fenv_getcwsw() (CW in the high 16 bits, SW in the low 16 bits),
 * into the x87 unit.
 *
 * There is no instruction that loads the status word directly, so we
 * dump the 28-byte FP environment with fstenv (fenv[16] shorts = 32
 * bytes, comfortably enough), patch the control-word slot (fenv[0])
 * and status-word slot (fenv[2]) in memory, and reload the whole
 * environment with fldenv.  Note %4 is cwsw.words.sw (the high half,
 * holding CW) and %3 is cwsw.words.cw (the low half, holding SW) --
 * see the NOTE on union fp_cwsw about the swapped field names.
 *
 * fstenv masks all exceptions as a side effect; fldenv restores the
 * real masks, and the trailing fwait lets any newly-unmasked pending
 * exception trap here rather than at some unrelated later FP
 * instruction.
 */
extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw   %4,%1\n\t"
	    "movw   %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}
  71 
/*
 * Read the SSE control/status register (MXCSR) into *value.
 */
extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/*
 * Load *value into the SSE control/status register (MXCSR).
 */
extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
  83 
  84 extern __GNU_INLINE long double
  85 f2xm1(long double x)
  86 {
  87         long double ret;
  88 
  89         __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
  90         return (ret);
  91 }
  92 
  93 extern __GNU_INLINE long double
  94 fyl2x(long double y, long double x)
  95 {
  96         long double ret;
  97 
  98         __asm__ __volatile__("fyl2x"
  99             : "=t" (ret)
 100             : "0" (x), "u" (y)
 101             : "st(1)", "cc");
 102         return (ret);
 103 }
 104 
 105 extern __GNU_INLINE long double
 106 fptan(long double x)
 107 {
 108         /*
 109          * fptan pushes 1.0 then the result on completion, so we want to pop
 110          * the FP stack twice, so we need a dummy value into which to pop it.
 111          */
 112         long double ret;
 113         long double dummy;
 114 
 115         __asm__ __volatile__("fptan"
 116             : "=t" (dummy), "=u" (ret)
 117             : "0" (x)
 118             : "cc");
 119         return (ret);
 120 }
 121 
 122 extern __GNU_INLINE long double
 123 fpatan(long double x, long double y)
 124 {
 125         long double ret;
 126 
 127         __asm__ __volatile__("fpatan"
 128             : "=t" (ret)
 129             : "0" (y), "u" (x)
 130             : "st(1)", "cc");
 131         return (ret);
 132 }
 133 
 134 extern __GNU_INLINE long double
 135 fxtract(long double x)
 136 {
 137         __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
 138         return (x);
 139 }
 140 
/*
 * IEEE partial remainder via fprem1, which computes
 * st(0) = st(0) rem st(1).
 *
 * NOTE(review): as bound here st(0) = div and st(1) = idend, so this
 * returns div rem idend -- the *second* argument reduced modulo the
 * first, which reads backwards relative to the parameter names
 * ("idend" = dividend).  Confirm the argument order against the libm
 * callers before changing anything.
 */
extern __GNU_INLINE long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * Truncating (non-IEEE) partial remainder via fprem.  Same operand
 * binding -- and the same NOTE(review) about apparent argument-order
 * reversal -- as fprem1() above.
 */
extern __GNU_INLINE long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}
 154 
 155 extern __GNU_INLINE long double
 156 fyl2xp1(long double y, long double x)
 157 {
 158         long double ret;
 159 
 160         __asm__ __volatile__("fyl2xp1"
 161             : "=t" (ret)
 162             : "0" (x), "u" (y)
 163             : "st(1)", "cc");
 164         return (ret);
 165 }
 166 
/*
 * Square root of x via the x87 fsqrt instruction, in place in st(0).
 */
extern __GNU_INLINE long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}
 173 
/*
 * Run the x87 fsincos instruction on x.
 *
 * fsincos stores sin(x) in st(0) and then pushes cos(x), so on
 * completion st(0) = cos(x) and st(1) = sin(x).  As bound here
 * ("+t" returns st(0), "=u" discards st(1)), this function returns
 * the COSINE and throws the sine away.  NOTE(review): that may well
 * be what the callers want, but the name suggests otherwise --
 * confirm against the libm call sites.
 */
extern __GNU_INLINE long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}
 182 
/*
 * Round x to an integer via frndint, honouring the current x87
 * rounding mode.
 */
extern __GNU_INLINE long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * fscale: returns y * 2^trunc(x).  The instruction computes
 * st(0) * 2^trunc(st(1)) with st(0) = y and st(1) = x; st(1) is
 * left unchanged by the instruction, so no stack clobber is needed.
 */
extern __GNU_INLINE long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}

/*
 * sin(x) via the x87 fsin instruction, in place in st(0).
 */
extern __GNU_INLINE long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/*
 * cos(x) via the x87 fcos instruction, in place in st(0).
 */
extern __GNU_INLINE long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}
 212 
/*
 * SSE scalar-single compares: compare *f1 against *f2 and store the
 * resulting all-ones/all-zeroes 32-bit mask through *i1.
 *
 * NOTE(review): *f1 is bound read-write ("+x"), so after the compare
 * instruction leaves the mask in the register, the compiler also
 * writes that mask back through *f1 -- i.e. *f1 is clobbered with
 * the mask, not just read.  Verify the libm callers expect (or at
 * least tolerate) that side effect.
 */
extern __GNU_INLINE void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 < *f2); see the *f1 clobber NOTE on sse_cmpeqss(). */
extern __GNU_INLINE void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 <= *f2); see the *f1 clobber NOTE on sse_cmpeqss(). */
extern __GNU_INLINE void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}

/* *i1 = mask of (*f1 unordered with *f2, i.e. either is NaN). */
extern __GNU_INLINE void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss      %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
 256 
/*
 * SSE scalar-single arithmetic: compute (*f1 OP *f2) and store the
 * result through *f3.
 *
 * NOTE(review): as with the compares above, *f1 is bound read-write
 * ("+x"), so the compiler also writes the result back through *f1
 * after the asm -- *f1 does not survive the call.  Verify callers
 * expect that.
 */
extern __GNU_INLINE void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = max of *f1, *f2 (SSE maxss semantics for NaN/equal inputs). */
extern __GNU_INLINE void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 + *f2; see the *f1 clobber NOTE on sse_minss(). */
extern __GNU_INLINE void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 - *f2; see the *f1 clobber NOTE on sse_minss(). */
extern __GNU_INLINE void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 * *f2; see the *f1 clobber NOTE on sse_minss(). */
extern __GNU_INLINE void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}

/* *f3 = *f1 / *f2; see the *f1 clobber NOTE on sse_minss(). */
extern __GNU_INLINE void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
 316 
/*
 * *f2 = sqrtf(*f1) via sqrtss.  Unlike the arithmetic helpers above,
 * the source is a pure memory input, so *f1 is NOT modified; tmp is
 * only a write-only XMM scratch (declared double though it holds a
 * float value -- harmless, it is never read back in C).
 */
extern __GNU_INLINE void
sse_sqrtss(float *f1, float *f2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss  %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}
 328 
/*
 * Unordered scalar-single compare (ucomiss %1,%0 -> compares *f1
 * against *f2).  The result exists only in EFLAGS; the asm has no
 * outputs, and __volatile__ is what keeps it from being discarded.
 * NOTE(review): presumably the caller samples the flags by some other
 * means (e.g. a subsequent fault/trap) -- confirm at the call sites.
 */
extern __GNU_INLINE void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}

/*
 * Ordered scalar-single compare; same flags-only result and caveats
 * as sse_ucomiss() above, but comiss signals invalid on quiet NaNs.
 */
extern __GNU_INLINE void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}
 341 
/*
 * *d1 = (double)*f1, converted through an XMM scratch register.
 */
extern __GNU_INLINE void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd    %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/*
 * *f1 = (float)*i1 (32-bit int to single).  tmp is a write-only XMM
 * scratch; its double type is immaterial since it is never read in C.
 */
extern __GNU_INLINE void
sse_cvtsi2ss(int *i1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss    %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * *i1 = (int)*f1, truncating toward zero (cvtTss2si).
 */
extern __GNU_INLINE void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl      %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}

/*
 * *i1 = (int)*f1, rounded per the current MXCSR rounding mode.
 */
extern __GNU_INLINE void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl     %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}
 389 
 390 #if defined(__amd64)
/*
 * amd64 only: *f1 = (float)*ll1 (64-bit int to single).
 */
extern __GNU_INLINE void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss     %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * *ll1 = (long long)*f1, truncating toward zero.
 */
extern __GNU_INLINE void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq       %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}

/*
 * *ll1 = (long long)*f1, rounded per the current MXCSR rounding mode.
 */
extern __GNU_INLINE void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq      %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}
 426 #endif
 427 
/*
 * SSE scalar-double compares: compare *d1 against *d2 and store the
 * resulting all-ones/all-zeroes 64-bit mask through *ll1.
 *
 * NOTE(review): *d1 is bound read-write ("+x"), so the compiler
 * writes the mask back through *d1 as well -- *d1 is clobbered, not
 * just read.  Verify the libm callers expect that (this mirrors the
 * single-precision sse_cmp*ss helpers above).
 */
extern __GNU_INLINE void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 < *d2); see the *d1 clobber NOTE on sse_cmpeqsd(). */
extern __GNU_INLINE void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 <= *d2); see the *d1 clobber NOTE on sse_cmpeqsd(). */
extern __GNU_INLINE void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}

/* *ll1 = mask of (*d1 unordered with *d2, i.e. either is NaN). */
extern __GNU_INLINE void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd      %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
 467 
 468 
/*
 * SSE scalar-double arithmetic: compute (*d1 OP *d2) and store the
 * result through *d3.
 *
 * NOTE(review): *d1 is bound read-write ("+x"), so the result is also
 * written back through *d1 -- *d1 does not survive the call.  Verify
 * callers expect that (same pattern as the sse_*ss helpers above).
 */
extern __GNU_INLINE void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = max of *d1, *d2 (SSE maxsd semantics for NaN/equal inputs). */
extern __GNU_INLINE void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 + *d2; see the *d1 clobber NOTE on sse_minsd(). */
extern __GNU_INLINE void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 - *d2; see the *d1 clobber NOTE on sse_minsd(). */
extern __GNU_INLINE void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 * *d2; see the *d1 clobber NOTE on sse_minsd(). */
extern __GNU_INLINE void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}

/* *d3 = *d1 / *d2; see the *d1 clobber NOTE on sse_minsd(). */
extern __GNU_INLINE void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
 528 
/*
 * *d2 = sqrt(*d1) via sqrtsd.  The source is a pure memory input, so
 * *d1 is not modified; tmp is a write-only XMM scratch.
 */
extern __GNU_INLINE void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * Unordered scalar-double compare of *d1 against *d2.  Result lives
 * only in EFLAGS; __volatile__ keeps the output-less asm alive.
 * NOTE(review): confirm how callers retrieve the flags.
 */
extern __GNU_INLINE void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}

/*
 * Ordered scalar-double compare; as sse_ucomisd(), but comisd
 * signals invalid on quiet NaNs.
 */
extern __GNU_INLINE void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}
 552 
/*
 * *f1 = (float)*d1, converted through an XMM scratch register.
 */
extern __GNU_INLINE void
sse_cvtsd2ss(double *d1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss    %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/*
 * *d1 = (double)*i1 (32-bit int to double).
 */
extern __GNU_INLINE void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;
	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd    %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * *i1 = (int)*d1, truncating toward zero (cvtTsd2si).
 */
extern __GNU_INLINE void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl      %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}

/*
 * *i1 = (int)*d1, rounded per the current MXCSR rounding mode.
 */
extern __GNU_INLINE void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl     %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}
 599 
 600 #if defined(__amd64)
/*
 * amd64 only: *d1 = (double)*ll1 (64-bit int to double).
 */
extern __GNU_INLINE void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd     %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * *ll1 = (long long)*d1, truncating toward zero.
 */
extern __GNU_INLINE void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq       %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}

/*
 * *ll1 = (long long)*d1, rounded per the current MXCSR rounding mode.
 */
extern __GNU_INLINE void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq      %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
 636 #endif
 637 #elif defined(__sparc)
/*
 * Read the SPARC floating-point state register into *l.  On sparcv9
 * the full 64-bit %fsr is stored (stx); on 32-bit sparc only the
 * 32-bit register exists (st).  Either width matches unsigned long
 * in the corresponding data model (LP64/ILP32).
 */
extern __GNU_INLINE void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st  %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/*
 * Write *l to %fsr.  Loading %fsr replaces, among other fields, the
 * FP condition codes, hence the "cc" clobber.
 */
extern __GNU_INLINE void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}
 661 
/*
 * Read the low 32 bits of %fsr (the V7/V8-compatible portion) into
 * *l, regardless of data model.
 */
extern __GNU_INLINE void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
 667 
 668 extern __GNU_INLINE void
 669 __fenv_setfsr32(const unsigned int *l)
 670 {
 671         __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
 672 }
 673 #else
 674 #error "GCC FENV inlines not implemented for this platform"
 675 #endif
 676 
 677 #ifdef __cplusplus
 678 }
 679 #endif
 680 #endif  /* __GNUC__ */
 681 #endif /* _FENV_INLINES_H */