1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011, Richard Lowe
  14  */
  15 
  16 #ifndef _FENV_INLINES_H
  17 #define _FENV_INLINES_H
  18 
  19 #ifdef __GNUC__
  20 
  21 #ifdef __cplusplus
  22 extern "C" {
  23 #endif
  24 
  25 #include <sys/types.h>
  26 
  27 #if defined(__x86)
  28 
  29 /*
  30  * Floating point Control Word and Status Word
  31  * Definition should actually be shared with x86
  32  * (much of this 'amd64' code can be, in fact.)
  33  */
/*
 * Overlay of the packed 32-bit control-word/status-word value with its
 * 16-bit halves.
 *
 * NOTE(review): __fenv_getcwsw() below stores the x87 *status* word through
 * `words.cw' and the *control* word through `words.sw', and __fenv_setcwsw()
 * consumes the fields the same way, so the field names appear reversed with
 * respect to their contents (on little-endian x86 `cw' is the low half of
 * `cwsw').  The two accessors are consistent with each other, so behavior
 * is coherent -- verify the callers' cwsw packing before renaming anything.
 */
union fp_cwsw {
        uint32_t cwsw;
        struct {
                uint16_t cw;
                uint16_t sw;
        } words;
};
  41 
/*
 * Read the x87 control and status words into the packed 32-bit value.
 * Operand binding: fstsw (status word) writes %0 == words.cw and fstcw
 * (control word) writes %1 == words.sw; this mirrors the swapped-looking
 * field use in __fenv_setcwsw(), so get and set agree on the packing.
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
        union fp_cwsw *u = (union fp_cwsw *)value;

        __asm__ __volatile__(
            "fstsw %0\n\t"
            "fstcw %1\n\t"
            : "=m" (u->words.cw), "=m" (u->words.sw));
}
  52 
/*
 * Load new x87 control and status words from the packed 32-bit value.
 * The status word cannot be written directly, so dump the FPU environment
 * with fstenv, patch the control-word slot (fenv[0]) and status-word slot
 * (fenv[2]) of the protected-mode layout, then reload it with fldenv.
 * Operand use matches __fenv_getcwsw()'s swapped field naming: %4
 * (words.sw) goes into the control-word slot and %3 (words.cw) into the
 * status-word slot.
 * NOTE(review): fstenv masks all FP exceptions as a side effect until the
 * fldenv completes -- intentional here, but worth knowing.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
        union fp_cwsw cwsw;
        short fenv[16];

        cwsw.cwsw = *value;

        __asm__ __volatile__(
            "fstenv %0\n\t"
            "movw   %4,%1\n\t"
            "movw   %3,%2\n\t"
            "fldenv %0\n\t"
            "fwait\n\t"
            : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
            : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
            /* For practical purposes, we clobber the whole FPU */
            : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
              "st(6)", "st(7)");
}
  73 
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
        /* Store the SSE control/status register (MXCSR) into *value. */
        __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}
  79 
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
        /* Load MXCSR (SSE control/status register) from *value. */
        __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
  85 
  86 extern __inline__ long double
  87 f2xm1(long double x)
  88 {
  89         long double ret;
  90 
  91         __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
  92         return (ret);
  93 }
  94 
  95 extern __inline__ long double
  96 fyl2x(long double y, long double x)
  97 {
  98         long double ret;
  99 
 100         __asm__ __volatile__("fyl2x"
 101             : "=t" (ret)
 102             : "0" (x), "u" (y)
 103             : "st(1)", "cc");
 104         return (ret);
 105 }
 106 
 107 extern __inline__ long double
 108 fptan(long double x)
 109 {
 110         /*
 111          * fptan pushes 1.0 then the result on completion, so we want to pop
 112          * the FP stack twice, so we need a dummy value into which to pop it.
 113          */
 114         long double ret;
 115         long double dummy;
 116 
 117         __asm__ __volatile__("fptan"
 118             : "=t" (dummy), "=u" (ret)
 119             : "0" (x)
 120             : "cc");
 121         return (ret);
 122 }
 123 
 124 extern __inline__ long double
 125 fpatan(long double x, long double y)
 126 {
 127         long double ret;
 128 
 129         __asm__ __volatile__("fpatan"
 130             : "=t" (ret)
 131             : "0" (y), "u" (x)
 132             : "st(1)", "cc");
 133         return (ret);
 134 }
 135 
extern __inline__ long double
fxtract(long double x)
{
        /*
         * fxtract splits st(0) into exponent and significand, pushing the
         * significand, so "+t" (x) returns the significand from st(0).
         * NOTE(review): the exponent remains in st(1) after this asm;
         * confirm callers tolerate the extra FP-stack entry.
         */
        __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
        return (x);
}
 142 
extern __inline__ long double
fprem1(long double idend, long double div)
{
        /*
         * IEEE partial remainder (fprem1): computes st(0) REM st(1).
         * Per the constraints, st(0) is bound to `div' and st(1) to
         * `idend', so the returned value is div REM idend -- i.e. the
         * second parameter is the dividend and the first the divisor,
         * despite what the names suggest.
         */
        __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
        return (div);
}
 149 
extern __inline__ long double
fprem(long double idend, long double div)
{
        /*
         * Truncating partial remainder (fprem): st(0) REM st(1), with
         * st(0) = `div' and st(1) = `idend', so the result is
         * div REM idend (same parameter-role quirk as fprem1() above).
         */
        __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
        return (div);
}
 156 
 157 extern __inline__ long double
 158 fyl2xp1(long double y, long double x)
 159 {
 160         long double ret;
 161 
 162         __asm__ __volatile__("fyl2xp1"
 163             : "=t" (ret)
 164             : "0" (x), "u" (y)
 165             : "st(1)", "cc");
 166         return (ret);
 167 }
 168 
 169 extern __inline__ long double
 170 fsqrt(long double x)
 171 {
 172         __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
 173         return (x);
 174 }
 175 
extern __inline__ long double
fsincos(long double x)
{
        /*
         * fsincos replaces st(0) with sin(x) and then pushes cos(x), so
         * "+t" (x) yields cos(x) while the sine remains in st(1).
         * NOTE(review): callers get the cosine and an extra FP-stack
         * entry is left behind -- confirm that is the intended contract.
         */
        __asm__ __volatile__("fsincos" : "+t" (x) : : "cc");
        return (x);
}
 182 
 183 extern __inline__ long double
 184 frndint(long double x)
 185 {
 186         __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
 187         return (x);
 188 }
 189 
 190 extern __inline__ long double
 191 fscale(long double x, long double y)
 192 {
 193         long double ret;
 194 
 195         __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
 196         return (ret);
 197 }
 198 
 199 extern __inline__ long double
 200 fsin(long double x)
 201 {
 202         __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
 203         return (x);
 204 }
 205 
 206 extern __inline__ long double
 207 fcos(long double x)
 208 {
 209         __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
 210         return (x);
 211 }
 212 
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
        /*
         * *i1 = all-ones mask if *f1 == *f2 (ordered), else all-zeroes.
         * The mask is also written back through the "+x" (*f1) operand.
         */
        __asm__ __volatile__(
            "cmpeqss %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}
 223 
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
        /*
         * *i1 = all-ones mask if *f1 < *f2 (ordered), else all-zeroes;
         * the mask also lands in *f1 via "+x".
         */
        __asm__ __volatile__(
            "cmpltss %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}
 234 
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
        /*
         * *i1 = all-ones mask if *f1 <= *f2 (ordered), else all-zeroes;
         * the mask also lands in *f1 via "+x".
         */
        __asm__ __volatile__(
            "cmpless %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}
 245 
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
        /*
         * *i1 = all-ones mask if *f1 and *f2 are unordered (either is
         * NaN), else all-zeroes; the mask also lands in *f1 via "+x".
         */
        __asm__ __volatile__(
            "cmpunordss %2, %1\n\t"
            "movss      %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}
 256 
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
        /* *f3 = min(*f1, *f2); the result is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "minss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 266 
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
        /* *f3 = max(*f1, *f2); the result is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "maxss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 276 
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
        /* *f3 = *f1 + *f2; the sum is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "addss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 286 
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
        /* *f3 = *f1 - *f2; the difference is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "subss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 296 
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
        /* *f3 = *f1 * *f2; the product is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "mulss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 306 
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
        /* *f3 = *f1 / *f2; the quotient is also left in *f1 ("+x"). */
        __asm__ __volatile__(
            "divss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 316 
 317 extern __inline__ void
 318 sse_sqrtss(float *f1, float *f2)
 319 {
 320         double tmp;
 321 
 322         __asm__ __volatile__(
 323             "sqrtss %2, %1\n\t"
 324             "movss  %1, %0"
 325             : "=m" (*f2), "=x" (tmp)
 326             : "m" (*f1));
 327 }
 328 
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
        /*
         * Unordered single-precision compare of *f1 with *f2; the result
         * goes only to EFLAGS, which this wrapper does not read --
         * presumably executed for its FP-exception side effects (verify
         * against callers).
         */
        __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}
 335 
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
        /*
         * Ordered single-precision compare of *f1 with *f2 (signals on
         * QNaN, unlike ucomiss); result goes only to EFLAGS, presumably
         * executed for its FP-exception side effects.
         */
        __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}
 341 
 342 extern __inline__ void
 343 sse_cvtss2sd(float *f1, double *d1)
 344 {
 345         double tmp;
 346 
 347         __asm__ __volatile__(
 348             "cvtss2sd %2, %1\n\t"
 349             "movsd    %1, %0"
 350             : "=m" (*d1), "=x" (tmp)
 351             : "m" (*f1));
 352 }
 353 
 354 extern __inline__ void
 355 sse_cvtsi2ss(int *i1, float *f1)
 356 {
 357         double tmp;
 358 
 359         __asm__ __volatile__(
 360             "cvtsi2ss %2, %1\n\t"
 361             "movss    %1, %0"
 362             : "=m" (*f1), "=x" (tmp)
 363             : "m" (*i1));
 364 }
 365 
 366 extern __inline__ void
 367 sse_cvttss2si(float *f1, int *i1)
 368 {
 369         int tmp;
 370 
 371         __asm__ __volatile__(
 372             "cvttss2si %2, %1\n\t"
 373             "movl      %1, %0"
 374             : "=m" (*i1), "=r" (tmp)
 375             : "m" (*f1));
 376 }
 377 
 378 extern __inline__ void
 379 sse_cvtss2si(float *f1, int *i1)
 380 {
 381         int tmp;
 382 
 383         __asm__ __volatile__(
 384             "cvtss2si %2, %1\n\t"
 385             "movl     %1, %0"
 386             : "=m" (*i1), "=r" (tmp)
 387             : "m" (*f1));
 388 }
 389 
#if defined(__amd64)
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
        /* *f1 = (float)*ll1, 64-bit signed conversion (amd64 only). */
        double tmp;

        __asm__ __volatile__(
            "cvtsi2ssq %2, %1\n\t"
            "movss     %1, %0"
            : "=m" (*f1), "=x" (tmp)
            : "m" (*ll1));
}
 402 
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
        /* *ll1 = (long long)*f1, truncating toward zero (amd64 only). */
        uint64_t tmp;

        __asm__ __volatile__(
            "cvttss2siq %2, %1\n\t"
            "movq       %1, %0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*f1));
}
 414 
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
        /*
         * *ll1 = *f1 rounded per the current MXCSR rounding mode
         * (amd64 only).
         */
        uint64_t tmp;

        __asm__ __volatile__(
            "cvtss2siq %2, %1\n\t"
            "movq      %1, %0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*f1));
}
 426 
 427 #endif
 428 
 429 extern __inline__ void
 430 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 431 {
 432         __asm__ __volatile__(
 433             "cmpeqsd %2,%1\n\t"
 434             "movsd   %1,%0"
 435             : "=m" (*ll1), "=x" (*d1)
 436             : "x" (*d2));
 437 }
 438 
 439 extern __inline__ void
 440 sse_cmpltsd(double *d1, double *d2, long long *ll1)
 441 {
 442         __asm__ __volatile__(
 443             "cmpltsd %2,%1\n\t"
 444             "movsd   %1,%0"
 445             : "=m" (*ll1), "=x" (*d1)
 446             : "x" (*d2));
 447 }
 448 
 449 extern __inline__ void
 450 sse_cmplesd(double *d1, double *d2, long long *ll1)
 451 {
 452         __asm__ __volatile__(
 453             "cmplesd %2,%1\n\t"
 454             "movsd   %1,%0"
 455             : "=m" (*ll1), "=x" (*d1)
 456             : "x" (*d2));
 457 }
 458 
 459 extern __inline__ void
 460 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 461 {
 462         __asm__ __volatile__(
 463             "cmpunordsd %2,%1\n\t"
 464             "movsd      %1,%0"
 465             : "=m" (*ll1), "=x" (*d1)
 466             : "x" (*d2));
 467 }
 468 
 469 
 470 extern __inline__ void
 471 sse_minsd(double *d1, double *d2, double *d3)
 472 {
 473         __asm__ __volatile__(
 474             "minsd %2,%1\n\t"
 475             "movsd %1,%0"
 476             : "=m" (*d3), "=x" (*d1)
 477             : "x" (*d2));
 478 }
 479 
 480 extern __inline__ void
 481 sse_maxsd(double *d1, double *d2, double *d3)
 482 {
 483         __asm__ __volatile__(
 484             "maxsd %2,%1\n\t"
 485             "movsd %1,%0"
 486             : "=m" (*d3), "=x" (*d1)
 487             : "x" (*d2));
 488 }
 489 
 490 extern __inline__ void
 491 sse_addsd(double *d1, double *d2, double *d3)
 492 {
 493         __asm__ __volatile__(
 494             "addsd %2,%1\n\t"
 495             "movsd %1,%0"
 496             : "=m" (*d3), "=x" (*d1)
 497             : "x" (*d2));
 498 }
 499 
 500 extern __inline__ void
 501 sse_subsd(double *d1, double *d2, double *d3)
 502 {
 503         __asm__ __volatile__(
 504             "subsd %2,%1\n\t"
 505             "movsd %1,%0"
 506             : "=m" (*d3), "=x" (*d1)
 507             : "x" (*d2));
 508 }
 509 
 510 extern __inline__ void
 511 sse_mulsd(double *d1, double *d2, double *d3)
 512 {
 513         __asm__ __volatile__(
 514             "mulsd %2,%1\n\t"
 515             "movsd %1,%0"
 516             : "=m" (*d3), "=x" (*d1)
 517             : "x" (*d2));
 518 }
 519 
 520 extern __inline__ void
 521 sse_divsd(double *d1, double *d2, double *d3)
 522 {
 523         __asm__ __volatile__(
 524             "divsd %2,%1\n\t"
 525             "movsd %1,%0"
 526             : "=m" (*d3), "=x" (*d1)
 527             : "x" (*d2));
 528 }
 529 
 530 extern __inline__ void
 531 sse_sqrtsd(double *d1, double *d2)
 532 {
 533         double tmp;
 534 
 535         __asm__ __volatile__(
 536             "sqrtsd %2, %1\n\t"
 537             "movsd %1, %0"
 538             : "=m" (*d2), "=x" (tmp)
 539             : "m" (*d1));
 540 }
 541 
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
        /*
         * Unordered double-precision compare of *d1 with *d2; result
         * goes only to EFLAGS, which is not read here -- presumably
         * executed for its FP-exception side effects.
         */
        __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}
 547 
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
        /*
         * Ordered double-precision compare of *d1 with *d2 (signals on
         * QNaN, unlike ucomisd); result goes only to EFLAGS, presumably
         * executed for its FP-exception side effects.
         */
        __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}
 553 
 554 extern __inline__ void
 555 sse_cvtsd2ss(double *d1, float *f1)
 556 {
 557         double tmp;
 558 
 559         __asm__ __volatile__(
 560             "cvtsd2ss %2,%1\n\t"
 561             "movss    %1,%0"
 562             : "=m" (*f1), "=x" (tmp)
 563             : "m" (*d1));
 564 }
 565 
 566 extern __inline__ void
 567 sse_cvtsi2sd(int *i1, double *d1)
 568 {
 569         double tmp;
 570         __asm__ __volatile__(
 571             "cvtsi2sd %2,%1\n\t"
 572             "movsd    %1,%0"
 573             : "=m" (*d1), "=x" (tmp)
 574             : "m" (*i1));
 575 }
 576 
 577 extern __inline__ void
 578 sse_cvttsd2si(double *d1, int *i1)
 579 {
 580         int tmp;
 581 
 582         __asm__ __volatile__(
 583             "cvttsd2si %2,%1\n\t"
 584             "movl      %1,%0"
 585             : "=m" (*i1), "=r" (tmp)
 586             : "m" (*d1));
 587 }
 588 
 589 extern __inline__ void
 590 sse_cvtsd2si(double *d1, int *i1)
 591 {
 592         int tmp;
 593 
 594         __asm__ __volatile__(
 595             "cvtsd2si %2,%1\n\t"
 596             "movl     %1,%0"
 597             : "=m" (*i1), "=r" (tmp)
 598             : "m" (*d1));
 599 }
 600 
#if defined(__amd64)
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
        /* *d1 = (double)*ll1, 64-bit signed conversion (amd64 only). */
        double tmp;

        __asm__ __volatile__(
            "cvtsi2sdq %2,%1\n\t"
            "movsd     %1,%0"
            : "=m" (*d1), "=x" (tmp)
            : "m" (*ll1));
}
 613 
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
        /* *ll1 = (long long)*d1, truncating toward zero (amd64 only). */
        uint64_t tmp;

        __asm__ __volatile__(
            "cvttsd2siq %2,%1\n\t"
            "movq       %1,%0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*d1));
}
 625 
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
        /*
         * *ll1 = *d1 rounded per the current MXCSR rounding mode
         * (amd64 only).
         */
        uint64_t tmp;

        __asm__ __volatile__(
            "cvtsd2siq %2,%1\n\t"
            "movq      %1,%0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*d1));
}
 637 #endif
 638 
 639 #elif defined(__sparc)
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
        /*
         * Store the SPARC floating-point state register into *l:
         * 64-bit stx on sparcv9, 32-bit st otherwise (matching the
         * width of unsigned long on each ABI).
         */
        __asm__ __volatile__(
#if defined(__sparcv9)
                "stx %%fsr,%0\n\t"
#else
                "st  %%fsr,%0\n\t"
#endif
                : "=m" (*l));
}
 651 
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
        /*
         * Load the SPARC floating-point state register from *l:
         * 64-bit ldx on sparcv9, 32-bit ld otherwise.  The FSR holds
         * the condition codes, hence the "cc" clobber.
         */
        __asm__ __volatile__(
#if defined(__sparcv9)
                "ldx %0,%%fsr\n\t"
#else
                "ld %0,%%fsr\n\t"
#endif
                : : "m" (*l) : "cc");
}
 663 
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
        /* Store the low 32 bits of the FSR into *l (always 32-bit st). */
        __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
 669 
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
        /*
         * Load the low 32 bits of the FSR from *l (always 32-bit ld).
         * NOTE(review): unlike __fenv_setfsr() above, no "cc" clobber is
         * declared here -- confirm whether that asymmetry is intended.
         */
        __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
 675 #else
 676 #error "GCC FENV inlines not implemented for this platform"
 677 #endif
 678 
 679 #ifdef __cplusplus
 680 }
 681 #endif
 682 
 683 #endif  /* __GNUC__ */
 684 
 685 #endif /* _FENV_INLINES_H */