1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2011, Richard Lowe
  14  */
  15 
  16 #ifndef _FENV_INLINES_H
  17 #define _FENV_INLINES_H
  18 
  19 #ifdef __GNUC__
  20 
  21 #ifdef __cplusplus
  22 extern "C" {
  23 #endif
  24 
  25 #include <sys/types.h>
  26 
  27 #if defined(__x86)
  28 
  29 /*
  30  * Floating point Control Word and Status Word
  31  * Definition should actually be shared with x86
  32  * (much of this 'amd64' code can be, in fact.)
  33  */
/*
 * x87 control and status words, accessible either as one packed 32-bit
 * value or as two 16-bit halves.
 *
 * NOTE(review): as used by __fenv_getcwsw/__fenv_setcwsw below, the low
 * half (named "cw") actually carries the status word and the high half
 * (named "sw") the control word, i.e. the packed value appears to be
 * (cw << 16) | sw on little-endian x86 -- confirm against the fenv.h
 * consumers before renaming anything.
 */
union fp_cwsw {
        uint32_t cwsw;
        struct {
                uint16_t cw;
                uint16_t sw;
        } words;
};
  41 
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
        union fp_cwsw ret;

        /*
         * Pack the x87 status and control words into a single 32-bit
         * value.  Note the apparent cross-wiring: fstsw (status) lands
         * in the union half named "cw" (low 16 bits) and fstcw (control)
         * in the half named "sw" (high 16 bits).  This mirrors
         * __fenv_setcwsw below, which installs the high half as the new
         * control word, so the pair is self-consistent: the packed value
         * is (control << 16) | status.
         */
        __asm__ __volatile__(
            "fstsw %0\n\t"
            "fstcw %1\n\t"
            : "=m" (ret.words.cw), "=m" (ret.words.sw));
        *value = ret.cwsw;
}
  53 
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
        union fp_cwsw cwsw;
        short fenv[16];

        cwsw.cwsw = *value;

        /*
         * Install new control and status words by editing a saved x87
         * environment image and reloading it (the status word cannot be
         * written directly).  fstenv's protected-mode layout places the
         * control word at byte offset 0 (fenv[0]) and the status word at
         * byte offset 4 (fenv[2]).
         *
         * Operand mapping: %4 (the union half named "sw", i.e. the high
         * half of the packed value, which carries the control word) goes
         * into the control-word slot, and %3 (the low half, carrying the
         * status word) into the status-word slot.  This matches the
         * packing convention used by __fenv_getcwsw above.
         *
         * The trailing fwait forces any newly unmasked pending exception
         * to be raised here rather than at some later FP instruction.
         */
        __asm__ __volatile__(
            "fstenv %0\n\t"
            "movw   %4,%1\n\t"
            "movw   %3,%2\n\t"
            "fldenv %0\n\t"
            "fwait\n\t"
            : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
            : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
            /* For practical purposes, we clobber the whole FPU */
            : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
              "st(6)", "st(7)");
}
  74 
  75 extern __inline__ void
  76 __fenv_getmxcsr(unsigned int *value)
  77 {
  78         __asm__ __volatile__("stmxcsr %1" : "+m" (*value));
  79 }
  80 
  81 extern __inline__ void
  82 __fenv_setmxcsr(const unsigned int *value)
  83 {
  84         __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
  85 }
  86 
  87 extern __inline__ long double
  88 f2xm1(long double x)
  89 {
  90         long double ret;
  91 
  92         __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x));
  93         return (ret);
  94 }
  95 
  96 extern __inline__ long double
  97 fyl2x(long double y, long double x)
  98 {
  99         long double ret;
 100 
 101         __asm__ __volatile__("fyl2x" : "=t" (ret): "0" (x), "u" (y) : "st(1)");
 102         return (ret);
 103 }
 104 
 105 extern __inline__ long double
 106 fptan(long double x)
 107 {
 108         /*
 109          * fptan pushes 1.0 then the result on completion, so we want to pop
 110          * the FP stack twice, so we need a dummy value into which to pop it.
 111          */
 112         long double ret;
 113         long double dummy;
 114 
 115         __asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));
 116         return (ret);
 117 }
 118 
 119 extern __inline__ long double
 120 fpatan(long double x, long double y)
 121 {
 122         long double ret;
 123 
 124         __asm__ __volatile__("fpatan"
 125             : "=t" (ret)
 126             : "0" (y), "u" (x)
 127             : "st(1)");
 128         return (ret);
 129 }
 130 
 131 extern __inline__ long double
 132 fxtract(long double x)
 133 {
 134         long double ret;
 135 
 136         __asm__ __volatile__("fxtract" : "=t" (ret) : "0" (x));
 137         return (ret);
 138 }
 139 
 140 extern __inline__ long double
 141 fprem1(long double idend, long double div)
 142 {
 143         long double ret;
 144 
 145         __asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
 146         return (ret);
 147 }
 148 
 149 extern __inline__ long double
 150 fprem(long double idend, long double div)
 151 {
 152         long double ret;
 153 
 154         __asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
 155         return (ret);
 156 }
 157 
 158 extern __inline__ long double
 159 fyl2xp1(long double y, long double x)
 160 {
 161         long double ret;
 162 
 163         __asm__ __volatile__("fyl2xp1"
 164             : "=t" (ret)
 165             : "0" (x), "u" (y)
 166             : "st(1)");
 167         return (ret);
 168 }
 169 
 170 extern __inline__ long double
 171 fsqrt(long double x)
 172 {
 173         long double ret;
 174 
 175         __asm__ __volatile__("fsqrt" : "=t" (ret) : "0" (x));
 176         return (ret);
 177 }
 178 
 179 extern __inline__ long double
 180 fsincos(long double x)
 181 {
 182         long double ret;
 183 
 184         __asm__ __volatile__("fsincos" : "=t" (ret) : "0" (x));
 185         return (ret);
 186 }
 187 
 188 extern __inline__ long double
 189 frndint(long double x)
 190 {
 191         long double ret;
 192 
 193         __asm__ __volatile__("frndint" : "=t" (ret) : "0" (x));
 194         return (ret);
 195 }
 196 
 197 extern __inline__ long double
 198 fscale(long double x, long double y)
 199 {
 200         long double ret;
 201 
 202         __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
 203         return (ret);
 204 }
 205 
 206 extern __inline__ long double
 207 fsin(long double x)
 208 {
 209         long double ret;
 210 
 211         __asm__ __volatile__("fsin" : "=t" (ret) : "0" (x));
 212         return (ret);
 213 }
 214 
 215 extern __inline__ long double
 216 fcos(long double x)
 217 {
 218         long double ret;
 219 
 220         __asm__ __volatile__("fcos" : "=t" (ret) : "0" (x));
 221         return (ret);
 222 }
 223 
 224 extern __inline__ void
 225 sse_cmpeqss(float *f1, float *f2, int *i1)
 226 {
 227         __asm__ __volatile__(
 228             "cmpeqss %2, %1\n\t"
 229             "movss   %1, %0"
 230             : "=m" (*i1)
 231             : "x" (*f1), "x" (*f2));
 232 }
 233 
 234 extern __inline__ void
 235 sse_cmpltss(float *f1, float *f2, int *i1)
 236 {
 237         __asm__ __volatile__(
 238             "cmpltss %2, %1\n\t"
 239             "movss   %1, %0"
 240             : "=m" (*i1)
 241             : "x" (*f1), "x" (*f2));
 242 }
 243 
 244 extern __inline__ void
 245 sse_cmpless(float *f1, float *f2, int *i1)
 246 {
 247         __asm__ __volatile__(
 248             "cmpless %2, %1\n\t"
 249             "movss   %1, %0"
 250             : "=m" (*i1)
 251             : "x" (*f1), "x" (*f2));
 252 }
 253 
 254 extern __inline__ void
 255 sse_cmpunordss(float *f1, float *f2, int *i1)
 256 {
 257         __asm__ __volatile__(
 258             "cmpunordss %2, %1\n\t"
 259             "movss      %1, %0"
 260             : "=m" (*i1)
 261             : "x" (*f1), "x" (*f2));
 262 }
 263 
 264 extern __inline__ void
 265 sse_minss(float *f1, float *f2, float *f3)
 266 {
 267         __asm__ __volatile__(
 268             "minss %2, %1\n\t"
 269             "movss %1, %0"
 270             : "=m" (*f3)
 271             : "x" (*f1), "x" (*f2));
 272 }
 273 
 274 extern __inline__ void
 275 sse_maxss(float *f1, float *f2, float *f3)
 276 {
 277         __asm__ __volatile__(
 278             "maxss %2, %1\n\t"
 279             "movss %1, %0"
 280             : "=m" (*f3)
 281             : "x" (*f1), "x" (*f2));
 282 }
 283 
 284 extern __inline__ void
 285 sse_addss(float *f1, float *f2, float *f3)
 286 {
 287         __asm__ __volatile__(
 288             "addss %2, %1\n\t"
 289             "movss %1, %0"
 290             : "=m" (*f3)
 291             : "x" (*f1), "x" (*f2));
 292 }
 293 
 294 extern __inline__ void
 295 sse_subss(float *f1, float *f2, float *f3)
 296 {
 297         __asm__ __volatile__(
 298             "subss %2, %1\n\t"
 299             "movss %1, %0"
 300             : "=m" (*f3)
 301             : "x" (*f1), "x" (*f2));
 302 }
 303 
 304 extern __inline__ void
 305 sse_mulss(float *f1, float *f2, float *f3)
 306 {
 307         __asm__ __volatile__(
 308             "mulss %2, %1\n\t"
 309             "movss %1, %0"
 310             : "=m" (*f3)
 311             : "x" (*f1), "x" (*f2));
 312 }
 313 
 314 extern __inline__ void
 315 sse_divss(float *f1, float *f2, float *f3)
 316 {
 317         __asm__ __volatile__(
 318             "divss %2, %1\n\t"
 319             "movss %1, %0"
 320             : "=m" (*f3)
 321             : "x" (*f1), "x" (*f2));
 322 }
 323 
 324 extern __inline__ void
 325 sse_sqrtss(float *f1, float *f2)
 326 {
 327         __asm__ __volatile__(
 328             "sqrtss %1, %%xmm0\n\t"
 329             "movss  %%xmm0, %0"
 330             : "=m" (*f2)
 331             : "m" (*f1)
 332             : "xmm0");
 333 }
 334 
 335 extern __inline__ void
 336 sse_ucomiss(float *f1, float *f2)
 337 {
 338         __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
 339 
 340 }
 341 
 342 extern __inline__ void
 343 sse_comiss(float *f1, float *f2)
 344 {
 345         __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
 346 }
 347 
 348 extern __inline__ void
 349 sse_cvtss2sd(float *f1, double *d1)
 350 {
 351         __asm__ __volatile__(
 352             "cvtss2sd %1, %%xmm0\n\t"
 353             "movsd    %%xmm0, %0"
 354             : "=m" (*d1)
 355             : "m" (*f1)
 356             : "xmm0");
 357 }
 358 
 359 extern __inline__ void
 360 sse_cvtsi2ss(int *i1, float *f1)
 361 {
 362         __asm__ __volatile__(
 363             "cvtsi2ss %1, %%xmm0\n\t"
 364             "movss    %%xmm0, %0"
 365             : "=m" (*f1)
 366             : "m" (*i1)
 367             : "xmm0");
 368 }
 369 
 370 extern __inline__ void
 371 sse_cvttss2si(float *f1, int *i1)
 372 {
 373         __asm__ __volatile__(
 374             "cvttss2si %1, %%ecx\n\t"
 375             "movl      %%ecx, %0"
 376             : "=m" (*i1)
 377             : "m" (*f1)
 378             : "ecx");
 379 }
 380 
 381 extern __inline__ void
 382 sse_cvtss2si(float *f1, int *i1)
 383 {
 384         __asm__ __volatile__(
 385             "cvtss2si %1, %%ecx\n\t"
 386             "movl     %%ecx, %0"
 387             : "=m" (*i1)
 388             : "m" (*f1)
 389             : "ecx");
 390 }
 391 
 392 #if defined(__amd64)
 393 extern __inline__ void
 394 sse_cvtsi2ssq(long long *ll1, float *f1)
 395 {
 396         __asm__ __volatile__(
 397             "cvtsi2ssq %1, %%xmm0\n\t"
 398             "movss     %%xmm0, %0"
 399             : "=m" (*f1)
 400             : "m" (*ll1)
 401             : "xmm0");
 402 }
 403 
 404 extern __inline__ void
 405 sse_cvttss2siq(float *f1, long long *ll1)
 406 {
 407         __asm__ __volatile__(
 408             "cvttss2siq %1, %%rcx\n\t"
 409             "movq       %%rcx, %0"
 410             : "=m" (*ll1)
 411             : "m" (*f1)
 412             : "rcx");
 413 }
 414 
 415 extern __inline__ void
 416 sse_cvtss2siq(float *f1, long long *ll1)
 417 {
 418         __asm__ __volatile__(
 419             "cvtss2siq %1, %%rcx\n\t"
 420             "movq      %%rcx, %0"
 421             : "=m" (*ll1)
 422             : "m" (*f1)
 423             : "rcx");
 424 }
 425 
 426 #endif
 427 
 428 extern __inline__ void
 429 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 430 {
 431         __asm__ __volatile__(
 432             "cmpeqsd %2,%1\n\t"
 433             "movsd   %1,%0"
 434             : "=m" (*ll1)
 435             : "x" (*d1), "x" (*d2));
 436 }
 437 
 438 extern __inline__ void
 439 sse_cmpltsd(double *d1, double *d2, long long *ll1)
 440 {
 441         __asm__ __volatile__(
 442             "cmpltsd %2,%1\n\t"
 443             "movsd   %1,%0"
 444             : "=m" (*ll1)
 445             : "x" (*d1), "x" (*d2));
 446 }
 447 
 448 extern __inline__ void
 449 sse_cmplesd(double *d1, double *d2, long long *ll1)
 450 {
 451         __asm__ __volatile__(
 452             "cmplesd %2,%1\n\t"
 453             "movsd   %1,%0"
 454             : "=m" (*ll1)
 455             : "x" (*d1), "x" (*d2));
 456 }
 457 
 458 extern __inline__ void
 459 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 460 {
 461         __asm__ __volatile__(
 462             "cmpunordsd %2,%1\n\t"
 463             "movsd      %1,%0"
 464             : "=m" (*ll1)
 465             : "x" (*d1), "x" (*d2));
 466 }
 467 
 468 
 469 extern __inline__ void
 470 sse_minsd(double *d1, double *d2, double *d3)
 471 {
 472         __asm__ __volatile__(
 473             "minsd %2,%1\n\t"
 474             "movsd %1,%0"
 475             : "=m" (*d3)
 476             : "x" (*d1), "x" (*d2));
 477 }
 478 
 479 extern __inline__ void
 480 sse_maxsd(double *d1, double *d2, double *d3)
 481 {
 482         __asm__ __volatile__(
 483             "maxsd %2,%1\n\t"
 484             "movsd %1,%0"
 485             : "=m" (*d3)
 486             : "x" (*d1), "x" (*d2));
 487 }
 488 
 489 extern __inline__ void
 490 sse_addsd(double *d1, double *d2, double *d3)
 491 {
 492         __asm__ __volatile__(
 493             "addsd %2,%1\n\t"
 494             "movsd %1,%0"
 495             : "=m" (*d3)
 496             : "x" (*d1), "x" (*d2));
 497 }
 498 
 499 extern __inline__ void
 500 sse_subsd(double *d1, double *d2, double *d3)
 501 {
 502         __asm__ __volatile__(
 503             "subsd %2,%1\n\t"
 504             "movsd %1,%0"
 505             : "=m" (*d3)
 506             : "x" (*d1), "x" (*d2));
 507 }
 508 
 509 extern __inline__ void
 510 sse_mulsd(double *d1, double *d2, double *d3)
 511 {
 512         __asm__ __volatile__(
 513             "mulsd %2,%1\n\t"
 514             "movsd %1,%0"
 515             : "=m" (*d3)
 516             : "x" (*d1), "x" (*d2));
 517 }
 518 
 519 extern __inline__ void
 520 sse_divsd(double *d1, double *d2, double *d3)
 521 {
 522         __asm__ __volatile__(
 523             "divsd %2,%1\n\t"
 524             "movsd %1,%0"
 525             : "=m" (*d3)
 526             : "x" (*d1), "x" (*d2)
 527             : "xmm0");
 528 }
 529 
 530 extern __inline__ void
 531 sse_sqrtsd(double *d1, double *d2)
 532 {
 533         __asm__ __volatile__(
 534             "sqrtsd %1, %%xmm0\n\t"
 535             "movsd %%xmm0, %0"
 536             : "=m" (*d2)
 537             : "m" (*d1)
 538             : "xmm0");
 539 }
 540 
 541 extern __inline__ void
 542 sse_ucomisd(double *d1, double *d2)
 543 {
 544         __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
 545 }
 546 
 547 extern __inline__ void
 548 sse_comisd(double *d1, double *d2)
 549 {
 550         __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
 551 }
 552 
 553 extern __inline__ void
 554 sse_cvtsd2ss(double *d1, float *f1)
 555 {
 556         __asm__ __volatile__(
 557             "cvtsd2ss %1,%%xmm0\n\t"
 558             "movss    %%xmm0,%0"
 559             : "=m" (*f1)
 560             : "m" (*d1)
 561             : "xmm0");
 562 }
 563 
 564 
 565 extern __inline__ void
 566 sse_cvtsi2sd(int *i1, double *d1)
 567 {
 568         __asm__ __volatile__(
 569             "cvtsi2sd %1,%%xmm0\n\t"
 570             "movsd    %%xmm0,%0"
 571             : "=m" (*d1)
 572             : "m" (*i1)
 573             : "xmm0");
 574 }
 575 
 576 extern __inline__ void
 577 sse_cvttsd2si(double *d1, int *i1)
 578 {
 579         __asm__ __volatile__(
 580             "cvttsd2si %1,%%ecx\n\t"
 581             "movl      %%ecx,%0"
 582             : "=m" (*i1)
 583             : "m" (*d1)
 584             : "ecx");
 585 }
 586 
 587 extern __inline__ void
 588 sse_cvtsd2si(double *d1, int *i1)
 589 {
 590         __asm__ __volatile__(
 591             "cvtsd2si %1,%%ecx\n\t"
 592             "movl     %%ecx,%0"
 593             : "=m" (*i1)
 594             : "m" (*d1)
 595             : "ecx");
 596 }
 597 
 598 #if defined(__amd64)
 599 extern __inline__ void
 600 sse_cvtsi2sdq(long long *ll1, double *d1)
 601 {
 602         __asm__ __volatile__(
 603             "cvtsi2sdq %1,%%xmm0\n\t"
 604             "movsd     %%xmm0,%0"
 605             : "=m" (*d1)
 606             : "m" (*ll1)
 607             : "xmm0");
 608 }
 609 
 610 extern __inline__ void
 611 sse_cvttsd2siq(double *d1, long long *ll1)
 612 {
 613         __asm__ __volatile__(
 614             "cvttsd2siq %1,%%rcx\n\t"
 615             "movq       %%rcx,%0"
 616             : "=m" (*ll1)
 617             : "m" (*d1)
 618             : "rcx");
 619 }
 620 
 621 extern __inline__ void
 622 sse_cvtsd2siq(double *d1, long long *ll1)
 623 {
 624         __asm__ __volatile__(
 625             "cvtsd2siq %1,%%rcx\n\t"
 626             "movq      %%rcx,%0"
 627             : "=m" (*ll1)
 628             : "m" (*d1)
 629             : "rcx");
 630 }
 631 #endif
 632 #elif defined(__sparc)
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
    /*
     * Store the SPARC floating-point state register (FSR) into *l.
     * SPARCv9 stores the full 64-bit FSR with stx; 32-bit SPARC stores
     * the 32-bit FSR with st (unsigned long matches each width).
     */
    __asm__ __volatile__(
#if defined(__sparcv9)
        "stx %%fsr,%0\n\t"
#else
        "st  %%fsr,%0\n\t"
#endif
        : "=m" (*l));
}
 644 
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
    /*
     * Load the SPARC floating-point state register (FSR) from *l.
     * SPARCv9 loads the full 64-bit FSR with ldx; 32-bit SPARC loads
     * the 32-bit FSR with ld.
     */
    __asm__ __volatile__(
#if defined(__sparcv9)
        "ldx %0,%%fsr\n\t"
#else
        "ld %0,%%fsr\n\t"
#endif
        : : "m" (*l));
}
 656 
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
    /* Store the low 32 bits of the SPARC FSR into *l. */
    __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
 662 
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
    /* Load the low 32 bits of the SPARC FSR from *l. */
    __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
 668 #else
 669 #error "GCC FENV inlines not implemented for this platform"
 670 #endif
 671 
 672 #ifdef __cplusplus
 673 }
 674 #endif
 675 
 676 #endif  /* __GNUC__ */
 677 
 678 #endif /* _FENV_INLINES_H */