Print this page




  25 #include <sys/types.h>
  26 
  27 #if defined(__x86)
  28 
  29 /*
  30  * Floating point Control Word and Status Word
  31  * Definition should actually be shared with x86
  32  * (much of this 'amd64' code can be, in fact.)
  33  */
/*
 * Overlay of the two 16-bit x87 words as a single 32-bit value; the
 * first field occupies the low 16 bits on this little-endian (x86-only)
 * code path.
 * NOTE(review): the asm that fills this union stores the STATUS word
 * into .cw and the CONTROL word into .sw -- the field naming looks
 * swapped relative to the instructions; verify against fenv.h users.
 */
union fp_cwsw {
        uint32_t cwsw;          /* combined 32-bit view */
        struct {
                uint16_t cw;    /* low half */
                uint16_t sw;    /* high half */
        } words;
};
  41 
  42 extern __inline__ void
  43 __fenv_getcwsw(unsigned int *value)
  44 {
  45         union fp_cwsw ret;
  46 
  47         __asm__ __volatile__(
  48             "fstsw %0\n\t"
  49             "fstcw %1\n\t"
  50             : "=m" (ret.words.cw), "=m" (ret.words.sw));
  51         *value = ret.cwsw;
  52 }
  53 
  54 extern __inline__ void
  55 __fenv_setcwsw(const unsigned int *value)
  56 {
  57         union fp_cwsw cwsw;
  58         short fenv[16];
  59 
  60         cwsw.cwsw = *value;
  61 
  62         __asm__ __volatile__(
  63             "fstenv %0\n\t"
  64             "movw   %4,%1\n\t"
  65             "movw   %3,%2\n\t"
  66             "fldenv %0\n\t"
  67             "fwait\n\t"
  68             : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
  69             : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
  70             /* For practical purposes, we clobber the whole FPU */
  71             : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
  72               "st(6)", "st(7)");
  73 }
  74 
  75 extern __inline__ void
  76 __fenv_getmxcsr(unsigned int *value)
  77 {
  78         __asm__ __volatile__("stmxcsr %1" : "+m" (*value));
  79 }
  80 
  81 extern __inline__ void
  82 __fenv_setmxcsr(const unsigned int *value)
  83 {
  84         __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
  85 }
  86 
  87 extern __inline__ long double
  88 f2xm1(long double x)
  89 {
  90         long double ret;
  91 
  92         __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x));
  93         return (ret);
  94 }
  95 
  96 extern __inline__ long double
  97 fyl2x(long double y, long double x)
  98 {
  99         long double ret;
 100 
 101         __asm__ __volatile__("fyl2x" : "=t" (ret): "0" (x), "u" (y) : "st(1)");



 102         return (ret);
 103 }
 104 
 105 extern __inline__ long double
 106 fptan(long double x)
 107 {
 108         /*
 109          * fptan pushes 1.0 then the result on completion, so we want to pop
 110          * the FP stack twice, so we need a dummy value into which to pop it.
 111          */
 112         long double ret;
 113         long double dummy;
 114 
 115         __asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));



 116         return (ret);
 117 }
 118 
 119 extern __inline__ long double
 120 fpatan(long double x, long double y)
 121 {
 122         long double ret;
 123 
 124         __asm__ __volatile__("fpatan"
 125             : "=t" (ret)
 126             : "0" (y), "u" (x)
 127             : "st(1)");
 128         return (ret);
 129 }
 130 
 131 extern __inline__ long double
 132 fxtract(long double x)
 133 {
 134         long double ret;
 135 
 136         __asm__ __volatile__("fxtract" : "=t" (ret) : "0" (x));
 137         return (ret);
 138 }
 139 
 140 extern __inline__ long double
 141 fprem1(long double idend, long double div)
 142 {
 143         long double ret;
 144 
 145         __asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
 146         return (ret);
 147 }
 148 
 149 extern __inline__ long double
 150 fprem(long double idend, long double div)
 151 {
 152         long double ret;
 153 
 154         __asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
 155         return (ret);
 156 }
 157 
 158 extern __inline__ long double
 159 fyl2xp1(long double y, long double x)
 160 {
 161         long double ret;
 162 
 163         __asm__ __volatile__("fyl2xp1"
 164             : "=t" (ret)
 165             : "0" (x), "u" (y)
 166             : "st(1)");
 167         return (ret);
 168 }
 169 
 170 extern __inline__ long double
 171 fsqrt(long double x)
 172 {
 173         long double ret;
 174 
 175         __asm__ __volatile__("fsqrt" : "=t" (ret) : "0" (x));
 176         return (ret);
 177 }
 178 
/*
 * fsincos computes sin(x), then pushes cos(x): st0 = cos(x),
 * st1 = sin(x).  "=t" therefore returns cos(x).
 * NOTE(review): only one value is popped, so sin(x) appears to be left
 * on the x87 stack, and despite the name only the cosine is returned --
 * verify intent against callers before changing.
 */
extern __inline__ long double
fsincos(long double x)
{
        long double ret;

        __asm__ __volatile__("fsincos" : "=t" (ret) : "0" (x));
        return (ret);
}
 187 
 188 extern __inline__ long double
 189 frndint(long double x)
 190 {
 191         long double ret;
 192 
 193         __asm__ __volatile__("frndint" : "=t" (ret) : "0" (x));
 194         return (ret);
 195 }
 196 
 197 extern __inline__ long double
 198 fscale(long double x, long double y)
 199 {
 200         long double ret;
 201 
 202         __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
 203         return (ret);
 204 }
 205 
 206 extern __inline__ long double
 207 fsin(long double x)
 208 {
 209         long double ret;
 210 
 211         __asm__ __volatile__("fsin" : "=t" (ret) : "0" (x));
 212         return (ret);
 213 }
 214 
 215 extern __inline__ long double
 216 fcos(long double x)
 217 {
 218         long double ret;
 219 
 220         __asm__ __volatile__("fcos" : "=t" (ret) : "0" (x));
 221         return (ret);
 222 }
 223 
 224 extern __inline__ void
 225 sse_cmpeqss(float *f1, float *f2, int *i1)
 226 {
 227         __asm__ __volatile__(
 228             "cmpeqss %2, %1\n\t"
 229             "movss   %1, %0"
 230             : "=m" (*i1)
 231             : "x" (*f1), "x" (*f2));

 232 }
 233 
 234 extern __inline__ void
 235 sse_cmpltss(float *f1, float *f2, int *i1)
 236 {
 237         __asm__ __volatile__(
 238             "cmpltss %2, %1\n\t"
 239             "movss   %1, %0"
 240             : "=m" (*i1)
 241             : "x" (*f1), "x" (*f2));

 242 }
 243 
 244 extern __inline__ void
 245 sse_cmpless(float *f1, float *f2, int *i1)
 246 {
 247         __asm__ __volatile__(
 248             "cmpless %2, %1\n\t"
 249             "movss   %1, %0"
 250             : "=m" (*i1)
 251             : "x" (*f1), "x" (*f2));

 252 }
 253 
 254 extern __inline__ void
 255 sse_cmpunordss(float *f1, float *f2, int *i1)
 256 {
 257         __asm__ __volatile__(
 258             "cmpunordss %2, %1\n\t"
 259             "movss      %1, %0"
 260             : "=m" (*i1)
 261             : "x" (*f1), "x" (*f2));

 262 }
 263 
 264 extern __inline__ void
 265 sse_minss(float *f1, float *f2, float *f3)
 266 {
 267         __asm__ __volatile__(
 268             "minss %2, %1\n\t"
 269             "movss %1, %0"
 270             : "=m" (*f3)
 271             : "x" (*f1), "x" (*f2));
 272 }
 273 
 274 extern __inline__ void
 275 sse_maxss(float *f1, float *f2, float *f3)
 276 {
 277         __asm__ __volatile__(
 278             "maxss %2, %1\n\t"
 279             "movss %1, %0"
 280             : "=m" (*f3)
 281             : "x" (*f1), "x" (*f2));
 282 }
 283 
 284 extern __inline__ void
 285 sse_addss(float *f1, float *f2, float *f3)
 286 {
 287         __asm__ __volatile__(
 288             "addss %2, %1\n\t"
 289             "movss %1, %0"
 290             : "=m" (*f3)
 291             : "x" (*f1), "x" (*f2));
 292 }
 293 
 294 extern __inline__ void
 295 sse_subss(float *f1, float *f2, float *f3)
 296 {
 297         __asm__ __volatile__(
 298             "subss %2, %1\n\t"
 299             "movss %1, %0"
 300             : "=m" (*f3)
 301             : "x" (*f1), "x" (*f2));
 302 }
 303 
 304 extern __inline__ void
 305 sse_mulss(float *f1, float *f2, float *f3)
 306 {
 307         __asm__ __volatile__(
 308             "mulss %2, %1\n\t"
 309             "movss %1, %0"
 310             : "=m" (*f3)
 311             : "x" (*f1), "x" (*f2));
 312 }
 313 
 314 extern __inline__ void
 315 sse_divss(float *f1, float *f2, float *f3)
 316 {
 317         __asm__ __volatile__(
 318             "divss %2, %1\n\t"
 319             "movss %1, %0"
 320             : "=m" (*f3)
 321             : "x" (*f1), "x" (*f2));
 322 }
 323 
 324 extern __inline__ void
 325 sse_sqrtss(float *f1, float *f2)
 326 {


 327         __asm__ __volatile__(
 328             "sqrtss %1, %%xmm0\n\t"
 329             "movss  %%xmm0, %0"
 330             : "=m" (*f2)
 331             : "m" (*f1)
 332             : "xmm0");
 333 }
 334 
 335 extern __inline__ void
 336 sse_ucomiss(float *f1, float *f2)
 337 {
 338         __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
 339 
 340 }
 341 
 342 extern __inline__ void
 343 sse_comiss(float *f1, float *f2)
 344 {
 345         __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
 346 }
 347 
 348 extern __inline__ void
 349 sse_cvtss2sd(float *f1, double *d1)
 350 {


 351         __asm__ __volatile__(
 352             "cvtss2sd %1, %%xmm0\n\t"
 353             "movsd    %%xmm0, %0"
 354             : "=m" (*d1)
 355             : "m" (*f1)
 356             : "xmm0");
 357 }
 358 
 359 extern __inline__ void
 360 sse_cvtsi2ss(int *i1, float *f1)
 361 {


 362         __asm__ __volatile__(
 363             "cvtsi2ss %1, %%xmm0\n\t"
 364             "movss    %%xmm0, %0"
 365             : "=m" (*f1)
 366             : "m" (*i1)
 367             : "xmm0");
 368 }
 369 
 370 extern __inline__ void
 371 sse_cvttss2si(float *f1, int *i1)
 372 {


 373         __asm__ __volatile__(
 374             "cvttss2si %1, %%ecx\n\t"
 375             "movl      %%ecx, %0"
 376             : "=m" (*i1)
 377             : "m" (*f1)
 378             : "ecx");
 379 }
 380 
 381 extern __inline__ void
 382 sse_cvtss2si(float *f1, int *i1)
 383 {


 384         __asm__ __volatile__(
 385             "cvtss2si %1, %%ecx\n\t"
 386             "movl     %%ecx, %0"
 387             : "=m" (*i1)
 388             : "m" (*f1)
 389             : "ecx");
 390 }
 391 
 392 #if defined(__amd64)
 393 extern __inline__ void
 394 sse_cvtsi2ssq(long long *ll1, float *f1)
 395 {


 396         __asm__ __volatile__(
 397             "cvtsi2ssq %1, %%xmm0\n\t"
 398             "movss     %%xmm0, %0"
 399             : "=m" (*f1)
 400             : "m" (*ll1)
 401             : "xmm0");
 402 }
 403 
 404 extern __inline__ void
 405 sse_cvttss2siq(float *f1, long long *ll1)
 406 {


 407         __asm__ __volatile__(
 408             "cvttss2siq %1, %%rcx\n\t"
 409             "movq       %%rcx, %0"
 410             : "=m" (*ll1)
 411             : "m" (*f1)
 412             : "rcx");
 413 }
 414 
 415 extern __inline__ void
 416 sse_cvtss2siq(float *f1, long long *ll1)
 417 {


 418         __asm__ __volatile__(
 419             "cvtss2siq %1, %%rcx\n\t"
 420             "movq      %%rcx, %0"
 421             : "=m" (*ll1)
 422             : "m" (*f1)
 423             : "rcx");
 424 }
 425 
 426 #endif
 427 
 428 extern __inline__ void
 429 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 430 {
 431         __asm__ __volatile__(
 432             "cmpeqsd %2,%1\n\t"
 433             "movsd   %1,%0"
 434             : "=m" (*ll1)
 435             : "x" (*d1), "x" (*d2));
 436 }
 437 
 438 extern __inline__ void
 439 sse_cmpltsd(double *d1, double *d2, long long *ll1)
 440 {
 441         __asm__ __volatile__(
 442             "cmpltsd %2,%1\n\t"
 443             "movsd   %1,%0"
 444             : "=m" (*ll1)
 445             : "x" (*d1), "x" (*d2));
 446 }
 447 
 448 extern __inline__ void
 449 sse_cmplesd(double *d1, double *d2, long long *ll1)
 450 {
 451         __asm__ __volatile__(
 452             "cmplesd %2,%1\n\t"
 453             "movsd   %1,%0"
 454             : "=m" (*ll1)
 455             : "x" (*d1), "x" (*d2));
 456 }
 457 
 458 extern __inline__ void
 459 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 460 {
 461         __asm__ __volatile__(
 462             "cmpunordsd %2,%1\n\t"
 463             "movsd      %1,%0"
 464             : "=m" (*ll1)
 465             : "x" (*d1), "x" (*d2));
 466 }
 467 
 468 
 469 extern __inline__ void
 470 sse_minsd(double *d1, double *d2, double *d3)
 471 {
 472         __asm__ __volatile__(
 473             "minsd %2,%1\n\t"
 474             "movsd %1,%0"
 475             : "=m" (*d3)
 476             : "x" (*d1), "x" (*d2));
 477 }
 478 
 479 extern __inline__ void
 480 sse_maxsd(double *d1, double *d2, double *d3)
 481 {
 482         __asm__ __volatile__(
 483             "maxsd %2,%1\n\t"
 484             "movsd %1,%0"
 485             : "=m" (*d3)
 486             : "x" (*d1), "x" (*d2));
 487 }
 488 
 489 extern __inline__ void
 490 sse_addsd(double *d1, double *d2, double *d3)
 491 {
 492         __asm__ __volatile__(
 493             "addsd %2,%1\n\t"
 494             "movsd %1,%0"
 495             : "=m" (*d3)
 496             : "x" (*d1), "x" (*d2));
 497 }
 498 
 499 extern __inline__ void
 500 sse_subsd(double *d1, double *d2, double *d3)
 501 {
 502         __asm__ __volatile__(
 503             "subsd %2,%1\n\t"
 504             "movsd %1,%0"
 505             : "=m" (*d3)
 506             : "x" (*d1), "x" (*d2));
 507 }
 508 
 509 extern __inline__ void
 510 sse_mulsd(double *d1, double *d2, double *d3)
 511 {
 512         __asm__ __volatile__(
 513             "mulsd %2,%1\n\t"
 514             "movsd %1,%0"
 515             : "=m" (*d3)
 516             : "x" (*d1), "x" (*d2));
 517 }
 518 
 519 extern __inline__ void
 520 sse_divsd(double *d1, double *d2, double *d3)
 521 {
 522         __asm__ __volatile__(
 523             "divsd %2,%1\n\t"
 524             "movsd %1,%0"
 525             : "=m" (*d3)
 526             : "x" (*d1), "x" (*d2)
 527             : "xmm0");
 528 }
 529 
 530 extern __inline__ void
 531 sse_sqrtsd(double *d1, double *d2)
 532 {


 533         __asm__ __volatile__(
 534             "sqrtsd %1, %%xmm0\n\t"
 535             "movsd %%xmm0, %0"
 536             : "=m" (*d2)
 537             : "m" (*d1)
 538             : "xmm0");
 539 }
 540 
 541 extern __inline__ void
 542 sse_ucomisd(double *d1, double *d2)
 543 {
 544         __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
 545 }
 546 
 547 extern __inline__ void
 548 sse_comisd(double *d1, double *d2)
 549 {
 550         __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
 551 }
 552 
 553 extern __inline__ void
 554 sse_cvtsd2ss(double *d1, float *f1)
 555 {


 556         __asm__ __volatile__(
 557             "cvtsd2ss %1,%%xmm0\n\t"
 558             "movss    %%xmm0,%0"
 559             : "=m" (*f1)
 560             : "m" (*d1)
 561             : "xmm0");
 562 }
 563 
 564 
 565 extern __inline__ void
 566 sse_cvtsi2sd(int *i1, double *d1)
 567 {

 568         __asm__ __volatile__(
 569             "cvtsi2sd %1,%%xmm0\n\t"
 570             "movsd    %%xmm0,%0"
 571             : "=m" (*d1)
 572             : "m" (*i1)
 573             : "xmm0");
 574 }
 575 
 576 extern __inline__ void
 577 sse_cvttsd2si(double *d1, int *i1)
 578 {


 579         __asm__ __volatile__(
 580             "cvttsd2si %1,%%ecx\n\t"
 581             "movl      %%ecx,%0"
 582             : "=m" (*i1)
 583             : "m" (*d1)
 584             : "ecx");
 585 }
 586 
 587 extern __inline__ void
 588 sse_cvtsd2si(double *d1, int *i1)
 589 {


 590         __asm__ __volatile__(
 591             "cvtsd2si %1,%%ecx\n\t"
 592             "movl     %%ecx,%0"
 593             : "=m" (*i1)
 594             : "m" (*d1)
 595             : "ecx");
 596 }
 597 
 598 #if defined(__amd64)
 599 extern __inline__ void
 600 sse_cvtsi2sdq(long long *ll1, double *d1)
 601 {


 602         __asm__ __volatile__(
 603             "cvtsi2sdq %1,%%xmm0\n\t"
 604             "movsd     %%xmm0,%0"
 605             : "=m" (*d1)
 606             : "m" (*ll1)
 607             : "xmm0");
 608 }
 609 
 610 extern __inline__ void
 611 sse_cvttsd2siq(double *d1, long long *ll1)
 612 {


 613         __asm__ __volatile__(
 614             "cvttsd2siq %1,%%rcx\n\t"
 615             "movq       %%rcx,%0"
 616             : "=m" (*ll1)
 617             : "m" (*d1)
 618             : "rcx");
 619 }
 620 
 621 extern __inline__ void
 622 sse_cvtsd2siq(double *d1, long long *ll1)
 623 {


 624         __asm__ __volatile__(
 625             "cvtsd2siq %1,%%rcx\n\t"
 626             "movq      %%rcx,%0"
 627             : "=m" (*ll1)
 628             : "m" (*d1)
 629             : "rcx");
 630 }
 631 #endif

 632 #elif defined(__sparc)
/*
 * SPARC: copy the floating-point state register (FSR) to *l.
 * The FSR store is 64-bit under sparcv9 (stx), 32-bit otherwise (st).
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
    __asm__ __volatile__(
#if defined(__sparcv9)
        "stx %%fsr,%0\n\t"
#else
        "st  %%fsr,%0\n\t"
#endif
        : "=m" (*l));
}

/* SPARC: load the FSR from *l (64-bit ldx under sparcv9). */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
    __asm__ __volatile__(
#if defined(__sparcv9)
        "ldx %0,%%fsr\n\t"
#else
        "ld %0,%%fsr\n\t"
#endif
        : : "m" (*l));
}

/* SPARC: 32-bit read of the FSR regardless of sparcv9. */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
    __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* SPARC: 32-bit write of the FSR. */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
    __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
 668 #else
 669 #error "GCC FENV inlines not implemented for this platform"
 670 #endif
 671 
 672 #ifdef __cplusplus
 673 }
 674 #endif


  25 #include <sys/types.h>
  26 
  27 #if defined(__x86)
  28 
  29 /*
  30  * Floating point Control Word and Status Word
  31  * Definition should actually be shared with x86
  32  * (much of this 'amd64' code can be, in fact.)
  33  */
/*
 * Overlay of the two 16-bit x87 words as a single 32-bit value; the
 * first field occupies the low 16 bits on this little-endian (x86-only)
 * code path.
 * NOTE(review): the asm that fills this union stores the STATUS word
 * into .cw and the CONTROL word into .sw -- the field naming looks
 * swapped relative to the instructions; verify against fenv.h users.
 */
union fp_cwsw {
        uint32_t cwsw;          /* combined 32-bit view */
        struct {
                uint16_t cw;    /* low half */
                uint16_t sw;    /* high half */
        } words;
};
  41 
/*
 * Read the x87 status and control words into *value as one 32-bit
 * quantity, viewing the caller's buffer through the fp_cwsw overlay.
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
        union fp_cwsw *u = (union fp_cwsw *)value;

        __asm__ __volatile__(
            "fstsw %0\n\t"
            "fstcw %1\n\t"
            : "=m" (u->words.cw), "=m" (u->words.sw));
}
  52 
/*
 * Install a combined control/status word: dump the x87 environment
 * with fstenv, patch the two 16-bit words (fenv[0] and fenv[2]) of the
 * saved image from *value, and reload the image with fldenv.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
        union fp_cwsw cwsw;
        short fenv[16];         /* fstenv image (28 bytes used) */

        cwsw.cwsw = *value;

        __asm__ __volatile__(
            "fstenv %0\n\t"
            "movw   %4,%1\n\t"
            "movw   %3,%2\n\t"
            "fldenv %0\n\t"
            "fwait\n\t"
            : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
            : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
            /* For practical purposes, we clobber the whole FPU */
            : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
              "st(6)", "st(7)");
}
  73 
/* Read the SSE control/status register (MXCSR) into *value. */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
        __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Load MXCSR from *value. */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
        __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
  85 
/* 2**x - 1, computed in place by the f2xm1 instruction. */
extern __inline__ long double
f2xm1(long double x)
{
        long double ret;

        __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
        return (ret);
}

/* y * log2(x) via fyl2x; the pop of y is declared via "st(1)". */
extern __inline__ long double
fyl2x(long double y, long double x)
{
        long double ret;

        __asm__ __volatile__("fyl2x"
            : "=t" (ret)
            : "0" (x), "u" (y)
            : "st(1)", "cc");
        return (ret);
}

/* tan(x) via fptan. */
extern __inline__ long double
fptan(long double x)
{
        /*
         * fptan pushes 1.0 then the result on completion, so we want to pop
         * the FP stack twice, so we need a dummy value into which to pop it.
         */
        long double ret;
        long double dummy;

        __asm__ __volatile__("fptan"
            : "=t" (dummy), "=u" (ret)
            : "0" (x)
            : "cc");
        return (ret);
}

/* fpatan computes atan(st1/st0); on entry st0 = y, st1 = x. */
extern __inline__ long double
fpatan(long double x, long double y)
{
        long double ret;

        __asm__ __volatile__("fpatan"
            : "=t" (ret)
            : "0" (y), "u" (x)
            : "st(1)", "cc");
        return (ret);
}
 135 
/*
 * Significand of x via fxtract.
 * NOTE(review): fxtract pushes exponent then significand and only the
 * top value is consumed here, so one x87 stack slot appears to remain
 * occupied per call -- confirm against callers.
 */
extern __inline__ long double
fxtract(long double x)
{
        __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
        return (x);
}

/* IEEE (round-to-nearest) partial remainder: st0 = div, st1 = idend. */
extern __inline__ long double
fprem1(long double idend, long double div)
{
        __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
        return (div);
}

/* Truncating partial remainder, same operand placement as fprem1. */
extern __inline__ long double
fprem(long double idend, long double div)
{
        __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
        return (div);
}

/* y * log2(x + 1) via fyl2xp1 (x restricted to a small range). */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
        long double ret;

        __asm__ __volatile__("fyl2xp1"
            : "=t" (ret)
            : "0" (x), "u" (y)
            : "st(1)", "cc");
        return (ret);
}

/* sqrt(x) via fsqrt. */
extern __inline__ long double
fsqrt(long double x)
{
        __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
        return (x);
}

/*
 * fsincos leaves st0 = cos(x), st1 = sin(x); "+t" returns cos(x).
 * NOTE(review): only one value is popped, so sin(x) appears to be left
 * on the x87 stack, and only the cosine is returned -- verify intent.
 */
extern __inline__ long double
fsincos(long double x)
{
        __asm__ __volatile__("fsincos" : "+t" (x) : : "cc");
        return (x);
}

/* Round x to an integer using the current x87 rounding mode. */
extern __inline__ long double
frndint(long double x)
{
        __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
        return (x);
}

/* y * 2**trunc(x) via fscale; on entry st0 = y, st1 = x. */
extern __inline__ long double
fscale(long double x, long double y)
{
        long double ret;

        __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
        return (ret);
}

/* sin(x) via fsin. */
extern __inline__ long double
fsin(long double x)
{
        __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
        return (x);
}

/* cos(x) via fcos. */
extern __inline__ long double
fcos(long double x)
{
        __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
        return (x);
}
 212 
/*
 * Scalar-single compares: store the all-ones/all-zeros mask to *i1.
 * NOTE(review): the "+x" (*f1) in/out operand means the compare mask
 * is also written back through *f1, overwriting the caller's float --
 * confirm this side effect is intended.
 */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
        __asm__ __volatile__(
            "cmpeqss %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}

/* Mask of (*f1 < *f2); same write-back note as sse_cmpeqss. */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
        __asm__ __volatile__(
            "cmpltss %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}

/* Mask of (*f1 <= *f2); same write-back note as sse_cmpeqss. */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
        __asm__ __volatile__(
            "cmpless %2, %1\n\t"
            "movss   %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}

/* Mask of "unordered" (either operand NaN); same write-back note. */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
        __asm__ __volatile__(
            "cmpunordss %2, %1\n\t"
            "movss      %1, %0"
            : "=m" (*i1), "+x" (*f1)
            : "x" (*f2)
            : "cc");
}
 256 
/*
 * Scalar-single arithmetic: *f3 = op(*f1, *f2).
 * NOTE(review): "+x" (*f1) also writes the result back through *f1 --
 * confirm this side effect is intended.
 */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "minss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}

/* *f3 = max(*f1, *f2); same write-back note as sse_minss. */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "maxss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}

/* *f3 = *f1 + *f2 */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "addss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}

/* *f3 = *f1 - *f2 */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "subss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}

/* *f3 = *f1 * *f2 */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "mulss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}

/* *f3 = *f1 / *f2 */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
        __asm__ __volatile__(
            "divss %2, %1\n\t"
            "movss %1, %0"
            : "=m" (*f3), "+x" (*f1)
            : "x" (*f2));
}
 316 
/*
 * *f2 = sqrtf(*f1), through a compiler-chosen scratch register.
 * NOTE(review): the scratch is declared double for a float operation;
 * wider than needed but apparently harmless -- confirm intent.
 */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
        double tmp;

        __asm__ __volatile__(
            "sqrtss %2, %1\n\t"
            "movss  %1, %0"
            : "=m" (*f2), "=x" (tmp)
            : "m" (*f1));
}
 328 
 329 extern __inline__ void
 330 sse_ucomiss(float *f1, float *f2)
 331 {
 332         __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
 333 
 334 }
 335 
 336 extern __inline__ void
 337 sse_comiss(float *f1, float *f2)
 338 {
 339         __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
 340 }
 341 
/* *d1 = (double)*f1, through a compiler-chosen scratch register. */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
        double tmp;

        __asm__ __volatile__(
            "cvtss2sd %2, %1\n\t"
            "movsd    %1, %0"
            : "=m" (*d1), "=x" (tmp)
            : "m" (*f1));
}

/*
 * *f1 = (float)*i1.
 * NOTE(review): the scratch is declared double for a float result;
 * wider than needed but apparently harmless -- confirm intent.
 */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
        double tmp;

        __asm__ __volatile__(
            "cvtsi2ss %2, %1\n\t"
            "movss    %1, %0"
            : "=m" (*f1), "=x" (tmp)
            : "m" (*i1));
}

/* *i1 = (int)*f1, truncating. */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
        int tmp;

        __asm__ __volatile__(
            "cvttss2si %2, %1\n\t"
            "movl      %1, %0"
            : "=m" (*i1), "=r" (tmp)
            : "m" (*f1));
}

/* *i1 = (int)*f1, using the current MXCSR rounding mode. */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
        int tmp;

        __asm__ __volatile__(
            "cvtss2si %2, %1\n\t"
            "movl     %1, %0"
            : "=m" (*i1), "=r" (tmp)
            : "m" (*f1));
}

#if defined(__amd64)
/* *f1 = (float)*ll1 (amd64 only); see sse_cvtsi2ss scratch note. */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
        double tmp;

        __asm__ __volatile__(
            "cvtsi2ssq %2, %1\n\t"
            "movss     %1, %0"
            : "=m" (*f1), "=x" (tmp)
            : "m" (*ll1));
}

/* *ll1 = (long long)*f1, truncating (amd64 only). */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
        uint64_t tmp;

        __asm__ __volatile__(
            "cvttss2siq %2, %1\n\t"
            "movq       %1, %0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*f1));
}

/* *ll1 = (long long)*f1, current rounding mode (amd64 only). */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
        uint64_t tmp;

        __asm__ __volatile__(
            "cvtss2siq %2, %1\n\t"
            "movq      %1, %0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*f1));
}
 426 
 427 #endif
 428 
 429 extern __inline__ void
 430 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 431 {
 432         __asm__ __volatile__(
 433             "cmpeqsd %2,%1\n\t"
 434             "movsd   %1,%0"
 435             : "=m" (*ll1), "=x" (*d1)
 436             : "x" (*d2));
 437 }
 438 
 439 extern __inline__ void
 440 sse_cmpltsd(double *d1, double *d2, long long *ll1)
 441 {
 442         __asm__ __volatile__(
 443             "cmpltsd %2,%1\n\t"
 444             "movsd   %1,%0"
 445             : "=m" (*ll1), "=x" (*d1)
 446             : "x" (*d2));
 447 }
 448 
 449 extern __inline__ void
 450 sse_cmplesd(double *d1, double *d2, long long *ll1)
 451 {
 452         __asm__ __volatile__(
 453             "cmplesd %2,%1\n\t"
 454             "movsd   %1,%0"
 455             : "=m" (*ll1), "=x" (*d1)
 456             : "x" (*d2));
 457 }
 458 
 459 extern __inline__ void
 460 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 461 {
 462         __asm__ __volatile__(
 463             "cmpunordsd %2,%1\n\t"
 464             "movsd      %1,%0"
 465             : "=m" (*ll1), "=x" (*d1)
 466             : "x" (*d2));
 467 }
 468 
 469 
 470 extern __inline__ void
 471 sse_minsd(double *d1, double *d2, double *d3)
 472 {
 473         __asm__ __volatile__(
 474             "minsd %2,%1\n\t"
 475             "movsd %1,%0"
 476             : "=m" (*d3), "=x" (*d1)
 477             : "x" (*d2));
 478 }
 479 
 480 extern __inline__ void
 481 sse_maxsd(double *d1, double *d2, double *d3)
 482 {
 483         __asm__ __volatile__(
 484             "maxsd %2,%1\n\t"
 485             "movsd %1,%0"
 486             : "=m" (*d3), "=x" (*d1)
 487             : "x" (*d2));
 488 }
 489 
 490 extern __inline__ void
 491 sse_addsd(double *d1, double *d2, double *d3)
 492 {
 493         __asm__ __volatile__(
 494             "addsd %2,%1\n\t"
 495             "movsd %1,%0"
 496             : "=m" (*d3), "=x" (*d1)
 497             : "x" (*d2));
 498 }
 499 
 500 extern __inline__ void
 501 sse_subsd(double *d1, double *d2, double *d3)
 502 {
 503         __asm__ __volatile__(
 504             "subsd %2,%1\n\t"
 505             "movsd %1,%0"
 506             : "=m" (*d3), "=x" (*d1)
 507             : "x" (*d2));
 508 }
 509 
 510 extern __inline__ void
 511 sse_mulsd(double *d1, double *d2, double *d3)
 512 {
 513         __asm__ __volatile__(
 514             "mulsd %2,%1\n\t"
 515             "movsd %1,%0"
 516             : "=m" (*d3), "=x" (*d1)
 517             : "x" (*d2));
 518 }
 519 
 520 extern __inline__ void
 521 sse_divsd(double *d1, double *d2, double *d3)
 522 {
 523         __asm__ __volatile__(
 524             "divsd %2,%1\n\t"
 525             "movsd %1,%0"
 526             : "=m" (*d3), "=x" (*d1)
 527             : "x" (*d2));

 528 }
 529 
/* *d2 = sqrt(*d1), through a compiler-chosen scratch register. */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
        double tmp;

        __asm__ __volatile__(
            "sqrtsd %2, %1\n\t"
            "movsd %1, %0"
            : "=m" (*d2), "=x" (tmp)
            : "m" (*d1));
}
 541 
 542 extern __inline__ void
 543 sse_ucomisd(double *d1, double *d2)
 544 {
 545         __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
 546 }
 547 
 548 extern __inline__ void
 549 sse_comisd(double *d1, double *d2)
 550 {
 551         __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
 552 }
 553 
/*
 * *f1 = (float)*d1, through a compiler-chosen scratch register.
 * NOTE(review): the scratch is declared double for a float result;
 * wider than needed but apparently harmless -- confirm intent.
 */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
        double tmp;

        __asm__ __volatile__(
            "cvtsd2ss %2,%1\n\t"
            "movss    %1,%0"
            : "=m" (*f1), "=x" (tmp)
            : "m" (*d1));
}


/* *d1 = (double)*i1 */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
        double tmp;
        __asm__ __volatile__(
            "cvtsi2sd %2,%1\n\t"
            "movsd    %1,%0"
            : "=m" (*d1), "=x" (tmp)
            : "m" (*i1));
}

/* *i1 = (int)*d1, truncating. */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
        int tmp;

        __asm__ __volatile__(
            "cvttsd2si %2,%1\n\t"
            "movl      %1,%0"
            : "=m" (*i1), "=r" (tmp)
            : "m" (*d1));
}

/* *i1 = (int)*d1, using the current MXCSR rounding mode. */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
        int tmp;

        __asm__ __volatile__(
            "cvtsd2si %2,%1\n\t"
            "movl     %1,%0"
            : "=m" (*i1), "=r" (tmp)
            : "m" (*d1));
}

#if defined(__amd64)
/* *d1 = (double)*ll1 (amd64 only). */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
        double tmp;

        __asm__ __volatile__(
            "cvtsi2sdq %2,%1\n\t"
            "movsd     %1,%0"
            : "=m" (*d1), "=x" (tmp)
            : "m" (*ll1));
}

/* *ll1 = (long long)*d1, truncating (amd64 only). */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
        uint64_t tmp;

        __asm__ __volatile__(
            "cvttsd2siq %2,%1\n\t"
            "movq       %1,%0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*d1));
}

/* *ll1 = (long long)*d1, current rounding mode (amd64 only). */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
        uint64_t tmp;

        __asm__ __volatile__(
            "cvtsd2siq %2,%1\n\t"
            "movq      %1,%0"
            : "=m" (*ll1), "=r" (tmp)
            : "m" (*d1));
}
 637 #endif
 638 
 639 #elif defined(__sparc)
/*
 * SPARC: copy the floating-point state register (FSR) to *l.
 * The FSR store is 64-bit under sparcv9 (stx), 32-bit otherwise (st).
 */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
        __asm__ __volatile__(
#if defined(__sparcv9)
                "stx %%fsr,%0\n\t"
#else
                "st  %%fsr,%0\n\t"
#endif
                : "=m" (*l));
}

/* SPARC: load the FSR from *l (64-bit ldx under sparcv9). */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
        __asm__ __volatile__(
#if defined(__sparcv9)
                "ldx %0,%%fsr\n\t"
#else
                "ld %0,%%fsr\n\t"
#endif
                : : "m" (*l) : "cc");
}

/* SPARC: 32-bit read of the FSR regardless of sparcv9. */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
        __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* SPARC: 32-bit write of the FSR. */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
        __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
 675 #else
 676 #error "GCC FENV inlines not implemented for this platform"
 677 #endif
 678 
 679 #ifdef __cplusplus
 680 }
 681 #endif