#include <sys/types.h>

/*
 * The x86 section is selected by __x86 (the platform's own macro) or by the
 * compiler-predefined __i386__ / __x86_64__, so the inlines are also usable
 * when the platform headers have not defined __x86.
 */
#if defined(__x86) || defined(__i386__) || defined(__x86_64__)

/*
 * Floating point Control Word and Status Word
 * Definition should actually be shared with x86
 * (much of this 'amd64' code can be, in fact.)
 *
 * NOTE(review): __fenv_getcwsw() stores the result of fstsw into words.cw
 * and the result of fstcw into words.sw, and __fenv_setcwsw() writes
 * words.sw to fenv[0] and words.cw to fenv[2].  The two routines agree with
 * each other, but the member names read as the reverse of what they hold.
 * Only the packed 32-bit value is visible to callers.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};

/*
 * Store the x87 status word (fstsw) and control word (fstcw) into *value as
 * one packed 32-bit image laid out as union fp_cwsw.
 *
 * A local union is filled and then copied out, so that *value is written
 * through its own type rather than through a casted pointer.
 */
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw image;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (image.words.cw), "=m" (image.words.sw));
	*value = image.cwsw;
}

/*
 * Install the packed image in *value (as produced by __fenv_getcwsw()).
 *
 * The current environment is saved with fstenv, the two 16-bit words at
 * fenv[0] and fenv[2] are overwritten from the image, and the result is
 * reloaded with fldenv followed by fwait.
 */
extern __inline__ void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw   %4,%1\n\t"
	    "movw   %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	    "st(6)", "st(7)");
}

/* Store the SSE control/status register (stmxcsr) into *value. */
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}

/* Load the SSE control/status register (ldmxcsr) from *value. */
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}

/*
 * x87 wrappers.
 *
 * "t" binds an operand to %st(0) and "u" to %st(1).  Instructions that pop
 * %st(1) list it as a clobber; instructions that push a second result
 * declare it as a "u" output so the compiler's view of the register stack
 * stays in step with the hardware.
 */

/* Execute f2xm1 with x in %st(0) and return the new %st(0). */
extern __inline__ long double
f2xm1(long double x)
{
	__asm__ __volatile__("f2xm1" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Execute fyl2x with x in %st(0) and y in %st(1); the instruction pops the
 * stack and the remaining top-of-stack value is returned.
 */
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * Execute fpatan with y in %st(0) and x in %st(1); the instruction pops the
 * stack and the remaining top-of-stack value is returned.
 */
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}

/*
 * Execute fxtract on x and return the value it leaves in %st(0).
 *
 * fxtract pushes a second value, so the one left in %st(1) is received into
 * a dummy output; without it the register stack would be left one entry
 * deeper than the compiler expects.
 */
extern __inline__ long double
fxtract(long double x)
{
	long double ret;
	long double dummy;

	__asm__ __volatile__("fxtract"
	    : "=t" (ret), "=u" (dummy)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/*
 * Execute fprem1 with div in %st(0) and idend in %st(1) and return the new
 * %st(0).
 *
 * NOTE(review): the instruction operates on %st(0) with %st(1) as the other
 * operand, which looks reversed with respect to the parameter names;
 * confirm against the callers before changing the operand order.
 */
extern __inline__ long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * Execute fprem with div in %st(0) and idend in %st(1) and return the new
 * %st(0).  The NOTE(review) on fprem1() applies here as well.
 */
extern __inline__ long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}

/*
 * Execute fyl2xp1 with x in %st(0) and y in %st(1); the instruction pops
 * the stack and the remaining top-of-stack value is returned.
 */
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}

/* Execute fsqrt on x and return the new %st(0). */
extern __inline__ long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Execute fsincos on x and return the value it leaves in %st(0).
 *
 * fsincos pushes a second result, so the one left in %st(1) is received
 * into a dummy output; without it the register stack would be left one
 * entry deeper than the compiler expects.
 */
extern __inline__ long double
fsincos(long double x)
{
	long double ret;
	long double dummy;

	__asm__ __volatile__("fsincos"
	    : "=t" (ret), "=u" (dummy)
	    : "0" (x)
	    : "cc");
	return (ret);
}

/* Execute frndint on x and return the new %st(0). */
extern __inline__ long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}

/*
 * Execute fscale with y in %st(0) and x in %st(1) and return the new
 * %st(0).  %st(1) is left in place by the instruction.
 */
extern __inline__ long double
fscale(long double x, long double y)
{
	__asm__ __volatile__("fscale" : "+t" (y) : "u" (x) : "cc");
	return (y);
}

/* Execute fsin on x and return the new %st(0). */
extern __inline__ long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}

/* Execute fcos on x and return the new %st(0). */
extern __inline__ long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}

/*
 * SSE scalar single-precision wrappers.
 *
 * The two-operand instructions overwrite their destination register.  The
 * first operand is therefore copied into a local that is bound read-write
 * ("+x"), so the register is loaded before the instruction runs and the
 * caller's *f1 is not written.  The result is stored with movss to the
 * last pointer argument.
 */

/* cmpeqss: 32-bit compare mask of (*f1 == *f2) is stored to *i1. */
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (acc)
	    : "x" (*f2)
	    : "cc");
}

/* cmpltss: 32-bit compare mask of (*f1 < *f2) is stored to *i1. */
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (acc)
	    : "x" (*f2)
	    : "cc");
}

/* cmpless: 32-bit compare mask of (*f1 <= *f2) is stored to *i1. */
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss   %1, %0"
	    : "=m" (*i1), "+x" (acc)
	    : "x" (*f2)
	    : "cc");
}

/* cmpunordss: 32-bit "unordered" compare mask is stored to *i1. */
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss      %1, %0"
	    : "=m" (*i1), "+x" (acc)
	    : "x" (*f2)
	    : "cc");
}

/* minss: *f3 receives the result of minss applied to (*f1, *f2). */
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* maxss: *f3 receives the result of maxss applied to (*f1, *f2). */
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* addss: *f3 = *f1 + *f2. */
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* subss: *f3 = *f1 - *f2. */
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* mulss: *f3 = *f1 * *f2. */
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* divss: *f3 = *f1 / *f2. */
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float acc = *f1;

	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (acc)
	    : "x" (*f2));
}

/* sqrtss: *f2 receives the square root of *f1. */
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	float tmp;

	__asm__ __volatile__(
	    "sqrtss %2, %1\n\t"
	    "movss  %1, %0"
	    : "=m" (*f2), "=x" (tmp)
	    : "m" (*f1));
}

/* ucomiss: compare *f1 with *f2; only the flags are affected. */
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}

/* comiss: compare *f1 with *f2; only the flags are affected. */
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0"
	    :
	    : "x" (*f1), "x" (*f2)
	    : "cc");
}

/* cvtss2sd: convert the float at *f1 to a double stored at *d1. */
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd    %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}

/* cvtsi2ss: convert the int at *i1 to a float stored at *f1. */
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	float tmp;

	__asm__ __volatile__(
	    "cvtsi2ss %2, %1\n\t"
	    "movss    %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * cvttss2si: convert the float at *f1 to an int stored at *i1, using the
 * truncating form of the instruction.
 *
 * The scratch register is written before the store to *i1 is issued, so it
 * is marked earlyclobber ("=&r") to keep it from being assigned to a
 * register that forms part of either memory operand's address.
 */
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl      %1, %0"
	    : "=m" (*i1), "=&r" (tmp)
	    : "m" (*f1));
}

/*
 * cvtss2si: convert the float at *f1 to an int stored at *i1.  The scratch
 * register is earlyclobber for the reason given on sse_cvttss2si().
 */
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl     %1, %0"
	    : "=m" (*i1), "=&r" (tmp)
	    : "m" (*f1));
}

#if defined(__amd64)
/* cvtsi2ssq: convert the 64-bit integer at *ll1 to a float stored at *f1. */
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	float tmp;

	__asm__ __volatile__(
	    "cvtsi2ssq %2, %1\n\t"
	    "movss     %1, %0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * cvttss2siq: convert the float at *f1 to a 64-bit integer stored at *ll1,
 * using the truncating form.  The scratch register is earlyclobber so it
 * cannot overlap an address register of either memory operand.
 */
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	long long tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq       %1, %0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*f1));
}

/*
 * cvtss2siq: convert the float at *f1 to a 64-bit integer stored at *ll1.
 * The scratch register is earlyclobber so it cannot overlap an address
 * register of either memory operand.
 */
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	long long tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq      %1, %0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*f1));
}

#endif

/*
 * SSE2 scalar double-precision wrappers.
 *
 * As with the single-precision set, the first operand is copied into a
 * local bound read-write ("+x") so that the destination register is loaded
 * before the instruction runs and the caller's *d1 is not written.  The
 * result is stored with movsd to the last pointer argument.
 */

/* cmpeqsd: 64-bit compare mask of (*d1 == *d2) is stored to *ll1. */
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (acc)
	    : "x" (*d2)
	    : "cc");
}

/* cmpltsd: 64-bit compare mask of (*d1 < *d2) is stored to *ll1. */
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (acc)
	    : "x" (*d2)
	    : "cc");
}

/* cmplesd: 64-bit compare mask of (*d1 <= *d2) is stored to *ll1. */
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd   %1,%0"
	    : "=m" (*ll1), "+x" (acc)
	    : "x" (*d2)
	    : "cc");
}

/* cmpunordsd: 64-bit "unordered" compare mask is stored to *ll1. */
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd      %1,%0"
	    : "=m" (*ll1), "+x" (acc)
	    : "x" (*d2)
	    : "cc");
}

/* minsd: *d3 receives the result of minsd applied to (*d1, *d2). */
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* maxsd: *d3 receives the result of maxsd applied to (*d1, *d2). */
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* addsd: *d3 = *d1 + *d2. */
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* subsd: *d3 = *d1 - *d2. */
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* mulsd: *d3 = *d1 * *d2. */
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* divsd: *d3 = *d1 / *d2. */
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double acc = *d1;

	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (acc)
	    : "x" (*d2));
}

/* sqrtsd: *d2 receives the square root of *d1. */
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd  %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}

/* ucomisd: compare *d1 with *d2; only the flags are affected. */
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}

/* comisd: compare *d1 with *d2; only the flags are affected. */
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0"
	    :
	    : "x" (*d1), "x" (*d2)
	    : "cc");
}

/* cvtsd2ss: convert the double at *d1 to a float stored at *f1. */
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	float tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss    %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}

/* cvtsi2sd: convert the int at *i1 to a double stored at *d1. */
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd    %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}

/*
 * cvttsd2si: convert the double at *d1 to an int stored at *i1, using the
 * truncating form.  The scratch register is earlyclobber so it cannot
 * overlap an address register of either memory operand.
 */
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl      %1,%0"
	    : "=m" (*i1), "=&r" (tmp)
	    : "m" (*d1));
}

/*
 * cvtsd2si: convert the double at *d1 to an int stored at *i1.  The scratch
 * register is earlyclobber so it cannot overlap an address register of
 * either memory operand.
 */
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl     %1,%0"
	    : "=m" (*i1), "=&r" (tmp)
	    : "m" (*d1));
}

#if defined(__amd64)
/* cvtsi2sdq: convert the 64-bit integer at *ll1 to a double stored at *d1. */
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd     %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}

/*
 * cvttsd2siq: convert the double at *d1 to a 64-bit integer stored at
 * *ll1, using the truncating form.  The scratch register is earlyclobber
 * so it cannot overlap an address register of either memory operand.
 */
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	long long tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq       %1,%0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*d1));
}

/*
 * cvtsd2siq: convert the double at *d1 to a 64-bit integer stored at *ll1.
 * The scratch register is earlyclobber so it cannot overlap an address
 * register of either memory operand.
 */
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	long long tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq      %1,%0"
	    : "=m" (*ll1), "=&r" (tmp)
	    : "m" (*d1));
}
#endif

#elif defined(__sparc)
/* Store %fsr to *l (stx on sparcv9, st otherwise). */
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st  %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}

/* Load %fsr from *l (ldx on sparcv9, ld otherwise). */
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld  %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}

/* Store %fsr to *l using the 32-bit st form. */
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}

/* Load %fsr from *l using the 32-bit ld form. */
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l) : "cc");
}
#else
#error "GCC FENV inlines not implemented for this platform"
#endif

#ifdef __cplusplus
}
#endif