--- old/usr/src/lib/libm/common/m9x/fenv_inlines.h
+++ new/usr/src/lib/libm/common/m9x/fenv_inlines.h
[ 34 lines elided ]
  35   35          uint32_t cwsw;
  36   36          struct {
  37   37                  uint16_t cw;
  38   38                  uint16_t sw;
  39   39          } words;
  40   40  };
  41   41  
  42   42  extern __inline__ void
  43   43  __fenv_getcwsw(unsigned int *value)
  44   44  {
  45      -        union fp_cwsw ret;
       45 +        union fp_cwsw *u = (union fp_cwsw *)value;
  46   46  
  47   47          __asm__ __volatile__(
  48   48              "fstsw %0\n\t"
  49   49              "fstcw %1\n\t"
  50      -            : "=m" (ret.words.cw), "=m" (ret.words.sw));
  51      -        *value = ret.cwsw;
       50 +            : "=m" (u->words.cw), "=m" (u->words.sw));
  52   51  }
  53   52  
  54   53  extern __inline__ void
  55   54  __fenv_setcwsw(const unsigned int *value)
  56   55  {
  57   56          union fp_cwsw cwsw;
  58   57          short fenv[16];
  59   58  
  60   59          cwsw.cwsw = *value;
  61   60  
  62   61          __asm__ __volatile__(
  63   62              "fstenv %0\n\t"
  64   63              "movw   %4,%1\n\t"
  65   64              "movw   %3,%2\n\t"
  66   65              "fldenv %0\n\t"
  67   66              "fwait\n\t"
  68   67              : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
  69      -            : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
       68 +            : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
  70   69              /* For practical purposes, we clobber the whole FPU */
  71   70              : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
  72   71                "st(6)", "st(7)");
  73   72  }
  74   73  
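Note: the fenv[0]/fenv[2] indexing works because the 32-bit fstenv image pads
each 16-bit word out to 32 bits, so the control word sits at short offset 0 and
the status word at short offset 2; short fenv[16] (32 bytes) comfortably covers
the 28-byte image. The switch from "d"/"c" to "r" simply lets the compiler pick
any registers for the two movw patches. A sketch of the image layout, with
illustrative field names (not from this file):

        struct fpenv_image {            /* 32-bit protected-mode fstenv */
                uint16_t cw, pad0;      /* fenv[0..1]: control word */
                uint16_t sw, pad1;      /* fenv[2..3]: status word */
                uint16_t tw, pad2;      /* tag word */
                uint32_t fip;           /* FPU instruction pointer */
                uint16_t fcs, fop;      /* code selector, opcode bits */
                uint32_t fdp;           /* operand pointer */
                uint16_t fds, pad3;     /* operand selector; 28 bytes total */
        };
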
  75   74  extern __inline__ void
  76   75  __fenv_getmxcsr(unsigned int *value)
  77   76  {
  78      -        __asm__ __volatile__("stmxcsr %1" : "+m" (*value));
       77 +        __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
  79   78  }
  80   79  
  81   80  extern __inline__ void
  82   81  __fenv_setmxcsr(const unsigned int *value)
  83   82  {
  84   83          __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
  85   84  }
  86   85  
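Note: a usage sketch for the pair above; the RC (rounding control) field
occupies MXCSR bits 14:13 per the Intel SDM, not anything defined in this file:

        unsigned int mxcsr;

        __fenv_getmxcsr(&mxcsr);
        mxcsr = (mxcsr & ~0x6000u) | 0x6000u;   /* RC = 11b: round toward zero */
        __fenv_setmxcsr(&mxcsr);
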
  87   86  extern __inline__ long double
  88   87  f2xm1(long double x)
  89   88  {
  90   89          long double ret;
  91   90  
  92      -        __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x));
       91 +        __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
  93   92          return (ret);
  94   93  }
  95   94  
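Note: f2xm1 computes 2^x - 1 and is only defined for -1 <= x <= 1. A hedged
usage sketch (exp2_small is a hypothetical helper, not part of this header):

        static long double
        exp2_small(long double x)       /* requires |x| <= 1 */
        {
                return (f2xm1(x) + 1.0L);
        }
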
  96   95  extern __inline__ long double
  97   96  fyl2x(long double y, long double x)
  98   97  {
  99   98          long double ret;
 100   99  
 101      -        __asm__ __volatile__("fyl2x" : "=t" (ret): "0" (x), "u" (y) : "st(1)");
      100 +        __asm__ __volatile__("fyl2x"
      101 +            : "=t" (ret)
      102 +            : "0" (x), "u" (y)
      103 +            : "st(1)", "cc");
 102  104          return (ret);
 103  105  }
 104  106  
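Note: fyl2x leaves y * log2(x) on the stack, so passing y = ln(2) yields a
natural log. Sketch (ln_sketch is hypothetical):

        static long double
        ln_sketch(long double x)
        {
                /* ln(x) == ln(2) * log2(x) */
                return (fyl2x(0.6931471805599453094172321L, x));
        }
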
 105  107  extern __inline__ long double
 106  108  fptan(long double x)
 107  109  {
 108  110          /*
  109  111           * fptan replaces x with tan(x) and then pushes 1.0, so we must pop
  110  112           * the FP stack twice and need a dummy value into which to pop the 1.0.
 111  113           */
 112  114          long double ret;
 113  115          long double dummy;
 114  116  
 115      -        __asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));
      117 +        __asm__ __volatile__("fptan"
      118 +            : "=t" (dummy), "=u" (ret)
      119 +            : "0" (x)
      120 +            : "cc");
 116  121          return (ret);
 117  122  }
 118  123  
 119  124  extern __inline__ long double
 120  125  fpatan(long double x, long double y)
 121  126  {
 122  127          long double ret;
 123  128  
 124  129          __asm__ __volatile__("fpatan"
 125  130              : "=t" (ret)
 126  131              : "0" (y), "u" (x)
 127      -            : "st(1)");
      132 +            : "st(1)", "cc");
 128  133          return (ret);
 129  134  }
 130  135  
 131  136  extern __inline__ long double
 132  137  fxtract(long double x)
 133  138  {
 134      -        long double ret;
 135      -
 136      -        __asm__ __volatile__("fxtract" : "=t" (ret) : "0" (x));
 137      -        return (ret);
      139 +        __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
      140 +        return (x);
 138  141  }
 139  142  
 140  143  extern __inline__ long double
 141  144  fprem1(long double idend, long double div)
 142  145  {
 143      -        long double ret;
 144      -
 145      -        __asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
 146      -        return (ret);
      146 +        __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
      147 +        return (div);
 147  148  }
 148  149  
 149  150  extern __inline__ long double
 150  151  fprem(long double idend, long double div)
 151  152  {
 152      -        long double ret;
 153      -
 154      -        __asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
 155      -        return (ret);
      153 +        __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
      154 +        return (div);
 156  155  }
 157  156  
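Note: fprem reduces with a truncated quotient, while fprem1 uses the
round-to-nearest quotient that IEEE 754 remainder() specifies. A rough C model
of the two quotient rules (sketch only: the real instructions iterate and can
return a partial remainder; truncl/rintl are from <math.h>):

        long double
        prem_model(long double x, long double m)
        {
                return (x - truncl(x / m) * m);         /* fprem-style */
        }

        long double
        prem1_model(long double x, long double m)
        {
                return (x - rintl(x / m) * m);          /* fprem1-style */
        }
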
 158  157  extern __inline__ long double
 159  158  fyl2xp1(long double y, long double x)
 160  159  {
 161  160          long double ret;
 162  161  
 163  162          __asm__ __volatile__("fyl2xp1"
 164  163              : "=t" (ret)
 165  164              : "0" (x), "u" (y)
 166      -            : "st(1)");
      165 +            : "st(1)", "cc");
 167  166          return (ret);
 168  167  }
 169  168  
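Note: fyl2xp1 computes y * log2(x + 1) without explicitly forming x + 1,
avoiding cancellation for tiny x; the ISA restricts x to a small range around
zero. Sketch (log1p_sketch is hypothetical):

        static long double
        log1p_sketch(long double x)     /* small |x| only */
        {
                /* ln(1 + x) == ln(2) * log2(1 + x) */
                return (fyl2xp1(0.6931471805599453094172321L, x));
        }
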
 170  169  extern __inline__ long double
 171  170  fsqrt(long double x)
 172  171  {
 173      -        long double ret;
 174      -
 175      -        __asm__ __volatile__("fsqrt" : "=t" (ret) : "0" (x));
 176      -        return (ret);
      172 +        __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
      173 +        return (x);
 177  174  }
 178  175  
 179  176  extern __inline__ long double
 180  177  fsincos(long double x)
 181  178  {
 182      -        long double ret;
 183      -
 184      -        __asm__ __volatile__("fsincos" : "=t" (ret) : "0" (x));
 185      -        return (ret);
      179 +        __asm__ __volatile__("fsincos" : "+t" (x) : : "cc");
      180 +        return (x);
 186  181  }
 187  182  
 188  183  extern __inline__ long double
 189  184  frndint(long double x)
 190  185  {
 191      -        long double ret;
 192      -
 193      -        __asm__ __volatile__("frndint" : "=t" (ret) : "0" (x));
 194      -        return (ret);
      186 +        __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
      187 +        return (x);
 195  188  }
 196  189  
 197  190  extern __inline__ long double
 198  191  fscale(long double x, long double y)
 199  192  {
 200  193          long double ret;
 201  194  
 202      -        __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
      195 +        __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
 203  196          return (ret);
 204  197  }
 205  198  
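Note: fscale multiplies st(0) by 2^trunc(st(1)); with the bindings above
(y on top, x in st(1)), fscale(x, y) returns y * 2^trunc(x), ldexp-style.
A quick sketch:

        long double v = fscale(8.0L, 1.5L);     /* 1.5 * 2^8 == 384.0 */
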
 206  199  extern __inline__ long double
 207  200  fsin(long double x)
 208  201  {
 209      -        long double ret;
 210      -
 211      -        __asm__ __volatile__("fsin" : "=t" (ret) : "0" (x));
 212      -        return (ret);
      202 +        __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
      203 +        return (x);
 213  204  }
 214  205  
 215  206  extern __inline__ long double
 216  207  fcos(long double x)
 217  208  {
 218      -        long double ret;
 219      -
 220      -        __asm__ __volatile__("fcos" : "=t" (ret) : "0" (x));
 221      -        return (ret);
      209 +        __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
      210 +        return (x);
 222  211  }
 223  212  
 224  213  extern __inline__ void
 225  214  sse_cmpeqss(float *f1, float *f2, int *i1)
 226  215  {
 227  216          __asm__ __volatile__(
 228  217              "cmpeqss %2, %1\n\t"
 229  218              "movss   %1, %0"
 230      -            : "=m" (*i1)
 231      -            : "x" (*f1), "x" (*f2));
      219 +            : "=m" (*i1), "+x" (*f1)
      220 +            : "x" (*f2)
      221 +            : "cc");
 232  222  }
 233  223  
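Note: cmpeqss overwrites its destination register with an all-ones or all-zero
mask, which is why *f1 is now a "+x" read-write operand; as a side effect the
mask is also written back through f1. Usage sketch:

        float a = 1.0f, b = 1.0f;
        int mask;

        sse_cmpeqss(&a, &b, &mask);
        /*
         * mask == -1 (all ones) when a == b, else 0; a itself now holds
         * the mask bits because of the "+x" write-back.
         */
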
 234  224  extern __inline__ void
 235  225  sse_cmpltss(float *f1, float *f2, int *i1)
 236  226  {
 237  227          __asm__ __volatile__(
 238  228              "cmpltss %2, %1\n\t"
 239  229              "movss   %1, %0"
 240      -            : "=m" (*i1)
 241      -            : "x" (*f1), "x" (*f2));
      230 +            : "=m" (*i1), "+x" (*f1)
      231 +            : "x" (*f2)
      232 +            : "cc");
 242  233  }
 243  234  
 244  235  extern __inline__ void
 245  236  sse_cmpless(float *f1, float *f2, int *i1)
 246  237  {
 247  238          __asm__ __volatile__(
 248  239              "cmpless %2, %1\n\t"
 249  240              "movss   %1, %0"
 250      -            : "=m" (*i1)
 251      -            : "x" (*f1), "x" (*f2));
      241 +            : "=m" (*i1), "+x" (*f1)
      242 +            : "x" (*f2)
      243 +            : "cc");
 252  244  }
 253  245  
 254  246  extern __inline__ void
 255  247  sse_cmpunordss(float *f1, float *f2, int *i1)
 256  248  {
 257  249          __asm__ __volatile__(
 258  250              "cmpunordss %2, %1\n\t"
 259  251              "movss      %1, %0"
 260      -            : "=m" (*i1)
 261      -            : "x" (*f1), "x" (*f2));
      252 +            : "=m" (*i1), "+x" (*f1)
      253 +            : "x" (*f2)
      254 +            : "cc");
 262  255  }
 263  256  
 264  257  extern __inline__ void
 265  258  sse_minss(float *f1, float *f2, float *f3)
 266  259  {
 267  260          __asm__ __volatile__(
 268  261              "minss %2, %1\n\t"
 269  262              "movss %1, %0"
 270      -            : "=m" (*f3)
 271      -            : "x" (*f1), "x" (*f2));
      263 +            : "=m" (*f3), "+x" (*f1)
      264 +            : "x" (*f2));
 272  265  }
 273  266  
 274  267  extern __inline__ void
 275  268  sse_maxss(float *f1, float *f2, float *f3)
 276  269  {
 277  270          __asm__ __volatile__(
 278  271              "maxss %2, %1\n\t"
 279  272              "movss %1, %0"
 280      -            : "=m" (*f3)
 281      -            : "x" (*f1), "x" (*f2));
      273 +            : "=m" (*f3), "+x" (*f1)
      274 +            : "x" (*f2));
 282  275  }
 283  276  
 284  277  extern __inline__ void
 285  278  sse_addss(float *f1, float *f2, float *f3)
 286  279  {
 287  280          __asm__ __volatile__(
 288  281              "addss %2, %1\n\t"
 289  282              "movss %1, %0"
 290      -            : "=m" (*f3)
 291      -            : "x" (*f1), "x" (*f2));
      283 +            : "=m" (*f3), "+x" (*f1)
      284 +            : "x" (*f2));
 292  285  }
 293  286  
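Note: the compare, min/max, and arithmetic wrappers all share this shape; with
the "+x" write-back, sse_addss is equivalent to the following C model (sketch):

        void
        sse_addss_model(float *f1, float *f2, float *f3)
        {
                *f1 = *f1 + *f2;        /* addss leaves the sum in f1's register */
                *f3 = *f1;              /* movss stores it to f3 */
        }
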
 294  287  extern __inline__ void
 295  288  sse_subss(float *f1, float *f2, float *f3)
 296  289  {
 297  290          __asm__ __volatile__(
 298  291              "subss %2, %1\n\t"
 299  292              "movss %1, %0"
 300      -            : "=m" (*f3)
 301      -            : "x" (*f1), "x" (*f2));
      293 +            : "=m" (*f3), "+x" (*f1)
      294 +            : "x" (*f2));
 302  295  }
 303  296  
 304  297  extern __inline__ void
 305  298  sse_mulss(float *f1, float *f2, float *f3)
 306  299  {
 307  300          __asm__ __volatile__(
 308  301              "mulss %2, %1\n\t"
 309  302              "movss %1, %0"
 310      -            : "=m" (*f3)
 311      -            : "x" (*f1), "x" (*f2));
      303 +            : "=m" (*f3), "+x" (*f1)
      304 +            : "x" (*f2));
 312  305  }
 313  306  
 314  307  extern __inline__ void
 315  308  sse_divss(float *f1, float *f2, float *f3)
 316  309  {
 317  310          __asm__ __volatile__(
 318  311              "divss %2, %1\n\t"
 319  312              "movss %1, %0"
 320      -            : "=m" (*f3)
 321      -            : "x" (*f1), "x" (*f2));
      313 +            : "=m" (*f3), "+x" (*f1)
      314 +            : "x" (*f2));
 322  315  }
 323  316  
 324  317  extern __inline__ void
 325  318  sse_sqrtss(float *f1, float *f2)
 326  319  {
      320 +        double tmp;
      321 +
 327  322          __asm__ __volatile__(
 328      -            "sqrtss %1, %%xmm0\n\t"
 329      -            "movss  %%xmm0, %0"
 330      -            : "=m" (*f2)
 331      -            : "m" (*f1)
 332      -            : "xmm0");
      323 +            "sqrtss %2, %1\n\t"
      324 +            "movss  %1, %0"
      325 +            : "=m" (*f2), "=x" (tmp)
      326 +            : "m" (*f1));
 333  327  }
 334  328  
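Note: the dummy "=x" (tmp) output replaces the hardcoded %xmm0 and its "xmm0"
clobber, letting the register allocator choose the scratch register (tmp being
double rather than float is harmless; it only reserves an XMM register).
Usage sketch:

        float x = 2.0f, r;

        sse_sqrtss(&x, &r);     /* r == sqrtf(2.0f) */
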
 335  329  extern __inline__ void
 336  330  sse_ucomiss(float *f1, float *f2)
 337  331  {
 338  332          __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
 339  333  
 340  334  }
 341  335  
 342  336  extern __inline__ void
 343  337  sse_comiss(float *f1, float *f2)
 344  338  {
 345  339          __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
 346  340  }
 347  341  
 348  342  extern __inline__ void
 349  343  sse_cvtss2sd(float *f1, double *d1)
 350  344  {
      345 +        double tmp;
      346 +
 351  347          __asm__ __volatile__(
 352      -            "cvtss2sd %1, %%xmm0\n\t"
 353      -            "movsd    %%xmm0, %0"
 354      -            : "=m" (*d1)
 355      -            : "m" (*f1)
 356      -            : "xmm0");
      348 +            "cvtss2sd %2, %1\n\t"
      349 +            "movsd    %1, %0"
      350 +            : "=m" (*d1), "=x" (tmp)
      351 +            : "m" (*f1));
 357  352  }
 358  353  
 359  354  extern __inline__ void
 360  355  sse_cvtsi2ss(int *i1, float *f1)
 361  356  {
      357 +        double tmp;
      358 +
 362  359          __asm__ __volatile__(
 363      -            "cvtsi2ss %1, %%xmm0\n\t"
 364      -            "movss    %%xmm0, %0"
 365      -            : "=m" (*f1)
 366      -            : "m" (*i1)
 367      -            : "xmm0");
      360 +            "cvtsi2ss %2, %1\n\t"
      361 +            "movss    %1, %0"
      362 +            : "=m" (*f1), "=x" (tmp)
      363 +            : "m" (*i1));
 368  364  }
 369  365  
 370  366  extern __inline__ void
 371  367  sse_cvttss2si(float *f1, int *i1)
 372  368  {
      369 +        int tmp;
      370 +
 373  371          __asm__ __volatile__(
 374      -            "cvttss2si %1, %%ecx\n\t"
 375      -            "movl      %%ecx, %0"
 376      -            : "=m" (*i1)
 377      -            : "m" (*f1)
 378      -            : "ecx");
      372 +            "cvttss2si %2, %1\n\t"
      373 +            "movl      %1, %0"
      374 +            : "=m" (*i1), "=r" (tmp)
      375 +            : "m" (*f1));
 379  376  }
 380  377  
 381  378  extern __inline__ void
 382  379  sse_cvtss2si(float *f1, int *i1)
 383  380  {
      381 +        int tmp;
      382 +
 384  383          __asm__ __volatile__(
 385      -            "cvtss2si %1, %%ecx\n\t"
 386      -            "movl     %%ecx, %0"
 387      -            : "=m" (*i1)
 388      -            : "m" (*f1)
 389      -            : "ecx");
      384 +            "cvtss2si %2, %1\n\t"
      385 +            "movl     %1, %0"
      386 +            : "=m" (*i1), "=r" (tmp)
      387 +            : "m" (*f1));
 390  388  }
 391  389  
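Note: cvttss2si always truncates toward zero, while cvtss2si rounds according
to the current MXCSR rounding mode (nearest-even by default). Behavioral
sketch:

        float f = 2.7f;
        int t, n;

        sse_cvttss2si(&f, &t);  /* t == 2: truncated */
        sse_cvtss2si(&f, &n);   /* n == 3: rounded to nearest by default */
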
 392  390  #if defined(__amd64)
 393  391  extern __inline__ void
 394  392  sse_cvtsi2ssq(long long *ll1, float *f1)
 395  393  {
      394 +        double tmp;
      395 +
 396  396          __asm__ __volatile__(
 397      -            "cvtsi2ssq %1, %%xmm0\n\t"
 398      -            "movss     %%xmm0, %0"
 399      -            : "=m" (*f1)
 400      -            : "m" (*ll1)
 401      -            : "xmm0");
      397 +            "cvtsi2ssq %2, %1\n\t"
      398 +            "movss     %1, %0"
      399 +            : "=m" (*f1), "=x" (tmp)
      400 +            : "m" (*ll1));
 402  401  }
 403  402  
 404  403  extern __inline__ void
 405  404  sse_cvttss2siq(float *f1, long long *ll1)
 406  405  {
      406 +        uint64_t tmp;
      407 +
 407  408          __asm__ __volatile__(
 408      -            "cvttss2siq %1, %%rcx\n\t"
 409      -            "movq       %%rcx, %0"
 410      -            : "=m" (*ll1)
 411      -            : "m" (*f1)
 412      -            : "rcx");
      409 +            "cvttss2siq %2, %1\n\t"
      410 +            "movq       %1, %0"
      411 +            : "=m" (*ll1), "=r" (tmp)
      412 +            : "m" (*f1));
 413  413  }
 414  414  
 415  415  extern __inline__ void
 416  416  sse_cvtss2siq(float *f1, long long *ll1)
 417  417  {
      418 +        uint64_t tmp;
      419 +
 418  420          __asm__ __volatile__(
 419      -            "cvtss2siq %1, %%rcx\n\t"
 420      -            "movq      %%rcx, %0"
 421      -            : "=m" (*ll1)
 422      -            : "m" (*f1)
 423      -            : "rcx");
      421 +            "cvtss2siq %2, %1\n\t"
      422 +            "movq      %1, %0"
      423 +            : "=m" (*ll1), "=r" (tmp)
      424 +            : "m" (*f1));
 424  425  }
 425  426  
 426  427  #endif
 427  428  
 428  429  extern __inline__ void
 429  430  sse_cmpeqsd(double *d1, double *d2, long long *ll1)
 430  431  {
 431  432          __asm__ __volatile__(
 432  433              "cmpeqsd %2,%1\n\t"
 433  434              "movsd   %1,%0"
 434      -            : "=m" (*ll1)
 435      -            : "x" (*d1), "x" (*d2));
      435 +            : "=m" (*ll1), "=x" (*d1)
      436 +            : "x" (*d2));
 436  437  }
 437  438  
 438  439  extern __inline__ void
 439  440  sse_cmpltsd(double *d1, double *d2, long long *ll1)
 440  441  {
 441  442          __asm__ __volatile__(
 442  443              "cmpltsd %2,%1\n\t"
 443  444              "movsd   %1,%0"
 444      -            : "=m" (*ll1)
 445      -            : "x" (*d1), "x" (*d2));
      445 +            : "=m" (*ll1), "=x" (*d1)
      446 +            : "x" (*d2));
 446  447  }
 447  448  
 448  449  extern __inline__ void
 449  450  sse_cmplesd(double *d1, double *d2, long long *ll1)
 450  451  {
 451  452          __asm__ __volatile__(
 452  453              "cmplesd %2,%1\n\t"
 453  454              "movsd   %1,%0"
 454      -            : "=m" (*ll1)
 455      -            : "x" (*d1), "x" (*d2));
      455 +            : "=m" (*ll1), "=x" (*d1)
      456 +            : "x" (*d2));
 456  457  }
 457  458  
 458  459  extern __inline__ void
 459  460  sse_cmpunordsd(double *d1, double *d2, long long *ll1)
 460  461  {
 461  462          __asm__ __volatile__(
 462  463              "cmpunordsd %2,%1\n\t"
 463  464              "movsd      %1,%0"
 464      -            : "=m" (*ll1)
 465      -            : "x" (*d1), "x" (*d2));
      465 +            : "=m" (*ll1), "=x" (*d1)
      466 +            : "x" (*d2));
 466  467  }
 467  468  
 468  469  
 469  470  extern __inline__ void
 470  471  sse_minsd(double *d1, double *d2, double *d3)
 471  472  {
 472  473          __asm__ __volatile__(
 473  474              "minsd %2,%1\n\t"
 474  475              "movsd %1,%0"
 475      -            : "=m" (*d3)
 476      -            : "x" (*d1), "x" (*d2));
      476 +            : "=m" (*d3), "=x" (*d1)
      477 +            : "x" (*d2));
 477  478  }
 478  479  
 479  480  extern __inline__ void
 480  481  sse_maxsd(double *d1, double *d2, double *d3)
 481  482  {
 482  483          __asm__ __volatile__(
 483  484              "maxsd %2,%1\n\t"
 484  485              "movsd %1,%0"
 485      -            : "=m" (*d3)
 486      -            : "x" (*d1), "x" (*d2));
      486 +            : "=m" (*d3), "=x" (*d1)
      487 +            : "x" (*d2));
 487  488  }
 488  489  
 489  490  extern __inline__ void
 490  491  sse_addsd(double *d1, double *d2, double *d3)
 491  492  {
 492  493          __asm__ __volatile__(
 493  494              "addsd %2,%1\n\t"
 494  495              "movsd %1,%0"
 495      -            : "=m" (*d3)
 496      -            : "x" (*d1), "x" (*d2));
      496 +            : "=m" (*d3), "=x" (*d1)
      497 +            : "x" (*d2));
 497  498  }
 498  499  
 499  500  extern __inline__ void
 500  501  sse_subsd(double *d1, double *d2, double *d3)
 501  502  {
 502  503          __asm__ __volatile__(
 503  504              "subsd %2,%1\n\t"
 504  505              "movsd %1,%0"
 505      -            : "=m" (*d3)
 506      -            : "x" (*d1), "x" (*d2));
      506 +            : "=m" (*d3), "=x" (*d1)
      507 +            : "x" (*d2));
 507  508  }
 508  509  
 509  510  extern __inline__ void
 510  511  sse_mulsd(double *d1, double *d2, double *d3)
 511  512  {
 512  513          __asm__ __volatile__(
 513  514              "mulsd %2,%1\n\t"
 514  515              "movsd %1,%0"
 515      -            : "=m" (*d3)
 516      -            : "x" (*d1), "x" (*d2));
      516 +            : "=m" (*d3), "=x" (*d1)
      517 +            : "x" (*d2));
 517  518  }
 518  519  
 519  520  extern __inline__ void
 520  521  sse_divsd(double *d1, double *d2, double *d3)
 521  522  {
 522  523          __asm__ __volatile__(
 523  524              "divsd %2,%1\n\t"
 524  525              "movsd %1,%0"
 525      -            : "=m" (*d3)
 526      -            : "x" (*d1), "x" (*d2)
 527      -            : "xmm0");
      526 +            : "=m" (*d3), "=x" (*d1)
      527 +            : "x" (*d2));
 528  528  }
 529  529  
 530  530  extern __inline__ void
 531  531  sse_sqrtsd(double *d1, double *d2)
 532  532  {
      533 +        double tmp;
      534 +
 533  535          __asm__ __volatile__(
 534      -            "sqrtsd %1, %%xmm0\n\t"
 535      -            "movsd %%xmm0, %0"
 536      -            : "=m" (*d2)
 537      -            : "m" (*d1)
 538      -            : "xmm0");
      536 +            "sqrtsd %2, %1\n\t"
      537 +            "movsd %1, %0"
      538 +            : "=m" (*d2), "=x" (tmp)
      539 +            : "m" (*d1));
 539  540  }
 540  541  
 541  542  extern __inline__ void
 542  543  sse_ucomisd(double *d1, double *d2)
 543  544  {
 544  545          __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
 545  546  }
 546  547  
 547  548  extern __inline__ void
 548  549  sse_comisd(double *d1, double *d2)
 549  550  {
 550  551          __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
 551  552  }
 552  553  
 553  554  extern __inline__ void
 554  555  sse_cvtsd2ss(double *d1, float *f1)
 555  556  {
      557 +        double tmp;
      558 +
 556  559          __asm__ __volatile__(
 557      -            "cvtsd2ss %1,%%xmm0\n\t"
 558      -            "movss    %%xmm0,%0"
 559      -            : "=m" (*f1)
 560      -            : "m" (*d1)
 561      -            : "xmm0");
      560 +            "cvtsd2ss %2,%1\n\t"
      561 +            "movss    %1,%0"
      562 +            : "=m" (*f1), "=x" (tmp)
      563 +            : "m" (*d1));
 562  564  }
 563  565  
 564      -
 565  566  extern __inline__ void
 566  567  sse_cvtsi2sd(int *i1, double *d1)
 567  568  {
      569 +        double tmp;
 568  570          __asm__ __volatile__(
 569      -            "cvtsi2sd %1,%%xmm0\n\t"
 570      -            "movsd    %%xmm0,%0"
 571      -            : "=m" (*d1)
 572      -            : "m" (*i1)
 573      -            : "xmm0");
      571 +            "cvtsi2sd %2,%1\n\t"
      572 +            "movsd    %1,%0"
      573 +            : "=m" (*d1), "=x" (tmp)
      574 +            : "m" (*i1));
 574  575  }
 575  576  
 576  577  extern __inline__ void
 577  578  sse_cvttsd2si(double *d1, int *i1)
 578  579  {
      580 +        int tmp;
      581 +
 579  582          __asm__ __volatile__(
 580      -            "cvttsd2si %1,%%ecx\n\t"
 581      -            "movl      %%ecx,%0"
 582      -            : "=m" (*i1)
 583      -            : "m" (*d1)
 584      -            : "ecx");
      583 +            "cvttsd2si %2,%1\n\t"
      584 +            "movl      %1,%0"
      585 +            : "=m" (*i1), "=r" (tmp)
      586 +            : "m" (*d1));
 585  587  }
 586  588  
 587  589  extern __inline__ void
 588  590  sse_cvtsd2si(double *d1, int *i1)
 589  591  {
      592 +        int tmp;
      593 +
 590  594          __asm__ __volatile__(
 591      -            "cvtsd2si %1,%%ecx\n\t"
 592      -            "movl     %%ecx,%0"
 593      -            : "=m" (*i1)
 594      -            : "m" (*d1)
 595      -            : "ecx");
      595 +            "cvtsd2si %2,%1\n\t"
      596 +            "movl     %1,%0"
      597 +            : "=m" (*i1), "=r" (tmp)
      598 +            : "m" (*d1));
 596  599  }
 597  600  
 598  601  #if defined(__amd64)
 599  602  extern __inline__ void
 600  603  sse_cvtsi2sdq(long long *ll1, double *d1)
 601  604  {
      605 +        double tmp;
      606 +
 602  607          __asm__ __volatile__(
 603      -            "cvtsi2sdq %1,%%xmm0\n\t"
 604      -            "movsd     %%xmm0,%0"
 605      -            : "=m" (*d1)
 606      -            : "m" (*ll1)
 607      -            : "xmm0");
      608 +            "cvtsi2sdq %2,%1\n\t"
      609 +            "movsd     %1,%0"
      610 +            : "=m" (*d1), "=x" (tmp)
      611 +            : "m" (*ll1));
 608  612  }
 609  613  
 610  614  extern __inline__ void
 611  615  sse_cvttsd2siq(double *d1, long long *ll1)
 612  616  {
      617 +        uint64_t tmp;
      618 +
 613  619          __asm__ __volatile__(
 614      -            "cvttsd2siq %1,%%rcx\n\t"
 615      -            "movq       %%rcx,%0"
 616      -            : "=m" (*ll1)
 617      -            : "m" (*d1)
 618      -            : "rcx");
      620 +            "cvttsd2siq %2,%1\n\t"
      621 +            "movq       %1,%0"
      622 +            : "=m" (*ll1), "=r" (tmp)
      623 +            : "m" (*d1));
 619  624  }
 620  625  
 621  626  extern __inline__ void
 622  627  sse_cvtsd2siq(double *d1, long long *ll1)
 623  628  {
      629 +        uint64_t tmp;
      630 +
 624  631          __asm__ __volatile__(
 625      -            "cvtsd2siq %1,%%rcx\n\t"
 626      -            "movq      %%rcx,%0"
 627      -            : "=m" (*ll1)
 628      -            : "m" (*d1)
 629      -            : "rcx");
      632 +            "cvtsd2siq %2,%1\n\t"
      633 +            "movq      %1,%0"
      634 +            : "=m" (*ll1), "=r" (tmp)
      635 +            : "m" (*d1));
 630  636  }
 631  637  #endif
      638 +
 632  639  #elif defined(__sparc)
 633  640  extern __inline__ void
 634  641  __fenv_getfsr(unsigned long *l)
 635  642  {
 636      -    __asm__ __volatile__(
      643 +        __asm__ __volatile__(
 637  644  #if defined(__sparcv9)
 638      -        "stx %%fsr,%0\n\t"
      645 +                "stx %%fsr,%0\n\t"
 639  646  #else
 640      -        "st  %%fsr,%0\n\t"
      647 +                "st  %%fsr,%0\n\t"
 641  648  #endif
 642      -        : "=m" (*l));
      649 +                : "=m" (*l));
 643  650  }
 644  651  
 645  652  extern __inline__ void
 646  653  __fenv_setfsr(const unsigned long *l)
 647  654  {
 648      -    __asm__ __volatile__(
      655 +        __asm__ __volatile__(
 649  656  #if defined(__sparcv9)
 650      -        "ldx %0,%%fsr\n\t"
      657 +                "ldx %0,%%fsr\n\t"
 651  658  #else
 652      -        "ld %0,%%fsr\n\t"
      659 +                "ld %0,%%fsr\n\t"
 653  660  #endif
 654      -        : : "m" (*l));
      661 +                : : "m" (*l) : "cc");
 655  662  }
 656  663  
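Note: under sparcv9 the FSR is 64 bits wide, hence the stx/ldx forms. A
read-modify-write usage sketch (the cexc mask comes from the SPARC
architecture manual, not this file):

        unsigned long fsr;

        __fenv_getfsr(&fsr);
        fsr &= ~0x1fUL;         /* clear cexc, the current-exception bits */
        __fenv_setfsr(&fsr);
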
 657  664  extern __inline__ void
 658  665  __fenv_getfsr32(unsigned int *l)
 659  666  {
 660      -    __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
      667 +        __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
 661  668  }
 662  669  
 663  670  extern __inline__ void
 664  671  __fenv_setfsr32(const unsigned int *l)
 665  672  {
 666      -    __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
      673 +        __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
 667  674  }
 668  675  #else
 669  676  #error "GCC FENV inlines not implemented for this platform"
 670  677  #endif
 671  678  
 672  679  #ifdef __cplusplus
 673  680  }
 674  681  #endif
 675  682  
 676  683  #endif  /* __GNUC__ */
 677  684  
 678  685  #endif /* _FENV_INLINES_H */
    