--- old/usr/src/lib/libm/common/m9x/fenv_inlines.h
+++ new/usr/src/lib/libm/common/m9x/fenv_inlines.h
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11
12 12 /*
13 13 * Copyright 2011, Richard Lowe
14 14 */
15 15
16 16 #ifndef _FENV_INLINES_H
17 17 #define _FENV_INLINES_H
18 18
19 19 #ifdef __GNUC__
20 20
21 21 #ifdef __cplusplus
22 22 extern "C" {
23 23 #endif
24 24
25 25 #include <sys/types.h>
26 26
27 27 #if defined(__x86)
28 28
29 29 /*
30 30 * Floating point Control Word and Status Word
31 31 * Definition should actually be shared with x86
32 32 * (much of this 'amd64' code can be shared, in fact.)
33 33 */
34 34 union fp_cwsw {
35 35 uint32_t cwsw;
36 36 struct {
37 37 uint16_t cw;
38 38 uint16_t sw;
39 39 } words;
40 40 };
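
The union gives two views of the same 32 bits: the combined cwsw value and
the individual 16-bit control/status words. A minimal standalone sketch of
how the two views alias, assuming a little-endian x86 target:

    #include <stdint.h>
    #include <stdio.h>

    union fp_cwsw {
            uint32_t cwsw;
            struct {
                    uint16_t cw;
                    uint16_t sw;
            } words;
    };

    int
    main(void)
    {
            union fp_cwsw u;

            u.words.cw = 0x037f;    /* x87 power-on default control word */
            u.words.sw = 0x0000;
            /* On little endian, cw occupies the low 16 bits of cwsw. */
            printf("cwsw = 0x%08x\n", u.cwsw);      /* prints 0x0000037f */
            return (0);
    }
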
41 41
42 42 extern __inline__ void
43 43 __fenv_getcwsw(unsigned int *value)
44 44 {
45 - union fp_cwsw ret;
45 + union fp_cwsw *u = (union fp_cwsw *)value;
46 46
47 47 __asm__ __volatile__(
48 48 "fstsw %0\n\t"
49 49 "fstcw %1\n\t"
50 - : "=m" (ret.words.cw), "=m" (ret.words.sw));
51 - *value = ret.cwsw;
50 + : "=m" (u->words.cw), "=m" (u->words.sw));
52 51 }
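
With the fix, the asm stores straight through a union view of the caller's
buffer instead of staging into a local copy. A hedged usage sketch; the
include path and helper name are assumptions, not part of this change:

    #include "fenv_inlines.h"        /* assumed include path */

    /* Hypothetical helper: run fn() with the x87 cw/sw pair preserved. */
    void
    with_saved_cwsw(void (*fn)(void))
    {
            unsigned int state;

            __fenv_getcwsw(&state);  /* capture control and status words */
            fn();                    /* may perturb the x87 environment */
            __fenv_setcwsw(&state);  /* restore the saved pair */
    }
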
53 52
54 53 extern __inline__ void
55 54 __fenv_setcwsw(const unsigned int *value)
56 55 {
57 56 union fp_cwsw cwsw;
58 57 short fenv[16];
59 58
60 59 cwsw.cwsw = *value;
61 60
62 61 __asm__ __volatile__(
63 62 "fstenv %0\n\t"
64 63 "movw %4,%1\n\t"
65 64 "movw %3,%2\n\t"
66 65 "fldenv %0\n\t"
67 66 "fwait\n\t"
68 67 : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
69 - : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
68 + : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
70 69 /* For practical purposes, we clobber the whole FPU */
71 70 : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
72 71 "st(6)", "st(7)");
73 72 }
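
The fstenv/fldenv pair depends on the 32-bit protected-mode environment
layout: with 16-bit elements, fenv[0] is the control word and fenv[2] the
status word, which is exactly what the two movw patches overwrite. A short
sketch that dumps those fields, assuming an x86 target (illustrative only;
note that fnstenv also masks all x87 exceptions as a side effect):

    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint16_t fenv[16];

            __asm__ __volatile__("fnstenv %0" : "=m" (fenv));
            printf("cw = 0x%04x, sw = 0x%04x\n", fenv[0], fenv[2]);
            return (0);
    }
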
74 73
75 74 extern __inline__ void
76 75 __fenv_getmxcsr(unsigned int *value)
77 76 {
78 - __asm__ __volatile__("stmxcsr %1" : "+m" (*value));
77 + __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
79 78 }
80 79
81 80 extern __inline__ void
82 81 __fenv_setmxcsr(const unsigned int *value)
83 82 {
84 83 __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
85 84 }
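
Together the two MXCSR inlines support the usual read-modify-write pattern.
A hedged sketch; the helper name and bit choice are illustrative (ZM is
MXCSR bit 9 per the Intel SDM):

    /* Hypothetical: mask SIMD divide-by-zero exceptions. */
    void
    mask_simd_divzero(void)
    {
            unsigned int mxcsr;

            __fenv_getmxcsr(&mxcsr);
            mxcsr |= (1u << 9);     /* ZM: zero-divide exception mask */
            __fenv_setmxcsr(&mxcsr);
    }
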
86 85
87 86 extern __inline__ long double
88 87 f2xm1(long double x)
89 88 {
90 89 long double ret;
91 90
92 - __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x));
91 + __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
93 92 return (ret);
94 93 }
95 94
96 95 extern __inline__ long double
97 96 fyl2x(long double y, long double x)
98 97 {
99 98 long double ret;
100 99
101 - __asm__ __volatile__("fyl2x" : "=t" (ret): "0" (x), "u" (y) : "st(1)");
100 + __asm__ __volatile__("fyl2x"
101 + : "=t" (ret)
102 + : "0" (x), "u" (y)
103 + : "st(1)", "cc");
102 104 return (ret);
103 105 }
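
fyl2x computes y * log2(x) and consumes both stack slots, hence the st(1)
clobber and, with the fix, the cc clobber for the x87 condition codes. A
sketch of the classic use, ln(x) = ln(2) * log2(x); the wrapper name is
hypothetical:

    /* Hypothetical wrapper: natural log built on fyl2x. */
    static long double
    ln_sketch(long double x)
    {
            return (fyl2x(0.693147180559945309417L /* ln(2) */, x));
    }
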
104 106
105 107 extern __inline__ long double
106 108 fptan(long double x)
107 109 {
108 110 /*
109 111 * fptan computes the result into %st(0) and then pushes 1.0, so we
110 112 * must pop the FP stack twice; the dummy output consumes the 1.0.
111 113 */
112 114 long double ret;
113 115 long double dummy;
114 116
115 - __asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));
117 + __asm__ __volatile__("fptan"
118 + : "=t" (dummy), "=u" (ret)
119 + : "0" (x)
120 + : "cc");
116 121 return (ret);
117 122 }
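
Because the 1.0 that fptan pushes is consumed through the dummy "=t"
output, callers see a plain tangent. An illustrative cross-check against
libm, assuming this header is on the include path and |x| < 2^63 (fptan's
operating range):

    #include <math.h>
    #include <stdio.h>
    #include "fenv_inlines.h"       /* assumed path, provides fptan() */

    int
    main(void)
    {
            long double x = 0.5L;

            printf("fptan(x) = %.18Lf\n", fptan(x));
            printf("tanl(x)  = %.18Lf\n", tanl(x)); /* should agree */
            return (0);
    }
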
118 123
119 124 extern __inline__ long double
120 125 fpatan(long double x, long double y)
121 126 {
122 127 long double ret;
123 128
124 129 __asm__ __volatile__("fpatan"
125 130 : "=t" (ret)
126 131 : "0" (y), "u" (x)
127 - : "st(1)");
132 + : "st(1)", "cc");
128 133 return (ret);
129 134 }
130 135
131 136 extern __inline__ long double
132 137 fxtract(long double x)
133 138 {
134 - long double ret;
135 -
136 - __asm__ __volatile__("fxtract" : "=t" (ret) : "0" (x));
137 - return (ret);
139 + __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
140 + return (x);
138 141 }
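
Several of these rewrites replace a separate ret variable tied with "0" by
a single "+t" in/out operand: the value both enters and leaves in %st(0).
The idiom in isolation, with fabs chosen purely for illustration:

    /* The "+t" idiom: x is input and output in %st(0). */
    static long double
    fabs_st0(long double x)
    {
            __asm__ __volatile__("fabs" : "+t" (x));
            return (x);
    }
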
139 142
140 143 extern __inline__ long double
141 144 fprem1(long double idend, long double div)
142 145 {
143 - long double ret;
144 -
145 - __asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
146 - return (ret);
146 + __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
147 + return (div);
147 148 }
148 149
149 150 extern __inline__ long double
150 151 fprem(long double idend, long double div)
151 152 {
152 - long double ret;
153 -
154 - __asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
155 - return (ret);
153 + __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
154 + return (div);
156 155 }
157 156
158 157 extern __inline__ long double
159 158 fyl2xp1(long double y, long double x)
160 159 {
161 160 long double ret;
162 161
163 162 __asm__ __volatile__("fyl2xp1"
164 163 : "=t" (ret)
165 164 : "0" (x), "u" (y)
166 - : "st(1)");
165 + : "st(1)", "cc");
167 166 return (ret);
168 167 }
169 168
170 169 extern __inline__ long double
171 170 fsqrt(long double x)
172 171 {
173 - long double ret;
174 -
175 - __asm__ __volatile__("fsqrt" : "=t" (ret) : "0" (x));
176 - return (ret);
172 + __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
173 + return (x);
177 174 }
178 175
179 176 extern __inline__ long double
180 177 fsincos(long double x)
181 178 {
182 - long double ret;
183 -
184 - __asm__ __volatile__("fsincos" : "=t" (ret) : "0" (x));
185 - return (ret);
179 + __asm__ __volatile__("fsincos" : "+t" (x) : : "cc");
180 + return (x);
186 181 }
187 182
188 183 extern __inline__ long double
189 184 frndint(long double x)
190 185 {
191 - long double ret;
192 -
193 - __asm__ __volatile__("frndint" : "=t" (ret) : "0" (x));
194 - return (ret);
186 + __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
187 + return (x);
195 188 }
196 189
197 190 extern __inline__ long double
198 191 fscale(long double x, long double y)
199 192 {
200 193 long double ret;
201 194
202 - __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
195 + __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
203 196 return (ret);
204 197 }
205 198
206 199 extern __inline__ long double
207 200 fsin(long double x)
208 201 {
209 - long double ret;
210 -
211 - __asm__ __volatile__("fsin" : "=t" (ret) : "0" (x));
212 - return (ret);
202 + __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
203 + return (x);
213 204 }
214 205
215 206 extern __inline__ long double
216 207 fcos(long double x)
217 208 {
218 - long double ret;
219 -
220 - __asm__ __volatile__("fcos" : "=t" (ret) : "0" (x));
221 - return (ret);
209 + __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
210 + return (x);
222 211 }
223 212
224 213 extern __inline__ void
225 214 sse_cmpeqss(float *f1, float *f2, int *i1)
226 215 {
227 216 __asm__ __volatile__(
228 217 "cmpeqss %2, %1\n\t"
229 218 "movss %1, %0"
230 - : "=m" (*i1)
231 - : "x" (*f1), "x" (*f2));
219 + : "=m" (*i1), "+x" (*f1)
220 + : "x" (*f2)
221 + : "cc");
232 222 }
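
cmpeqss overwrites its destination register with an all-ones or all-zeroes
mask, which is why the fix promotes *f1 from a plain input to a "+x"
in/out operand. A rough intrinsics rendering of the same operation, for
reference only (not the libm code path):

    #include <xmmintrin.h>

    /* Illustrative cmpeqss equivalent via SSE intrinsics. */
    static void
    cmpeqss_sketch(const float *f1, const float *f2, int *i1)
    {
            __m128 mask = _mm_cmpeq_ss(_mm_set_ss(*f1), _mm_set_ss(*f2));

            /* Low lane: 0xffffffff if equal, 0x00000000 otherwise. */
            _mm_store_ss((float *)i1, mask);
    }
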
233 223
234 224 extern __inline__ void
235 225 sse_cmpltss(float *f1, float *f2, int *i1)
236 226 {
237 227 __asm__ __volatile__(
238 228 "cmpltss %2, %1\n\t"
239 229 "movss %1, %0"
240 - : "=m" (*i1)
241 - : "x" (*f1), "x" (*f2));
230 + : "=m" (*i1), "+x" (*f1)
231 + : "x" (*f2)
232 + : "cc");
242 233 }
243 234
244 235 extern __inline__ void
245 236 sse_cmpless(float *f1, float *f2, int *i1)
246 237 {
247 238 __asm__ __volatile__(
248 239 "cmpless %2, %1\n\t"
249 240 "movss %1, %0"
250 - : "=m" (*i1)
251 - : "x" (*f1), "x" (*f2));
241 + : "=m" (*i1), "+x" (*f1)
242 + : "x" (*f2)
243 + : "cc");
252 244 }
253 245
254 246 extern __inline__ void
255 247 sse_cmpunordss(float *f1, float *f2, int *i1)
256 248 {
257 249 __asm__ __volatile__(
258 250 "cmpunordss %2, %1\n\t"
259 251 "movss %1, %0"
260 - : "=m" (*i1)
261 - : "x" (*f1), "x" (*f2));
252 + : "=m" (*i1), "+x" (*f1)
253 + : "x" (*f2)
254 + : "cc");
262 255 }
263 256
264 257 extern __inline__ void
265 258 sse_minss(float *f1, float *f2, float *f3)
266 259 {
267 260 __asm__ __volatile__(
268 261 "minss %2, %1\n\t"
269 262 "movss %1, %0"
270 - : "=m" (*f3)
271 - : "x" (*f1), "x" (*f2));
263 + : "=m" (*f3), "+x" (*f1)
264 + : "x" (*f2));
272 265 }
273 266
274 267 extern __inline__ void
275 268 sse_maxss(float *f1, float *f2, float *f3)
276 269 {
277 270 __asm__ __volatile__(
278 271 "maxss %2, %1\n\t"
279 272 "movss %1, %0"
280 - : "=m" (*f3)
281 - : "x" (*f1), "x" (*f2));
273 + : "=m" (*f3), "+x" (*f1)
274 + : "x" (*f2));
282 275 }
283 276
284 277 extern __inline__ void
285 278 sse_addss(float *f1, float *f2, float *f3)
286 279 {
287 280 __asm__ __volatile__(
288 281 "addss %2, %1\n\t"
289 282 "movss %1, %0"
290 - : "=m" (*f3)
291 - : "x" (*f1), "x" (*f2));
283 + : "=m" (*f3), "+x" (*f1)
284 + : "x" (*f2));
292 285 }
293 286
294 287 extern __inline__ void
295 288 sse_subss(float *f1, float *f2, float *f3)
296 289 {
297 290 __asm__ __volatile__(
298 291 "subss %2, %1\n\t"
299 292 "movss %1, %0"
300 - : "=m" (*f3)
301 - : "x" (*f1), "x" (*f2));
293 + : "=m" (*f3), "+x" (*f1)
294 + : "x" (*f2));
302 295 }
303 296
304 297 extern __inline__ void
305 298 sse_mulss(float *f1, float *f2, float *f3)
306 299 {
307 300 __asm__ __volatile__(
308 301 "mulss %2, %1\n\t"
309 302 "movss %1, %0"
310 - : "=m" (*f3)
311 - : "x" (*f1), "x" (*f2));
303 + : "=m" (*f3), "+x" (*f1)
304 + : "x" (*f2));
312 305 }
313 306
314 307 extern __inline__ void
315 308 sse_divss(float *f1, float *f2, float *f3)
316 309 {
317 310 __asm__ __volatile__(
318 311 "divss %2, %1\n\t"
319 312 "movss %1, %0"
320 - : "=m" (*f3)
321 - : "x" (*f1), "x" (*f2));
313 + : "=m" (*f3), "+x" (*f1)
314 + : "x" (*f2));
322 315 }
323 316
324 317 extern __inline__ void
325 318 sse_sqrtss(float *f1, float *f2)
326 319 {
320 + double tmp;
321 +
327 322 __asm__ __volatile__(
328 - "sqrtss %1, %%xmm0\n\t"
329 - "movss %%xmm0, %0"
330 - : "=m" (*f2)
331 - : "m" (*f1)
332 - : "xmm0");
323 + "sqrtss %2, %1\n\t"
324 + "movss %1, %0"
325 + : "=m" (*f2), "=x" (tmp)
326 + : "m" (*f1));
333 327 }
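
The old code hard-wired %xmm0 and had to list it as a clobber; the rewrite
instead requests a scratch XMM register through a dummy "=x" output that
the compiler is free to allocate. The idiom distilled, with hypothetical
names:

    /* Scratch-register idiom: GCC picks the temporary XMM register. */
    static void
    sqrtss_sketch(const float *src, float *dst)
    {
            float tmp;      /* written by the asm, never read afterwards */

            __asm__ __volatile__(
                "sqrtss %2, %1\n\t"
                "movss %1, %0"
                : "=m" (*dst), "=x" (tmp)
                : "m" (*src));
    }
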
334 328
335 329 extern __inline__ void
336 330 sse_ucomiss(float *f1, float *f2)
337 331 {
338 332 __asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));
339 333
340 334 }
341 335
342 336 extern __inline__ void
343 337 sse_comiss(float *f1, float *f2)
344 338 {
345 339 __asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
346 340 }
347 341
348 342 extern __inline__ void
349 343 sse_cvtss2sd(float *f1, double *d1)
350 344 {
345 + double tmp;
346 +
351 347 __asm__ __volatile__(
352 - "cvtss2sd %1, %%xmm0\n\t"
353 - "movsd %%xmm0, %0"
354 - : "=m" (*d1)
355 - : "m" (*f1)
356 - : "xmm0");
348 + "cvtss2sd %2, %1\n\t"
349 + "movsd %1, %0"
350 + : "=m" (*d1), "=x" (tmp)
351 + : "m" (*f1));
357 352 }
358 353
359 354 extern __inline__ void
360 355 sse_cvtsi2ss(int *i1, float *f1)
361 356 {
357 + double tmp;
358 +
362 359 __asm__ __volatile__(
363 - "cvtsi2ss %1, %%xmm0\n\t"
364 - "movss %%xmm0, %0"
365 - : "=m" (*f1)
366 - : "m" (*i1)
367 - : "xmm0");
360 + "cvtsi2ss %2, %1\n\t"
361 + "movss %1, %0"
362 + : "=m" (*f1), "=x" (tmp)
363 + : "m" (*i1));
368 364 }
369 365
370 366 extern __inline__ void
371 367 sse_cvttss2si(float *f1, int *i1)
372 368 {
369 + int tmp;
370 +
373 371 __asm__ __volatile__(
374 - "cvttss2si %1, %%ecx\n\t"
375 - "movl %%ecx, %0"
376 - : "=m" (*i1)
377 - : "m" (*f1)
378 - : "ecx");
372 + "cvttss2si %2, %1\n\t"
373 + "movl %1, %0"
374 + : "=m" (*i1), "=r" (tmp)
375 + : "m" (*f1));
379 376 }
380 377
381 378 extern __inline__ void
382 379 sse_cvtss2si(float *f1, int *i1)
383 380 {
381 + int tmp;
382 +
384 383 __asm__ __volatile__(
385 - "cvtss2si %1, %%ecx\n\t"
386 - "movl %%ecx, %0"
387 - : "=m" (*i1)
388 - : "m" (*f1)
389 - : "ecx");
384 + "cvtss2si %2, %1\n\t"
385 + "movl %1, %0"
386 + : "=m" (*i1), "=r" (tmp)
387 + : "m" (*f1));
390 388 }
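
The two converters differ only in how the fraction is dropped: cvttss2si
always truncates toward zero, while cvtss2si rounds according to MXCSR.RC
(round-to-nearest by default). A hedged usage sketch, assuming the header
is on the include path:

    #include <stdio.h>
    #include "fenv_inlines.h"       /* assumed path */

    int
    main(void)
    {
            float f = 2.7f;
            int t, r;

            sse_cvttss2si(&f, &t);  /* truncates: t == 2 */
            sse_cvtss2si(&f, &r);   /* rounds per MXCSR.RC: r == 3 */
            printf("%d %d\n", t, r);
            return (0);
    }
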
391 389
392 390 #if defined(__amd64)
393 391 extern __inline__ void
394 392 sse_cvtsi2ssq(long long *ll1, float *f1)
395 393 {
394 + double tmp;
395 +
396 396 __asm__ __volatile__(
397 - "cvtsi2ssq %1, %%xmm0\n\t"
398 - "movss %%xmm0, %0"
399 - : "=m" (*f1)
400 - : "m" (*ll1)
401 - : "xmm0");
397 + "cvtsi2ssq %2, %1\n\t"
398 + "movss %1, %0"
399 + : "=m" (*f1), "=x" (tmp)
400 + : "m" (*ll1));
402 401 }
403 402
404 403 extern __inline__ void
405 404 sse_cvttss2siq(float *f1, long long *ll1)
406 405 {
406 + uint64_t tmp;
407 +
407 408 __asm__ __volatile__(
408 - "cvttss2siq %1, %%rcx\n\t"
409 - "movq %%rcx, %0"
410 - : "=m" (*ll1)
411 - : "m" (*f1)
412 - : "rcx");
409 + "cvttss2siq %2, %1\n\t"
410 + "movq %1, %0"
411 + : "=m" (*ll1), "=r" (tmp)
412 + : "m" (*f1));
413 413 }
414 414
415 415 extern __inline__ void
416 416 sse_cvtss2siq(float *f1, long long *ll1)
417 417 {
418 + uint64_t tmp;
419 +
418 420 __asm__ __volatile__(
419 - "cvtss2siq %1, %%rcx\n\t"
420 - "movq %%rcx, %0"
421 - : "=m" (*ll1)
422 - : "m" (*f1)
423 - : "rcx");
421 + "cvtss2siq %2, %1\n\t"
422 + "movq %1, %0"
423 + : "=m" (*ll1), "=r" (tmp)
424 + : "m" (*f1));
424 425 }
425 426
426 427 #endif
427 428
428 429 extern __inline__ void
429 430 sse_cmpeqsd(double *d1, double *d2, long long *ll1)
430 431 {
431 432 __asm__ __volatile__(
432 433 "cmpeqsd %2,%1\n\t"
433 434 "movsd %1,%0"
434 - : "=m" (*ll1)
435 - : "x" (*d1), "x" (*d2));
435 + : "=m" (*ll1), "=x" (*d1)
436 + : "x" (*d2));
436 437 }
437 438
438 439 extern __inline__ void
439 440 sse_cmpltsd(double *d1, double *d2, long long *ll1)
440 441 {
441 442 __asm__ __volatile__(
442 443 "cmpltsd %2,%1\n\t"
443 444 "movsd %1,%0"
444 - : "=m" (*ll1)
445 - : "x" (*d1), "x" (*d2));
445 + : "=m" (*ll1), "=x" (*d1)
446 + : "x" (*d2));
446 447 }
447 448
448 449 extern __inline__ void
449 450 sse_cmplesd(double *d1, double *d2, long long *ll1)
450 451 {
451 452 __asm__ __volatile__(
452 453 "cmplesd %2,%1\n\t"
453 454 "movsd %1,%0"
454 - : "=m" (*ll1)
455 - : "x" (*d1), "x" (*d2));
455 + : "=m" (*ll1), "=x" (*d1)
456 + : "x" (*d2));
456 457 }
457 458
458 459 extern __inline__ void
459 460 sse_cmpunordsd(double *d1, double *d2, long long *ll1)
460 461 {
461 462 __asm__ __volatile__(
462 463 "cmpunordsd %2,%1\n\t"
463 464 "movsd %1,%0"
464 - : "=m" (*ll1)
465 - : "x" (*d1), "x" (*d2));
465 + : "=m" (*ll1), "=x" (*d1)
466 + : "x" (*d2));
466 467 }
467 468
468 469
469 470 extern __inline__ void
470 471 sse_minsd(double *d1, double *d2, double *d3)
471 472 {
472 473 __asm__ __volatile__(
473 474 "minsd %2,%1\n\t"
474 475 "movsd %1,%0"
475 - : "=m" (*d3)
476 - : "x" (*d1), "x" (*d2));
476 + : "=m" (*d3), "=x" (*d1)
477 + : "x" (*d2));
477 478 }
478 479
479 480 extern __inline__ void
480 481 sse_maxsd(double *d1, double *d2, double *d3)
481 482 {
482 483 __asm__ __volatile__(
483 484 "maxsd %2,%1\n\t"
484 485 "movsd %1,%0"
485 - : "=m" (*d3)
486 - : "x" (*d1), "x" (*d2));
486 + : "=m" (*d3), "=x" (*d1)
487 + : "x" (*d2));
487 488 }
488 489
489 490 extern __inline__ void
490 491 sse_addsd(double *d1, double *d2, double *d3)
491 492 {
492 493 __asm__ __volatile__(
493 494 "addsd %2,%1\n\t"
494 495 "movsd %1,%0"
495 - : "=m" (*d3)
496 - : "x" (*d1), "x" (*d2));
496 + : "=m" (*d3), "=x" (*d1)
497 + : "x" (*d2));
497 498 }
498 499
499 500 extern __inline__ void
500 501 sse_subsd(double *d1, double *d2, double *d3)
501 502 {
502 503 __asm__ __volatile__(
503 504 "subsd %2,%1\n\t"
504 505 "movsd %1,%0"
505 - : "=m" (*d3)
506 - : "x" (*d1), "x" (*d2));
506 + : "=m" (*d3), "=x" (*d1)
507 + : "x" (*d2));
507 508 }
508 509
509 510 extern __inline__ void
510 511 sse_mulsd(double *d1, double *d2, double *d3)
511 512 {
512 513 __asm__ __volatile__(
513 514 "mulsd %2,%1\n\t"
514 515 "movsd %1,%0"
515 - : "=m" (*d3)
516 - : "x" (*d1), "x" (*d2));
516 + : "=m" (*d3), "=x" (*d1)
517 + : "x" (*d2));
517 518 }
518 519
519 520 extern __inline__ void
520 521 sse_divsd(double *d1, double *d2, double *d3)
521 522 {
522 523 __asm__ __volatile__(
523 524 "divsd %2,%1\n\t"
524 525 "movsd %1,%0"
525 - : "=m" (*d3)
526 - : "x" (*d1), "x" (*d2)
527 - : "xmm0");
526 + : "=m" (*d3), "=x" (*d1)
527 + : "x" (*d2));
528 528 }
529 529
530 530 extern __inline__ void
531 531 sse_sqrtsd(double *d1, double *d2)
532 532 {
533 + double tmp;
534 +
533 535 __asm__ __volatile__(
534 - "sqrtsd %1, %%xmm0\n\t"
535 - "movsd %%xmm0, %0"
536 - : "=m" (*d2)
537 - : "m" (*d1)
538 - : "xmm0");
536 + "sqrtsd %2, %1\n\t"
537 + "movsd %1, %0"
538 + : "=m" (*d2), "=x" (tmp)
539 + : "m" (*d1));
539 540 }
540 541
541 542 extern __inline__ void
542 543 sse_ucomisd(double *d1, double *d2)
543 544 {
544 545 __asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
545 546 }
546 547
547 548 extern __inline__ void
548 549 sse_comisd(double *d1, double *d2)
549 550 {
550 551 __asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
551 552 }
552 553
553 554 extern __inline__ void
554 555 sse_cvtsd2ss(double *d1, float *f1)
555 556 {
557 + double tmp;
558 +
556 559 __asm__ __volatile__(
557 - "cvtsd2ss %1,%%xmm0\n\t"
558 - "movss %%xmm0,%0"
559 - : "=m" (*f1)
560 - : "m" (*d1)
561 - : "xmm0");
560 + "cvtsd2ss %2,%1\n\t"
561 + "movss %1,%0"
562 + : "=m" (*f1), "=x" (tmp)
563 + : "m" (*d1));
562 564 }
563 565
564 -
565 566 extern __inline__ void
566 567 sse_cvtsi2sd(int *i1, double *d1)
567 568 {
569 + double tmp;
568 570 __asm__ __volatile__(
569 - "cvtsi2sd %1,%%xmm0\n\t"
570 - "movsd %%xmm0,%0"
571 - : "=m" (*d1)
572 - : "m" (*i1)
573 - : "xmm0");
571 + "cvtsi2sd %2,%1\n\t"
572 + "movsd %1,%0"
573 + : "=m" (*d1), "=x" (tmp)
574 + : "m" (*i1));
574 575 }
575 576
576 577 extern __inline__ void
577 578 sse_cvttsd2si(double *d1, int *i1)
578 579 {
580 + int tmp;
581 +
579 582 __asm__ __volatile__(
580 - "cvttsd2si %1,%%ecx\n\t"
581 - "movl %%ecx,%0"
582 - : "=m" (*i1)
583 - : "m" (*d1)
584 - : "ecx");
583 + "cvttsd2si %2,%1\n\t"
584 + "movl %1,%0"
585 + : "=m" (*i1), "=r" (tmp)
586 + : "m" (*d1));
585 587 }
586 588
587 589 extern __inline__ void
588 590 sse_cvtsd2si(double *d1, int *i1)
589 591 {
592 + int tmp;
593 +
590 594 __asm__ __volatile__(
591 - "cvtsd2si %1,%%ecx\n\t"
592 - "movl %%ecx,%0"
593 - : "=m" (*i1)
594 - : "m" (*d1)
595 - : "ecx");
595 + "cvtsd2si %2,%1\n\t"
596 + "movl %1,%0"
597 + : "=m" (*i1), "=r" (tmp)
598 + : "m" (*d1));
596 599 }
597 600
598 601 #if defined(__amd64)
599 602 extern __inline__ void
600 603 sse_cvtsi2sdq(long long *ll1, double *d1)
601 604 {
605 + double tmp;
606 +
602 607 __asm__ __volatile__(
603 - "cvtsi2sdq %1,%%xmm0\n\t"
604 - "movsd %%xmm0,%0"
605 - : "=m" (*d1)
606 - : "m" (*ll1)
607 - : "xmm0");
608 + "cvtsi2sdq %2,%1\n\t"
609 + "movsd %1,%0"
610 + : "=m" (*d1), "=x" (tmp)
611 + : "m" (*ll1));
608 612 }
609 613
610 614 extern __inline__ void
611 615 sse_cvttsd2siq(double *d1, long long *ll1)
612 616 {
617 + uint64_t tmp;
618 +
613 619 __asm__ __volatile__(
614 - "cvttsd2siq %1,%%rcx\n\t"
615 - "movq %%rcx,%0"
616 - : "=m" (*ll1)
617 - : "m" (*d1)
618 - : "rcx");
620 + "cvttsd2siq %2,%1\n\t"
621 + "movq %1,%0"
622 + : "=m" (*ll1), "=r" (tmp)
623 + : "m" (*d1));
619 624 }
620 625
621 626 extern __inline__ void
622 627 sse_cvtsd2siq(double *d1, long long *ll1)
623 628 {
629 + uint64_t tmp;
630 +
624 631 __asm__ __volatile__(
625 - "cvtsd2siq %1,%%rcx\n\t"
626 - "movq %%rcx,%0"
627 - : "=m" (*ll1)
628 - : "m" (*d1)
629 - : "rcx");
632 + "cvtsd2siq %2,%1\n\t"
633 + "movq %1,%0"
634 + : "=m" (*ll1), "=r" (tmp)
635 + : "m" (*d1));
630 636 }
631 637 #endif
638 +
632 639 #elif defined(__sparc)
633 640 extern __inline__ void
634 641 __fenv_getfsr(unsigned long *l)
635 642 {
636 - __asm__ __volatile__(
643 + __asm__ __volatile__(
637 644 #if defined(__sparcv9)
638 - "stx %%fsr,%0\n\t"
645 + "stx %%fsr,%0\n\t"
639 646 #else
640 - "st %%fsr,%0\n\t"
647 + "st %%fsr,%0\n\t"
641 648 #endif
642 - : "=m" (*l));
649 + : "=m" (*l));
643 650 }
644 651
645 652 extern __inline__ void
646 653 __fenv_setfsr(const unsigned long *l)
647 654 {
648 - __asm__ __volatile__(
655 + __asm__ __volatile__(
649 656 #if defined(__sparcv9)
650 - "ldx %0,%%fsr\n\t"
657 + "ldx %0,%%fsr\n\t"
651 658 #else
652 - "ld %0,%%fsr\n\t"
659 + "ld %0,%%fsr\n\t"
653 660 #endif
654 - : : "m" (*l));
661 + : : "m" (*l) : "cc");
655 662 }
656 663
657 664 extern __inline__ void
658 665 __fenv_getfsr32(unsigned int *l)
659 666 {
660 - __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
667 + __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
661 668 }
662 669
663 670 extern __inline__ void
664 671 __fenv_setfsr32(const unsigned int *l)
665 672 {
666 - __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
673 + __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
667 674 }
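
On SPARC the same get/modify/set pattern applies to %fsr. A hypothetical
caller that clears the accrued-exception (aexc) field, FSR bits 9:5:

    /* Hypothetical: clear accrued exception bits in the SPARC FSR. */
    void
    clear_accrued(void)
    {
            unsigned long fsr;

            __fenv_getfsr(&fsr);
            fsr &= ~0x3e0UL;        /* aexc field, FSR bits 9:5 */
            __fenv_setfsr(&fsr);
    }
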
668 675 #else
669 676 #error "GCC FENV inlines not implemented for this platform"
670 677 #endif
671 678
672 679 #ifdef __cplusplus
673 680 }
674 681 #endif
675 682
676 683 #endif /* __GNUC__ */
677 684
678 685 #endif /* _FENV_INLINES_H */