25 #include <sys/types.h>
26
27 #if defined(__x86)
28
29 /*
30 * Floating point Control Word and Status Word
31 * Definition should actually be shared with x86
32 * (much of this 'amd64' code can be, in fact.)
33 */
/*
 * Combined 32-bit view of the x87 control and status words.
 * NOTE(review): as used by __fenv_getcwsw()/__fenv_setcwsw(), the "cw"
 * half actually receives the STATUS word (fstsw) and "sw" the CONTROL
 * word; the two accessors are mutually consistent, so only the field
 * names are misleading.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};
41
extern __inline__ void
__fenv_getcwsw(unsigned int *value)
{
	/*
	 * Read the x87 status and control words into one 32-bit value.
	 * fstsw lands in words.cw and fstcw in words.sw -- swapped
	 * relative to the field names, but self-consistent with
	 * __fenv_setcwsw(), which feeds words.sw to the control-word
	 * slot of the environment image.
	 */
	union fp_cwsw ret;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (ret.words.cw), "=m" (ret.words.sw));
	*value = ret.cwsw;
}
53
54 extern __inline__ void
55 __fenv_setcwsw(const unsigned int *value)
56 {
57 union fp_cwsw cwsw;
58 short fenv[16];
59
60 cwsw.cwsw = *value;
61
62 __asm__ __volatile__(
63 "fstenv %0\n\t"
64 "movw %4,%1\n\t"
65 "movw %3,%2\n\t"
66 "fldenv %0\n\t"
67 "fwait\n\t"
68 : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
69 : "d" (cwsw.words.cw), "c" (cwsw.words.sw)
70 /* For practical purposes, we clobber the whole FPU */
71 : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
72 "st(6)", "st(7)");
73 }
74
extern __inline__ void
__fenv_getmxcsr(unsigned int *value)
{
	/*
	 * Store the SSE control/status register into *value.  The old
	 * template referenced operand %1, which does not exist (only one
	 * operand is declared), and used "+m" although the prior
	 * contents are never read; use %0 with a plain "=m" output.
	 */
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}
80
extern __inline__ void
__fenv_setmxcsr(const unsigned int *value)
{
	/* Load *value into the SSE control/status register (MXCSR). */
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
86
extern __inline__ long double
f2xm1(long double x)
{
	/* Compute 2^x - 1 in place on the x87 stack top. */
	__asm__ __volatile__("f2xm1" : "+t" (x));
	return (x);
}
95
extern __inline__ long double
fyl2x(long double y, long double x)
{
	long double res;

	/* res = y * log2(x); fyl2x pops the stack, clobbering st(1). */
	__asm__ __volatile__("fyl2x"
	    : "=t" (res)
	    : "0" (x), "u" (y)
	    : "st(1)");
	return (res);
}
104
extern __inline__ long double
fptan(long double x)
{
	/*
	 * fptan pushes 1.0 then the result on completion, so we want to pop
	 * the FP stack twice, so we need a dummy value into which to pop it.
	 * Afterwards st(0) = 1.0 (the dummy) and st(1) = tan(x), which is
	 * what is returned.
	 */
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan" : "=t" (dummy), "=u" (ret) : "0" (x));
	return (ret);
}
118
extern __inline__ long double
fpatan(long double x, long double y)
{
	long double res;

	/* res = arctan(st(1)/st(0)) = arctan(x / y); fpatan pops st(1). */
	__asm__ __volatile__("fpatan" : "=t" (res) : "0" (y), "u" (x) : "st(1)");
	return (res);
}
130
extern __inline__ long double
fxtract(long double x)
{
	long double expo;

	/*
	 * fxtract replaces st(0) with the exponent and then PUSHES the
	 * significand, leaving two values on the x87 stack.  The old
	 * code consumed only st(0), leaking one stack slot per call;
	 * consume both (st(0) = significand, st(1) = exponent).  The
	 * significand is returned, matching the original st(0) result.
	 */
	__asm__ __volatile__("fxtract" : "=t" (x), "=u" (expo) : "0" (x));
	return (x);
}
139
extern __inline__ long double
fprem1(long double idend, long double div)
{
	long double ret;

	/*
	 * IEEE partial remainder: st(0) rem st(1) with st(0) = div and
	 * st(1) = idend, i.e. the remainder of div with respect to idend
	 * using a round-to-nearest quotient.  NOTE(review): the parameter
	 * names suggest the opposite operand roles; confirm callers pass
	 * arguments in this order.
	 */
	__asm__ __volatile__("fprem1" : "=t" (ret) : "0" (div), "u" (idend));
	return (ret);
}
148
extern __inline__ long double
fprem(long double idend, long double div)
{
	long double ret;

	/*
	 * Partial remainder with truncated quotient: st(0) rem st(1)
	 * with st(0) = div and st(1) = idend.  NOTE(review): parameter
	 * names suggest the opposite roles; see fprem1().
	 */
	__asm__ __volatile__("fprem" : "=t" (ret) : "0" (div), "u" (idend));
	return (ret);
}
157
extern __inline__ long double
fyl2xp1(long double y, long double x)
{
	long double res;

	/* res = y * log2(x + 1); fyl2xp1 pops the stack, clobbering st(1). */
	__asm__ __volatile__("fyl2xp1" : "=t" (res) : "0" (x), "u" (y) : "st(1)");
	return (res);
}
169
extern __inline__ long double
fsqrt(long double x)
{
	/* Square root computed in place on the x87 stack top. */
	__asm__ __volatile__("fsqrt" : "+t" (x));
	return (x);
}
178
extern __inline__ long double
fsincos(long double x)
{
	long double sine;

	/*
	 * fsincos stores sin(x) in st(0) and then PUSHES cos(x); the old
	 * code consumed only st(0), leaking one x87 stack slot per call.
	 * Consume both (st(0) = cos, st(1) = sin).  cos(x) is returned,
	 * matching the original st(0) result.
	 */
	__asm__ __volatile__("fsincos" : "=t" (x), "=u" (sine) : "0" (x));
	return (x);
}
187
extern __inline__ long double
frndint(long double x)
{
	/* Round to integer in place, honouring the current rounding mode. */
	__asm__ __volatile__("frndint" : "+t" (x));
	return (x);
}
196
extern __inline__ long double
fscale(long double x, long double y)
{
	long double ret;

	/* ret = y * 2^trunc(x): fscale scales st(0) = y by st(1) = x. */
	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x));
	return (ret);
}
205
extern __inline__ long double
fsin(long double x)
{
	/* sin(x) computed in place on the x87 stack top. */
	__asm__ __volatile__("fsin" : "+t" (x));
	return (x);
}
214
extern __inline__ long double
fcos(long double x)
{
	/* cos(x) computed in place on the x87 stack top. */
	__asm__ __volatile__("fcos" : "+t" (x));
	return (x);
}
223
extern __inline__ void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	/*
	 * *i1 = all-ones mask if *f1 == *f2, else 0.  cmpeqss overwrites
	 * its %1 register, which the old code declared as a plain input
	 * ("x" (*f1)); modifying an input operand is undefined for GCC
	 * inline asm.  Stage through a read-write temporary instead.
	 */
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2));
}
233
extern __inline__ void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	/*
	 * *i1 = all-ones mask if *f1 < *f2, else 0.  %1 is modified by
	 * cmpltss, so it must be a read-write operand (the old code
	 * wrongly declared it a plain input).
	 */
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2));
}
243
extern __inline__ void
sse_cmpless(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	/*
	 * *i1 = all-ones mask if *f1 <= *f2, else 0.  %1 is modified by
	 * cmpless, so it must be a read-write operand (the old code
	 * wrongly declared it a plain input).
	 */
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2));
}
253
extern __inline__ void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	float tmp = *f1;

	/*
	 * *i1 = all-ones mask if *f1 and *f2 are unordered (either is
	 * NaN), else 0.  %1 is modified by cmpunordss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input).
	 */
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (tmp)
	    : "x" (*f2));
}
263
extern __inline__ void
sse_minss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = min(*f1, *f2).  %1 is modified by minss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
273
extern __inline__ void
sse_maxss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = max(*f1, *f2).  %1 is modified by maxss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
283
extern __inline__ void
sse_addss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = *f1 + *f2.  %1 is modified by addss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
293
extern __inline__ void
sse_subss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = *f1 - *f2.  %1 is modified by subss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
303
extern __inline__ void
sse_mulss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = *f1 * *f2.  %1 is modified by mulss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
313
extern __inline__ void
sse_divss(float *f1, float *f2, float *f3)
{
	float tmp = *f1;

	/*
	 * *f3 = *f1 / *f2.  %1 is modified by divss, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (tmp)
	    : "x" (*f2));
}
323
extern __inline__ void
sse_sqrtss(float *f1, float *f2)
{
	/* *f2 = sqrtf(*f1), staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "sqrtss %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f2)
	    : "m" (*f1)
	    : "xmm0");
}
334
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	/*
	 * Unordered compare, performed for its exception-flag side
	 * effects; the EFLAGS result is discarded.  Declare the "cc"
	 * clobber: ucomiss writes ZF/PF/CF, which the old template did
	 * not tell the compiler about.
	 */
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2) : "cc");
}
341
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	/*
	 * Ordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: comiss
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2) : "cc");
}
347
extern __inline__ void
sse_cvtss2sd(float *f1, double *d1)
{
	/* *d1 = (double)*f1, staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "cvtss2sd %1, %%xmm0\n\t"
	    "movsd %%xmm0, %0"
	    : "=m" (*d1)
	    : "m" (*f1)
	    : "xmm0");
}
358
extern __inline__ void
sse_cvtsi2ss(int *i1, float *f1)
{
	/* *f1 = (float)*i1, staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "cvtsi2ss %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f1)
	    : "m" (*i1)
	    : "xmm0");
}
369
extern __inline__ void
sse_cvttss2si(float *f1, int *i1)
{
	/* *i1 = (int)*f1, truncated toward zero; staged through %ecx. */
	__asm__ __volatile__(
	    "cvttss2si %1, %%ecx\n\t"
	    "movl %%ecx, %0"
	    : "=m" (*i1)
	    : "m" (*f1)
	    : "ecx");
}
380
extern __inline__ void
sse_cvtss2si(float *f1, int *i1)
{
	/* *i1 = *f1 converted using the current rounding mode (cvtss2si). */
	__asm__ __volatile__(
	    "cvtss2si %1, %%ecx\n\t"
	    "movl %%ecx, %0"
	    : "=m" (*i1)
	    : "m" (*f1)
	    : "ecx");
}
391
392 #if defined(__amd64)
extern __inline__ void
sse_cvtsi2ssq(long long *ll1, float *f1)
{
	/* *f1 = (float)*ll1 (64-bit source; amd64 only). */
	__asm__ __volatile__(
	    "cvtsi2ssq %1, %%xmm0\n\t"
	    "movss %%xmm0, %0"
	    : "=m" (*f1)
	    : "m" (*ll1)
	    : "xmm0");
}
403
extern __inline__ void
sse_cvttss2siq(float *f1, long long *ll1)
{
	/* *ll1 = (long long)*f1, truncated toward zero; staged through %rcx. */
	__asm__ __volatile__(
	    "cvttss2siq %1, %%rcx\n\t"
	    "movq %%rcx, %0"
	    : "=m" (*ll1)
	    : "m" (*f1)
	    : "rcx");
}
414
extern __inline__ void
sse_cvtss2siq(float *f1, long long *ll1)
{
	/* *ll1 = *f1 converted using the current rounding mode. */
	__asm__ __volatile__(
	    "cvtss2siq %1, %%rcx\n\t"
	    "movq %%rcx, %0"
	    : "=m" (*ll1)
	    : "m" (*f1)
	    : "rcx");
}
425
426 #endif
427
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	/*
	 * *ll1 = all-ones mask if *d1 == *d2, else 0.  %1 is modified by
	 * cmpeqsd, so it must be a read-write operand (the old code
	 * wrongly declared it a plain input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}
437
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	/*
	 * *ll1 = all-ones mask if *d1 < *d2, else 0.  %1 is modified by
	 * cmpltsd, so it must be a read-write operand (the old code
	 * wrongly declared it a plain input).
	 */
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}
447
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	/*
	 * *ll1 = all-ones mask if *d1 <= *d2, else 0.  %1 is modified by
	 * cmplesd, so it must be a read-write operand (the old code
	 * wrongly declared it a plain input).
	 */
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}
457
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	double tmp = *d1;

	/*
	 * *ll1 = all-ones mask if *d1 and *d2 are unordered (either is
	 * NaN), else 0.  %1 is modified by cmpunordsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input).
	 */
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (tmp)
	    : "x" (*d2));
}
467
468
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = min(*d1, *d2).  %1 is modified by minsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
478
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = max(*d1, *d2).  %1 is modified by maxsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
488
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = *d1 + *d2.  %1 is modified by addsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
498
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = *d1 - *d2.  %1 is modified by subsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
508
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = *d1 * *d2.  %1 is modified by mulsd, so it must be a
	 * read-write operand (the old code wrongly declared it a plain
	 * input); stage through a temporary.
	 */
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
518
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	double tmp = *d1;

	/*
	 * *d3 = *d1 / *d2.  Two fixes: %1 is modified by divsd and must
	 * be a read-write operand (was a plain input), and the "xmm0"
	 * clobber is dropped -- the template never touches %xmm0
	 * explicitly (none of the sibling arithmetic helpers declare it).
	 */
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (tmp)
	    : "x" (*d2));
}
529
extern __inline__ void
sse_sqrtsd(double *d1, double *d2)
{
	/* *d2 = sqrt(*d1), staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "sqrtsd %1, %%xmm0\n\t"
	    "movsd %%xmm0, %0"
	    : "=m" (*d2)
	    : "m" (*d1)
	    : "xmm0");
}
540
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	/*
	 * Unordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: ucomisd
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2) : "cc");
}
546
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	/*
	 * Ordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: comisd
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2) : "cc");
}
552
extern __inline__ void
sse_cvtsd2ss(double *d1, float *f1)
{
	/* *f1 = (float)*d1, staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "cvtsd2ss %1,%%xmm0\n\t"
	    "movss %%xmm0,%0"
	    : "=m" (*f1)
	    : "m" (*d1)
	    : "xmm0");
}
563
564
extern __inline__ void
sse_cvtsi2sd(int *i1, double *d1)
{
	/* *d1 = (double)*i1, staged through %xmm0 (declared clobbered). */
	__asm__ __volatile__(
	    "cvtsi2sd %1,%%xmm0\n\t"
	    "movsd %%xmm0,%0"
	    : "=m" (*d1)
	    : "m" (*i1)
	    : "xmm0");
}
575
extern __inline__ void
sse_cvttsd2si(double *d1, int *i1)
{
	/* *i1 = (int)*d1, truncated toward zero; staged through %ecx. */
	__asm__ __volatile__(
	    "cvttsd2si %1,%%ecx\n\t"
	    "movl %%ecx,%0"
	    : "=m" (*i1)
	    : "m" (*d1)
	    : "ecx");
}
586
extern __inline__ void
sse_cvtsd2si(double *d1, int *i1)
{
	/* *i1 = *d1 converted using the current rounding mode (cvtsd2si). */
	__asm__ __volatile__(
	    "cvtsd2si %1,%%ecx\n\t"
	    "movl %%ecx,%0"
	    : "=m" (*i1)
	    : "m" (*d1)
	    : "ecx");
}
597
598 #if defined(__amd64)
extern __inline__ void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	/* *d1 = (double)*ll1 (64-bit source; amd64 only). */
	__asm__ __volatile__(
	    "cvtsi2sdq %1,%%xmm0\n\t"
	    "movsd %%xmm0,%0"
	    : "=m" (*d1)
	    : "m" (*ll1)
	    : "xmm0");
}
609
extern __inline__ void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	/* *ll1 = (long long)*d1, truncated toward zero; staged through %rcx. */
	__asm__ __volatile__(
	    "cvttsd2siq %1,%%rcx\n\t"
	    "movq %%rcx,%0"
	    : "=m" (*ll1)
	    : "m" (*d1)
	    : "rcx");
}
620
extern __inline__ void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	/* *ll1 = *d1 converted using the current rounding mode. */
	__asm__ __volatile__(
	    "cvtsd2siq %1,%%rcx\n\t"
	    "movq %%rcx,%0"
	    : "=m" (*ll1)
	    : "m" (*d1)
	    : "rcx");
}
631 #endif
632 #elif defined(__sparc)
extern __inline__ void
__fenv_getfsr(unsigned long *l)
{
	/*
	 * Store the SPARC floating-point state register into *l
	 * (64-bit stx on sparcv9, 32-bit st otherwise).
	 */
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}
644
extern __inline__ void
__fenv_setfsr(const unsigned long *l)
{
	/*
	 * Load *l into the SPARC floating-point state register
	 * (64-bit ldx on sparcv9, 32-bit ld otherwise).
	 */
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l));
}
656
extern __inline__ void
__fenv_getfsr32(unsigned int *l)
{
	/* Store the low 32 bits of %fsr into *l. */
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
662
extern __inline__ void
__fenv_setfsr32(const unsigned int *l)
{
	/* Load *l into the low 32 bits of %fsr. */
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
668 #else
669 #error "GCC FENV inlines not implemented for this platform"
670 #endif
671
672 #ifdef __cplusplus
673 }
674 #endif
|
25 #include <sys/types.h>
26
27 #if defined(__x86)
28
29 /*
30 * Floating point Control Word and Status Word
31 * Definition should actually be shared with x86
32 * (much of this 'amd64' code can be, in fact.)
33 */
34 union fp_cwsw {
35 uint32_t cwsw;
36 struct {
37 uint16_t cw;
38 uint16_t sw;
39 } words;
40 };
41
42 extern __inline__ void
43 __fenv_getcwsw(unsigned int *value)
44 {
45 union fp_cwsw *u = (union fp_cwsw *)value;
46
47 __asm__ __volatile__(
48 "fstsw %0\n\t"
49 "fstcw %1\n\t"
50 : "=m" (u->words.cw), "=m" (u->words.sw));
51 }
52
53 extern __inline__ void
54 __fenv_setcwsw(const unsigned int *value)
55 {
56 union fp_cwsw cwsw;
57 short fenv[16];
58
59 cwsw.cwsw = *value;
60
61 __asm__ __volatile__(
62 "fstenv %0\n\t"
63 "movw %4,%1\n\t"
64 "movw %3,%2\n\t"
65 "fldenv %0\n\t"
66 "fwait\n\t"
67 : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
68 : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
69 /* For practical purposes, we clobber the whole FPU */
70 : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
71 "st(6)", "st(7)");
72 }
73
74 extern __inline__ void
75 __fenv_getmxcsr(unsigned int *value)
76 {
77 __asm__ __volatile__("stmxcsr %0" : "=m" (*value));
78 }
79
80 extern __inline__ void
81 __fenv_setmxcsr(const unsigned int *value)
82 {
83 __asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
84 }
85
86 extern __inline__ long double
87 f2xm1(long double x)
88 {
89 long double ret;
90
91 __asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
92 return (ret);
93 }
94
95 extern __inline__ long double
96 fyl2x(long double y, long double x)
97 {
98 long double ret;
99
100 __asm__ __volatile__("fyl2x"
101 : "=t" (ret)
102 : "0" (x), "u" (y)
103 : "st(1)", "cc");
104 return (ret);
105 }
106
107 extern __inline__ long double
108 fptan(long double x)
109 {
110 /*
111 * fptan pushes 1.0 then the result on completion, so we want to pop
112 * the FP stack twice, so we need a dummy value into which to pop it.
113 */
114 long double ret;
115 long double dummy;
116
117 __asm__ __volatile__("fptan"
118 : "=t" (dummy), "=u" (ret)
119 : "0" (x)
120 : "cc");
121 return (ret);
122 }
123
124 extern __inline__ long double
125 fpatan(long double x, long double y)
126 {
127 long double ret;
128
129 __asm__ __volatile__("fpatan"
130 : "=t" (ret)
131 : "0" (y), "u" (x)
132 : "st(1)", "cc");
133 return (ret);
134 }
135
extern __inline__ long double
fxtract(long double x)
{
	long double expo;

	/*
	 * fxtract replaces st(0) with the exponent and then PUSHES the
	 * significand, leaving two values on the x87 stack.  Consume
	 * both (st(0) = significand, st(1) = exponent) instead of
	 * leaking st(1) on every call; the significand is returned, as
	 * before.
	 */
	__asm__ __volatile__("fxtract" : "=t" (x), "=u" (expo) : "0" (x) : "cc");
	return (x);
}
142
143 extern __inline__ long double
144 fprem1(long double idend, long double div)
145 {
146 __asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
147 return (div);
148 }
149
150 extern __inline__ long double
151 fprem(long double idend, long double div)
152 {
153 __asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
154 return (div);
155 }
156
157 extern __inline__ long double
158 fyl2xp1(long double y, long double x)
159 {
160 long double ret;
161
162 __asm__ __volatile__("fyl2xp1"
163 : "=t" (ret)
164 : "0" (x), "u" (y)
165 : "st(1)", "cc");
166 return (ret);
167 }
168
169 extern __inline__ long double
170 fsqrt(long double x)
171 {
172 __asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
173 return (x);
174 }
175
extern __inline__ long double
fsincos(long double x)
{
	long double sine;

	/*
	 * fsincos stores sin(x) in st(0) and then PUSHES cos(x), leaving
	 * two values on the x87 stack.  Consume both (st(0) = cos,
	 * st(1) = sin) instead of leaking st(1) on every call; cos(x) is
	 * returned, matching the previous st(0) result.
	 */
	__asm__ __volatile__("fsincos" : "=t" (x), "=u" (sine) : "0" (x) : "cc");
	return (x);
}
182
183 extern __inline__ long double
184 frndint(long double x)
185 {
186 __asm__ __volatile__("frndint" : "+t" (x) : : "cc");
187 return (x);
188 }
189
190 extern __inline__ long double
191 fscale(long double x, long double y)
192 {
193 long double ret;
194
195 __asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
196 return (ret);
197 }
198
199 extern __inline__ long double
200 fsin(long double x)
201 {
202 __asm__ __volatile__("fsin" : "+t" (x) : : "cc");
203 return (x);
204 }
205
206 extern __inline__ long double
207 fcos(long double x)
208 {
209 __asm__ __volatile__("fcos" : "+t" (x) : : "cc");
210 return (x);
211 }
212
213 extern __inline__ void
214 sse_cmpeqss(float *f1, float *f2, int *i1)
215 {
216 __asm__ __volatile__(
217 "cmpeqss %2, %1\n\t"
218 "movss %1, %0"
219 : "=m" (*i1), "+x" (*f1)
220 : "x" (*f2)
221 : "cc");
222 }
223
224 extern __inline__ void
225 sse_cmpltss(float *f1, float *f2, int *i1)
226 {
227 __asm__ __volatile__(
228 "cmpltss %2, %1\n\t"
229 "movss %1, %0"
230 : "=m" (*i1), "+x" (*f1)
231 : "x" (*f2)
232 : "cc");
233 }
234
235 extern __inline__ void
236 sse_cmpless(float *f1, float *f2, int *i1)
237 {
238 __asm__ __volatile__(
239 "cmpless %2, %1\n\t"
240 "movss %1, %0"
241 : "=m" (*i1), "+x" (*f1)
242 : "x" (*f2)
243 : "cc");
244 }
245
246 extern __inline__ void
247 sse_cmpunordss(float *f1, float *f2, int *i1)
248 {
249 __asm__ __volatile__(
250 "cmpunordss %2, %1\n\t"
251 "movss %1, %0"
252 : "=m" (*i1), "+x" (*f1)
253 : "x" (*f2)
254 : "cc");
255 }
256
257 extern __inline__ void
258 sse_minss(float *f1, float *f2, float *f3)
259 {
260 __asm__ __volatile__(
261 "minss %2, %1\n\t"
262 "movss %1, %0"
263 : "=m" (*f3), "+x" (*f1)
264 : "x" (*f2));
265 }
266
267 extern __inline__ void
268 sse_maxss(float *f1, float *f2, float *f3)
269 {
270 __asm__ __volatile__(
271 "maxss %2, %1\n\t"
272 "movss %1, %0"
273 : "=m" (*f3), "+x" (*f1)
274 : "x" (*f2));
275 }
276
277 extern __inline__ void
278 sse_addss(float *f1, float *f2, float *f3)
279 {
280 __asm__ __volatile__(
281 "addss %2, %1\n\t"
282 "movss %1, %0"
283 : "=m" (*f3), "+x" (*f1)
284 : "x" (*f2));
285 }
286
287 extern __inline__ void
288 sse_subss(float *f1, float *f2, float *f3)
289 {
290 __asm__ __volatile__(
291 "subss %2, %1\n\t"
292 "movss %1, %0"
293 : "=m" (*f3), "+x" (*f1)
294 : "x" (*f2));
295 }
296
297 extern __inline__ void
298 sse_mulss(float *f1, float *f2, float *f3)
299 {
300 __asm__ __volatile__(
301 "mulss %2, %1\n\t"
302 "movss %1, %0"
303 : "=m" (*f3), "+x" (*f1)
304 : "x" (*f2));
305 }
306
307 extern __inline__ void
308 sse_divss(float *f1, float *f2, float *f3)
309 {
310 __asm__ __volatile__(
311 "divss %2, %1\n\t"
312 "movss %1, %0"
313 : "=m" (*f3), "+x" (*f1)
314 : "x" (*f2));
315 }
316
317 extern __inline__ void
318 sse_sqrtss(float *f1, float *f2)
319 {
320 double tmp;
321
322 __asm__ __volatile__(
323 "sqrtss %2, %1\n\t"
324 "movss %1, %0"
325 : "=m" (*f2), "=x" (tmp)
326 : "m" (*f1));
327 }
328
extern __inline__ void
sse_ucomiss(float *f1, float *f2)
{
	/*
	 * Unordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: ucomiss
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2) : "cc");
}
335
extern __inline__ void
sse_comiss(float *f1, float *f2)
{
	/*
	 * Ordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: comiss
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2) : "cc");
}
341
342 extern __inline__ void
343 sse_cvtss2sd(float *f1, double *d1)
344 {
345 double tmp;
346
347 __asm__ __volatile__(
348 "cvtss2sd %2, %1\n\t"
349 "movsd %1, %0"
350 : "=m" (*d1), "=x" (tmp)
351 : "m" (*f1));
352 }
353
354 extern __inline__ void
355 sse_cvtsi2ss(int *i1, float *f1)
356 {
357 double tmp;
358
359 __asm__ __volatile__(
360 "cvtsi2ss %2, %1\n\t"
361 "movss %1, %0"
362 : "=m" (*f1), "=x" (tmp)
363 : "m" (*i1));
364 }
365
366 extern __inline__ void
367 sse_cvttss2si(float *f1, int *i1)
368 {
369 int tmp;
370
371 __asm__ __volatile__(
372 "cvttss2si %2, %1\n\t"
373 "movl %1, %0"
374 : "=m" (*i1), "=r" (tmp)
375 : "m" (*f1));
376 }
377
378 extern __inline__ void
379 sse_cvtss2si(float *f1, int *i1)
380 {
381 int tmp;
382
383 __asm__ __volatile__(
384 "cvtss2si %2, %1\n\t"
385 "movl %1, %0"
386 : "=m" (*i1), "=r" (tmp)
387 : "m" (*f1));
388 }
389
390 #if defined(__amd64)
391 extern __inline__ void
392 sse_cvtsi2ssq(long long *ll1, float *f1)
393 {
394 double tmp;
395
396 __asm__ __volatile__(
397 "cvtsi2ssq %2, %1\n\t"
398 "movss %1, %0"
399 : "=m" (*f1), "=x" (tmp)
400 : "m" (*ll1));
401 }
402
403 extern __inline__ void
404 sse_cvttss2siq(float *f1, long long *ll1)
405 {
406 uint64_t tmp;
407
408 __asm__ __volatile__(
409 "cvttss2siq %2, %1\n\t"
410 "movq %1, %0"
411 : "=m" (*ll1), "=r" (tmp)
412 : "m" (*f1));
413 }
414
415 extern __inline__ void
416 sse_cvtss2siq(float *f1, long long *ll1)
417 {
418 uint64_t tmp;
419
420 __asm__ __volatile__(
421 "cvtss2siq %2, %1\n\t"
422 "movq %1, %0"
423 : "=m" (*ll1), "=r" (tmp)
424 : "m" (*f1));
425 }
426
427 #endif
428
extern __inline__ void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	/*
	 * *ll1 = all-ones mask if *d1 == *d2, else 0.  cmpeqsd both
	 * reads and writes %1, so *d1 must be "+x": the previous "=x"
	 * (write-only) handed the instruction an uninitialized register.
	 * *d1 is overwritten with the mask bits on return.
	 */
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
438
extern __inline__ void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	/*
	 * *ll1 = all-ones mask if *d1 < *d2, else 0.  cmpltsd reads %1,
	 * so *d1 must be "+x", not the previous write-only "=x" (which
	 * fed the instruction an uninitialized register).  *d1 is
	 * overwritten on return.
	 */
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
448
extern __inline__ void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	/*
	 * *ll1 = all-ones mask if *d1 <= *d2, else 0.  cmplesd reads %1,
	 * so *d1 must be "+x", not the previous write-only "=x" (which
	 * fed the instruction an uninitialized register).  *d1 is
	 * overwritten on return.
	 */
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
458
extern __inline__ void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	/*
	 * *ll1 = all-ones mask if *d1 and *d2 are unordered (either is
	 * NaN), else 0.  cmpunordsd reads %1, so *d1 must be "+x", not
	 * the previous write-only "=x" (uninitialized register).  *d1 is
	 * overwritten on return.
	 */
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
468
469
extern __inline__ void
sse_minsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = min(*d1, *d2).  minsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
479
extern __inline__ void
sse_maxsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = max(*d1, *d2).  maxsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
489
extern __inline__ void
sse_addsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = *d1 + *d2.  addsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
499
extern __inline__ void
sse_subsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = *d1 - *d2.  subsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
509
extern __inline__ void
sse_mulsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = *d1 * *d2.  mulsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
519
extern __inline__ void
sse_divsd(double *d1, double *d2, double *d3)
{
	/*
	 * *d3 = *d1 / *d2.  divsd reads %1, so *d1 must be "+x":
	 * the previous write-only "=x" fed the instruction an
	 * uninitialized register.
	 */
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
529
530 extern __inline__ void
531 sse_sqrtsd(double *d1, double *d2)
532 {
533 double tmp;
534
535 __asm__ __volatile__(
536 "sqrtsd %2, %1\n\t"
537 "movsd %1, %0"
538 : "=m" (*d2), "=x" (tmp)
539 : "m" (*d1));
540 }
541
extern __inline__ void
sse_ucomisd(double *d1, double *d2)
{
	/*
	 * Unordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: ucomisd
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2) : "cc");
}
547
extern __inline__ void
sse_comisd(double *d1, double *d2)
{
	/*
	 * Ordered compare for its exception-flag side effects; the
	 * EFLAGS result is discarded.  "cc" clobber added: comisd
	 * writes ZF/PF/CF.
	 */
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2) : "cc");
}
553
554 extern __inline__ void
555 sse_cvtsd2ss(double *d1, float *f1)
556 {
557 double tmp;
558
559 __asm__ __volatile__(
560 "cvtsd2ss %2,%1\n\t"
561 "movss %1,%0"
562 : "=m" (*f1), "=x" (tmp)
563 : "m" (*d1));
564 }
565
566 extern __inline__ void
567 sse_cvtsi2sd(int *i1, double *d1)
568 {
569 double tmp;
570 __asm__ __volatile__(
571 "cvtsi2sd %2,%1\n\t"
572 "movsd %1,%0"
573 : "=m" (*d1), "=x" (tmp)
574 : "m" (*i1));
575 }
576
577 extern __inline__ void
578 sse_cvttsd2si(double *d1, int *i1)
579 {
580 int tmp;
581
582 __asm__ __volatile__(
583 "cvttsd2si %2,%1\n\t"
584 "movl %1,%0"
585 : "=m" (*i1), "=r" (tmp)
586 : "m" (*d1));
587 }
588
589 extern __inline__ void
590 sse_cvtsd2si(double *d1, int *i1)
591 {
592 int tmp;
593
594 __asm__ __volatile__(
595 "cvtsd2si %2,%1\n\t"
596 "movl %1,%0"
597 : "=m" (*i1), "=r" (tmp)
598 : "m" (*d1));
599 }
600
601 #if defined(__amd64)
602 extern __inline__ void
603 sse_cvtsi2sdq(long long *ll1, double *d1)
604 {
605 double tmp;
606
607 __asm__ __volatile__(
608 "cvtsi2sdq %2,%1\n\t"
609 "movsd %1,%0"
610 : "=m" (*d1), "=x" (tmp)
611 : "m" (*ll1));
612 }
613
614 extern __inline__ void
615 sse_cvttsd2siq(double *d1, long long *ll1)
616 {
617 uint64_t tmp;
618
619 __asm__ __volatile__(
620 "cvttsd2siq %2,%1\n\t"
621 "movq %1,%0"
622 : "=m" (*ll1), "=r" (tmp)
623 : "m" (*d1));
624 }
625
626 extern __inline__ void
627 sse_cvtsd2siq(double *d1, long long *ll1)
628 {
629 uint64_t tmp;
630
631 __asm__ __volatile__(
632 "cvtsd2siq %2,%1\n\t"
633 "movq %1,%0"
634 : "=m" (*ll1), "=r" (tmp)
635 : "m" (*d1));
636 }
637 #endif
638
639 #elif defined(__sparc)
640 extern __inline__ void
641 __fenv_getfsr(unsigned long *l)
642 {
643 __asm__ __volatile__(
644 #if defined(__sparcv9)
645 "stx %%fsr,%0\n\t"
646 #else
647 "st %%fsr,%0\n\t"
648 #endif
649 : "=m" (*l));
650 }
651
652 extern __inline__ void
653 __fenv_setfsr(const unsigned long *l)
654 {
655 __asm__ __volatile__(
656 #if defined(__sparcv9)
657 "ldx %0,%%fsr\n\t"
658 #else
659 "ld %0,%%fsr\n\t"
660 #endif
661 : : "m" (*l) : "cc");
662 }
663
664 extern __inline__ void
665 __fenv_getfsr32(unsigned int *l)
666 {
667 __asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
668 }
669
670 extern __inline__ void
671 __fenv_setfsr32(const unsigned int *l)
672 {
673 __asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
674 }
675 #else
676 #error "GCC FENV inlines not implemented for this platform"
677 #endif
678
679 #ifdef __cplusplus
680 }
681 #endif
|