1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2011, Richard Lowe
14 */
15
16 #ifndef _FENV_INLINES_H
17 #define _FENV_INLINES_H
18
19 #ifdef __GNUC__
20 #ifdef __cplusplus
21 extern "C" {
22 #endif
23
24 #include <sys/types.h>
25
26 #if defined(__x86)
27 /*
28 * Floating point Control Word and Status Word
29 * Definition should actually be shared with x86
30 * (much of this 'amd64' code can be, in fact.)
31 */
/*
 * Overlay for the packed 32-bit Control Word / Status Word value used
 * by __fenv_getcwsw()/__fenv_setcwsw().  As those accessors pack it,
 * the Control Word lives in the HIGH 16 bits and the Status Word in
 * the LOW 16 bits; on little-endian x86 that means the field named
 * `cw' actually holds the Status Word and `sw' the Control Word.  The
 * naming is historical and misleading, but the two accessors are
 * mutually consistent.
 */
union fp_cwsw {
	uint32_t cwsw;
	struct {
		uint16_t cw;
		uint16_t sw;
	} words;
};
39
/*
 * Read the x87 Control and Status Words into *value, packed with the
 * Control Word in the high 16 bits and the Status Word in the low 16
 * bits.  Note: fstsw (status) targets the union field named `cw' (low
 * half) and fstcw (control) the field named `sw' (high half) — the
 * field names are misleading (see union fp_cwsw), but the resulting
 * packing matches what __fenv_setcwsw() unpacks.
 */
extern __GNU_INLINE void
__fenv_getcwsw(unsigned int *value)
{
	union fp_cwsw *u = (union fp_cwsw *)value;

	__asm__ __volatile__(
	    "fstsw %0\n\t"
	    "fstcw %1\n\t"
	    : "=m" (u->words.cw), "=m" (u->words.sw));
}
50
/*
 * Install a packed Control/Status Word pair (CW in the high 16 bits,
 * SW in the low — see union fp_cwsw) into the x87 unit.  There is no
 * "load status word" instruction, so the full environment is saved
 * with fstenv, the CW slot (byte offset 0, fenv[0]) and SW slot (byte
 * offset 4, fenv[2]) of the 32-bit-mode image are patched, and the
 * image is reloaded with fldenv.  The trailing fwait forces any
 * exception left pending by the newly loaded status word to be raised
 * here rather than at some later FP instruction.
 */
extern __GNU_INLINE void
__fenv_setcwsw(const unsigned int *value)
{
	union fp_cwsw cwsw;
	short fenv[16];

	cwsw.cwsw = *value;

	__asm__ __volatile__(
	    "fstenv %0\n\t"
	    "movw %4,%1\n\t"
	    "movw %3,%2\n\t"
	    "fldenv %0\n\t"
	    "fwait\n\t"
	    : "=m" (fenv), "=m" (fenv[0]), "=m" (fenv[2])
	    : "r" (cwsw.words.cw), "r" (cwsw.words.sw)
	    /* For practical purposes, we clobber the whole FPU */
	    : "cc", "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)",
	      "st(6)", "st(7)");
}
71
/*
 * Read the SSE control/status register (MXCSR) into *value.
 */
extern __GNU_INLINE void
__fenv_getmxcsr(unsigned int *value)
{
	__asm__ __volatile__("stmxcsr %0" : "=m" (*value));
}
77
/*
 * Load *value into the SSE control/status register (MXCSR).
 */
extern __GNU_INLINE void
__fenv_setmxcsr(const unsigned int *value)
{
	__asm__ __volatile__("ldmxcsr %0" : : "m" (*value));
}
83
/*
 * Return 2^x - 1 (x87 f2xm1; the ISA requires x in [-1, 1]).
 */
extern __GNU_INLINE long double
f2xm1(long double x)
{
	long double ret;

	__asm__ __volatile__("f2xm1" : "=t" (ret) : "0" (x) : "cc");
	return (ret);
}
92
/*
 * Return y * log2(x) (x87 fyl2x).  The instruction pops the register
 * stack, hence the st(1) clobber.
 */
extern __GNU_INLINE long double
fyl2x(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2x"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}
104
/*
 * Return tan(x) (x87 fptan).
 *
 * fptan replaces ST(0) with the tangent and then pushes 1.0, so on
 * completion ST(0) is 1.0 and ST(1) is the result.  Declaring both as
 * outputs (dummy for the 1.0) accounts for the net push and lets the
 * compiler pop the stack back into balance.
 */
extern __GNU_INLINE long double
fptan(long double x)
{
	long double ret;
	long double dummy;

	__asm__ __volatile__("fptan"
	    : "=t" (dummy), "=u" (ret)
	    : "0" (x)
	    : "cc");
	return (ret);
}
121
/*
 * Return arctan(x / y) (x87 fpatan computes arctan(ST(1)/ST(0)) with
 * ST(0) = y, ST(1) = x here).  The instruction pops the stack, hence
 * the st(1) clobber.
 */
extern __GNU_INLINE long double
fpatan(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fpatan"
	    : "=t" (ret)
	    : "0" (y), "u" (x)
	    : "st(1)", "cc");
	return (ret);
}
133
134 extern __GNU_INLINE long double
135 fxtract(long double x)
136 {
137 __asm__ __volatile__("fxtract" : "+t" (x) : : "cc");
138 return (x);
139 }
140
/*
 * IEEE partial remainder (x87 fprem1): computes ST(0) REM ST(1), i.e.
 * the value returned is div reduced modulo idend.
 * NOTE(review): fprem1 treats ST(0) as the dividend and ST(1) as the
 * divisor, so the parameter names appear transposed relative to their
 * roles — preserved as-is; verify against callers.
 */
extern __GNU_INLINE long double
fprem1(long double idend, long double div)
{
	__asm__ __volatile__("fprem1" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}
147
/*
 * Truncating partial remainder (x87 fprem): computes ST(0) REM ST(1),
 * i.e. the value returned is div reduced modulo idend.
 * NOTE(review): as with fprem1() above, the parameter names appear
 * transposed relative to the dividend/divisor roles — preserved as-is.
 */
extern __GNU_INLINE long double
fprem(long double idend, long double div)
{
	__asm__ __volatile__("fprem" : "+t" (div) : "u" (idend) : "cc");
	return (div);
}
154
/*
 * Return y * log2(x + 1) (x87 fyl2xp1).  The instruction pops the
 * register stack, hence the st(1) clobber.
 */
extern __GNU_INLINE long double
fyl2xp1(long double y, long double x)
{
	long double ret;

	__asm__ __volatile__("fyl2xp1"
	    : "=t" (ret)
	    : "0" (x), "u" (y)
	    : "st(1)", "cc");
	return (ret);
}
166
/*
 * Return sqrt(x) (x87 fsqrt), in place in ST(0).
 */
extern __GNU_INLINE long double
fsqrt(long double x)
{
	__asm__ __volatile__("fsqrt" : "+t" (x) : : "cc");
	return (x);
}
173
/*
 * x87 fsincos: computes the sine (left in ST(0)) and then pushes the
 * cosine, so on completion ST(0) is cos(x) and ST(1) is sin(x).  With
 * "+t" (x) this wrapper therefore returns cos(x); the sine is
 * captured in dummy (balancing the net push) and discarded.
 * NOTE(review): despite the name, the value returned is the cosine —
 * verify this is what callers expect.
 */
extern __GNU_INLINE long double
fsincos(long double x)
{
	long double dummy;

	__asm__ __volatile__("fsincos" : "+t" (x), "=u" (dummy) : : "cc");
	return (x);
}
182
/*
 * Round x to an integral value using the current x87 rounding mode
 * (frndint).
 */
extern __GNU_INLINE long double
frndint(long double x)
{
	__asm__ __volatile__("frndint" : "+t" (x) : : "cc");
	return (x);
}
189
/*
 * x87 fscale: ST(0) = ST(0) * 2^trunc(ST(1)).  With ST(0) = y and
 * ST(1) = x here, the value returned is y * 2^trunc(x).
 */
extern __GNU_INLINE long double
fscale(long double x, long double y)
{
	long double ret;

	__asm__ __volatile__("fscale" : "=t" (ret) : "0" (y), "u" (x) : "cc");
	return (ret);
}
198
/*
 * Return sin(x) (x87 fsin), in place in ST(0).
 */
extern __GNU_INLINE long double
fsin(long double x)
{
	__asm__ __volatile__("fsin" : "+t" (x) : : "cc");
	return (x);
}
205
/*
 * Return cos(x) (x87 fcos), in place in ST(0).
 */
extern __GNU_INLINE long double
fcos(long double x)
{
	__asm__ __volatile__("fcos" : "+t" (x) : : "cc");
	return (x);
}
212
/*
 * *i1 = all-ones if *f1 == *f2, else all-zeros (scalar single
 * cmpeqss).  Side effect: the "+x" read-write operand is written back,
 * so *f1 also ends up holding the comparison mask.
 */
extern __GNU_INLINE void
sse_cmpeqss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpeqss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
223
/*
 * *i1 = all-ones if *f1 < *f2, else all-zeros (scalar single
 * cmpltss).  Side effect: *f1 is also overwritten with the mask via
 * the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpltss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpltss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
234
/*
 * *i1 = all-ones if *f1 <= *f2, else all-zeros (scalar single
 * cmpless).  Side effect: *f1 is also overwritten with the mask via
 * the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpless(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpless %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
245
/*
 * *i1 = all-ones if *f1 and *f2 are unordered (either is NaN), else
 * all-zeros (scalar single cmpunordss).  Side effect: *f1 is also
 * overwritten with the mask via the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpunordss(float *f1, float *f2, int *i1)
{
	__asm__ __volatile__(
	    "cmpunordss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*i1), "+x" (*f1)
	    : "x" (*f2)
	    : "cc");
}
256
/*
 * *f3 = minss(*f1, *f2).  Side effect: the "+x" write-back also
 * stores the result into *f1.
 */
extern __GNU_INLINE void
sse_minss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "minss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
266
/*
 * *f3 = maxss(*f1, *f2).  Side effect: the "+x" write-back also
 * stores the result into *f1.
 */
extern __GNU_INLINE void
sse_maxss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "maxss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
276
/*
 * *f3 = *f1 + *f2 (scalar single addss).  Side effect: the "+x"
 * write-back also stores the sum into *f1.
 */
extern __GNU_INLINE void
sse_addss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "addss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
286
/*
 * *f3 = *f1 - *f2 (scalar single subss).  Side effect: the "+x"
 * write-back also stores the difference into *f1.
 */
extern __GNU_INLINE void
sse_subss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "subss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
296
/*
 * *f3 = *f1 * *f2 (scalar single mulss).  Side effect: the "+x"
 * write-back also stores the product into *f1.
 */
extern __GNU_INLINE void
sse_mulss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "mulss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
306
/*
 * *f3 = *f1 / *f2 (scalar single divss).  Side effect: the "+x"
 * write-back also stores the quotient into *f1.
 */
extern __GNU_INLINE void
sse_divss(float *f1, float *f2, float *f3)
{
	__asm__ __volatile__(
	    "divss %2, %1\n\t"
	    "movss %1, %0"
	    : "=m" (*f3), "+x" (*f1)
	    : "x" (*f2));
}
316
317 extern __GNU_INLINE void
318 sse_sqrtss(float *f1, float *f2)
319 {
320 double tmp;
321
322 __asm__ __volatile__(
323 "sqrtss %2, %1\n\t"
324 "movss %1, %0"
325 : "=m" (*f2), "=x" (tmp)
326 : "m" (*f1));
327 }
328
/*
 * Unordered scalar single compare of *f1 and *f2 (ucomiss).  The
 * EFLAGS result is not captured; presumably the point is the
 * instruction's FP-exception side effects (ucomiss signals invalid
 * only on signaling NaNs) — verify against callers.
 */
extern __GNU_INLINE void
sse_ucomiss(float *f1, float *f2)
{
	__asm__ __volatile__("ucomiss %1, %0" : : "x" (*f1), "x" (*f2));

}
335
/*
 * Ordered scalar single compare of *f1 and *f2 (comiss).  The EFLAGS
 * result is not captured; presumably the point is the instruction's
 * FP-exception side effects (comiss signals invalid on any NaN) —
 * verify against callers.
 */
extern __GNU_INLINE void
sse_comiss(float *f1, float *f2)
{
	__asm__ __volatile__("comiss %1, %0" : : "x" (*f1), "x" (*f2));
}
341
/*
 * *d1 = (double)*f1 (cvtss2sd); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtss2sd(float *f1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtss2sd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*f1));
}
353
354 extern __GNU_INLINE void
355 sse_cvtsi2ss(int *i1, float *f1)
356 {
357 double tmp;
358
359 __asm__ __volatile__(
360 "cvtsi2ss %2, %1\n\t"
361 "movss %1, %0"
362 : "=m" (*f1), "=x" (tmp)
363 : "m" (*i1));
364 }
365
/*
 * *i1 = (int)*f1 with truncation toward zero (cvttss2si); tmp is a
 * write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvttss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}
377
/*
 * *i1 = (int)*f1 rounded per the current MXCSR rounding mode
 * (cvtss2si); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtss2si(float *f1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtss2si %2, %1\n\t"
	    "movl %1, %0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*f1));
}
389
390 #if defined(__amd64)
391 extern __GNU_INLINE void
392 sse_cvtsi2ssq(long long *ll1, float *f1)
393 {
394 double tmp;
395
396 __asm__ __volatile__(
397 "cvtsi2ssq %2, %1\n\t"
398 "movss %1, %0"
399 : "=m" (*f1), "=x" (tmp)
400 : "m" (*ll1));
401 }
402
/*
 * *ll1 = (long long)*f1 with truncation toward zero (cvttss2siq;
 * amd64 only); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvttss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}
414
/*
 * *ll1 = (long long)*f1 rounded per the current MXCSR rounding mode
 * (cvtss2siq; amd64 only); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtss2siq(float *f1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtss2siq %2, %1\n\t"
	    "movq %1, %0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*f1));
}
426 #endif
427
/*
 * *ll1 = all-ones if *d1 == *d2, else all-zeros (scalar double
 * cmpeqsd).  Side effect: *d1 is also overwritten with the mask via
 * the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpeqsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpeqsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
437
/*
 * *ll1 = all-ones if *d1 < *d2, else all-zeros (scalar double
 * cmpltsd).  Side effect: *d1 is also overwritten with the mask via
 * the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpltsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpltsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
447
/*
 * *ll1 = all-ones if *d1 <= *d2, else all-zeros (scalar double
 * cmplesd).  Side effect: *d1 is also overwritten with the mask via
 * the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmplesd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmplesd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
457
/*
 * *ll1 = all-ones if *d1 and *d2 are unordered (either is NaN), else
 * all-zeros (scalar double cmpunordsd).  Side effect: *d1 is also
 * overwritten with the mask via the "+x" write-back.
 */
extern __GNU_INLINE void
sse_cmpunordsd(double *d1, double *d2, long long *ll1)
{
	__asm__ __volatile__(
	    "cmpunordsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*ll1), "+x" (*d1)
	    : "x" (*d2));
}
467
468
/*
 * *d3 = minsd(*d1, *d2).  Side effect: the "+x" write-back also
 * stores the result into *d1.
 */
extern __GNU_INLINE void
sse_minsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "minsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
478
/*
 * *d3 = maxsd(*d1, *d2).  Side effect: the "+x" write-back also
 * stores the result into *d1.
 */
extern __GNU_INLINE void
sse_maxsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "maxsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
488
/*
 * *d3 = *d1 + *d2 (scalar double addsd).  Side effect: the "+x"
 * write-back also stores the sum into *d1.
 */
extern __GNU_INLINE void
sse_addsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "addsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
498
/*
 * *d3 = *d1 - *d2 (scalar double subsd).  Side effect: the "+x"
 * write-back also stores the difference into *d1.
 */
extern __GNU_INLINE void
sse_subsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "subsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
508
/*
 * *d3 = *d1 * *d2 (scalar double mulsd).  Side effect: the "+x"
 * write-back also stores the product into *d1.
 */
extern __GNU_INLINE void
sse_mulsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "mulsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
518
/*
 * *d3 = *d1 / *d2 (scalar double divsd).  Side effect: the "+x"
 * write-back also stores the quotient into *d1.
 */
extern __GNU_INLINE void
sse_divsd(double *d1, double *d2, double *d3)
{
	__asm__ __volatile__(
	    "divsd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d3), "+x" (*d1)
	    : "x" (*d2));
}
528
/*
 * *d2 = sqrt(*d1) (scalar double sqrtsd); tmp is a write-only
 * register scratch.
 */
extern __GNU_INLINE void
sse_sqrtsd(double *d1, double *d2)
{
	double tmp;

	__asm__ __volatile__(
	    "sqrtsd %2, %1\n\t"
	    "movsd %1, %0"
	    : "=m" (*d2), "=x" (tmp)
	    : "m" (*d1));
}
540
/*
 * Unordered scalar double compare of *d1 and *d2 (ucomisd).  The
 * EFLAGS result is not captured; presumably the point is the
 * instruction's FP-exception side effects (ucomisd signals invalid
 * only on signaling NaNs) — verify against callers.
 */
extern __GNU_INLINE void
sse_ucomisd(double *d1, double *d2)
{
	__asm__ __volatile__("ucomisd %1, %0" : : "x" (*d1), "x" (*d2));
}
546
/*
 * Ordered scalar double compare of *d1 and *d2 (comisd).  The EFLAGS
 * result is not captured; presumably the point is the instruction's
 * FP-exception side effects (comisd signals invalid on any NaN) —
 * verify against callers.
 */
extern __GNU_INLINE void
sse_comisd(double *d1, double *d2)
{
	__asm__ __volatile__("comisd %1, %0" : : "x" (*d1), "x" (*d2));
}
552
/*
 * *f1 = (float)*d1 (cvtsd2ss); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtsd2ss(double *d1, float *f1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsd2ss %2,%1\n\t"
	    "movss %1,%0"
	    : "=m" (*f1), "=x" (tmp)
	    : "m" (*d1));
}
564
/*
 * *d1 = (double)*i1 (signed 32-bit integer to double, cvtsi2sd); tmp
 * is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtsi2sd(int *i1, double *d1)
{
	double tmp;
	__asm__ __volatile__(
	    "cvtsi2sd %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*i1));
}
575
/*
 * *i1 = (int)*d1 with truncation toward zero (cvttsd2si); tmp is a
 * write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvttsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvttsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}
587
/*
 * *i1 = (int)*d1 rounded per the current MXCSR rounding mode
 * (cvtsd2si); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtsd2si(double *d1, int *i1)
{
	int tmp;

	__asm__ __volatile__(
	    "cvtsd2si %2,%1\n\t"
	    "movl %1,%0"
	    : "=m" (*i1), "=r" (tmp)
	    : "m" (*d1));
}
599
600 #if defined(__amd64)
/*
 * *d1 = (double)*ll1 (signed 64-bit integer to double, cvtsi2sdq;
 * amd64 only); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtsi2sdq(long long *ll1, double *d1)
{
	double tmp;

	__asm__ __volatile__(
	    "cvtsi2sdq %2,%1\n\t"
	    "movsd %1,%0"
	    : "=m" (*d1), "=x" (tmp)
	    : "m" (*ll1));
}
612
/*
 * *ll1 = (long long)*d1 with truncation toward zero (cvttsd2siq;
 * amd64 only); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvttsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvttsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
624
/*
 * *ll1 = (long long)*d1 rounded per the current MXCSR rounding mode
 * (cvtsd2siq; amd64 only); tmp is a write-only register scratch.
 */
extern __GNU_INLINE void
sse_cvtsd2siq(double *d1, long long *ll1)
{
	uint64_t tmp;

	__asm__ __volatile__(
	    "cvtsd2siq %2,%1\n\t"
	    "movq %1,%0"
	    : "=m" (*ll1), "=r" (tmp)
	    : "m" (*d1));
}
636 #endif
637 #elif defined(__sparc)
/*
 * Store the SPARC floating-point state register (FSR) into *l: the
 * full 64-bit register (stx) on sparcv9, the 32-bit register (st)
 * otherwise.
 */
extern __GNU_INLINE void
__fenv_getfsr(unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "stx %%fsr,%0\n\t"
#else
	    "st %%fsr,%0\n\t"
#endif
	    : "=m" (*l));
}
649
/*
 * Load *l into the SPARC floating-point state register (FSR): 64-bit
 * ldx on sparcv9, 32-bit ld otherwise.  "cc" is clobbered, presumably
 * because loading the FSR rewrites its fcc condition-code fields —
 * verify.
 */
extern __GNU_INLINE void
__fenv_setfsr(const unsigned long *l)
{
	__asm__ __volatile__(
#if defined(__sparcv9)
	    "ldx %0,%%fsr\n\t"
#else
	    "ld %0,%%fsr\n\t"
#endif
	    : : "m" (*l) : "cc");
}
661
/*
 * Store the low 32 bits of the FSR into *l (always the 32-bit st
 * form, regardless of sparcv9).
 */
extern __GNU_INLINE void
__fenv_getfsr32(unsigned int *l)
{
	__asm__ __volatile__("st %%fsr,%0\n\t" : "=m" (*l));
}
667
/*
 * Load *l into the low 32 bits of the FSR (always the 32-bit ld form,
 * regardless of sparcv9).
 */
extern __GNU_INLINE void
__fenv_setfsr32(const unsigned int *l)
{
	__asm__ __volatile__("ld %0,%%fsr\n\t" : : "m" (*l));
}
673 #else
674 #error "GCC FENV inlines not implemented for this platform"
675 #endif
676
677 #ifdef __cplusplus
678 }
679 #endif
680 #endif /* __GNUC__ */
681 #endif /* _FENV_INLINES_H */