5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include <ucontext.h>
31 #include <fenv.h>
32 #if defined(__SUNPRO_C)
33 #include <sunmath.h>
34 #else
35 #include <sys/ieeefp.h>
36 #endif
37 #include "fex_handler.h"
38 #include "fenv_inlines.h"
39
/*
 * Fall back to the i386 names for the program counter and processor
 * status slots of gregs[] when the generic names are not defined.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * Translate a general register number X, as assembled from the
 * ModRM/SIB fields (and REX bits) of an instruction, into an index
 * into uc_mcontext.gregs[].
 *
 * On amd64, numbers below 4 map to REG_RAX - X, number 4 maps to
 * REG_RSP, and numbers above 4 map to REG_RAX + 1 - X.  Otherwise
 * the index is simply EAX - X.
 *
 * The argument is fully parenthesized so that an expression may be
 * passed; note that on amd64 it may be evaluated more than once.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
54
55 /*
56 * Support for SSE instructions
57 */
58
59 /*
60 * Decode an SSE instruction. Fill in *inst and return the length of the
61 * instruction in bytes. Return 0 if the instruction is not recognized.
62 */
63 int
64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
65 {
66 unsigned char *ip;
67 char *addr;
68 int i, dbl, simd, rex, modrm, sib, r;
69
70 i = 0;
71 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
72
73 /* look for pseudo-prefixes */
74 dbl = 0;
75 simd = SIMD;
76 if (ip[i] == 0xF3) {
77 simd = 0;
78 i++;
79 } else if (ip[i] == 0x66) {
80 dbl = DOUBLE;
81 i++;
82 } else if (ip[i] == 0xF2) {
83 dbl = DOUBLE;
84 simd = 0;
85 i++;
86 }
87
88 /* look for AMD64 REX prefix */
89 rex = 0;
90 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
91 rex = ip[i];
92 i++;
93 }
94
95 /* parse opcode */
96 if (ip[i++] != 0x0F)
97 return 0;
98 switch (ip[i++]) {
99 case 0x2A:
100 inst->op = (int)cvtsi2ss + simd + dbl;
101 if (!simd)
102 inst->op = (int)inst->op + (rex & 8);
103 break;
104
105 case 0x2C:
106 inst->op = (int)cvttss2si + simd + dbl;
107 if (!simd)
108 inst->op = (int)inst->op + (rex & 8);
109 break;
110
111 case 0x2D:
112 inst->op = (int)cvtss2si + simd + dbl;
113 if (!simd)
114 inst->op = (int)inst->op + (rex & 8);
115 break;
116
117 case 0x2E:
118 /* oddball: scalar instruction in a SIMD opcode group */
119 if (!simd)
120 return 0;
121 inst->op = (int)ucomiss + dbl;
122 break;
123
124 case 0x2F:
125 /* oddball: scalar instruction in a SIMD opcode group */
126 if (!simd)
127 return 0;
128 inst->op = (int)comiss + dbl;
129 break;
130
131 case 0x51:
132 inst->op = (int)sqrtss + simd + dbl;
133 break;
134
135 case 0x58:
136 inst->op = (int)addss + simd + dbl;
137 break;
138
139 case 0x59:
140 inst->op = (int)mulss + simd + dbl;
141 break;
142
143 case 0x5A:
144 inst->op = (int)cvtss2sd + simd + dbl;
145 break;
146
147 case 0x5B:
148 if (dbl) {
149 if (simd)
150 inst->op = cvtps2dq;
151 else
152 return 0;
153 } else {
154 inst->op = (simd)? cvtdq2ps : cvttps2dq;
155 }
156 break;
157
158 case 0x5C:
159 inst->op = (int)subss + simd + dbl;
160 break;
161
162 case 0x5D:
163 inst->op = (int)minss + simd + dbl;
164 break;
165
166 case 0x5E:
167 inst->op = (int)divss + simd + dbl;
168 break;
169
170 case 0x5F:
171 inst->op = (int)maxss + simd + dbl;
172 break;
173
174 case 0xC2:
175 inst->op = (int)cmpss + simd + dbl;
176 break;
177
178 case 0xE6:
179 if (simd) {
180 if (dbl)
181 inst->op = cvttpd2dq;
182 else
183 return 0;
184 } else {
185 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
186 }
187 break;
188
189 default:
190 return 0;
191 }
192
193 /* locate operands */
194 modrm = ip[i++];
195
196 if (inst->op == cvtss2si || inst->op == cvttss2si ||
197 inst->op == cvtsd2si || inst->op == cvttsd2si ||
198 inst->op == cvtss2siq || inst->op == cvttss2siq ||
199 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
200 /* op1 is a gp register */
201 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
202 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
203 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
204 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
205 /* op1 is a mmx register */
206 #ifdef __amd64
207 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
208 fpchip_state.st[(modrm >> 3) & 7];
209 #else
210 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
211 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
212 fpchip_state.state[7]);
213 #endif
214 } else {
215 /* op1 is a xmm register */
216 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
217 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
218 fp_reg_set.fpchip_state.xmm[r];
219 }
220
221 if ((modrm >> 6) == 3) {
222 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
223 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
224 /* op2 is a gp register */
225 r = ((rex & 1) << 3) | (modrm & 7);
226 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
227 gregs[regno(r)];
228 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
229 /* op2 is a mmx register */
230 #ifdef __amd64
231 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
232 fp_reg_set.fpchip_state.st[modrm & 7];
233 #else
234 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
235 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
236 fpchip_state.state[7]);
237 #endif
238 } else {
239 /* op2 is a xmm register */
240 r = ((rex & 1) << 3) | (modrm & 7);
241 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
242 fp_reg_set.fpchip_state.xmm[r];
243 }
244 } else if ((modrm & 0xc7) == 0x05) {
245 #ifdef __amd64
246 /* address of next instruction + offset */
247 r = i + 4;
248 if (inst->op == cmpss || inst->op == cmpps ||
249 inst->op == cmpsd || inst->op == cmppd)
250 r++;
251 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
252 #else
253 /* absolute address */
254 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
255 #endif
256 i += 4;
257 } else {
258 /* complex address */
259 if ((modrm & 7) == 4) {
260 /* parse sib byte */
261 sib = ip[i++];
262 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
263 /* start with absolute address */
264 addr = (char *)(uintptr_t)(*(int *)(ip + i));
265 i += 4;
266 } else {
267 /* start with base */
268 r = ((rex & 1) << 3) | (sib & 7);
269 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
270 }
271 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
272 if (r != 4) {
273 /* add scaled index */
274 addr += uap->uc_mcontext.gregs[regno(r)]
275 << (sib >> 6);
276 }
277 } else {
278 r = ((rex & 1) << 3) | (modrm & 7);
279 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
280 }
281
282 /* add displacement, if any */
283 if ((modrm >> 6) == 1) {
284 addr += (char)ip[i++];
285 } else if ((modrm >> 6) == 2) {
286 addr += *(int *)(ip + i);
287 i += 4;
288 }
289 inst->op2 = (sseoperand_t *)addr;
290 }
291
292 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
293 inst->op == cmppd) {
294 /* get the immediate operand */
295 inst->imm = ip[i++];
296 }
297
298 return i;
299 }
300
301 static enum fp_class_type
302 my_fp_classf(float *x)
303 {
304 int i = *(int *)x & ~0x80000000;
305
306 if (i < 0x7f800000) {
307 if (i < 0x00800000)
308 return ((i == 0)? fp_zero : fp_subnormal);
309 return fp_normal;
310 }
311 else if (i == 0x7f800000)
312 return fp_infinity;
313 else if (i & 0x400000)
314 return fp_quiet;
315 else
316 return fp_signaling;
317 }
318
319 static enum fp_class_type
320 my_fp_class(double *x)
321 {
322 int i = *(1+(int *)x) & ~0x80000000;
323
324 if (i < 0x7ff00000) {
325 if (i < 0x00100000)
326 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
327 return fp_normal;
328 }
329 else if (i == 0x7ff00000 && *(int *)x == 0)
330 return fp_infinity;
331 else if (i & 0x80000)
332 return fp_quiet;
333 else
334 return fp_signaling;
335 }
336
337 /*
338 * Inspect a scalar SSE instruction that incurred an invalid operation
339 * exception to determine which type of exception it was.
340 */
341 static enum fex_exception
342 __fex_get_sse_invalid_type(sseinst_t *inst)
343 {
344 enum fp_class_type t1, t2;
345
346 /* check op2 for signaling nan */
347 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
348 my_fp_classf(&inst->op2->f[0]);
349 if (t2 == fp_signaling)
350 return fex_inv_snan;
351
352 /* eliminate all single-operand instructions */
353 switch (inst->op) {
354 case cvtsd2ss:
355 case cvtss2sd:
356 /* hmm, this shouldn't have happened */
357 return (enum fex_exception) -1;
358
359 case sqrtss:
360 case sqrtsd:
361 return fex_inv_sqrt;
362
363 case cvtss2si:
364 case cvtsd2si:
365 case cvttss2si:
366 case cvttsd2si:
367 case cvtss2siq:
368 case cvtsd2siq:
369 case cvttss2siq:
370 case cvttsd2siq:
371 return fex_inv_int;
372 default:
373 break;
374 }
375
376 /* check op1 for signaling nan */
377 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
378 my_fp_classf(&inst->op1->f[0]);
379 if (t1 == fp_signaling)
380 return fex_inv_snan;
381
382 /* check two-operand instructions for other cases */
383 switch (inst->op) {
384 case cmpss:
385 case cmpsd:
386 case minss:
387 case minsd:
388 case maxss:
389 case maxsd:
390 case comiss:
391 case comisd:
392 return fex_inv_cmp;
393
394 case addss:
395 case addsd:
396 case subss:
397 case subsd:
398 if (t1 == fp_infinity && t2 == fp_infinity)
399 return fex_inv_isi;
400 break;
401
402 case mulss:
403 case mulsd:
404 if ((t1 == fp_zero && t2 == fp_infinity) ||
405 (t2 == fp_zero && t1 == fp_infinity))
406 return fex_inv_zmi;
407 break;
408
409 case divss:
410 case divsd:
411 if (t1 == fp_zero && t2 == fp_zero)
412 return fex_inv_zdz;
413 if (t1 == fp_infinity && t2 == fp_infinity)
414 return fex_inv_idi;
415 default:
416 break;
417 }
418
419 return (enum fex_exception)-1;
420 }
421
/*
 * Inline templates, one per scalar SSE instruction.  As used in this
 * file, each is passed pointers to its operand(s) followed, where a
 * value is produced, by a pointer to the location receiving the result.
 */

/* single precision */
extern void sse_cmpeqss(float *op1, float *op2, int *res);
extern void sse_cmpltss(float *op1, float *op2, int *res);
extern void sse_cmpless(float *op1, float *op2, int *res);
extern void sse_cmpunordss(float *op1, float *op2, int *res);
extern void sse_minss(float *op1, float *op2, float *res);
extern void sse_maxss(float *op1, float *op2, float *res);
extern void sse_addss(float *op1, float *op2, float *res);
extern void sse_subss(float *op1, float *op2, float *res);
extern void sse_mulss(float *op1, float *op2, float *res);
extern void sse_divss(float *op1, float *op2, float *res);
extern void sse_sqrtss(float *op1, float *res);
extern void sse_ucomiss(float *op1, float *op2);
extern void sse_comiss(float *op1, float *op2);
extern void sse_cvtss2sd(float *op1, double *res);
extern void sse_cvtsi2ss(int *op1, float *res);
extern void sse_cvttss2si(float *op1, int *res);
extern void sse_cvtss2si(float *op1, int *res);

/* double precision */
extern void sse_cmpeqsd(double *op1, double *op2, long long *res);
extern void sse_cmpltsd(double *op1, double *op2, long long *res);
extern void sse_cmplesd(double *op1, double *op2, long long *res);
extern void sse_cmpunordsd(double *op1, double *op2, long long *res);
extern void sse_minsd(double *op1, double *op2, double *res);
extern void sse_maxsd(double *op1, double *op2, double *res);
extern void sse_addsd(double *op1, double *op2, double *res);
extern void sse_subsd(double *op1, double *op2, double *res);
extern void sse_mulsd(double *op1, double *op2, double *res);
extern void sse_divsd(double *op1, double *op2, double *res);
extern void sse_sqrtsd(double *op1, double *res);
extern void sse_ucomisd(double *op1, double *op2);
extern void sse_comisd(double *op1, double *op2);
extern void sse_cvtsd2ss(double *op1, float *res);
extern void sse_cvtsi2sd(int *op1, double *res);
extern void sse_cvttsd2si(double *op1, int *res);
extern void sse_cvtsd2si(double *op1, int *res);

#ifdef __amd64
/* conversions to and from 64-bit integers */
extern void sse_cvtsi2ssq(long long *op1, float *res);
extern void sse_cvttss2siq(float *op1, long long *res);
extern void sse_cvtss2siq(float *op1, long long *res);
extern void sse_cvtsi2sdq(long long *op1, double *res);
extern void sse_cvttsd2siq(double *op1, long long *res);
extern void sse_cvtsd2siq(double *op1, long long *res);
#endif
467
468 /*
469 * Fill in *info with the operands, default untrapped result, and
470 * flags produced by a scalar SSE instruction, and return the type
471 * of trapped exception (if any). On entry, the mxcsr must have
472 * all exceptions masked and all flags clear. The same conditions
473 * will hold on exit.
474 *
475 * This routine does not work if the instruction specified by *inst
476 * is not a scalar instruction.
477 */
478 enum fex_exception
479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
480 {
481 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
482
483 /*
484 * Perform the operation with traps disabled and check the
485 * exception flags. If the underflow trap was enabled, also
486 * check for an exact subnormal result.
487 */
488 __fenv_getmxcsr(&oldmxcsr);
489 subnorm = 0;
490 if ((int)inst->op & DOUBLE) {
491 if (inst->op == cvtsi2sd) {
492 info->op1.type = fex_int;
493 info->op1.val.i = inst->op2->i[0];
494 info->op2.type = fex_nodata;
495 } else if (inst->op == cvtsi2sdq) {
496 info->op1.type = fex_llong;
497 info->op1.val.l = inst->op2->l[0];
498 info->op2.type = fex_nodata;
499 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
500 inst->op == cvttsd2si || inst->op == cvtsd2si ||
501 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
502 info->op1.type = fex_double;
503 info->op1.val.d = inst->op2->d[0];
504 info->op2.type = fex_nodata;
505 } else {
506 info->op1.type = fex_double;
507 info->op1.val.d = inst->op1->d[0];
508 info->op2.type = fex_double;
509 info->op2.val.d = inst->op2->d[0];
510 }
511 info->res.type = fex_double;
512 switch (inst->op) {
513 case cmpsd:
514 info->op = fex_cmp;
515 info->res.type = fex_llong;
516 switch (inst->imm & 3) {
517 case 0:
518 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
519 &info->res.val.l);
520 break;
521
522 case 1:
523 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
524 &info->res.val.l);
525 break;
526
527 case 2:
528 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
529 &info->res.val.l);
530 break;
531
532 case 3:
533 sse_cmpunordsd(&info->op1.val.d,
534 &info->op2.val.d, &info->res.val.l);
535 }
536 if (inst->imm & 4)
537 info->res.val.l ^= 0xffffffffffffffffull;
538 break;
539
540 case minsd:
541 info->op = fex_other;
542 sse_minsd(&info->op1.val.d, &info->op2.val.d,
543 &info->res.val.d);
544 break;
545
546 case maxsd:
547 info->op = fex_other;
548 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
549 &info->res.val.d);
550 break;
551
552 case addsd:
553 info->op = fex_add;
554 sse_addsd(&info->op1.val.d, &info->op2.val.d,
555 &info->res.val.d);
556 if (my_fp_class(&info->res.val.d) == fp_subnormal)
557 subnorm = 1;
558 break;
559
560 case subsd:
561 info->op = fex_sub;
562 sse_subsd(&info->op1.val.d, &info->op2.val.d,
563 &info->res.val.d);
564 if (my_fp_class(&info->res.val.d) == fp_subnormal)
565 subnorm = 1;
566 break;
567
568 case mulsd:
569 info->op = fex_mul;
570 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
571 &info->res.val.d);
572 if (my_fp_class(&info->res.val.d) == fp_subnormal)
573 subnorm = 1;
574 break;
575
576 case divsd:
577 info->op = fex_div;
578 sse_divsd(&info->op1.val.d, &info->op2.val.d,
579 &info->res.val.d);
580 if (my_fp_class(&info->res.val.d) == fp_subnormal)
581 subnorm = 1;
582 break;
583
584 case sqrtsd:
585 info->op = fex_sqrt;
586 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
587 break;
588
589 case cvtsd2ss:
590 info->op = fex_cnvt;
591 info->res.type = fex_float;
592 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
593 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
594 subnorm = 1;
595 break;
596
597 case cvtsi2sd:
598 info->op = fex_cnvt;
599 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
600 break;
601
602 case cvttsd2si:
603 info->op = fex_cnvt;
604 info->res.type = fex_int;
605 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
606 break;
607
608 case cvtsd2si:
609 info->op = fex_cnvt;
610 info->res.type = fex_int;
611 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
612 break;
613
614 #ifdef __amd64
637 break;
638
639 case comisd:
640 info->op = fex_cmp;
641 info->res.type = fex_nodata;
642 sse_comisd(&info->op1.val.d, &info->op2.val.d);
643 break;
644 default:
645 break;
646 }
647 } else {
648 if (inst->op == cvtsi2ss) {
649 info->op1.type = fex_int;
650 info->op1.val.i = inst->op2->i[0];
651 info->op2.type = fex_nodata;
652 } else if (inst->op == cvtsi2ssq) {
653 info->op1.type = fex_llong;
654 info->op1.val.l = inst->op2->l[0];
655 info->op2.type = fex_nodata;
656 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
657 inst->op == cvttss2si || inst->op == cvtss2si ||
658 inst->op == cvttss2siq || inst->op == cvtss2siq) {
659 info->op1.type = fex_float;
660 info->op1.val.f = inst->op2->f[0];
661 info->op2.type = fex_nodata;
662 } else {
663 info->op1.type = fex_float;
664 info->op1.val.f = inst->op1->f[0];
665 info->op2.type = fex_float;
666 info->op2.val.f = inst->op2->f[0];
667 }
668 info->res.type = fex_float;
669 switch (inst->op) {
670 case cmpss:
671 info->op = fex_cmp;
672 info->res.type = fex_int;
673 switch (inst->imm & 3) {
674 case 0:
675 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
676 &info->res.val.i);
677 break;
678
679 case 1:
680 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
681 &info->res.val.i);
682 break;
683
684 case 2:
685 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
686 &info->res.val.i);
687 break;
688
689 case 3:
690 sse_cmpunordss(&info->op1.val.f,
691 &info->op2.val.f, &info->res.val.i);
692 }
693 if (inst->imm & 4)
694 info->res.val.i ^= 0xffffffffu;
695 break;
696
697 case minss:
698 info->op = fex_other;
699 sse_minss(&info->op1.val.f, &info->op2.val.f,
700 &info->res.val.f);
701 break;
702
703 case maxss:
704 info->op = fex_other;
705 sse_maxss(&info->op1.val.f, &info->op2.val.f,
706 &info->res.val.f);
707 break;
708
709 case addss:
710 info->op = fex_add;
711 sse_addss(&info->op1.val.f, &info->op2.val.f,
712 &info->res.val.f);
713 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
714 subnorm = 1;
715 break;
716
717 case subss:
718 info->op = fex_sub;
719 sse_subss(&info->op1.val.f, &info->op2.val.f,
720 &info->res.val.f);
721 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
722 subnorm = 1;
723 break;
724
725 case mulss:
726 info->op = fex_mul;
727 sse_mulss(&info->op1.val.f, &info->op2.val.f,
728 &info->res.val.f);
729 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
730 subnorm = 1;
731 break;
732
733 case divss:
734 info->op = fex_div;
735 sse_divss(&info->op1.val.f, &info->op2.val.f,
736 &info->res.val.f);
737 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
738 subnorm = 1;
739 break;
740
741 case sqrtss:
742 info->op = fex_sqrt;
743 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
744 break;
745
746 case cvtss2sd:
747 info->op = fex_cnvt;
748 info->res.type = fex_double;
749 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
750 break;
751
752 case cvtsi2ss:
753 info->op = fex_cnvt;
754 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
755 break;
756
757 case cvttss2si:
758 info->op = fex_cnvt;
783 info->res.type = fex_llong;
784 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
785 break;
786 #endif
787
788 case ucomiss:
789 info->op = fex_cmp;
790 info->res.type = fex_nodata;
791 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
792 break;
793
794 case comiss:
795 info->op = fex_cmp;
796 info->res.type = fex_nodata;
797 sse_comiss(&info->op1.val.f, &info->op2.val.f);
798 break;
799 default:
800 break;
801 }
802 }
803 __fenv_getmxcsr(&mxcsr);
804 info->flags = mxcsr & 0x3d;
805 __fenv_setmxcsr(&oldmxcsr);
806
807 /* determine which exception would have been trapped */
808 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
809 >> 7) & 0x3d;
810 e = mxcsr & te;
811 if (e & FE_INVALID)
812 return __fex_get_sse_invalid_type(inst);
813 if (e & FE_DIVBYZERO)
814 return fex_division;
815 if (e & FE_OVERFLOW)
816 return fex_overflow;
817 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
818 return fex_underflow;
819 if (e & FE_INEXACT)
820 return fex_inexact;
821 return (enum fex_exception)-1;
822 }
823
824 /*
825 * Emulate a SIMD SSE instruction to determine which exceptions occur
826 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
827 * trapped exception that would occur if the i-th part of the SIMD
828 * instruction were executed in isolation; set e[i] to -1 if no
829 * trapped exception would occur in this part. Also fill in info[i]
830 * with the corresponding operands, default untrapped result, and
831 * flags.
832 *
833 * This routine does not work if the instruction specified by *inst
834 * is not a SIMD instruction.
835 */
836 void
837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
838 fex_info_t *info)
839 {
840 sseinst_t dummy;
841 int i;
842
843 e[0] = e[1] = e[2] = e[3] = -1;
844
845 /* perform each part of the SIMD operation */
846 switch (inst->op) {
847 case cmpps:
848 dummy.op = cmpss;
849 dummy.imm = inst->imm;
850 for (i = 0; i < 4; i++) {
851 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
852 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
853 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
854 }
855 break;
856
857 case minps:
858 dummy.op = minss;
859 for (i = 0; i < 4; i++) {
860 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
861 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
862 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
863 }
864 break;
865
866 case maxps:
867 dummy.op = maxss;
868 for (i = 0; i < 4; i++) {
869 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
870 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
871 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
872 }
873 break;
874
875 case addps:
876 dummy.op = addss;
877 for (i = 0; i < 4; i++) {
878 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
879 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
880 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
881 }
882 break;
883
884 case subps:
885 dummy.op = subss;
886 for (i = 0; i < 4; i++) {
887 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
888 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
889 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
890 }
891 break;
892
893 case mulps:
894 dummy.op = mulss;
895 for (i = 0; i < 4; i++) {
896 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
897 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
898 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
899 }
900 break;
901
902 case divps:
903 dummy.op = divss;
904 for (i = 0; i < 4; i++) {
905 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
906 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
907 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
908 }
909 break;
910
911 case sqrtps:
912 dummy.op = sqrtss;
913 for (i = 0; i < 4; i++) {
914 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
915 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
916 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
917 }
918 break;
919
920 case cvtdq2ps:
921 dummy.op = cvtsi2ss;
922 for (i = 0; i < 4; i++) {
923 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
924 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
925 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
926 }
927 break;
928
929 case cvttps2dq:
930 dummy.op = cvttss2si;
931 for (i = 0; i < 4; i++) {
932 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
933 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
934 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
935 }
936 break;
937
938 case cvtps2dq:
939 dummy.op = cvtss2si;
940 for (i = 0; i < 4; i++) {
941 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
942 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
943 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
944 }
945 break;
946
947 case cvtpi2ps:
948 dummy.op = cvtsi2ss;
949 for (i = 0; i < 2; i++) {
950 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
951 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
952 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
953 }
954 break;
955
956 case cvttps2pi:
957 dummy.op = cvttss2si;
958 for (i = 0; i < 2; i++) {
959 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
960 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
961 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
962 }
963 break;
964
965 case cvtps2pi:
966 dummy.op = cvtss2si;
967 for (i = 0; i < 2; i++) {
968 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
969 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
970 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
971 }
972 break;
973
974 case cmppd:
975 dummy.op = cmpsd;
976 dummy.imm = inst->imm;
977 for (i = 0; i < 2; i++) {
978 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
979 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
980 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
981 }
982 break;
983
984 case minpd:
985 dummy.op = minsd;
986 for (i = 0; i < 2; i++) {
987 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
988 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
989 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
990 }
991 break;
992
993 case maxpd:
994 dummy.op = maxsd;
995 for (i = 0; i < 2; i++) {
996 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
997 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
998 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
999 }
1000 break;
1001
1002 case addpd:
1003 dummy.op = addsd;
1004 for (i = 0; i < 2; i++) {
1005 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008 }
1009 break;
1010
1011 case subpd:
1012 dummy.op = subsd;
1013 for (i = 0; i < 2; i++) {
1014 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017 }
1018 break;
1019
1020 case mulpd:
1021 dummy.op = mulsd;
1022 for (i = 0; i < 2; i++) {
1023 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026 }
1027 break;
1028
1029 case divpd:
1030 dummy.op = divsd;
1031 for (i = 0; i < 2; i++) {
1032 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035 }
1036 break;
1037
1038 case sqrtpd:
1039 dummy.op = sqrtsd;
1040 for (i = 0; i < 2; i++) {
1041 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 }
1045 break;
1046
1047 case cvtpi2pd:
1048 case cvtdq2pd:
1049 dummy.op = cvtsi2sd;
1050 for (i = 0; i < 2; i++) {
1051 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054 }
1055 break;
1056
1057 case cvttpd2pi:
1058 case cvttpd2dq:
1059 dummy.op = cvttsd2si;
1060 for (i = 0; i < 2; i++) {
1061 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064 }
1065 break;
1066
1067 case cvtpd2pi:
1068 case cvtpd2dq:
1069 dummy.op = cvtsd2si;
1070 for (i = 0; i < 2; i++) {
1071 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074 }
1075 break;
1076
1077 case cvtps2pd:
1078 dummy.op = cvtss2sd;
1079 for (i = 0; i < 2; i++) {
1080 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083 }
1084 break;
1085
1086 case cvtpd2ps:
1087 dummy.op = cvtsd2ss;
1088 for (i = 0; i < 2; i++) {
1089 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092 }
1093 default:
1094 break;
1095 }
1096 }
1097
1098 /*
1099 * Store the result value from *info in the destination of the scalar
1100 * SSE instruction specified by *inst. If no result is given but the
1101 * exception is underflow or overflow, supply the default trapped result.
1102 *
1103 * This routine does not work if the instruction specified by *inst
1104 * is not a scalar instruction.
1105 */
1106 void
1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108 fex_info_t *info)
1109 {
1110 int i = 0;
1111 long long l = 0L;;
1112 float f = 0.0, fscl;
1113 double d = 0.0L, dscl;
1114
1115 /* for compares that write eflags, just set the flags
1116 to indicate "unordered" */
1117 if (inst->op == ucomiss || inst->op == comiss ||
1118 inst->op == ucomisd || inst->op == comisd) {
1119 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120 return;
1121 }
1122
1123 /* if info doesn't specify a result value, try to generate
1124 the default trapped result */
1125 if (info->res.type == fex_nodata) {
1126 /* set scale factors for exponent wrapping */
1127 switch (e) {
1128 case fex_overflow:
1129 fscl = 1.262177448e-29f; /* 2^-96 */
1130 dscl = 6.441148769597133308e-232; /* 2^-768 */
1131 break;
1132
1133 case fex_underflow:
1134 fscl = 7.922816251e+28f; /* 2^96 */
1135 dscl = 1.552518092300708935e+231; /* 2^768 */
1136 break;
1137
1138 default:
1139 (void) __fex_get_sse_op(uap, inst, info);
1140 if (info->res.type == fex_nodata)
1141 return;
1142 goto stuff;
1143 }
1144
1145 /* generate the wrapped result */
1146 if (inst->op == cvtsd2ss) {
1147 info->op1.type = fex_double;
1148 info->op1.val.d = inst->op2->d[0];
1149 info->op2.type = fex_nodata;
1150 info->res.type = fex_float;
1151 info->res.val.f = (float)(fscl * (fscl *
1152 info->op1.val.d));
1153 } else if ((int)inst->op & DOUBLE) {
1154 info->op1.type = fex_double;
1155 info->op1.val.d = inst->op1->d[0];
1156 info->op2.type = fex_double;
1157 info->op2.val.d = inst->op2->d[0];
1158 info->res.type = fex_double;
1159 switch (inst->op) {
1160 case addsd:
1161 info->res.val.d = dscl * (dscl *
1162 info->op1.val.d + dscl * info->op2.val.d);
1163 break;
1164
1165 case subsd:
1166 info->res.val.d = dscl * (dscl *
1167 info->op1.val.d - dscl * info->op2.val.d);
1168 break;
1169
1170 case mulsd:
1171 info->res.val.d = (dscl * info->op1.val.d) *
1172 (dscl * info->op2.val.d);
1173 break;
1174
1175 case divsd:
1176 info->res.val.d = (dscl * info->op1.val.d) /
1177 (info->op2.val.d / dscl);
1178 break;
1179
1180 default:
1181 return;
1182 }
1183 } else {
1184 info->op1.type = fex_float;
1185 info->op1.val.f = inst->op1->f[0];
1186 info->op2.type = fex_float;
1187 info->op2.val.f = inst->op2->f[0];
1188 info->res.type = fex_float;
1189 switch (inst->op) {
1190 case addss:
1191 info->res.val.f = fscl * (fscl *
1192 info->op1.val.f + fscl * info->op2.val.f);
1193 break;
1194
1195 case subss:
1196 info->res.val.f = fscl * (fscl *
1197 info->op1.val.f - fscl * info->op2.val.f);
1198 break;
1199
1200 case mulss:
1201 info->res.val.f = (fscl * info->op1.val.f) *
1202 (fscl * info->op2.val.f);
1203 break;
1204
1205 case divss:
1206 info->res.val.f = (fscl * info->op1.val.f) /
1207 (info->op2.val.f / fscl);
1208 break;
1209
1210 default:
1211 return;
1212 }
1213 }
1214 }
1215
1216 /* put the result in the destination */
1217 stuff:
1218 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220 switch (info->res.type) {
1221 case fex_int:
1222 i = info->res.val.i;
1223 break;
1224
1225 case fex_llong:
1226 i = info->res.val.l;
1227 break;
1228
1229 case fex_float:
1230 i = info->res.val.f;
1231 break;
1232
1233 case fex_double:
1234 i = info->res.val.d;
1235 break;
1236
1237 case fex_ldouble:
1238 i = info->res.val.q;
1239 break;
1240
1241 default:
1242 break;
1243 }
1244 inst->op1->i[0] = i;
1245 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247 inst->op == cvtsd2siq) {
1248 switch (info->res.type) {
1249 case fex_int:
1250 l = info->res.val.i;
1251 break;
1252
1253 case fex_llong:
1254 l = info->res.val.l;
1255 break;
1256
1257 case fex_float:
1258 l = info->res.val.f;
1259 break;
1260
1261 case fex_double:
1262 l = info->res.val.d;
1263 break;
1264
1265 case fex_ldouble:
1266 l = info->res.val.q;
1267 break;
1268
1269 default:
1270 break;
1271 }
1272 inst->op1->l[0] = l;
1273 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274 inst->op == cvtss2sd) {
1275 switch (info->res.type) {
1276 case fex_int:
1277 d = info->res.val.i;
1278 break;
1279
1280 case fex_llong:
1281 d = info->res.val.l;
1282 break;
1283
1284 case fex_float:
1285 d = info->res.val.f;
1286 break;
1287
1288 case fex_double:
1289 d = info->res.val.d;
1290 break;
1291
1292 case fex_ldouble:
1293 d = info->res.val.q;
1294 break;
1295
1296 default:
1297 break;
1298 }
1299 inst->op1->d[0] = d;
1300 } else {
1301 switch (info->res.type) {
1302 case fex_int:
1303 f = info->res.val.i;
1304 break;
1305
1306 case fex_llong:
1307 f = info->res.val.l;
1308 break;
1309
1310 case fex_float:
1311 f = info->res.val.f;
1312 break;
1313
1314 case fex_double:
1315 f = info->res.val.d;
1316 break;
1317
1318 case fex_ldouble:
1319 f = info->res.val.q;
1320 break;
1321
1322 default:
1323 break;
1324 }
1325 inst->op1->f[0] = f;
1326 }
1327 }
1328
1329 /*
1330 * Store the results from a SIMD instruction. For each i, store
1331 * the result value from info[i] in the i-th part of the destination
1332 * of the SIMD SSE instruction specified by *inst. If no result
1333 * is given but the exception indicated by e[i] is underflow or
1334 * overflow, supply the default trapped result.
1335 *
1336 * This routine does not work if the instruction specified by *inst
1337 * is not a SIMD instruction.
1338 */
1339 void
1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341 fex_info_t *info)
1342 {
1343 sseinst_t dummy;
1344 int i;
1345
1346 /* store each part */
1347 switch (inst->op) {
1348 case cmpps:
1349 dummy.op = cmpss;
1350 dummy.imm = inst->imm;
1351 for (i = 0; i < 4; i++) {
1352 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355 }
1356 break;
1357
1358 case minps:
1359 dummy.op = minss;
1360 for (i = 0; i < 4; i++) {
1361 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364 }
1365 break;
1366
1367 case maxps:
1368 dummy.op = maxss;
1369 for (i = 0; i < 4; i++) {
1370 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373 }
1374 break;
1375
1376 case addps:
1377 dummy.op = addss;
1378 for (i = 0; i < 4; i++) {
1379 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382 }
1383 break;
1384
1385 case subps:
1386 dummy.op = subss;
1387 for (i = 0; i < 4; i++) {
1388 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391 }
1392 break;
1393
1394 case mulps:
1395 dummy.op = mulss;
1396 for (i = 0; i < 4; i++) {
1397 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400 }
1401 break;
1402
1403 case divps:
1404 dummy.op = divss;
1405 for (i = 0; i < 4; i++) {
1406 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409 }
1410 break;
1411
1412 case sqrtps:
1413 dummy.op = sqrtss;
1414 for (i = 0; i < 4; i++) {
1415 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418 }
1419 break;
1420
1421 case cvtdq2ps:
1422 dummy.op = cvtsi2ss;
1423 for (i = 0; i < 4; i++) {
1424 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427 }
1428 break;
1429
1430 case cvttps2dq:
1431 dummy.op = cvttss2si;
1432 for (i = 0; i < 4; i++) {
1433 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436 }
1437 break;
1438
1439 case cvtps2dq:
1440 dummy.op = cvtss2si;
1441 for (i = 0; i < 4; i++) {
1442 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445 }
1446 break;
1447
1448 case cvtpi2ps:
1449 dummy.op = cvtsi2ss;
1450 for (i = 0; i < 2; i++) {
1451 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454 }
1455 break;
1456
1457 case cvttps2pi:
1458 dummy.op = cvttss2si;
1459 for (i = 0; i < 2; i++) {
1460 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463 }
1464 break;
1465
1466 case cvtps2pi:
1467 dummy.op = cvtss2si;
1468 for (i = 0; i < 2; i++) {
1469 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472 }
1473 break;
1474
1475 case cmppd:
1476 dummy.op = cmpsd;
1477 dummy.imm = inst->imm;
1478 for (i = 0; i < 2; i++) {
1479 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482 }
1483 break;
1484
1485 case minpd:
1486 dummy.op = minsd;
1487 for (i = 0; i < 2; i++) {
1488 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491 }
1492 break;
1493
1494 case maxpd:
1495 dummy.op = maxsd;
1496 for (i = 0; i < 2; i++) {
1497 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 }
1501 break;
1502
1503 case addpd:
1504 dummy.op = addsd;
1505 for (i = 0; i < 2; i++) {
1506 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509 }
1510 break;
1511
1512 case subpd:
1513 dummy.op = subsd;
1514 for (i = 0; i < 2; i++) {
1515 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518 }
1519 break;
1520
1521 case mulpd:
1522 dummy.op = mulsd;
1523 for (i = 0; i < 2; i++) {
1524 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527 }
1528 break;
1529
1530 case divpd:
1531 dummy.op = divsd;
1532 for (i = 0; i < 2; i++) {
1533 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536 }
1537 break;
1538
1539 case sqrtpd:
1540 dummy.op = sqrtsd;
1541 for (i = 0; i < 2; i++) {
1542 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545 }
1546 break;
1547
1548 case cvtpi2pd:
1549 case cvtdq2pd:
1550 dummy.op = cvtsi2sd;
1551 for (i = 0; i < 2; i++) {
1552 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 }
1556 break;
1557
1558 case cvttpd2pi:
1559 case cvttpd2dq:
1560 dummy.op = cvttsd2si;
1561 for (i = 0; i < 2; i++) {
1562 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565 }
1566 /* for cvttpd2dq, zero the high 64 bits of the destination */
1567 if (inst->op == cvttpd2dq)
1568 inst->op1->l[1] = 0ll;
1569 break;
1570
1571 case cvtpd2pi:
1572 case cvtpd2dq:
1573 dummy.op = cvtsd2si;
1574 for (i = 0; i < 2; i++) {
1575 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 }
1579 /* for cvtpd2dq, zero the high 64 bits of the destination */
1580 if (inst->op == cvtpd2dq)
1581 inst->op1->l[1] = 0ll;
1582 break;
1583
1584 case cvtps2pd:
1585 dummy.op = cvtss2sd;
1586 for (i = 0; i < 2; i++) {
1587 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590 }
1591 break;
1592
1593 case cvtpd2ps:
1594 dummy.op = cvtsd2ss;
1595 for (i = 0; i < 2; i++) {
1596 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 }
1600 /* zero the high 64 bits of the destination */
1601 inst->op1->l[1] = 0ll;
1602
1603 default:
1604 break;
1605 }
1606 }
1607
|
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #include <ucontext.h>
32 #include <fenv.h>
33 #if defined(__SUNPRO_C)
34 #include <sunmath.h>
35 #else
36 #include <sys/ieeefp.h>
37 #endif
38 #include "fex_handler.h"
39 #include "fenv_inlines.h"
40
/*
 * Use the i386 register-slot names for the program counter and the
 * processor status word when the included headers have not already
 * defined REG_PC and REG_PS.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts a register number X, as assembled from the ModRM,
 * SIB and REX fields of an instruction, into the index used with the
 * saved general register array.  On amd64, numbers below 4 count down
 * from REG_RAX, number 4 is REG_RSP, and numbers above 4 count down from
 * REG_RAX + 1; elsewhere the index is simply EAX - X.
 *
 * The argument is parenthesized so that expression arguments expand with
 * the intended precedence.  On amd64 X is still evaluated more than once,
 * so it must not have side effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55
56 /*
57 * Support for SSE instructions
58 */
59
60 /*
61 * Decode an SSE instruction. Fill in *inst and return the length of the
62 * instruction in bytes. Return 0 if the instruction is not recognized.
63 */
64 int
65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 {
67 unsigned char *ip;
68 char *addr;
69 int i, dbl, simd, rex, modrm, sib, r;
70
71 i = 0;
72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73
74 /* look for pseudo-prefixes */
75 dbl = 0;
76 simd = SIMD;
77
78 if (ip[i] == 0xF3) {
79 simd = 0;
80 i++;
81 } else if (ip[i] == 0x66) {
82 dbl = DOUBLE;
83 i++;
84 } else if (ip[i] == 0xF2) {
85 dbl = DOUBLE;
86 simd = 0;
87 i++;
88 }
89
90 /* look for AMD64 REX prefix */
91 rex = 0;
92
93 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
94 rex = ip[i];
95 i++;
96 }
97
98 /* parse opcode */
99 if (ip[i++] != 0x0F)
100 return (0);
101
102 switch (ip[i++]) {
103 case 0x2A:
104 inst->op = (int)cvtsi2ss + simd + dbl;
105
106 if (!simd)
107 inst->op = (int)inst->op + (rex & 8);
108
109 break;
110
111 case 0x2C:
112 inst->op = (int)cvttss2si + simd + dbl;
113
114 if (!simd)
115 inst->op = (int)inst->op + (rex & 8);
116
117 break;
118
119 case 0x2D:
120 inst->op = (int)cvtss2si + simd + dbl;
121
122 if (!simd)
123 inst->op = (int)inst->op + (rex & 8);
124
125 break;
126
127 case 0x2E:
128
129 /* oddball: scalar instruction in a SIMD opcode group */
130 if (!simd)
131 return (0);
132
133 inst->op = (int)ucomiss + dbl;
134 break;
135
136 case 0x2F:
137
138 /* oddball: scalar instruction in a SIMD opcode group */
139 if (!simd)
140 return (0);
141
142 inst->op = (int)comiss + dbl;
143 break;
144
145 case 0x51:
146 inst->op = (int)sqrtss + simd + dbl;
147 break;
148
149 case 0x58:
150 inst->op = (int)addss + simd + dbl;
151 break;
152
153 case 0x59:
154 inst->op = (int)mulss + simd + dbl;
155 break;
156
157 case 0x5A:
158 inst->op = (int)cvtss2sd + simd + dbl;
159 break;
160
161 case 0x5B:
162
163 if (dbl) {
164 if (simd)
165 inst->op = cvtps2dq;
166 else
167 return (0);
168 } else {
169 inst->op = (simd) ? cvtdq2ps : cvttps2dq;
170 }
171
172 break;
173
174 case 0x5C:
175 inst->op = (int)subss + simd + dbl;
176 break;
177
178 case 0x5D:
179 inst->op = (int)minss + simd + dbl;
180 break;
181
182 case 0x5E:
183 inst->op = (int)divss + simd + dbl;
184 break;
185
186 case 0x5F:
187 inst->op = (int)maxss + simd + dbl;
188 break;
189
190 case 0xC2:
191 inst->op = (int)cmpss + simd + dbl;
192 break;
193
194 case 0xE6:
195
196 if (simd) {
197 if (dbl)
198 inst->op = cvttpd2dq;
199 else
200 return (0);
201 } else {
202 inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
203 }
204
205 break;
206
207 default:
208 return (0);
209 }
210
211 /* locate operands */
212 modrm = ip[i++];
213
214 if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
215 cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
216 inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
217 cvttsd2siq) {
218 /* op1 is a gp register */
219 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
220 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
221 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
222 cvtpd2pi || inst->op == cvttpd2pi) {
223 /* op1 is a mmx register */
224 #ifdef __amd64
225 inst->op1 = (sseoperand_t *)
226 &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
227 .st[(modrm >> 3) & 7];
228 #else
229 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
230 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
231 .state[7]);
232 #endif
233 } else {
234 /* op1 is a xmm register */
235 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
236 inst->op1 =
237 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
238 .fpchip_state.xmm[r];
239 }
240
241 if ((modrm >> 6) == 3) {
242 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
243 cvtsi2ssq || inst->op == cvtsi2sdq) {
244 /* op2 is a gp register */
245 r = ((rex & 1) << 3) | (modrm & 7);
246 inst->op2 =
247 (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
248 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
249 /* op2 is a mmx register */
250 #ifdef __amd64
251 inst->op2 =
252 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
253 .fpchip_state.st[modrm & 7];
254 #else
255 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
256 (char *)&uap->uc_mcontext.fpregs.fp_reg_set
257 .fpchip_state.state[7]);
258 #endif
259 } else {
260 /* op2 is a xmm register */
261 r = ((rex & 1) << 3) | (modrm & 7);
262 inst->op2 =
263 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
264 .fpchip_state.xmm[r];
265 }
266 } else if ((modrm & 0xc7) == 0x05) {
267 #ifdef __amd64
268 /* address of next instruction + offset */
269 r = i + 4;
270
271 if (inst->op == cmpss || inst->op == cmpps || inst->op ==
272 cmpsd || inst->op == cmppd)
273 r++;
274
275 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
276 #else
277 /* absolute address */
278 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
279 #endif
280 i += 4;
281 } else {
282 /* complex address */
283 if ((modrm & 7) == 4) {
284 /* parse sib byte */
285 sib = ip[i++];
286
287 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
288 /* start with absolute address */
289 addr = (char *)(uintptr_t)(*(int *)(ip + i));
290 i += 4;
291 } else {
292 /* start with base */
293 r = ((rex & 1) << 3) | (sib & 7);
294 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
295 }
296
297 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
298
299 if (r != 4) {
300 /* add scaled index */
301 addr += uap->uc_mcontext.gregs[regno(r)] <<
302 (sib >> 6);
303 }
304 } else {
305 r = ((rex & 1) << 3) | (modrm & 7);
306 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
307 }
308
309 /* add displacement, if any */
310 if ((modrm >> 6) == 1) {
311 addr += (char)ip[i++];
312 } else if ((modrm >> 6) == 2) {
313 addr += *(int *)(ip + i);
314 i += 4;
315 }
316
317 inst->op2 = (sseoperand_t *)addr;
318 }
319
320 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
321 inst->op == cmppd) {
322 /* get the immediate operand */
323 inst->imm = ip[i++];
324 }
325
326 return (i);
327 }
328
329 static enum fp_class_type
330 my_fp_classf(float *x)
331 {
332 int i = *(int *)x & ~0x80000000;
333
334 if (i < 0x7f800000) {
335 if (i < 0x00800000)
336 return ((i == 0) ? fp_zero : fp_subnormal);
337
338 return (fp_normal);
339 } else if (i == 0x7f800000) {
340 return (fp_infinity);
341 } else if (i & 0x400000) {
342 return (fp_quiet);
343 } else {
344 return (fp_signaling);
345 }
346 }
347
348 static enum fp_class_type
349 my_fp_class(double *x)
350 {
351 int i = *(1 + (int *)x) & ~0x80000000;
352
353 if (i < 0x7ff00000) {
354 if (i < 0x00100000)
355 return (((i | *(int *)x) == 0) ? fp_zero :
356 fp_subnormal);
357
358 return (fp_normal);
359 } else if (i == 0x7ff00000 && *(int *)x == 0) {
360 return (fp_infinity);
361 } else if (i & 0x80000) {
362 return (fp_quiet);
363 } else {
364 return (fp_signaling);
365 }
366 }
367
368 /*
369 * Inspect a scalar SSE instruction that incurred an invalid operation
370 * exception to determine which type of exception it was.
371 */
372 static enum fex_exception
373 __fex_get_sse_invalid_type(sseinst_t *inst)
374 {
375 enum fp_class_type t1, t2;
376
377 /* check op2 for signaling nan */
378 t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
379 my_fp_classf(&inst->op2->f[0]);
380
381 if (t2 == fp_signaling)
382 return (fex_inv_snan);
383
384 /* eliminate all single-operand instructions */
385 switch (inst->op) {
386 case cvtsd2ss:
387 case cvtss2sd:
388 /* hmm, this shouldn't have happened */
389 return ((enum fex_exception)-1);
390
391 case sqrtss:
392 case sqrtsd:
393 return (fex_inv_sqrt);
394
395 case cvtss2si:
396 case cvtsd2si:
397 case cvttss2si:
398 case cvttsd2si:
399 case cvtss2siq:
400 case cvtsd2siq:
401 case cvttss2siq:
402 case cvttsd2siq:
403 return (fex_inv_int);
404 default:
405 break;
406 }
407
408 /* check op1 for signaling nan */
409 t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
410 my_fp_classf(&inst->op1->f[0]);
411
412 if (t1 == fp_signaling)
413 return (fex_inv_snan);
414
415 /* check two-operand instructions for other cases */
416 switch (inst->op) {
417 case cmpss:
418 case cmpsd:
419 case minss:
420 case minsd:
421 case maxss:
422 case maxsd:
423 case comiss:
424 case comisd:
425 return (fex_inv_cmp);
426
427 case addss:
428 case addsd:
429 case subss:
430 case subsd:
431
432 if (t1 == fp_infinity && t2 == fp_infinity)
433 return (fex_inv_isi);
434
435 break;
436
437 case mulss:
438 case mulsd:
439
440 if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
441 t1 == fp_infinity))
442 return (fex_inv_zmi);
443
444 break;
445
446 case divss:
447 case divsd:
448
449 if (t1 == fp_zero && t2 == fp_zero)
450 return (fex_inv_zdz);
451
452 if (t1 == fp_infinity && t2 == fp_infinity)
453 return (fex_inv_idi);
454
455 default:
456 break;
457 }
458
459 return ((enum fex_exception)-1);
460 }
461
/*
 * Prototypes for the routines that execute a single scalar SSE
 * instruction on behalf of __fex_get_sse_op().  As called there, the
 * leading pointer argument(s) refer to the captured operand value(s) and,
 * where a third (or, for sqrt and the conversions, second) pointer is
 * present, it refers to the slot that holds the result.  The ucomis* and
 * comis* routines are passed the two operands only.
 */
462 /* inline templates */
/* single precision; the compare routines are passed an int result slot */
463 extern void sse_cmpeqss(float *, float *, int *);
464 extern void sse_cmpltss(float *, float *, int *);
465 extern void sse_cmpless(float *, float *, int *);
466 extern void sse_cmpunordss(float *, float *, int *);
467 extern void sse_minss(float *, float *, float *);
468 extern void sse_maxss(float *, float *, float *);
469 extern void sse_addss(float *, float *, float *);
470 extern void sse_subss(float *, float *, float *);
471 extern void sse_mulss(float *, float *, float *);
472 extern void sse_divss(float *, float *, float *);
473 extern void sse_sqrtss(float *, float *);
474 extern void sse_ucomiss(float *, float *);
475 extern void sse_comiss(float *, float *);
476 extern void sse_cvtss2sd(float *, double *);
477 extern void sse_cvtsi2ss(int *, float *);
478 extern void sse_cvttss2si(float *, int *);
479 extern void sse_cvtss2si(float *, int *);
480
/* conversions to and from long long are declared on amd64 only */
481 #ifdef __amd64
482 extern void sse_cvtsi2ssq(long long *, float *);
483 extern void sse_cvttss2siq(float *, long long *);
484 extern void sse_cvtss2siq(float *, long long *);
485 #endif
486
/* double precision; the compare routines are passed a long long result slot */
487 extern void sse_cmpeqsd(double *, double *, long long *);
488 extern void sse_cmpltsd(double *, double *, long long *);
489 extern void sse_cmplesd(double *, double *, long long *);
490 extern void sse_cmpunordsd(double *, double *, long long *);
491 extern void sse_minsd(double *, double *, double *);
492 extern void sse_maxsd(double *, double *, double *);
493 extern void sse_addsd(double *, double *, double *);
494 extern void sse_subsd(double *, double *, double *);
495 extern void sse_mulsd(double *, double *, double *);
496 extern void sse_divsd(double *, double *, double *);
497 extern void sse_sqrtsd(double *, double *);
498 extern void sse_ucomisd(double *, double *);
499 extern void sse_comisd(double *, double *);
500 extern void sse_cvtsd2ss(double *, float *);
501 extern void sse_cvtsi2sd(int *, double *);
502 extern void sse_cvttsd2si(double *, int *);
503 extern void sse_cvtsd2si(double *, int *);
504
/* conversions to and from long long are declared on amd64 only */
505 #ifdef __amd64
506 extern void sse_cvtsi2sdq(long long *, double *);
507 extern void sse_cvttsd2siq(double *, long long *);
508 extern void sse_cvtsd2siq(double *, long long *);
509 #endif
510
511 /*
512 * Fill in *info with the operands, default untrapped result, and
513 * flags produced by a scalar SSE instruction, and return the type
514 * of trapped exception (if any). On entry, the mxcsr must have
515 * all exceptions masked and all flags clear. The same conditions
516 * will hold on exit.
517 *
518 * This routine does not work if the instruction specified by *inst
519 * is not a scalar instruction.
520 */
/*
 * Parameters:
 *   uap   saved user context; only its saved mxcsr is read here, to find
 *         out which exceptions the interrupted code had enabled
 *   inst  decoded scalar instruction (opcode, operand pointers, immediate)
 *   info  receives op, op1, op2, res and flags
 *
 * Returns the first enabled exception in the order invalid, division,
 * overflow, underflow, inexact, or (enum fex_exception)-1 if none.
 */
521 enum fex_exception
522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
523 {
524 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
525
526 /*
527 * Perform the operation with traps disabled and check the
528 * exception flags. If the underflow trap was enabled, also
529 * check for an exact subnormal result.
530 */
531 __fenv_getmxcsr(&oldmxcsr);
532 subnorm = 0;
533
/* opcodes with the DOUBLE bit set are handled first */
534 if ((int)inst->op & DOUBLE) {
/*
 * Capture the operands.  The int/long long conversions and the
 * single-operand double instructions take their only operand from
 * inst->op2 and leave info->op2 as fex_nodata; everything else reads
 * inst->op1 and inst->op2.
 */
535 if (inst->op == cvtsi2sd) {
536 info->op1.type = fex_int;
537 info->op1.val.i = inst->op2->i[0];
538 info->op2.type = fex_nodata;
539 } else if (inst->op == cvtsi2sdq) {
540 info->op1.type = fex_llong;
541 info->op1.val.l = inst->op2->l[0];
542 info->op2.type = fex_nodata;
543 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
544 inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
545 cvttsd2siq || inst->op == cvtsd2siq) {
546 info->op1.type = fex_double;
547 info->op1.val.d = inst->op2->d[0];
548 info->op2.type = fex_nodata;
549 } else {
550 info->op1.type = fex_double;
551 info->op1.val.d = inst->op1->d[0];
552 info->op2.type = fex_double;
553 info->op2.val.d = inst->op2->d[0];
554 }
555
/* default result type; individual cases below override it */
556 info->res.type = fex_double;
557
558 switch (inst->op) {
559 case cmpsd:
560 info->op = fex_cmp;
561 info->res.type = fex_llong;
562
/* the low two bits of the immediate pick eq, lt, le or unord */
563 switch (inst->imm & 3) {
564 case 0:
565 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
566 &info->res.val.l);
567 break;
568
569 case 1:
570 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
571 &info->res.val.l);
572 break;
573
574 case 2:
575 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
576 &info->res.val.l);
577 break;
578
579 case 3:
580 sse_cmpunordsd(&info->op1.val.d,
581 &info->op2.val.d, &info->res.val.l);
582 }
583
/* bit 2 of the immediate inverts every bit of the result */
584 if (inst->imm & 4)
585 info->res.val.l ^= 0xffffffffffffffffull;
586
587 break;
588
589 case minsd:
590 info->op = fex_other;
591 sse_minsd(&info->op1.val.d, &info->op2.val.d,
592 &info->res.val.d);
593 break;
594
595 case maxsd:
596 info->op = fex_other;
597 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
598 &info->res.val.d);
599 break;
600
601 case addsd:
602 info->op = fex_add;
603 sse_addsd(&info->op1.val.d, &info->op2.val.d,
604 &info->res.val.d);
605
/* note a subnormal result; it is tested against the underflow trap below */
606 if (my_fp_class(&info->res.val.d) == fp_subnormal)
607 subnorm = 1;
608
609 break;
610
611 case subsd:
612 info->op = fex_sub;
613 sse_subsd(&info->op1.val.d, &info->op2.val.d,
614 &info->res.val.d);
615
616 if (my_fp_class(&info->res.val.d) == fp_subnormal)
617 subnorm = 1;
618
619 break;
620
621 case mulsd:
622 info->op = fex_mul;
623 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
624 &info->res.val.d);
625
626 if (my_fp_class(&info->res.val.d) == fp_subnormal)
627 subnorm = 1;
628
629 break;
630
631 case divsd:
632 info->op = fex_div;
633 sse_divsd(&info->op1.val.d, &info->op2.val.d,
634 &info->res.val.d);
635
636 if (my_fp_class(&info->res.val.d) == fp_subnormal)
637 subnorm = 1;
638
639 break;
640
641 case sqrtsd:
642 info->op = fex_sqrt;
643 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
644 break;
645
646 case cvtsd2ss:
647 info->op = fex_cnvt;
648 info->res.type = fex_float;
649 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
650
/* the result is a float here, so the float classifier is used */
651 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
652 subnorm = 1;
653
654 break;
655
656 case cvtsi2sd:
657 info->op = fex_cnvt;
658 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
659 break;
660
661 case cvttsd2si:
662 info->op = fex_cnvt;
663 info->res.type = fex_int;
664 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
665 break;
666
667 case cvtsd2si:
668 info->op = fex_cnvt;
669 info->res.type = fex_int;
670 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
671 break;
672
673 #ifdef __amd64
/*
 * NOTE(review): the embedded line numbering jumps from 673 to 696 at this
 * point.  The #ifdef __amd64 above has no visible #endif and the break
 * below has no visible case label, so source lines appear to be missing
 * from this copy.  Presumably they held the amd64-only long long
 * conversion cases and the start of a ucomisd case -- TODO restore them
 * from the original file.
 */
696 break;
697
698 case comisd:
699 info->op = fex_cmp;
700 info->res.type = fex_nodata;
701 sse_comisd(&info->op1.val.d, &info->op2.val.d);
702 break;
703 default:
704 break;
705 }
706 } else {
/* single precision: same operand capture scheme as the double branch */
707 if (inst->op == cvtsi2ss) {
708 info->op1.type = fex_int;
709 info->op1.val.i = inst->op2->i[0];
710 info->op2.type = fex_nodata;
711 } else if (inst->op == cvtsi2ssq) {
712 info->op1.type = fex_llong;
713 info->op1.val.l = inst->op2->l[0];
714 info->op2.type = fex_nodata;
715 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
716 inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
717 cvttss2siq || inst->op == cvtss2siq) {
718 info->op1.type = fex_float;
719 info->op1.val.f = inst->op2->f[0];
720 info->op2.type = fex_nodata;
721 } else {
722 info->op1.type = fex_float;
723 info->op1.val.f = inst->op1->f[0];
724 info->op2.type = fex_float;
725 info->op2.val.f = inst->op2->f[0];
726 }
727
/* default result type; individual cases below override it */
728 info->res.type = fex_float;
729
730 switch (inst->op) {
731 case cmpss:
732 info->op = fex_cmp;
733 info->res.type = fex_int;
734
/* the low two bits of the immediate pick eq, lt, le or unord */
735 switch (inst->imm & 3) {
736 case 0:
737 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
738 &info->res.val.i);
739 break;
740
741 case 1:
742 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
743 &info->res.val.i);
744 break;
745
746 case 2:
747 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
748 &info->res.val.i);
749 break;
750
751 case 3:
752 sse_cmpunordss(&info->op1.val.f,
753 &info->op2.val.f, &info->res.val.i);
754 }
755
/* bit 2 of the immediate inverts every bit of the result */
756 if (inst->imm & 4)
757 info->res.val.i ^= 0xffffffffu;
758
759 break;
760
761 case minss:
762 info->op = fex_other;
763 sse_minss(&info->op1.val.f, &info->op2.val.f,
764 &info->res.val.f);
765 break;
766
767 case maxss:
768 info->op = fex_other;
769 sse_maxss(&info->op1.val.f, &info->op2.val.f,
770 &info->res.val.f);
771 break;
772
773 case addss:
774 info->op = fex_add;
775 sse_addss(&info->op1.val.f, &info->op2.val.f,
776 &info->res.val.f);
777
/* note a subnormal result; it is tested against the underflow trap below */
778 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
779 subnorm = 1;
780
781 break;
782
783 case subss:
784 info->op = fex_sub;
785 sse_subss(&info->op1.val.f, &info->op2.val.f,
786 &info->res.val.f);
787
788 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
789 subnorm = 1;
790
791 break;
792
793 case mulss:
794 info->op = fex_mul;
795 sse_mulss(&info->op1.val.f, &info->op2.val.f,
796 &info->res.val.f);
797
798 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
799 subnorm = 1;
800
801 break;
802
803 case divss:
804 info->op = fex_div;
805 sse_divss(&info->op1.val.f, &info->op2.val.f,
806 &info->res.val.f);
807
808 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
809 subnorm = 1;
810
811 break;
812
813 case sqrtss:
814 info->op = fex_sqrt;
815 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
816 break;
817
818 case cvtss2sd:
819 info->op = fex_cnvt;
820 info->res.type = fex_double;
821 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
822 break;
823
824 case cvtsi2ss:
825 info->op = fex_cnvt;
826 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
827 break;
828
829 case cvttss2si:
830 info->op = fex_cnvt;
/*
 * NOTE(review): the embedded line numbering jumps from 830 to 855 at this
 * point.  The #endif a few lines below has no visible matching #ifdef in
 * this branch, and the call that follows is to sse_cvtss2siq although the
 * last visible case label is cvttss2si, so source lines appear to be
 * missing from this copy -- TODO restore them from the original file.
 */
855 info->res.type = fex_llong;
856 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
857 break;
858 #endif
859
860 case ucomiss:
861 info->op = fex_cmp;
862 info->res.type = fex_nodata;
863 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
864 break;
865
866 case comiss:
867 info->op = fex_cmp;
868 info->res.type = fex_nodata;
869 sse_comiss(&info->op1.val.f, &info->op2.val.f);
870 break;
871 default:
872 break;
873 }
874 }
875
/*
 * Record the flags left in the mxcsr by the operation (mask 0x3d keeps
 * bits 0 and 2-5 and drops bit 1), then put back the mxcsr value that
 * was read on entry.
 */
876 __fenv_getmxcsr(&mxcsr);
877 info->flags = mxcsr & 0x3d;
878 __fenv_setmxcsr(&oldmxcsr);
879
880 /* determine which exception would have been trapped */
/*
 * te is the complement of the saved user mxcsr shifted right by 7,
 * limited to the same 0x3d bits -- presumably this lines the exception
 * mask bits up with the flag bits, so a set bit in te means the trap was
 * enabled; confirm against the MXCSR layout.
 */
881 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
882 0x3d;
883 e = mxcsr & te;
884
/* report the first match, checked in this fixed order */
885 if (e & FE_INVALID)
886 return (__fex_get_sse_invalid_type(inst));
887
888 if (e & FE_DIVBYZERO)
889 return (fex_division);
890
891 if (e & FE_OVERFLOW)
892 return (fex_overflow);
893
/* underflow also covers a subnormal result when its trap is enabled */
894 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
895 return (fex_underflow);
896
897 if (e & FE_INEXACT)
898 return (fex_inexact);
899
900 return ((enum fex_exception)-1);
901 }
902
903 /*
904 * Emulate a SIMD SSE instruction to determine which exceptions occur
905 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
906 * trapped exception that would occur if the i-th part of the SIMD
907 * instruction were executed in isolation; set e[i] to -1 if no
908 * trapped exception would occur in this part. Also fill in info[i]
909 * with the corresponding operands, default untrapped result, and
910 * flags.
911 *
912 * This routine does not work if the instruction specified by *inst
913 * is not a SIMD instruction.
914 */
915 void
916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
917 fex_info_t *info)
918 {
919 sseinst_t dummy;
920 int i;
921
922 e[0] = e[1] = e[2] = e[3] = -1;
923
924 /* perform each part of the SIMD operation */
925 switch (inst->op) {
926 case cmpps:
927 dummy.op = cmpss;
928 dummy.imm = inst->imm;
929
930 for (i = 0; i < 4; i++) {
931 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
932 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
933 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
934 }
935
936 break;
937
938 case minps:
939 dummy.op = minss;
940
941 for (i = 0; i < 4; i++) {
942 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
945 }
946
947 break;
948
949 case maxps:
950 dummy.op = maxss;
951
952 for (i = 0; i < 4; i++) {
953 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
954 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
955 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
956 }
957
958 break;
959
960 case addps:
961 dummy.op = addss;
962
963 for (i = 0; i < 4; i++) {
964 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
965 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
966 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
967 }
968
969 break;
970
971 case subps:
972 dummy.op = subss;
973
974 for (i = 0; i < 4; i++) {
975 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
976 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
977 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
978 }
979
980 break;
981
982 case mulps:
983 dummy.op = mulss;
984
985 for (i = 0; i < 4; i++) {
986 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
987 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
988 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
989 }
990
991 break;
992
993 case divps:
994 dummy.op = divss;
995
996 for (i = 0; i < 4; i++) {
997 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
998 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000 }
1001
1002 break;
1003
1004 case sqrtps:
1005 dummy.op = sqrtss;
1006
1007 for (i = 0; i < 4; i++) {
1008 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1009 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1010 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1011 }
1012
1013 break;
1014
1015 case cvtdq2ps:
1016 dummy.op = cvtsi2ss;
1017
1018 for (i = 0; i < 4; i++) {
1019 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022 }
1023
1024 break;
1025
1026 case cvttps2dq:
1027 dummy.op = cvttss2si;
1028
1029 for (i = 0; i < 4; i++) {
1030 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1031 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1032 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1033 }
1034
1035 break;
1036
1037 case cvtps2dq:
1038 dummy.op = cvtss2si;
1039
1040 for (i = 0; i < 4; i++) {
1041 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1042 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 }
1045
1046 break;
1047
1048 case cvtpi2ps:
1049 dummy.op = cvtsi2ss;
1050
1051 for (i = 0; i < 2; i++) {
1052 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055 }
1056
1057 break;
1058
1059 case cvttps2pi:
1060 dummy.op = cvttss2si;
1061
1062 for (i = 0; i < 2; i++) {
1063 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1064 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1065 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1066 }
1067
1068 break;
1069
1070 case cvtps2pi:
1071 dummy.op = cvtss2si;
1072
1073 for (i = 0; i < 2; i++) {
1074 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1075 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1076 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077 }
1078
1079 break;
1080
1081 case cmppd:
1082 dummy.op = cmpsd;
1083 dummy.imm = inst->imm;
1084
1085 for (i = 0; i < 2; i++) {
1086 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1087 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1088 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1089 }
1090
1091 break;
1092
1093 case minpd:
1094 dummy.op = minsd;
1095
1096 for (i = 0; i < 2; i++) {
1097 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1098 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1099 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1100 }
1101
1102 break;
1103
1104 case maxpd:
1105 dummy.op = maxsd;
1106
1107 for (i = 0; i < 2; i++) {
1108 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1109 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1110 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1111 }
1112
1113 break;
1114
1115 case addpd:
1116 dummy.op = addsd;
1117
1118 for (i = 0; i < 2; i++) {
1119 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1120 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1121 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1122 }
1123
1124 break;
1125
1126 case subpd:
1127 dummy.op = subsd;
1128
1129 for (i = 0; i < 2; i++) {
1130 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1131 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133 }
1134
1135 break;
1136
1137 case mulpd:
1138 dummy.op = mulsd;
1139
1140 for (i = 0; i < 2; i++) {
1141 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1142 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1143 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1144 }
1145
1146 break;
1147
1148 case divpd:
1149 dummy.op = divsd;
1150
1151 for (i = 0; i < 2; i++) {
1152 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1153 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1154 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1155 }
1156
1157 break;
1158
1159 case sqrtpd:
1160 dummy.op = sqrtsd;
1161
1162 for (i = 0; i < 2; i++) {
1163 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1164 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1165 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1166 }
1167
1168 break;
1169
1170 case cvtpi2pd:
1171 case cvtdq2pd:
1172 dummy.op = cvtsi2sd;
1173
1174 for (i = 0; i < 2; i++) {
1175 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1176 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1177 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1178 }
1179
1180 break;
1181
1182 case cvttpd2pi:
1183 case cvttpd2dq:
1184 dummy.op = cvttsd2si;
1185
1186 for (i = 0; i < 2; i++) {
1187 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1188 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1189 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1190 }
1191
1192 break;
1193
1194 case cvtpd2pi:
1195 case cvtpd2dq:
1196 dummy.op = cvtsd2si;
1197
1198 for (i = 0; i < 2; i++) {
1199 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1200 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1201 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1202 }
1203
1204 break;
1205
1206 case cvtps2pd:
1207 dummy.op = cvtss2sd;
1208
1209 for (i = 0; i < 2; i++) {
1210 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1211 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1212 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1213 }
1214
1215 break;
1216
1217 case cvtpd2ps:
1218 dummy.op = cvtsd2ss;
1219
1220 for (i = 0; i < 2; i++) {
1221 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1222 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1223 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1224 }
1225
1226 default:
1227 break;
1228 }
1229 }
1230
1231 /*
1232 * Store the result value from *info in the destination of the scalar
1233 * SSE instruction specified by *inst. If no result is given but the
1234 * exception is underflow or overflow, supply the default trapped result.
1235 *
1236 * This routine does not work if the instruction specified by *inst
1237 * is not a scalar instruction.
1238 */
1239 void
1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1241 fex_info_t *info)
1242 {
1243 int i = 0;
1244 long long l = 0L;
1245 float f = 0.0, fscl;
1246 double d = 0.0L, dscl;
1247
1248 /*
1249 * for compares that write eflags, just set the flags
1250 * to indicate "unordered"
1251 */
1252 if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
1253 inst->op == comisd) {
1254 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1255 return;
1256 }
1257
1258 /*
1259 * if info doesn't specify a result value, try to generate
1260 * the default trapped result
1261 */
1262 if (info->res.type == fex_nodata) {
1263 /* set scale factors for exponent wrapping */
1264 switch (e) {
1265 case fex_overflow:
1266 fscl = 1.262177448e-29f; /* 2^-96 */
1267 dscl = 6.441148769597133308e-232; /* 2^-768 */
1268 break;
1269
1270 case fex_underflow:
1271 fscl = 7.922816251e+28f; /* 2^96 */
1272 dscl = 1.552518092300708935e+231; /* 2^768 */
1273 break;
1274
1275 default:
1276 (void) __fex_get_sse_op(uap, inst, info);
1277
1278 if (info->res.type == fex_nodata)
1279 return;
1280
1281 goto stuff;
1282 }
1283
1284 /* generate the wrapped result */
1285 if (inst->op == cvtsd2ss) {
1286 info->op1.type = fex_double;
1287 info->op1.val.d = inst->op2->d[0];
1288 info->op2.type = fex_nodata;
1289 info->res.type = fex_float;
1290 info->res.val.f = (float)(fscl * (fscl *
1291 info->op1.val.d));
1292 } else if ((int)inst->op & DOUBLE) {
1293 info->op1.type = fex_double;
1294 info->op1.val.d = inst->op1->d[0];
1295 info->op2.type = fex_double;
1296 info->op2.val.d = inst->op2->d[0];
1297 info->res.type = fex_double;
1298
1299 switch (inst->op) {
1300 case addsd:
1301 info->res.val.d = dscl * (dscl *
1302 info->op1.val.d + dscl * info->op2.val.d);
1303 break;
1304
1305 case subsd:
1306 info->res.val.d = dscl * (dscl *
1307 info->op1.val.d - dscl * info->op2.val.d);
1308 break;
1309
1310 case mulsd:
1311 info->res.val.d = (dscl * info->op1.val.d) *
1312 (dscl * info->op2.val.d);
1313 break;
1314
1315 case divsd:
1316 info->res.val.d = (dscl * info->op1.val.d) /
1317 (info->op2.val.d / dscl);
1318 break;
1319
1320 default:
1321 return;
1322 }
1323 } else {
1324 info->op1.type = fex_float;
1325 info->op1.val.f = inst->op1->f[0];
1326 info->op2.type = fex_float;
1327 info->op2.val.f = inst->op2->f[0];
1328 info->res.type = fex_float;
1329
1330 switch (inst->op) {
1331 case addss:
1332 info->res.val.f = fscl * (fscl *
1333 info->op1.val.f + fscl * info->op2.val.f);
1334 break;
1335
1336 case subss:
1337 info->res.val.f = fscl * (fscl *
1338 info->op1.val.f - fscl * info->op2.val.f);
1339 break;
1340
1341 case mulss:
1342 info->res.val.f = (fscl * info->op1.val.f) *
1343 (fscl * info->op2.val.f);
1344 break;
1345
1346 case divss:
1347 info->res.val.f = (fscl * info->op1.val.f) /
1348 (info->op2.val.f / fscl);
1349 break;
1350
1351 default:
1352 return;
1353 }
1354 }
1355 }
1356
1357 /* put the result in the destination */
1358 stuff:
1359 if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
1360 cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1361 switch (info->res.type) {
1362 case fex_int:
1363 i = info->res.val.i;
1364 break;
1365
1366 case fex_llong:
1367 i = info->res.val.l;
1368 break;
1369
1370 case fex_float:
1371 i = info->res.val.f;
1372 break;
1373
1374 case fex_double:
1375 i = info->res.val.d;
1376 break;
1377
1378 case fex_ldouble:
1379 i = info->res.val.q;
1380 break;
1381
1382 default:
1383 break;
1384 }
1385
1386 inst->op1->i[0] = i;
1387 } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
1388 cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
1389 switch (info->res.type) {
1390 case fex_int:
1391 l = info->res.val.i;
1392 break;
1393
1394 case fex_llong:
1395 l = info->res.val.l;
1396 break;
1397
1398 case fex_float:
1399 l = info->res.val.f;
1400 break;
1401
1402 case fex_double:
1403 l = info->res.val.d;
1404 break;
1405
1406 case fex_ldouble:
1407 l = info->res.val.q;
1408 break;
1409
1410 default:
1411 break;
1412 }
1413
1414 inst->op1->l[0] = l;
1415 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1416 inst->op == cvtss2sd) {
1417 switch (info->res.type) {
1418 case fex_int:
1419 d = info->res.val.i;
1420 break;
1421
1422 case fex_llong:
1423 d = info->res.val.l;
1424 break;
1425
1426 case fex_float:
1427 d = info->res.val.f;
1428 break;
1429
1430 case fex_double:
1431 d = info->res.val.d;
1432 break;
1433
1434 case fex_ldouble:
1435 d = info->res.val.q;
1436 break;
1437
1438 default:
1439 break;
1440 }
1441
1442 inst->op1->d[0] = d;
1443 } else {
1444 switch (info->res.type) {
1445 case fex_int:
1446 f = info->res.val.i;
1447 break;
1448
1449 case fex_llong:
1450 f = info->res.val.l;
1451 break;
1452
1453 case fex_float:
1454 f = info->res.val.f;
1455 break;
1456
1457 case fex_double:
1458 f = info->res.val.d;
1459 break;
1460
1461 case fex_ldouble:
1462 f = info->res.val.q;
1463 break;
1464
1465 default:
1466 break;
1467 }
1468
1469 inst->op1->f[0] = f;
1470 }
1471 }
1472
1473 /*
1474 * Store the results from a SIMD instruction. For each i, store
1475 * the result value from info[i] in the i-th part of the destination
1476 * of the SIMD SSE instruction specified by *inst. If no result
1477 * is given but the exception indicated by e[i] is underflow or
1478 * overflow, supply the default trapped result.
1479 *
1480 * This routine does not work if the instruction specified by *inst
1481 * is not a SIMD instruction.
1482 */
1483 void
1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1485 fex_info_t *info)
1486 {
1487 sseinst_t dummy;
1488 int i;
1489
1490 /* store each part */
1491 switch (inst->op) {
1492 case cmpps:
1493 dummy.op = cmpss;
1494 dummy.imm = inst->imm;
1495
1496 for (i = 0; i < 4; i++) {
1497 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1498 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 }
1501
1502 break;
1503
1504 case minps:
1505 dummy.op = minss;
1506
1507 for (i = 0; i < 4; i++) {
1508 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1509 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1510 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1511 }
1512
1513 break;
1514
1515 case maxps:
1516 dummy.op = maxss;
1517
1518 for (i = 0; i < 4; i++) {
1519 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1520 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1521 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1522 }
1523
1524 break;
1525
1526 case addps:
1527 dummy.op = addss;
1528
1529 for (i = 0; i < 4; i++) {
1530 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1531 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1532 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1533 }
1534
1535 break;
1536
1537 case subps:
1538 dummy.op = subss;
1539
1540 for (i = 0; i < 4; i++) {
1541 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1542 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544 }
1545
1546 break;
1547
1548 case mulps:
1549 dummy.op = mulss;
1550
1551 for (i = 0; i < 4; i++) {
1552 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1553 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 }
1556
1557 break;
1558
1559 case divps:
1560 dummy.op = divss;
1561
1562 for (i = 0; i < 4; i++) {
1563 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1564 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566 }
1567
1568 break;
1569
1570 case sqrtps:
1571 dummy.op = sqrtss;
1572
1573 for (i = 0; i < 4; i++) {
1574 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1575 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1576 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1577 }
1578
1579 break;
1580
1581 case cvtdq2ps:
1582 dummy.op = cvtsi2ss;
1583
1584 for (i = 0; i < 4; i++) {
1585 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1586 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1587 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1588 }
1589
1590 break;
1591
1592 case cvttps2dq:
1593 dummy.op = cvttss2si;
1594
1595 for (i = 0; i < 4; i++) {
1596 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1597 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 }
1600
1601 break;
1602
1603 case cvtps2dq:
1604 dummy.op = cvtss2si;
1605
1606 for (i = 0; i < 4; i++) {
1607 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1608 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1609 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1610 }
1611
1612 break;
1613
1614 case cvtpi2ps:
1615 dummy.op = cvtsi2ss;
1616
1617 for (i = 0; i < 2; i++) {
1618 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1619 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1620 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1621 }
1622
1623 break;
1624
1625 case cvttps2pi:
1626 dummy.op = cvttss2si;
1627
1628 for (i = 0; i < 2; i++) {
1629 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1630 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1631 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1632 }
1633
1634 break;
1635
1636 case cvtps2pi:
1637 dummy.op = cvtss2si;
1638
1639 for (i = 0; i < 2; i++) {
1640 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1641 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1642 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1643 }
1644
1645 break;
1646
1647 case cmppd:
1648 dummy.op = cmpsd;
1649 dummy.imm = inst->imm;
1650
1651 for (i = 0; i < 2; i++) {
1652 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1653 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1654 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1655 }
1656
1657 break;
1658
1659 case minpd:
1660 dummy.op = minsd;
1661
1662 for (i = 0; i < 2; i++) {
1663 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1664 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1665 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1666 }
1667
1668 break;
1669
1670 case maxpd:
1671 dummy.op = maxsd;
1672
1673 for (i = 0; i < 2; i++) {
1674 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1675 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1676 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1677 }
1678
1679 break;
1680
1681 case addpd:
1682 dummy.op = addsd;
1683
1684 for (i = 0; i < 2; i++) {
1685 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1686 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1687 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1688 }
1689
1690 break;
1691
1692 case subpd:
1693 dummy.op = subsd;
1694
1695 for (i = 0; i < 2; i++) {
1696 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1697 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1698 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1699 }
1700
1701 break;
1702
1703 case mulpd:
1704 dummy.op = mulsd;
1705
1706 for (i = 0; i < 2; i++) {
1707 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1708 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1709 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1710 }
1711
1712 break;
1713
1714 case divpd:
1715 dummy.op = divsd;
1716
1717 for (i = 0; i < 2; i++) {
1718 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1719 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1720 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1721 }
1722
1723 break;
1724
1725 case sqrtpd:
1726 dummy.op = sqrtsd;
1727
1728 for (i = 0; i < 2; i++) {
1729 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1730 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1731 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1732 }
1733
1734 break;
1735
1736 case cvtpi2pd:
1737 case cvtdq2pd:
1738 dummy.op = cvtsi2sd;
1739
1740 for (i = 0; i < 2; i++) {
1741 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1742 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1743 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1744 }
1745
1746 break;
1747
1748 case cvttpd2pi:
1749 case cvttpd2dq:
1750 dummy.op = cvttsd2si;
1751
1752 for (i = 0; i < 2; i++) {
1753 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1754 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1755 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1756 }
1757
1758 /* for cvttpd2dq, zero the high 64 bits of the destination */
1759 if (inst->op == cvttpd2dq)
1760 inst->op1->l[1] = 0ll;
1761
1762 break;
1763
1764 case cvtpd2pi:
1765 case cvtpd2dq:
1766 dummy.op = cvtsd2si;
1767
1768 for (i = 0; i < 2; i++) {
1769 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1770 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1771 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1772 }
1773
1774 /* for cvtpd2dq, zero the high 64 bits of the destination */
1775 if (inst->op == cvtpd2dq)
1776 inst->op1->l[1] = 0ll;
1777
1778 break;
1779
1780 case cvtps2pd:
1781 dummy.op = cvtss2sd;
1782
1783 for (i = 0; i < 2; i++) {
1784 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1785 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1786 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1787 }
1788
1789 break;
1790
1791 case cvtpd2ps:
1792 dummy.op = cvtsd2ss;
1793
1794 for (i = 0; i < 2; i++) {
1795 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1796 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1797 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1798 }
1799
1800 /* zero the high 64 bits of the destination */
1801 inst->op1->l[1] = 0ll;
1802
1803 default:
1804 break;
1805 }
1806 }
|