1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25
26 /*
27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
29 */
30
31 #include <ucontext.h>
32 #include <fenv.h>
33 #if defined(__SUNPRO_C)
34 #include <sunmath.h>
35 #else
36 #include <sys/ieeefp.h>
37 #endif
38 #include "fex_handler.h"
39 #include "fenv_inlines.h"
40
/*
 * Use the EIP/EFL names for the program counter and processor status
 * slots of gregs[] when REG_PC/REG_PS have not already been defined.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts a general purpose register number, as encoded in
 * a ModRM or SIB byte (plus the REX extension bit on amd64), into the
 * matching index of uc_mcontext.gregs[].
 *
 * On amd64, register number 4 is mapped to REG_RSP explicitly; numbers
 * below 4 count down from REG_RAX and numbers above 4 count down from
 * REG_RAX + 1.  On 32-bit x86 every number simply counts down from EAX.
 *
 * The argument is parenthesized at each use so that an expression such
 * as regno(r + 4) expands correctly.  Note that X may be evaluated more
 * than once, so it must not have side effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55
56 /*
57 * Support for SSE instructions
58 */
59
60 /*
61 * Decode an SSE instruction. Fill in *inst and return the length of the
62 * instruction in bytes. Return 0 if the instruction is not recognized.
63 */
64 int
65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 {
67 unsigned char *ip;
68 char *addr;
69 int i, dbl, simd, rex, modrm, sib, r;
70
71 i = 0;
72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73
74 /* look for pseudo-prefixes */
75 dbl = 0;
76 simd = SIMD;
77
78 if (ip[i] == 0xF3) {
79 simd = 0;
80 i++;
81 } else if (ip[i] == 0x66) {
82 dbl = DOUBLE;
83 i++;
84 } else if (ip[i] == 0xF2) {
85 dbl = DOUBLE;
86 simd = 0;
87 i++;
88 }
89
90 /* look for AMD64 REX prefix */
91 rex = 0;
92
93 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
94 rex = ip[i];
95 i++;
96 }
97
98 /* parse opcode */
99 if (ip[i++] != 0x0F)
100 return (0);
101
102 switch (ip[i++]) {
103 case 0x2A:
104 inst->op = (int)cvtsi2ss + simd + dbl;
105
106 if (!simd)
107 inst->op = (int)inst->op + (rex & 8);
108
109 break;
110
111 case 0x2C:
112 inst->op = (int)cvttss2si + simd + dbl;
113
114 if (!simd)
115 inst->op = (int)inst->op + (rex & 8);
116
117 break;
118
119 case 0x2D:
120 inst->op = (int)cvtss2si + simd + dbl;
121
122 if (!simd)
123 inst->op = (int)inst->op + (rex & 8);
124
125 break;
126
127 case 0x2E:
128
129 /* oddball: scalar instruction in a SIMD opcode group */
130 if (!simd)
131 return (0);
132
133 inst->op = (int)ucomiss + dbl;
134 break;
135
136 case 0x2F:
137
138 /* oddball: scalar instruction in a SIMD opcode group */
139 if (!simd)
140 return (0);
141
142 inst->op = (int)comiss + dbl;
143 break;
144
145 case 0x51:
146 inst->op = (int)sqrtss + simd + dbl;
147 break;
148
149 case 0x58:
150 inst->op = (int)addss + simd + dbl;
151 break;
152
153 case 0x59:
154 inst->op = (int)mulss + simd + dbl;
155 break;
156
157 case 0x5A:
158 inst->op = (int)cvtss2sd + simd + dbl;
159 break;
160
161 case 0x5B:
162
163 if (dbl) {
164 if (simd)
165 inst->op = cvtps2dq;
166 else
167 return (0);
168 } else {
169 inst->op = (simd) ? cvtdq2ps : cvttps2dq;
170 }
171
172 break;
173
174 case 0x5C:
175 inst->op = (int)subss + simd + dbl;
176 break;
177
178 case 0x5D:
179 inst->op = (int)minss + simd + dbl;
180 break;
181
182 case 0x5E:
183 inst->op = (int)divss + simd + dbl;
184 break;
185
186 case 0x5F:
187 inst->op = (int)maxss + simd + dbl;
188 break;
189
190 case 0xC2:
191 inst->op = (int)cmpss + simd + dbl;
192 break;
193
194 case 0xE6:
195
196 if (simd) {
197 if (dbl)
198 inst->op = cvttpd2dq;
199 else
200 return (0);
201 } else {
202 inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
203 }
204
205 break;
206
207 default:
208 return (0);
209 }
210
211 /* locate operands */
212 modrm = ip[i++];
213
214 if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
215 cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
216 inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
217 cvttsd2siq) {
218 /* op1 is a gp register */
219 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
220 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
221 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
222 cvtpd2pi || inst->op == cvttpd2pi) {
223 /* op1 is a mmx register */
224 #ifdef __amd64
225 inst->op1 = (sseoperand_t *)
226 &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
227 .st[(modrm >> 3) & 7];
228 #else
229 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
230 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
231 .state[7]);
232 #endif
233 } else {
234 /* op1 is a xmm register */
235 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
236 inst->op1 =
237 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
238 .fpchip_state.xmm[r];
239 }
240
241 if ((modrm >> 6) == 3) {
242 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
243 cvtsi2ssq || inst->op == cvtsi2sdq) {
244 /* op2 is a gp register */
245 r = ((rex & 1) << 3) | (modrm & 7);
246 inst->op2 =
247 (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
248 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
249 /* op2 is a mmx register */
250 #ifdef __amd64
251 inst->op2 =
252 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
253 .fpchip_state.st[modrm & 7];
254 #else
255 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
256 (char *)&uap->uc_mcontext.fpregs.fp_reg_set
257 .fpchip_state.state[7]);
258 #endif
259 } else {
260 /* op2 is a xmm register */
261 r = ((rex & 1) << 3) | (modrm & 7);
262 inst->op2 =
263 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
264 .fpchip_state.xmm[r];
265 }
266 } else if ((modrm & 0xc7) == 0x05) {
267 #ifdef __amd64
268 /* address of next instruction + offset */
269 r = i + 4;
270
271 if (inst->op == cmpss || inst->op == cmpps || inst->op ==
272 cmpsd || inst->op == cmppd)
273 r++;
274
275 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
276 #else
277 /* absolute address */
278 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
279 #endif
280 i += 4;
281 } else {
282 /* complex address */
283 if ((modrm & 7) == 4) {
284 /* parse sib byte */
285 sib = ip[i++];
286
287 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
288 /* start with absolute address */
289 addr = (char *)(uintptr_t)(*(int *)(ip + i));
290 i += 4;
291 } else {
292 /* start with base */
293 r = ((rex & 1) << 3) | (sib & 7);
294 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
295 }
296
297 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
298
299 if (r != 4) {
300 /* add scaled index */
301 addr += uap->uc_mcontext.gregs[regno(r)] <<
302 (sib >> 6);
303 }
304 } else {
305 r = ((rex & 1) << 3) | (modrm & 7);
306 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
307 }
308
309 /* add displacement, if any */
310 if ((modrm >> 6) == 1) {
311 addr += (char)ip[i++];
312 } else if ((modrm >> 6) == 2) {
313 addr += *(int *)(ip + i);
314 i += 4;
315 }
316
317 inst->op2 = (sseoperand_t *)addr;
318 }
319
320 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
321 inst->op == cmppd) {
322 /* get the immediate operand */
323 inst->imm = ip[i++];
324 }
325
326 return (i);
327 }
328
329 static enum fp_class_type
330 my_fp_classf(float *x)
331 {
332 int i = *(int *)x & ~0x80000000;
333
334 if (i < 0x7f800000) {
335 if (i < 0x00800000)
336 return ((i == 0) ? fp_zero : fp_subnormal);
337
338 return (fp_normal);
339 } else if (i == 0x7f800000) {
340 return (fp_infinity);
341 } else if (i & 0x400000) {
342 return (fp_quiet);
343 } else {
344 return (fp_signaling);
345 }
346 }
347
348 static enum fp_class_type
349 my_fp_class(double *x)
350 {
351 int i = *(1 + (int *)x) & ~0x80000000;
352
353 if (i < 0x7ff00000) {
354 if (i < 0x00100000)
355 return (((i | *(int *)x) == 0) ? fp_zero :
356 fp_subnormal);
357
358 return (fp_normal);
359 } else if (i == 0x7ff00000 && *(int *)x == 0) {
360 return (fp_infinity);
361 } else if (i & 0x80000) {
362 return (fp_quiet);
363 } else {
364 return (fp_signaling);
365 }
366 }
367
368 /*
369 * Inspect a scalar SSE instruction that incurred an invalid operation
370 * exception to determine which type of exception it was.
371 */
372 static enum fex_exception
373 __fex_get_sse_invalid_type(sseinst_t *inst)
374 {
375 enum fp_class_type t1, t2;
376
377 /* check op2 for signaling nan */
378 t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
379 my_fp_classf(&inst->op2->f[0]);
380
381 if (t2 == fp_signaling)
382 return (fex_inv_snan);
383
384 /* eliminate all single-operand instructions */
385 switch (inst->op) {
386 case cvtsd2ss:
387 case cvtss2sd:
388 /* hmm, this shouldn't have happened */
389 return ((enum fex_exception)-1);
390
391 case sqrtss:
392 case sqrtsd:
393 return (fex_inv_sqrt);
394
395 case cvtss2si:
396 case cvtsd2si:
397 case cvttss2si:
398 case cvttsd2si:
399 case cvtss2siq:
400 case cvtsd2siq:
401 case cvttss2siq:
402 case cvttsd2siq:
403 return (fex_inv_int);
404 default:
405 break;
406 }
407
408 /* check op1 for signaling nan */
409 t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
410 my_fp_classf(&inst->op1->f[0]);
411
412 if (t1 == fp_signaling)
413 return (fex_inv_snan);
414
415 /* check two-operand instructions for other cases */
416 switch (inst->op) {
417 case cmpss:
418 case cmpsd:
419 case minss:
420 case minsd:
421 case maxss:
422 case maxsd:
423 case comiss:
424 case comisd:
425 return (fex_inv_cmp);
426
427 case addss:
428 case addsd:
429 case subss:
430 case subsd:
431
432 if (t1 == fp_infinity && t2 == fp_infinity)
433 return (fex_inv_isi);
434
435 break;
436
437 case mulss:
438 case mulsd:
439
440 if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
441 t1 == fp_infinity))
442 return (fex_inv_zmi);
443
444 break;
445
446 case divss:
447 case divsd:
448
449 if (t1 == fp_zero && t2 == fp_zero)
450 return (fex_inv_zdz);
451
452 if (t1 == fp_infinity && t2 == fp_infinity)
453 return (fex_inv_idi);
454
455 default:
456 break;
457 }
458
459 return ((enum fex_exception)-1);
460 }
461
/*
 * Inline templates.  Each routine wraps a single scalar SSE
 * instruction; the definitions live outside this file.  Operands are
 * passed by address and, where there is a result, it is stored through
 * the last argument.
 */

/* single precision: compares producing a mask */
extern void sse_cmpeqss(float *a, float *b, int *res);
extern void sse_cmpltss(float *a, float *b, int *res);
extern void sse_cmpless(float *a, float *b, int *res);
extern void sse_cmpunordss(float *a, float *b, int *res);

/* single precision: arithmetic */
extern void sse_minss(float *a, float *b, float *res);
extern void sse_maxss(float *a, float *b, float *res);
extern void sse_addss(float *a, float *b, float *res);
extern void sse_subss(float *a, float *b, float *res);
extern void sse_mulss(float *a, float *b, float *res);
extern void sse_divss(float *a, float *b, float *res);
extern void sse_sqrtss(float *a, float *res);

/* single precision: compares with no stored result */
extern void sse_ucomiss(float *a, float *b);
extern void sse_comiss(float *a, float *b);

/* single precision: conversions */
extern void sse_cvtss2sd(float *a, double *res);
extern void sse_cvtsi2ss(int *a, float *res);
extern void sse_cvttss2si(float *a, int *res);
extern void sse_cvtss2si(float *a, int *res);

#ifdef __amd64
extern void sse_cvtsi2ssq(long long *a, float *res);
extern void sse_cvttss2siq(float *a, long long *res);
extern void sse_cvtss2siq(float *a, long long *res);
#endif

/* double precision: compares producing a mask */
extern void sse_cmpeqsd(double *a, double *b, long long *res);
extern void sse_cmpltsd(double *a, double *b, long long *res);
extern void sse_cmplesd(double *a, double *b, long long *res);
extern void sse_cmpunordsd(double *a, double *b, long long *res);

/* double precision: arithmetic */
extern void sse_minsd(double *a, double *b, double *res);
extern void sse_maxsd(double *a, double *b, double *res);
extern void sse_addsd(double *a, double *b, double *res);
extern void sse_subsd(double *a, double *b, double *res);
extern void sse_mulsd(double *a, double *b, double *res);
extern void sse_divsd(double *a, double *b, double *res);
extern void sse_sqrtsd(double *a, double *res);

/* double precision: compares with no stored result */
extern void sse_ucomisd(double *a, double *b);
extern void sse_comisd(double *a, double *b);

/* double precision: conversions */
extern void sse_cvtsd2ss(double *a, float *res);
extern void sse_cvtsi2sd(int *a, double *res);
extern void sse_cvttsd2si(double *a, int *res);
extern void sse_cvtsd2si(double *a, int *res);

#ifdef __amd64
extern void sse_cvtsi2sdq(long long *a, double *res);
extern void sse_cvttsd2siq(double *a, long long *res);
extern void sse_cvtsd2siq(double *a, long long *res);
#endif
510
511 /*
512 * Fill in *info with the operands, default untrapped result, and
513 * flags produced by a scalar SSE instruction, and return the type
514 * of trapped exception (if any). On entry, the mxcsr must have
515 * all exceptions masked and all flags clear. The same conditions
516 * will hold on exit.
517 *
518 * This routine does not work if the instruction specified by *inst
519 * is not a scalar instruction.
520 */
521 enum fex_exception
522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
523 {
524 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
525
526 /*
527 * Perform the operation with traps disabled and check the
528 * exception flags. If the underflow trap was enabled, also
529 * check for an exact subnormal result.
530 */
531 __fenv_getmxcsr(&oldmxcsr);
532 subnorm = 0;
533
534 if ((int)inst->op & DOUBLE) {
535 if (inst->op == cvtsi2sd) {
536 info->op1.type = fex_int;
537 info->op1.val.i = inst->op2->i[0];
538 info->op2.type = fex_nodata;
539 } else if (inst->op == cvtsi2sdq) {
540 info->op1.type = fex_llong;
541 info->op1.val.l = inst->op2->l[0];
542 info->op2.type = fex_nodata;
543 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
544 inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
545 cvttsd2siq || inst->op == cvtsd2siq) {
546 info->op1.type = fex_double;
547 info->op1.val.d = inst->op2->d[0];
548 info->op2.type = fex_nodata;
549 } else {
550 info->op1.type = fex_double;
551 info->op1.val.d = inst->op1->d[0];
552 info->op2.type = fex_double;
553 info->op2.val.d = inst->op2->d[0];
554 }
555
556 info->res.type = fex_double;
557
558 switch (inst->op) {
559 case cmpsd:
560 info->op = fex_cmp;
561 info->res.type = fex_llong;
562
563 switch (inst->imm & 3) {
564 case 0:
565 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
566 &info->res.val.l);
567 break;
568
569 case 1:
570 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
571 &info->res.val.l);
572 break;
573
574 case 2:
575 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
576 &info->res.val.l);
577 break;
578
579 case 3:
580 sse_cmpunordsd(&info->op1.val.d,
581 &info->op2.val.d, &info->res.val.l);
582 }
583
584 if (inst->imm & 4)
585 info->res.val.l ^= 0xffffffffffffffffull;
586
587 break;
588
589 case minsd:
590 info->op = fex_other;
591 sse_minsd(&info->op1.val.d, &info->op2.val.d,
592 &info->res.val.d);
593 break;
594
595 case maxsd:
596 info->op = fex_other;
597 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
598 &info->res.val.d);
599 break;
600
601 case addsd:
602 info->op = fex_add;
603 sse_addsd(&info->op1.val.d, &info->op2.val.d,
604 &info->res.val.d);
605
606 if (my_fp_class(&info->res.val.d) == fp_subnormal)
607 subnorm = 1;
608
609 break;
610
611 case subsd:
612 info->op = fex_sub;
613 sse_subsd(&info->op1.val.d, &info->op2.val.d,
614 &info->res.val.d);
615
616 if (my_fp_class(&info->res.val.d) == fp_subnormal)
617 subnorm = 1;
618
619 break;
620
621 case mulsd:
622 info->op = fex_mul;
623 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
624 &info->res.val.d);
625
626 if (my_fp_class(&info->res.val.d) == fp_subnormal)
627 subnorm = 1;
628
629 break;
630
631 case divsd:
632 info->op = fex_div;
633 sse_divsd(&info->op1.val.d, &info->op2.val.d,
634 &info->res.val.d);
635
636 if (my_fp_class(&info->res.val.d) == fp_subnormal)
637 subnorm = 1;
638
639 break;
640
641 case sqrtsd:
642 info->op = fex_sqrt;
643 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
644 break;
645
646 case cvtsd2ss:
647 info->op = fex_cnvt;
648 info->res.type = fex_float;
649 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
650
651 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
652 subnorm = 1;
653
654 break;
655
656 case cvtsi2sd:
657 info->op = fex_cnvt;
658 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
659 break;
660
661 case cvttsd2si:
662 info->op = fex_cnvt;
663 info->res.type = fex_int;
664 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
665 break;
666
667 case cvtsd2si:
668 info->op = fex_cnvt;
669 info->res.type = fex_int;
670 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
671 break;
672
673 #ifdef __amd64
674 case cvtsi2sdq:
675 info->op = fex_cnvt;
676 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
677 break;
678
679 case cvttsd2siq:
680 info->op = fex_cnvt;
681 info->res.type = fex_llong;
682 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
683 break;
684
685 case cvtsd2siq:
686 info->op = fex_cnvt;
687 info->res.type = fex_llong;
688 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
689 break;
690 #endif
691
692 case ucomisd:
693 info->op = fex_cmp;
694 info->res.type = fex_nodata;
695 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
696 break;
697
698 case comisd:
699 info->op = fex_cmp;
700 info->res.type = fex_nodata;
701 sse_comisd(&info->op1.val.d, &info->op2.val.d);
702 break;
703 default:
704 break;
705 }
706 } else {
707 if (inst->op == cvtsi2ss) {
708 info->op1.type = fex_int;
709 info->op1.val.i = inst->op2->i[0];
710 info->op2.type = fex_nodata;
711 } else if (inst->op == cvtsi2ssq) {
712 info->op1.type = fex_llong;
713 info->op1.val.l = inst->op2->l[0];
714 info->op2.type = fex_nodata;
715 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
716 inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
717 cvttss2siq || inst->op == cvtss2siq) {
718 info->op1.type = fex_float;
719 info->op1.val.f = inst->op2->f[0];
720 info->op2.type = fex_nodata;
721 } else {
722 info->op1.type = fex_float;
723 info->op1.val.f = inst->op1->f[0];
724 info->op2.type = fex_float;
725 info->op2.val.f = inst->op2->f[0];
726 }
727
728 info->res.type = fex_float;
729
730 switch (inst->op) {
731 case cmpss:
732 info->op = fex_cmp;
733 info->res.type = fex_int;
734
735 switch (inst->imm & 3) {
736 case 0:
737 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
738 &info->res.val.i);
739 break;
740
741 case 1:
742 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
743 &info->res.val.i);
744 break;
745
746 case 2:
747 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
748 &info->res.val.i);
749 break;
750
751 case 3:
752 sse_cmpunordss(&info->op1.val.f,
753 &info->op2.val.f, &info->res.val.i);
754 }
755
756 if (inst->imm & 4)
757 info->res.val.i ^= 0xffffffffu;
758
759 break;
760
761 case minss:
762 info->op = fex_other;
763 sse_minss(&info->op1.val.f, &info->op2.val.f,
764 &info->res.val.f);
765 break;
766
767 case maxss:
768 info->op = fex_other;
769 sse_maxss(&info->op1.val.f, &info->op2.val.f,
770 &info->res.val.f);
771 break;
772
773 case addss:
774 info->op = fex_add;
775 sse_addss(&info->op1.val.f, &info->op2.val.f,
776 &info->res.val.f);
777
778 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
779 subnorm = 1;
780
781 break;
782
783 case subss:
784 info->op = fex_sub;
785 sse_subss(&info->op1.val.f, &info->op2.val.f,
786 &info->res.val.f);
787
788 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
789 subnorm = 1;
790
791 break;
792
793 case mulss:
794 info->op = fex_mul;
795 sse_mulss(&info->op1.val.f, &info->op2.val.f,
796 &info->res.val.f);
797
798 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
799 subnorm = 1;
800
801 break;
802
803 case divss:
804 info->op = fex_div;
805 sse_divss(&info->op1.val.f, &info->op2.val.f,
806 &info->res.val.f);
807
808 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
809 subnorm = 1;
810
811 break;
812
813 case sqrtss:
814 info->op = fex_sqrt;
815 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
816 break;
817
818 case cvtss2sd:
819 info->op = fex_cnvt;
820 info->res.type = fex_double;
821 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
822 break;
823
824 case cvtsi2ss:
825 info->op = fex_cnvt;
826 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
827 break;
828
829 case cvttss2si:
830 info->op = fex_cnvt;
831 info->res.type = fex_int;
832 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
833 break;
834
835 case cvtss2si:
836 info->op = fex_cnvt;
837 info->res.type = fex_int;
838 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
839 break;
840
841 #ifdef __amd64
842 case cvtsi2ssq:
843 info->op = fex_cnvt;
844 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
845 break;
846
847 case cvttss2siq:
848 info->op = fex_cnvt;
849 info->res.type = fex_llong;
850 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
851 break;
852
853 case cvtss2siq:
854 info->op = fex_cnvt;
855 info->res.type = fex_llong;
856 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
857 break;
858 #endif
859
860 case ucomiss:
861 info->op = fex_cmp;
862 info->res.type = fex_nodata;
863 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
864 break;
865
866 case comiss:
867 info->op = fex_cmp;
868 info->res.type = fex_nodata;
869 sse_comiss(&info->op1.val.f, &info->op2.val.f);
870 break;
871 default:
872 break;
873 }
874 }
875
876 __fenv_getmxcsr(&mxcsr);
877 info->flags = mxcsr & 0x3d;
878 __fenv_setmxcsr(&oldmxcsr);
879
880 /* determine which exception would have been trapped */
881 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
882 0x3d;
883 e = mxcsr & te;
884
885 if (e & FE_INVALID)
886 return (__fex_get_sse_invalid_type(inst));
887
888 if (e & FE_DIVBYZERO)
889 return (fex_division);
890
891 if (e & FE_OVERFLOW)
892 return (fex_overflow);
893
894 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
895 return (fex_underflow);
896
897 if (e & FE_INEXACT)
898 return (fex_inexact);
899
900 return ((enum fex_exception)-1);
901 }
902
903 /*
904 * Emulate a SIMD SSE instruction to determine which exceptions occur
905 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
906 * trapped exception that would occur if the i-th part of the SIMD
907 * instruction were executed in isolation; set e[i] to -1 if no
908 * trapped exception would occur in this part. Also fill in info[i]
909 * with the corresponding operands, default untrapped result, and
910 * flags.
911 *
912 * This routine does not work if the instruction specified by *inst
913 * is not a SIMD instruction.
914 */
915 void
916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
917 fex_info_t *info)
918 {
919 sseinst_t dummy;
920 int i;
921
922 e[0] = e[1] = e[2] = e[3] = -1;
923
924 /* perform each part of the SIMD operation */
925 switch (inst->op) {
926 case cmpps:
927 dummy.op = cmpss;
928 dummy.imm = inst->imm;
929
930 for (i = 0; i < 4; i++) {
931 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
932 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
933 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
934 }
935
936 break;
937
938 case minps:
939 dummy.op = minss;
940
941 for (i = 0; i < 4; i++) {
942 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
945 }
946
947 break;
948
949 case maxps:
950 dummy.op = maxss;
951
952 for (i = 0; i < 4; i++) {
953 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
954 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
955 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
956 }
957
958 break;
959
960 case addps:
961 dummy.op = addss;
962
963 for (i = 0; i < 4; i++) {
964 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
965 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
966 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
967 }
968
969 break;
970
971 case subps:
972 dummy.op = subss;
973
974 for (i = 0; i < 4; i++) {
975 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
976 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
977 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
978 }
979
980 break;
981
982 case mulps:
983 dummy.op = mulss;
984
985 for (i = 0; i < 4; i++) {
986 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
987 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
988 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
989 }
990
991 break;
992
993 case divps:
994 dummy.op = divss;
995
996 for (i = 0; i < 4; i++) {
997 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
998 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000 }
1001
1002 break;
1003
1004 case sqrtps:
1005 dummy.op = sqrtss;
1006
1007 for (i = 0; i < 4; i++) {
1008 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1009 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1010 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1011 }
1012
1013 break;
1014
1015 case cvtdq2ps:
1016 dummy.op = cvtsi2ss;
1017
1018 for (i = 0; i < 4; i++) {
1019 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022 }
1023
1024 break;
1025
1026 case cvttps2dq:
1027 dummy.op = cvttss2si;
1028
1029 for (i = 0; i < 4; i++) {
1030 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1031 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1032 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1033 }
1034
1035 break;
1036
1037 case cvtps2dq:
1038 dummy.op = cvtss2si;
1039
1040 for (i = 0; i < 4; i++) {
1041 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1042 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 }
1045
1046 break;
1047
1048 case cvtpi2ps:
1049 dummy.op = cvtsi2ss;
1050
1051 for (i = 0; i < 2; i++) {
1052 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055 }
1056
1057 break;
1058
1059 case cvttps2pi:
1060 dummy.op = cvttss2si;
1061
1062 for (i = 0; i < 2; i++) {
1063 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1064 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1065 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1066 }
1067
1068 break;
1069
1070 case cvtps2pi:
1071 dummy.op = cvtss2si;
1072
1073 for (i = 0; i < 2; i++) {
1074 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1075 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1076 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077 }
1078
1079 break;
1080
1081 case cmppd:
1082 dummy.op = cmpsd;
1083 dummy.imm = inst->imm;
1084
1085 for (i = 0; i < 2; i++) {
1086 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1087 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1088 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1089 }
1090
1091 break;
1092
1093 case minpd:
1094 dummy.op = minsd;
1095
1096 for (i = 0; i < 2; i++) {
1097 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1098 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1099 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1100 }
1101
1102 break;
1103
1104 case maxpd:
1105 dummy.op = maxsd;
1106
1107 for (i = 0; i < 2; i++) {
1108 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1109 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1110 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1111 }
1112
1113 break;
1114
1115 case addpd:
1116 dummy.op = addsd;
1117
1118 for (i = 0; i < 2; i++) {
1119 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1120 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1121 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1122 }
1123
1124 break;
1125
1126 case subpd:
1127 dummy.op = subsd;
1128
1129 for (i = 0; i < 2; i++) {
1130 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1131 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133 }
1134
1135 break;
1136
1137 case mulpd:
1138 dummy.op = mulsd;
1139
1140 for (i = 0; i < 2; i++) {
1141 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1142 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1143 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1144 }
1145
1146 break;
1147
1148 case divpd:
1149 dummy.op = divsd;
1150
1151 for (i = 0; i < 2; i++) {
1152 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1153 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1154 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1155 }
1156
1157 break;
1158
1159 case sqrtpd:
1160 dummy.op = sqrtsd;
1161
1162 for (i = 0; i < 2; i++) {
1163 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1164 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1165 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1166 }
1167
1168 break;
1169
1170 case cvtpi2pd:
1171 case cvtdq2pd:
1172 dummy.op = cvtsi2sd;
1173
1174 for (i = 0; i < 2; i++) {
1175 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1176 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1177 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1178 }
1179
1180 break;
1181
1182 case cvttpd2pi:
1183 case cvttpd2dq:
1184 dummy.op = cvttsd2si;
1185
1186 for (i = 0; i < 2; i++) {
1187 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1188 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1189 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1190 }
1191
1192 break;
1193
1194 case cvtpd2pi:
1195 case cvtpd2dq:
1196 dummy.op = cvtsd2si;
1197
1198 for (i = 0; i < 2; i++) {
1199 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1200 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1201 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1202 }
1203
1204 break;
1205
1206 case cvtps2pd:
1207 dummy.op = cvtss2sd;
1208
1209 for (i = 0; i < 2; i++) {
1210 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1211 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1212 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1213 }
1214
1215 break;
1216
1217 case cvtpd2ps:
1218 dummy.op = cvtsd2ss;
1219
1220 for (i = 0; i < 2; i++) {
1221 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1222 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1223 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1224 }
1225
1226 default:
1227 break;
1228 }
1229 }
1230
1231 /*
1232 * Store the result value from *info in the destination of the scalar
1233 * SSE instruction specified by *inst. If no result is given but the
1234 * exception is underflow or overflow, supply the default trapped result.
1235 *
1236 * This routine does not work if the instruction specified by *inst
1237 * is not a scalar instruction.
1238 */
1239 void
1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1241 fex_info_t *info)
1242 {
1243 int i = 0;
1244 long long l = 0L;
1245 float f = 0.0, fscl;
1246 double d = 0.0L, dscl;
1247
1248 /*
1249 * for compares that write eflags, just set the flags
1250 * to indicate "unordered"
1251 */
1252 if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
1253 inst->op == comisd) {
1254 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1255 return;
1256 }
1257
1258 /*
1259 * if info doesn't specify a result value, try to generate
1260 * the default trapped result
1261 */
1262 if (info->res.type == fex_nodata) {
1263 /* set scale factors for exponent wrapping */
1264 switch (e) {
1265 case fex_overflow:
1266 fscl = 1.262177448e-29f; /* 2^-96 */
1267 dscl = 6.441148769597133308e-232; /* 2^-768 */
1268 break;
1269
1270 case fex_underflow:
1271 fscl = 7.922816251e+28f; /* 2^96 */
1272 dscl = 1.552518092300708935e+231; /* 2^768 */
1273 break;
1274
1275 default:
1276 (void) __fex_get_sse_op(uap, inst, info);
1277
1278 if (info->res.type == fex_nodata)
1279 return;
1280
1281 goto stuff;
1282 }
1283
1284 /* generate the wrapped result */
1285 if (inst->op == cvtsd2ss) {
1286 info->op1.type = fex_double;
1287 info->op1.val.d = inst->op2->d[0];
1288 info->op2.type = fex_nodata;
1289 info->res.type = fex_float;
1290 info->res.val.f = (float)(fscl * (fscl *
1291 info->op1.val.d));
1292 } else if ((int)inst->op & DOUBLE) {
1293 info->op1.type = fex_double;
1294 info->op1.val.d = inst->op1->d[0];
1295 info->op2.type = fex_double;
1296 info->op2.val.d = inst->op2->d[0];
1297 info->res.type = fex_double;
1298
1299 switch (inst->op) {
1300 case addsd:
1301 info->res.val.d = dscl * (dscl *
1302 info->op1.val.d + dscl * info->op2.val.d);
1303 break;
1304
1305 case subsd:
1306 info->res.val.d = dscl * (dscl *
1307 info->op1.val.d - dscl * info->op2.val.d);
1308 break;
1309
1310 case mulsd:
1311 info->res.val.d = (dscl * info->op1.val.d) *
1312 (dscl * info->op2.val.d);
1313 break;
1314
1315 case divsd:
1316 info->res.val.d = (dscl * info->op1.val.d) /
1317 (info->op2.val.d / dscl);
1318 break;
1319
1320 default:
1321 return;
1322 }
1323 } else {
1324 info->op1.type = fex_float;
1325 info->op1.val.f = inst->op1->f[0];
1326 info->op2.type = fex_float;
1327 info->op2.val.f = inst->op2->f[0];
1328 info->res.type = fex_float;
1329
1330 switch (inst->op) {
1331 case addss:
1332 info->res.val.f = fscl * (fscl *
1333 info->op1.val.f + fscl * info->op2.val.f);
1334 break;
1335
1336 case subss:
1337 info->res.val.f = fscl * (fscl *
1338 info->op1.val.f - fscl * info->op2.val.f);
1339 break;
1340
1341 case mulss:
1342 info->res.val.f = (fscl * info->op1.val.f) *
1343 (fscl * info->op2.val.f);
1344 break;
1345
1346 case divss:
1347 info->res.val.f = (fscl * info->op1.val.f) /
1348 (info->op2.val.f / fscl);
1349 break;
1350
1351 default:
1352 return;
1353 }
1354 }
1355 }
1356
1357 /* put the result in the destination */
1358 stuff:
1359 if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
1360 cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1361 switch (info->res.type) {
1362 case fex_int:
1363 i = info->res.val.i;
1364 break;
1365
1366 case fex_llong:
1367 i = info->res.val.l;
1368 break;
1369
1370 case fex_float:
1371 i = info->res.val.f;
1372 break;
1373
1374 case fex_double:
1375 i = info->res.val.d;
1376 break;
1377
1378 case fex_ldouble:
1379 i = info->res.val.q;
1380 break;
1381
1382 default:
1383 break;
1384 }
1385
1386 inst->op1->i[0] = i;
1387 } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
1388 cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
1389 switch (info->res.type) {
1390 case fex_int:
1391 l = info->res.val.i;
1392 break;
1393
1394 case fex_llong:
1395 l = info->res.val.l;
1396 break;
1397
1398 case fex_float:
1399 l = info->res.val.f;
1400 break;
1401
1402 case fex_double:
1403 l = info->res.val.d;
1404 break;
1405
1406 case fex_ldouble:
1407 l = info->res.val.q;
1408 break;
1409
1410 default:
1411 break;
1412 }
1413
1414 inst->op1->l[0] = l;
1415 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1416 inst->op == cvtss2sd) {
1417 switch (info->res.type) {
1418 case fex_int:
1419 d = info->res.val.i;
1420 break;
1421
1422 case fex_llong:
1423 d = info->res.val.l;
1424 break;
1425
1426 case fex_float:
1427 d = info->res.val.f;
1428 break;
1429
1430 case fex_double:
1431 d = info->res.val.d;
1432 break;
1433
1434 case fex_ldouble:
1435 d = info->res.val.q;
1436 break;
1437
1438 default:
1439 break;
1440 }
1441
1442 inst->op1->d[0] = d;
1443 } else {
1444 switch (info->res.type) {
1445 case fex_int:
1446 f = info->res.val.i;
1447 break;
1448
1449 case fex_llong:
1450 f = info->res.val.l;
1451 break;
1452
1453 case fex_float:
1454 f = info->res.val.f;
1455 break;
1456
1457 case fex_double:
1458 f = info->res.val.d;
1459 break;
1460
1461 case fex_ldouble:
1462 f = info->res.val.q;
1463 break;
1464
1465 default:
1466 break;
1467 }
1468
1469 inst->op1->f[0] = f;
1470 }
1471 }
1472
1473 /*
1474 * Store the results from a SIMD instruction. For each i, store
1475 * the result value from info[i] in the i-th part of the destination
1476 * of the SIMD SSE instruction specified by *inst. If no result
1477 * is given but the exception indicated by e[i] is underflow or
1478 * overflow, supply the default trapped result.
1479 *
1480 * This routine does not work if the instruction specified by *inst
1481 * is not a SIMD instruction.
1482 */
1483 void
1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1485 fex_info_t *info)
1486 {
1487 sseinst_t dummy;
1488 int i;
1489
1490 /* store each part */
1491 switch (inst->op) {
1492 case cmpps:
1493 dummy.op = cmpss;
1494 dummy.imm = inst->imm;
1495
1496 for (i = 0; i < 4; i++) {
1497 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1498 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 }
1501
1502 break;
1503
1504 case minps:
1505 dummy.op = minss;
1506
1507 for (i = 0; i < 4; i++) {
1508 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1509 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1510 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1511 }
1512
1513 break;
1514
1515 case maxps:
1516 dummy.op = maxss;
1517
1518 for (i = 0; i < 4; i++) {
1519 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1520 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1521 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1522 }
1523
1524 break;
1525
1526 case addps:
1527 dummy.op = addss;
1528
1529 for (i = 0; i < 4; i++) {
1530 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1531 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1532 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1533 }
1534
1535 break;
1536
1537 case subps:
1538 dummy.op = subss;
1539
1540 for (i = 0; i < 4; i++) {
1541 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1542 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544 }
1545
1546 break;
1547
1548 case mulps:
1549 dummy.op = mulss;
1550
1551 for (i = 0; i < 4; i++) {
1552 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1553 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 }
1556
1557 break;
1558
1559 case divps:
1560 dummy.op = divss;
1561
1562 for (i = 0; i < 4; i++) {
1563 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1564 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566 }
1567
1568 break;
1569
1570 case sqrtps:
1571 dummy.op = sqrtss;
1572
1573 for (i = 0; i < 4; i++) {
1574 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1575 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1576 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1577 }
1578
1579 break;
1580
1581 case cvtdq2ps:
1582 dummy.op = cvtsi2ss;
1583
1584 for (i = 0; i < 4; i++) {
1585 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1586 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1587 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1588 }
1589
1590 break;
1591
1592 case cvttps2dq:
1593 dummy.op = cvttss2si;
1594
1595 for (i = 0; i < 4; i++) {
1596 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1597 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 }
1600
1601 break;
1602
1603 case cvtps2dq:
1604 dummy.op = cvtss2si;
1605
1606 for (i = 0; i < 4; i++) {
1607 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1608 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1609 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1610 }
1611
1612 break;
1613
1614 case cvtpi2ps:
1615 dummy.op = cvtsi2ss;
1616
1617 for (i = 0; i < 2; i++) {
1618 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1619 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1620 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1621 }
1622
1623 break;
1624
1625 case cvttps2pi:
1626 dummy.op = cvttss2si;
1627
1628 for (i = 0; i < 2; i++) {
1629 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1630 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1631 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1632 }
1633
1634 break;
1635
1636 case cvtps2pi:
1637 dummy.op = cvtss2si;
1638
1639 for (i = 0; i < 2; i++) {
1640 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1641 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1642 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1643 }
1644
1645 break;
1646
1647 case cmppd:
1648 dummy.op = cmpsd;
1649 dummy.imm = inst->imm;
1650
1651 for (i = 0; i < 2; i++) {
1652 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1653 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1654 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1655 }
1656
1657 break;
1658
1659 case minpd:
1660 dummy.op = minsd;
1661
1662 for (i = 0; i < 2; i++) {
1663 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1664 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1665 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1666 }
1667
1668 break;
1669
1670 case maxpd:
1671 dummy.op = maxsd;
1672
1673 for (i = 0; i < 2; i++) {
1674 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1675 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1676 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1677 }
1678
1679 break;
1680
1681 case addpd:
1682 dummy.op = addsd;
1683
1684 for (i = 0; i < 2; i++) {
1685 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1686 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1687 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1688 }
1689
1690 break;
1691
1692 case subpd:
1693 dummy.op = subsd;
1694
1695 for (i = 0; i < 2; i++) {
1696 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1697 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1698 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1699 }
1700
1701 break;
1702
1703 case mulpd:
1704 dummy.op = mulsd;
1705
1706 for (i = 0; i < 2; i++) {
1707 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1708 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1709 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1710 }
1711
1712 break;
1713
1714 case divpd:
1715 dummy.op = divsd;
1716
1717 for (i = 0; i < 2; i++) {
1718 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1719 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1720 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1721 }
1722
1723 break;
1724
1725 case sqrtpd:
1726 dummy.op = sqrtsd;
1727
1728 for (i = 0; i < 2; i++) {
1729 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1730 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1731 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1732 }
1733
1734 break;
1735
1736 case cvtpi2pd:
1737 case cvtdq2pd:
1738 dummy.op = cvtsi2sd;
1739
1740 for (i = 0; i < 2; i++) {
1741 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1742 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1743 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1744 }
1745
1746 break;
1747
1748 case cvttpd2pi:
1749 case cvttpd2dq:
1750 dummy.op = cvttsd2si;
1751
1752 for (i = 0; i < 2; i++) {
1753 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1754 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1755 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1756 }
1757
1758 /* for cvttpd2dq, zero the high 64 bits of the destination */
1759 if (inst->op == cvttpd2dq)
1760 inst->op1->l[1] = 0ll;
1761
1762 break;
1763
1764 case cvtpd2pi:
1765 case cvtpd2dq:
1766 dummy.op = cvtsd2si;
1767
1768 for (i = 0; i < 2; i++) {
1769 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1770 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1771 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1772 }
1773
1774 /* for cvtpd2dq, zero the high 64 bits of the destination */
1775 if (inst->op == cvtpd2dq)
1776 inst->op1->l[1] = 0ll;
1777
1778 break;
1779
1780 case cvtps2pd:
1781 dummy.op = cvtss2sd;
1782
1783 for (i = 0; i < 2; i++) {
1784 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1785 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1786 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1787 }
1788
1789 break;
1790
1791 case cvtpd2ps:
1792 dummy.op = cvtsd2ss;
1793
1794 for (i = 0; i < 2; i++) {
1795 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1796 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1797 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1798 }
1799
1800 /* zero the high 64 bits of the destination */
1801 inst->op1->l[1] = 0ll;
1802
1803 default:
1804 break;
1805 }
1806 }