1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
30 #include "fenv_synonyms.h"
31 #include <ucontext.h>
32 #include <fenv.h>
33 #if defined(__SUNPRO_C)
34 #include <sunmath.h>
35 #else
36 #include <sys/ieeefp.h>
37 #endif
38 #include "fex_handler.h"
39 #include "fenv_inlines.h"
40
/*
 * Indices into uc_mcontext.gregs[] for the program counter and the
 * processor flags.  When the generic names are not already defined,
 * fall back to EIP and EFL.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts a general purpose register number X, as assembled
 * from the ModRM/SIB/REX fields of an instruction, into the matching
 * index in uc_mcontext.gregs[].
 *
 * On amd64, numbers below 4 and above 4 are computed relative to
 * REG_RAX, and number 4 selects REG_RSP.  Otherwise the index is simply
 * EAX - X.
 *
 * The argument is fully parenthesized so that an expression such as
 * regno(r + 1) expands correctly.  Note that the amd64 variant evaluates
 * its argument more than once, so the argument must not have side
 * effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4)? REG_RAX - (X) : \
			(((X) > 4)? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55
56 /*
57 * Support for SSE instructions
58 */
59
60 /*
61 * Decode an SSE instruction. Fill in *inst and return the length of the
62 * instruction in bytes. Return 0 if the instruction is not recognized.
63 */
64 int
65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 {
67 unsigned char *ip;
68 char *addr;
69 int i, dbl, simd, rex, modrm, sib, r;
70
71 i = 0;
72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73
74 /* look for pseudo-prefixes */
75 dbl = 0;
76 simd = SIMD;
77 if (ip[i] == 0xF3) {
78 simd = 0;
79 i++;
80 } else if (ip[i] == 0x66) {
81 dbl = DOUBLE;
82 i++;
83 } else if (ip[i] == 0xF2) {
84 dbl = DOUBLE;
85 simd = 0;
86 i++;
87 }
88
89 /* look for AMD64 REX prefix */
90 rex = 0;
91 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
92 rex = ip[i];
93 i++;
94 }
95
96 /* parse opcode */
97 if (ip[i++] != 0x0F)
98 return 0;
99 switch (ip[i++]) {
100 case 0x2A:
101 inst->op = (int)cvtsi2ss + simd + dbl;
102 if (!simd)
103 inst->op = (int)inst->op + (rex & 8);
104 break;
105
106 case 0x2C:
107 inst->op = (int)cvttss2si + simd + dbl;
108 if (!simd)
109 inst->op = (int)inst->op + (rex & 8);
110 break;
111
112 case 0x2D:
113 inst->op = (int)cvtss2si + simd + dbl;
114 if (!simd)
115 inst->op = (int)inst->op + (rex & 8);
116 break;
117
118 case 0x2E:
119 /* oddball: scalar instruction in a SIMD opcode group */
120 if (!simd)
121 return 0;
122 inst->op = (int)ucomiss + dbl;
123 break;
124
125 case 0x2F:
126 /* oddball: scalar instruction in a SIMD opcode group */
127 if (!simd)
128 return 0;
129 inst->op = (int)comiss + dbl;
130 break;
131
132 case 0x51:
133 inst->op = (int)sqrtss + simd + dbl;
134 break;
135
136 case 0x58:
137 inst->op = (int)addss + simd + dbl;
138 break;
139
140 case 0x59:
141 inst->op = (int)mulss + simd + dbl;
142 break;
143
144 case 0x5A:
145 inst->op = (int)cvtss2sd + simd + dbl;
146 break;
147
148 case 0x5B:
149 if (dbl) {
150 if (simd)
151 inst->op = cvtps2dq;
152 else
153 return 0;
154 } else {
155 inst->op = (simd)? cvtdq2ps : cvttps2dq;
156 }
157 break;
158
159 case 0x5C:
160 inst->op = (int)subss + simd + dbl;
161 break;
162
163 case 0x5D:
164 inst->op = (int)minss + simd + dbl;
165 break;
166
167 case 0x5E:
168 inst->op = (int)divss + simd + dbl;
169 break;
170
171 case 0x5F:
172 inst->op = (int)maxss + simd + dbl;
173 break;
174
175 case 0xC2:
176 inst->op = (int)cmpss + simd + dbl;
177 break;
178
179 case 0xE6:
180 if (simd) {
181 if (dbl)
182 inst->op = cvttpd2dq;
183 else
184 return 0;
185 } else {
186 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
187 }
188 break;
189
190 default:
191 return 0;
192 }
193
194 /* locate operands */
195 modrm = ip[i++];
196
197 if (inst->op == cvtss2si || inst->op == cvttss2si ||
198 inst->op == cvtsd2si || inst->op == cvttsd2si ||
199 inst->op == cvtss2siq || inst->op == cvttss2siq ||
200 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
201 /* op1 is a gp register */
202 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
203 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
204 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
205 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
206 /* op1 is a mmx register */
207 #ifdef __amd64
208 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
209 fpchip_state.st[(modrm >> 3) & 7];
210 #else
211 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
212 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
213 fpchip_state.state[7]);
214 #endif
215 } else {
216 /* op1 is a xmm register */
217 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
218 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
219 fp_reg_set.fpchip_state.xmm[r];
220 }
221
222 if ((modrm >> 6) == 3) {
223 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
224 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
225 /* op2 is a gp register */
226 r = ((rex & 1) << 3) | (modrm & 7);
227 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
228 gregs[regno(r)];
229 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
230 /* op2 is a mmx register */
231 #ifdef __amd64
232 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
233 fp_reg_set.fpchip_state.st[modrm & 7];
234 #else
235 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
236 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
237 fpchip_state.state[7]);
238 #endif
239 } else {
240 /* op2 is a xmm register */
241 r = ((rex & 1) << 3) | (modrm & 7);
242 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
243 fp_reg_set.fpchip_state.xmm[r];
244 }
245 } else if ((modrm & 0xc7) == 0x05) {
246 #if defined(__amd64)
247 /* address of next instruction + offset */
248 r = i + 4;
249 if (inst->op == cmpss || inst->op == cmpps ||
250 inst->op == cmpsd || inst->op == cmppd)
251 r++;
252 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
253 #else
254 /* absolute address */
255 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
256 #endif
257 i += 4;
258 } else {
259 /* complex address */
260 if ((modrm & 7) == 4) {
261 /* parse sib byte */
262 sib = ip[i++];
263 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
264 /* start with absolute address */
265 addr = (char *)(uintptr_t)(ip + i);
266 i += 4;
267 } else {
268 /* start with base */
269 r = ((rex & 1) << 3) | (sib & 7);
270 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
271 }
272 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
273 if (r != 4) {
274 /* add scaled index */
275 addr += uap->uc_mcontext.gregs[regno(r)]
276 << (sib >> 6);
277 }
278 } else {
279 r = ((rex & 1) << 3) | (modrm & 7);
280 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
281 }
282
283 /* add displacement, if any */
284 if ((modrm >> 6) == 1) {
285 addr += (char)ip[i++];
286 } else if ((modrm >> 6) == 2) {
287 addr += *(int *)(ip + i);
288 i += 4;
289 }
290 inst->op2 = (sseoperand_t *)addr;
291 }
292
293 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
294 inst->op == cmppd) {
295 /* get the immediate operand */
296 inst->imm = ip[i++];
297 }
298
299 return i;
300 }
301
302 static enum fp_class_type
303 my_fp_classf(float *x)
304 {
305 int i = *(int *)x & ~0x80000000;
306
307 if (i < 0x7f800000) {
308 if (i < 0x00800000)
309 return ((i == 0)? fp_zero : fp_subnormal);
310 return fp_normal;
311 }
312 else if (i == 0x7f800000)
313 return fp_infinity;
314 else if (i & 0x400000)
315 return fp_quiet;
316 else
317 return fp_signaling;
318 }
319
320 static enum fp_class_type
321 my_fp_class(double *x)
322 {
323 int i = *(1+(int *)x) & ~0x80000000;
324
325 if (i < 0x7ff00000) {
326 if (i < 0x00100000)
327 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
328 return fp_normal;
329 }
330 else if (i == 0x7ff00000 && *(int *)x == 0)
331 return fp_infinity;
332 else if (i & 0x80000)
333 return fp_quiet;
334 else
335 return fp_signaling;
336 }
337
338 /*
339 * Inspect a scalar SSE instruction that incurred an invalid operation
340 * exception to determine which type of exception it was.
341 */
342 static enum fex_exception
343 __fex_get_sse_invalid_type(sseinst_t *inst)
344 {
345 enum fp_class_type t1, t2;
346
347 /* check op2 for signaling nan */
348 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
349 my_fp_classf(&inst->op2->f[0]);
350 if (t2 == fp_signaling)
351 return fex_inv_snan;
352
353 /* eliminate all single-operand instructions */
354 switch (inst->op) {
355 case cvtsd2ss:
356 case cvtss2sd:
357 /* hmm, this shouldn't have happened */
358 return (enum fex_exception) -1;
359
360 case sqrtss:
361 case sqrtsd:
362 return fex_inv_sqrt;
363
364 case cvtss2si:
365 case cvtsd2si:
366 case cvttss2si:
367 case cvttsd2si:
368 case cvtss2siq:
369 case cvtsd2siq:
370 case cvttss2siq:
371 case cvttsd2siq:
372 return fex_inv_int;
373 }
374
375 /* check op1 for signaling nan */
376 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
377 my_fp_classf(&inst->op1->f[0]);
378 if (t1 == fp_signaling)
379 return fex_inv_snan;
380
381 /* check two-operand instructions for other cases */
382 switch (inst->op) {
383 case cmpss:
384 case cmpsd:
385 case minss:
386 case minsd:
387 case maxss:
388 case maxsd:
389 case comiss:
390 case comisd:
391 return fex_inv_cmp;
392
393 case addss:
394 case addsd:
395 case subss:
396 case subsd:
397 if (t1 == fp_infinity && t2 == fp_infinity)
398 return fex_inv_isi;
399 break;
400
401 case mulss:
402 case mulsd:
403 if ((t1 == fp_zero && t2 == fp_infinity) ||
404 (t2 == fp_zero && t1 == fp_infinity))
405 return fex_inv_zmi;
406 break;
407
408 case divss:
409 case divsd:
410 if (t1 == fp_zero && t2 == fp_zero)
411 return fex_inv_zdz;
412 if (t1 == fp_infinity && t2 == fp_infinity)
413 return fex_inv_idi;
414 }
415
416 return (enum fex_exception)-1;
417 }
418
/*
 * Inline templates.
 *
 * Each routine performs the scalar SSE operation it is named after.  As
 * used in this file, the source operands are passed by address first,
 * followed by the address that receives the result.  The ucomis* and
 * comis* routines take two operands and have no result pointer.
 */

/* single precision: compares */
extern void sse_cmpeqss(float *, float *, int *);
extern void sse_cmpltss(float *, float *, int *);
extern void sse_cmpless(float *, float *, int *);
extern void sse_cmpunordss(float *, float *, int *);
extern void sse_ucomiss(float *, float *);
extern void sse_comiss(float *, float *);

/* single precision: min/max and arithmetic */
extern void sse_minss(float *, float *, float *);
extern void sse_maxss(float *, float *, float *);
extern void sse_addss(float *, float *, float *);
extern void sse_subss(float *, float *, float *);
extern void sse_mulss(float *, float *, float *);
extern void sse_divss(float *, float *, float *);
extern void sse_sqrtss(float *, float *);

/* single precision: conversions */
extern void sse_cvtss2sd(float *, double *);
extern void sse_cvtsi2ss(int *, float *);
extern void sse_cvttss2si(float *, int *);
extern void sse_cvtss2si(float *, int *);

/* double precision: compares */
extern void sse_cmpeqsd(double *, double *, long long *);
extern void sse_cmpltsd(double *, double *, long long *);
extern void sse_cmplesd(double *, double *, long long *);
extern void sse_cmpunordsd(double *, double *, long long *);
extern void sse_ucomisd(double *, double *);
extern void sse_comisd(double *, double *);

/* double precision: min/max and arithmetic */
extern void sse_minsd(double *, double *, double *);
extern void sse_maxsd(double *, double *, double *);
extern void sse_addsd(double *, double *, double *);
extern void sse_subsd(double *, double *, double *);
extern void sse_mulsd(double *, double *, double *);
extern void sse_divsd(double *, double *, double *);
extern void sse_sqrtsd(double *, double *);

/* double precision: conversions */
extern void sse_cvtsd2ss(double *, float *);
extern void sse_cvtsi2sd(int *, double *);
extern void sse_cvttsd2si(double *, int *);
extern void sse_cvtsd2si(double *, int *);

#ifdef __amd64
/* conversions to and from 64-bit integers */
extern void sse_cvtsi2ssq(long long *, float *);
extern void sse_cvttss2siq(float *, long long *);
extern void sse_cvtss2siq(float *, long long *);
extern void sse_cvtsi2sdq(long long *, double *);
extern void sse_cvttsd2siq(double *, long long *);
extern void sse_cvtsd2siq(double *, long long *);
#endif
464
465 /*
466 * Fill in *info with the operands, default untrapped result, and
467 * flags produced by a scalar SSE instruction, and return the type
468 * of trapped exception (if any). On entry, the mxcsr must have
469 * all exceptions masked and all flags clear. The same conditions
470 * will hold on exit.
471 *
472 * This routine does not work if the instruction specified by *inst
473 * is not a scalar instruction.
474 */
475 enum fex_exception
476 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
477 {
478 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
479
480 /*
481 * Perform the operation with traps disabled and check the
482 * exception flags. If the underflow trap was enabled, also
483 * check for an exact subnormal result.
484 */
485 __fenv_getmxcsr(&oldmxcsr);
486 subnorm = 0;
487 if ((int)inst->op & DOUBLE) {
488 if (inst->op == cvtsi2sd) {
489 info->op1.type = fex_int;
490 info->op1.val.i = inst->op2->i[0];
491 info->op2.type = fex_nodata;
492 } else if (inst->op == cvtsi2sdq) {
493 info->op1.type = fex_llong;
494 info->op1.val.l = inst->op2->l[0];
495 info->op2.type = fex_nodata;
496 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
497 inst->op == cvttsd2si || inst->op == cvtsd2si ||
498 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
499 info->op1.type = fex_double;
500 info->op1.val.d = inst->op2->d[0];
501 info->op2.type = fex_nodata;
502 } else {
503 info->op1.type = fex_double;
504 info->op1.val.d = inst->op1->d[0];
505 info->op2.type = fex_double;
506 info->op2.val.d = inst->op2->d[0];
507 }
508 info->res.type = fex_double;
509 switch (inst->op) {
510 case cmpsd:
511 info->op = fex_cmp;
512 info->res.type = fex_llong;
513 switch (inst->imm & 3) {
514 case 0:
515 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
516 &info->res.val.l);
517 break;
518
519 case 1:
520 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
521 &info->res.val.l);
522 break;
523
524 case 2:
525 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
526 &info->res.val.l);
527 break;
528
529 case 3:
530 sse_cmpunordsd(&info->op1.val.d,
531 &info->op2.val.d, &info->res.val.l);
532 }
533 if (inst->imm & 4)
534 info->res.val.l ^= 0xffffffffffffffffull;
535 break;
536
537 case minsd:
538 info->op = fex_other;
539 sse_minsd(&info->op1.val.d, &info->op2.val.d,
540 &info->res.val.d);
541 break;
542
543 case maxsd:
544 info->op = fex_other;
545 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
546 &info->res.val.d);
547 break;
548
549 case addsd:
550 info->op = fex_add;
551 sse_addsd(&info->op1.val.d, &info->op2.val.d,
552 &info->res.val.d);
553 if (my_fp_class(&info->res.val.d) == fp_subnormal)
554 subnorm = 1;
555 break;
556
557 case subsd:
558 info->op = fex_sub;
559 sse_subsd(&info->op1.val.d, &info->op2.val.d,
560 &info->res.val.d);
561 if (my_fp_class(&info->res.val.d) == fp_subnormal)
562 subnorm = 1;
563 break;
564
565 case mulsd:
566 info->op = fex_mul;
567 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
568 &info->res.val.d);
569 if (my_fp_class(&info->res.val.d) == fp_subnormal)
570 subnorm = 1;
571 break;
572
573 case divsd:
574 info->op = fex_div;
575 sse_divsd(&info->op1.val.d, &info->op2.val.d,
576 &info->res.val.d);
577 if (my_fp_class(&info->res.val.d) == fp_subnormal)
578 subnorm = 1;
579 break;
580
581 case sqrtsd:
582 info->op = fex_sqrt;
583 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
584 break;
585
586 case cvtsd2ss:
587 info->op = fex_cnvt;
588 info->res.type = fex_float;
589 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
590 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
591 subnorm = 1;
592 break;
593
594 case cvtsi2sd:
595 info->op = fex_cnvt;
596 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
597 break;
598
599 case cvttsd2si:
600 info->op = fex_cnvt;
601 info->res.type = fex_int;
602 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
603 break;
604
605 case cvtsd2si:
606 info->op = fex_cnvt;
607 info->res.type = fex_int;
608 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
609 break;
610
611 #ifdef __amd64
612 case cvtsi2sdq:
613 info->op = fex_cnvt;
614 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
615 break;
616
617 case cvttsd2siq:
618 info->op = fex_cnvt;
619 info->res.type = fex_llong;
620 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
621 break;
622
623 case cvtsd2siq:
624 info->op = fex_cnvt;
625 info->res.type = fex_llong;
626 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
627 break;
628 #endif
629
630 case ucomisd:
631 info->op = fex_cmp;
632 info->res.type = fex_nodata;
633 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
634 break;
635
636 case comisd:
637 info->op = fex_cmp;
638 info->res.type = fex_nodata;
639 sse_comisd(&info->op1.val.d, &info->op2.val.d);
640 break;
641 }
642 } else {
643 if (inst->op == cvtsi2ss) {
644 info->op1.type = fex_int;
645 info->op1.val.i = inst->op2->i[0];
646 info->op2.type = fex_nodata;
647 } else if (inst->op == cvtsi2ssq) {
648 info->op1.type = fex_llong;
649 info->op1.val.l = inst->op2->l[0];
650 info->op2.type = fex_nodata;
651 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
652 inst->op == cvttss2si || inst->op == cvtss2si ||
653 inst->op == cvttss2siq || inst->op == cvtss2siq) {
654 info->op1.type = fex_float;
655 info->op1.val.f = inst->op2->f[0];
656 info->op2.type = fex_nodata;
657 } else {
658 info->op1.type = fex_float;
659 info->op1.val.f = inst->op1->f[0];
660 info->op2.type = fex_float;
661 info->op2.val.f = inst->op2->f[0];
662 }
663 info->res.type = fex_float;
664 switch (inst->op) {
665 case cmpss:
666 info->op = fex_cmp;
667 info->res.type = fex_int;
668 switch (inst->imm & 3) {
669 case 0:
670 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
671 &info->res.val.i);
672 break;
673
674 case 1:
675 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
676 &info->res.val.i);
677 break;
678
679 case 2:
680 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
681 &info->res.val.i);
682 break;
683
684 case 3:
685 sse_cmpunordss(&info->op1.val.f,
686 &info->op2.val.f, &info->res.val.i);
687 }
688 if (inst->imm & 4)
689 info->res.val.i ^= 0xffffffffu;
690 break;
691
692 case minss:
693 info->op = fex_other;
694 sse_minss(&info->op1.val.f, &info->op2.val.f,
695 &info->res.val.f);
696 break;
697
698 case maxss:
699 info->op = fex_other;
700 sse_maxss(&info->op1.val.f, &info->op2.val.f,
701 &info->res.val.f);
702 break;
703
704 case addss:
705 info->op = fex_add;
706 sse_addss(&info->op1.val.f, &info->op2.val.f,
707 &info->res.val.f);
708 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
709 subnorm = 1;
710 break;
711
712 case subss:
713 info->op = fex_sub;
714 sse_subss(&info->op1.val.f, &info->op2.val.f,
715 &info->res.val.f);
716 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
717 subnorm = 1;
718 break;
719
720 case mulss:
721 info->op = fex_mul;
722 sse_mulss(&info->op1.val.f, &info->op2.val.f,
723 &info->res.val.f);
724 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
725 subnorm = 1;
726 break;
727
728 case divss:
729 info->op = fex_div;
730 sse_divss(&info->op1.val.f, &info->op2.val.f,
731 &info->res.val.f);
732 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
733 subnorm = 1;
734 break;
735
736 case sqrtss:
737 info->op = fex_sqrt;
738 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
739 break;
740
741 case cvtss2sd:
742 info->op = fex_cnvt;
743 info->res.type = fex_double;
744 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
745 break;
746
747 case cvtsi2ss:
748 info->op = fex_cnvt;
749 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
750 break;
751
752 case cvttss2si:
753 info->op = fex_cnvt;
754 info->res.type = fex_int;
755 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
756 break;
757
758 case cvtss2si:
759 info->op = fex_cnvt;
760 info->res.type = fex_int;
761 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
762 break;
763
764 #ifdef __amd64
765 case cvtsi2ssq:
766 info->op = fex_cnvt;
767 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
768 break;
769
770 case cvttss2siq:
771 info->op = fex_cnvt;
772 info->res.type = fex_llong;
773 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
774 break;
775
776 case cvtss2siq:
777 info->op = fex_cnvt;
778 info->res.type = fex_llong;
779 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
780 break;
781 #endif
782
783 case ucomiss:
784 info->op = fex_cmp;
785 info->res.type = fex_nodata;
786 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
787 break;
788
789 case comiss:
790 info->op = fex_cmp;
791 info->res.type = fex_nodata;
792 sse_comiss(&info->op1.val.f, &info->op2.val.f);
793 break;
794 }
795 }
796 __fenv_getmxcsr(&mxcsr);
797 info->flags = mxcsr & 0x3d;
798 __fenv_setmxcsr(&oldmxcsr);
799
800 /* determine which exception would have been trapped */
801 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
802 >> 7) & 0x3d;
803 e = mxcsr & te;
804 if (e & FE_INVALID)
805 return __fex_get_sse_invalid_type(inst);
806 if (e & FE_DIVBYZERO)
807 return fex_division;
808 if (e & FE_OVERFLOW)
809 return fex_overflow;
810 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
811 return fex_underflow;
812 if (e & FE_INEXACT)
813 return fex_inexact;
814 return (enum fex_exception)-1;
815 }
816
817 /*
818 * Emulate a SIMD SSE instruction to determine which exceptions occur
819 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
820 * trapped exception that would occur if the i-th part of the SIMD
821 * instruction were executed in isolation; set e[i] to -1 if no
822 * trapped exception would occur in this part. Also fill in info[i]
823 * with the corresponding operands, default untrapped result, and
824 * flags.
825 *
826 * This routine does not work if the instruction specified by *inst
827 * is not a SIMD instruction.
828 */
829 void
830 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
831 fex_info_t *info)
832 {
833 sseinst_t dummy;
834 int i;
835
836 e[0] = e[1] = e[2] = e[3] = -1;
837
838 /* perform each part of the SIMD operation */
839 switch (inst->op) {
840 case cmpps:
841 dummy.op = cmpss;
842 dummy.imm = inst->imm;
843 for (i = 0; i < 4; i++) {
844 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
845 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
846 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
847 }
848 break;
849
850 case minps:
851 dummy.op = minss;
852 for (i = 0; i < 4; i++) {
853 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
854 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
855 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
856 }
857 break;
858
859 case maxps:
860 dummy.op = maxss;
861 for (i = 0; i < 4; i++) {
862 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
863 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
864 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
865 }
866 break;
867
868 case addps:
869 dummy.op = addss;
870 for (i = 0; i < 4; i++) {
871 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
872 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
873 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
874 }
875 break;
876
877 case subps:
878 dummy.op = subss;
879 for (i = 0; i < 4; i++) {
880 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
881 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
882 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
883 }
884 break;
885
886 case mulps:
887 dummy.op = mulss;
888 for (i = 0; i < 4; i++) {
889 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
890 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
891 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
892 }
893 break;
894
895 case divps:
896 dummy.op = divss;
897 for (i = 0; i < 4; i++) {
898 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
899 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
900 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
901 }
902 break;
903
904 case sqrtps:
905 dummy.op = sqrtss;
906 for (i = 0; i < 4; i++) {
907 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
908 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
909 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
910 }
911 break;
912
913 case cvtdq2ps:
914 dummy.op = cvtsi2ss;
915 for (i = 0; i < 4; i++) {
916 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
917 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
918 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
919 }
920 break;
921
922 case cvttps2dq:
923 dummy.op = cvttss2si;
924 for (i = 0; i < 4; i++) {
925 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
926 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
927 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
928 }
929 break;
930
931 case cvtps2dq:
932 dummy.op = cvtss2si;
933 for (i = 0; i < 4; i++) {
934 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
935 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
936 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
937 }
938 break;
939
940 case cvtpi2ps:
941 dummy.op = cvtsi2ss;
942 for (i = 0; i < 2; i++) {
943 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
944 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
945 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
946 }
947 break;
948
949 case cvttps2pi:
950 dummy.op = cvttss2si;
951 for (i = 0; i < 2; i++) {
952 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
953 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
954 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
955 }
956 break;
957
958 case cvtps2pi:
959 dummy.op = cvtss2si;
960 for (i = 0; i < 2; i++) {
961 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
962 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
963 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
964 }
965 break;
966
967 case cmppd:
968 dummy.op = cmpsd;
969 dummy.imm = inst->imm;
970 for (i = 0; i < 2; i++) {
971 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
972 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
973 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
974 }
975 break;
976
977 case minpd:
978 dummy.op = minsd;
979 for (i = 0; i < 2; i++) {
980 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
981 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
982 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
983 }
984 break;
985
986 case maxpd:
987 dummy.op = maxsd;
988 for (i = 0; i < 2; i++) {
989 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
990 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
991 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
992 }
993 break;
994
995 case addpd:
996 dummy.op = addsd;
997 for (i = 0; i < 2; i++) {
998 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
999 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1000 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1001 }
1002 break;
1003
1004 case subpd:
1005 dummy.op = subsd;
1006 for (i = 0; i < 2; i++) {
1007 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1008 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1009 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1010 }
1011 break;
1012
1013 case mulpd:
1014 dummy.op = mulsd;
1015 for (i = 0; i < 2; i++) {
1016 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1017 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1018 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1019 }
1020 break;
1021
1022 case divpd:
1023 dummy.op = divsd;
1024 for (i = 0; i < 2; i++) {
1025 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1026 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1027 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1028 }
1029 break;
1030
1031 case sqrtpd:
1032 dummy.op = sqrtsd;
1033 for (i = 0; i < 2; i++) {
1034 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1035 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1036 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1037 }
1038 break;
1039
1040 case cvtpi2pd:
1041 case cvtdq2pd:
1042 dummy.op = cvtsi2sd;
1043 for (i = 0; i < 2; i++) {
1044 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1045 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1046 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1047 }
1048 break;
1049
1050 case cvttpd2pi:
1051 case cvttpd2dq:
1052 dummy.op = cvttsd2si;
1053 for (i = 0; i < 2; i++) {
1054 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1055 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1056 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1057 }
1058 break;
1059
1060 case cvtpd2pi:
1061 case cvtpd2dq:
1062 dummy.op = cvtsd2si;
1063 for (i = 0; i < 2; i++) {
1064 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1065 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1066 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1067 }
1068 break;
1069
1070 case cvtps2pd:
1071 dummy.op = cvtss2sd;
1072 for (i = 0; i < 2; i++) {
1073 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1074 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1075 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1076 }
1077 break;
1078
1079 case cvtpd2ps:
1080 dummy.op = cvtsd2ss;
1081 for (i = 0; i < 2; i++) {
1082 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1083 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1084 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1085 }
1086 }
1087 }
1088
1089 /*
1090 * Store the result value from *info in the destination of the scalar
1091 * SSE instruction specified by *inst. If no result is given but the
1092 * exception is underflow or overflow, supply the default trapped result.
1093 *
1094 * This routine does not work if the instruction specified by *inst
1095 * is not a scalar instruction.
1096 */
1097 void
1098 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1099 fex_info_t *info)
1100 {
1101 int i;
1102 long long l;
1103 float f, fscl;
1104 double d, dscl;
1105
1106 /* for compares that write eflags, just set the flags
1107 to indicate "unordered" */
1108 if (inst->op == ucomiss || inst->op == comiss ||
1109 inst->op == ucomisd || inst->op == comisd) {
1110 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1111 return;
1112 }
1113
1114 /* if info doesn't specify a result value, try to generate
1115 the default trapped result */
1116 if (info->res.type == fex_nodata) {
1117 /* set scale factors for exponent wrapping */
1118 switch (e) {
1119 case fex_overflow:
1120 fscl = 1.262177448e-29f; /* 2^-96 */
1121 dscl = 6.441148769597133308e-232; /* 2^-768 */
1122 break;
1123
1124 case fex_underflow:
1125 fscl = 7.922816251e+28f; /* 2^96 */
1126 dscl = 1.552518092300708935e+231; /* 2^768 */
1127 break;
1128
1129 default:
1130 (void) __fex_get_sse_op(uap, inst, info);
1131 if (info->res.type == fex_nodata)
1132 return;
1133 goto stuff;
1134 }
1135
1136 /* generate the wrapped result */
1137 if (inst->op == cvtsd2ss) {
1138 info->op1.type = fex_double;
1139 info->op1.val.d = inst->op2->d[0];
1140 info->op2.type = fex_nodata;
1141 info->res.type = fex_float;
1142 info->res.val.f = (float)(fscl * (fscl *
1143 info->op1.val.d));
1144 } else if ((int)inst->op & DOUBLE) {
1145 info->op1.type = fex_double;
1146 info->op1.val.d = inst->op1->d[0];
1147 info->op2.type = fex_double;
1148 info->op2.val.d = inst->op2->d[0];
1149 info->res.type = fex_double;
1150 switch (inst->op) {
1151 case addsd:
1152 info->res.val.d = dscl * (dscl *
1153 info->op1.val.d + dscl * info->op2.val.d);
1154 break;
1155
1156 case subsd:
1157 info->res.val.d = dscl * (dscl *
1158 info->op1.val.d - dscl * info->op2.val.d);
1159 break;
1160
1161 case mulsd:
1162 info->res.val.d = (dscl * info->op1.val.d) *
1163 (dscl * info->op2.val.d);
1164 break;
1165
1166 case divsd:
1167 info->res.val.d = (dscl * info->op1.val.d) /
1168 (info->op2.val.d / dscl);
1169 break;
1170
1171 default:
1172 return;
1173 }
1174 } else {
1175 info->op1.type = fex_float;
1176 info->op1.val.f = inst->op1->f[0];
1177 info->op2.type = fex_float;
1178 info->op2.val.f = inst->op2->f[0];
1179 info->res.type = fex_float;
1180 switch (inst->op) {
1181 case addss:
1182 info->res.val.f = fscl * (fscl *
1183 info->op1.val.f + fscl * info->op2.val.f);
1184 break;
1185
1186 case subss:
1187 info->res.val.f = fscl * (fscl *
1188 info->op1.val.f - fscl * info->op2.val.f);
1189 break;
1190
1191 case mulss:
1192 info->res.val.f = (fscl * info->op1.val.f) *
1193 (fscl * info->op2.val.f);
1194 break;
1195
1196 case divss:
1197 info->res.val.f = (fscl * info->op1.val.f) /
1198 (info->op2.val.f / fscl);
1199 break;
1200
1201 default:
1202 return;
1203 }
1204 }
1205 }
1206
1207 /* put the result in the destination */
1208 stuff:
1209 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1210 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1211 switch (info->res.type) {
1212 case fex_int:
1213 i = info->res.val.i;
1214 break;
1215
1216 case fex_llong:
1217 i = info->res.val.l;
1218 break;
1219
1220 case fex_float:
1221 i = info->res.val.f;
1222 break;
1223
1224 case fex_double:
1225 i = info->res.val.d;
1226 break;
1227
1228 case fex_ldouble:
1229 i = info->res.val.q;
1230 break;
1231 }
1232 inst->op1->i[0] = i;
1233 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1234 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1235 inst->op == cvtsd2siq) {
1236 switch (info->res.type) {
1237 case fex_int:
1238 l = info->res.val.i;
1239 break;
1240
1241 case fex_llong:
1242 l = info->res.val.l;
1243 break;
1244
1245 case fex_float:
1246 l = info->res.val.f;
1247 break;
1248
1249 case fex_double:
1250 l = info->res.val.d;
1251 break;
1252
1253 case fex_ldouble:
1254 l = info->res.val.q;
1255 break;
1256 }
1257 inst->op1->l[0] = l;
1258 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1259 inst->op == cvtss2sd) {
1260 switch (info->res.type) {
1261 case fex_int:
1262 d = info->res.val.i;
1263 break;
1264
1265 case fex_llong:
1266 d = info->res.val.l;
1267 break;
1268
1269 case fex_float:
1270 d = info->res.val.f;
1271 break;
1272
1273 case fex_double:
1274 d = info->res.val.d;
1275 break;
1276
1277 case fex_ldouble:
1278 d = info->res.val.q;
1279 break;
1280 }
1281 inst->op1->d[0] = d;
1282 } else {
1283 switch (info->res.type) {
1284 case fex_int:
1285 f = info->res.val.i;
1286 break;
1287
1288 case fex_llong:
1289 f = info->res.val.l;
1290 break;
1291
1292 case fex_float:
1293 f = info->res.val.f;
1294 break;
1295
1296 case fex_double:
1297 f = info->res.val.d;
1298 break;
1299
1300 case fex_ldouble:
1301 f = info->res.val.q;
1302 break;
1303 }
1304 inst->op1->f[0] = f;
1305 }
1306 }
1307
1308 /*
1309 * Store the results from a SIMD instruction. For each i, store
1310 * the result value from info[i] in the i-th part of the destination
1311 * of the SIMD SSE instruction specified by *inst. If no result
1312 * is given but the exception indicated by e[i] is underflow or
1313 * overflow, supply the default trapped result.
1314 *
1315 * This routine does not work if the instruction specified by *inst
1316 * is not a SIMD instruction.
1317 */
1318 void
1319 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1320 fex_info_t *info)
1321 {
1322 sseinst_t dummy;
1323 int i;
1324
1325 /* store each part */
1326 switch (inst->op) {
1327 case cmpps:
1328 dummy.op = cmpss;
1329 dummy.imm = inst->imm;
1330 for (i = 0; i < 4; i++) {
1331 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1332 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1333 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1334 }
1335 break;
1336
1337 case minps:
1338 dummy.op = minss;
1339 for (i = 0; i < 4; i++) {
1340 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1341 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1342 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1343 }
1344 break;
1345
1346 case maxps:
1347 dummy.op = maxss;
1348 for (i = 0; i < 4; i++) {
1349 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1350 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1351 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1352 }
1353 break;
1354
1355 case addps:
1356 dummy.op = addss;
1357 for (i = 0; i < 4; i++) {
1358 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1359 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1360 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1361 }
1362 break;
1363
1364 case subps:
1365 dummy.op = subss;
1366 for (i = 0; i < 4; i++) {
1367 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1368 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1369 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1370 }
1371 break;
1372
1373 case mulps:
1374 dummy.op = mulss;
1375 for (i = 0; i < 4; i++) {
1376 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1377 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1378 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1379 }
1380 break;
1381
1382 case divps:
1383 dummy.op = divss;
1384 for (i = 0; i < 4; i++) {
1385 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1386 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1387 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1388 }
1389 break;
1390
1391 case sqrtps:
1392 dummy.op = sqrtss;
1393 for (i = 0; i < 4; i++) {
1394 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1395 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1396 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1397 }
1398 break;
1399
1400 case cvtdq2ps:
1401 dummy.op = cvtsi2ss;
1402 for (i = 0; i < 4; i++) {
1403 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1404 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1405 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1406 }
1407 break;
1408
1409 case cvttps2dq:
1410 dummy.op = cvttss2si;
1411 for (i = 0; i < 4; i++) {
1412 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1413 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1414 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1415 }
1416 break;
1417
1418 case cvtps2dq:
1419 dummy.op = cvtss2si;
1420 for (i = 0; i < 4; i++) {
1421 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1422 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1423 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1424 }
1425 break;
1426
1427 case cvtpi2ps:
1428 dummy.op = cvtsi2ss;
1429 for (i = 0; i < 2; i++) {
1430 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1431 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1432 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1433 }
1434 break;
1435
1436 case cvttps2pi:
1437 dummy.op = cvttss2si;
1438 for (i = 0; i < 2; i++) {
1439 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1440 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1441 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1442 }
1443 break;
1444
1445 case cvtps2pi:
1446 dummy.op = cvtss2si;
1447 for (i = 0; i < 2; i++) {
1448 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1449 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1450 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1451 }
1452 break;
1453
1454 case cmppd:
1455 dummy.op = cmpsd;
1456 dummy.imm = inst->imm;
1457 for (i = 0; i < 2; i++) {
1458 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1459 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1460 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1461 }
1462 break;
1463
1464 case minpd:
1465 dummy.op = minsd;
1466 for (i = 0; i < 2; i++) {
1467 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1468 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1469 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1470 }
1471 break;
1472
1473 case maxpd:
1474 dummy.op = maxsd;
1475 for (i = 0; i < 2; i++) {
1476 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1477 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1478 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1479 }
1480 break;
1481
1482 case addpd:
1483 dummy.op = addsd;
1484 for (i = 0; i < 2; i++) {
1485 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1486 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1487 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1488 }
1489 break;
1490
1491 case subpd:
1492 dummy.op = subsd;
1493 for (i = 0; i < 2; i++) {
1494 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1495 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1496 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1497 }
1498 break;
1499
1500 case mulpd:
1501 dummy.op = mulsd;
1502 for (i = 0; i < 2; i++) {
1503 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1504 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1505 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1506 }
1507 break;
1508
1509 case divpd:
1510 dummy.op = divsd;
1511 for (i = 0; i < 2; i++) {
1512 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1513 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1514 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1515 }
1516 break;
1517
1518 case sqrtpd:
1519 dummy.op = sqrtsd;
1520 for (i = 0; i < 2; i++) {
1521 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1522 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1523 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1524 }
1525 break;
1526
1527 case cvtpi2pd:
1528 case cvtdq2pd:
1529 dummy.op = cvtsi2sd;
1530 for (i = 0; i < 2; i++) {
1531 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1532 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1533 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1534 }
1535 break;
1536
1537 case cvttpd2pi:
1538 case cvttpd2dq:
1539 dummy.op = cvttsd2si;
1540 for (i = 0; i < 2; i++) {
1541 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1542 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544 }
1545 /* for cvttpd2dq, zero the high 64 bits of the destination */
1546 if (inst->op == cvttpd2dq)
1547 inst->op1->l[1] = 0ll;
1548 break;
1549
1550 case cvtpd2pi:
1551 case cvtpd2dq:
1552 dummy.op = cvtsd2si;
1553 for (i = 0; i < 2; i++) {
1554 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1555 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1556 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1557 }
1558 /* for cvtpd2dq, zero the high 64 bits of the destination */
1559 if (inst->op == cvtpd2dq)
1560 inst->op1->l[1] = 0ll;
1561 break;
1562
1563 case cvtps2pd:
1564 dummy.op = cvtss2sd;
1565 for (i = 0; i < 2; i++) {
1566 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1568 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569 }
1570 break;
1571
1572 case cvtpd2ps:
1573 dummy.op = cvtsd2ss;
1574 for (i = 0; i < 2; i++) {
1575 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 }
1579 /* zero the high 64 bits of the destination */
1580 inst->op1->l[1] = 0ll;
1581 }
1582 }