5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>
1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 */
25 /*
26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 * Use is subject to license terms.
28 */
29
#include "fenv_synonyms.h"
#include <ucontext.h>
#include <fenv.h>
#include <string.h>
#if defined(__SUNPRO_C)
#include <sunmath.h>
#else
#include <sys/ieeefp.h>
#endif
#include "fex_handler.h"
#include "fenv_inlines.h"
40
41 #if !defined(REG_PC)
42 #define REG_PC EIP
43 #endif
44
45 #if !defined(REG_PS)
46 #define REG_PS EFL
47 #endif
48
/*
 * Map a register number X, as assembled from the REX/ModRM/SIB fields of
 * an instruction, to an index into uc_mcontext.gregs[].
 *
 * On amd64, numbers below 4 count down from REG_RAX, number 4 selects
 * REG_RSP, and numbers above 4 count down from REG_RAX + 1.  Otherwise
 * the index simply counts down from EAX.
 *
 * The argument is fully parenthesized so that compound expressions expand
 * correctly; note that X may still be evaluated more than once, so it must
 * not have side effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55
56 /*
57 * Support for SSE instructions
58 */
59
60 /*
61 * Decode an SSE instruction. Fill in *inst and return the length of the
62 * instruction in bytes. Return 0 if the instruction is not recognized.
63 */
64 int
65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 {
67 unsigned char *ip;
68 char *addr;
69 int i, dbl, simd, rex, modrm, sib, r;
70
71 i = 0;
72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73
74 /* look for pseudo-prefixes */
75 dbl = 0;
76 simd = SIMD;
77 if (ip[i] == 0xF3) {
78 simd = 0;
79 i++;
80 } else if (ip[i] == 0x66) {
81 dbl = DOUBLE;
82 i++;
83 } else if (ip[i] == 0xF2) {
84 dbl = DOUBLE;
85 simd = 0;
86 i++;
87 }
88
89 /* look for AMD64 REX prefix */
90 rex = 0;
91 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
92 rex = ip[i];
93 i++;
94 }
95
96 /* parse opcode */
97 if (ip[i++] != 0x0F)
98 return 0;
99 switch (ip[i++]) {
100 case 0x2A:
101 inst->op = (int)cvtsi2ss + simd + dbl;
102 if (!simd)
103 inst->op = (int)inst->op + (rex & 8);
104 break;
105
106 case 0x2C:
107 inst->op = (int)cvttss2si + simd + dbl;
108 if (!simd)
109 inst->op = (int)inst->op + (rex & 8);
110 break;
111
112 case 0x2D:
113 inst->op = (int)cvtss2si + simd + dbl;
114 if (!simd)
115 inst->op = (int)inst->op + (rex & 8);
116 break;
117
118 case 0x2E:
119 /* oddball: scalar instruction in a SIMD opcode group */
120 if (!simd)
121 return 0;
122 inst->op = (int)ucomiss + dbl;
123 break;
124
125 case 0x2F:
126 /* oddball: scalar instruction in a SIMD opcode group */
127 if (!simd)
128 return 0;
129 inst->op = (int)comiss + dbl;
130 break;
131
132 case 0x51:
133 inst->op = (int)sqrtss + simd + dbl;
134 break;
135
136 case 0x58:
137 inst->op = (int)addss + simd + dbl;
138 break;
139
140 case 0x59:
141 inst->op = (int)mulss + simd + dbl;
142 break;
143
144 case 0x5A:
145 inst->op = (int)cvtss2sd + simd + dbl;
146 break;
147
148 case 0x5B:
149 if (dbl) {
150 if (simd)
151 inst->op = cvtps2dq;
152 else
153 return 0;
154 } else {
155 inst->op = (simd)? cvtdq2ps : cvttps2dq;
156 }
157 break;
158
159 case 0x5C:
160 inst->op = (int)subss + simd + dbl;
161 break;
162
163 case 0x5D:
164 inst->op = (int)minss + simd + dbl;
165 break;
166
167 case 0x5E:
168 inst->op = (int)divss + simd + dbl;
169 break;
170
171 case 0x5F:
172 inst->op = (int)maxss + simd + dbl;
173 break;
174
175 case 0xC2:
176 inst->op = (int)cmpss + simd + dbl;
177 break;
178
179 case 0xE6:
180 if (simd) {
181 if (dbl)
182 inst->op = cvttpd2dq;
183 else
184 return 0;
185 } else {
186 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
187 }
188 break;
189
190 default:
191 return 0;
192 }
193
194 /* locate operands */
195 modrm = ip[i++];
196
197 if (inst->op == cvtss2si || inst->op == cvttss2si ||
198 inst->op == cvtsd2si || inst->op == cvttsd2si ||
199 inst->op == cvtss2siq || inst->op == cvttss2siq ||
200 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
201 /* op1 is a gp register */
202 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
203 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
204 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
205 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
206 /* op1 is a mmx register */
207 #ifdef __amd64
208 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
209 fpchip_state.st[(modrm >> 3) & 7];
210 #else
211 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
212 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
213 fpchip_state.state[7]);
214 #endif
215 } else {
216 /* op1 is a xmm register */
217 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
218 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
219 fp_reg_set.fpchip_state.xmm[r];
220 }
221
222 if ((modrm >> 6) == 3) {
223 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
224 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
225 /* op2 is a gp register */
226 r = ((rex & 1) << 3) | (modrm & 7);
227 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
228 gregs[regno(r)];
229 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
230 /* op2 is a mmx register */
231 #ifdef __amd64
232 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
233 fp_reg_set.fpchip_state.st[modrm & 7];
234 #else
235 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
236 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
237 fpchip_state.state[7]);
238 #endif
239 } else {
240 /* op2 is a xmm register */
241 r = ((rex & 1) << 3) | (modrm & 7);
242 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
243 fp_reg_set.fpchip_state.xmm[r];
244 }
245 } else if ((modrm & 0xc7) == 0x05) {
246 #ifdef __amd64
247 /* address of next instruction + offset */
248 r = i + 4;
249 if (inst->op == cmpss || inst->op == cmpps ||
250 inst->op == cmpsd || inst->op == cmppd)
251 r++;
252 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
253 #else
254 /* absolute address */
255 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
256 #endif
257 i += 4;
258 } else {
259 /* complex address */
260 if ((modrm & 7) == 4) {
261 /* parse sib byte */
262 sib = ip[i++];
263 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
264 /* start with absolute address */
265 addr = (char *)(uintptr_t)(*(int *)(ip + i));
266 i += 4;
267 } else {
268 /* start with base */
269 r = ((rex & 1) << 3) | (sib & 7);
270 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
271 }
272 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
273 if (r != 4) {
274 /* add scaled index */
275 addr += uap->uc_mcontext.gregs[regno(r)]
276 << (sib >> 6);
277 }
278 } else {
279 r = ((rex & 1) << 3) | (modrm & 7);
280 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
281 }
282
283 /* add displacement, if any */
284 if ((modrm >> 6) == 1) {
285 addr += (char)ip[i++];
286 } else if ((modrm >> 6) == 2) {
287 addr += *(int *)(ip + i);
288 i += 4;
289 }
290 inst->op2 = (sseoperand_t *)addr;
291 }
292
293 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
294 inst->op == cmppd) {
295 /* get the immediate operand */
296 inst->imm = ip[i++];
297 }
298
299 return i;
300 }
301
302 static enum fp_class_type
303 my_fp_classf(float *x)
304 {
305 int i = *(int *)x & ~0x80000000;
306
307 if (i < 0x7f800000) {
308 if (i < 0x00800000)
309 return ((i == 0)? fp_zero : fp_subnormal);
310 return fp_normal;
311 }
312 else if (i == 0x7f800000)
313 return fp_infinity;
314 else if (i & 0x400000)
315 return fp_quiet;
316 else
317 return fp_signaling;
318 }
319
320 static enum fp_class_type
321 my_fp_class(double *x)
322 {
323 int i = *(1+(int *)x) & ~0x80000000;
324
325 if (i < 0x7ff00000) {
326 if (i < 0x00100000)
327 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
328 return fp_normal;
329 }
330 else if (i == 0x7ff00000 && *(int *)x == 0)
331 return fp_infinity;
332 else if (i & 0x80000)
333 return fp_quiet;
334 else
335 return fp_signaling;
336 }
337
338 /*
339 * Inspect a scalar SSE instruction that incurred an invalid operation
340 * exception to determine which type of exception it was.
341 */
342 static enum fex_exception
343 __fex_get_sse_invalid_type(sseinst_t *inst)
344 {
345 enum fp_class_type t1, t2;
346
347 /* check op2 for signaling nan */
348 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
349 my_fp_classf(&inst->op2->f[0]);
350 if (t2 == fp_signaling)
351 return fex_inv_snan;
352
353 /* eliminate all single-operand instructions */
354 switch (inst->op) {
355 case cvtsd2ss:
356 case cvtss2sd:
357 /* hmm, this shouldn't have happened */
358 return (enum fex_exception) -1;
359
360 case sqrtss:
361 case sqrtsd:
362 return fex_inv_sqrt;
363
364 case cvtss2si:
365 case cvtsd2si:
366 case cvttss2si:
367 case cvttsd2si:
368 case cvtss2siq:
369 case cvtsd2siq:
370 case cvttss2siq:
371 case cvttsd2siq:
372 return fex_inv_int;
373 default:
374 break;
375 }
376
377 /* check op1 for signaling nan */
378 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
379 my_fp_classf(&inst->op1->f[0]);
380 if (t1 == fp_signaling)
381 return fex_inv_snan;
382
383 /* check two-operand instructions for other cases */
384 switch (inst->op) {
385 case cmpss:
386 case cmpsd:
387 case minss:
388 case minsd:
389 case maxss:
390 case maxsd:
391 case comiss:
392 case comisd:
393 return fex_inv_cmp;
394
395 case addss:
396 case addsd:
397 case subss:
398 case subsd:
399 if (t1 == fp_infinity && t2 == fp_infinity)
400 return fex_inv_isi;
401 break;
402
403 case mulss:
404 case mulsd:
405 if ((t1 == fp_zero && t2 == fp_infinity) ||
406 (t2 == fp_zero && t1 == fp_infinity))
407 return fex_inv_zmi;
408 break;
409
410 case divss:
411 case divsd:
412 if (t1 == fp_zero && t2 == fp_zero)
413 return fex_inv_zdz;
414 if (t1 == fp_infinity && t2 == fp_infinity)
415 return fex_inv_idi;
416 default:
417 break;
418 }
419
420 return (enum fex_exception)-1;
421 }
422
423 /* inline templates */
424 extern void sse_cmpeqss(float *, float *, int *);
425 extern void sse_cmpltss(float *, float *, int *);
426 extern void sse_cmpless(float *, float *, int *);
427 extern void sse_cmpunordss(float *, float *, int *);
428 extern void sse_minss(float *, float *, float *);
429 extern void sse_maxss(float *, float *, float *);
430 extern void sse_addss(float *, float *, float *);
431 extern void sse_subss(float *, float *, float *);
432 extern void sse_mulss(float *, float *, float *);
433 extern void sse_divss(float *, float *, float *);
434 extern void sse_sqrtss(float *, float *);
435 extern void sse_ucomiss(float *, float *);
436 extern void sse_comiss(float *, float *);
437 extern void sse_cvtss2sd(float *, double *);
438 extern void sse_cvtsi2ss(int *, float *);
439 extern void sse_cvttss2si(float *, int *);
440 extern void sse_cvtss2si(float *, int *);
441 #ifdef __amd64
442 extern void sse_cvtsi2ssq(long long *, float *);
443 extern void sse_cvttss2siq(float *, long long *);
444 extern void sse_cvtss2siq(float *, long long *);
445 #endif
446 extern void sse_cmpeqsd(double *, double *, long long *);
447 extern void sse_cmpltsd(double *, double *, long long *);
448 extern void sse_cmplesd(double *, double *, long long *);
449 extern void sse_cmpunordsd(double *, double *, long long *);
450 extern void sse_minsd(double *, double *, double *);
451 extern void sse_maxsd(double *, double *, double *);
452 extern void sse_addsd(double *, double *, double *);
453 extern void sse_subsd(double *, double *, double *);
454 extern void sse_mulsd(double *, double *, double *);
455 extern void sse_divsd(double *, double *, double *);
456 extern void sse_sqrtsd(double *, double *);
457 extern void sse_ucomisd(double *, double *);
458 extern void sse_comisd(double *, double *);
459 extern void sse_cvtsd2ss(double *, float *);
460 extern void sse_cvtsi2sd(int *, double *);
461 extern void sse_cvttsd2si(double *, int *);
462 extern void sse_cvtsd2si(double *, int *);
463 #ifdef __amd64
464 extern void sse_cvtsi2sdq(long long *, double *);
465 extern void sse_cvttsd2siq(double *, long long *);
466 extern void sse_cvtsd2siq(double *, long long *);
467 #endif
468
469 /*
470 * Fill in *info with the operands, default untrapped result, and
471 * flags produced by a scalar SSE instruction, and return the type
472 * of trapped exception (if any). On entry, the mxcsr must have
473 * all exceptions masked and all flags clear. The same conditions
474 * will hold on exit.
475 *
476 * This routine does not work if the instruction specified by *inst
477 * is not a scalar instruction.
478 */
479 enum fex_exception
480 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
481 {
482 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
483
484 /*
485 * Perform the operation with traps disabled and check the
486 * exception flags. If the underflow trap was enabled, also
487 * check for an exact subnormal result.
488 */
489 __fenv_getmxcsr(&oldmxcsr);
490 subnorm = 0;
491 if ((int)inst->op & DOUBLE) {
492 if (inst->op == cvtsi2sd) {
493 info->op1.type = fex_int;
494 info->op1.val.i = inst->op2->i[0];
495 info->op2.type = fex_nodata;
496 } else if (inst->op == cvtsi2sdq) {
497 info->op1.type = fex_llong;
498 info->op1.val.l = inst->op2->l[0];
499 info->op2.type = fex_nodata;
500 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
501 inst->op == cvttsd2si || inst->op == cvtsd2si ||
502 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
503 info->op1.type = fex_double;
504 info->op1.val.d = inst->op2->d[0];
505 info->op2.type = fex_nodata;
506 } else {
507 info->op1.type = fex_double;
508 info->op1.val.d = inst->op1->d[0];
509 info->op2.type = fex_double;
510 info->op2.val.d = inst->op2->d[0];
511 }
512 info->res.type = fex_double;
513 switch (inst->op) {
514 case cmpsd:
515 info->op = fex_cmp;
516 info->res.type = fex_llong;
517 switch (inst->imm & 3) {
518 case 0:
519 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
520 &info->res.val.l);
521 break;
522
523 case 1:
524 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
525 &info->res.val.l);
526 break;
527
528 case 2:
529 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
530 &info->res.val.l);
531 break;
532
533 case 3:
534 sse_cmpunordsd(&info->op1.val.d,
535 &info->op2.val.d, &info->res.val.l);
536 }
537 if (inst->imm & 4)
538 info->res.val.l ^= 0xffffffffffffffffull;
539 break;
540
541 case minsd:
542 info->op = fex_other;
543 sse_minsd(&info->op1.val.d, &info->op2.val.d,
544 &info->res.val.d);
545 break;
546
547 case maxsd:
548 info->op = fex_other;
549 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
550 &info->res.val.d);
551 break;
552
553 case addsd:
554 info->op = fex_add;
555 sse_addsd(&info->op1.val.d, &info->op2.val.d,
556 &info->res.val.d);
557 if (my_fp_class(&info->res.val.d) == fp_subnormal)
558 subnorm = 1;
559 break;
560
561 case subsd:
562 info->op = fex_sub;
563 sse_subsd(&info->op1.val.d, &info->op2.val.d,
564 &info->res.val.d);
565 if (my_fp_class(&info->res.val.d) == fp_subnormal)
566 subnorm = 1;
567 break;
568
569 case mulsd:
570 info->op = fex_mul;
571 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
572 &info->res.val.d);
573 if (my_fp_class(&info->res.val.d) == fp_subnormal)
574 subnorm = 1;
575 break;
576
577 case divsd:
578 info->op = fex_div;
579 sse_divsd(&info->op1.val.d, &info->op2.val.d,
580 &info->res.val.d);
581 if (my_fp_class(&info->res.val.d) == fp_subnormal)
582 subnorm = 1;
583 break;
584
585 case sqrtsd:
586 info->op = fex_sqrt;
587 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
588 break;
589
590 case cvtsd2ss:
591 info->op = fex_cnvt;
592 info->res.type = fex_float;
593 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
594 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
595 subnorm = 1;
596 break;
597
598 case cvtsi2sd:
599 info->op = fex_cnvt;
600 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
601 break;
602
603 case cvttsd2si:
604 info->op = fex_cnvt;
605 info->res.type = fex_int;
606 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
607 break;
608
609 case cvtsd2si:
610 info->op = fex_cnvt;
611 info->res.type = fex_int;
612 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
613 break;
614
615 #ifdef __amd64
616 case cvtsi2sdq:
617 info->op = fex_cnvt;
618 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
619 break;
620
621 case cvttsd2siq:
622 info->op = fex_cnvt;
623 info->res.type = fex_llong;
624 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
625 break;
626
627 case cvtsd2siq:
628 info->op = fex_cnvt;
629 info->res.type = fex_llong;
630 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
631 break;
632 #endif
633
634 case ucomisd:
635 info->op = fex_cmp;
636 info->res.type = fex_nodata;
637 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
638 break;
639
640 case comisd:
641 info->op = fex_cmp;
642 info->res.type = fex_nodata;
643 sse_comisd(&info->op1.val.d, &info->op2.val.d);
644 break;
645 default:
646 break;
647 }
648 } else {
649 if (inst->op == cvtsi2ss) {
650 info->op1.type = fex_int;
651 info->op1.val.i = inst->op2->i[0];
652 info->op2.type = fex_nodata;
653 } else if (inst->op == cvtsi2ssq) {
654 info->op1.type = fex_llong;
655 info->op1.val.l = inst->op2->l[0];
656 info->op2.type = fex_nodata;
657 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
658 inst->op == cvttss2si || inst->op == cvtss2si ||
659 inst->op == cvttss2siq || inst->op == cvtss2siq) {
660 info->op1.type = fex_float;
661 info->op1.val.f = inst->op2->f[0];
662 info->op2.type = fex_nodata;
663 } else {
664 info->op1.type = fex_float;
665 info->op1.val.f = inst->op1->f[0];
666 info->op2.type = fex_float;
667 info->op2.val.f = inst->op2->f[0];
668 }
669 info->res.type = fex_float;
670 switch (inst->op) {
671 case cmpss:
672 info->op = fex_cmp;
673 info->res.type = fex_int;
674 switch (inst->imm & 3) {
675 case 0:
676 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
677 &info->res.val.i);
678 break;
679
680 case 1:
681 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
682 &info->res.val.i);
683 break;
684
685 case 2:
686 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
687 &info->res.val.i);
688 break;
689
690 case 3:
691 sse_cmpunordss(&info->op1.val.f,
692 &info->op2.val.f, &info->res.val.i);
693 }
694 if (inst->imm & 4)
695 info->res.val.i ^= 0xffffffffu;
696 break;
697
698 case minss:
699 info->op = fex_other;
700 sse_minss(&info->op1.val.f, &info->op2.val.f,
701 &info->res.val.f);
702 break;
703
704 case maxss:
705 info->op = fex_other;
706 sse_maxss(&info->op1.val.f, &info->op2.val.f,
707 &info->res.val.f);
708 break;
709
710 case addss:
711 info->op = fex_add;
712 sse_addss(&info->op1.val.f, &info->op2.val.f,
713 &info->res.val.f);
714 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
715 subnorm = 1;
716 break;
717
718 case subss:
719 info->op = fex_sub;
720 sse_subss(&info->op1.val.f, &info->op2.val.f,
721 &info->res.val.f);
722 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
723 subnorm = 1;
724 break;
725
726 case mulss:
727 info->op = fex_mul;
728 sse_mulss(&info->op1.val.f, &info->op2.val.f,
729 &info->res.val.f);
730 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
731 subnorm = 1;
732 break;
733
734 case divss:
735 info->op = fex_div;
736 sse_divss(&info->op1.val.f, &info->op2.val.f,
737 &info->res.val.f);
738 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
739 subnorm = 1;
740 break;
741
742 case sqrtss:
743 info->op = fex_sqrt;
744 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
745 break;
746
747 case cvtss2sd:
748 info->op = fex_cnvt;
749 info->res.type = fex_double;
750 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
751 break;
752
753 case cvtsi2ss:
754 info->op = fex_cnvt;
755 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
756 break;
757
758 case cvttss2si:
759 info->op = fex_cnvt;
760 info->res.type = fex_int;
761 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
762 break;
763
764 case cvtss2si:
765 info->op = fex_cnvt;
766 info->res.type = fex_int;
767 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
768 break;
769
770 #ifdef __amd64
771 case cvtsi2ssq:
772 info->op = fex_cnvt;
773 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
774 break;
775
776 case cvttss2siq:
777 info->op = fex_cnvt;
778 info->res.type = fex_llong;
779 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
780 break;
781
782 case cvtss2siq:
783 info->op = fex_cnvt;
784 info->res.type = fex_llong;
785 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
786 break;
787 #endif
788
789 case ucomiss:
790 info->op = fex_cmp;
791 info->res.type = fex_nodata;
792 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
793 break;
794
795 case comiss:
796 info->op = fex_cmp;
797 info->res.type = fex_nodata;
798 sse_comiss(&info->op1.val.f, &info->op2.val.f);
799 break;
800 default:
801 break;
802 }
803 }
804 __fenv_getmxcsr(&mxcsr);
805 info->flags = mxcsr & 0x3d;
806 __fenv_setmxcsr(&oldmxcsr);
807
808 /* determine which exception would have been trapped */
809 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
810 >> 7) & 0x3d;
811 e = mxcsr & te;
812 if (e & FE_INVALID)
813 return __fex_get_sse_invalid_type(inst);
814 if (e & FE_DIVBYZERO)
815 return fex_division;
816 if (e & FE_OVERFLOW)
817 return fex_overflow;
818 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
819 return fex_underflow;
820 if (e & FE_INEXACT)
821 return fex_inexact;
822 return (enum fex_exception)-1;
823 }
824
825 /*
826 * Emulate a SIMD SSE instruction to determine which exceptions occur
827 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
828 * trapped exception that would occur if the i-th part of the SIMD
829 * instruction were executed in isolation; set e[i] to -1 if no
830 * trapped exception would occur in this part. Also fill in info[i]
831 * with the corresponding operands, default untrapped result, and
832 * flags.
833 *
834 * This routine does not work if the instruction specified by *inst
835 * is not a SIMD instruction.
836 */
837 void
838 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
839 fex_info_t *info)
840 {
841 sseinst_t dummy;
842 int i;
843
844 e[0] = e[1] = e[2] = e[3] = -1;
845
846 /* perform each part of the SIMD operation */
847 switch (inst->op) {
848 case cmpps:
849 dummy.op = cmpss;
850 dummy.imm = inst->imm;
851 for (i = 0; i < 4; i++) {
852 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
853 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
854 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
855 }
856 break;
857
858 case minps:
859 dummy.op = minss;
860 for (i = 0; i < 4; i++) {
861 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
862 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
863 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
864 }
865 break;
866
867 case maxps:
868 dummy.op = maxss;
869 for (i = 0; i < 4; i++) {
870 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
871 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
872 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
873 }
874 break;
875
876 case addps:
877 dummy.op = addss;
878 for (i = 0; i < 4; i++) {
879 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
880 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
881 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
882 }
883 break;
884
885 case subps:
886 dummy.op = subss;
887 for (i = 0; i < 4; i++) {
888 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
889 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
890 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
891 }
892 break;
893
894 case mulps:
895 dummy.op = mulss;
896 for (i = 0; i < 4; i++) {
897 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
898 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
899 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
900 }
901 break;
902
903 case divps:
904 dummy.op = divss;
905 for (i = 0; i < 4; i++) {
906 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
907 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
908 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
909 }
910 break;
911
912 case sqrtps:
913 dummy.op = sqrtss;
914 for (i = 0; i < 4; i++) {
915 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
916 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
917 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
918 }
919 break;
920
921 case cvtdq2ps:
922 dummy.op = cvtsi2ss;
923 for (i = 0; i < 4; i++) {
924 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
925 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
926 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
927 }
928 break;
929
930 case cvttps2dq:
931 dummy.op = cvttss2si;
932 for (i = 0; i < 4; i++) {
933 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
934 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
935 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
936 }
937 break;
938
939 case cvtps2dq:
940 dummy.op = cvtss2si;
941 for (i = 0; i < 4; i++) {
942 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
945 }
946 break;
947
948 case cvtpi2ps:
949 dummy.op = cvtsi2ss;
950 for (i = 0; i < 2; i++) {
951 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
952 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
953 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
954 }
955 break;
956
957 case cvttps2pi:
958 dummy.op = cvttss2si;
959 for (i = 0; i < 2; i++) {
960 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
961 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
962 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
963 }
964 break;
965
966 case cvtps2pi:
967 dummy.op = cvtss2si;
968 for (i = 0; i < 2; i++) {
969 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
970 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
971 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
972 }
973 break;
974
975 case cmppd:
976 dummy.op = cmpsd;
977 dummy.imm = inst->imm;
978 for (i = 0; i < 2; i++) {
979 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
980 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
981 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
982 }
983 break;
984
985 case minpd:
986 dummy.op = minsd;
987 for (i = 0; i < 2; i++) {
988 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
989 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
990 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
991 }
992 break;
993
994 case maxpd:
995 dummy.op = maxsd;
996 for (i = 0; i < 2; i++) {
997 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
998 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000 }
1001 break;
1002
1003 case addpd:
1004 dummy.op = addsd;
1005 for (i = 0; i < 2; i++) {
1006 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1007 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1008 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1009 }
1010 break;
1011
1012 case subpd:
1013 dummy.op = subsd;
1014 for (i = 0; i < 2; i++) {
1015 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1016 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1017 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1018 }
1019 break;
1020
1021 case mulpd:
1022 dummy.op = mulsd;
1023 for (i = 0; i < 2; i++) {
1024 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1025 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1026 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1027 }
1028 break;
1029
1030 case divpd:
1031 dummy.op = divsd;
1032 for (i = 0; i < 2; i++) {
1033 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1034 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1035 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1036 }
1037 break;
1038
1039 case sqrtpd:
1040 dummy.op = sqrtsd;
1041 for (i = 0; i < 2; i++) {
1042 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1043 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1044 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1045 }
1046 break;
1047
1048 case cvtpi2pd:
1049 case cvtdq2pd:
1050 dummy.op = cvtsi2sd;
1051 for (i = 0; i < 2; i++) {
1052 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055 }
1056 break;
1057
1058 case cvttpd2pi:
1059 case cvttpd2dq:
1060 dummy.op = cvttsd2si;
1061 for (i = 0; i < 2; i++) {
1062 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1063 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1064 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1065 }
1066 break;
1067
1068 case cvtpd2pi:
1069 case cvtpd2dq:
1070 dummy.op = cvtsd2si;
1071 for (i = 0; i < 2; i++) {
1072 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1073 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1074 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1075 }
1076 break;
1077
1078 case cvtps2pd:
1079 dummy.op = cvtss2sd;
1080 for (i = 0; i < 2; i++) {
1081 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1082 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1083 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1084 }
1085 break;
1086
1087 case cvtpd2ps:
1088 dummy.op = cvtsd2ss;
1089 for (i = 0; i < 2; i++) {
1090 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1091 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1092 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1093 }
1094 default:
1095 break;
1096 }
1097 }
1098
1099 /*
1100 * Store the result value from *info in the destination of the scalar
1101 * SSE instruction specified by *inst. If no result is given but the
1102 * exception is underflow or overflow, supply the default trapped result.
1103 *
1104 * This routine does not work if the instruction specified by *inst
1105 * is not a scalar instruction.
1106 */
1107 void
1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1109 fex_info_t *info)
1110 {
1111 int i = 0;
1112 long long l = 0L;;
1113 float f = 0.0, fscl;
1114 double d = 0.0L, dscl;
1115
1116 /* for compares that write eflags, just set the flags
1117 to indicate "unordered" */
1118 if (inst->op == ucomiss || inst->op == comiss ||
1119 inst->op == ucomisd || inst->op == comisd) {
1120 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1121 return;
1122 }
1123
1124 /* if info doesn't specify a result value, try to generate
1125 the default trapped result */
1126 if (info->res.type == fex_nodata) {
1127 /* set scale factors for exponent wrapping */
1128 switch (e) {
1129 case fex_overflow:
1130 fscl = 1.262177448e-29f; /* 2^-96 */
1131 dscl = 6.441148769597133308e-232; /* 2^-768 */
1132 break;
1133
1134 case fex_underflow:
1135 fscl = 7.922816251e+28f; /* 2^96 */
1136 dscl = 1.552518092300708935e+231; /* 2^768 */
1137 break;
1138
1139 default:
1140 (void) __fex_get_sse_op(uap, inst, info);
1141 if (info->res.type == fex_nodata)
1142 return;
1143 goto stuff;
1144 }
1145
1146 /* generate the wrapped result */
1147 if (inst->op == cvtsd2ss) {
1148 info->op1.type = fex_double;
1149 info->op1.val.d = inst->op2->d[0];
1150 info->op2.type = fex_nodata;
1151 info->res.type = fex_float;
1152 info->res.val.f = (float)(fscl * (fscl *
1153 info->op1.val.d));
1154 } else if ((int)inst->op & DOUBLE) {
1155 info->op1.type = fex_double;
1156 info->op1.val.d = inst->op1->d[0];
1157 info->op2.type = fex_double;
1158 info->op2.val.d = inst->op2->d[0];
1159 info->res.type = fex_double;
1160 switch (inst->op) {
1161 case addsd:
1162 info->res.val.d = dscl * (dscl *
1163 info->op1.val.d + dscl * info->op2.val.d);
1164 break;
1165
1166 case subsd:
1167 info->res.val.d = dscl * (dscl *
1168 info->op1.val.d - dscl * info->op2.val.d);
1169 break;
1170
1171 case mulsd:
1172 info->res.val.d = (dscl * info->op1.val.d) *
1173 (dscl * info->op2.val.d);
1174 break;
1175
1176 case divsd:
1177 info->res.val.d = (dscl * info->op1.val.d) /
1178 (info->op2.val.d / dscl);
1179 break;
1180
1181 default:
1182 return;
1183 }
1184 } else {
1185 info->op1.type = fex_float;
1186 info->op1.val.f = inst->op1->f[0];
1187 info->op2.type = fex_float;
1188 info->op2.val.f = inst->op2->f[0];
1189 info->res.type = fex_float;
1190 switch (inst->op) {
1191 case addss:
1192 info->res.val.f = fscl * (fscl *
1193 info->op1.val.f + fscl * info->op2.val.f);
1194 break;
1195
1196 case subss:
1197 info->res.val.f = fscl * (fscl *
1198 info->op1.val.f - fscl * info->op2.val.f);
1199 break;
1200
1201 case mulss:
1202 info->res.val.f = (fscl * info->op1.val.f) *
1203 (fscl * info->op2.val.f);
1204 break;
1205
1206 case divss:
1207 info->res.val.f = (fscl * info->op1.val.f) /
1208 (info->op2.val.f / fscl);
1209 break;
1210
1211 default:
1212 return;
1213 }
1214 }
1215 }
1216
1217 /* put the result in the destination */
1218 stuff:
1219 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1220 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1221 switch (info->res.type) {
1222 case fex_int:
1223 i = info->res.val.i;
1224 break;
1225
1226 case fex_llong:
1227 i = info->res.val.l;
1228 break;
1229
1230 case fex_float:
1231 i = info->res.val.f;
1232 break;
1233
1234 case fex_double:
1235 i = info->res.val.d;
1236 break;
1237
1238 case fex_ldouble:
1239 i = info->res.val.q;
1240 break;
1241
1242 default:
1243 break;
1244 }
1245 inst->op1->i[0] = i;
1246 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1247 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1248 inst->op == cvtsd2siq) {
1249 switch (info->res.type) {
1250 case fex_int:
1251 l = info->res.val.i;
1252 break;
1253
1254 case fex_llong:
1255 l = info->res.val.l;
1256 break;
1257
1258 case fex_float:
1259 l = info->res.val.f;
1260 break;
1261
1262 case fex_double:
1263 l = info->res.val.d;
1264 break;
1265
1266 case fex_ldouble:
1267 l = info->res.val.q;
1268 break;
1269
1270 default:
1271 break;
1272 }
1273 inst->op1->l[0] = l;
1274 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1275 inst->op == cvtss2sd) {
1276 switch (info->res.type) {
1277 case fex_int:
1278 d = info->res.val.i;
1279 break;
1280
1281 case fex_llong:
1282 d = info->res.val.l;
1283 break;
1284
1285 case fex_float:
1286 d = info->res.val.f;
1287 break;
1288
1289 case fex_double:
1290 d = info->res.val.d;
1291 break;
1292
1293 case fex_ldouble:
1294 d = info->res.val.q;
1295 break;
1296
1297 default:
1298 break;
1299 }
1300 inst->op1->d[0] = d;
1301 } else {
1302 switch (info->res.type) {
1303 case fex_int:
1304 f = info->res.val.i;
1305 break;
1306
1307 case fex_llong:
1308 f = info->res.val.l;
1309 break;
1310
1311 case fex_float:
1312 f = info->res.val.f;
1313 break;
1314
1315 case fex_double:
1316 f = info->res.val.d;
1317 break;
1318
1319 case fex_ldouble:
1320 f = info->res.val.q;
1321 break;
1322
1323 default:
1324 break;
1325 }
1326 inst->op1->f[0] = f;
1327 }
1328 }
1329
1330 /*
1331 * Store the results from a SIMD instruction. For each i, store
1332 * the result value from info[i] in the i-th part of the destination
1333 * of the SIMD SSE instruction specified by *inst. If no result
1334 * is given but the exception indicated by e[i] is underflow or
1335 * overflow, supply the default trapped result.
1336 *
1337 * This routine does not work if the instruction specified by *inst
1338 * is not a SIMD instruction.
1339 */
1340 void
1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1342 fex_info_t *info)
1343 {
1344 sseinst_t dummy;
1345 int i;
1346
1347 /* store each part */
1348 switch (inst->op) {
1349 case cmpps:
1350 dummy.op = cmpss;
1351 dummy.imm = inst->imm;
1352 for (i = 0; i < 4; i++) {
1353 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1354 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1355 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1356 }
1357 break;
1358
1359 case minps:
1360 dummy.op = minss;
1361 for (i = 0; i < 4; i++) {
1362 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1363 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1364 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1365 }
1366 break;
1367
1368 case maxps:
1369 dummy.op = maxss;
1370 for (i = 0; i < 4; i++) {
1371 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1372 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1373 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1374 }
1375 break;
1376
1377 case addps:
1378 dummy.op = addss;
1379 for (i = 0; i < 4; i++) {
1380 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1381 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1382 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1383 }
1384 break;
1385
1386 case subps:
1387 dummy.op = subss;
1388 for (i = 0; i < 4; i++) {
1389 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1390 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1391 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1392 }
1393 break;
1394
1395 case mulps:
1396 dummy.op = mulss;
1397 for (i = 0; i < 4; i++) {
1398 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1399 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1400 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1401 }
1402 break;
1403
1404 case divps:
1405 dummy.op = divss;
1406 for (i = 0; i < 4; i++) {
1407 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1408 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1409 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1410 }
1411 break;
1412
1413 case sqrtps:
1414 dummy.op = sqrtss;
1415 for (i = 0; i < 4; i++) {
1416 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1417 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1418 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1419 }
1420 break;
1421
1422 case cvtdq2ps:
1423 dummy.op = cvtsi2ss;
1424 for (i = 0; i < 4; i++) {
1425 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1426 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1427 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1428 }
1429 break;
1430
1431 case cvttps2dq:
1432 dummy.op = cvttss2si;
1433 for (i = 0; i < 4; i++) {
1434 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1435 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1436 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1437 }
1438 break;
1439
1440 case cvtps2dq:
1441 dummy.op = cvtss2si;
1442 for (i = 0; i < 4; i++) {
1443 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1444 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1445 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1446 }
1447 break;
1448
1449 case cvtpi2ps:
1450 dummy.op = cvtsi2ss;
1451 for (i = 0; i < 2; i++) {
1452 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1453 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1454 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1455 }
1456 break;
1457
1458 case cvttps2pi:
1459 dummy.op = cvttss2si;
1460 for (i = 0; i < 2; i++) {
1461 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1462 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1463 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1464 }
1465 break;
1466
1467 case cvtps2pi:
1468 dummy.op = cvtss2si;
1469 for (i = 0; i < 2; i++) {
1470 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1471 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1472 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1473 }
1474 break;
1475
1476 case cmppd:
1477 dummy.op = cmpsd;
1478 dummy.imm = inst->imm;
1479 for (i = 0; i < 2; i++) {
1480 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1481 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1482 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1483 }
1484 break;
1485
1486 case minpd:
1487 dummy.op = minsd;
1488 for (i = 0; i < 2; i++) {
1489 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1490 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1491 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1492 }
1493 break;
1494
1495 case maxpd:
1496 dummy.op = maxsd;
1497 for (i = 0; i < 2; i++) {
1498 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1499 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1500 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1501 }
1502 break;
1503
1504 case addpd:
1505 dummy.op = addsd;
1506 for (i = 0; i < 2; i++) {
1507 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1508 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1509 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1510 }
1511 break;
1512
1513 case subpd:
1514 dummy.op = subsd;
1515 for (i = 0; i < 2; i++) {
1516 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1517 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1518 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1519 }
1520 break;
1521
1522 case mulpd:
1523 dummy.op = mulsd;
1524 for (i = 0; i < 2; i++) {
1525 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1526 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1527 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1528 }
1529 break;
1530
1531 case divpd:
1532 dummy.op = divsd;
1533 for (i = 0; i < 2; i++) {
1534 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1535 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1536 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1537 }
1538 break;
1539
1540 case sqrtpd:
1541 dummy.op = sqrtsd;
1542 for (i = 0; i < 2; i++) {
1543 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1544 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1545 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1546 }
1547 break;
1548
1549 case cvtpi2pd:
1550 case cvtdq2pd:
1551 dummy.op = cvtsi2sd;
1552 for (i = 0; i < 2; i++) {
1553 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1554 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1555 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1556 }
1557 break;
1558
1559 case cvttpd2pi:
1560 case cvttpd2dq:
1561 dummy.op = cvttsd2si;
1562 for (i = 0; i < 2; i++) {
1563 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1564 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566 }
1567 /* for cvttpd2dq, zero the high 64 bits of the destination */
1568 if (inst->op == cvttpd2dq)
1569 inst->op1->l[1] = 0ll;
1570 break;
1571
1572 case cvtpd2pi:
1573 case cvtpd2dq:
1574 dummy.op = cvtsd2si;
1575 for (i = 0; i < 2; i++) {
1576 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1577 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1578 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1579 }
1580 /* for cvtpd2dq, zero the high 64 bits of the destination */
1581 if (inst->op == cvtpd2dq)
1582 inst->op1->l[1] = 0ll;
1583 break;
1584
1585 case cvtps2pd:
1586 dummy.op = cvtss2sd;
1587 for (i = 0; i < 2; i++) {
1588 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1589 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1590 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1591 }
1592 break;
1593
1594 case cvtpd2ps:
1595 dummy.op = cvtsd2ss;
1596 for (i = 0; i < 2; i++) {
1597 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1598 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1599 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1600 }
1601 /* zero the high 64 bits of the destination */
1602 inst->op1->l[1] = 0ll;
1603
1604 default:
1605 break;
1606 }
1607 }
1608
--- EOF ---