Print this page
11210 libm should be cstyle(1ONBLD) clean

*** 20,29 **** --- 20,30 ---- */ /* * Copyright 2011 Nexenta Systems, Inc. All rights reserved. */ + /* * Copyright 2006 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */
*** 44,55 **** #if !defined(REG_PS) #define REG_PS EFL #endif #ifdef __amd64 ! #define regno(X) ((X < 4)? REG_RAX - X : \ ! ((X > 4)? REG_RAX + 1 - X : REG_RSP)) #else #define regno(X) (EAX - X) #endif /* --- 45,56 ---- #if !defined(REG_PS) #define REG_PS EFL #endif #ifdef __amd64 ! #define regno(X) ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \ ! REG_RSP)) #else #define regno(X) (EAX - X) #endif /*
*** 71,80 **** --- 72,82 ---- ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; /* look for pseudo-prefixes */ dbl = 0; simd = SIMD; + if (ip[i] == 0xF3) { simd = 0; i++; } else if (ip[i] == 0x66) { dbl = DOUBLE;
*** 85,132 **** i++; } /* look for AMD64 REX prefix */ rex = 0; if (ip[i] >= 0x40 && ip[i] <= 0x4F) { rex = ip[i]; i++; } /* parse opcode */ if (ip[i++] != 0x0F) ! return 0; switch (ip[i++]) { case 0x2A: inst->op = (int)cvtsi2ss + simd + dbl; if (!simd) inst->op = (int)inst->op + (rex & 8); break; case 0x2C: inst->op = (int)cvttss2si + simd + dbl; if (!simd) inst->op = (int)inst->op + (rex & 8); break; case 0x2D: inst->op = (int)cvtss2si + simd + dbl; if (!simd) inst->op = (int)inst->op + (rex & 8); break; case 0x2E: /* oddball: scalar instruction in a SIMD opcode group */ if (!simd) ! return 0; inst->op = (int)ucomiss + dbl; break; case 0x2F: /* oddball: scalar instruction in a SIMD opcode group */ if (!simd) ! return 0; inst->op = (int)comiss + dbl; break; case 0x51: inst->op = (int)sqrtss + simd + dbl; --- 87,146 ---- i++; } /* look for AMD64 REX prefix */ rex = 0; + if (ip[i] >= 0x40 && ip[i] <= 0x4F) { rex = ip[i]; i++; } /* parse opcode */ if (ip[i++] != 0x0F) ! return (0); ! switch (ip[i++]) { case 0x2A: inst->op = (int)cvtsi2ss + simd + dbl; + if (!simd) inst->op = (int)inst->op + (rex & 8); + break; case 0x2C: inst->op = (int)cvttss2si + simd + dbl; + if (!simd) inst->op = (int)inst->op + (rex & 8); + break; case 0x2D: inst->op = (int)cvtss2si + simd + dbl; + if (!simd) inst->op = (int)inst->op + (rex & 8); + break; case 0x2E: + /* oddball: scalar instruction in a SIMD opcode group */ if (!simd) ! return (0); ! inst->op = (int)ucomiss + dbl; break; case 0x2F: + /* oddball: scalar instruction in a SIMD opcode group */ if (!simd) ! return (0); ! inst->op = (int)comiss + dbl; break; case 0x51: inst->op = (int)sqrtss + simd + dbl;
*** 143,160 **** case 0x5A: inst->op = (int)cvtss2sd + simd + dbl; break; case 0x5B: if (dbl) { if (simd) inst->op = cvtps2dq; else ! return 0; } else { ! inst->op = (simd)? cvtdq2ps : cvttps2dq; } break; case 0x5C: inst->op = (int)subss + simd + dbl; break; --- 157,176 ---- case 0x5A: inst->op = (int)cvtss2sd + simd + dbl; break; case 0x5B: + if (dbl) { if (simd) inst->op = cvtps2dq; else ! return (0); } else { ! inst->op = (simd) ? cvtdq2ps : cvttps2dq; } + break; case 0x5C: inst->op = (int)subss + simd + dbl; break;
*** 174,255 **** case 0xC2: inst->op = (int)cmpss + simd + dbl; break; case 0xE6: if (simd) { if (dbl) inst->op = cvttpd2dq; else ! return 0; } else { ! inst->op = (dbl)? cvtpd2dq : cvtdq2pd; } break; default: ! return 0; } /* locate operands */ modrm = ip[i++]; ! if (inst->op == cvtss2si || inst->op == cvttss2si || ! inst->op == cvtsd2si || inst->op == cvttsd2si || ! inst->op == cvtss2siq || inst->op == cvttss2siq || ! inst->op == cvtsd2siq || inst->op == cvttsd2siq) { /* op1 is a gp register */ r = ((rex & 4) << 1) | ((modrm >> 3) & 7); inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; ! } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || ! inst->op == cvtpd2pi || inst->op == cvttpd2pi) { /* op1 is a mmx register */ #ifdef __amd64 ! inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. ! fpchip_state.st[(modrm >> 3) & 7]; #else inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + ! (char *)&uap->uc_mcontext.fpregs.fp_reg_set. ! fpchip_state.state[7]); #endif } else { /* op1 is a xmm register */ r = ((rex & 4) << 1) | ((modrm >> 3) & 7); ! inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. ! fp_reg_set.fpchip_state.xmm[r]; } if ((modrm >> 6) == 3) { ! if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || ! inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { /* op2 is a gp register */ r = ((rex & 1) << 3) | (modrm & 7); ! inst->op2 = (sseoperand_t *)&uap->uc_mcontext. ! gregs[regno(r)]; } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { /* op2 is a mmx register */ #ifdef __amd64 ! inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. ! fp_reg_set.fpchip_state.st[modrm & 7]; #else inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + ! (char *)&uap->uc_mcontext.fpregs.fp_reg_set. ! fpchip_state.state[7]); #endif } else { /* op2 is a xmm register */ r = ((rex & 1) << 3) | (modrm & 7); ! inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. ! fp_reg_set.fpchip_state.xmm[r]; } } else if ((modrm & 0xc7) == 0x05) { #ifdef __amd64 /* address of next instruction + offset */ r = i + 4; ! if (inst->op == cmpss || inst->op == cmpps || ! inst->op == cmpsd || inst->op == cmppd) r++; inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); #else /* absolute address */ inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); #endif --- 190,279 ---- case 0xC2: inst->op = (int)cmpss + simd + dbl; break; case 0xE6: + if (simd) { if (dbl) inst->op = cvttpd2dq; else ! return (0); } else { ! inst->op = (dbl) ? cvtpd2dq : cvtdq2pd; } + break; default: ! return (0); } /* locate operands */ modrm = ip[i++]; ! if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op == ! cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq || ! inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op == ! cvttsd2siq) { /* op1 is a gp register */ r = ((rex & 4) << 1) | ((modrm >> 3) & 7); inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; ! } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op == ! cvtpd2pi || inst->op == cvttpd2pi) { /* op1 is a mmx register */ #ifdef __amd64 ! inst->op1 = (sseoperand_t *) ! &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state ! .st[(modrm >> 3) & 7]; #else inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + ! (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state ! .state[7]); #endif } else { /* op1 is a xmm register */ r = ((rex & 4) << 1) | ((modrm >> 3) & 7); ! inst->op1 = ! (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set ! .fpchip_state.xmm[r]; } if ((modrm >> 6) == 3) { ! if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op == ! cvtsi2ssq || inst->op == cvtsi2sdq) { /* op2 is a gp register */ r = ((rex & 1) << 3) | (modrm & 7); ! inst->op2 = ! (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { /* op2 is a mmx register */ #ifdef __amd64 ! inst->op2 = ! (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set ! .fpchip_state.st[modrm & 7]; #else inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + ! (char *)&uap->uc_mcontext.fpregs.fp_reg_set ! .fpchip_state.state[7]); #endif } else { /* op2 is a xmm register */ r = ((rex & 1) << 3) | (modrm & 7); ! inst->op2 = ! (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set ! .fpchip_state.xmm[r]; } } else if ((modrm & 0xc7) == 0x05) { #ifdef __amd64 /* address of next instruction + offset */ r = i + 4; ! ! if (inst->op == cmpss || inst->op == cmpps || inst->op == ! cmpsd || inst->op == cmppd) r++; + inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); #else /* absolute address */ inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); #endif
*** 257,280 **** } else { /* complex address */ if ((modrm & 7) == 4) { /* parse sib byte */ sib = ip[i++]; if ((sib & 7) == 5 && (modrm >> 6) == 0) { /* start with absolute address */ addr = (char *)(uintptr_t)(*(int *)(ip + i)); i += 4; } else { /* start with base */ r = ((rex & 1) << 3) | (sib & 7); addr = (char *)uap->uc_mcontext.gregs[regno(r)]; } r = ((rex & 2) << 2) | ((sib >> 3) & 7); if (r != 4) { /* add scaled index */ ! addr += uap->uc_mcontext.gregs[regno(r)] ! << (sib >> 6); } } else { r = ((rex & 1) << 3) | (modrm & 7); addr = (char *)uap->uc_mcontext.gregs[regno(r)]; } --- 281,307 ---- } else { /* complex address */ if ((modrm & 7) == 4) { /* parse sib byte */ sib = ip[i++]; + if ((sib & 7) == 5 && (modrm >> 6) == 0) { /* start with absolute address */ addr = (char *)(uintptr_t)(*(int *)(ip + i)); i += 4; } else { /* start with base */ r = ((rex & 1) << 3) | (sib & 7); addr = (char *)uap->uc_mcontext.gregs[regno(r)]; } + r = ((rex & 2) << 2) | ((sib >> 3) & 7); + if (r != 4) { /* add scaled index */ ! addr += uap->uc_mcontext.gregs[regno(r)] << ! (sib >> 6); } } else { r = ((rex & 1) << 3) | (modrm & 7); addr = (char *)uap->uc_mcontext.gregs[regno(r)]; }
*** 284,339 **** addr += (char)ip[i++]; } else if ((modrm >> 6) == 2) { addr += *(int *)(ip + i); i += 4; } inst->op2 = (sseoperand_t *)addr; } if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || inst->op == cmppd) { /* get the immediate operand */ inst->imm = ip[i++]; } ! return i; } static enum fp_class_type my_fp_classf(float *x) { int i = *(int *)x & ~0x80000000; if (i < 0x7f800000) { if (i < 0x00800000) ! return ((i == 0)? fp_zero : fp_subnormal); ! return fp_normal; } - else if (i == 0x7f800000) - return fp_infinity; - else if (i & 0x400000) - return fp_quiet; - else - return fp_signaling; } static enum fp_class_type my_fp_class(double *x) { ! int i = *(1+(int *)x) & ~0x80000000; if (i < 0x7ff00000) { if (i < 0x00100000) ! return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); ! return fp_normal; } - else if (i == 0x7ff00000 && *(int *)x == 0) - return fp_infinity; - else if (i & 0x80000) - return fp_quiet; - else - return fp_signaling; } /* * Inspect a scalar SSE instruction that incurred an invalid operation * exception to determine which type of exception it was. --- 311,370 ---- addr += (char)ip[i++]; } else if ((modrm >> 6) == 2) { addr += *(int *)(ip + i); i += 4; } + inst->op2 = (sseoperand_t *)addr; } if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || inst->op == cmppd) { /* get the immediate operand */ inst->imm = ip[i++]; } ! return (i); } static enum fp_class_type my_fp_classf(float *x) { int i = *(int *)x & ~0x80000000; if (i < 0x7f800000) { if (i < 0x00800000) ! return ((i == 0) ? fp_zero : fp_subnormal); ! ! return (fp_normal); ! } else if (i == 0x7f800000) { ! return (fp_infinity); ! } else if (i & 0x400000) { ! return (fp_quiet); ! } else { ! return (fp_signaling); } } static enum fp_class_type my_fp_class(double *x) { ! int i = *(1 + (int *)x) & ~0x80000000; if (i < 0x7ff00000) { if (i < 0x00100000) ! return (((i | *(int *)x) == 0) ? fp_zero : ! fp_subnormal); ! ! return (fp_normal); ! } else if (i == 0x7ff00000 && *(int *)x == 0) { ! return (fp_infinity); ! } else if (i & 0x80000) { ! return (fp_quiet); ! } else { ! return (fp_signaling); } } /* * Inspect a scalar SSE instruction that incurred an invalid operation * exception to determine which type of exception it was.
*** 342,385 **** __fex_get_sse_invalid_type(sseinst_t *inst) { enum fp_class_type t1, t2; /* check op2 for signaling nan */ ! t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : my_fp_classf(&inst->op2->f[0]); if (t2 == fp_signaling) ! return fex_inv_snan; /* eliminate all single-operand instructions */ switch (inst->op) { case cvtsd2ss: case cvtss2sd: /* hmm, this shouldn't have happened */ ! return (enum fex_exception) -1; case sqrtss: case sqrtsd: ! return fex_inv_sqrt; case cvtss2si: case cvtsd2si: case cvttss2si: case cvttsd2si: case cvtss2siq: case cvtsd2siq: case cvttss2siq: case cvttsd2siq: ! return fex_inv_int; default: break; } /* check op1 for signaling nan */ ! t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : my_fp_classf(&inst->op1->f[0]); if (t1 == fp_signaling) ! return fex_inv_snan; /* check two-operand instructions for other cases */ switch (inst->op) { case cmpss: case cmpsd: --- 373,418 ---- __fex_get_sse_invalid_type(sseinst_t *inst) { enum fp_class_type t1, t2; /* check op2 for signaling nan */ ! t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) : my_fp_classf(&inst->op2->f[0]); + if (t2 == fp_signaling) ! return (fex_inv_snan); /* eliminate all single-operand instructions */ switch (inst->op) { case cvtsd2ss: case cvtss2sd: /* hmm, this shouldn't have happened */ ! return ((enum fex_exception)-1); case sqrtss: case sqrtsd: ! return (fex_inv_sqrt); case cvtss2si: case cvtsd2si: case cvttss2si: case cvttsd2si: case cvtss2siq: case cvtsd2siq: case cvttss2siq: case cvttsd2siq: ! return (fex_inv_int); default: break; } /* check op1 for signaling nan */ ! t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) : my_fp_classf(&inst->op1->f[0]); + if (t1 == fp_signaling) ! return (fex_inv_snan); /* check two-operand instructions for other cases */ switch (inst->op) { case cmpss: case cmpsd:
*** 387,424 **** case minsd: case maxss: case maxsd: case comiss: case comisd: ! return fex_inv_cmp; case addss: case addsd: case subss: case subsd: if (t1 == fp_infinity && t2 == fp_infinity) ! return fex_inv_isi; break; case mulss: case mulsd: ! if ((t1 == fp_zero && t2 == fp_infinity) || ! (t2 == fp_zero && t1 == fp_infinity)) ! return fex_inv_zmi; break; case divss: case divsd: if (t1 == fp_zero && t2 == fp_zero) ! return fex_inv_zdz; if (t1 == fp_infinity && t2 == fp_infinity) ! return fex_inv_idi; default: break; } ! return (enum fex_exception)-1; } /* inline templates */ extern void sse_cmpeqss(float *, float *, int *); extern void sse_cmpltss(float *, float *, int *); --- 420,464 ---- case minsd: case maxss: case maxsd: case comiss: case comisd: ! return (fex_inv_cmp); case addss: case addsd: case subss: case subsd: + if (t1 == fp_infinity && t2 == fp_infinity) ! return (fex_inv_isi); ! break; case mulss: case mulsd: ! ! if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero && ! t1 == fp_infinity)) ! return (fex_inv_zmi); ! break; case divss: case divsd: + if (t1 == fp_zero && t2 == fp_zero) ! return (fex_inv_zdz); ! if (t1 == fp_infinity && t2 == fp_infinity) ! return (fex_inv_idi); ! default: break; } ! return ((enum fex_exception)-1); } /* inline templates */ extern void sse_cmpeqss(float *, float *, int *); extern void sse_cmpltss(float *, float *, int *);
*** 435,449 **** --- 475,491 ---- extern void sse_comiss(float *, float *); extern void sse_cvtss2sd(float *, double *); extern void sse_cvtsi2ss(int *, float *); extern void sse_cvttss2si(float *, int *); extern void sse_cvtss2si(float *, int *); + #ifdef __amd64 extern void sse_cvtsi2ssq(long long *, float *); extern void sse_cvttss2siq(float *, long long *); extern void sse_cvtss2siq(float *, long long *); #endif + extern void sse_cmpeqsd(double *, double *, long long *); extern void sse_cmpltsd(double *, double *, long long *); extern void sse_cmplesd(double *, double *, long long *); extern void sse_cmpunordsd(double *, double *, long long *); extern void sse_minsd(double *, double *, double *);
*** 457,466 **** --- 499,509 ---- extern void sse_comisd(double *, double *); extern void sse_cvtsd2ss(double *, float *); extern void sse_cvtsi2sd(int *, double *); extern void sse_cvttsd2si(double *, int *); extern void sse_cvtsd2si(double *, int *); + #ifdef __amd64 extern void sse_cvtsi2sdq(long long *, double *); extern void sse_cvttsd2siq(double *, long long *); extern void sse_cvtsd2siq(double *, long long *); #endif
*** 485,520 **** * exception flags. If the underflow trap was enabled, also * check for an exact subnormal result. */ __fenv_getmxcsr(&oldmxcsr); subnorm = 0; if ((int)inst->op & DOUBLE) { if (inst->op == cvtsi2sd) { info->op1.type = fex_int; info->op1.val.i = inst->op2->i[0]; info->op2.type = fex_nodata; } else if (inst->op == cvtsi2sdq) { info->op1.type = fex_llong; info->op1.val.l = inst->op2->l[0]; info->op2.type = fex_nodata; } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || ! inst->op == cvttsd2si || inst->op == cvtsd2si || ! inst->op == cvttsd2siq || inst->op == cvtsd2siq) { info->op1.type = fex_double; info->op1.val.d = inst->op2->d[0]; info->op2.type = fex_nodata; } else { info->op1.type = fex_double; info->op1.val.d = inst->op1->d[0]; info->op2.type = fex_double; info->op2.val.d = inst->op2->d[0]; } info->res.type = fex_double; switch (inst->op) { case cmpsd: info->op = fex_cmp; info->res.type = fex_llong; switch (inst->imm & 3) { case 0: sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.l); break; --- 528,567 ---- * exception flags. If the underflow trap was enabled, also * check for an exact subnormal result. */ __fenv_getmxcsr(&oldmxcsr); subnorm = 0; + if ((int)inst->op & DOUBLE) { if (inst->op == cvtsi2sd) { info->op1.type = fex_int; info->op1.val.i = inst->op2->i[0]; info->op2.type = fex_nodata; } else if (inst->op == cvtsi2sdq) { info->op1.type = fex_llong; info->op1.val.l = inst->op2->l[0]; info->op2.type = fex_nodata; } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || ! inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op == ! cvttsd2siq || inst->op == cvtsd2siq) { info->op1.type = fex_double; info->op1.val.d = inst->op2->d[0]; info->op2.type = fex_nodata; } else { info->op1.type = fex_double; info->op1.val.d = inst->op1->d[0]; info->op2.type = fex_double; info->op2.val.d = inst->op2->d[0]; } + info->res.type = fex_double; + switch (inst->op) { case cmpsd: info->op = fex_cmp; info->res.type = fex_llong; + switch (inst->imm & 3) { case 0: sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.l); break;
*** 531,542 **** --- 578,591 ---- case 3: sse_cmpunordsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.l); } + if (inst->imm & 4) info->res.val.l ^= 0xffffffffffffffffull; + break; case minsd: info->op = fex_other; sse_minsd(&info->op1.val.d, &info->op2.val.d,
*** 551,586 **** --- 600,643 ---- case addsd: info->op = fex_add; sse_addsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) subnorm = 1; + break; case subsd: info->op = fex_sub; sse_subsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) subnorm = 1; + break; case mulsd: info->op = fex_mul; sse_mulsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) subnorm = 1; + break; case divsd: info->op = fex_div; sse_divsd(&info->op1.val.d, &info->op2.val.d, &info->res.val.d); + if (my_fp_class(&info->res.val.d) == fp_subnormal) subnorm = 1; + break; case sqrtsd: info->op = fex_sqrt; sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
*** 588,599 **** --- 645,658 ---- case cvtsd2ss: info->op = fex_cnvt; info->res.type = fex_float; sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) subnorm = 1; + break; case cvtsi2sd: info->op = fex_cnvt; sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
*** 652,677 **** } else if (inst->op == cvtsi2ssq) { info->op1.type = fex_llong; info->op1.val.l = inst->op2->l[0]; info->op2.type = fex_nodata; } else if (inst->op == sqrtss || inst->op == cvtss2sd || ! inst->op == cvttss2si || inst->op == cvtss2si || ! inst->op == cvttss2siq || inst->op == cvtss2siq) { info->op1.type = fex_float; info->op1.val.f = inst->op2->f[0]; info->op2.type = fex_nodata; } else { info->op1.type = fex_float; info->op1.val.f = inst->op1->f[0]; info->op2.type = fex_float; info->op2.val.f = inst->op2->f[0]; } info->res.type = fex_float; switch (inst->op) { case cmpss: info->op = fex_cmp; info->res.type = fex_int; switch (inst->imm & 3) { case 0: sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, &info->res.val.i); break; --- 711,739 ---- } else if (inst->op == cvtsi2ssq) { info->op1.type = fex_llong; info->op1.val.l = inst->op2->l[0]; info->op2.type = fex_nodata; } else if (inst->op == sqrtss || inst->op == cvtss2sd || ! inst->op == cvttss2si || inst->op == cvtss2si || inst->op == ! cvttss2siq || inst->op == cvtss2siq) { info->op1.type = fex_float; info->op1.val.f = inst->op2->f[0]; info->op2.type = fex_nodata; } else { info->op1.type = fex_float; info->op1.val.f = inst->op1->f[0]; info->op2.type = fex_float; info->op2.val.f = inst->op2->f[0]; } + info->res.type = fex_float; + switch (inst->op) { case cmpss: info->op = fex_cmp; info->res.type = fex_int; + switch (inst->imm & 3) { case 0: sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, &info->res.val.i); break;
*** 688,699 **** --- 750,763 ---- case 3: sse_cmpunordss(&info->op1.val.f, &info->op2.val.f, &info->res.val.i); } + if (inst->imm & 4) info->res.val.i ^= 0xffffffffu; + break; case minss: info->op = fex_other; sse_minss(&info->op1.val.f, &info->op2.val.f,
*** 708,743 **** --- 772,815 ---- case addss: info->op = fex_add; sse_addss(&info->op1.val.f, &info->op2.val.f, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) subnorm = 1; + break; case subss: info->op = fex_sub; sse_subss(&info->op1.val.f, &info->op2.val.f, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) subnorm = 1; + break; case mulss: info->op = fex_mul; sse_mulss(&info->op1.val.f, &info->op2.val.f, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) subnorm = 1; + break; case divss: info->op = fex_div; sse_divss(&info->op1.val.f, &info->op2.val.f, &info->res.val.f); + if (my_fp_classf(&info->res.val.f) == fp_subnormal) subnorm = 1; + break; case sqrtss: info->op = fex_sqrt; sse_sqrtss(&info->op1.val.f, &info->res.val.f);
*** 798,826 **** break; default: break; } } __fenv_getmxcsr(&mxcsr); info->flags = mxcsr & 0x3d; __fenv_setmxcsr(&oldmxcsr); /* determine which exception would have been trapped */ ! te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr ! >> 7) & 0x3d; e = mxcsr & te; if (e & FE_INVALID) ! return __fex_get_sse_invalid_type(inst); if (e & FE_DIVBYZERO) ! return fex_division; if (e & FE_OVERFLOW) ! return fex_overflow; if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) ! return fex_underflow; if (e & FE_INEXACT) ! return fex_inexact; ! return (enum fex_exception)-1; } /* * Emulate a SIMD SSE instruction to determine which exceptions occur * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the --- 870,905 ---- break; default: break; } } + __fenv_getmxcsr(&mxcsr); info->flags = mxcsr & 0x3d; __fenv_setmxcsr(&oldmxcsr); /* determine which exception would have been trapped */ ! te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) & ! 0x3d; e = mxcsr & te; + if (e & FE_INVALID) ! return (__fex_get_sse_invalid_type(inst)); ! if (e & FE_DIVBYZERO) ! return (fex_division); ! if (e & FE_OVERFLOW) ! return (fex_overflow); ! if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) ! return (fex_underflow); ! if (e & FE_INEXACT) ! return (fex_inexact); ! ! return ((enum fex_exception)-1); } /* * Emulate a SIMD SSE instruction to determine which exceptions occur * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
*** 845,1097 **** --- 924,1230 ---- /* perform each part of the SIMD operation */ switch (inst->op) { case cmpps: dummy.op = cmpss; dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case minps: dummy.op = minss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case maxps: dummy.op = maxss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case addps: dummy.op = addss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case subps: dummy.op = subss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case mulps: dummy.op = mulss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case divps: dummy.op = divss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case sqrtps: dummy.op = sqrtss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtdq2ps: dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvttps2dq: dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtps2dq: dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtpi2ps: dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvttps2pi: dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtps2pi: dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cmppd: dummy.op = cmpsd; dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case minpd: dummy.op = minsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case maxpd: dummy.op = maxsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case addpd: dummy.op = addsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case subpd: dummy.op = subsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case mulpd: dummy.op = mulsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case divpd: dummy.op = divsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case sqrtpd: dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtpi2pd: case cvtdq2pd: dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvttpd2pi: case cvttpd2dq: dummy.op = cvttsd2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtpd2pi: case cvtpd2dq: dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtps2pd: dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + break; case cvtpd2ps: dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); } + default: break; } }
*** 1106,1129 **** void __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, fex_info_t *info) { int i = 0; ! long long l = 0L;; float f = 0.0, fscl; double d = 0.0L, dscl; ! /* for compares that write eflags, just set the flags ! to indicate "unordered" */ ! if (inst->op == ucomiss || inst->op == comiss || ! inst->op == ucomisd || inst->op == comisd) { uap->uc_mcontext.gregs[REG_PS] |= 0x45; return; } ! /* if info doesn't specify a result value, try to generate ! the default trapped result */ if (info->res.type == fex_nodata) { /* set scale factors for exponent wrapping */ switch (e) { case fex_overflow: fscl = 1.262177448e-29f; /* 2^-96 */ --- 1239,1266 ---- void __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, fex_info_t *info) { int i = 0; ! long long l = 0L; float f = 0.0, fscl; double d = 0.0L, dscl; ! /* ! * for compares that write eflags, just set the flags ! * to indicate "unordered" ! */ ! if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd || ! inst->op == comisd) { uap->uc_mcontext.gregs[REG_PS] |= 0x45; return; } ! /* ! * if info doesn't specify a result value, try to generate ! * the default trapped result ! */ if (info->res.type == fex_nodata) { /* set scale factors for exponent wrapping */ switch (e) { case fex_overflow: fscl = 1.262177448e-29f; /* 2^-96 */
*** 1135,1146 **** --- 1272,1285 ---- dscl = 1.552518092300708935e+231; /* 2^768 */ break; default: (void) __fex_get_sse_op(uap, inst, info); + if (info->res.type == fex_nodata) return; + goto stuff; } /* generate the wrapped result */ if (inst->op == cvtsd2ss) {
*** 1154,1163 **** --- 1293,1303 ---- info->op1.type = fex_double; info->op1.val.d = inst->op1->d[0]; info->op2.type = fex_double; info->op2.val.d = inst->op2->d[0]; info->res.type = fex_double; + switch (inst->op) { case addsd: info->res.val.d = dscl * (dscl * info->op1.val.d + dscl * info->op2.val.d); break;
*** 1184,1193 **** --- 1324,1334 ---- info->op1.type = fex_float; info->op1.val.f = inst->op1->f[0]; info->op2.type = fex_float; info->op2.val.f = inst->op2->f[0]; info->res.type = fex_float; + switch (inst->op) { case addss: info->res.val.f = fscl * (fscl * info->op1.val.f + fscl * info->op2.val.f); break;
*** 1213,1224 **** } } /* put the result in the destination */ stuff: ! if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si ! || inst->op == cvttsd2si || inst->op == cvtsd2si) { switch (info->res.type) { case fex_int: i = info->res.val.i; break; --- 1354,1365 ---- } } /* put the result in the destination */ stuff: ! if (inst->op == cmpss || inst->op == cvttss2si || inst->op == ! cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) { switch (info->res.type) { case fex_int: i = info->res.val.i; break;
*** 1239,1252 **** break; default: break; } inst->op1->i[0] = i; ! } else if (inst->op == cmpsd || inst->op == cvttss2siq || ! inst->op == cvtss2siq || inst->op == cvttsd2siq || ! inst->op == cvtsd2siq) { switch (info->res.type) { case fex_int: l = info->res.val.i; break; --- 1380,1393 ---- break; default: break; } + inst->op1->i[0] = i; ! } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op == ! cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) { switch (info->res.type) { case fex_int: l = info->res.val.i; break;
*** 1267,1276 **** --- 1408,1418 ---- break; default: break; } + inst->op1->l[0] = l; } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || inst->op == cvtss2sd) { switch (info->res.type) { case fex_int:
*** 1294,1303 **** --- 1436,1446 ---- break; default: break; } + inst->op1->d[0] = d; } else { switch (info->res.type) { case fex_int: f = info->res.val.i;
*** 1320,1329 **** --- 1463,1473 ---- break; default: break; } + inst->op1->f[0] = f; } } /*
*** 1346,1607 **** /* store each part */ switch (inst->op) { case cmpps: dummy.op = cmpss; dummy.imm = inst->imm; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case minps: dummy.op = minss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case maxps: dummy.op = maxss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case addps: dummy.op = addss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case subps: dummy.op = subss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case mulps: dummy.op = mulss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case divps: dummy.op = divss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case sqrtps: dummy.op = sqrtss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtdq2ps: dummy.op = cvtsi2ss; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvttps2dq: dummy.op = cvttss2si; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtps2dq: dummy.op = cvtss2si; for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtpi2ps: dummy.op = cvtsi2ss; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvttps2pi: dummy.op = cvttss2si; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtps2pi: dummy.op = cvtss2si; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cmppd: dummy.op = cmpsd; dummy.imm = inst->imm; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case minpd: dummy.op = minsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case maxpd: dummy.op = maxsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case addpd: dummy.op = addsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case subpd: dummy.op = subsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case mulpd: dummy.op = mulsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case divpd: dummy.op = divsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case sqrtpd: dummy.op = sqrtsd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtpi2pd: case cvtdq2pd: dummy.op = cvtsi2sd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvttpd2pi: case cvttpd2dq: dummy.op = cvttsd2si; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } /* for cvttpd2dq, zero the high 64 bits of the destination */ if (inst->op == cvttpd2dq) inst->op1->l[1] = 0ll; break; case cvtpd2pi: case cvtpd2dq: dummy.op = cvtsd2si; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } /* for cvtpd2dq, zero the high 64 bits of the destination */ if (inst->op == cvtpd2dq) inst->op1->l[1] = 0ll; break; case cvtps2pd: dummy.op = cvtss2sd; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } break; case cvtpd2ps: dummy.op = cvtsd2ss; for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } /* zero the high 64 bits of the destination */ inst->op1->l[1] = 0ll; default: break; } } - --- 1490,1806 ---- /* store each part */ switch (inst->op) { case cmpps: dummy.op = cmpss; dummy.imm = inst->imm; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case minps: dummy.op = minss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case maxps: dummy.op = maxss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case addps: dummy.op = addss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case subps: dummy.op = subss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case mulps: dummy.op = mulss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case divps: dummy.op = divss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case sqrtps: dummy.op = sqrtss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtdq2ps: dummy.op = cvtsi2ss; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvttps2dq: dummy.op = cvttss2si; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtps2dq: dummy.op = cvtss2si; + for (i = 0; i < 4; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtpi2ps: dummy.op = cvtsi2ss; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvttps2pi: dummy.op = cvttss2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtps2pi: dummy.op = cvtss2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cmppd: dummy.op = cmpsd; dummy.imm = inst->imm; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case minpd: dummy.op = minsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case maxpd: dummy.op = maxsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case addpd: dummy.op = addsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case subpd: dummy.op = subsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case mulpd: dummy.op = mulsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case divpd: dummy.op = divsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case sqrtpd: dummy.op = sqrtsd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtpi2pd: case cvtdq2pd: dummy.op = cvtsi2sd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvttpd2pi: case cvttpd2dq: dummy.op = cvttsd2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + /* for cvttpd2dq, zero the high 64 bits of the destination */ if (inst->op == cvttpd2dq) inst->op1->l[1] = 0ll; + break; case cvtpd2pi: case cvtpd2dq: dummy.op = cvtsd2si; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + /* for cvtpd2dq, zero the high 64 bits of the destination */ if (inst->op == cvtpd2dq) inst->op1->l[1] = 0ll; + break; case cvtps2pd: dummy.op = cvtss2sd; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + break; case cvtpd2ps: dummy.op = cvtsd2ss; + for (i = 0; i < 2; i++) { dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; __fex_st_sse_result(uap, &dummy, e[i], &info[i]); } + /* zero the high 64 bits of the destination */ inst->op1->l[1] = 0ll; default: break; } }