353 /* eliminate all single-operand instructions */ 354 switch (inst->op) { 355 case cvtsd2ss: 356 case cvtss2sd: 357 /* hmm, this shouldn't have happened */ 358 return (enum fex_exception) -1; 359 360 case sqrtss: 361 case sqrtsd: 362 return fex_inv_sqrt; 363 364 case cvtss2si: 365 case cvtsd2si: 366 case cvttss2si: 367 case cvttsd2si: 368 case cvtss2siq: 369 case cvtsd2siq: 370 case cvttss2siq: 371 case cvttsd2siq: 372 return fex_inv_int; 373 } 374 375 /* check op1 for signaling nan */ 376 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : 377 my_fp_classf(&inst->op1->f[0]); 378 if (t1 == fp_signaling) 379 return fex_inv_snan; 380 381 /* check two-operand instructions for other cases */ 382 switch (inst->op) { 383 case cmpss: 384 case cmpsd: 385 case minss: 386 case minsd: 387 case maxss: 388 case maxsd: 389 case comiss: 390 case comisd: 391 return fex_inv_cmp; 392 394 case addsd: 395 case subss: 396 case subsd: 397 if (t1 == fp_infinity && t2 == fp_infinity) 398 return fex_inv_isi; 399 break; 400 401 case mulss: 402 case mulsd: 403 if ((t1 == fp_zero && t2 == fp_infinity) || 404 (t2 == fp_zero && t1 == fp_infinity)) 405 return fex_inv_zmi; 406 break; 407 408 case divss: 409 case divsd: 410 if (t1 == fp_zero && t2 == fp_zero) 411 return fex_inv_zdz; 412 if (t1 == fp_infinity && t2 == fp_infinity) 413 return fex_inv_idi; 414 } 415 416 return (enum fex_exception)-1; 417 } 418 419 /* inline templates */ 420 extern void sse_cmpeqss(float *, float *, int *); 421 extern void sse_cmpltss(float *, float *, int *); 422 extern void sse_cmpless(float *, float *, int *); 423 extern void sse_cmpunordss(float *, float *, int *); 424 extern void sse_minss(float *, float *, float *); 425 extern void sse_maxss(float *, float *, float *); 426 extern void sse_addss(float *, float *, float *); 427 extern void sse_subss(float *, float *, float *); 428 extern void sse_mulss(float *, float *, float *); 429 extern void sse_divss(float *, float *, float *); 430 extern void sse_sqrtss(float *, float *); 431 extern void sse_ucomiss(float *, float *); 432 extern void sse_comiss(float *, float *); 433 extern void sse_cvtss2sd(float *, double *); 621 break; 622 623 case cvtsd2siq: 624 info->op = fex_cnvt; 625 info->res.type = fex_llong; 626 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 627 break; 628 #endif 629 630 case ucomisd: 631 info->op = fex_cmp; 632 info->res.type = fex_nodata; 633 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 634 break; 635 636 case comisd: 637 info->op = fex_cmp; 638 info->res.type = fex_nodata; 639 sse_comisd(&info->op1.val.d, &info->op2.val.d); 640 break; 641 } 642 } else { 643 if (inst->op == cvtsi2ss) { 644 info->op1.type = fex_int; 645 info->op1.val.i = inst->op2->i[0]; 646 info->op2.type = fex_nodata; 647 } else if (inst->op == cvtsi2ssq) { 648 info->op1.type = fex_llong; 649 info->op1.val.l = inst->op2->l[0]; 650 info->op2.type = fex_nodata; 651 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 652 inst->op == cvttss2si || inst->op == cvtss2si || 653 inst->op == cvttss2siq || inst->op == cvtss2siq) { 654 info->op1.type = fex_float; 655 info->op1.val.f = inst->op2->f[0]; 656 info->op2.type = fex_nodata; 657 } else { 658 info->op1.type = fex_float; 659 info->op1.val.f = inst->op1->f[0]; 660 info->op2.type = fex_float; 774 break; 775 776 case cvtss2siq: 777 info->op = fex_cnvt; 778 info->res.type = fex_llong; 779 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 780 break; 781 #endif 782 783 case ucomiss: 784 info->op = fex_cmp; 785 info->res.type = fex_nodata; 786 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 787 break; 788 789 case comiss: 790 info->op = fex_cmp; 791 info->res.type = fex_nodata; 792 sse_comiss(&info->op1.val.f, &info->op2.val.f); 793 break; 794 } 795 } 796 __fenv_getmxcsr(&mxcsr); 797 info->flags = mxcsr & 0x3d; 798 __fenv_setmxcsr(&oldmxcsr); 799 800 /* determine which exception would have been trapped */ 801 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr 802 >> 7) & 0x3d; 803 e = mxcsr & te; 804 if (e & FE_INVALID) 805 return __fex_get_sse_invalid_type(inst); 806 if (e & FE_DIVBYZERO) 807 return fex_division; 808 if (e & FE_OVERFLOW) 809 return fex_overflow; 810 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 811 return fex_underflow; 812 if (e & FE_INEXACT) 813 return fex_inexact; 1066 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1067 } 1068 break; 1069 1070 case cvtps2pd: 1071 dummy.op = cvtss2sd; 1072 for (i = 0; i < 2; i++) { 1073 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1074 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1075 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1076 } 1077 break; 1078 1079 case cvtpd2ps: 1080 dummy.op = cvtsd2ss; 1081 for (i = 0; i < 2; i++) { 1082 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1083 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1084 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1085 } 1086 } 1087 } 1088 1089 /* 1090 * Store the result value from *info in the destination of the scalar 1091 * SSE instruction specified by *inst. If no result is given but the 1092 * exception is underflow or overflow, supply the default trapped result. 1093 * 1094 * This routine does not work if the instruction specified by *inst 1095 * is not a scalar instruction. 1096 */ 1097 void 1098 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1099 fex_info_t *info) 1100 { 1101 int i; 1102 long long l; 1103 float f, fscl; 1104 double d, dscl; 1105 1106 /* for compares that write eflags, just set the flags 1107 to indicate "unordered" */ 1108 if (inst->op == ucomiss || inst->op == comiss || 1109 inst->op == ucomisd || inst->op == comisd) { 1110 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1111 return; 1112 } 1113 1114 /* if info doesn't specify a result value, try to generate 1115 the default trapped result */ 1116 if (info->res.type == fex_nodata) { 1117 /* set scale factors for exponent wrapping */ 1118 switch (e) { 1119 case fex_overflow: 1120 fscl = 1.262177448e-29f; /* 2^-96 */ 1121 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1122 break; 1123 1124 case fex_underflow: 1211 switch (info->res.type) { 1212 case fex_int: 1213 i = info->res.val.i; 1214 break; 1215 1216 case fex_llong: 1217 i = info->res.val.l; 1218 break; 1219 1220 case fex_float: 1221 i = info->res.val.f; 1222 break; 1223 1224 case fex_double: 1225 i = info->res.val.d; 1226 break; 1227 1228 case fex_ldouble: 1229 i = info->res.val.q; 1230 break; 1231 } 1232 inst->op1->i[0] = i; 1233 } else if (inst->op == cmpsd || inst->op == cvttss2siq || 1234 inst->op == cvtss2siq || inst->op == cvttsd2siq || 1235 inst->op == cvtsd2siq) { 1236 switch (info->res.type) { 1237 case fex_int: 1238 l = info->res.val.i; 1239 break; 1240 1241 case fex_llong: 1242 l = info->res.val.l; 1243 break; 1244 1245 case fex_float: 1246 l = info->res.val.f; 1247 break; 1248 1249 case fex_double: 1250 l = info->res.val.d; 1251 break; 1252 1253 case fex_ldouble: 1254 l = info->res.val.q; 1255 break; 1256 } 1257 inst->op1->l[0] = l; 1258 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1259 inst->op == cvtss2sd) { 1260 switch (info->res.type) { 1261 case fex_int: 1262 d = info->res.val.i; 1263 break; 1264 1265 case fex_llong: 1266 d = info->res.val.l; 1267 break; 1268 1269 case fex_float: 1270 d = info->res.val.f; 1271 break; 1272 1273 case fex_double: 1274 d = info->res.val.d; 1275 break; 1276 1277 case fex_ldouble: 1278 d = info->res.val.q; 1279 break; 1280 } 1281 inst->op1->d[0] = d; 1282 } else { 1283 switch (info->res.type) { 1284 case fex_int: 1285 f = info->res.val.i; 1286 break; 1287 1288 case fex_llong: 1289 f = info->res.val.l; 1290 break; 1291 1292 case fex_float: 1293 f = info->res.val.f; 1294 break; 1295 1296 case fex_double: 1297 f = info->res.val.d; 1298 break; 1299 1300 case fex_ldouble: 1301 f = info->res.val.q; 1302 break; 1303 } 1304 inst->op1->f[0] = f; 1305 } 1306 } 1307 1308 /* 1309 * Store the results from a SIMD instruction. For each i, store 1310 * the result value from info[i] in the i-th part of the destination 1311 * of the SIMD SSE instruction specified by *inst. If no result 1312 * is given but the exception indicated by e[i] is underflow or 1313 * overflow, supply the default trapped result. 1314 * 1315 * This routine does not work if the instruction specified by *inst 1316 * is not a SIMD instruction. 1317 */ 1318 void 1319 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1320 fex_info_t *info) 1321 { 1322 sseinst_t dummy; 1561 break; 1562 1563 case cvtps2pd: 1564 dummy.op = cvtss2sd; 1565 for (i = 0; i < 2; i++) { 1566 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1567 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1568 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1569 } 1570 break; 1571 1572 case cvtpd2ps: 1573 dummy.op = cvtsd2ss; 1574 for (i = 0; i < 2; i++) { 1575 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1578 } 1579 /* zero the high 64 bits of the destination */ 1580 inst->op1->l[1] = 0ll; 1581 } 1582 } | 353 /* eliminate all single-operand instructions */ 354 switch (inst->op) { 355 case cvtsd2ss: 356 case cvtss2sd: 357 /* hmm, this shouldn't have happened */ 358 return (enum fex_exception) -1; 359 360 case sqrtss: 361 case sqrtsd: 362 return fex_inv_sqrt; 363 364 case cvtss2si: 365 case cvtsd2si: 366 case cvttss2si: 367 case cvttsd2si: 368 case cvtss2siq: 369 case cvtsd2siq: 370 case cvttss2siq: 371 case cvttsd2siq: 372 return fex_inv_int; 373 default: 374 break; 375 } 376 377 /* check op1 for signaling nan */ 378 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : 379 my_fp_classf(&inst->op1->f[0]); 380 if (t1 == fp_signaling) 381 return fex_inv_snan; 382 383 /* check two-operand instructions for other cases */ 384 switch (inst->op) { 385 case cmpss: 386 case cmpsd: 387 case minss: 388 case minsd: 389 case maxss: 390 case maxsd: 391 case comiss: 392 case comisd: 393 return fex_inv_cmp; 394 396 case addsd: 397 case subss: 398 case subsd: 399 if (t1 == fp_infinity && t2 == fp_infinity) 400 return fex_inv_isi; 401 break; 402 403 case mulss: 404 case mulsd: 405 if ((t1 == fp_zero && t2 == fp_infinity) || 406 (t2 == fp_zero && t1 == fp_infinity)) 407 return fex_inv_zmi; 408 break; 409 410 case divss: 411 case divsd: 412 if (t1 == fp_zero && t2 == fp_zero) 413 return fex_inv_zdz; 414 if (t1 == fp_infinity && t2 == fp_infinity) 415 return fex_inv_idi; 416 default: 417 break; 418 } 419 420 return (enum fex_exception)-1; 421 } 422 423 /* inline templates */ 424 extern void sse_cmpeqss(float *, float *, int *); 425 extern void sse_cmpltss(float *, float *, int *); 426 extern void sse_cmpless(float *, float *, int *); 427 extern void sse_cmpunordss(float *, float *, int *); 428 extern void sse_minss(float *, float *, float *); 429 extern void sse_maxss(float *, float *, float *); 430 extern void sse_addss(float *, float *, float *); 431 extern void sse_subss(float *, float *, float *); 432 extern void sse_mulss(float *, float *, float *); 433 extern void sse_divss(float *, float *, float *); 434 extern void sse_sqrtss(float *, float *); 435 extern void sse_ucomiss(float *, float *); 436 extern void sse_comiss(float *, float *); 437 extern void sse_cvtss2sd(float *, double *); 625 break; 626 627 case cvtsd2siq: 628 info->op = fex_cnvt; 629 info->res.type = fex_llong; 630 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 631 break; 632 #endif 633 634 case ucomisd: 635 info->op = fex_cmp; 636 info->res.type = fex_nodata; 637 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 638 break; 639 640 case comisd: 641 info->op = fex_cmp; 642 info->res.type = fex_nodata; 643 sse_comisd(&info->op1.val.d, &info->op2.val.d); 644 break; 645 default: 646 break; 647 } 648 } else { 649 if (inst->op == cvtsi2ss) { 650 info->op1.type = fex_int; 651 info->op1.val.i = inst->op2->i[0]; 652 info->op2.type = fex_nodata; 653 } else if (inst->op == cvtsi2ssq) { 654 info->op1.type = fex_llong; 655 info->op1.val.l = inst->op2->l[0]; 656 info->op2.type = fex_nodata; 657 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 658 inst->op == cvttss2si || inst->op == cvtss2si || 659 inst->op == cvttss2siq || inst->op == cvtss2siq) { 660 info->op1.type = fex_float; 661 info->op1.val.f = inst->op2->f[0]; 662 info->op2.type = fex_nodata; 663 } else { 664 info->op1.type = fex_float; 665 info->op1.val.f = inst->op1->f[0]; 666 info->op2.type = fex_float; 780 break; 781 782 case cvtss2siq: 783 info->op = fex_cnvt; 784 info->res.type = fex_llong; 785 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 786 break; 787 #endif 788 789 case ucomiss: 790 info->op = fex_cmp; 791 info->res.type = fex_nodata; 792 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 793 break; 794 795 case comiss: 796 info->op = fex_cmp; 797 info->res.type = fex_nodata; 798 sse_comiss(&info->op1.val.f, &info->op2.val.f); 799 break; 800 default: 801 break; 802 } 803 } 804 __fenv_getmxcsr(&mxcsr); 805 info->flags = mxcsr & 0x3d; 806 __fenv_setmxcsr(&oldmxcsr); 807 808 /* determine which exception would have been trapped */ 809 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr 810 >> 7) & 0x3d; 811 e = mxcsr & te; 812 if (e & FE_INVALID) 813 return __fex_get_sse_invalid_type(inst); 814 if (e & FE_DIVBYZERO) 815 return fex_division; 816 if (e & FE_OVERFLOW) 817 return fex_overflow; 818 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 819 return fex_underflow; 820 if (e & FE_INEXACT) 821 return fex_inexact; 1074 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1075 } 1076 break; 1077 1078 case cvtps2pd: 1079 dummy.op = cvtss2sd; 1080 for (i = 0; i < 2; i++) { 1081 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1082 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1083 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1084 } 1085 break; 1086 1087 case cvtpd2ps: 1088 dummy.op = cvtsd2ss; 1089 for (i = 0; i < 2; i++) { 1090 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1091 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1092 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1093 } 1094 default: 1095 break; 1096 } 1097 } 1098 1099 /* 1100 * Store the result value from *info in the destination of the scalar 1101 * SSE instruction specified by *inst. If no result is given but the 1102 * exception is underflow or overflow, supply the default trapped result. 1103 * 1104 * This routine does not work if the instruction specified by *inst 1105 * is not a scalar instruction. 1106 */ 1107 void 1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1109 fex_info_t *info) 1110 { 1111 int i = 0; 1112 long long l = 0L;; 1113 float f = 0.0, fscl; 1114 double d = 0.0L, dscl; 1115 1116 /* for compares that write eflags, just set the flags 1117 to indicate "unordered" */ 1118 if (inst->op == ucomiss || inst->op == comiss || 1119 inst->op == ucomisd || inst->op == comisd) { 1120 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1121 return; 1122 } 1123 1124 /* if info doesn't specify a result value, try to generate 1125 the default trapped result */ 1126 if (info->res.type == fex_nodata) { 1127 /* set scale factors for exponent wrapping */ 1128 switch (e) { 1129 case fex_overflow: 1130 fscl = 1.262177448e-29f; /* 2^-96 */ 1131 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1132 break; 1133 1134 case fex_underflow: 1221 switch (info->res.type) { 1222 case fex_int: 1223 i = info->res.val.i; 1224 break; 1225 1226 case fex_llong: 1227 i = info->res.val.l; 1228 break; 1229 1230 case fex_float: 1231 i = info->res.val.f; 1232 break; 1233 1234 case fex_double: 1235 i = info->res.val.d; 1236 break; 1237 1238 case fex_ldouble: 1239 i = info->res.val.q; 1240 break; 1241 1242 default: 1243 break; 1244 } 1245 inst->op1->i[0] = i; 1246 } else if (inst->op == cmpsd || inst->op == cvttss2siq || 1247 inst->op == cvtss2siq || inst->op == cvttsd2siq || 1248 inst->op == cvtsd2siq) { 1249 switch (info->res.type) { 1250 case fex_int: 1251 l = info->res.val.i; 1252 break; 1253 1254 case fex_llong: 1255 l = info->res.val.l; 1256 break; 1257 1258 case fex_float: 1259 l = info->res.val.f; 1260 break; 1261 1262 case fex_double: 1263 l = info->res.val.d; 1264 break; 1265 1266 case fex_ldouble: 1267 l = info->res.val.q; 1268 break; 1269 1270 default: 1271 break; 1272 } 1273 inst->op1->l[0] = l; 1274 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1275 inst->op == cvtss2sd) { 1276 switch (info->res.type) { 1277 case fex_int: 1278 d = info->res.val.i; 1279 break; 1280 1281 case fex_llong: 1282 d = info->res.val.l; 1283 break; 1284 1285 case fex_float: 1286 d = info->res.val.f; 1287 break; 1288 1289 case fex_double: 1290 d = info->res.val.d; 1291 break; 1292 1293 case fex_ldouble: 1294 d = info->res.val.q; 1295 break; 1296 1297 default: 1298 break; 1299 } 1300 inst->op1->d[0] = d; 1301 } else { 1302 switch (info->res.type) { 1303 case fex_int: 1304 f = info->res.val.i; 1305 break; 1306 1307 case fex_llong: 1308 f = info->res.val.l; 1309 break; 1310 1311 case fex_float: 1312 f = info->res.val.f; 1313 break; 1314 1315 case fex_double: 1316 f = info->res.val.d; 1317 break; 1318 1319 case fex_ldouble: 1320 f = info->res.val.q; 1321 break; 1322 1323 default: 1324 break; 1325 } 1326 inst->op1->f[0] = f; 1327 } 1328 } 1329 1330 /* 1331 * Store the results from a SIMD instruction. For each i, store 1332 * the result value from info[i] in the i-th part of the destination 1333 * of the SIMD SSE instruction specified by *inst. If no result 1334 * is given but the exception indicated by e[i] is underflow or 1335 * overflow, supply the default trapped result. 1336 * 1337 * This routine does not work if the instruction specified by *inst 1338 * is not a SIMD instruction. 1339 */ 1340 void 1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1342 fex_info_t *info) 1343 { 1344 sseinst_t dummy; 1583 break; 1584 1585 case cvtps2pd: 1586 dummy.op = cvtss2sd; 1587 for (i = 0; i < 2; i++) { 1588 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1589 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1590 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1591 } 1592 break; 1593 1594 case cvtpd2ps: 1595 dummy.op = cvtsd2ss; 1596 for (i = 0; i < 2; i++) { 1597 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1598 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1599 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1600 } 1601 /* zero the high 64 bits of the destination */ 1602 inst->op1->l[1] = 0ll; 1603 1604 default: 1605 break; 1606 } 1607 } 1608 |