353 /* eliminate all single-operand instructions */
354 switch (inst->op) {
355 case cvtsd2ss:
356 case cvtss2sd:
357 /* hmm, this shouldn't have happened */
358 return (enum fex_exception) -1;
359
360 case sqrtss:
361 case sqrtsd:
362 return fex_inv_sqrt;
363
364 case cvtss2si:
365 case cvtsd2si:
366 case cvttss2si:
367 case cvttsd2si:
368 case cvtss2siq:
369 case cvtsd2siq:
370 case cvttss2siq:
371 case cvttsd2siq:
372 return fex_inv_int;
373 }
374
375 /* check op1 for signaling nan */
376 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
377 my_fp_classf(&inst->op1->f[0]);
378 if (t1 == fp_signaling)
379 return fex_inv_snan;
380
381 /* check two-operand instructions for other cases */
382 switch (inst->op) {
383 case cmpss:
384 case cmpsd:
385 case minss:
386 case minsd:
387 case maxss:
388 case maxsd:
389 case comiss:
390 case comisd:
391 return fex_inv_cmp;
392
394 case addsd:
395 case subss:
396 case subsd:
397 if (t1 == fp_infinity && t2 == fp_infinity)
398 return fex_inv_isi;
399 break;
400
401 case mulss:
402 case mulsd:
403 if ((t1 == fp_zero && t2 == fp_infinity) ||
404 (t2 == fp_zero && t1 == fp_infinity))
405 return fex_inv_zmi;
406 break;
407
408 case divss:
409 case divsd:
410 if (t1 == fp_zero && t2 == fp_zero)
411 return fex_inv_zdz;
412 if (t1 == fp_infinity && t2 == fp_infinity)
413 return fex_inv_idi;
414 }
415
416 return (enum fex_exception)-1;
417 }
418
419 /*
 * inline templates
 *
 * Prototypes for external sse_* helper routines.  Each is named after
 * the SSE instruction mnemonic it stands for, and every argument is
 * passed by address.  Where this file calls the two-pointer compare
 * helpers (sse_ucomiss, sse_comiss) it passes the addresses of the two
 * source operand values and then reads MXCSR to collect the exception
 * flags.
 *
 * NOTE(review): for the three-pointer forms the last argument is
 * presumably the result slot (int * for the cmp* predicates, float *
 * for min/max/arithmetic), and for sse_sqrtss / sse_cvtss2sd the second
 * argument is presumably the destination -- confirm against the
 * template definitions, which are not visible here.
 */
420 extern void sse_cmpeqss(float *, float *, int *);
421 extern void sse_cmpltss(float *, float *, int *);
422 extern void sse_cmpless(float *, float *, int *);
423 extern void sse_cmpunordss(float *, float *, int *);
424 extern void sse_minss(float *, float *, float *);
425 extern void sse_maxss(float *, float *, float *);
426 extern void sse_addss(float *, float *, float *);
427 extern void sse_subss(float *, float *, float *);
428 extern void sse_mulss(float *, float *, float *);
429 extern void sse_divss(float *, float *, float *);
430 extern void sse_sqrtss(float *, float *);
431 extern void sse_ucomiss(float *, float *);
432 extern void sse_comiss(float *, float *);
433 extern void sse_cvtss2sd(float *, double *);
621 break;
622
623 case cvtsd2siq:
624 info->op = fex_cnvt;
625 info->res.type = fex_llong;
626 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
627 break;
628 #endif
629
630 case ucomisd:
631 info->op = fex_cmp;
632 info->res.type = fex_nodata;
633 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
634 break;
635
636 case comisd:
637 info->op = fex_cmp;
638 info->res.type = fex_nodata;
639 sse_comisd(&info->op1.val.d, &info->op2.val.d);
640 break;
641 }
642 } else {
643 if (inst->op == cvtsi2ss) {
644 info->op1.type = fex_int;
645 info->op1.val.i = inst->op2->i[0];
646 info->op2.type = fex_nodata;
647 } else if (inst->op == cvtsi2ssq) {
648 info->op1.type = fex_llong;
649 info->op1.val.l = inst->op2->l[0];
650 info->op2.type = fex_nodata;
651 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
652 inst->op == cvttss2si || inst->op == cvtss2si ||
653 inst->op == cvttss2siq || inst->op == cvtss2siq) {
654 info->op1.type = fex_float;
655 info->op1.val.f = inst->op2->f[0];
656 info->op2.type = fex_nodata;
657 } else {
658 info->op1.type = fex_float;
659 info->op1.val.f = inst->op1->f[0];
660 info->op2.type = fex_float;
774 break;
775
776 case cvtss2siq:
777 info->op = fex_cnvt;
778 info->res.type = fex_llong;
779 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
780 break;
781 #endif
782
783 case ucomiss:
784 info->op = fex_cmp;
785 info->res.type = fex_nodata;
786 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
787 break;
788
789 case comiss:
790 info->op = fex_cmp;
791 info->res.type = fex_nodata;
792 sse_comiss(&info->op1.val.f, &info->op2.val.f);
793 break;
794 }
795 }
796 __fenv_getmxcsr(&mxcsr);
797 info->flags = mxcsr & 0x3d;
798 __fenv_setmxcsr(&oldmxcsr);
799
800 /* determine which exception would have been trapped */
801 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
802 >> 7) & 0x3d;
803 e = mxcsr & te;
804 if (e & FE_INVALID)
805 return __fex_get_sse_invalid_type(inst);
806 if (e & FE_DIVBYZERO)
807 return fex_division;
808 if (e & FE_OVERFLOW)
809 return fex_overflow;
810 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
811 return fex_underflow;
812 if (e & FE_INEXACT)
813 return fex_inexact;
1066 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1067 }
1068 break;
1069
1070 case cvtps2pd:
1071 dummy.op = cvtss2sd;
1072 for (i = 0; i < 2; i++) {
1073 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1074 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1075 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1076 }
1077 break;
1078
1079 case cvtpd2ps:
1080 dummy.op = cvtsd2ss;
1081 for (i = 0; i < 2; i++) {
1082 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1083 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1084 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1085 }
1086 }
1087 }
1088
1089 /*
1090 * Store the result value from *info in the destination of the scalar
1091 * SSE instruction specified by *inst. If no result is given but the
1092 * exception is underflow or overflow, supply the default trapped result.
1093 *
1094 * This routine does not work if the instruction specified by *inst
1095 * is not a scalar instruction.
1096 */
1097 void
1098 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1099 fex_info_t *info)
1100 {
1101 int i;
1102 long long l;
1103 float f, fscl;
1104 double d, dscl;
1105
1106 /* for compares that write eflags, just set the flags
1107 to indicate "unordered" */
1108 if (inst->op == ucomiss || inst->op == comiss ||
1109 inst->op == ucomisd || inst->op == comisd) {
1110 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1111 return;
1112 }
1113
1114 /* if info doesn't specify a result value, try to generate
1115 the default trapped result */
1116 if (info->res.type == fex_nodata) {
1117 /* set scale factors for exponent wrapping */
1118 switch (e) {
1119 case fex_overflow:
1120 fscl = 1.262177448e-29f; /* 2^-96 */
1121 dscl = 6.441148769597133308e-232; /* 2^-768 */
1122 break;
1123
1124 case fex_underflow:
1211 switch (info->res.type) {
1212 case fex_int:
1213 i = info->res.val.i;
1214 break;
1215
1216 case fex_llong:
1217 i = info->res.val.l;
1218 break;
1219
1220 case fex_float:
1221 i = info->res.val.f;
1222 break;
1223
1224 case fex_double:
1225 i = info->res.val.d;
1226 break;
1227
1228 case fex_ldouble:
1229 i = info->res.val.q;
1230 break;
1231 }
1232 inst->op1->i[0] = i;
1233 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1234 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1235 inst->op == cvtsd2siq) {
1236 switch (info->res.type) {
1237 case fex_int:
1238 l = info->res.val.i;
1239 break;
1240
1241 case fex_llong:
1242 l = info->res.val.l;
1243 break;
1244
1245 case fex_float:
1246 l = info->res.val.f;
1247 break;
1248
1249 case fex_double:
1250 l = info->res.val.d;
1251 break;
1252
1253 case fex_ldouble:
1254 l = info->res.val.q;
1255 break;
1256 }
1257 inst->op1->l[0] = l;
1258 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1259 inst->op == cvtss2sd) {
1260 switch (info->res.type) {
1261 case fex_int:
1262 d = info->res.val.i;
1263 break;
1264
1265 case fex_llong:
1266 d = info->res.val.l;
1267 break;
1268
1269 case fex_float:
1270 d = info->res.val.f;
1271 break;
1272
1273 case fex_double:
1274 d = info->res.val.d;
1275 break;
1276
1277 case fex_ldouble:
1278 d = info->res.val.q;
1279 break;
1280 }
1281 inst->op1->d[0] = d;
1282 } else {
1283 switch (info->res.type) {
1284 case fex_int:
1285 f = info->res.val.i;
1286 break;
1287
1288 case fex_llong:
1289 f = info->res.val.l;
1290 break;
1291
1292 case fex_float:
1293 f = info->res.val.f;
1294 break;
1295
1296 case fex_double:
1297 f = info->res.val.d;
1298 break;
1299
1300 case fex_ldouble:
1301 f = info->res.val.q;
1302 break;
1303 }
1304 inst->op1->f[0] = f;
1305 }
1306 }
1307
1308 /*
1309 * Store the results from a SIMD instruction. For each i, store
1310 * the result value from info[i] in the i-th part of the destination
1311 * of the SIMD SSE instruction specified by *inst. If no result
1312 * is given but the exception indicated by e[i] is underflow or
1313 * overflow, supply the default trapped result.
1314 *
1315 * This routine does not work if the instruction specified by *inst
1316 * is not a SIMD instruction.
1317 */
1318 void
1319 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1320 fex_info_t *info)
1321 {
1322 sseinst_t dummy;
1561 break;
1562
1563 case cvtps2pd:
1564 dummy.op = cvtss2sd;
1565 for (i = 0; i < 2; i++) {
1566 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1568 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569 }
1570 break;
1571
1572 case cvtpd2ps:
1573 dummy.op = cvtsd2ss;
1574 for (i = 0; i < 2; i++) {
1575 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 }
1579 /* zero the high 64 bits of the destination */
1580 inst->op1->l[1] = 0ll;
1581 }
1582 }
|
353 /* eliminate all single-operand instructions */
354 switch (inst->op) {
355 case cvtsd2ss:
356 case cvtss2sd:
357 /* hmm, this shouldn't have happened */
358 return (enum fex_exception) -1;
359
360 case sqrtss:
361 case sqrtsd:
362 return fex_inv_sqrt;
363
364 case cvtss2si:
365 case cvtsd2si:
366 case cvttss2si:
367 case cvttsd2si:
368 case cvtss2siq:
369 case cvtsd2siq:
370 case cvttss2siq:
371 case cvttsd2siq:
372 return fex_inv_int;
373 default:
374 break;
375 }
376
377 /* check op1 for signaling nan */
378 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
379 my_fp_classf(&inst->op1->f[0]);
380 if (t1 == fp_signaling)
381 return fex_inv_snan;
382
383 /* check two-operand instructions for other cases */
384 switch (inst->op) {
385 case cmpss:
386 case cmpsd:
387 case minss:
388 case minsd:
389 case maxss:
390 case maxsd:
391 case comiss:
392 case comisd:
393 return fex_inv_cmp;
394
396 case addsd:
397 case subss:
398 case subsd:
399 if (t1 == fp_infinity && t2 == fp_infinity)
400 return fex_inv_isi;
401 break;
402
403 case mulss:
404 case mulsd:
405 if ((t1 == fp_zero && t2 == fp_infinity) ||
406 (t2 == fp_zero && t1 == fp_infinity))
407 return fex_inv_zmi;
408 break;
409
410 case divss:
411 case divsd:
412 if (t1 == fp_zero && t2 == fp_zero)
413 return fex_inv_zdz;
414 if (t1 == fp_infinity && t2 == fp_infinity)
415 return fex_inv_idi;
416 default:
417 break;
418 }
419
420 return (enum fex_exception)-1;
421 }
422
423 /*
 * inline templates
 *
 * Prototypes for external sse_* helper routines.  Each is named after
 * the SSE instruction mnemonic it stands for, and every argument is
 * passed by address.  Where this file calls the two-pointer compare
 * helpers (sse_ucomiss, sse_comiss) it passes the addresses of the two
 * source operand values and then reads MXCSR to collect the exception
 * flags.
 *
 * NOTE(review): for the three-pointer forms the last argument is
 * presumably the result slot (int * for the cmp* predicates, float *
 * for min/max/arithmetic), and for sse_sqrtss / sse_cvtss2sd the second
 * argument is presumably the destination -- confirm against the
 * template definitions, which are not visible here.
 */
424 extern void sse_cmpeqss(float *, float *, int *);
425 extern void sse_cmpltss(float *, float *, int *);
426 extern void sse_cmpless(float *, float *, int *);
427 extern void sse_cmpunordss(float *, float *, int *);
428 extern void sse_minss(float *, float *, float *);
429 extern void sse_maxss(float *, float *, float *);
430 extern void sse_addss(float *, float *, float *);
431 extern void sse_subss(float *, float *, float *);
432 extern void sse_mulss(float *, float *, float *);
433 extern void sse_divss(float *, float *, float *);
434 extern void sse_sqrtss(float *, float *);
435 extern void sse_ucomiss(float *, float *);
436 extern void sse_comiss(float *, float *);
437 extern void sse_cvtss2sd(float *, double *);
625 break;
626
627 case cvtsd2siq:
628 info->op = fex_cnvt;
629 info->res.type = fex_llong;
630 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
631 break;
632 #endif
633
634 case ucomisd:
635 info->op = fex_cmp;
636 info->res.type = fex_nodata;
637 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
638 break;
639
640 case comisd:
641 info->op = fex_cmp;
642 info->res.type = fex_nodata;
643 sse_comisd(&info->op1.val.d, &info->op2.val.d);
644 break;
645 default:
646 break;
647 }
648 } else {
649 if (inst->op == cvtsi2ss) {
650 info->op1.type = fex_int;
651 info->op1.val.i = inst->op2->i[0];
652 info->op2.type = fex_nodata;
653 } else if (inst->op == cvtsi2ssq) {
654 info->op1.type = fex_llong;
655 info->op1.val.l = inst->op2->l[0];
656 info->op2.type = fex_nodata;
657 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
658 inst->op == cvttss2si || inst->op == cvtss2si ||
659 inst->op == cvttss2siq || inst->op == cvtss2siq) {
660 info->op1.type = fex_float;
661 info->op1.val.f = inst->op2->f[0];
662 info->op2.type = fex_nodata;
663 } else {
664 info->op1.type = fex_float;
665 info->op1.val.f = inst->op1->f[0];
666 info->op2.type = fex_float;
780 break;
781
782 case cvtss2siq:
783 info->op = fex_cnvt;
784 info->res.type = fex_llong;
785 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
786 break;
787 #endif
788
789 case ucomiss:
790 info->op = fex_cmp;
791 info->res.type = fex_nodata;
792 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
793 break;
794
795 case comiss:
796 info->op = fex_cmp;
797 info->res.type = fex_nodata;
798 sse_comiss(&info->op1.val.f, &info->op2.val.f);
799 break;
800 default:
801 break;
802 }
803 }
804 __fenv_getmxcsr(&mxcsr);
805 info->flags = mxcsr & 0x3d;
806 __fenv_setmxcsr(&oldmxcsr);
807
808 /* determine which exception would have been trapped */
809 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
810 >> 7) & 0x3d;
811 e = mxcsr & te;
812 if (e & FE_INVALID)
813 return __fex_get_sse_invalid_type(inst);
814 if (e & FE_DIVBYZERO)
815 return fex_division;
816 if (e & FE_OVERFLOW)
817 return fex_overflow;
818 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
819 return fex_underflow;
820 if (e & FE_INEXACT)
821 return fex_inexact;
1074 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1075 }
1076 break;
1077
1078 case cvtps2pd:
1079 dummy.op = cvtss2sd;
1080 for (i = 0; i < 2; i++) {
1081 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1082 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1083 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1084 }
1085 break;
1086
1087 case cvtpd2ps:
1088 dummy.op = cvtsd2ss;
1089 for (i = 0; i < 2; i++) {
1090 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1091 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1092 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1093 }
1094 default:
1095 break;
1096 }
1097 }
1098
1099 /*
1100 * Store the result value from *info in the destination of the scalar
1101 * SSE instruction specified by *inst. If no result is given but the
1102 * exception is underflow or overflow, supply the default trapped result.
1103 *
1104 * This routine does not work if the instruction specified by *inst
1105 * is not a scalar instruction.
1106 */
1107 void
1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1109 fex_info_t *info)
1110 {
1111 int i = 0;
1112 long long l = 0L;;
1113 float f = 0.0, fscl;
1114 double d = 0.0L, dscl;
1115
1116 /* for compares that write eflags, just set the flags
1117 to indicate "unordered" */
1118 if (inst->op == ucomiss || inst->op == comiss ||
1119 inst->op == ucomisd || inst->op == comisd) {
1120 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1121 return;
1122 }
1123
1124 /* if info doesn't specify a result value, try to generate
1125 the default trapped result */
1126 if (info->res.type == fex_nodata) {
1127 /* set scale factors for exponent wrapping */
1128 switch (e) {
1129 case fex_overflow:
1130 fscl = 1.262177448e-29f; /* 2^-96 */
1131 dscl = 6.441148769597133308e-232; /* 2^-768 */
1132 break;
1133
1134 case fex_underflow:
1221 switch (info->res.type) {
1222 case fex_int:
1223 i = info->res.val.i;
1224 break;
1225
1226 case fex_llong:
1227 i = info->res.val.l;
1228 break;
1229
1230 case fex_float:
1231 i = info->res.val.f;
1232 break;
1233
1234 case fex_double:
1235 i = info->res.val.d;
1236 break;
1237
1238 case fex_ldouble:
1239 i = info->res.val.q;
1240 break;
1241
1242 default:
1243 break;
1244 }
1245 inst->op1->i[0] = i;
1246 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1247 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1248 inst->op == cvtsd2siq) {
1249 switch (info->res.type) {
1250 case fex_int:
1251 l = info->res.val.i;
1252 break;
1253
1254 case fex_llong:
1255 l = info->res.val.l;
1256 break;
1257
1258 case fex_float:
1259 l = info->res.val.f;
1260 break;
1261
1262 case fex_double:
1263 l = info->res.val.d;
1264 break;
1265
1266 case fex_ldouble:
1267 l = info->res.val.q;
1268 break;
1269
1270 default:
1271 break;
1272 }
1273 inst->op1->l[0] = l;
1274 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1275 inst->op == cvtss2sd) {
1276 switch (info->res.type) {
1277 case fex_int:
1278 d = info->res.val.i;
1279 break;
1280
1281 case fex_llong:
1282 d = info->res.val.l;
1283 break;
1284
1285 case fex_float:
1286 d = info->res.val.f;
1287 break;
1288
1289 case fex_double:
1290 d = info->res.val.d;
1291 break;
1292
1293 case fex_ldouble:
1294 d = info->res.val.q;
1295 break;
1296
1297 default:
1298 break;
1299 }
1300 inst->op1->d[0] = d;
1301 } else {
1302 switch (info->res.type) {
1303 case fex_int:
1304 f = info->res.val.i;
1305 break;
1306
1307 case fex_llong:
1308 f = info->res.val.l;
1309 break;
1310
1311 case fex_float:
1312 f = info->res.val.f;
1313 break;
1314
1315 case fex_double:
1316 f = info->res.val.d;
1317 break;
1318
1319 case fex_ldouble:
1320 f = info->res.val.q;
1321 break;
1322
1323 default:
1324 break;
1325 }
1326 inst->op1->f[0] = f;
1327 }
1328 }
1329
1330 /*
1331 * Store the results from a SIMD instruction. For each i, store
1332 * the result value from info[i] in the i-th part of the destination
1333 * of the SIMD SSE instruction specified by *inst. If no result
1334 * is given but the exception indicated by e[i] is underflow or
1335 * overflow, supply the default trapped result.
1336 *
1337 * This routine does not work if the instruction specified by *inst
1338 * is not a SIMD instruction.
1339 */
1340 void
1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1342 fex_info_t *info)
1343 {
1344 sseinst_t dummy;
1583 break;
1584
1585 case cvtps2pd:
1586 dummy.op = cvtss2sd;
1587 for (i = 0; i < 2; i++) {
1588 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1589 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1590 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1591 }
1592 break;
1593
1594 case cvtpd2ps:
1595 dummy.op = cvtsd2ss;
1596 for (i = 0; i < 2; i++) {
1597 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1598 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1599 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1600 }
1601 /* zero the high 64 bits of the destination */
1602 inst->op1->l[1] = 0ll;
1603
1604 default:
1605 break;
1606 }
1607 }
1608
|