1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 26 /* 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 28 * Use is subject to license terms. 29 */ 30 31 #include <ucontext.h> 32 #include <fenv.h> 33 #if defined(__SUNPRO_C) 34 #include <sunmath.h> 35 #else 36 #include <sys/ieeefp.h> 37 #endif 38 #include "fex_handler.h" 39 #include "fenv_inlines.h" 40 41 #if !defined(REG_PC) 42 #define REG_PC EIP 43 #endif 44 45 #if !defined(REG_PS) 46 #define REG_PS EFL 47 #endif 48 49 #ifdef __amd64 50 #define regno(X) ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \ 51 REG_RSP)) 52 #else 53 #define regno(X) (EAX - X) 54 #endif 55 56 /* 57 * Support for SSE instructions 58 */ 59 60 /* 61 * Decode an SSE instruction. Fill in *inst and return the length of the 62 * instruction in bytes. Return 0 if the instruction is not recognized. 63 */ 64 int 65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst) 66 { 67 unsigned char *ip; 68 char *addr; 69 int i, dbl, simd, rex, modrm, sib, r; 70 71 i = 0; 72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; 73 74 /* look for pseudo-prefixes */ 75 dbl = 0; 76 simd = SIMD; 77 78 if (ip[i] == 0xF3) { 79 simd = 0; 80 i++; 81 } else if (ip[i] == 0x66) { 82 dbl = DOUBLE; 83 i++; 84 } else if (ip[i] == 0xF2) { 85 dbl = DOUBLE; 86 simd = 0; 87 i++; 88 } 89 90 /* look for AMD64 REX prefix */ 91 rex = 0; 92 93 if (ip[i] >= 0x40 && ip[i] <= 0x4F) { 94 rex = ip[i]; 95 i++; 96 } 97 98 /* parse opcode */ 99 if (ip[i++] != 0x0F) 100 return (0); 101 102 switch (ip[i++]) { 103 case 0x2A: 104 inst->op = (int)cvtsi2ss + simd + dbl; 105 106 if (!simd) 107 inst->op = (int)inst->op + (rex & 8); 108 109 break; 110 111 case 0x2C: 112 inst->op = (int)cvttss2si + simd + dbl; 113 114 if (!simd) 115 inst->op = (int)inst->op + (rex & 8); 116 117 break; 118 119 case 0x2D: 120 inst->op = (int)cvtss2si + simd + dbl; 121 122 if (!simd) 123 inst->op = (int)inst->op + (rex & 8); 124 125 break; 126 127 case 0x2E: 128 129 /* oddball: scalar instruction in a SIMD opcode group */ 130 if (!simd) 131 return (0); 132 133 inst->op = (int)ucomiss + dbl; 134 break; 135 136 case 0x2F: 137 138 /* oddball: scalar instruction in a SIMD opcode group */ 139 if (!simd) 140 return (0); 141 142 inst->op = (int)comiss + dbl; 143 break; 144 145 case 0x51: 146 inst->op = (int)sqrtss + simd + dbl; 147 break; 148 149 case 0x58: 150 inst->op = (int)addss + simd + dbl; 151 break; 152 153 case 0x59: 154 inst->op = (int)mulss + simd + dbl; 155 break; 156 157 case 0x5A: 158 inst->op = (int)cvtss2sd + simd + dbl; 159 break; 160 161 case 0x5B: 162 163 if (dbl) { 164 if (simd) 165 inst->op = cvtps2dq; 166 else 167 return (0); 168 } else { 169 inst->op = (simd) ? cvtdq2ps : cvttps2dq; 170 } 171 172 break; 173 174 case 0x5C: 175 inst->op = (int)subss + simd + dbl; 176 break; 177 178 case 0x5D: 179 inst->op = (int)minss + simd + dbl; 180 break; 181 182 case 0x5E: 183 inst->op = (int)divss + simd + dbl; 184 break; 185 186 case 0x5F: 187 inst->op = (int)maxss + simd + dbl; 188 break; 189 190 case 0xC2: 191 inst->op = (int)cmpss + simd + dbl; 192 break; 193 194 case 0xE6: 195 196 if (simd) { 197 if (dbl) 198 inst->op = cvttpd2dq; 199 else 200 return (0); 201 } else { 202 inst->op = (dbl) ? cvtpd2dq : cvtdq2pd; 203 } 204 205 break; 206 207 default: 208 return (0); 209 } 210 211 /* locate operands */ 212 modrm = ip[i++]; 213 214 if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op == 215 cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq || 216 inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op == 217 cvttsd2siq) { 218 /* op1 is a gp register */ 219 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 220 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; 221 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op == 222 cvtpd2pi || inst->op == cvttpd2pi) { 223 /* op1 is a mmx register */ 224 #ifdef __amd64 225 inst->op1 = (sseoperand_t *) 226 &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state 227 .st[(modrm >> 3) & 7]; 228 #else 229 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + 230 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state 231 .state[7]); 232 #endif 233 } else { 234 /* op1 is a xmm register */ 235 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 236 inst->op1 = 237 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set 238 .fpchip_state.xmm[r]; 239 } 240 241 if ((modrm >> 6) == 3) { 242 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op == 243 cvtsi2ssq || inst->op == cvtsi2sdq) { 244 /* op2 is a gp register */ 245 r = ((rex & 1) << 3) | (modrm & 7); 246 inst->op2 = 247 (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; 248 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { 249 /* op2 is a mmx register */ 250 #ifdef __amd64 251 inst->op2 = 252 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set 253 .fpchip_state.st[modrm & 7]; 254 #else 255 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + 256 (char *)&uap->uc_mcontext.fpregs.fp_reg_set 257 .fpchip_state.state[7]); 258 #endif 259 } else { 260 /* op2 is a xmm register */ 261 r = ((rex & 1) << 3) | (modrm & 7); 262 inst->op2 = 263 (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set 264 .fpchip_state.xmm[r]; 265 } 266 } else if ((modrm & 0xc7) == 0x05) { 267 #ifdef __amd64 268 /* address of next instruction + offset */ 269 r = i + 4; 270 271 if (inst->op == cmpss || inst->op == cmpps || inst->op == 272 cmpsd || inst->op == cmppd) 273 r++; 274 275 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); 276 #else 277 /* absolute address */ 278 inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); 279 #endif 280 i += 4; 281 } else { 282 /* complex address */ 283 if ((modrm & 7) == 4) { 284 /* parse sib byte */ 285 sib = ip[i++]; 286 287 if ((sib & 7) == 5 && (modrm >> 6) == 0) { 288 /* start with absolute address */ 289 addr = (char *)(uintptr_t)(*(int *)(ip + i)); 290 i += 4; 291 } else { 292 /* start with base */ 293 r = ((rex & 1) << 3) | (sib & 7); 294 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 295 } 296 297 r = ((rex & 2) << 2) | ((sib >> 3) & 7); 298 299 if (r != 4) { 300 /* add scaled index */ 301 addr += uap->uc_mcontext.gregs[regno(r)] << 302 (sib >> 6); 303 } 304 } else { 305 r = ((rex & 1) << 3) | (modrm & 7); 306 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 307 } 308 309 /* add displacement, if any */ 310 if ((modrm >> 6) == 1) { 311 addr += (char)ip[i++]; 312 } else if ((modrm >> 6) == 2) { 313 addr += *(int *)(ip + i); 314 i += 4; 315 } 316 317 inst->op2 = (sseoperand_t *)addr; 318 } 319 320 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || 321 inst->op == cmppd) { 322 /* get the immediate operand */ 323 inst->imm = ip[i++]; 324 } 325 326 return (i); 327 } 328 329 static enum fp_class_type 330 my_fp_classf(float *x) 331 { 332 int i = *(int *)x & ~0x80000000; 333 334 if (i < 0x7f800000) { 335 if (i < 0x00800000) 336 return ((i == 0) ? fp_zero : fp_subnormal); 337 338 return (fp_normal); 339 } else if (i == 0x7f800000) { 340 return (fp_infinity); 341 } else if (i & 0x400000) { 342 return (fp_quiet); 343 } else { 344 return (fp_signaling); 345 } 346 } 347 348 static enum fp_class_type 349 my_fp_class(double *x) 350 { 351 int i = *(1 + (int *)x) & ~0x80000000; 352 353 if (i < 0x7ff00000) { 354 if (i < 0x00100000) 355 return (((i | *(int *)x) == 0) ? fp_zero : 356 fp_subnormal); 357 358 return (fp_normal); 359 } else if (i == 0x7ff00000 && *(int *)x == 0) { 360 return (fp_infinity); 361 } else if (i & 0x80000) { 362 return (fp_quiet); 363 } else { 364 return (fp_signaling); 365 } 366 } 367 368 /* 369 * Inspect a scalar SSE instruction that incurred an invalid operation 370 * exception to determine which type of exception it was. 371 */ 372 static enum fex_exception 373 __fex_get_sse_invalid_type(sseinst_t *inst) 374 { 375 enum fp_class_type t1, t2; 376 377 /* check op2 for signaling nan */ 378 t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) : 379 my_fp_classf(&inst->op2->f[0]); 380 381 if (t2 == fp_signaling) 382 return (fex_inv_snan); 383 384 /* eliminate all single-operand instructions */ 385 switch (inst->op) { 386 case cvtsd2ss: 387 case cvtss2sd: 388 /* hmm, this shouldn't have happened */ 389 return ((enum fex_exception)-1); 390 391 case sqrtss: 392 case sqrtsd: 393 return (fex_inv_sqrt); 394 395 case cvtss2si: 396 case cvtsd2si: 397 case cvttss2si: 398 case cvttsd2si: 399 case cvtss2siq: 400 case cvtsd2siq: 401 case cvttss2siq: 402 case cvttsd2siq: 403 return (fex_inv_int); 404 default: 405 break; 406 } 407 408 /* check op1 for signaling nan */ 409 t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) : 410 my_fp_classf(&inst->op1->f[0]); 411 412 if (t1 == fp_signaling) 413 return (fex_inv_snan); 414 415 /* check two-operand instructions for other cases */ 416 switch (inst->op) { 417 case cmpss: 418 case cmpsd: 419 case minss: 420 case minsd: 421 case maxss: 422 case maxsd: 423 case comiss: 424 case comisd: 425 return (fex_inv_cmp); 426 427 case addss: 428 case addsd: 429 case subss: 430 case subsd: 431 432 if (t1 == fp_infinity && t2 == fp_infinity) 433 return (fex_inv_isi); 434 435 break; 436 437 case mulss: 438 case mulsd: 439 440 if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero && 441 t1 == fp_infinity)) 442 return (fex_inv_zmi); 443 444 break; 445 446 case divss: 447 case divsd: 448 449 if (t1 == fp_zero && t2 == fp_zero) 450 return (fex_inv_zdz); 451 452 if (t1 == fp_infinity && t2 == fp_infinity) 453 return (fex_inv_idi); 454 455 default: 456 break; 457 } 458 459 return ((enum fex_exception)-1); 460 } 461 462 /* inline templates */ 463 extern void sse_cmpeqss(float *, float *, int *); 464 extern void sse_cmpltss(float *, float *, int *); 465 extern void sse_cmpless(float *, float *, int *); 466 extern void sse_cmpunordss(float *, float *, int *); 467 extern void sse_minss(float *, float *, float *); 468 extern void sse_maxss(float *, float *, float *); 469 extern void sse_addss(float *, float *, float *); 470 extern void sse_subss(float *, float *, float *); 471 extern void sse_mulss(float *, float *, float *); 472 extern void sse_divss(float *, float *, float *); 473 extern void sse_sqrtss(float *, float *); 474 extern void sse_ucomiss(float *, float *); 475 extern void sse_comiss(float *, float *); 476 extern void sse_cvtss2sd(float *, double *); 477 extern void sse_cvtsi2ss(int *, float *); 478 extern void sse_cvttss2si(float *, int *); 479 extern void sse_cvtss2si(float *, int *); 480 481 #ifdef __amd64 482 extern void sse_cvtsi2ssq(long long *, float *); 483 extern void sse_cvttss2siq(float *, long long *); 484 extern void sse_cvtss2siq(float *, long long *); 485 #endif 486 487 extern void sse_cmpeqsd(double *, double *, long long *); 488 extern void sse_cmpltsd(double *, double *, long long *); 489 extern void sse_cmplesd(double *, double *, long long *); 490 extern void sse_cmpunordsd(double *, double *, long long *); 491 extern void sse_minsd(double *, double *, double *); 492 extern void sse_maxsd(double *, double *, double *); 493 extern void sse_addsd(double *, double *, double *); 494 extern void sse_subsd(double *, double *, double *); 495 extern void sse_mulsd(double *, double *, double *); 496 extern void sse_divsd(double *, double *, double *); 497 extern void sse_sqrtsd(double *, double *); 498 extern void sse_ucomisd(double *, double *); 499 extern void sse_comisd(double *, double *); 500 extern void sse_cvtsd2ss(double *, float *); 501 extern void sse_cvtsi2sd(int *, double *); 502 extern void sse_cvttsd2si(double *, int *); 503 extern void sse_cvtsd2si(double *, int *); 504 505 #ifdef __amd64 506 extern void sse_cvtsi2sdq(long long *, double *); 507 extern void sse_cvttsd2siq(double *, long long *); 508 extern void sse_cvtsd2siq(double *, long long *); 509 #endif 510 511 /* 512 * Fill in *info with the operands, default untrapped result, and 513 * flags produced by a scalar SSE instruction, and return the type 514 * of trapped exception (if any). On entry, the mxcsr must have 515 * all exceptions masked and all flags clear. The same conditions 516 * will hold on exit. 517 * 518 * This routine does not work if the instruction specified by *inst 519 * is not a scalar instruction. 520 */ 521 enum fex_exception 522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) 523 { 524 unsigned int e, te, mxcsr, oldmxcsr, subnorm; 525 526 /* 527 * Perform the operation with traps disabled and check the 528 * exception flags. If the underflow trap was enabled, also 529 * check for an exact subnormal result. 530 */ 531 __fenv_getmxcsr(&oldmxcsr); 532 subnorm = 0; 533 534 if ((int)inst->op & DOUBLE) { 535 if (inst->op == cvtsi2sd) { 536 info->op1.type = fex_int; 537 info->op1.val.i = inst->op2->i[0]; 538 info->op2.type = fex_nodata; 539 } else if (inst->op == cvtsi2sdq) { 540 info->op1.type = fex_llong; 541 info->op1.val.l = inst->op2->l[0]; 542 info->op2.type = fex_nodata; 543 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || 544 inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op == 545 cvttsd2siq || inst->op == cvtsd2siq) { 546 info->op1.type = fex_double; 547 info->op1.val.d = inst->op2->d[0]; 548 info->op2.type = fex_nodata; 549 } else { 550 info->op1.type = fex_double; 551 info->op1.val.d = inst->op1->d[0]; 552 info->op2.type = fex_double; 553 info->op2.val.d = inst->op2->d[0]; 554 } 555 556 info->res.type = fex_double; 557 558 switch (inst->op) { 559 case cmpsd: 560 info->op = fex_cmp; 561 info->res.type = fex_llong; 562 563 switch (inst->imm & 3) { 564 case 0: 565 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, 566 &info->res.val.l); 567 break; 568 569 case 1: 570 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, 571 &info->res.val.l); 572 break; 573 574 case 2: 575 sse_cmplesd(&info->op1.val.d, &info->op2.val.d, 576 &info->res.val.l); 577 break; 578 579 case 3: 580 sse_cmpunordsd(&info->op1.val.d, 581 &info->op2.val.d, &info->res.val.l); 582 } 583 584 if (inst->imm & 4) 585 info->res.val.l ^= 0xffffffffffffffffull; 586 587 break; 588 589 case minsd: 590 info->op = fex_other; 591 sse_minsd(&info->op1.val.d, &info->op2.val.d, 592 &info->res.val.d); 593 break; 594 595 case maxsd: 596 info->op = fex_other; 597 sse_maxsd(&info->op1.val.d, &info->op2.val.d, 598 &info->res.val.d); 599 break; 600 601 case addsd: 602 info->op = fex_add; 603 sse_addsd(&info->op1.val.d, &info->op2.val.d, 604 &info->res.val.d); 605 606 if (my_fp_class(&info->res.val.d) == fp_subnormal) 607 subnorm = 1; 608 609 break; 610 611 case subsd: 612 info->op = fex_sub; 613 sse_subsd(&info->op1.val.d, &info->op2.val.d, 614 &info->res.val.d); 615 616 if (my_fp_class(&info->res.val.d) == fp_subnormal) 617 subnorm = 1; 618 619 break; 620 621 case mulsd: 622 info->op = fex_mul; 623 sse_mulsd(&info->op1.val.d, &info->op2.val.d, 624 &info->res.val.d); 625 626 if (my_fp_class(&info->res.val.d) == fp_subnormal) 627 subnorm = 1; 628 629 break; 630 631 case divsd: 632 info->op = fex_div; 633 sse_divsd(&info->op1.val.d, &info->op2.val.d, 634 &info->res.val.d); 635 636 if (my_fp_class(&info->res.val.d) == fp_subnormal) 637 subnorm = 1; 638 639 break; 640 641 case sqrtsd: 642 info->op = fex_sqrt; 643 sse_sqrtsd(&info->op1.val.d, &info->res.val.d); 644 break; 645 646 case cvtsd2ss: 647 info->op = fex_cnvt; 648 info->res.type = fex_float; 649 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); 650 651 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 652 subnorm = 1; 653 654 break; 655 656 case cvtsi2sd: 657 info->op = fex_cnvt; 658 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); 659 break; 660 661 case cvttsd2si: 662 info->op = fex_cnvt; 663 info->res.type = fex_int; 664 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); 665 break; 666 667 case cvtsd2si: 668 info->op = fex_cnvt; 669 info->res.type = fex_int; 670 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); 671 break; 672 673 #ifdef __amd64 674 case cvtsi2sdq: 675 info->op = fex_cnvt; 676 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); 677 break; 678 679 case cvttsd2siq: 680 info->op = fex_cnvt; 681 info->res.type = fex_llong; 682 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); 683 break; 684 685 case cvtsd2siq: 686 info->op = fex_cnvt; 687 info->res.type = fex_llong; 688 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 689 break; 690 #endif 691 692 case ucomisd: 693 info->op = fex_cmp; 694 info->res.type = fex_nodata; 695 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 696 break; 697 698 case comisd: 699 info->op = fex_cmp; 700 info->res.type = fex_nodata; 701 sse_comisd(&info->op1.val.d, &info->op2.val.d); 702 break; 703 default: 704 break; 705 } 706 } else { 707 if (inst->op == cvtsi2ss) { 708 info->op1.type = fex_int; 709 info->op1.val.i = inst->op2->i[0]; 710 info->op2.type = fex_nodata; 711 } else if (inst->op == cvtsi2ssq) { 712 info->op1.type = fex_llong; 713 info->op1.val.l = inst->op2->l[0]; 714 info->op2.type = fex_nodata; 715 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 716 inst->op == cvttss2si || inst->op == cvtss2si || inst->op == 717 cvttss2siq || inst->op == cvtss2siq) { 718 info->op1.type = fex_float; 719 info->op1.val.f = inst->op2->f[0]; 720 info->op2.type = fex_nodata; 721 } else { 722 info->op1.type = fex_float; 723 info->op1.val.f = inst->op1->f[0]; 724 info->op2.type = fex_float; 725 info->op2.val.f = inst->op2->f[0]; 726 } 727 728 info->res.type = fex_float; 729 730 switch (inst->op) { 731 case cmpss: 732 info->op = fex_cmp; 733 info->res.type = fex_int; 734 735 switch (inst->imm & 3) { 736 case 0: 737 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, 738 &info->res.val.i); 739 break; 740 741 case 1: 742 sse_cmpltss(&info->op1.val.f, &info->op2.val.f, 743 &info->res.val.i); 744 break; 745 746 case 2: 747 sse_cmpless(&info->op1.val.f, &info->op2.val.f, 748 &info->res.val.i); 749 break; 750 751 case 3: 752 sse_cmpunordss(&info->op1.val.f, 753 &info->op2.val.f, &info->res.val.i); 754 } 755 756 if (inst->imm & 4) 757 info->res.val.i ^= 0xffffffffu; 758 759 break; 760 761 case minss: 762 info->op = fex_other; 763 sse_minss(&info->op1.val.f, &info->op2.val.f, 764 &info->res.val.f); 765 break; 766 767 case maxss: 768 info->op = fex_other; 769 sse_maxss(&info->op1.val.f, &info->op2.val.f, 770 &info->res.val.f); 771 break; 772 773 case addss: 774 info->op = fex_add; 775 sse_addss(&info->op1.val.f, &info->op2.val.f, 776 &info->res.val.f); 777 778 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 779 subnorm = 1; 780 781 break; 782 783 case subss: 784 info->op = fex_sub; 785 sse_subss(&info->op1.val.f, &info->op2.val.f, 786 &info->res.val.f); 787 788 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 789 subnorm = 1; 790 791 break; 792 793 case mulss: 794 info->op = fex_mul; 795 sse_mulss(&info->op1.val.f, &info->op2.val.f, 796 &info->res.val.f); 797 798 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 799 subnorm = 1; 800 801 break; 802 803 case divss: 804 info->op = fex_div; 805 sse_divss(&info->op1.val.f, &info->op2.val.f, 806 &info->res.val.f); 807 808 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 809 subnorm = 1; 810 811 break; 812 813 case sqrtss: 814 info->op = fex_sqrt; 815 sse_sqrtss(&info->op1.val.f, &info->res.val.f); 816 break; 817 818 case cvtss2sd: 819 info->op = fex_cnvt; 820 info->res.type = fex_double; 821 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); 822 break; 823 824 case cvtsi2ss: 825 info->op = fex_cnvt; 826 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); 827 break; 828 829 case cvttss2si: 830 info->op = fex_cnvt; 831 info->res.type = fex_int; 832 sse_cvttss2si(&info->op1.val.f, &info->res.val.i); 833 break; 834 835 case cvtss2si: 836 info->op = fex_cnvt; 837 info->res.type = fex_int; 838 sse_cvtss2si(&info->op1.val.f, &info->res.val.i); 839 break; 840 841 #ifdef __amd64 842 case cvtsi2ssq: 843 info->op = fex_cnvt; 844 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); 845 break; 846 847 case cvttss2siq: 848 info->op = fex_cnvt; 849 info->res.type = fex_llong; 850 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); 851 break; 852 853 case cvtss2siq: 854 info->op = fex_cnvt; 855 info->res.type = fex_llong; 856 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 857 break; 858 #endif 859 860 case ucomiss: 861 info->op = fex_cmp; 862 info->res.type = fex_nodata; 863 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 864 break; 865 866 case comiss: 867 info->op = fex_cmp; 868 info->res.type = fex_nodata; 869 sse_comiss(&info->op1.val.f, &info->op2.val.f); 870 break; 871 default: 872 break; 873 } 874 } 875 876 __fenv_getmxcsr(&mxcsr); 877 info->flags = mxcsr & 0x3d; 878 __fenv_setmxcsr(&oldmxcsr); 879 880 /* determine which exception would have been trapped */ 881 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) & 882 0x3d; 883 e = mxcsr & te; 884 885 if (e & FE_INVALID) 886 return (__fex_get_sse_invalid_type(inst)); 887 888 if (e & FE_DIVBYZERO) 889 return (fex_division); 890 891 if (e & FE_OVERFLOW) 892 return (fex_overflow); 893 894 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 895 return (fex_underflow); 896 897 if (e & FE_INEXACT) 898 return (fex_inexact); 899 900 return ((enum fex_exception)-1); 901 } 902 903 /* 904 * Emulate a SIMD SSE instruction to determine which exceptions occur 905 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the 906 * trapped exception that would occur if the i-th part of the SIMD 907 * instruction were executed in isolation; set e[i] to -1 if no 908 * trapped exception would occur in this part. Also fill in info[i] 909 * with the corresponding operands, default untrapped result, and 910 * flags. 911 * 912 * This routine does not work if the instruction specified by *inst 913 * is not a SIMD instruction. 914 */ 915 void 916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 917 fex_info_t *info) 918 { 919 sseinst_t dummy; 920 int i; 921 922 e[0] = e[1] = e[2] = e[3] = -1; 923 924 /* perform each part of the SIMD operation */ 925 switch (inst->op) { 926 case cmpps: 927 dummy.op = cmpss; 928 dummy.imm = inst->imm; 929 930 for (i = 0; i < 4; i++) { 931 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 932 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 933 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 934 } 935 936 break; 937 938 case minps: 939 dummy.op = minss; 940 941 for (i = 0; i < 4; i++) { 942 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 945 } 946 947 break; 948 949 case maxps: 950 dummy.op = maxss; 951 952 for (i = 0; i < 4; i++) { 953 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 954 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 955 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 956 } 957 958 break; 959 960 case addps: 961 dummy.op = addss; 962 963 for (i = 0; i < 4; i++) { 964 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 965 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 966 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 967 } 968 969 break; 970 971 case subps: 972 dummy.op = subss; 973 974 for (i = 0; i < 4; i++) { 975 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 976 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 977 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 978 } 979 980 break; 981 982 case mulps: 983 dummy.op = mulss; 984 985 for (i = 0; i < 4; i++) { 986 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 987 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 988 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 989 } 990 991 break; 992 993 case divps: 994 dummy.op = divss; 995 996 for (i = 0; i < 4; i++) { 997 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 998 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1000 } 1001 1002 break; 1003 1004 case sqrtps: 1005 dummy.op = sqrtss; 1006 1007 for (i = 0; i < 4; i++) { 1008 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1009 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1010 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1011 } 1012 1013 break; 1014 1015 case cvtdq2ps: 1016 dummy.op = cvtsi2ss; 1017 1018 for (i = 0; i < 4; i++) { 1019 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1020 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1021 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1022 } 1023 1024 break; 1025 1026 case cvttps2dq: 1027 dummy.op = cvttss2si; 1028 1029 for (i = 0; i < 4; i++) { 1030 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1031 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1032 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1033 } 1034 1035 break; 1036 1037 case cvtps2dq: 1038 dummy.op = cvtss2si; 1039 1040 for (i = 0; i < 4; i++) { 1041 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1042 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1044 } 1045 1046 break; 1047 1048 case cvtpi2ps: 1049 dummy.op = cvtsi2ss; 1050 1051 for (i = 0; i < 2; i++) { 1052 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1055 } 1056 1057 break; 1058 1059 case cvttps2pi: 1060 dummy.op = cvttss2si; 1061 1062 for (i = 0; i < 2; i++) { 1063 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1064 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1065 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1066 } 1067 1068 break; 1069 1070 case cvtps2pi: 1071 dummy.op = cvtss2si; 1072 1073 for (i = 0; i < 2; i++) { 1074 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1075 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1076 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1077 } 1078 1079 break; 1080 1081 case cmppd: 1082 dummy.op = cmpsd; 1083 dummy.imm = inst->imm; 1084 1085 for (i = 0; i < 2; i++) { 1086 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1087 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1088 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1089 } 1090 1091 break; 1092 1093 case minpd: 1094 dummy.op = minsd; 1095 1096 for (i = 0; i < 2; i++) { 1097 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1098 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1099 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1100 } 1101 1102 break; 1103 1104 case maxpd: 1105 dummy.op = maxsd; 1106 1107 for (i = 0; i < 2; i++) { 1108 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1109 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1110 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1111 } 1112 1113 break; 1114 1115 case addpd: 1116 dummy.op = addsd; 1117 1118 for (i = 0; i < 2; i++) { 1119 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1120 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1121 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1122 } 1123 1124 break; 1125 1126 case subpd: 1127 dummy.op = subsd; 1128 1129 for (i = 0; i < 2; i++) { 1130 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1131 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1132 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1133 } 1134 1135 break; 1136 1137 case mulpd: 1138 dummy.op = mulsd; 1139 1140 for (i = 0; i < 2; i++) { 1141 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1142 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1143 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1144 } 1145 1146 break; 1147 1148 case divpd: 1149 dummy.op = divsd; 1150 1151 for (i = 0; i < 2; i++) { 1152 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1153 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1154 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1155 } 1156 1157 break; 1158 1159 case sqrtpd: 1160 dummy.op = sqrtsd; 1161 1162 for (i = 0; i < 2; i++) { 1163 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1164 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1165 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1166 } 1167 1168 break; 1169 1170 case cvtpi2pd: 1171 case cvtdq2pd: 1172 dummy.op = cvtsi2sd; 1173 1174 for (i = 0; i < 2; i++) { 1175 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1176 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1177 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1178 } 1179 1180 break; 1181 1182 case cvttpd2pi: 1183 case cvttpd2dq: 1184 dummy.op = cvttsd2si; 1185 1186 for (i = 0; i < 2; i++) { 1187 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1188 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1189 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1190 } 1191 1192 break; 1193 1194 case cvtpd2pi: 1195 case cvtpd2dq: 1196 dummy.op = cvtsd2si; 1197 1198 for (i = 0; i < 2; i++) { 1199 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1200 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1201 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1202 } 1203 1204 break; 1205 1206 case cvtps2pd: 1207 dummy.op = cvtss2sd; 1208 1209 for (i = 0; i < 2; i++) { 1210 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1211 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1212 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1213 } 1214 1215 break; 1216 1217 case cvtpd2ps: 1218 dummy.op = cvtsd2ss; 1219 1220 for (i = 0; i < 2; i++) { 1221 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1222 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1223 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1224 } 1225 1226 default: 1227 break; 1228 } 1229 } 1230 1231 /* 1232 * Store the result value from *info in the destination of the scalar 1233 * SSE instruction specified by *inst. If no result is given but the 1234 * exception is underflow or overflow, supply the default trapped result. 1235 * 1236 * This routine does not work if the instruction specified by *inst 1237 * is not a scalar instruction. 1238 */ 1239 void 1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1241 fex_info_t *info) 1242 { 1243 int i = 0; 1244 long long l = 0L; 1245 float f = 0.0, fscl; 1246 double d = 0.0L, dscl; 1247 1248 /* 1249 * for compares that write eflags, just set the flags 1250 * to indicate "unordered" 1251 */ 1252 if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd || 1253 inst->op == comisd) { 1254 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1255 return; 1256 } 1257 1258 /* 1259 * if info doesn't specify a result value, try to generate 1260 * the default trapped result 1261 */ 1262 if (info->res.type == fex_nodata) { 1263 /* set scale factors for exponent wrapping */ 1264 switch (e) { 1265 case fex_overflow: 1266 fscl = 1.262177448e-29f; /* 2^-96 */ 1267 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1268 break; 1269 1270 case fex_underflow: 1271 fscl = 7.922816251e+28f; /* 2^96 */ 1272 dscl = 1.552518092300708935e+231; /* 2^768 */ 1273 break; 1274 1275 default: 1276 (void) __fex_get_sse_op(uap, inst, info); 1277 1278 if (info->res.type == fex_nodata) 1279 return; 1280 1281 goto stuff; 1282 } 1283 1284 /* generate the wrapped result */ 1285 if (inst->op == cvtsd2ss) { 1286 info->op1.type = fex_double; 1287 info->op1.val.d = inst->op2->d[0]; 1288 info->op2.type = fex_nodata; 1289 info->res.type = fex_float; 1290 info->res.val.f = (float)(fscl * (fscl * 1291 info->op1.val.d)); 1292 } else if ((int)inst->op & DOUBLE) { 1293 info->op1.type = fex_double; 1294 info->op1.val.d = inst->op1->d[0]; 1295 info->op2.type = fex_double; 1296 info->op2.val.d = inst->op2->d[0]; 1297 info->res.type = fex_double; 1298 1299 switch (inst->op) { 1300 case addsd: 1301 info->res.val.d = dscl * (dscl * 1302 info->op1.val.d + dscl * info->op2.val.d); 1303 break; 1304 1305 case subsd: 1306 info->res.val.d = dscl * (dscl * 1307 info->op1.val.d - dscl * info->op2.val.d); 1308 break; 1309 1310 case mulsd: 1311 info->res.val.d = (dscl * info->op1.val.d) * 1312 (dscl * info->op2.val.d); 1313 break; 1314 1315 case divsd: 1316 info->res.val.d = (dscl * info->op1.val.d) / 1317 (info->op2.val.d / dscl); 1318 break; 1319 1320 default: 1321 return; 1322 } 1323 } else { 1324 info->op1.type = fex_float; 1325 info->op1.val.f = inst->op1->f[0]; 1326 info->op2.type = fex_float; 1327 info->op2.val.f = inst->op2->f[0]; 1328 info->res.type = fex_float; 1329 1330 switch (inst->op) { 1331 case addss: 1332 info->res.val.f = fscl * (fscl * 1333 info->op1.val.f + fscl * info->op2.val.f); 1334 break; 1335 1336 case subss: 1337 info->res.val.f = fscl * (fscl * 1338 info->op1.val.f - fscl * info->op2.val.f); 1339 break; 1340 1341 case mulss: 1342 info->res.val.f = (fscl * info->op1.val.f) * 1343 (fscl * info->op2.val.f); 1344 break; 1345 1346 case divss: 1347 info->res.val.f = (fscl * info->op1.val.f) / 1348 (info->op2.val.f / fscl); 1349 break; 1350 1351 default: 1352 return; 1353 } 1354 } 1355 } 1356 1357 /* put the result in the destination */ 1358 stuff: 1359 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == 1360 cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) { 1361 switch (info->res.type) { 1362 case fex_int: 1363 i = info->res.val.i; 1364 break; 1365 1366 case fex_llong: 1367 i = info->res.val.l; 1368 break; 1369 1370 case fex_float: 1371 i = info->res.val.f; 1372 break; 1373 1374 case fex_double: 1375 i = info->res.val.d; 1376 break; 1377 1378 case fex_ldouble: 1379 i = info->res.val.q; 1380 break; 1381 1382 default: 1383 break; 1384 } 1385 1386 inst->op1->i[0] = i; 1387 } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op == 1388 cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) { 1389 switch (info->res.type) { 1390 case fex_int: 1391 l = info->res.val.i; 1392 break; 1393 1394 case fex_llong: 1395 l = info->res.val.l; 1396 break; 1397 1398 case fex_float: 1399 l = info->res.val.f; 1400 break; 1401 1402 case fex_double: 1403 l = info->res.val.d; 1404 break; 1405 1406 case fex_ldouble: 1407 l = info->res.val.q; 1408 break; 1409 1410 default: 1411 break; 1412 } 1413 1414 inst->op1->l[0] = l; 1415 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1416 inst->op == cvtss2sd) { 1417 switch (info->res.type) { 1418 case fex_int: 1419 d = info->res.val.i; 1420 break; 1421 1422 case fex_llong: 1423 d = info->res.val.l; 1424 break; 1425 1426 case fex_float: 1427 d = info->res.val.f; 1428 break; 1429 1430 case fex_double: 1431 d = info->res.val.d; 1432 break; 1433 1434 case fex_ldouble: 1435 d = info->res.val.q; 1436 break; 1437 1438 default: 1439 break; 1440 } 1441 1442 inst->op1->d[0] = d; 1443 } else { 1444 switch (info->res.type) { 1445 case fex_int: 1446 f = info->res.val.i; 1447 break; 1448 1449 case fex_llong: 1450 f = info->res.val.l; 1451 break; 1452 1453 case fex_float: 1454 f = info->res.val.f; 1455 break; 1456 1457 case fex_double: 1458 f = info->res.val.d; 1459 break; 1460 1461 case fex_ldouble: 1462 f = info->res.val.q; 1463 break; 1464 1465 default: 1466 break; 1467 } 1468 1469 inst->op1->f[0] = f; 1470 } 1471 } 1472 1473 /* 1474 * Store the results from a SIMD instruction. For each i, store 1475 * the result value from info[i] in the i-th part of the destination 1476 * of the SIMD SSE instruction specified by *inst. If no result 1477 * is given but the exception indicated by e[i] is underflow or 1478 * overflow, supply the default trapped result. 1479 * 1480 * This routine does not work if the instruction specified by *inst 1481 * is not a SIMD instruction. 1482 */ 1483 void 1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1485 fex_info_t *info) 1486 { 1487 sseinst_t dummy; 1488 int i; 1489 1490 /* store each part */ 1491 switch (inst->op) { 1492 case cmpps: 1493 dummy.op = cmpss; 1494 dummy.imm = inst->imm; 1495 1496 for (i = 0; i < 4; i++) { 1497 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1498 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1500 } 1501 1502 break; 1503 1504 case minps: 1505 dummy.op = minss; 1506 1507 for (i = 0; i < 4; i++) { 1508 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1509 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1510 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1511 } 1512 1513 break; 1514 1515 case maxps: 1516 dummy.op = maxss; 1517 1518 for (i = 0; i < 4; i++) { 1519 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1520 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1521 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1522 } 1523 1524 break; 1525 1526 case addps: 1527 dummy.op = addss; 1528 1529 for (i = 0; i < 4; i++) { 1530 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1531 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1532 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1533 } 1534 1535 break; 1536 1537 case subps: 1538 dummy.op = subss; 1539 1540 for (i = 0; i < 4; i++) { 1541 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1542 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1544 } 1545 1546 break; 1547 1548 case mulps: 1549 dummy.op = mulss; 1550 1551 for (i = 0; i < 4; i++) { 1552 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1553 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1555 } 1556 1557 break; 1558 1559 case divps: 1560 dummy.op = divss; 1561 1562 for (i = 0; i < 4; i++) { 1563 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1564 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1566 } 1567 1568 break; 1569 1570 case sqrtps: 1571 dummy.op = sqrtss; 1572 1573 for (i = 0; i < 4; i++) { 1574 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1575 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1576 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1577 } 1578 1579 break; 1580 1581 case cvtdq2ps: 1582 dummy.op = cvtsi2ss; 1583 1584 for (i = 0; i < 4; i++) { 1585 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1586 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1587 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1588 } 1589 1590 break; 1591 1592 case cvttps2dq: 1593 dummy.op = cvttss2si; 1594 1595 for (i = 0; i < 4; i++) { 1596 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1597 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1599 } 1600 1601 break; 1602 1603 case cvtps2dq: 1604 dummy.op = cvtss2si; 1605 1606 for (i = 0; i < 4; i++) { 1607 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1608 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1609 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1610 } 1611 1612 break; 1613 1614 case cvtpi2ps: 1615 dummy.op = cvtsi2ss; 1616 1617 for (i = 0; i < 2; i++) { 1618 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1619 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1620 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1621 } 1622 1623 break; 1624 1625 case cvttps2pi: 1626 dummy.op = cvttss2si; 1627 1628 for (i = 0; i < 2; i++) { 1629 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1630 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1631 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1632 } 1633 1634 break; 1635 1636 case cvtps2pi: 1637 dummy.op = cvtss2si; 1638 1639 for (i = 0; i < 2; i++) { 1640 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1641 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1642 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1643 } 1644 1645 break; 1646 1647 case cmppd: 1648 dummy.op = cmpsd; 1649 dummy.imm = inst->imm; 1650 1651 for (i = 0; i < 2; i++) { 1652 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1653 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1654 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1655 } 1656 1657 break; 1658 1659 case minpd: 1660 dummy.op = minsd; 1661 1662 for (i = 0; i < 2; i++) { 1663 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1664 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1665 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1666 } 1667 1668 break; 1669 1670 case maxpd: 1671 dummy.op = maxsd; 1672 1673 for (i = 0; i < 2; i++) { 1674 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1675 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1676 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1677 } 1678 1679 break; 1680 1681 case addpd: 1682 dummy.op = addsd; 1683 1684 for (i = 0; i < 2; i++) { 1685 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1686 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1687 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1688 } 1689 1690 break; 1691 1692 case subpd: 1693 dummy.op = subsd; 1694 1695 for (i = 0; i < 2; i++) { 1696 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1697 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1698 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1699 } 1700 1701 break; 1702 1703 case mulpd: 1704 dummy.op = mulsd; 1705 1706 for (i = 0; i < 2; i++) { 1707 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1708 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1709 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1710 } 1711 1712 break; 1713 1714 case divpd: 1715 dummy.op = divsd; 1716 1717 for (i = 0; i < 2; i++) { 1718 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1719 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1720 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1721 } 1722 1723 break; 1724 1725 case sqrtpd: 1726 dummy.op = sqrtsd; 1727 1728 for (i = 0; i < 2; i++) { 1729 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1730 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1731 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1732 } 1733 1734 break; 1735 1736 case cvtpi2pd: 1737 case cvtdq2pd: 1738 dummy.op = cvtsi2sd; 1739 1740 for (i = 0; i < 2; i++) { 1741 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1742 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1743 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1744 } 1745 1746 break; 1747 1748 case cvttpd2pi: 1749 case cvttpd2dq: 1750 dummy.op = cvttsd2si; 1751 1752 for (i = 0; i < 2; i++) { 1753 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1754 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1755 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1756 } 1757 1758 /* for cvttpd2dq, zero the high 64 bits of the destination */ 1759 if (inst->op == cvttpd2dq) 1760 inst->op1->l[1] = 0ll; 1761 1762 break; 1763 1764 case cvtpd2pi: 1765 case cvtpd2dq: 1766 dummy.op = cvtsd2si; 1767 1768 for (i = 0; i < 2; i++) { 1769 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1770 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1771 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1772 } 1773 1774 /* for cvtpd2dq, zero the high 64 bits of the destination */ 1775 if (inst->op == cvtpd2dq) 1776 inst->op1->l[1] = 0ll; 1777 1778 break; 1779 1780 case cvtps2pd: 1781 dummy.op = cvtss2sd; 1782 1783 for (i = 0; i < 2; i++) { 1784 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1785 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1786 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1787 } 1788 1789 break; 1790 1791 case cvtpd2ps: 1792 dummy.op = cvtsd2ss; 1793 1794 for (i = 0; i < 2; i++) { 1795 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1796 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1797 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1798 } 1799 1800 /* zero the high 64 bits of the destination */ 1801 inst->op1->l[1] = 0ll; 1802 1803 default: 1804 break; 1805 } 1806 }