1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include "fenv_synonyms.h" 31 #include <ucontext.h> 32 #include <fenv.h> 33 #if defined(__SUNPRO_C) 34 #include <sunmath.h> 35 #else 36 #include <sys/ieeefp.h> 37 #endif 38 #include "fex_handler.h" 39 #include "fenv_inlines.h" 40 41 #if !defined(REG_PC) 42 #define REG_PC EIP 43 #endif 44 45 #if !defined(REG_PS) 46 #define REG_PS EFL 47 #endif 48 49 #ifdef __amd64 50 #define regno(X) ((X < 4)? REG_RAX - X : \ 51 ((X > 4)? REG_RAX + 1 - X : REG_RSP)) 52 #else 53 #define regno(X) (EAX - X) 54 #endif 55 56 /* 57 * Support for SSE instructions 58 */ 59 60 /* 61 * Decode an SSE instruction. Fill in *inst and return the length of the 62 * instruction in bytes. Return 0 if the instruction is not recognized. 63 */ 64 int 65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst) 66 { 67 unsigned char *ip; 68 char *addr; 69 int i, dbl, simd, rex, modrm, sib, r; 70 71 i = 0; 72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; 73 74 /* look for pseudo-prefixes */ 75 dbl = 0; 76 simd = SIMD; 77 if (ip[i] == 0xF3) { 78 simd = 0; 79 i++; 80 } else if (ip[i] == 0x66) { 81 dbl = DOUBLE; 82 i++; 83 } else if (ip[i] == 0xF2) { 84 dbl = DOUBLE; 85 simd = 0; 86 i++; 87 } 88 89 /* look for AMD64 REX prefix */ 90 rex = 0; 91 if (ip[i] >= 0x40 && ip[i] <= 0x4F) { 92 rex = ip[i]; 93 i++; 94 } 95 96 /* parse opcode */ 97 if (ip[i++] != 0x0F) 98 return 0; 99 switch (ip[i++]) { 100 case 0x2A: 101 inst->op = (int)cvtsi2ss + simd + dbl; 102 if (!simd) 103 inst->op = (int)inst->op + (rex & 8); 104 break; 105 106 case 0x2C: 107 inst->op = (int)cvttss2si + simd + dbl; 108 if (!simd) 109 inst->op = (int)inst->op + (rex & 8); 110 break; 111 112 case 0x2D: 113 inst->op = (int)cvtss2si + simd + dbl; 114 if (!simd) 115 inst->op = (int)inst->op + (rex & 8); 116 break; 117 118 case 0x2E: 119 /* oddball: scalar instruction in a SIMD opcode group */ 120 if (!simd) 121 return 0; 122 inst->op = (int)ucomiss + dbl; 123 break; 124 125 case 0x2F: 126 /* oddball: scalar instruction in a SIMD opcode group */ 127 if (!simd) 128 return 0; 129 inst->op = (int)comiss + dbl; 130 break; 131 132 case 0x51: 133 inst->op = (int)sqrtss + simd + dbl; 134 break; 135 136 case 0x58: 137 inst->op = (int)addss + simd + dbl; 138 break; 139 140 case 0x59: 141 inst->op = (int)mulss + simd + dbl; 142 break; 143 144 case 0x5A: 145 inst->op = (int)cvtss2sd + simd + dbl; 146 break; 147 148 case 0x5B: 149 if (dbl) { 150 if (simd) 151 inst->op = cvtps2dq; 152 else 153 return 0; 154 } else { 155 inst->op = (simd)? cvtdq2ps : cvttps2dq; 156 } 157 break; 158 159 case 0x5C: 160 inst->op = (int)subss + simd + dbl; 161 break; 162 163 case 0x5D: 164 inst->op = (int)minss + simd + dbl; 165 break; 166 167 case 0x5E: 168 inst->op = (int)divss + simd + dbl; 169 break; 170 171 case 0x5F: 172 inst->op = (int)maxss + simd + dbl; 173 break; 174 175 case 0xC2: 176 inst->op = (int)cmpss + simd + dbl; 177 break; 178 179 case 0xE6: 180 if (simd) { 181 if (dbl) 182 inst->op = cvttpd2dq; 183 else 184 return 0; 185 } else { 186 inst->op = (dbl)? cvtpd2dq : cvtdq2pd; 187 } 188 break; 189 190 default: 191 return 0; 192 } 193 194 /* locate operands */ 195 modrm = ip[i++]; 196 197 if (inst->op == cvtss2si || inst->op == cvttss2si || 198 inst->op == cvtsd2si || inst->op == cvttsd2si || 199 inst->op == cvtss2siq || inst->op == cvttss2siq || 200 inst->op == cvtsd2siq || inst->op == cvttsd2siq) { 201 /* op1 is a gp register */ 202 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 203 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; 204 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || 205 inst->op == cvtpd2pi || inst->op == cvttpd2pi) { 206 /* op1 is a mmx register */ 207 #ifdef __amd64 208 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. 209 fpchip_state.st[(modrm >> 3) & 7]; 210 #else 211 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + 212 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 213 fpchip_state.state[7]); 214 #endif 215 } else { 216 /* op1 is a xmm register */ 217 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 218 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 219 fp_reg_set.fpchip_state.xmm[r]; 220 } 221 222 if ((modrm >> 6) == 3) { 223 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || 224 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { 225 /* op2 is a gp register */ 226 r = ((rex & 1) << 3) | (modrm & 7); 227 inst->op2 = (sseoperand_t *)&uap->uc_mcontext. 228 gregs[regno(r)]; 229 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { 230 /* op2 is a mmx register */ 231 #ifdef __amd64 232 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 233 fp_reg_set.fpchip_state.st[modrm & 7]; 234 #else 235 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + 236 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 237 fpchip_state.state[7]); 238 #endif 239 } else { 240 /* op2 is a xmm register */ 241 r = ((rex & 1) << 3) | (modrm & 7); 242 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 243 fp_reg_set.fpchip_state.xmm[r]; 244 } 245 } else if ((modrm & 0xc7) == 0x05) { 246 #if defined(__amd64) 247 /* address of next instruction + offset */ 248 r = i + 4; 249 if (inst->op == cmpss || inst->op == cmpps || 250 inst->op == cmpsd || inst->op == cmppd) 251 r++; 252 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); 253 #else 254 /* absolute address */ 255 inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); 256 #endif 257 i += 4; 258 } else { 259 /* complex address */ 260 if ((modrm & 7) == 4) { 261 /* parse sib byte */ 262 sib = ip[i++]; 263 if ((sib & 7) == 5 && (modrm >> 6) == 0) { 264 /* start with absolute address */ 265 addr = (char *)(uintptr_t)(ip + i); 266 i += 4; 267 } else { 268 /* start with base */ 269 r = ((rex & 1) << 3) | (sib & 7); 270 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 271 } 272 r = ((rex & 2) << 2) | ((sib >> 3) & 7); 273 if (r != 4) { 274 /* add scaled index */ 275 addr += uap->uc_mcontext.gregs[regno(r)] 276 << (sib >> 6); 277 } 278 } else { 279 r = ((rex & 1) << 3) | (modrm & 7); 280 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 281 } 282 283 /* add displacement, if any */ 284 if ((modrm >> 6) == 1) { 285 addr += (char)ip[i++]; 286 } else if ((modrm >> 6) == 2) { 287 addr += *(int *)(ip + i); 288 i += 4; 289 } 290 inst->op2 = (sseoperand_t *)addr; 291 } 292 293 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || 294 inst->op == cmppd) { 295 /* get the immediate operand */ 296 inst->imm = ip[i++]; 297 } 298 299 return i; 300 } 301 302 static enum fp_class_type 303 my_fp_classf(float *x) 304 { 305 int i = *(int *)x & ~0x80000000; 306 307 if (i < 0x7f800000) { 308 if (i < 0x00800000) 309 return ((i == 0)? fp_zero : fp_subnormal); 310 return fp_normal; 311 } 312 else if (i == 0x7f800000) 313 return fp_infinity; 314 else if (i & 0x400000) 315 return fp_quiet; 316 else 317 return fp_signaling; 318 } 319 320 static enum fp_class_type 321 my_fp_class(double *x) 322 { 323 int i = *(1+(int *)x) & ~0x80000000; 324 325 if (i < 0x7ff00000) { 326 if (i < 0x00100000) 327 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); 328 return fp_normal; 329 } 330 else if (i == 0x7ff00000 && *(int *)x == 0) 331 return fp_infinity; 332 else if (i & 0x80000) 333 return fp_quiet; 334 else 335 return fp_signaling; 336 } 337 338 /* 339 * Inspect a scalar SSE instruction that incurred an invalid operation 340 * exception to determine which type of exception it was. 341 */ 342 static enum fex_exception 343 __fex_get_sse_invalid_type(sseinst_t *inst) 344 { 345 enum fp_class_type t1, t2; 346 347 /* check op2 for signaling nan */ 348 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : 349 my_fp_classf(&inst->op2->f[0]); 350 if (t2 == fp_signaling) 351 return fex_inv_snan; 352 353 /* eliminate all single-operand instructions */ 354 switch (inst->op) { 355 case cvtsd2ss: 356 case cvtss2sd: 357 /* hmm, this shouldn't have happened */ 358 return (enum fex_exception) -1; 359 360 case sqrtss: 361 case sqrtsd: 362 return fex_inv_sqrt; 363 364 case cvtss2si: 365 case cvtsd2si: 366 case cvttss2si: 367 case cvttsd2si: 368 case cvtss2siq: 369 case cvtsd2siq: 370 case cvttss2siq: 371 case cvttsd2siq: 372 return fex_inv_int; 373 } 374 375 /* check op1 for signaling nan */ 376 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : 377 my_fp_classf(&inst->op1->f[0]); 378 if (t1 == fp_signaling) 379 return fex_inv_snan; 380 381 /* check two-operand instructions for other cases */ 382 switch (inst->op) { 383 case cmpss: 384 case cmpsd: 385 case minss: 386 case minsd: 387 case maxss: 388 case maxsd: 389 case comiss: 390 case comisd: 391 return fex_inv_cmp; 392 393 case addss: 394 case addsd: 395 case subss: 396 case subsd: 397 if (t1 == fp_infinity && t2 == fp_infinity) 398 return fex_inv_isi; 399 break; 400 401 case mulss: 402 case mulsd: 403 if ((t1 == fp_zero && t2 == fp_infinity) || 404 (t2 == fp_zero && t1 == fp_infinity)) 405 return fex_inv_zmi; 406 break; 407 408 case divss: 409 case divsd: 410 if (t1 == fp_zero && t2 == fp_zero) 411 return fex_inv_zdz; 412 if (t1 == fp_infinity && t2 == fp_infinity) 413 return fex_inv_idi; 414 } 415 416 return (enum fex_exception)-1; 417 } 418 419 /* inline templates */ 420 extern void sse_cmpeqss(float *, float *, int *); 421 extern void sse_cmpltss(float *, float *, int *); 422 extern void sse_cmpless(float *, float *, int *); 423 extern void sse_cmpunordss(float *, float *, int *); 424 extern void sse_minss(float *, float *, float *); 425 extern void sse_maxss(float *, float *, float *); 426 extern void sse_addss(float *, float *, float *); 427 extern void sse_subss(float *, float *, float *); 428 extern void sse_mulss(float *, float *, float *); 429 extern void sse_divss(float *, float *, float *); 430 extern void sse_sqrtss(float *, float *); 431 extern void sse_ucomiss(float *, float *); 432 extern void sse_comiss(float *, float *); 433 extern void sse_cvtss2sd(float *, double *); 434 extern void sse_cvtsi2ss(int *, float *); 435 extern void sse_cvttss2si(float *, int *); 436 extern void sse_cvtss2si(float *, int *); 437 #ifdef __amd64 438 extern void sse_cvtsi2ssq(long long *, float *); 439 extern void sse_cvttss2siq(float *, long long *); 440 extern void sse_cvtss2siq(float *, long long *); 441 #endif 442 extern void sse_cmpeqsd(double *, double *, long long *); 443 extern void sse_cmpltsd(double *, double *, long long *); 444 extern void sse_cmplesd(double *, double *, long long *); 445 extern void sse_cmpunordsd(double *, double *, long long *); 446 extern void sse_minsd(double *, double *, double *); 447 extern void sse_maxsd(double *, double *, double *); 448 extern void sse_addsd(double *, double *, double *); 449 extern void sse_subsd(double *, double *, double *); 450 extern void sse_mulsd(double *, double *, double *); 451 extern void sse_divsd(double *, double *, double *); 452 extern void sse_sqrtsd(double *, double *); 453 extern void sse_ucomisd(double *, double *); 454 extern void sse_comisd(double *, double *); 455 extern void sse_cvtsd2ss(double *, float *); 456 extern void sse_cvtsi2sd(int *, double *); 457 extern void sse_cvttsd2si(double *, int *); 458 extern void sse_cvtsd2si(double *, int *); 459 #ifdef __amd64 460 extern void sse_cvtsi2sdq(long long *, double *); 461 extern void sse_cvttsd2siq(double *, long long *); 462 extern void sse_cvtsd2siq(double *, long long *); 463 #endif 464 465 /* 466 * Fill in *info with the operands, default untrapped result, and 467 * flags produced by a scalar SSE instruction, and return the type 468 * of trapped exception (if any). On entry, the mxcsr must have 469 * all exceptions masked and all flags clear. The same conditions 470 * will hold on exit. 471 * 472 * This routine does not work if the instruction specified by *inst 473 * is not a scalar instruction. 474 */ 475 enum fex_exception 476 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) 477 { 478 unsigned int e, te, mxcsr, oldmxcsr, subnorm; 479 480 /* 481 * Perform the operation with traps disabled and check the 482 * exception flags. If the underflow trap was enabled, also 483 * check for an exact subnormal result. 484 */ 485 __fenv_getmxcsr(&oldmxcsr); 486 subnorm = 0; 487 if ((int)inst->op & DOUBLE) { 488 if (inst->op == cvtsi2sd) { 489 info->op1.type = fex_int; 490 info->op1.val.i = inst->op2->i[0]; 491 info->op2.type = fex_nodata; 492 } else if (inst->op == cvtsi2sdq) { 493 info->op1.type = fex_llong; 494 info->op1.val.l = inst->op2->l[0]; 495 info->op2.type = fex_nodata; 496 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || 497 inst->op == cvttsd2si || inst->op == cvtsd2si || 498 inst->op == cvttsd2siq || inst->op == cvtsd2siq) { 499 info->op1.type = fex_double; 500 info->op1.val.d = inst->op2->d[0]; 501 info->op2.type = fex_nodata; 502 } else { 503 info->op1.type = fex_double; 504 info->op1.val.d = inst->op1->d[0]; 505 info->op2.type = fex_double; 506 info->op2.val.d = inst->op2->d[0]; 507 } 508 info->res.type = fex_double; 509 switch (inst->op) { 510 case cmpsd: 511 info->op = fex_cmp; 512 info->res.type = fex_llong; 513 switch (inst->imm & 3) { 514 case 0: 515 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, 516 &info->res.val.l); 517 break; 518 519 case 1: 520 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, 521 &info->res.val.l); 522 break; 523 524 case 2: 525 sse_cmplesd(&info->op1.val.d, &info->op2.val.d, 526 &info->res.val.l); 527 break; 528 529 case 3: 530 sse_cmpunordsd(&info->op1.val.d, 531 &info->op2.val.d, &info->res.val.l); 532 } 533 if (inst->imm & 4) 534 info->res.val.l ^= 0xffffffffffffffffull; 535 break; 536 537 case minsd: 538 info->op = fex_other; 539 sse_minsd(&info->op1.val.d, &info->op2.val.d, 540 &info->res.val.d); 541 break; 542 543 case maxsd: 544 info->op = fex_other; 545 sse_maxsd(&info->op1.val.d, &info->op2.val.d, 546 &info->res.val.d); 547 break; 548 549 case addsd: 550 info->op = fex_add; 551 sse_addsd(&info->op1.val.d, &info->op2.val.d, 552 &info->res.val.d); 553 if (my_fp_class(&info->res.val.d) == fp_subnormal) 554 subnorm = 1; 555 break; 556 557 case subsd: 558 info->op = fex_sub; 559 sse_subsd(&info->op1.val.d, &info->op2.val.d, 560 &info->res.val.d); 561 if (my_fp_class(&info->res.val.d) == fp_subnormal) 562 subnorm = 1; 563 break; 564 565 case mulsd: 566 info->op = fex_mul; 567 sse_mulsd(&info->op1.val.d, &info->op2.val.d, 568 &info->res.val.d); 569 if (my_fp_class(&info->res.val.d) == fp_subnormal) 570 subnorm = 1; 571 break; 572 573 case divsd: 574 info->op = fex_div; 575 sse_divsd(&info->op1.val.d, &info->op2.val.d, 576 &info->res.val.d); 577 if (my_fp_class(&info->res.val.d) == fp_subnormal) 578 subnorm = 1; 579 break; 580 581 case sqrtsd: 582 info->op = fex_sqrt; 583 sse_sqrtsd(&info->op1.val.d, &info->res.val.d); 584 break; 585 586 case cvtsd2ss: 587 info->op = fex_cnvt; 588 info->res.type = fex_float; 589 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); 590 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 591 subnorm = 1; 592 break; 593 594 case cvtsi2sd: 595 info->op = fex_cnvt; 596 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); 597 break; 598 599 case cvttsd2si: 600 info->op = fex_cnvt; 601 info->res.type = fex_int; 602 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); 603 break; 604 605 case cvtsd2si: 606 info->op = fex_cnvt; 607 info->res.type = fex_int; 608 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); 609 break; 610 611 #ifdef __amd64 612 case cvtsi2sdq: 613 info->op = fex_cnvt; 614 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); 615 break; 616 617 case cvttsd2siq: 618 info->op = fex_cnvt; 619 info->res.type = fex_llong; 620 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); 621 break; 622 623 case cvtsd2siq: 624 info->op = fex_cnvt; 625 info->res.type = fex_llong; 626 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 627 break; 628 #endif 629 630 case ucomisd: 631 info->op = fex_cmp; 632 info->res.type = fex_nodata; 633 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 634 break; 635 636 case comisd: 637 info->op = fex_cmp; 638 info->res.type = fex_nodata; 639 sse_comisd(&info->op1.val.d, &info->op2.val.d); 640 break; 641 } 642 } else { 643 if (inst->op == cvtsi2ss) { 644 info->op1.type = fex_int; 645 info->op1.val.i = inst->op2->i[0]; 646 info->op2.type = fex_nodata; 647 } else if (inst->op == cvtsi2ssq) { 648 info->op1.type = fex_llong; 649 info->op1.val.l = inst->op2->l[0]; 650 info->op2.type = fex_nodata; 651 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 652 inst->op == cvttss2si || inst->op == cvtss2si || 653 inst->op == cvttss2siq || inst->op == cvtss2siq) { 654 info->op1.type = fex_float; 655 info->op1.val.f = inst->op2->f[0]; 656 info->op2.type = fex_nodata; 657 } else { 658 info->op1.type = fex_float; 659 info->op1.val.f = inst->op1->f[0]; 660 info->op2.type = fex_float; 661 info->op2.val.f = inst->op2->f[0]; 662 } 663 info->res.type = fex_float; 664 switch (inst->op) { 665 case cmpss: 666 info->op = fex_cmp; 667 info->res.type = fex_int; 668 switch (inst->imm & 3) { 669 case 0: 670 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, 671 &info->res.val.i); 672 break; 673 674 case 1: 675 sse_cmpltss(&info->op1.val.f, &info->op2.val.f, 676 &info->res.val.i); 677 break; 678 679 case 2: 680 sse_cmpless(&info->op1.val.f, &info->op2.val.f, 681 &info->res.val.i); 682 break; 683 684 case 3: 685 sse_cmpunordss(&info->op1.val.f, 686 &info->op2.val.f, &info->res.val.i); 687 } 688 if (inst->imm & 4) 689 info->res.val.i ^= 0xffffffffu; 690 break; 691 692 case minss: 693 info->op = fex_other; 694 sse_minss(&info->op1.val.f, &info->op2.val.f, 695 &info->res.val.f); 696 break; 697 698 case maxss: 699 info->op = fex_other; 700 sse_maxss(&info->op1.val.f, &info->op2.val.f, 701 &info->res.val.f); 702 break; 703 704 case addss: 705 info->op = fex_add; 706 sse_addss(&info->op1.val.f, &info->op2.val.f, 707 &info->res.val.f); 708 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 709 subnorm = 1; 710 break; 711 712 case subss: 713 info->op = fex_sub; 714 sse_subss(&info->op1.val.f, &info->op2.val.f, 715 &info->res.val.f); 716 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 717 subnorm = 1; 718 break; 719 720 case mulss: 721 info->op = fex_mul; 722 sse_mulss(&info->op1.val.f, &info->op2.val.f, 723 &info->res.val.f); 724 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 725 subnorm = 1; 726 break; 727 728 case divss: 729 info->op = fex_div; 730 sse_divss(&info->op1.val.f, &info->op2.val.f, 731 &info->res.val.f); 732 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 733 subnorm = 1; 734 break; 735 736 case sqrtss: 737 info->op = fex_sqrt; 738 sse_sqrtss(&info->op1.val.f, &info->res.val.f); 739 break; 740 741 case cvtss2sd: 742 info->op = fex_cnvt; 743 info->res.type = fex_double; 744 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); 745 break; 746 747 case cvtsi2ss: 748 info->op = fex_cnvt; 749 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); 750 break; 751 752 case cvttss2si: 753 info->op = fex_cnvt; 754 info->res.type = fex_int; 755 sse_cvttss2si(&info->op1.val.f, &info->res.val.i); 756 break; 757 758 case cvtss2si: 759 info->op = fex_cnvt; 760 info->res.type = fex_int; 761 sse_cvtss2si(&info->op1.val.f, &info->res.val.i); 762 break; 763 764 #ifdef __amd64 765 case cvtsi2ssq: 766 info->op = fex_cnvt; 767 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); 768 break; 769 770 case cvttss2siq: 771 info->op = fex_cnvt; 772 info->res.type = fex_llong; 773 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); 774 break; 775 776 case cvtss2siq: 777 info->op = fex_cnvt; 778 info->res.type = fex_llong; 779 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 780 break; 781 #endif 782 783 case ucomiss: 784 info->op = fex_cmp; 785 info->res.type = fex_nodata; 786 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 787 break; 788 789 case comiss: 790 info->op = fex_cmp; 791 info->res.type = fex_nodata; 792 sse_comiss(&info->op1.val.f, &info->op2.val.f); 793 break; 794 } 795 } 796 __fenv_getmxcsr(&mxcsr); 797 info->flags = mxcsr & 0x3d; 798 __fenv_setmxcsr(&oldmxcsr); 799 800 /* determine which exception would have been trapped */ 801 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr 802 >> 7) & 0x3d; 803 e = mxcsr & te; 804 if (e & FE_INVALID) 805 return __fex_get_sse_invalid_type(inst); 806 if (e & FE_DIVBYZERO) 807 return fex_division; 808 if (e & FE_OVERFLOW) 809 return fex_overflow; 810 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 811 return fex_underflow; 812 if (e & FE_INEXACT) 813 return fex_inexact; 814 return (enum fex_exception)-1; 815 } 816 817 /* 818 * Emulate a SIMD SSE instruction to determine which exceptions occur 819 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the 820 * trapped exception that would occur if the i-th part of the SIMD 821 * instruction were executed in isolation; set e[i] to -1 if no 822 * trapped exception would occur in this part. Also fill in info[i] 823 * with the corresponding operands, default untrapped result, and 824 * flags. 825 * 826 * This routine does not work if the instruction specified by *inst 827 * is not a SIMD instruction. 828 */ 829 void 830 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 831 fex_info_t *info) 832 { 833 sseinst_t dummy; 834 int i; 835 836 e[0] = e[1] = e[2] = e[3] = -1; 837 838 /* perform each part of the SIMD operation */ 839 switch (inst->op) { 840 case cmpps: 841 dummy.op = cmpss; 842 dummy.imm = inst->imm; 843 for (i = 0; i < 4; i++) { 844 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 845 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 846 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 847 } 848 break; 849 850 case minps: 851 dummy.op = minss; 852 for (i = 0; i < 4; i++) { 853 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 854 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 855 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 856 } 857 break; 858 859 case maxps: 860 dummy.op = maxss; 861 for (i = 0; i < 4; i++) { 862 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 863 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 864 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 865 } 866 break; 867 868 case addps: 869 dummy.op = addss; 870 for (i = 0; i < 4; i++) { 871 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 872 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 873 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 874 } 875 break; 876 877 case subps: 878 dummy.op = subss; 879 for (i = 0; i < 4; i++) { 880 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 881 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 882 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 883 } 884 break; 885 886 case mulps: 887 dummy.op = mulss; 888 for (i = 0; i < 4; i++) { 889 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 890 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 891 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 892 } 893 break; 894 895 case divps: 896 dummy.op = divss; 897 for (i = 0; i < 4; i++) { 898 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 899 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 900 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 901 } 902 break; 903 904 case sqrtps: 905 dummy.op = sqrtss; 906 for (i = 0; i < 4; i++) { 907 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 908 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 909 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 910 } 911 break; 912 913 case cvtdq2ps: 914 dummy.op = cvtsi2ss; 915 for (i = 0; i < 4; i++) { 916 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 917 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 918 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 919 } 920 break; 921 922 case cvttps2dq: 923 dummy.op = cvttss2si; 924 for (i = 0; i < 4; i++) { 925 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 926 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 927 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 928 } 929 break; 930 931 case cvtps2dq: 932 dummy.op = cvtss2si; 933 for (i = 0; i < 4; i++) { 934 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 935 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 936 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 937 } 938 break; 939 940 case cvtpi2ps: 941 dummy.op = cvtsi2ss; 942 for (i = 0; i < 2; i++) { 943 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 944 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 945 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 946 } 947 break; 948 949 case cvttps2pi: 950 dummy.op = cvttss2si; 951 for (i = 0; i < 2; i++) { 952 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 953 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 954 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 955 } 956 break; 957 958 case cvtps2pi: 959 dummy.op = cvtss2si; 960 for (i = 0; i < 2; i++) { 961 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 962 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 963 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 964 } 965 break; 966 967 case cmppd: 968 dummy.op = cmpsd; 969 dummy.imm = inst->imm; 970 for (i = 0; i < 2; i++) { 971 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 972 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 973 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 974 } 975 break; 976 977 case minpd: 978 dummy.op = minsd; 979 for (i = 0; i < 2; i++) { 980 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 981 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 982 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 983 } 984 break; 985 986 case maxpd: 987 dummy.op = maxsd; 988 for (i = 0; i < 2; i++) { 989 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 990 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 991 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 992 } 993 break; 994 995 case addpd: 996 dummy.op = addsd; 997 for (i = 0; i < 2; i++) { 998 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 999 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1000 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1001 } 1002 break; 1003 1004 case subpd: 1005 dummy.op = subsd; 1006 for (i = 0; i < 2; i++) { 1007 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1008 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1009 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1010 } 1011 break; 1012 1013 case mulpd: 1014 dummy.op = mulsd; 1015 for (i = 0; i < 2; i++) { 1016 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1017 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1018 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1019 } 1020 break; 1021 1022 case divpd: 1023 dummy.op = divsd; 1024 for (i = 0; i < 2; i++) { 1025 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1026 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1027 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1028 } 1029 break; 1030 1031 case sqrtpd: 1032 dummy.op = sqrtsd; 1033 for (i = 0; i < 2; i++) { 1034 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1035 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1036 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1037 } 1038 break; 1039 1040 case cvtpi2pd: 1041 case cvtdq2pd: 1042 dummy.op = cvtsi2sd; 1043 for (i = 0; i < 2; i++) { 1044 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1045 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1046 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1047 } 1048 break; 1049 1050 case cvttpd2pi: 1051 case cvttpd2dq: 1052 dummy.op = cvttsd2si; 1053 for (i = 0; i < 2; i++) { 1054 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1055 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1056 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1057 } 1058 break; 1059 1060 case cvtpd2pi: 1061 case cvtpd2dq: 1062 dummy.op = cvtsd2si; 1063 for (i = 0; i < 2; i++) { 1064 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1065 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1066 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1067 } 1068 break; 1069 1070 case cvtps2pd: 1071 dummy.op = cvtss2sd; 1072 for (i = 0; i < 2; i++) { 1073 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1074 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1075 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1076 } 1077 break; 1078 1079 case cvtpd2ps: 1080 dummy.op = cvtsd2ss; 1081 for (i = 0; i < 2; i++) { 1082 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1083 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1084 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1085 } 1086 } 1087 } 1088 1089 /* 1090 * Store the result value from *info in the destination of the scalar 1091 * SSE instruction specified by *inst. If no result is given but the 1092 * exception is underflow or overflow, supply the default trapped result. 1093 * 1094 * This routine does not work if the instruction specified by *inst 1095 * is not a scalar instruction. 1096 */ 1097 void 1098 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1099 fex_info_t *info) 1100 { 1101 int i; 1102 long long l; 1103 float f, fscl; 1104 double d, dscl; 1105 1106 /* for compares that write eflags, just set the flags 1107 to indicate "unordered" */ 1108 if (inst->op == ucomiss || inst->op == comiss || 1109 inst->op == ucomisd || inst->op == comisd) { 1110 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1111 return; 1112 } 1113 1114 /* if info doesn't specify a result value, try to generate 1115 the default trapped result */ 1116 if (info->res.type == fex_nodata) { 1117 /* set scale factors for exponent wrapping */ 1118 switch (e) { 1119 case fex_overflow: 1120 fscl = 1.262177448e-29f; /* 2^-96 */ 1121 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1122 break; 1123 1124 case fex_underflow: 1125 fscl = 7.922816251e+28f; /* 2^96 */ 1126 dscl = 1.552518092300708935e+231; /* 2^768 */ 1127 break; 1128 1129 default: 1130 (void) __fex_get_sse_op(uap, inst, info); 1131 if (info->res.type == fex_nodata) 1132 return; 1133 goto stuff; 1134 } 1135 1136 /* generate the wrapped result */ 1137 if (inst->op == cvtsd2ss) { 1138 info->op1.type = fex_double; 1139 info->op1.val.d = inst->op2->d[0]; 1140 info->op2.type = fex_nodata; 1141 info->res.type = fex_float; 1142 info->res.val.f = (float)(fscl * (fscl * 1143 info->op1.val.d)); 1144 } else if ((int)inst->op & DOUBLE) { 1145 info->op1.type = fex_double; 1146 info->op1.val.d = inst->op1->d[0]; 1147 info->op2.type = fex_double; 1148 info->op2.val.d = inst->op2->d[0]; 1149 info->res.type = fex_double; 1150 switch (inst->op) { 1151 case addsd: 1152 info->res.val.d = dscl * (dscl * 1153 info->op1.val.d + dscl * info->op2.val.d); 1154 break; 1155 1156 case subsd: 1157 info->res.val.d = dscl * (dscl * 1158 info->op1.val.d - dscl * info->op2.val.d); 1159 break; 1160 1161 case mulsd: 1162 info->res.val.d = (dscl * info->op1.val.d) * 1163 (dscl * info->op2.val.d); 1164 break; 1165 1166 case divsd: 1167 info->res.val.d = (dscl * info->op1.val.d) / 1168 (info->op2.val.d / dscl); 1169 break; 1170 1171 default: 1172 return; 1173 } 1174 } else { 1175 info->op1.type = fex_float; 1176 info->op1.val.f = inst->op1->f[0]; 1177 info->op2.type = fex_float; 1178 info->op2.val.f = inst->op2->f[0]; 1179 info->res.type = fex_float; 1180 switch (inst->op) { 1181 case addss: 1182 info->res.val.f = fscl * (fscl * 1183 info->op1.val.f + fscl * info->op2.val.f); 1184 break; 1185 1186 case subss: 1187 info->res.val.f = fscl * (fscl * 1188 info->op1.val.f - fscl * info->op2.val.f); 1189 break; 1190 1191 case mulss: 1192 info->res.val.f = (fscl * info->op1.val.f) * 1193 (fscl * info->op2.val.f); 1194 break; 1195 1196 case divss: 1197 info->res.val.f = (fscl * info->op1.val.f) / 1198 (info->op2.val.f / fscl); 1199 break; 1200 1201 default: 1202 return; 1203 } 1204 } 1205 } 1206 1207 /* put the result in the destination */ 1208 stuff: 1209 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si 1210 || inst->op == cvttsd2si || inst->op == cvtsd2si) { 1211 switch (info->res.type) { 1212 case fex_int: 1213 i = info->res.val.i; 1214 break; 1215 1216 case fex_llong: 1217 i = info->res.val.l; 1218 break; 1219 1220 case fex_float: 1221 i = info->res.val.f; 1222 break; 1223 1224 case fex_double: 1225 i = info->res.val.d; 1226 break; 1227 1228 case fex_ldouble: 1229 i = info->res.val.q; 1230 break; 1231 } 1232 inst->op1->i[0] = i; 1233 } else if (inst->op == cmpsd || inst->op == cvttss2siq || 1234 inst->op == cvtss2siq || inst->op == cvttsd2siq || 1235 inst->op == cvtsd2siq) { 1236 switch (info->res.type) { 1237 case fex_int: 1238 l = info->res.val.i; 1239 break; 1240 1241 case fex_llong: 1242 l = info->res.val.l; 1243 break; 1244 1245 case fex_float: 1246 l = info->res.val.f; 1247 break; 1248 1249 case fex_double: 1250 l = info->res.val.d; 1251 break; 1252 1253 case fex_ldouble: 1254 l = info->res.val.q; 1255 break; 1256 } 1257 inst->op1->l[0] = l; 1258 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1259 inst->op == cvtss2sd) { 1260 switch (info->res.type) { 1261 case fex_int: 1262 d = info->res.val.i; 1263 break; 1264 1265 case fex_llong: 1266 d = info->res.val.l; 1267 break; 1268 1269 case fex_float: 1270 d = info->res.val.f; 1271 break; 1272 1273 case fex_double: 1274 d = info->res.val.d; 1275 break; 1276 1277 case fex_ldouble: 1278 d = info->res.val.q; 1279 break; 1280 } 1281 inst->op1->d[0] = d; 1282 } else { 1283 switch (info->res.type) { 1284 case fex_int: 1285 f = info->res.val.i; 1286 break; 1287 1288 case fex_llong: 1289 f = info->res.val.l; 1290 break; 1291 1292 case fex_float: 1293 f = info->res.val.f; 1294 break; 1295 1296 case fex_double: 1297 f = info->res.val.d; 1298 break; 1299 1300 case fex_ldouble: 1301 f = info->res.val.q; 1302 break; 1303 } 1304 inst->op1->f[0] = f; 1305 } 1306 } 1307 1308 /* 1309 * Store the results from a SIMD instruction. For each i, store 1310 * the result value from info[i] in the i-th part of the destination 1311 * of the SIMD SSE instruction specified by *inst. If no result 1312 * is given but the exception indicated by e[i] is underflow or 1313 * overflow, supply the default trapped result. 1314 * 1315 * This routine does not work if the instruction specified by *inst 1316 * is not a SIMD instruction. 1317 */ 1318 void 1319 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1320 fex_info_t *info) 1321 { 1322 sseinst_t dummy; 1323 int i; 1324 1325 /* store each part */ 1326 switch (inst->op) { 1327 case cmpps: 1328 dummy.op = cmpss; 1329 dummy.imm = inst->imm; 1330 for (i = 0; i < 4; i++) { 1331 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1332 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1333 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1334 } 1335 break; 1336 1337 case minps: 1338 dummy.op = minss; 1339 for (i = 0; i < 4; i++) { 1340 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1341 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1342 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1343 } 1344 break; 1345 1346 case maxps: 1347 dummy.op = maxss; 1348 for (i = 0; i < 4; i++) { 1349 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1350 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1351 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1352 } 1353 break; 1354 1355 case addps: 1356 dummy.op = addss; 1357 for (i = 0; i < 4; i++) { 1358 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1359 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1360 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1361 } 1362 break; 1363 1364 case subps: 1365 dummy.op = subss; 1366 for (i = 0; i < 4; i++) { 1367 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1368 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1369 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1370 } 1371 break; 1372 1373 case mulps: 1374 dummy.op = mulss; 1375 for (i = 0; i < 4; i++) { 1376 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1377 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1378 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1379 } 1380 break; 1381 1382 case divps: 1383 dummy.op = divss; 1384 for (i = 0; i < 4; i++) { 1385 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1386 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1387 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1388 } 1389 break; 1390 1391 case sqrtps: 1392 dummy.op = sqrtss; 1393 for (i = 0; i < 4; i++) { 1394 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1395 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1396 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1397 } 1398 break; 1399 1400 case cvtdq2ps: 1401 dummy.op = cvtsi2ss; 1402 for (i = 0; i < 4; i++) { 1403 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1404 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1405 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1406 } 1407 break; 1408 1409 case cvttps2dq: 1410 dummy.op = cvttss2si; 1411 for (i = 0; i < 4; i++) { 1412 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1413 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1414 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1415 } 1416 break; 1417 1418 case cvtps2dq: 1419 dummy.op = cvtss2si; 1420 for (i = 0; i < 4; i++) { 1421 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1422 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1423 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1424 } 1425 break; 1426 1427 case cvtpi2ps: 1428 dummy.op = cvtsi2ss; 1429 for (i = 0; i < 2; i++) { 1430 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1431 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1432 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1433 } 1434 break; 1435 1436 case cvttps2pi: 1437 dummy.op = cvttss2si; 1438 for (i = 0; i < 2; i++) { 1439 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1440 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1441 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1442 } 1443 break; 1444 1445 case cvtps2pi: 1446 dummy.op = cvtss2si; 1447 for (i = 0; i < 2; i++) { 1448 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1449 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1450 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1451 } 1452 break; 1453 1454 case cmppd: 1455 dummy.op = cmpsd; 1456 dummy.imm = inst->imm; 1457 for (i = 0; i < 2; i++) { 1458 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1459 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1460 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1461 } 1462 break; 1463 1464 case minpd: 1465 dummy.op = minsd; 1466 for (i = 0; i < 2; i++) { 1467 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1468 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1469 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1470 } 1471 break; 1472 1473 case maxpd: 1474 dummy.op = maxsd; 1475 for (i = 0; i < 2; i++) { 1476 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1477 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1478 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1479 } 1480 break; 1481 1482 case addpd: 1483 dummy.op = addsd; 1484 for (i = 0; i < 2; i++) { 1485 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1486 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1487 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1488 } 1489 break; 1490 1491 case subpd: 1492 dummy.op = subsd; 1493 for (i = 0; i < 2; i++) { 1494 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1495 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1496 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1497 } 1498 break; 1499 1500 case mulpd: 1501 dummy.op = mulsd; 1502 for (i = 0; i < 2; i++) { 1503 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1504 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1505 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1506 } 1507 break; 1508 1509 case divpd: 1510 dummy.op = divsd; 1511 for (i = 0; i < 2; i++) { 1512 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1513 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1514 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1515 } 1516 break; 1517 1518 case sqrtpd: 1519 dummy.op = sqrtsd; 1520 for (i = 0; i < 2; i++) { 1521 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1522 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1523 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1524 } 1525 break; 1526 1527 case cvtpi2pd: 1528 case cvtdq2pd: 1529 dummy.op = cvtsi2sd; 1530 for (i = 0; i < 2; i++) { 1531 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1532 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1533 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1534 } 1535 break; 1536 1537 case cvttpd2pi: 1538 case cvttpd2dq: 1539 dummy.op = cvttsd2si; 1540 for (i = 0; i < 2; i++) { 1541 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1542 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1544 } 1545 /* for cvttpd2dq, zero the high 64 bits of the destination */ 1546 if (inst->op == cvttpd2dq) 1547 inst->op1->l[1] = 0ll; 1548 break; 1549 1550 case cvtpd2pi: 1551 case cvtpd2dq: 1552 dummy.op = cvtsd2si; 1553 for (i = 0; i < 2; i++) { 1554 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1555 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1556 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1557 } 1558 /* for cvtpd2dq, zero the high 64 bits of the destination */ 1559 if (inst->op == cvtpd2dq) 1560 inst->op1->l[1] = 0ll; 1561 break; 1562 1563 case cvtps2pd: 1564 dummy.op = cvtss2sd; 1565 for (i = 0; i < 2; i++) { 1566 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1567 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1568 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1569 } 1570 break; 1571 1572 case cvtpd2ps: 1573 dummy.op = cvtsd2ss; 1574 for (i = 0; i < 2; i++) { 1575 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1578 } 1579 /* zero the high 64 bits of the destination */ 1580 inst->op1->l[1] = 0ll; 1581 } 1582 }