1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include "fenv_synonyms.h" 31 #include <ucontext.h> 32 #include <fenv.h> 33 #if defined(__SUNPRO_C) 34 #include <sunmath.h> 35 #else 36 #include <sys/ieeefp.h> 37 #endif 38 #include "fex_handler.h" 39 #include "fenv_inlines.h" 40 41 #if !defined(REG_PC) 42 #define REG_PC EIP 43 #endif 44 45 #if !defined(REG_PS) 46 #define REG_PS EFL 47 #endif 48 49 #ifdef __amd64 50 #define regno(X) ((X < 4)? REG_RAX - X : \ 51 ((X > 4)? REG_RAX + 1 - X : REG_RSP)) 52 #else 53 #define regno(X) (EAX - X) 54 #endif 55 56 /* 57 * Support for SSE instructions 58 */ 59 60 /* 61 * Decode an SSE instruction. Fill in *inst and return the length of the 62 * instruction in bytes. Return 0 if the instruction is not recognized. 63 */ 64 int 65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst) 66 { 67 unsigned char *ip; 68 char *addr; 69 int i, dbl, simd, rex, modrm, sib, r; 70 71 i = 0; 72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; 73 74 /* look for pseudo-prefixes */ 75 dbl = 0; 76 simd = SIMD; 77 if (ip[i] == 0xF3) { 78 simd = 0; 79 i++; 80 } else if (ip[i] == 0x66) { 81 dbl = DOUBLE; 82 i++; 83 } else if (ip[i] == 0xF2) { 84 dbl = DOUBLE; 85 simd = 0; 86 i++; 87 } 88 89 /* look for AMD64 REX prefix */ 90 rex = 0; 91 if (ip[i] >= 0x40 && ip[i] <= 0x4F) { 92 rex = ip[i]; 93 i++; 94 } 95 96 /* parse opcode */ 97 if (ip[i++] != 0x0F) 98 return 0; 99 switch (ip[i++]) { 100 case 0x2A: 101 inst->op = (int)cvtsi2ss + simd + dbl; 102 if (!simd) 103 inst->op = (int)inst->op + (rex & 8); 104 break; 105 106 case 0x2C: 107 inst->op = (int)cvttss2si + simd + dbl; 108 if (!simd) 109 inst->op = (int)inst->op + (rex & 8); 110 break; 111 112 case 0x2D: 113 inst->op = (int)cvtss2si + simd + dbl; 114 if (!simd) 115 inst->op = (int)inst->op + (rex & 8); 116 break; 117 118 case 0x2E: 119 /* oddball: scalar instruction in a SIMD opcode group */ 120 if (!simd) 121 return 0; 122 inst->op = (int)ucomiss + dbl; 123 break; 124 125 case 0x2F: 126 /* oddball: scalar instruction in a SIMD opcode group */ 127 if (!simd) 128 return 0; 129 inst->op = (int)comiss + dbl; 130 break; 131 132 case 0x51: 133 inst->op = (int)sqrtss + simd + dbl; 134 break; 135 136 case 0x58: 137 inst->op = (int)addss + simd + dbl; 138 break; 139 140 case 0x59: 141 inst->op = (int)mulss + simd + dbl; 142 break; 143 144 case 0x5A: 145 inst->op = (int)cvtss2sd + simd + dbl; 146 break; 147 148 case 0x5B: 149 if (dbl) { 150 if (simd) 151 inst->op = cvtps2dq; 152 else 153 return 0; 154 } else { 155 inst->op = (simd)? cvtdq2ps : cvttps2dq; 156 } 157 break; 158 159 case 0x5C: 160 inst->op = (int)subss + simd + dbl; 161 break; 162 163 case 0x5D: 164 inst->op = (int)minss + simd + dbl; 165 break; 166 167 case 0x5E: 168 inst->op = (int)divss + simd + dbl; 169 break; 170 171 case 0x5F: 172 inst->op = (int)maxss + simd + dbl; 173 break; 174 175 case 0xC2: 176 inst->op = (int)cmpss + simd + dbl; 177 break; 178 179 case 0xE6: 180 if (simd) { 181 if (dbl) 182 inst->op = cvttpd2dq; 183 else 184 return 0; 185 } else { 186 inst->op = (dbl)? cvtpd2dq : cvtdq2pd; 187 } 188 break; 189 190 default: 191 return 0; 192 } 193 194 /* locate operands */ 195 modrm = ip[i++]; 196 197 if (inst->op == cvtss2si || inst->op == cvttss2si || 198 inst->op == cvtsd2si || inst->op == cvttsd2si || 199 inst->op == cvtss2siq || inst->op == cvttss2siq || 200 inst->op == cvtsd2siq || inst->op == cvttsd2siq) { 201 /* op1 is a gp register */ 202 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 203 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; 204 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || 205 inst->op == cvtpd2pi || inst->op == cvttpd2pi) { 206 /* op1 is a mmx register */ 207 #ifdef __amd64 208 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. 209 fpchip_state.st[(modrm >> 3) & 7]; 210 #else 211 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + 212 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 213 fpchip_state.state[7]); 214 #endif 215 } else { 216 /* op1 is a xmm register */ 217 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 218 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 219 fp_reg_set.fpchip_state.xmm[r]; 220 } 221 222 if ((modrm >> 6) == 3) { 223 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || 224 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { 225 /* op2 is a gp register */ 226 r = ((rex & 1) << 3) | (modrm & 7); 227 inst->op2 = (sseoperand_t *)&uap->uc_mcontext. 228 gregs[regno(r)]; 229 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { 230 /* op2 is a mmx register */ 231 #ifdef __amd64 232 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 233 fp_reg_set.fpchip_state.st[modrm & 7]; 234 #else 235 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + 236 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 237 fpchip_state.state[7]); 238 #endif 239 } else { 240 /* op2 is a xmm register */ 241 r = ((rex & 1) << 3) | (modrm & 7); 242 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 243 fp_reg_set.fpchip_state.xmm[r]; 244 } 245 } else if ((modrm & 0xc7) == 0x05) { 246 #ifdef __amd64 247 /* address of next instruction + offset */ 248 r = i + 4; 249 if (inst->op == cmpss || inst->op == cmpps || 250 inst->op == cmpsd || inst->op == cmppd) 251 r++; 252 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); 253 #else 254 /* absolute address */ 255 inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); 256 #endif 257 i += 4; 258 } else { 259 /* complex address */ 260 if ((modrm & 7) == 4) { 261 /* parse sib byte */ 262 sib = ip[i++]; 263 if ((sib & 7) == 5 && (modrm >> 6) == 0) { 264 /* start with absolute address */ 265 addr = (char *)(uintptr_t)(*(int *)(ip + i)); 266 i += 4; 267 } else { 268 /* start with base */ 269 r = ((rex & 1) << 3) | (sib & 7); 270 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 271 } 272 r = ((rex & 2) << 2) | ((sib >> 3) & 7); 273 if (r != 4) { 274 /* add scaled index */ 275 addr += uap->uc_mcontext.gregs[regno(r)] 276 << (sib >> 6); 277 } 278 } else { 279 r = ((rex & 1) << 3) | (modrm & 7); 280 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 281 } 282 283 /* add displacement, if any */ 284 if ((modrm >> 6) == 1) { 285 addr += (char)ip[i++]; 286 } else if ((modrm >> 6) == 2) { 287 addr += *(int *)(ip + i); 288 i += 4; 289 } 290 inst->op2 = (sseoperand_t *)addr; 291 } 292 293 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || 294 inst->op == cmppd) { 295 /* get the immediate operand */ 296 inst->imm = ip[i++]; 297 } 298 299 return i; 300 } 301 302 static enum fp_class_type 303 my_fp_classf(float *x) 304 { 305 int i = *(int *)x & ~0x80000000; 306 307 if (i < 0x7f800000) { 308 if (i < 0x00800000) 309 return ((i == 0)? fp_zero : fp_subnormal); 310 return fp_normal; 311 } 312 else if (i == 0x7f800000) 313 return fp_infinity; 314 else if (i & 0x400000) 315 return fp_quiet; 316 else 317 return fp_signaling; 318 } 319 320 static enum fp_class_type 321 my_fp_class(double *x) 322 { 323 int i = *(1+(int *)x) & ~0x80000000; 324 325 if (i < 0x7ff00000) { 326 if (i < 0x00100000) 327 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); 328 return fp_normal; 329 } 330 else if (i == 0x7ff00000 && *(int *)x == 0) 331 return fp_infinity; 332 else if (i & 0x80000) 333 return fp_quiet; 334 else 335 return fp_signaling; 336 } 337 338 /* 339 * Inspect a scalar SSE instruction that incurred an invalid operation 340 * exception to determine which type of exception it was. 341 */ 342 static enum fex_exception 343 __fex_get_sse_invalid_type(sseinst_t *inst) 344 { 345 enum fp_class_type t1, t2; 346 347 /* check op2 for signaling nan */ 348 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : 349 my_fp_classf(&inst->op2->f[0]); 350 if (t2 == fp_signaling) 351 return fex_inv_snan; 352 353 /* eliminate all single-operand instructions */ 354 switch (inst->op) { 355 case cvtsd2ss: 356 case cvtss2sd: 357 /* hmm, this shouldn't have happened */ 358 return (enum fex_exception) -1; 359 360 case sqrtss: 361 case sqrtsd: 362 return fex_inv_sqrt; 363 364 case cvtss2si: 365 case cvtsd2si: 366 case cvttss2si: 367 case cvttsd2si: 368 case cvtss2siq: 369 case cvtsd2siq: 370 case cvttss2siq: 371 case cvttsd2siq: 372 return fex_inv_int; 373 default: 374 break; 375 } 376 377 /* check op1 for signaling nan */ 378 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : 379 my_fp_classf(&inst->op1->f[0]); 380 if (t1 == fp_signaling) 381 return fex_inv_snan; 382 383 /* check two-operand instructions for other cases */ 384 switch (inst->op) { 385 case cmpss: 386 case cmpsd: 387 case minss: 388 case minsd: 389 case maxss: 390 case maxsd: 391 case comiss: 392 case comisd: 393 return fex_inv_cmp; 394 395 case addss: 396 case addsd: 397 case subss: 398 case subsd: 399 if (t1 == fp_infinity && t2 == fp_infinity) 400 return fex_inv_isi; 401 break; 402 403 case mulss: 404 case mulsd: 405 if ((t1 == fp_zero && t2 == fp_infinity) || 406 (t2 == fp_zero && t1 == fp_infinity)) 407 return fex_inv_zmi; 408 break; 409 410 case divss: 411 case divsd: 412 if (t1 == fp_zero && t2 == fp_zero) 413 return fex_inv_zdz; 414 if (t1 == fp_infinity && t2 == fp_infinity) 415 return fex_inv_idi; 416 default: 417 break; 418 } 419 420 return (enum fex_exception)-1; 421 } 422 423 /* inline templates */ 424 extern void sse_cmpeqss(float *, float *, int *); 425 extern void sse_cmpltss(float *, float *, int *); 426 extern void sse_cmpless(float *, float *, int *); 427 extern void sse_cmpunordss(float *, float *, int *); 428 extern void sse_minss(float *, float *, float *); 429 extern void sse_maxss(float *, float *, float *); 430 extern void sse_addss(float *, float *, float *); 431 extern void sse_subss(float *, float *, float *); 432 extern void sse_mulss(float *, float *, float *); 433 extern void sse_divss(float *, float *, float *); 434 extern void sse_sqrtss(float *, float *); 435 extern void sse_ucomiss(float *, float *); 436 extern void sse_comiss(float *, float *); 437 extern void sse_cvtss2sd(float *, double *); 438 extern void sse_cvtsi2ss(int *, float *); 439 extern void sse_cvttss2si(float *, int *); 440 extern void sse_cvtss2si(float *, int *); 441 #ifdef __amd64 442 extern void sse_cvtsi2ssq(long long *, float *); 443 extern void sse_cvttss2siq(float *, long long *); 444 extern void sse_cvtss2siq(float *, long long *); 445 #endif 446 extern void sse_cmpeqsd(double *, double *, long long *); 447 extern void sse_cmpltsd(double *, double *, long long *); 448 extern void sse_cmplesd(double *, double *, long long *); 449 extern void sse_cmpunordsd(double *, double *, long long *); 450 extern void sse_minsd(double *, double *, double *); 451 extern void sse_maxsd(double *, double *, double *); 452 extern void sse_addsd(double *, double *, double *); 453 extern void sse_subsd(double *, double *, double *); 454 extern void sse_mulsd(double *, double *, double *); 455 extern void sse_divsd(double *, double *, double *); 456 extern void sse_sqrtsd(double *, double *); 457 extern void sse_ucomisd(double *, double *); 458 extern void sse_comisd(double *, double *); 459 extern void sse_cvtsd2ss(double *, float *); 460 extern void sse_cvtsi2sd(int *, double *); 461 extern void sse_cvttsd2si(double *, int *); 462 extern void sse_cvtsd2si(double *, int *); 463 #ifdef __amd64 464 extern void sse_cvtsi2sdq(long long *, double *); 465 extern void sse_cvttsd2siq(double *, long long *); 466 extern void sse_cvtsd2siq(double *, long long *); 467 #endif 468 469 /* 470 * Fill in *info with the operands, default untrapped result, and 471 * flags produced by a scalar SSE instruction, and return the type 472 * of trapped exception (if any). On entry, the mxcsr must have 473 * all exceptions masked and all flags clear. The same conditions 474 * will hold on exit. 475 * 476 * This routine does not work if the instruction specified by *inst 477 * is not a scalar instruction. 478 */ 479 enum fex_exception 480 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) 481 { 482 unsigned int e, te, mxcsr, oldmxcsr, subnorm; 483 484 /* 485 * Perform the operation with traps disabled and check the 486 * exception flags. If the underflow trap was enabled, also 487 * check for an exact subnormal result. 488 */ 489 __fenv_getmxcsr(&oldmxcsr); 490 subnorm = 0; 491 if ((int)inst->op & DOUBLE) { 492 if (inst->op == cvtsi2sd) { 493 info->op1.type = fex_int; 494 info->op1.val.i = inst->op2->i[0]; 495 info->op2.type = fex_nodata; 496 } else if (inst->op == cvtsi2sdq) { 497 info->op1.type = fex_llong; 498 info->op1.val.l = inst->op2->l[0]; 499 info->op2.type = fex_nodata; 500 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || 501 inst->op == cvttsd2si || inst->op == cvtsd2si || 502 inst->op == cvttsd2siq || inst->op == cvtsd2siq) { 503 info->op1.type = fex_double; 504 info->op1.val.d = inst->op2->d[0]; 505 info->op2.type = fex_nodata; 506 } else { 507 info->op1.type = fex_double; 508 info->op1.val.d = inst->op1->d[0]; 509 info->op2.type = fex_double; 510 info->op2.val.d = inst->op2->d[0]; 511 } 512 info->res.type = fex_double; 513 switch (inst->op) { 514 case cmpsd: 515 info->op = fex_cmp; 516 info->res.type = fex_llong; 517 switch (inst->imm & 3) { 518 case 0: 519 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, 520 &info->res.val.l); 521 break; 522 523 case 1: 524 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, 525 &info->res.val.l); 526 break; 527 528 case 2: 529 sse_cmplesd(&info->op1.val.d, &info->op2.val.d, 530 &info->res.val.l); 531 break; 532 533 case 3: 534 sse_cmpunordsd(&info->op1.val.d, 535 &info->op2.val.d, &info->res.val.l); 536 } 537 if (inst->imm & 4) 538 info->res.val.l ^= 0xffffffffffffffffull; 539 break; 540 541 case minsd: 542 info->op = fex_other; 543 sse_minsd(&info->op1.val.d, &info->op2.val.d, 544 &info->res.val.d); 545 break; 546 547 case maxsd: 548 info->op = fex_other; 549 sse_maxsd(&info->op1.val.d, &info->op2.val.d, 550 &info->res.val.d); 551 break; 552 553 case addsd: 554 info->op = fex_add; 555 sse_addsd(&info->op1.val.d, &info->op2.val.d, 556 &info->res.val.d); 557 if (my_fp_class(&info->res.val.d) == fp_subnormal) 558 subnorm = 1; 559 break; 560 561 case subsd: 562 info->op = fex_sub; 563 sse_subsd(&info->op1.val.d, &info->op2.val.d, 564 &info->res.val.d); 565 if (my_fp_class(&info->res.val.d) == fp_subnormal) 566 subnorm = 1; 567 break; 568 569 case mulsd: 570 info->op = fex_mul; 571 sse_mulsd(&info->op1.val.d, &info->op2.val.d, 572 &info->res.val.d); 573 if (my_fp_class(&info->res.val.d) == fp_subnormal) 574 subnorm = 1; 575 break; 576 577 case divsd: 578 info->op = fex_div; 579 sse_divsd(&info->op1.val.d, &info->op2.val.d, 580 &info->res.val.d); 581 if (my_fp_class(&info->res.val.d) == fp_subnormal) 582 subnorm = 1; 583 break; 584 585 case sqrtsd: 586 info->op = fex_sqrt; 587 sse_sqrtsd(&info->op1.val.d, &info->res.val.d); 588 break; 589 590 case cvtsd2ss: 591 info->op = fex_cnvt; 592 info->res.type = fex_float; 593 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); 594 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 595 subnorm = 1; 596 break; 597 598 case cvtsi2sd: 599 info->op = fex_cnvt; 600 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); 601 break; 602 603 case cvttsd2si: 604 info->op = fex_cnvt; 605 info->res.type = fex_int; 606 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); 607 break; 608 609 case cvtsd2si: 610 info->op = fex_cnvt; 611 info->res.type = fex_int; 612 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); 613 break; 614 615 #ifdef __amd64 616 case cvtsi2sdq: 617 info->op = fex_cnvt; 618 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); 619 break; 620 621 case cvttsd2siq: 622 info->op = fex_cnvt; 623 info->res.type = fex_llong; 624 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); 625 break; 626 627 case cvtsd2siq: 628 info->op = fex_cnvt; 629 info->res.type = fex_llong; 630 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 631 break; 632 #endif 633 634 case ucomisd: 635 info->op = fex_cmp; 636 info->res.type = fex_nodata; 637 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 638 break; 639 640 case comisd: 641 info->op = fex_cmp; 642 info->res.type = fex_nodata; 643 sse_comisd(&info->op1.val.d, &info->op2.val.d); 644 break; 645 default: 646 break; 647 } 648 } else { 649 if (inst->op == cvtsi2ss) { 650 info->op1.type = fex_int; 651 info->op1.val.i = inst->op2->i[0]; 652 info->op2.type = fex_nodata; 653 } else if (inst->op == cvtsi2ssq) { 654 info->op1.type = fex_llong; 655 info->op1.val.l = inst->op2->l[0]; 656 info->op2.type = fex_nodata; 657 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 658 inst->op == cvttss2si || inst->op == cvtss2si || 659 inst->op == cvttss2siq || inst->op == cvtss2siq) { 660 info->op1.type = fex_float; 661 info->op1.val.f = inst->op2->f[0]; 662 info->op2.type = fex_nodata; 663 } else { 664 info->op1.type = fex_float; 665 info->op1.val.f = inst->op1->f[0]; 666 info->op2.type = fex_float; 667 info->op2.val.f = inst->op2->f[0]; 668 } 669 info->res.type = fex_float; 670 switch (inst->op) { 671 case cmpss: 672 info->op = fex_cmp; 673 info->res.type = fex_int; 674 switch (inst->imm & 3) { 675 case 0: 676 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, 677 &info->res.val.i); 678 break; 679 680 case 1: 681 sse_cmpltss(&info->op1.val.f, &info->op2.val.f, 682 &info->res.val.i); 683 break; 684 685 case 2: 686 sse_cmpless(&info->op1.val.f, &info->op2.val.f, 687 &info->res.val.i); 688 break; 689 690 case 3: 691 sse_cmpunordss(&info->op1.val.f, 692 &info->op2.val.f, &info->res.val.i); 693 } 694 if (inst->imm & 4) 695 info->res.val.i ^= 0xffffffffu; 696 break; 697 698 case minss: 699 info->op = fex_other; 700 sse_minss(&info->op1.val.f, &info->op2.val.f, 701 &info->res.val.f); 702 break; 703 704 case maxss: 705 info->op = fex_other; 706 sse_maxss(&info->op1.val.f, &info->op2.val.f, 707 &info->res.val.f); 708 break; 709 710 case addss: 711 info->op = fex_add; 712 sse_addss(&info->op1.val.f, &info->op2.val.f, 713 &info->res.val.f); 714 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 715 subnorm = 1; 716 break; 717 718 case subss: 719 info->op = fex_sub; 720 sse_subss(&info->op1.val.f, &info->op2.val.f, 721 &info->res.val.f); 722 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 723 subnorm = 1; 724 break; 725 726 case mulss: 727 info->op = fex_mul; 728 sse_mulss(&info->op1.val.f, &info->op2.val.f, 729 &info->res.val.f); 730 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 731 subnorm = 1; 732 break; 733 734 case divss: 735 info->op = fex_div; 736 sse_divss(&info->op1.val.f, &info->op2.val.f, 737 &info->res.val.f); 738 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 739 subnorm = 1; 740 break; 741 742 case sqrtss: 743 info->op = fex_sqrt; 744 sse_sqrtss(&info->op1.val.f, &info->res.val.f); 745 break; 746 747 case cvtss2sd: 748 info->op = fex_cnvt; 749 info->res.type = fex_double; 750 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); 751 break; 752 753 case cvtsi2ss: 754 info->op = fex_cnvt; 755 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); 756 break; 757 758 case cvttss2si: 759 info->op = fex_cnvt; 760 info->res.type = fex_int; 761 sse_cvttss2si(&info->op1.val.f, &info->res.val.i); 762 break; 763 764 case cvtss2si: 765 info->op = fex_cnvt; 766 info->res.type = fex_int; 767 sse_cvtss2si(&info->op1.val.f, &info->res.val.i); 768 break; 769 770 #ifdef __amd64 771 case cvtsi2ssq: 772 info->op = fex_cnvt; 773 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); 774 break; 775 776 case cvttss2siq: 777 info->op = fex_cnvt; 778 info->res.type = fex_llong; 779 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); 780 break; 781 782 case cvtss2siq: 783 info->op = fex_cnvt; 784 info->res.type = fex_llong; 785 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 786 break; 787 #endif 788 789 case ucomiss: 790 info->op = fex_cmp; 791 info->res.type = fex_nodata; 792 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 793 break; 794 795 case comiss: 796 info->op = fex_cmp; 797 info->res.type = fex_nodata; 798 sse_comiss(&info->op1.val.f, &info->op2.val.f); 799 break; 800 default: 801 break; 802 } 803 } 804 __fenv_getmxcsr(&mxcsr); 805 info->flags = mxcsr & 0x3d; 806 __fenv_setmxcsr(&oldmxcsr); 807 808 /* determine which exception would have been trapped */ 809 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr 810 >> 7) & 0x3d; 811 e = mxcsr & te; 812 if (e & FE_INVALID) 813 return __fex_get_sse_invalid_type(inst); 814 if (e & FE_DIVBYZERO) 815 return fex_division; 816 if (e & FE_OVERFLOW) 817 return fex_overflow; 818 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 819 return fex_underflow; 820 if (e & FE_INEXACT) 821 return fex_inexact; 822 return (enum fex_exception)-1; 823 } 824 825 /* 826 * Emulate a SIMD SSE instruction to determine which exceptions occur 827 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the 828 * trapped exception that would occur if the i-th part of the SIMD 829 * instruction were executed in isolation; set e[i] to -1 if no 830 * trapped exception would occur in this part. Also fill in info[i] 831 * with the corresponding operands, default untrapped result, and 832 * flags. 833 * 834 * This routine does not work if the instruction specified by *inst 835 * is not a SIMD instruction. 836 */ 837 void 838 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 839 fex_info_t *info) 840 { 841 sseinst_t dummy; 842 int i; 843 844 e[0] = e[1] = e[2] = e[3] = -1; 845 846 /* perform each part of the SIMD operation */ 847 switch (inst->op) { 848 case cmpps: 849 dummy.op = cmpss; 850 dummy.imm = inst->imm; 851 for (i = 0; i < 4; i++) { 852 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 853 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 854 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 855 } 856 break; 857 858 case minps: 859 dummy.op = minss; 860 for (i = 0; i < 4; i++) { 861 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 862 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 863 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 864 } 865 break; 866 867 case maxps: 868 dummy.op = maxss; 869 for (i = 0; i < 4; i++) { 870 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 871 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 872 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 873 } 874 break; 875 876 case addps: 877 dummy.op = addss; 878 for (i = 0; i < 4; i++) { 879 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 880 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 881 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 882 } 883 break; 884 885 case subps: 886 dummy.op = subss; 887 for (i = 0; i < 4; i++) { 888 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 889 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 890 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 891 } 892 break; 893 894 case mulps: 895 dummy.op = mulss; 896 for (i = 0; i < 4; i++) { 897 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 898 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 899 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 900 } 901 break; 902 903 case divps: 904 dummy.op = divss; 905 for (i = 0; i < 4; i++) { 906 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 907 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 908 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 909 } 910 break; 911 912 case sqrtps: 913 dummy.op = sqrtss; 914 for (i = 0; i < 4; i++) { 915 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 916 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 917 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 918 } 919 break; 920 921 case cvtdq2ps: 922 dummy.op = cvtsi2ss; 923 for (i = 0; i < 4; i++) { 924 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 925 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 926 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 927 } 928 break; 929 930 case cvttps2dq: 931 dummy.op = cvttss2si; 932 for (i = 0; i < 4; i++) { 933 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 934 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 935 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 936 } 937 break; 938 939 case cvtps2dq: 940 dummy.op = cvtss2si; 941 for (i = 0; i < 4; i++) { 942 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 945 } 946 break; 947 948 case cvtpi2ps: 949 dummy.op = cvtsi2ss; 950 for (i = 0; i < 2; i++) { 951 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 952 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 953 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 954 } 955 break; 956 957 case cvttps2pi: 958 dummy.op = cvttss2si; 959 for (i = 0; i < 2; i++) { 960 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 961 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 962 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 963 } 964 break; 965 966 case cvtps2pi: 967 dummy.op = cvtss2si; 968 for (i = 0; i < 2; i++) { 969 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 970 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 971 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 972 } 973 break; 974 975 case cmppd: 976 dummy.op = cmpsd; 977 dummy.imm = inst->imm; 978 for (i = 0; i < 2; i++) { 979 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 980 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 981 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 982 } 983 break; 984 985 case minpd: 986 dummy.op = minsd; 987 for (i = 0; i < 2; i++) { 988 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 989 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 990 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 991 } 992 break; 993 994 case maxpd: 995 dummy.op = maxsd; 996 for (i = 0; i < 2; i++) { 997 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 998 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1000 } 1001 break; 1002 1003 case addpd: 1004 dummy.op = addsd; 1005 for (i = 0; i < 2; i++) { 1006 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1007 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1008 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1009 } 1010 break; 1011 1012 case subpd: 1013 dummy.op = subsd; 1014 for (i = 0; i < 2; i++) { 1015 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1016 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1017 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1018 } 1019 break; 1020 1021 case mulpd: 1022 dummy.op = mulsd; 1023 for (i = 0; i < 2; i++) { 1024 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1025 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1026 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1027 } 1028 break; 1029 1030 case divpd: 1031 dummy.op = divsd; 1032 for (i = 0; i < 2; i++) { 1033 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1034 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1035 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1036 } 1037 break; 1038 1039 case sqrtpd: 1040 dummy.op = sqrtsd; 1041 for (i = 0; i < 2; i++) { 1042 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1043 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1044 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1045 } 1046 break; 1047 1048 case cvtpi2pd: 1049 case cvtdq2pd: 1050 dummy.op = cvtsi2sd; 1051 for (i = 0; i < 2; i++) { 1052 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1055 } 1056 break; 1057 1058 case cvttpd2pi: 1059 case cvttpd2dq: 1060 dummy.op = cvttsd2si; 1061 for (i = 0; i < 2; i++) { 1062 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1063 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1064 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1065 } 1066 break; 1067 1068 case cvtpd2pi: 1069 case cvtpd2dq: 1070 dummy.op = cvtsd2si; 1071 for (i = 0; i < 2; i++) { 1072 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1073 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1074 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1075 } 1076 break; 1077 1078 case cvtps2pd: 1079 dummy.op = cvtss2sd; 1080 for (i = 0; i < 2; i++) { 1081 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1082 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1083 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1084 } 1085 break; 1086 1087 case cvtpd2ps: 1088 dummy.op = cvtsd2ss; 1089 for (i = 0; i < 2; i++) { 1090 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1091 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1092 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1093 } 1094 default: 1095 break; 1096 } 1097 } 1098 1099 /* 1100 * Store the result value from *info in the destination of the scalar 1101 * SSE instruction specified by *inst. If no result is given but the 1102 * exception is underflow or overflow, supply the default trapped result. 1103 * 1104 * This routine does not work if the instruction specified by *inst 1105 * is not a scalar instruction. 1106 */ 1107 void 1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1109 fex_info_t *info) 1110 { 1111 int i = 0; 1112 long long l = 0L;; 1113 float f = 0.0, fscl; 1114 double d = 0.0L, dscl; 1115 1116 /* for compares that write eflags, just set the flags 1117 to indicate "unordered" */ 1118 if (inst->op == ucomiss || inst->op == comiss || 1119 inst->op == ucomisd || inst->op == comisd) { 1120 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1121 return; 1122 } 1123 1124 /* if info doesn't specify a result value, try to generate 1125 the default trapped result */ 1126 if (info->res.type == fex_nodata) { 1127 /* set scale factors for exponent wrapping */ 1128 switch (e) { 1129 case fex_overflow: 1130 fscl = 1.262177448e-29f; /* 2^-96 */ 1131 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1132 break; 1133 1134 case fex_underflow: 1135 fscl = 7.922816251e+28f; /* 2^96 */ 1136 dscl = 1.552518092300708935e+231; /* 2^768 */ 1137 break; 1138 1139 default: 1140 (void) __fex_get_sse_op(uap, inst, info); 1141 if (info->res.type == fex_nodata) 1142 return; 1143 goto stuff; 1144 } 1145 1146 /* generate the wrapped result */ 1147 if (inst->op == cvtsd2ss) { 1148 info->op1.type = fex_double; 1149 info->op1.val.d = inst->op2->d[0]; 1150 info->op2.type = fex_nodata; 1151 info->res.type = fex_float; 1152 info->res.val.f = (float)(fscl * (fscl * 1153 info->op1.val.d)); 1154 } else if ((int)inst->op & DOUBLE) { 1155 info->op1.type = fex_double; 1156 info->op1.val.d = inst->op1->d[0]; 1157 info->op2.type = fex_double; 1158 info->op2.val.d = inst->op2->d[0]; 1159 info->res.type = fex_double; 1160 switch (inst->op) { 1161 case addsd: 1162 info->res.val.d = dscl * (dscl * 1163 info->op1.val.d + dscl * info->op2.val.d); 1164 break; 1165 1166 case subsd: 1167 info->res.val.d = dscl * (dscl * 1168 info->op1.val.d - dscl * info->op2.val.d); 1169 break; 1170 1171 case mulsd: 1172 info->res.val.d = (dscl * info->op1.val.d) * 1173 (dscl * info->op2.val.d); 1174 break; 1175 1176 case divsd: 1177 info->res.val.d = (dscl * info->op1.val.d) / 1178 (info->op2.val.d / dscl); 1179 break; 1180 1181 default: 1182 return; 1183 } 1184 } else { 1185 info->op1.type = fex_float; 1186 info->op1.val.f = inst->op1->f[0]; 1187 info->op2.type = fex_float; 1188 info->op2.val.f = inst->op2->f[0]; 1189 info->res.type = fex_float; 1190 switch (inst->op) { 1191 case addss: 1192 info->res.val.f = fscl * (fscl * 1193 info->op1.val.f + fscl * info->op2.val.f); 1194 break; 1195 1196 case subss: 1197 info->res.val.f = fscl * (fscl * 1198 info->op1.val.f - fscl * info->op2.val.f); 1199 break; 1200 1201 case mulss: 1202 info->res.val.f = (fscl * info->op1.val.f) * 1203 (fscl * info->op2.val.f); 1204 break; 1205 1206 case divss: 1207 info->res.val.f = (fscl * info->op1.val.f) / 1208 (info->op2.val.f / fscl); 1209 break; 1210 1211 default: 1212 return; 1213 } 1214 } 1215 } 1216 1217 /* put the result in the destination */ 1218 stuff: 1219 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si 1220 || inst->op == cvttsd2si || inst->op == cvtsd2si) { 1221 switch (info->res.type) { 1222 case fex_int: 1223 i = info->res.val.i; 1224 break; 1225 1226 case fex_llong: 1227 i = info->res.val.l; 1228 break; 1229 1230 case fex_float: 1231 i = info->res.val.f; 1232 break; 1233 1234 case fex_double: 1235 i = info->res.val.d; 1236 break; 1237 1238 case fex_ldouble: 1239 i = info->res.val.q; 1240 break; 1241 1242 default: 1243 break; 1244 } 1245 inst->op1->i[0] = i; 1246 } else if (inst->op == cmpsd || inst->op == cvttss2siq || 1247 inst->op == cvtss2siq || inst->op == cvttsd2siq || 1248 inst->op == cvtsd2siq) { 1249 switch (info->res.type) { 1250 case fex_int: 1251 l = info->res.val.i; 1252 break; 1253 1254 case fex_llong: 1255 l = info->res.val.l; 1256 break; 1257 1258 case fex_float: 1259 l = info->res.val.f; 1260 break; 1261 1262 case fex_double: 1263 l = info->res.val.d; 1264 break; 1265 1266 case fex_ldouble: 1267 l = info->res.val.q; 1268 break; 1269 1270 default: 1271 break; 1272 } 1273 inst->op1->l[0] = l; 1274 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1275 inst->op == cvtss2sd) { 1276 switch (info->res.type) { 1277 case fex_int: 1278 d = info->res.val.i; 1279 break; 1280 1281 case fex_llong: 1282 d = info->res.val.l; 1283 break; 1284 1285 case fex_float: 1286 d = info->res.val.f; 1287 break; 1288 1289 case fex_double: 1290 d = info->res.val.d; 1291 break; 1292 1293 case fex_ldouble: 1294 d = info->res.val.q; 1295 break; 1296 1297 default: 1298 break; 1299 } 1300 inst->op1->d[0] = d; 1301 } else { 1302 switch (info->res.type) { 1303 case fex_int: 1304 f = info->res.val.i; 1305 break; 1306 1307 case fex_llong: 1308 f = info->res.val.l; 1309 break; 1310 1311 case fex_float: 1312 f = info->res.val.f; 1313 break; 1314 1315 case fex_double: 1316 f = info->res.val.d; 1317 break; 1318 1319 case fex_ldouble: 1320 f = info->res.val.q; 1321 break; 1322 1323 default: 1324 break; 1325 } 1326 inst->op1->f[0] = f; 1327 } 1328 } 1329 1330 /* 1331 * Store the results from a SIMD instruction. For each i, store 1332 * the result value from info[i] in the i-th part of the destination 1333 * of the SIMD SSE instruction specified by *inst. If no result 1334 * is given but the exception indicated by e[i] is underflow or 1335 * overflow, supply the default trapped result. 1336 * 1337 * This routine does not work if the instruction specified by *inst 1338 * is not a SIMD instruction. 1339 */ 1340 void 1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1342 fex_info_t *info) 1343 { 1344 sseinst_t dummy; 1345 int i; 1346 1347 /* store each part */ 1348 switch (inst->op) { 1349 case cmpps: 1350 dummy.op = cmpss; 1351 dummy.imm = inst->imm; 1352 for (i = 0; i < 4; i++) { 1353 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1354 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1355 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1356 } 1357 break; 1358 1359 case minps: 1360 dummy.op = minss; 1361 for (i = 0; i < 4; i++) { 1362 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1363 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1364 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1365 } 1366 break; 1367 1368 case maxps: 1369 dummy.op = maxss; 1370 for (i = 0; i < 4; i++) { 1371 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1372 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1373 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1374 } 1375 break; 1376 1377 case addps: 1378 dummy.op = addss; 1379 for (i = 0; i < 4; i++) { 1380 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1381 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1382 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1383 } 1384 break; 1385 1386 case subps: 1387 dummy.op = subss; 1388 for (i = 0; i < 4; i++) { 1389 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1390 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1391 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1392 } 1393 break; 1394 1395 case mulps: 1396 dummy.op = mulss; 1397 for (i = 0; i < 4; i++) { 1398 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1399 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1400 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1401 } 1402 break; 1403 1404 case divps: 1405 dummy.op = divss; 1406 for (i = 0; i < 4; i++) { 1407 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1408 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1409 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1410 } 1411 break; 1412 1413 case sqrtps: 1414 dummy.op = sqrtss; 1415 for (i = 0; i < 4; i++) { 1416 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1417 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1418 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1419 } 1420 break; 1421 1422 case cvtdq2ps: 1423 dummy.op = cvtsi2ss; 1424 for (i = 0; i < 4; i++) { 1425 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1426 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1427 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1428 } 1429 break; 1430 1431 case cvttps2dq: 1432 dummy.op = cvttss2si; 1433 for (i = 0; i < 4; i++) { 1434 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1435 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1436 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1437 } 1438 break; 1439 1440 case cvtps2dq: 1441 dummy.op = cvtss2si; 1442 for (i = 0; i < 4; i++) { 1443 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1444 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1445 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1446 } 1447 break; 1448 1449 case cvtpi2ps: 1450 dummy.op = cvtsi2ss; 1451 for (i = 0; i < 2; i++) { 1452 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1453 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1454 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1455 } 1456 break; 1457 1458 case cvttps2pi: 1459 dummy.op = cvttss2si; 1460 for (i = 0; i < 2; i++) { 1461 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1462 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1463 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1464 } 1465 break; 1466 1467 case cvtps2pi: 1468 dummy.op = cvtss2si; 1469 for (i = 0; i < 2; i++) { 1470 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1471 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1472 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1473 } 1474 break; 1475 1476 case cmppd: 1477 dummy.op = cmpsd; 1478 dummy.imm = inst->imm; 1479 for (i = 0; i < 2; i++) { 1480 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1481 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1482 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1483 } 1484 break; 1485 1486 case minpd: 1487 dummy.op = minsd; 1488 for (i = 0; i < 2; i++) { 1489 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1490 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1491 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1492 } 1493 break; 1494 1495 case maxpd: 1496 dummy.op = maxsd; 1497 for (i = 0; i < 2; i++) { 1498 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1499 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1500 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1501 } 1502 break; 1503 1504 case addpd: 1505 dummy.op = addsd; 1506 for (i = 0; i < 2; i++) { 1507 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1508 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1509 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1510 } 1511 break; 1512 1513 case subpd: 1514 dummy.op = subsd; 1515 for (i = 0; i < 2; i++) { 1516 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1517 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1518 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1519 } 1520 break; 1521 1522 case mulpd: 1523 dummy.op = mulsd; 1524 for (i = 0; i < 2; i++) { 1525 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1526 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1527 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1528 } 1529 break; 1530 1531 case divpd: 1532 dummy.op = divsd; 1533 for (i = 0; i < 2; i++) { 1534 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1535 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1536 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1537 } 1538 break; 1539 1540 case sqrtpd: 1541 dummy.op = sqrtsd; 1542 for (i = 0; i < 2; i++) { 1543 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1544 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1545 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1546 } 1547 break; 1548 1549 case cvtpi2pd: 1550 case cvtdq2pd: 1551 dummy.op = cvtsi2sd; 1552 for (i = 0; i < 2; i++) { 1553 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1554 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1555 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1556 } 1557 break; 1558 1559 case cvttpd2pi: 1560 case cvttpd2dq: 1561 dummy.op = cvttsd2si; 1562 for (i = 0; i < 2; i++) { 1563 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1564 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1566 } 1567 /* for cvttpd2dq, zero the high 64 bits of the destination */ 1568 if (inst->op == cvttpd2dq) 1569 inst->op1->l[1] = 0ll; 1570 break; 1571 1572 case cvtpd2pi: 1573 case cvtpd2dq: 1574 dummy.op = cvtsd2si; 1575 for (i = 0; i < 2; i++) { 1576 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1577 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1578 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1579 } 1580 /* for cvtpd2dq, zero the high 64 bits of the destination */ 1581 if (inst->op == cvtpd2dq) 1582 inst->op1->l[1] = 0ll; 1583 break; 1584 1585 case cvtps2pd: 1586 dummy.op = cvtss2sd; 1587 for (i = 0; i < 2; i++) { 1588 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1589 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1590 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1591 } 1592 break; 1593 1594 case cvtpd2ps: 1595 dummy.op = cvtsd2ss; 1596 for (i = 0; i < 2; i++) { 1597 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1598 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1599 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1600 } 1601 /* zero the high 64 bits of the destination */ 1602 inst->op1->l[1] = 0ll; 1603 1604 default: 1605 break; 1606 } 1607 } 1608