1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #include <ucontext.h> 31 #include <fenv.h> 32 #if defined(__SUNPRO_C) 33 #include <sunmath.h> 34 #else 35 #include <sys/ieeefp.h> 36 #endif 37 #include "fex_handler.h" 38 #include "fenv_inlines.h" 39 40 #if !defined(REG_PC) 41 #define REG_PC EIP 42 #endif 43 44 #if !defined(REG_PS) 45 #define REG_PS EFL 46 #endif 47 48 #ifdef __amd64 49 #define regno(X) ((X < 4)? REG_RAX - X : \ 50 ((X > 4)? REG_RAX + 1 - X : REG_RSP)) 51 #else 52 #define regno(X) (EAX - X) 53 #endif 54 55 /* 56 * Support for SSE instructions 57 */ 58 59 /* 60 * Decode an SSE instruction. Fill in *inst and return the length of the 61 * instruction in bytes. Return 0 if the instruction is not recognized. 62 */ 63 int 64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst) 65 { 66 unsigned char *ip; 67 char *addr; 68 int i, dbl, simd, rex, modrm, sib, r; 69 70 i = 0; 71 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC]; 72 73 /* look for pseudo-prefixes */ 74 dbl = 0; 75 simd = SIMD; 76 if (ip[i] == 0xF3) { 77 simd = 0; 78 i++; 79 } else if (ip[i] == 0x66) { 80 dbl = DOUBLE; 81 i++; 82 } else if (ip[i] == 0xF2) { 83 dbl = DOUBLE; 84 simd = 0; 85 i++; 86 } 87 88 /* look for AMD64 REX prefix */ 89 rex = 0; 90 if (ip[i] >= 0x40 && ip[i] <= 0x4F) { 91 rex = ip[i]; 92 i++; 93 } 94 95 /* parse opcode */ 96 if (ip[i++] != 0x0F) 97 return 0; 98 switch (ip[i++]) { 99 case 0x2A: 100 inst->op = (int)cvtsi2ss + simd + dbl; 101 if (!simd) 102 inst->op = (int)inst->op + (rex & 8); 103 break; 104 105 case 0x2C: 106 inst->op = (int)cvttss2si + simd + dbl; 107 if (!simd) 108 inst->op = (int)inst->op + (rex & 8); 109 break; 110 111 case 0x2D: 112 inst->op = (int)cvtss2si + simd + dbl; 113 if (!simd) 114 inst->op = (int)inst->op + (rex & 8); 115 break; 116 117 case 0x2E: 118 /* oddball: scalar instruction in a SIMD opcode group */ 119 if (!simd) 120 return 0; 121 inst->op = (int)ucomiss + dbl; 122 break; 123 124 case 0x2F: 125 /* oddball: scalar instruction in a SIMD opcode group */ 126 if (!simd) 127 return 0; 128 inst->op = (int)comiss + dbl; 129 break; 130 131 case 0x51: 132 inst->op = (int)sqrtss + simd + dbl; 133 break; 134 135 case 0x58: 136 inst->op = (int)addss + simd + dbl; 137 break; 138 139 case 0x59: 140 inst->op = (int)mulss + simd + dbl; 141 break; 142 143 case 0x5A: 144 inst->op = (int)cvtss2sd + simd + dbl; 145 break; 146 147 case 0x5B: 148 if (dbl) { 149 if (simd) 150 inst->op = cvtps2dq; 151 else 152 return 0; 153 } else { 154 inst->op = (simd)? cvtdq2ps : cvttps2dq; 155 } 156 break; 157 158 case 0x5C: 159 inst->op = (int)subss + simd + dbl; 160 break; 161 162 case 0x5D: 163 inst->op = (int)minss + simd + dbl; 164 break; 165 166 case 0x5E: 167 inst->op = (int)divss + simd + dbl; 168 break; 169 170 case 0x5F: 171 inst->op = (int)maxss + simd + dbl; 172 break; 173 174 case 0xC2: 175 inst->op = (int)cmpss + simd + dbl; 176 break; 177 178 case 0xE6: 179 if (simd) { 180 if (dbl) 181 inst->op = cvttpd2dq; 182 else 183 return 0; 184 } else { 185 inst->op = (dbl)? cvtpd2dq : cvtdq2pd; 186 } 187 break; 188 189 default: 190 return 0; 191 } 192 193 /* locate operands */ 194 modrm = ip[i++]; 195 196 if (inst->op == cvtss2si || inst->op == cvttss2si || 197 inst->op == cvtsd2si || inst->op == cvttsd2si || 198 inst->op == cvtss2siq || inst->op == cvttss2siq || 199 inst->op == cvtsd2siq || inst->op == cvttsd2siq) { 200 /* op1 is a gp register */ 201 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 202 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)]; 203 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || 204 inst->op == cvtpd2pi || inst->op == cvttpd2pi) { 205 /* op1 is a mmx register */ 206 #ifdef __amd64 207 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set. 208 fpchip_state.st[(modrm >> 3) & 7]; 209 #else 210 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) + 211 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 212 fpchip_state.state[7]); 213 #endif 214 } else { 215 /* op1 is a xmm register */ 216 r = ((rex & 4) << 1) | ((modrm >> 3) & 7); 217 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 218 fp_reg_set.fpchip_state.xmm[r]; 219 } 220 221 if ((modrm >> 6) == 3) { 222 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || 223 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) { 224 /* op2 is a gp register */ 225 r = ((rex & 1) << 3) | (modrm & 7); 226 inst->op2 = (sseoperand_t *)&uap->uc_mcontext. 227 gregs[regno(r)]; 228 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) { 229 /* op2 is a mmx register */ 230 #ifdef __amd64 231 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 232 fp_reg_set.fpchip_state.st[modrm & 7]; 233 #else 234 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) + 235 (char *)&uap->uc_mcontext.fpregs.fp_reg_set. 236 fpchip_state.state[7]); 237 #endif 238 } else { 239 /* op2 is a xmm register */ 240 r = ((rex & 1) << 3) | (modrm & 7); 241 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs. 242 fp_reg_set.fpchip_state.xmm[r]; 243 } 244 } else if ((modrm & 0xc7) == 0x05) { 245 #ifdef __amd64 246 /* address of next instruction + offset */ 247 r = i + 4; 248 if (inst->op == cmpss || inst->op == cmpps || 249 inst->op == cmpsd || inst->op == cmppd) 250 r++; 251 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i)); 252 #else 253 /* absolute address */ 254 inst->op2 = (sseoperand_t *)(*(int *)(ip + i)); 255 #endif 256 i += 4; 257 } else { 258 /* complex address */ 259 if ((modrm & 7) == 4) { 260 /* parse sib byte */ 261 sib = ip[i++]; 262 if ((sib & 7) == 5 && (modrm >> 6) == 0) { 263 /* start with absolute address */ 264 addr = (char *)(uintptr_t)(*(int *)(ip + i)); 265 i += 4; 266 } else { 267 /* start with base */ 268 r = ((rex & 1) << 3) | (sib & 7); 269 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 270 } 271 r = ((rex & 2) << 2) | ((sib >> 3) & 7); 272 if (r != 4) { 273 /* add scaled index */ 274 addr += uap->uc_mcontext.gregs[regno(r)] 275 << (sib >> 6); 276 } 277 } else { 278 r = ((rex & 1) << 3) | (modrm & 7); 279 addr = (char *)uap->uc_mcontext.gregs[regno(r)]; 280 } 281 282 /* add displacement, if any */ 283 if ((modrm >> 6) == 1) { 284 addr += (char)ip[i++]; 285 } else if ((modrm >> 6) == 2) { 286 addr += *(int *)(ip + i); 287 i += 4; 288 } 289 inst->op2 = (sseoperand_t *)addr; 290 } 291 292 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd || 293 inst->op == cmppd) { 294 /* get the immediate operand */ 295 inst->imm = ip[i++]; 296 } 297 298 return i; 299 } 300 301 static enum fp_class_type 302 my_fp_classf(float *x) 303 { 304 int i = *(int *)x & ~0x80000000; 305 306 if (i < 0x7f800000) { 307 if (i < 0x00800000) 308 return ((i == 0)? fp_zero : fp_subnormal); 309 return fp_normal; 310 } 311 else if (i == 0x7f800000) 312 return fp_infinity; 313 else if (i & 0x400000) 314 return fp_quiet; 315 else 316 return fp_signaling; 317 } 318 319 static enum fp_class_type 320 my_fp_class(double *x) 321 { 322 int i = *(1+(int *)x) & ~0x80000000; 323 324 if (i < 0x7ff00000) { 325 if (i < 0x00100000) 326 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal); 327 return fp_normal; 328 } 329 else if (i == 0x7ff00000 && *(int *)x == 0) 330 return fp_infinity; 331 else if (i & 0x80000) 332 return fp_quiet; 333 else 334 return fp_signaling; 335 } 336 337 /* 338 * Inspect a scalar SSE instruction that incurred an invalid operation 339 * exception to determine which type of exception it was. 340 */ 341 static enum fex_exception 342 __fex_get_sse_invalid_type(sseinst_t *inst) 343 { 344 enum fp_class_type t1, t2; 345 346 /* check op2 for signaling nan */ 347 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) : 348 my_fp_classf(&inst->op2->f[0]); 349 if (t2 == fp_signaling) 350 return fex_inv_snan; 351 352 /* eliminate all single-operand instructions */ 353 switch (inst->op) { 354 case cvtsd2ss: 355 case cvtss2sd: 356 /* hmm, this shouldn't have happened */ 357 return (enum fex_exception) -1; 358 359 case sqrtss: 360 case sqrtsd: 361 return fex_inv_sqrt; 362 363 case cvtss2si: 364 case cvtsd2si: 365 case cvttss2si: 366 case cvttsd2si: 367 case cvtss2siq: 368 case cvtsd2siq: 369 case cvttss2siq: 370 case cvttsd2siq: 371 return fex_inv_int; 372 default: 373 break; 374 } 375 376 /* check op1 for signaling nan */ 377 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) : 378 my_fp_classf(&inst->op1->f[0]); 379 if (t1 == fp_signaling) 380 return fex_inv_snan; 381 382 /* check two-operand instructions for other cases */ 383 switch (inst->op) { 384 case cmpss: 385 case cmpsd: 386 case minss: 387 case minsd: 388 case maxss: 389 case maxsd: 390 case comiss: 391 case comisd: 392 return fex_inv_cmp; 393 394 case addss: 395 case addsd: 396 case subss: 397 case subsd: 398 if (t1 == fp_infinity && t2 == fp_infinity) 399 return fex_inv_isi; 400 break; 401 402 case mulss: 403 case mulsd: 404 if ((t1 == fp_zero && t2 == fp_infinity) || 405 (t2 == fp_zero && t1 == fp_infinity)) 406 return fex_inv_zmi; 407 break; 408 409 case divss: 410 case divsd: 411 if (t1 == fp_zero && t2 == fp_zero) 412 return fex_inv_zdz; 413 if (t1 == fp_infinity && t2 == fp_infinity) 414 return fex_inv_idi; 415 default: 416 break; 417 } 418 419 return (enum fex_exception)-1; 420 } 421 422 /* inline templates */ 423 extern void sse_cmpeqss(float *, float *, int *); 424 extern void sse_cmpltss(float *, float *, int *); 425 extern void sse_cmpless(float *, float *, int *); 426 extern void sse_cmpunordss(float *, float *, int *); 427 extern void sse_minss(float *, float *, float *); 428 extern void sse_maxss(float *, float *, float *); 429 extern void sse_addss(float *, float *, float *); 430 extern void sse_subss(float *, float *, float *); 431 extern void sse_mulss(float *, float *, float *); 432 extern void sse_divss(float *, float *, float *); 433 extern void sse_sqrtss(float *, float *); 434 extern void sse_ucomiss(float *, float *); 435 extern void sse_comiss(float *, float *); 436 extern void sse_cvtss2sd(float *, double *); 437 extern void sse_cvtsi2ss(int *, float *); 438 extern void sse_cvttss2si(float *, int *); 439 extern void sse_cvtss2si(float *, int *); 440 #ifdef __amd64 441 extern void sse_cvtsi2ssq(long long *, float *); 442 extern void sse_cvttss2siq(float *, long long *); 443 extern void sse_cvtss2siq(float *, long long *); 444 #endif 445 extern void sse_cmpeqsd(double *, double *, long long *); 446 extern void sse_cmpltsd(double *, double *, long long *); 447 extern void sse_cmplesd(double *, double *, long long *); 448 extern void sse_cmpunordsd(double *, double *, long long *); 449 extern void sse_minsd(double *, double *, double *); 450 extern void sse_maxsd(double *, double *, double *); 451 extern void sse_addsd(double *, double *, double *); 452 extern void sse_subsd(double *, double *, double *); 453 extern void sse_mulsd(double *, double *, double *); 454 extern void sse_divsd(double *, double *, double *); 455 extern void sse_sqrtsd(double *, double *); 456 extern void sse_ucomisd(double *, double *); 457 extern void sse_comisd(double *, double *); 458 extern void sse_cvtsd2ss(double *, float *); 459 extern void sse_cvtsi2sd(int *, double *); 460 extern void sse_cvttsd2si(double *, int *); 461 extern void sse_cvtsd2si(double *, int *); 462 #ifdef __amd64 463 extern void sse_cvtsi2sdq(long long *, double *); 464 extern void sse_cvttsd2siq(double *, long long *); 465 extern void sse_cvtsd2siq(double *, long long *); 466 #endif 467 468 /* 469 * Fill in *info with the operands, default untrapped result, and 470 * flags produced by a scalar SSE instruction, and return the type 471 * of trapped exception (if any). On entry, the mxcsr must have 472 * all exceptions masked and all flags clear. The same conditions 473 * will hold on exit. 474 * 475 * This routine does not work if the instruction specified by *inst 476 * is not a scalar instruction. 477 */ 478 enum fex_exception 479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info) 480 { 481 unsigned int e, te, mxcsr, oldmxcsr, subnorm; 482 483 /* 484 * Perform the operation with traps disabled and check the 485 * exception flags. If the underflow trap was enabled, also 486 * check for an exact subnormal result. 487 */ 488 __fenv_getmxcsr(&oldmxcsr); 489 subnorm = 0; 490 if ((int)inst->op & DOUBLE) { 491 if (inst->op == cvtsi2sd) { 492 info->op1.type = fex_int; 493 info->op1.val.i = inst->op2->i[0]; 494 info->op2.type = fex_nodata; 495 } else if (inst->op == cvtsi2sdq) { 496 info->op1.type = fex_llong; 497 info->op1.val.l = inst->op2->l[0]; 498 info->op2.type = fex_nodata; 499 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss || 500 inst->op == cvttsd2si || inst->op == cvtsd2si || 501 inst->op == cvttsd2siq || inst->op == cvtsd2siq) { 502 info->op1.type = fex_double; 503 info->op1.val.d = inst->op2->d[0]; 504 info->op2.type = fex_nodata; 505 } else { 506 info->op1.type = fex_double; 507 info->op1.val.d = inst->op1->d[0]; 508 info->op2.type = fex_double; 509 info->op2.val.d = inst->op2->d[0]; 510 } 511 info->res.type = fex_double; 512 switch (inst->op) { 513 case cmpsd: 514 info->op = fex_cmp; 515 info->res.type = fex_llong; 516 switch (inst->imm & 3) { 517 case 0: 518 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d, 519 &info->res.val.l); 520 break; 521 522 case 1: 523 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d, 524 &info->res.val.l); 525 break; 526 527 case 2: 528 sse_cmplesd(&info->op1.val.d, &info->op2.val.d, 529 &info->res.val.l); 530 break; 531 532 case 3: 533 sse_cmpunordsd(&info->op1.val.d, 534 &info->op2.val.d, &info->res.val.l); 535 } 536 if (inst->imm & 4) 537 info->res.val.l ^= 0xffffffffffffffffull; 538 break; 539 540 case minsd: 541 info->op = fex_other; 542 sse_minsd(&info->op1.val.d, &info->op2.val.d, 543 &info->res.val.d); 544 break; 545 546 case maxsd: 547 info->op = fex_other; 548 sse_maxsd(&info->op1.val.d, &info->op2.val.d, 549 &info->res.val.d); 550 break; 551 552 case addsd: 553 info->op = fex_add; 554 sse_addsd(&info->op1.val.d, &info->op2.val.d, 555 &info->res.val.d); 556 if (my_fp_class(&info->res.val.d) == fp_subnormal) 557 subnorm = 1; 558 break; 559 560 case subsd: 561 info->op = fex_sub; 562 sse_subsd(&info->op1.val.d, &info->op2.val.d, 563 &info->res.val.d); 564 if (my_fp_class(&info->res.val.d) == fp_subnormal) 565 subnorm = 1; 566 break; 567 568 case mulsd: 569 info->op = fex_mul; 570 sse_mulsd(&info->op1.val.d, &info->op2.val.d, 571 &info->res.val.d); 572 if (my_fp_class(&info->res.val.d) == fp_subnormal) 573 subnorm = 1; 574 break; 575 576 case divsd: 577 info->op = fex_div; 578 sse_divsd(&info->op1.val.d, &info->op2.val.d, 579 &info->res.val.d); 580 if (my_fp_class(&info->res.val.d) == fp_subnormal) 581 subnorm = 1; 582 break; 583 584 case sqrtsd: 585 info->op = fex_sqrt; 586 sse_sqrtsd(&info->op1.val.d, &info->res.val.d); 587 break; 588 589 case cvtsd2ss: 590 info->op = fex_cnvt; 591 info->res.type = fex_float; 592 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f); 593 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 594 subnorm = 1; 595 break; 596 597 case cvtsi2sd: 598 info->op = fex_cnvt; 599 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d); 600 break; 601 602 case cvttsd2si: 603 info->op = fex_cnvt; 604 info->res.type = fex_int; 605 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i); 606 break; 607 608 case cvtsd2si: 609 info->op = fex_cnvt; 610 info->res.type = fex_int; 611 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i); 612 break; 613 614 #ifdef __amd64 615 case cvtsi2sdq: 616 info->op = fex_cnvt; 617 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d); 618 break; 619 620 case cvttsd2siq: 621 info->op = fex_cnvt; 622 info->res.type = fex_llong; 623 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l); 624 break; 625 626 case cvtsd2siq: 627 info->op = fex_cnvt; 628 info->res.type = fex_llong; 629 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l); 630 break; 631 #endif 632 633 case ucomisd: 634 info->op = fex_cmp; 635 info->res.type = fex_nodata; 636 sse_ucomisd(&info->op1.val.d, &info->op2.val.d); 637 break; 638 639 case comisd: 640 info->op = fex_cmp; 641 info->res.type = fex_nodata; 642 sse_comisd(&info->op1.val.d, &info->op2.val.d); 643 break; 644 default: 645 break; 646 } 647 } else { 648 if (inst->op == cvtsi2ss) { 649 info->op1.type = fex_int; 650 info->op1.val.i = inst->op2->i[0]; 651 info->op2.type = fex_nodata; 652 } else if (inst->op == cvtsi2ssq) { 653 info->op1.type = fex_llong; 654 info->op1.val.l = inst->op2->l[0]; 655 info->op2.type = fex_nodata; 656 } else if (inst->op == sqrtss || inst->op == cvtss2sd || 657 inst->op == cvttss2si || inst->op == cvtss2si || 658 inst->op == cvttss2siq || inst->op == cvtss2siq) { 659 info->op1.type = fex_float; 660 info->op1.val.f = inst->op2->f[0]; 661 info->op2.type = fex_nodata; 662 } else { 663 info->op1.type = fex_float; 664 info->op1.val.f = inst->op1->f[0]; 665 info->op2.type = fex_float; 666 info->op2.val.f = inst->op2->f[0]; 667 } 668 info->res.type = fex_float; 669 switch (inst->op) { 670 case cmpss: 671 info->op = fex_cmp; 672 info->res.type = fex_int; 673 switch (inst->imm & 3) { 674 case 0: 675 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f, 676 &info->res.val.i); 677 break; 678 679 case 1: 680 sse_cmpltss(&info->op1.val.f, &info->op2.val.f, 681 &info->res.val.i); 682 break; 683 684 case 2: 685 sse_cmpless(&info->op1.val.f, &info->op2.val.f, 686 &info->res.val.i); 687 break; 688 689 case 3: 690 sse_cmpunordss(&info->op1.val.f, 691 &info->op2.val.f, &info->res.val.i); 692 } 693 if (inst->imm & 4) 694 info->res.val.i ^= 0xffffffffu; 695 break; 696 697 case minss: 698 info->op = fex_other; 699 sse_minss(&info->op1.val.f, &info->op2.val.f, 700 &info->res.val.f); 701 break; 702 703 case maxss: 704 info->op = fex_other; 705 sse_maxss(&info->op1.val.f, &info->op2.val.f, 706 &info->res.val.f); 707 break; 708 709 case addss: 710 info->op = fex_add; 711 sse_addss(&info->op1.val.f, &info->op2.val.f, 712 &info->res.val.f); 713 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 714 subnorm = 1; 715 break; 716 717 case subss: 718 info->op = fex_sub; 719 sse_subss(&info->op1.val.f, &info->op2.val.f, 720 &info->res.val.f); 721 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 722 subnorm = 1; 723 break; 724 725 case mulss: 726 info->op = fex_mul; 727 sse_mulss(&info->op1.val.f, &info->op2.val.f, 728 &info->res.val.f); 729 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 730 subnorm = 1; 731 break; 732 733 case divss: 734 info->op = fex_div; 735 sse_divss(&info->op1.val.f, &info->op2.val.f, 736 &info->res.val.f); 737 if (my_fp_classf(&info->res.val.f) == fp_subnormal) 738 subnorm = 1; 739 break; 740 741 case sqrtss: 742 info->op = fex_sqrt; 743 sse_sqrtss(&info->op1.val.f, &info->res.val.f); 744 break; 745 746 case cvtss2sd: 747 info->op = fex_cnvt; 748 info->res.type = fex_double; 749 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d); 750 break; 751 752 case cvtsi2ss: 753 info->op = fex_cnvt; 754 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f); 755 break; 756 757 case cvttss2si: 758 info->op = fex_cnvt; 759 info->res.type = fex_int; 760 sse_cvttss2si(&info->op1.val.f, &info->res.val.i); 761 break; 762 763 case cvtss2si: 764 info->op = fex_cnvt; 765 info->res.type = fex_int; 766 sse_cvtss2si(&info->op1.val.f, &info->res.val.i); 767 break; 768 769 #ifdef __amd64 770 case cvtsi2ssq: 771 info->op = fex_cnvt; 772 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f); 773 break; 774 775 case cvttss2siq: 776 info->op = fex_cnvt; 777 info->res.type = fex_llong; 778 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l); 779 break; 780 781 case cvtss2siq: 782 info->op = fex_cnvt; 783 info->res.type = fex_llong; 784 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l); 785 break; 786 #endif 787 788 case ucomiss: 789 info->op = fex_cmp; 790 info->res.type = fex_nodata; 791 sse_ucomiss(&info->op1.val.f, &info->op2.val.f); 792 break; 793 794 case comiss: 795 info->op = fex_cmp; 796 info->res.type = fex_nodata; 797 sse_comiss(&info->op1.val.f, &info->op2.val.f); 798 break; 799 default: 800 break; 801 } 802 } 803 __fenv_getmxcsr(&mxcsr); 804 info->flags = mxcsr & 0x3d; 805 __fenv_setmxcsr(&oldmxcsr); 806 807 /* determine which exception would have been trapped */ 808 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr 809 >> 7) & 0x3d; 810 e = mxcsr & te; 811 if (e & FE_INVALID) 812 return __fex_get_sse_invalid_type(inst); 813 if (e & FE_DIVBYZERO) 814 return fex_division; 815 if (e & FE_OVERFLOW) 816 return fex_overflow; 817 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW))) 818 return fex_underflow; 819 if (e & FE_INEXACT) 820 return fex_inexact; 821 return (enum fex_exception)-1; 822 } 823 824 /* 825 * Emulate a SIMD SSE instruction to determine which exceptions occur 826 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the 827 * trapped exception that would occur if the i-th part of the SIMD 828 * instruction were executed in isolation; set e[i] to -1 if no 829 * trapped exception would occur in this part. Also fill in info[i] 830 * with the corresponding operands, default untrapped result, and 831 * flags. 832 * 833 * This routine does not work if the instruction specified by *inst 834 * is not a SIMD instruction. 835 */ 836 void 837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 838 fex_info_t *info) 839 { 840 sseinst_t dummy; 841 int i; 842 843 e[0] = e[1] = e[2] = e[3] = -1; 844 845 /* perform each part of the SIMD operation */ 846 switch (inst->op) { 847 case cmpps: 848 dummy.op = cmpss; 849 dummy.imm = inst->imm; 850 for (i = 0; i < 4; i++) { 851 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 852 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 853 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 854 } 855 break; 856 857 case minps: 858 dummy.op = minss; 859 for (i = 0; i < 4; i++) { 860 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 861 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 862 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 863 } 864 break; 865 866 case maxps: 867 dummy.op = maxss; 868 for (i = 0; i < 4; i++) { 869 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 870 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 871 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 872 } 873 break; 874 875 case addps: 876 dummy.op = addss; 877 for (i = 0; i < 4; i++) { 878 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 879 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 880 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 881 } 882 break; 883 884 case subps: 885 dummy.op = subss; 886 for (i = 0; i < 4; i++) { 887 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 888 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 889 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 890 } 891 break; 892 893 case mulps: 894 dummy.op = mulss; 895 for (i = 0; i < 4; i++) { 896 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 897 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 898 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 899 } 900 break; 901 902 case divps: 903 dummy.op = divss; 904 for (i = 0; i < 4; i++) { 905 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 906 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 907 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 908 } 909 break; 910 911 case sqrtps: 912 dummy.op = sqrtss; 913 for (i = 0; i < 4; i++) { 914 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 915 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 916 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 917 } 918 break; 919 920 case cvtdq2ps: 921 dummy.op = cvtsi2ss; 922 for (i = 0; i < 4; i++) { 923 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 924 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 925 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 926 } 927 break; 928 929 case cvttps2dq: 930 dummy.op = cvttss2si; 931 for (i = 0; i < 4; i++) { 932 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 933 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 934 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 935 } 936 break; 937 938 case cvtps2dq: 939 dummy.op = cvtss2si; 940 for (i = 0; i < 4; i++) { 941 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 942 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 943 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 944 } 945 break; 946 947 case cvtpi2ps: 948 dummy.op = cvtsi2ss; 949 for (i = 0; i < 2; i++) { 950 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 951 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 952 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 953 } 954 break; 955 956 case cvttps2pi: 957 dummy.op = cvttss2si; 958 for (i = 0; i < 2; i++) { 959 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 960 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 961 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 962 } 963 break; 964 965 case cvtps2pi: 966 dummy.op = cvtss2si; 967 for (i = 0; i < 2; i++) { 968 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 969 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 970 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 971 } 972 break; 973 974 case cmppd: 975 dummy.op = cmpsd; 976 dummy.imm = inst->imm; 977 for (i = 0; i < 2; i++) { 978 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 979 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 980 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 981 } 982 break; 983 984 case minpd: 985 dummy.op = minsd; 986 for (i = 0; i < 2; i++) { 987 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 988 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 989 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 990 } 991 break; 992 993 case maxpd: 994 dummy.op = maxsd; 995 for (i = 0; i < 2; i++) { 996 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 997 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 998 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 999 } 1000 break; 1001 1002 case addpd: 1003 dummy.op = addsd; 1004 for (i = 0; i < 2; i++) { 1005 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1006 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1007 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1008 } 1009 break; 1010 1011 case subpd: 1012 dummy.op = subsd; 1013 for (i = 0; i < 2; i++) { 1014 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1015 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1016 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1017 } 1018 break; 1019 1020 case mulpd: 1021 dummy.op = mulsd; 1022 for (i = 0; i < 2; i++) { 1023 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1024 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1025 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1026 } 1027 break; 1028 1029 case divpd: 1030 dummy.op = divsd; 1031 for (i = 0; i < 2; i++) { 1032 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1033 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1034 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1035 } 1036 break; 1037 1038 case sqrtpd: 1039 dummy.op = sqrtsd; 1040 for (i = 0; i < 2; i++) { 1041 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1042 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1044 } 1045 break; 1046 1047 case cvtpi2pd: 1048 case cvtdq2pd: 1049 dummy.op = cvtsi2sd; 1050 for (i = 0; i < 2; i++) { 1051 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1052 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1053 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1054 } 1055 break; 1056 1057 case cvttpd2pi: 1058 case cvttpd2dq: 1059 dummy.op = cvttsd2si; 1060 for (i = 0; i < 2; i++) { 1061 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1062 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1063 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1064 } 1065 break; 1066 1067 case cvtpd2pi: 1068 case cvtpd2dq: 1069 dummy.op = cvtsd2si; 1070 for (i = 0; i < 2; i++) { 1071 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1072 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1073 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1074 } 1075 break; 1076 1077 case cvtps2pd: 1078 dummy.op = cvtss2sd; 1079 for (i = 0; i < 2; i++) { 1080 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1081 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1082 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1083 } 1084 break; 1085 1086 case cvtpd2ps: 1087 dummy.op = cvtsd2ss; 1088 for (i = 0; i < 2; i++) { 1089 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1090 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1091 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]); 1092 } 1093 default: 1094 break; 1095 } 1096 } 1097 1098 /* 1099 * Store the result value from *info in the destination of the scalar 1100 * SSE instruction specified by *inst. If no result is given but the 1101 * exception is underflow or overflow, supply the default trapped result. 1102 * 1103 * This routine does not work if the instruction specified by *inst 1104 * is not a scalar instruction. 1105 */ 1106 void 1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e, 1108 fex_info_t *info) 1109 { 1110 int i = 0; 1111 long long l = 0L;; 1112 float f = 0.0, fscl; 1113 double d = 0.0L, dscl; 1114 1115 /* for compares that write eflags, just set the flags 1116 to indicate "unordered" */ 1117 if (inst->op == ucomiss || inst->op == comiss || 1118 inst->op == ucomisd || inst->op == comisd) { 1119 uap->uc_mcontext.gregs[REG_PS] |= 0x45; 1120 return; 1121 } 1122 1123 /* if info doesn't specify a result value, try to generate 1124 the default trapped result */ 1125 if (info->res.type == fex_nodata) { 1126 /* set scale factors for exponent wrapping */ 1127 switch (e) { 1128 case fex_overflow: 1129 fscl = 1.262177448e-29f; /* 2^-96 */ 1130 dscl = 6.441148769597133308e-232; /* 2^-768 */ 1131 break; 1132 1133 case fex_underflow: 1134 fscl = 7.922816251e+28f; /* 2^96 */ 1135 dscl = 1.552518092300708935e+231; /* 2^768 */ 1136 break; 1137 1138 default: 1139 (void) __fex_get_sse_op(uap, inst, info); 1140 if (info->res.type == fex_nodata) 1141 return; 1142 goto stuff; 1143 } 1144 1145 /* generate the wrapped result */ 1146 if (inst->op == cvtsd2ss) { 1147 info->op1.type = fex_double; 1148 info->op1.val.d = inst->op2->d[0]; 1149 info->op2.type = fex_nodata; 1150 info->res.type = fex_float; 1151 info->res.val.f = (float)(fscl * (fscl * 1152 info->op1.val.d)); 1153 } else if ((int)inst->op & DOUBLE) { 1154 info->op1.type = fex_double; 1155 info->op1.val.d = inst->op1->d[0]; 1156 info->op2.type = fex_double; 1157 info->op2.val.d = inst->op2->d[0]; 1158 info->res.type = fex_double; 1159 switch (inst->op) { 1160 case addsd: 1161 info->res.val.d = dscl * (dscl * 1162 info->op1.val.d + dscl * info->op2.val.d); 1163 break; 1164 1165 case subsd: 1166 info->res.val.d = dscl * (dscl * 1167 info->op1.val.d - dscl * info->op2.val.d); 1168 break; 1169 1170 case mulsd: 1171 info->res.val.d = (dscl * info->op1.val.d) * 1172 (dscl * info->op2.val.d); 1173 break; 1174 1175 case divsd: 1176 info->res.val.d = (dscl * info->op1.val.d) / 1177 (info->op2.val.d / dscl); 1178 break; 1179 1180 default: 1181 return; 1182 } 1183 } else { 1184 info->op1.type = fex_float; 1185 info->op1.val.f = inst->op1->f[0]; 1186 info->op2.type = fex_float; 1187 info->op2.val.f = inst->op2->f[0]; 1188 info->res.type = fex_float; 1189 switch (inst->op) { 1190 case addss: 1191 info->res.val.f = fscl * (fscl * 1192 info->op1.val.f + fscl * info->op2.val.f); 1193 break; 1194 1195 case subss: 1196 info->res.val.f = fscl * (fscl * 1197 info->op1.val.f - fscl * info->op2.val.f); 1198 break; 1199 1200 case mulss: 1201 info->res.val.f = (fscl * info->op1.val.f) * 1202 (fscl * info->op2.val.f); 1203 break; 1204 1205 case divss: 1206 info->res.val.f = (fscl * info->op1.val.f) / 1207 (info->op2.val.f / fscl); 1208 break; 1209 1210 default: 1211 return; 1212 } 1213 } 1214 } 1215 1216 /* put the result in the destination */ 1217 stuff: 1218 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si 1219 || inst->op == cvttsd2si || inst->op == cvtsd2si) { 1220 switch (info->res.type) { 1221 case fex_int: 1222 i = info->res.val.i; 1223 break; 1224 1225 case fex_llong: 1226 i = info->res.val.l; 1227 break; 1228 1229 case fex_float: 1230 i = info->res.val.f; 1231 break; 1232 1233 case fex_double: 1234 i = info->res.val.d; 1235 break; 1236 1237 case fex_ldouble: 1238 i = info->res.val.q; 1239 break; 1240 1241 default: 1242 break; 1243 } 1244 inst->op1->i[0] = i; 1245 } else if (inst->op == cmpsd || inst->op == cvttss2siq || 1246 inst->op == cvtss2siq || inst->op == cvttsd2siq || 1247 inst->op == cvtsd2siq) { 1248 switch (info->res.type) { 1249 case fex_int: 1250 l = info->res.val.i; 1251 break; 1252 1253 case fex_llong: 1254 l = info->res.val.l; 1255 break; 1256 1257 case fex_float: 1258 l = info->res.val.f; 1259 break; 1260 1261 case fex_double: 1262 l = info->res.val.d; 1263 break; 1264 1265 case fex_ldouble: 1266 l = info->res.val.q; 1267 break; 1268 1269 default: 1270 break; 1271 } 1272 inst->op1->l[0] = l; 1273 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) || 1274 inst->op == cvtss2sd) { 1275 switch (info->res.type) { 1276 case fex_int: 1277 d = info->res.val.i; 1278 break; 1279 1280 case fex_llong: 1281 d = info->res.val.l; 1282 break; 1283 1284 case fex_float: 1285 d = info->res.val.f; 1286 break; 1287 1288 case fex_double: 1289 d = info->res.val.d; 1290 break; 1291 1292 case fex_ldouble: 1293 d = info->res.val.q; 1294 break; 1295 1296 default: 1297 break; 1298 } 1299 inst->op1->d[0] = d; 1300 } else { 1301 switch (info->res.type) { 1302 case fex_int: 1303 f = info->res.val.i; 1304 break; 1305 1306 case fex_llong: 1307 f = info->res.val.l; 1308 break; 1309 1310 case fex_float: 1311 f = info->res.val.f; 1312 break; 1313 1314 case fex_double: 1315 f = info->res.val.d; 1316 break; 1317 1318 case fex_ldouble: 1319 f = info->res.val.q; 1320 break; 1321 1322 default: 1323 break; 1324 } 1325 inst->op1->f[0] = f; 1326 } 1327 } 1328 1329 /* 1330 * Store the results from a SIMD instruction. For each i, store 1331 * the result value from info[i] in the i-th part of the destination 1332 * of the SIMD SSE instruction specified by *inst. If no result 1333 * is given but the exception indicated by e[i] is underflow or 1334 * overflow, supply the default trapped result. 1335 * 1336 * This routine does not work if the instruction specified by *inst 1337 * is not a SIMD instruction. 1338 */ 1339 void 1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e, 1341 fex_info_t *info) 1342 { 1343 sseinst_t dummy; 1344 int i; 1345 1346 /* store each part */ 1347 switch (inst->op) { 1348 case cmpps: 1349 dummy.op = cmpss; 1350 dummy.imm = inst->imm; 1351 for (i = 0; i < 4; i++) { 1352 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1353 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1354 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1355 } 1356 break; 1357 1358 case minps: 1359 dummy.op = minss; 1360 for (i = 0; i < 4; i++) { 1361 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1362 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1363 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1364 } 1365 break; 1366 1367 case maxps: 1368 dummy.op = maxss; 1369 for (i = 0; i < 4; i++) { 1370 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1371 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1372 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1373 } 1374 break; 1375 1376 case addps: 1377 dummy.op = addss; 1378 for (i = 0; i < 4; i++) { 1379 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1380 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1381 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1382 } 1383 break; 1384 1385 case subps: 1386 dummy.op = subss; 1387 for (i = 0; i < 4; i++) { 1388 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1389 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1390 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1391 } 1392 break; 1393 1394 case mulps: 1395 dummy.op = mulss; 1396 for (i = 0; i < 4; i++) { 1397 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1398 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1399 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1400 } 1401 break; 1402 1403 case divps: 1404 dummy.op = divss; 1405 for (i = 0; i < 4; i++) { 1406 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1407 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1408 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1409 } 1410 break; 1411 1412 case sqrtps: 1413 dummy.op = sqrtss; 1414 for (i = 0; i < 4; i++) { 1415 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1416 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1417 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1418 } 1419 break; 1420 1421 case cvtdq2ps: 1422 dummy.op = cvtsi2ss; 1423 for (i = 0; i < 4; i++) { 1424 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1425 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1426 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1427 } 1428 break; 1429 1430 case cvttps2dq: 1431 dummy.op = cvttss2si; 1432 for (i = 0; i < 4; i++) { 1433 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1434 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1435 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1436 } 1437 break; 1438 1439 case cvtps2dq: 1440 dummy.op = cvtss2si; 1441 for (i = 0; i < 4; i++) { 1442 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1443 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1444 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1445 } 1446 break; 1447 1448 case cvtpi2ps: 1449 dummy.op = cvtsi2ss; 1450 for (i = 0; i < 2; i++) { 1451 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1452 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1453 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1454 } 1455 break; 1456 1457 case cvttps2pi: 1458 dummy.op = cvttss2si; 1459 for (i = 0; i < 2; i++) { 1460 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1461 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1462 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1463 } 1464 break; 1465 1466 case cvtps2pi: 1467 dummy.op = cvtss2si; 1468 for (i = 0; i < 2; i++) { 1469 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1470 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1471 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1472 } 1473 break; 1474 1475 case cmppd: 1476 dummy.op = cmpsd; 1477 dummy.imm = inst->imm; 1478 for (i = 0; i < 2; i++) { 1479 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1480 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1481 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1482 } 1483 break; 1484 1485 case minpd: 1486 dummy.op = minsd; 1487 for (i = 0; i < 2; i++) { 1488 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1489 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1490 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1491 } 1492 break; 1493 1494 case maxpd: 1495 dummy.op = maxsd; 1496 for (i = 0; i < 2; i++) { 1497 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1498 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1500 } 1501 break; 1502 1503 case addpd: 1504 dummy.op = addsd; 1505 for (i = 0; i < 2; i++) { 1506 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1507 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1508 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1509 } 1510 break; 1511 1512 case subpd: 1513 dummy.op = subsd; 1514 for (i = 0; i < 2; i++) { 1515 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1516 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1517 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1518 } 1519 break; 1520 1521 case mulpd: 1522 dummy.op = mulsd; 1523 for (i = 0; i < 2; i++) { 1524 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1525 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1526 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1527 } 1528 break; 1529 1530 case divpd: 1531 dummy.op = divsd; 1532 for (i = 0; i < 2; i++) { 1533 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1534 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1535 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1536 } 1537 break; 1538 1539 case sqrtpd: 1540 dummy.op = sqrtsd; 1541 for (i = 0; i < 2; i++) { 1542 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1543 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1544 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1545 } 1546 break; 1547 1548 case cvtpi2pd: 1549 case cvtdq2pd: 1550 dummy.op = cvtsi2sd; 1551 for (i = 0; i < 2; i++) { 1552 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1553 dummy.op2 = (sseoperand_t *)&inst->op2->i[i]; 1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1555 } 1556 break; 1557 1558 case cvttpd2pi: 1559 case cvttpd2dq: 1560 dummy.op = cvttsd2si; 1561 for (i = 0; i < 2; i++) { 1562 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1563 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1564 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1565 } 1566 /* for cvttpd2dq, zero the high 64 bits of the destination */ 1567 if (inst->op == cvttpd2dq) 1568 inst->op1->l[1] = 0ll; 1569 break; 1570 1571 case cvtpd2pi: 1572 case cvtpd2dq: 1573 dummy.op = cvtsd2si; 1574 for (i = 0; i < 2; i++) { 1575 dummy.op1 = (sseoperand_t *)&inst->op1->i[i]; 1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1578 } 1579 /* for cvtpd2dq, zero the high 64 bits of the destination */ 1580 if (inst->op == cvtpd2dq) 1581 inst->op1->l[1] = 0ll; 1582 break; 1583 1584 case cvtps2pd: 1585 dummy.op = cvtss2sd; 1586 for (i = 0; i < 2; i++) { 1587 dummy.op1 = (sseoperand_t *)&inst->op1->d[i]; 1588 dummy.op2 = (sseoperand_t *)&inst->op2->f[i]; 1589 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1590 } 1591 break; 1592 1593 case cvtpd2ps: 1594 dummy.op = cvtsd2ss; 1595 for (i = 0; i < 2; i++) { 1596 dummy.op1 = (sseoperand_t *)&inst->op1->f[i]; 1597 dummy.op2 = (sseoperand_t *)&inst->op2->d[i]; 1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]); 1599 } 1600 /* zero the high 64 bits of the destination */ 1601 inst->op1->l[1] = 0ll; 1602 1603 default: 1604 break; 1605 } 1606 } 1607