1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #include <ucontext.h>
  32 #include <fenv.h>
  33 #if defined(__SUNPRO_C)
  34 #include <sunmath.h>
  35 #else
  36 #include <sys/ieeefp.h>
  37 #endif
  38 #include "fex_handler.h"
  39 #include "fenv_inlines.h"
  40 
  41 #if !defined(REG_PC)
  42 #define REG_PC          EIP
  43 #endif
  44 
  45 #if !defined(REG_PS)
  46 #define REG_PS          EFL
  47 #endif
  48 
  49 #ifdef __amd64
  50 #define regno(X)        ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \
  51         REG_RSP))
  52 #else
  53 #define regno(X)        (EAX - X)
  54 #endif
  55 
  56 /*
  57  * Support for SSE instructions
  58  */
  59 
  60 /*
  61  * Decode an SSE instruction.  Fill in *inst and return the length of the
  62  * instruction in bytes.  Return 0 if the instruction is not recognized.
  63  */
  64 int
  65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  66 {
  67         unsigned char *ip;
  68         char *addr;
  69         int i, dbl, simd, rex, modrm, sib, r;
  70 
  71         i = 0;
  72         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  73 
  74         /* look for pseudo-prefixes */
  75         dbl = 0;
  76         simd = SIMD;
  77 
  78         if (ip[i] == 0xF3) {
  79                 simd = 0;
  80                 i++;
  81         } else if (ip[i] == 0x66) {
  82                 dbl = DOUBLE;
  83                 i++;
  84         } else if (ip[i] == 0xF2) {
  85                 dbl = DOUBLE;
  86                 simd = 0;
  87                 i++;
  88         }
  89 
  90         /* look for AMD64 REX prefix */
  91         rex = 0;
  92 
  93         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  94                 rex = ip[i];
  95                 i++;
  96         }
  97 
  98         /* parse opcode */
  99         if (ip[i++] != 0x0F)
 100                 return (0);
 101 
 102         switch (ip[i++]) {
 103         case 0x2A:
 104                 inst->op = (int)cvtsi2ss + simd + dbl;
 105 
 106                 if (!simd)
 107                         inst->op = (int)inst->op + (rex & 8);
 108 
 109                 break;
 110 
 111         case 0x2C:
 112                 inst->op = (int)cvttss2si + simd + dbl;
 113 
 114                 if (!simd)
 115                         inst->op = (int)inst->op + (rex & 8);
 116 
 117                 break;
 118 
 119         case 0x2D:
 120                 inst->op = (int)cvtss2si + simd + dbl;
 121 
 122                 if (!simd)
 123                         inst->op = (int)inst->op + (rex & 8);
 124 
 125                 break;
 126 
 127         case 0x2E:
 128 
 129                 /* oddball: scalar instruction in a SIMD opcode group */
 130                 if (!simd)
 131                         return (0);
 132 
 133                 inst->op = (int)ucomiss + dbl;
 134                 break;
 135 
 136         case 0x2F:
 137 
 138                 /* oddball: scalar instruction in a SIMD opcode group */
 139                 if (!simd)
 140                         return (0);
 141 
 142                 inst->op = (int)comiss + dbl;
 143                 break;
 144 
 145         case 0x51:
 146                 inst->op = (int)sqrtss + simd + dbl;
 147                 break;
 148 
 149         case 0x58:
 150                 inst->op = (int)addss + simd + dbl;
 151                 break;
 152 
 153         case 0x59:
 154                 inst->op = (int)mulss + simd + dbl;
 155                 break;
 156 
 157         case 0x5A:
 158                 inst->op = (int)cvtss2sd + simd + dbl;
 159                 break;
 160 
 161         case 0x5B:
 162 
 163                 if (dbl) {
 164                         if (simd)
 165                                 inst->op = cvtps2dq;
 166                         else
 167                                 return (0);
 168                 } else {
 169                         inst->op = (simd) ? cvtdq2ps : cvttps2dq;
 170                 }
 171 
 172                 break;
 173 
 174         case 0x5C:
 175                 inst->op = (int)subss + simd + dbl;
 176                 break;
 177 
 178         case 0x5D:
 179                 inst->op = (int)minss + simd + dbl;
 180                 break;
 181 
 182         case 0x5E:
 183                 inst->op = (int)divss + simd + dbl;
 184                 break;
 185 
 186         case 0x5F:
 187                 inst->op = (int)maxss + simd + dbl;
 188                 break;
 189 
 190         case 0xC2:
 191                 inst->op = (int)cmpss + simd + dbl;
 192                 break;
 193 
 194         case 0xE6:
 195 
 196                 if (simd) {
 197                         if (dbl)
 198                                 inst->op = cvttpd2dq;
 199                         else
 200                                 return (0);
 201                 } else {
 202                         inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
 203                 }
 204 
 205                 break;
 206 
 207         default:
 208                 return (0);
 209         }
 210 
 211         /* locate operands */
 212         modrm = ip[i++];
 213 
 214         if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
 215             cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
 216             inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
 217             cvttsd2siq) {
 218                 /* op1 is a gp register */
 219                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 220                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 221         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
 222             cvtpd2pi || inst->op == cvttpd2pi) {
 223                 /* op1 is a mmx register */
 224 #ifdef __amd64
 225                 inst->op1 = (sseoperand_t *)
 226                     &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
 227                     .st[(modrm >> 3) & 7];
 228 #else
 229                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 230                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
 231                     .state[7]);
 232 #endif
 233         } else {
 234                 /* op1 is a xmm register */
 235                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 236                 inst->op1 =
 237                     (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 238                     .fpchip_state.xmm[r];
 239         }
 240 
 241         if ((modrm >> 6) == 3) {
 242                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
 243                     cvtsi2ssq || inst->op == cvtsi2sdq) {
 244                         /* op2 is a gp register */
 245                         r = ((rex & 1) << 3) | (modrm & 7);
 246                         inst->op2 =
 247                             (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 248                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 249                         /* op2 is a mmx register */
 250 #ifdef __amd64
 251                         inst->op2 =
 252                             (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 253                             .fpchip_state.st[modrm & 7];
 254 #else
 255                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 256                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set
 257                             .fpchip_state.state[7]);
 258 #endif
 259                 } else {
 260                         /* op2 is a xmm register */
 261                         r = ((rex & 1) << 3) | (modrm & 7);
 262                         inst->op2 =
 263                             (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 264                             .fpchip_state.xmm[r];
 265                 }
 266         } else if ((modrm & 0xc7) == 0x05) {
 267 #ifdef __amd64
 268                 /* address of next instruction + offset */
 269                 r = i + 4;
 270 
 271                 if (inst->op == cmpss || inst->op == cmpps || inst->op ==
 272                     cmpsd || inst->op == cmppd)
 273                         r++;
 274 
 275                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 276 #else
 277                 /* absolute address */
 278                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 279 #endif
 280                 i += 4;
 281         } else {
 282                 /* complex address */
 283                 if ((modrm & 7) == 4) {
 284                         /* parse sib byte */
 285                         sib = ip[i++];
 286 
 287                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 288                                 /* start with absolute address */
 289                                 addr = (char *)(uintptr_t)(*(int *)(ip + i));
 290                                 i += 4;
 291                         } else {
 292                                 /* start with base */
 293                                 r = ((rex & 1) << 3) | (sib & 7);
 294                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 295                         }
 296 
 297                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);
 298 
 299                         if (r != 4) {
 300                                 /* add scaled index */
 301                                 addr += uap->uc_mcontext.gregs[regno(r)] <<
 302                                     (sib >> 6);
 303                         }
 304                 } else {
 305                         r = ((rex & 1) << 3) | (modrm & 7);
 306                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 307                 }
 308 
 309                 /* add displacement, if any */
 310                 if ((modrm >> 6) == 1) {
 311                         addr += (char)ip[i++];
 312                 } else if ((modrm >> 6) == 2) {
 313                         addr += *(int *)(ip + i);
 314                         i += 4;
 315                 }
 316 
 317                 inst->op2 = (sseoperand_t *)addr;
 318         }
 319 
 320         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 321             inst->op == cmppd) {
 322                 /* get the immediate operand */
 323                 inst->imm = ip[i++];
 324         }
 325 
 326         return (i);
 327 }
 328 
 329 static enum fp_class_type
 330 my_fp_classf(float *x)
 331 {
 332         int i = *(int *)x & ~0x80000000;
 333 
 334         if (i < 0x7f800000) {
 335                 if (i < 0x00800000)
 336                         return ((i == 0) ? fp_zero : fp_subnormal);
 337 
 338                 return (fp_normal);
 339         } else if (i == 0x7f800000) {
 340                 return (fp_infinity);
 341         } else if (i & 0x400000) {
 342                 return (fp_quiet);
 343         } else {
 344                 return (fp_signaling);
 345         }
 346 }
 347 
 348 static enum fp_class_type
 349 my_fp_class(double *x)
 350 {
 351         int i = *(1 + (int *)x) & ~0x80000000;
 352 
 353         if (i < 0x7ff00000) {
 354                 if (i < 0x00100000)
 355                         return (((i | *(int *)x) == 0) ? fp_zero :
 356                             fp_subnormal);
 357 
 358                 return (fp_normal);
 359         } else if (i == 0x7ff00000 && *(int *)x == 0) {
 360                 return (fp_infinity);
 361         } else if (i & 0x80000) {
 362                 return (fp_quiet);
 363         } else {
 364                 return (fp_signaling);
 365         }
 366 }
 367 
 368 /*
 369  * Inspect a scalar SSE instruction that incurred an invalid operation
 370  * exception to determine which type of exception it was.
 371  */
 372 static enum fex_exception
 373 __fex_get_sse_invalid_type(sseinst_t *inst)
 374 {
 375         enum fp_class_type t1, t2;
 376 
 377         /* check op2 for signaling nan */
 378         t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
 379             my_fp_classf(&inst->op2->f[0]);
 380 
 381         if (t2 == fp_signaling)
 382                 return (fex_inv_snan);
 383 
 384         /* eliminate all single-operand instructions */
 385         switch (inst->op) {
 386         case cvtsd2ss:
 387         case cvtss2sd:
 388                 /* hmm, this shouldn't have happened */
 389                 return ((enum fex_exception)-1);
 390 
 391         case sqrtss:
 392         case sqrtsd:
 393                 return (fex_inv_sqrt);
 394 
 395         case cvtss2si:
 396         case cvtsd2si:
 397         case cvttss2si:
 398         case cvttsd2si:
 399         case cvtss2siq:
 400         case cvtsd2siq:
 401         case cvttss2siq:
 402         case cvttsd2siq:
 403                 return (fex_inv_int);
 404         default:
 405                 break;
 406         }
 407 
 408         /* check op1 for signaling nan */
 409         t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
 410             my_fp_classf(&inst->op1->f[0]);
 411 
 412         if (t1 == fp_signaling)
 413                 return (fex_inv_snan);
 414 
 415         /* check two-operand instructions for other cases */
 416         switch (inst->op) {
 417         case cmpss:
 418         case cmpsd:
 419         case minss:
 420         case minsd:
 421         case maxss:
 422         case maxsd:
 423         case comiss:
 424         case comisd:
 425                 return (fex_inv_cmp);
 426 
 427         case addss:
 428         case addsd:
 429         case subss:
 430         case subsd:
 431 
 432                 if (t1 == fp_infinity && t2 == fp_infinity)
 433                         return (fex_inv_isi);
 434 
 435                 break;
 436 
 437         case mulss:
 438         case mulsd:
 439 
 440                 if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
 441                     t1 == fp_infinity))
 442                         return (fex_inv_zmi);
 443 
 444                 break;
 445 
 446         case divss:
 447         case divsd:
 448 
 449                 if (t1 == fp_zero && t2 == fp_zero)
 450                         return (fex_inv_zdz);
 451 
 452                 if (t1 == fp_infinity && t2 == fp_infinity)
 453                         return (fex_inv_idi);
 454 
 455         default:
 456                 break;
 457         }
 458 
 459         return ((enum fex_exception)-1);
 460 }
 461 
 462 /* inline templates */
 463 extern void sse_cmpeqss(float *, float *, int *);
 464 extern void sse_cmpltss(float *, float *, int *);
 465 extern void sse_cmpless(float *, float *, int *);
 466 extern void sse_cmpunordss(float *, float *, int *);
 467 extern void sse_minss(float *, float *, float *);
 468 extern void sse_maxss(float *, float *, float *);
 469 extern void sse_addss(float *, float *, float *);
 470 extern void sse_subss(float *, float *, float *);
 471 extern void sse_mulss(float *, float *, float *);
 472 extern void sse_divss(float *, float *, float *);
 473 extern void sse_sqrtss(float *, float *);
 474 extern void sse_ucomiss(float *, float *);
 475 extern void sse_comiss(float *, float *);
 476 extern void sse_cvtss2sd(float *, double *);
 477 extern void sse_cvtsi2ss(int *, float *);
 478 extern void sse_cvttss2si(float *, int *);
 479 extern void sse_cvtss2si(float *, int *);
 480 
 481 #ifdef __amd64
 482 extern void sse_cvtsi2ssq(long long *, float *);
 483 extern void sse_cvttss2siq(float *, long long *);
 484 extern void sse_cvtss2siq(float *, long long *);
 485 #endif
 486 
 487 extern void sse_cmpeqsd(double *, double *, long long *);
 488 extern void sse_cmpltsd(double *, double *, long long *);
 489 extern void sse_cmplesd(double *, double *, long long *);
 490 extern void sse_cmpunordsd(double *, double *, long long *);
 491 extern void sse_minsd(double *, double *, double *);
 492 extern void sse_maxsd(double *, double *, double *);
 493 extern void sse_addsd(double *, double *, double *);
 494 extern void sse_subsd(double *, double *, double *);
 495 extern void sse_mulsd(double *, double *, double *);
 496 extern void sse_divsd(double *, double *, double *);
 497 extern void sse_sqrtsd(double *, double *);
 498 extern void sse_ucomisd(double *, double *);
 499 extern void sse_comisd(double *, double *);
 500 extern void sse_cvtsd2ss(double *, float *);
 501 extern void sse_cvtsi2sd(int *, double *);
 502 extern void sse_cvttsd2si(double *, int *);
 503 extern void sse_cvtsd2si(double *, int *);
 504 
 505 #ifdef __amd64
 506 extern void sse_cvtsi2sdq(long long *, double *);
 507 extern void sse_cvttsd2siq(double *, long long *);
 508 extern void sse_cvtsd2siq(double *, long long *);
 509 #endif
 510 
 511 /*
 512  * Fill in *info with the operands, default untrapped result, and
 513  * flags produced by a scalar SSE instruction, and return the type
 514  * of trapped exception (if any).  On entry, the mxcsr must have
 515  * all exceptions masked and all flags clear.  The same conditions
 516  * will hold on exit.
 517  *
 518  * This routine does not work if the instruction specified by *inst
 519  * is not a scalar instruction.
 520  */
 521 enum fex_exception
 522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 523 {
 524         unsigned int e, te, mxcsr, oldmxcsr, subnorm;
 525 
 526         /*
 527          * Perform the operation with traps disabled and check the
 528          * exception flags.  If the underflow trap was enabled, also
 529          * check for an exact subnormal result.
 530          */
 531         __fenv_getmxcsr(&oldmxcsr);
 532         subnorm = 0;
 533 
 534         if ((int)inst->op & DOUBLE) {
 535                 if (inst->op == cvtsi2sd) {
 536                         info->op1.type = fex_int;
 537                         info->op1.val.i = inst->op2->i[0];
 538                         info->op2.type = fex_nodata;
 539                 } else if (inst->op == cvtsi2sdq) {
 540                         info->op1.type = fex_llong;
 541                         info->op1.val.l = inst->op2->l[0];
 542                         info->op2.type = fex_nodata;
 543                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 544                     inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
 545                     cvttsd2siq || inst->op == cvtsd2siq) {
 546                         info->op1.type = fex_double;
 547                         info->op1.val.d = inst->op2->d[0];
 548                         info->op2.type = fex_nodata;
 549                 } else {
 550                         info->op1.type = fex_double;
 551                         info->op1.val.d = inst->op1->d[0];
 552                         info->op2.type = fex_double;
 553                         info->op2.val.d = inst->op2->d[0];
 554                 }
 555 
 556                 info->res.type = fex_double;
 557 
 558                 switch (inst->op) {
 559                 case cmpsd:
 560                         info->op = fex_cmp;
 561                         info->res.type = fex_llong;
 562 
 563                         switch (inst->imm & 3) {
 564                         case 0:
 565                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 566                                     &info->res.val.l);
 567                                 break;
 568 
 569                         case 1:
 570                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 571                                     &info->res.val.l);
 572                                 break;
 573 
 574                         case 2:
 575                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 576                                     &info->res.val.l);
 577                                 break;
 578 
 579                         case 3:
 580                                 sse_cmpunordsd(&info->op1.val.d,
 581                                     &info->op2.val.d, &info->res.val.l);
 582                         }
 583 
 584                         if (inst->imm & 4)
 585                                 info->res.val.l ^= 0xffffffffffffffffull;
 586 
 587                         break;
 588 
 589                 case minsd:
 590                         info->op = fex_other;
 591                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 592                             &info->res.val.d);
 593                         break;
 594 
 595                 case maxsd:
 596                         info->op = fex_other;
 597                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 598                             &info->res.val.d);
 599                         break;
 600 
 601                 case addsd:
 602                         info->op = fex_add;
 603                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 604                             &info->res.val.d);
 605 
 606                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 607                                 subnorm = 1;
 608 
 609                         break;
 610 
 611                 case subsd:
 612                         info->op = fex_sub;
 613                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 614                             &info->res.val.d);
 615 
 616                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 617                                 subnorm = 1;
 618 
 619                         break;
 620 
 621                 case mulsd:
 622                         info->op = fex_mul;
 623                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 624                             &info->res.val.d);
 625 
 626                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 627                                 subnorm = 1;
 628 
 629                         break;
 630 
 631                 case divsd:
 632                         info->op = fex_div;
 633                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 634                             &info->res.val.d);
 635 
 636                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 637                                 subnorm = 1;
 638 
 639                         break;
 640 
 641                 case sqrtsd:
 642                         info->op = fex_sqrt;
 643                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 644                         break;
 645 
 646                 case cvtsd2ss:
 647                         info->op = fex_cnvt;
 648                         info->res.type = fex_float;
 649                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
 650 
 651                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 652                                 subnorm = 1;
 653 
 654                         break;
 655 
 656                 case cvtsi2sd:
 657                         info->op = fex_cnvt;
 658                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 659                         break;
 660 
 661                 case cvttsd2si:
 662                         info->op = fex_cnvt;
 663                         info->res.type = fex_int;
 664                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 665                         break;
 666 
 667                 case cvtsd2si:
 668                         info->op = fex_cnvt;
 669                         info->res.type = fex_int;
 670                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 671                         break;
 672 
 673 #ifdef __amd64
 674                 case cvtsi2sdq:
 675                         info->op = fex_cnvt;
 676                         sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
 677                         break;
 678 
 679                 case cvttsd2siq:
 680                         info->op = fex_cnvt;
 681                         info->res.type = fex_llong;
 682                         sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
 683                         break;
 684 
 685                 case cvtsd2siq:
 686                         info->op = fex_cnvt;
 687                         info->res.type = fex_llong;
 688                         sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
 689                         break;
 690 #endif
 691 
 692                 case ucomisd:
 693                         info->op = fex_cmp;
 694                         info->res.type = fex_nodata;
 695                         sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
 696                         break;
 697 
 698                 case comisd:
 699                         info->op = fex_cmp;
 700                         info->res.type = fex_nodata;
 701                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 702                         break;
 703                 default:
 704                         break;
 705                 }
 706         } else {
 707                 if (inst->op == cvtsi2ss) {
 708                         info->op1.type = fex_int;
 709                         info->op1.val.i = inst->op2->i[0];
 710                         info->op2.type = fex_nodata;
 711                 } else if (inst->op == cvtsi2ssq) {
 712                         info->op1.type = fex_llong;
 713                         info->op1.val.l = inst->op2->l[0];
 714                         info->op2.type = fex_nodata;
 715                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 716                     inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
 717                     cvttss2siq || inst->op == cvtss2siq) {
 718                         info->op1.type = fex_float;
 719                         info->op1.val.f = inst->op2->f[0];
 720                         info->op2.type = fex_nodata;
 721                 } else {
 722                         info->op1.type = fex_float;
 723                         info->op1.val.f = inst->op1->f[0];
 724                         info->op2.type = fex_float;
 725                         info->op2.val.f = inst->op2->f[0];
 726                 }
 727 
 728                 info->res.type = fex_float;
 729 
 730                 switch (inst->op) {
 731                 case cmpss:
 732                         info->op = fex_cmp;
 733                         info->res.type = fex_int;
 734 
 735                         switch (inst->imm & 3) {
 736                         case 0:
 737                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 738                                     &info->res.val.i);
 739                                 break;
 740 
 741                         case 1:
 742                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 743                                     &info->res.val.i);
 744                                 break;
 745 
 746                         case 2:
 747                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 748                                     &info->res.val.i);
 749                                 break;
 750 
 751                         case 3:
 752                                 sse_cmpunordss(&info->op1.val.f,
 753                                     &info->op2.val.f, &info->res.val.i);
 754                         }
 755 
 756                         if (inst->imm & 4)
 757                                 info->res.val.i ^= 0xffffffffu;
 758 
 759                         break;
 760 
 761                 case minss:
 762                         info->op = fex_other;
 763                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 764                             &info->res.val.f);
 765                         break;
 766 
 767                 case maxss:
 768                         info->op = fex_other;
 769                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 770                             &info->res.val.f);
 771                         break;
 772 
 773                 case addss:
 774                         info->op = fex_add;
 775                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 776                             &info->res.val.f);
 777 
 778                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 779                                 subnorm = 1;
 780 
 781                         break;
 782 
 783                 case subss:
 784                         info->op = fex_sub;
 785                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 786                             &info->res.val.f);
 787 
 788                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 789                                 subnorm = 1;
 790 
 791                         break;
 792 
 793                 case mulss:
 794                         info->op = fex_mul;
 795                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 796                             &info->res.val.f);
 797 
 798                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 799                                 subnorm = 1;
 800 
 801                         break;
 802 
 803                 case divss:
 804                         info->op = fex_div;
 805                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 806                             &info->res.val.f);
 807 
 808                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 809                                 subnorm = 1;
 810 
 811                         break;
 812 
 813                 case sqrtss:
 814                         info->op = fex_sqrt;
 815                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 816                         break;
 817 
 818                 case cvtss2sd:
 819                         info->op = fex_cnvt;
 820                         info->res.type = fex_double;
 821                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 822                         break;
 823 
 824                 case cvtsi2ss:
 825                         info->op = fex_cnvt;
 826                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 827                         break;
 828 
 829                 case cvttss2si:
 830                         info->op = fex_cnvt;
 831                         info->res.type = fex_int;
 832                         sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
 833                         break;
 834 
 835                 case cvtss2si:
 836                         info->op = fex_cnvt;
 837                         info->res.type = fex_int;
 838                         sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
 839                         break;
 840 
 841 #ifdef __amd64
 842                 case cvtsi2ssq:
 843                         info->op = fex_cnvt;
 844                         sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
 845                         break;
 846 
 847                 case cvttss2siq:
 848                         info->op = fex_cnvt;
 849                         info->res.type = fex_llong;
 850                         sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
 851                         break;
 852 
 853                 case cvtss2siq:
 854                         info->op = fex_cnvt;
 855                         info->res.type = fex_llong;
 856                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 857                         break;
 858 #endif
 859 
 860                 case ucomiss:
 861                         info->op = fex_cmp;
 862                         info->res.type = fex_nodata;
 863                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 864                         break;
 865 
 866                 case comiss:
 867                         info->op = fex_cmp;
 868                         info->res.type = fex_nodata;
 869                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 870                         break;
 871                 default:
 872                         break;
 873                 }
 874         }
 875 
 876         __fenv_getmxcsr(&mxcsr);
 877         info->flags = mxcsr & 0x3d;
 878         __fenv_setmxcsr(&oldmxcsr);
 879 
 880         /* determine which exception would have been trapped */
 881         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
 882             0x3d;
 883         e = mxcsr & te;
 884 
 885         if (e & FE_INVALID)
 886                 return (__fex_get_sse_invalid_type(inst));
 887 
 888         if (e & FE_DIVBYZERO)
 889                 return (fex_division);
 890 
 891         if (e & FE_OVERFLOW)
 892                 return (fex_overflow);
 893 
 894         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 895                 return (fex_underflow);
 896 
 897         if (e & FE_INEXACT)
 898                 return (fex_inexact);
 899 
 900         return ((enum fex_exception)-1);
 901 }
 902 
 903 /*
 904  * Emulate a SIMD SSE instruction to determine which exceptions occur
 905  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 906  * trapped exception that would occur if the i-th part of the SIMD
 907  * instruction were executed in isolation; set e[i] to -1 if no
 908  * trapped exception would occur in this part.  Also fill in info[i]
 909  * with the corresponding operands, default untrapped result, and
 910  * flags.
 911  *
 912  * This routine does not work if the instruction specified by *inst
 913  * is not a SIMD instruction.
 914  */
 915 void
 916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 917     fex_info_t *info)
 918 {
 919         sseinst_t dummy;
 920         int i;
 921 
 922         e[0] = e[1] = e[2] = e[3] = -1;
 923 
 924         /* perform each part of the SIMD operation */
 925         switch (inst->op) {
 926         case cmpps:
 927                 dummy.op = cmpss;
 928                 dummy.imm = inst->imm;
 929 
 930                 for (i = 0; i < 4; i++) {
 931                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 932                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 933                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 934                 }
 935 
 936                 break;
 937 
 938         case minps:
 939                 dummy.op = minss;
 940 
 941                 for (i = 0; i < 4; i++) {
 942                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 943                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 944                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 945                 }
 946 
 947                 break;
 948 
 949         case maxps:
 950                 dummy.op = maxss;
 951 
 952                 for (i = 0; i < 4; i++) {
 953                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 954                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 955                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 956                 }
 957 
 958                 break;
 959 
 960         case addps:
 961                 dummy.op = addss;
 962 
 963                 for (i = 0; i < 4; i++) {
 964                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 965                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 966                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 967                 }
 968 
 969                 break;
 970 
 971         case subps:
 972                 dummy.op = subss;
 973 
 974                 for (i = 0; i < 4; i++) {
 975                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 976                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 977                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 978                 }
 979 
 980                 break;
 981 
 982         case mulps:
 983                 dummy.op = mulss;
 984 
 985                 for (i = 0; i < 4; i++) {
 986                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 987                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 988                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 989                 }
 990 
 991                 break;
 992 
 993         case divps:
 994                 dummy.op = divss;
 995 
 996                 for (i = 0; i < 4; i++) {
 997                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 998                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 999                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000                 }
1001 
1002                 break;
1003 
1004         case sqrtps:
1005                 dummy.op = sqrtss;
1006 
1007                 for (i = 0; i < 4; i++) {
1008                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1009                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1010                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1011                 }
1012 
1013                 break;
1014 
1015         case cvtdq2ps:
1016                 dummy.op = cvtsi2ss;
1017 
1018                 for (i = 0; i < 4; i++) {
1019                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022                 }
1023 
1024                 break;
1025 
1026         case cvttps2dq:
1027                 dummy.op = cvttss2si;
1028 
1029                 for (i = 0; i < 4; i++) {
1030                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1031                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1032                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1033                 }
1034 
1035                 break;
1036 
1037         case cvtps2dq:
1038                 dummy.op = cvtss2si;
1039 
1040                 for (i = 0; i < 4; i++) {
1041                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1042                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1043                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044                 }
1045 
1046                 break;
1047 
1048         case cvtpi2ps:
1049                 dummy.op = cvtsi2ss;
1050 
1051                 for (i = 0; i < 2; i++) {
1052                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1053                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055                 }
1056 
1057                 break;
1058 
1059         case cvttps2pi:
1060                 dummy.op = cvttss2si;
1061 
1062                 for (i = 0; i < 2; i++) {
1063                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1064                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1065                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1066                 }
1067 
1068                 break;
1069 
1070         case cvtps2pi:
1071                 dummy.op = cvtss2si;
1072 
1073                 for (i = 0; i < 2; i++) {
1074                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1075                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1076                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077                 }
1078 
1079                 break;
1080 
1081         case cmppd:
1082                 dummy.op = cmpsd;
1083                 dummy.imm = inst->imm;
1084 
1085                 for (i = 0; i < 2; i++) {
1086                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1087                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1088                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1089                 }
1090 
1091                 break;
1092 
1093         case minpd:
1094                 dummy.op = minsd;
1095 
1096                 for (i = 0; i < 2; i++) {
1097                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1098                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1099                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1100                 }
1101 
1102                 break;
1103 
1104         case maxpd:
1105                 dummy.op = maxsd;
1106 
1107                 for (i = 0; i < 2; i++) {
1108                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1109                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1110                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1111                 }
1112 
1113                 break;
1114 
1115         case addpd:
1116                 dummy.op = addsd;
1117 
1118                 for (i = 0; i < 2; i++) {
1119                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1120                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1121                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1122                 }
1123 
1124                 break;
1125 
1126         case subpd:
1127                 dummy.op = subsd;
1128 
1129                 for (i = 0; i < 2; i++) {
1130                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1131                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133                 }
1134 
1135                 break;
1136 
1137         case mulpd:
1138                 dummy.op = mulsd;
1139 
1140                 for (i = 0; i < 2; i++) {
1141                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1142                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1143                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1144                 }
1145 
1146                 break;
1147 
1148         case divpd:
1149                 dummy.op = divsd;
1150 
1151                 for (i = 0; i < 2; i++) {
1152                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1153                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1154                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1155                 }
1156 
1157                 break;
1158 
1159         case sqrtpd:
1160                 dummy.op = sqrtsd;
1161 
1162                 for (i = 0; i < 2; i++) {
1163                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1164                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1165                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1166                 }
1167 
1168                 break;
1169 
1170         case cvtpi2pd:
1171         case cvtdq2pd:
1172                 dummy.op = cvtsi2sd;
1173 
1174                 for (i = 0; i < 2; i++) {
1175                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1176                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1177                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1178                 }
1179 
1180                 break;
1181 
1182         case cvttpd2pi:
1183         case cvttpd2dq:
1184                 dummy.op = cvttsd2si;
1185 
1186                 for (i = 0; i < 2; i++) {
1187                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1188                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1189                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1190                 }
1191 
1192                 break;
1193 
1194         case cvtpd2pi:
1195         case cvtpd2dq:
1196                 dummy.op = cvtsd2si;
1197 
1198                 for (i = 0; i < 2; i++) {
1199                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1200                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1201                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1202                 }
1203 
1204                 break;
1205 
1206         case cvtps2pd:
1207                 dummy.op = cvtss2sd;
1208 
1209                 for (i = 0; i < 2; i++) {
1210                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1211                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1212                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1213                 }
1214 
1215                 break;
1216 
1217         case cvtpd2ps:
1218                 dummy.op = cvtsd2ss;
1219 
1220                 for (i = 0; i < 2; i++) {
1221                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1222                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1223                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1224                 }
1225 
1226         default:
1227                 break;
1228         }
1229 }
1230 
1231 /*
1232  * Store the result value from *info in the destination of the scalar
1233  * SSE instruction specified by *inst.  If no result is given but the
1234  * exception is underflow or overflow, supply the default trapped result.
1235  *
1236  * This routine does not work if the instruction specified by *inst
1237  * is not a scalar instruction.
1238  */
1239 void
1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1241     fex_info_t *info)
1242 {
1243         int i = 0;
1244         long long l = 0L;
1245         float f = 0.0, fscl;
1246         double d = 0.0L, dscl;
1247 
1248         /*
1249          * for compares that write eflags, just set the flags
1250          * to indicate "unordered"
1251          */
1252         if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
1253             inst->op == comisd) {
1254                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1255                 return;
1256         }
1257 
1258         /*
1259          * if info doesn't specify a result value, try to generate
1260          * the default trapped result
1261          */
1262         if (info->res.type == fex_nodata) {
1263                 /* set scale factors for exponent wrapping */
1264                 switch (e) {
1265                 case fex_overflow:
1266                         fscl = 1.262177448e-29f;                /* 2^-96 */
1267                         dscl = 6.441148769597133308e-232;       /* 2^-768 */
1268                         break;
1269 
1270                 case fex_underflow:
1271                         fscl = 7.922816251e+28f;                /* 2^96 */
1272                         dscl = 1.552518092300708935e+231;       /* 2^768 */
1273                         break;
1274 
1275                 default:
1276                         (void) __fex_get_sse_op(uap, inst, info);
1277 
1278                         if (info->res.type == fex_nodata)
1279                                 return;
1280 
1281                         goto stuff;
1282                 }
1283 
1284                 /* generate the wrapped result */
1285                 if (inst->op == cvtsd2ss) {
1286                         info->op1.type = fex_double;
1287                         info->op1.val.d = inst->op2->d[0];
1288                         info->op2.type = fex_nodata;
1289                         info->res.type = fex_float;
1290                         info->res.val.f = (float)(fscl * (fscl *
1291                             info->op1.val.d));
1292                 } else if ((int)inst->op & DOUBLE) {
1293                         info->op1.type = fex_double;
1294                         info->op1.val.d = inst->op1->d[0];
1295                         info->op2.type = fex_double;
1296                         info->op2.val.d = inst->op2->d[0];
1297                         info->res.type = fex_double;
1298 
1299                         switch (inst->op) {
1300                         case addsd:
1301                                 info->res.val.d = dscl * (dscl *
1302                                     info->op1.val.d + dscl * info->op2.val.d);
1303                                 break;
1304 
1305                         case subsd:
1306                                 info->res.val.d = dscl * (dscl *
1307                                     info->op1.val.d - dscl * info->op2.val.d);
1308                                 break;
1309 
1310                         case mulsd:
1311                                 info->res.val.d = (dscl * info->op1.val.d) *
1312                                     (dscl * info->op2.val.d);
1313                                 break;
1314 
1315                         case divsd:
1316                                 info->res.val.d = (dscl * info->op1.val.d) /
1317                                     (info->op2.val.d / dscl);
1318                                 break;
1319 
1320                         default:
1321                                 return;
1322                         }
1323                 } else {
1324                         info->op1.type = fex_float;
1325                         info->op1.val.f = inst->op1->f[0];
1326                         info->op2.type = fex_float;
1327                         info->op2.val.f = inst->op2->f[0];
1328                         info->res.type = fex_float;
1329 
1330                         switch (inst->op) {
1331                         case addss:
1332                                 info->res.val.f = fscl * (fscl *
1333                                     info->op1.val.f + fscl * info->op2.val.f);
1334                                 break;
1335 
1336                         case subss:
1337                                 info->res.val.f = fscl * (fscl *
1338                                     info->op1.val.f - fscl * info->op2.val.f);
1339                                 break;
1340 
1341                         case mulss:
1342                                 info->res.val.f = (fscl * info->op1.val.f) *
1343                                     (fscl * info->op2.val.f);
1344                                 break;
1345 
1346                         case divss:
1347                                 info->res.val.f = (fscl * info->op1.val.f) /
1348                                     (info->op2.val.f / fscl);
1349                                 break;
1350 
1351                         default:
1352                                 return;
1353                         }
1354                 }
1355         }
1356 
1357         /* put the result in the destination */
1358 stuff:
1359         if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
1360             cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1361                 switch (info->res.type) {
1362                 case fex_int:
1363                         i = info->res.val.i;
1364                         break;
1365 
1366                 case fex_llong:
1367                         i = info->res.val.l;
1368                         break;
1369 
1370                 case fex_float:
1371                         i = info->res.val.f;
1372                         break;
1373 
1374                 case fex_double:
1375                         i = info->res.val.d;
1376                         break;
1377 
1378                 case fex_ldouble:
1379                         i = info->res.val.q;
1380                         break;
1381 
1382                 default:
1383                         break;
1384                 }
1385 
1386                 inst->op1->i[0] = i;
1387         } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
1388             cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
1389                 switch (info->res.type) {
1390                 case fex_int:
1391                         l = info->res.val.i;
1392                         break;
1393 
1394                 case fex_llong:
1395                         l = info->res.val.l;
1396                         break;
1397 
1398                 case fex_float:
1399                         l = info->res.val.f;
1400                         break;
1401 
1402                 case fex_double:
1403                         l = info->res.val.d;
1404                         break;
1405 
1406                 case fex_ldouble:
1407                         l = info->res.val.q;
1408                         break;
1409 
1410                 default:
1411                         break;
1412                 }
1413 
1414                 inst->op1->l[0] = l;
1415         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1416             inst->op == cvtss2sd) {
1417                 switch (info->res.type) {
1418                 case fex_int:
1419                         d = info->res.val.i;
1420                         break;
1421 
1422                 case fex_llong:
1423                         d = info->res.val.l;
1424                         break;
1425 
1426                 case fex_float:
1427                         d = info->res.val.f;
1428                         break;
1429 
1430                 case fex_double:
1431                         d = info->res.val.d;
1432                         break;
1433 
1434                 case fex_ldouble:
1435                         d = info->res.val.q;
1436                         break;
1437 
1438                 default:
1439                         break;
1440                 }
1441 
1442                 inst->op1->d[0] = d;
1443         } else {
1444                 switch (info->res.type) {
1445                 case fex_int:
1446                         f = info->res.val.i;
1447                         break;
1448 
1449                 case fex_llong:
1450                         f = info->res.val.l;
1451                         break;
1452 
1453                 case fex_float:
1454                         f = info->res.val.f;
1455                         break;
1456 
1457                 case fex_double:
1458                         f = info->res.val.d;
1459                         break;
1460 
1461                 case fex_ldouble:
1462                         f = info->res.val.q;
1463                         break;
1464 
1465                 default:
1466                         break;
1467                 }
1468 
1469                 inst->op1->f[0] = f;
1470         }
1471 }
1472 
1473 /*
1474  * Store the results from a SIMD instruction.  For each i, store
1475  * the result value from info[i] in the i-th part of the destination
1476  * of the SIMD SSE instruction specified by *inst.  If no result
1477  * is given but the exception indicated by e[i] is underflow or
1478  * overflow, supply the default trapped result.
1479  *
1480  * This routine does not work if the instruction specified by *inst
1481  * is not a SIMD instruction.
1482  */
1483 void
1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1485     fex_info_t *info)
1486 {
1487         sseinst_t dummy;
1488         int i;
1489 
1490         /* store each part */
1491         switch (inst->op) {
1492         case cmpps:
1493                 dummy.op = cmpss;
1494                 dummy.imm = inst->imm;
1495 
1496                 for (i = 0; i < 4; i++) {
1497                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1498                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1499                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500                 }
1501 
1502                 break;
1503 
1504         case minps:
1505                 dummy.op = minss;
1506 
1507                 for (i = 0; i < 4; i++) {
1508                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1509                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1510                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1511                 }
1512 
1513                 break;
1514 
1515         case maxps:
1516                 dummy.op = maxss;
1517 
1518                 for (i = 0; i < 4; i++) {
1519                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1520                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1521                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1522                 }
1523 
1524                 break;
1525 
1526         case addps:
1527                 dummy.op = addss;
1528 
1529                 for (i = 0; i < 4; i++) {
1530                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1531                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1532                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1533                 }
1534 
1535                 break;
1536 
1537         case subps:
1538                 dummy.op = subss;
1539 
1540                 for (i = 0; i < 4; i++) {
1541                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1542                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1543                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544                 }
1545 
1546                 break;
1547 
1548         case mulps:
1549                 dummy.op = mulss;
1550 
1551                 for (i = 0; i < 4; i++) {
1552                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1553                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1554                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555                 }
1556 
1557                 break;
1558 
1559         case divps:
1560                 dummy.op = divss;
1561 
1562                 for (i = 0; i < 4; i++) {
1563                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1564                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1565                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566                 }
1567 
1568                 break;
1569 
1570         case sqrtps:
1571                 dummy.op = sqrtss;
1572 
1573                 for (i = 0; i < 4; i++) {
1574                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1575                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1576                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1577                 }
1578 
1579                 break;
1580 
1581         case cvtdq2ps:
1582                 dummy.op = cvtsi2ss;
1583 
1584                 for (i = 0; i < 4; i++) {
1585                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1586                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1587                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1588                 }
1589 
1590                 break;
1591 
1592         case cvttps2dq:
1593                 dummy.op = cvttss2si;
1594 
1595                 for (i = 0; i < 4; i++) {
1596                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1597                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1598                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599                 }
1600 
1601                 break;
1602 
1603         case cvtps2dq:
1604                 dummy.op = cvtss2si;
1605 
1606                 for (i = 0; i < 4; i++) {
1607                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1608                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1609                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1610                 }
1611 
1612                 break;
1613 
1614         case cvtpi2ps:
1615                 dummy.op = cvtsi2ss;
1616 
1617                 for (i = 0; i < 2; i++) {
1618                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1619                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1620                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1621                 }
1622 
1623                 break;
1624 
1625         case cvttps2pi:
1626                 dummy.op = cvttss2si;
1627 
1628                 for (i = 0; i < 2; i++) {
1629                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1630                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1631                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1632                 }
1633 
1634                 break;
1635 
1636         case cvtps2pi:
1637                 dummy.op = cvtss2si;
1638 
1639                 for (i = 0; i < 2; i++) {
1640                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1641                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1642                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1643                 }
1644 
1645                 break;
1646 
1647         case cmppd:
1648                 dummy.op = cmpsd;
1649                 dummy.imm = inst->imm;
1650 
1651                 for (i = 0; i < 2; i++) {
1652                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1653                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1654                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1655                 }
1656 
1657                 break;
1658 
1659         case minpd:
1660                 dummy.op = minsd;
1661 
1662                 for (i = 0; i < 2; i++) {
1663                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1664                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1665                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1666                 }
1667 
1668                 break;
1669 
1670         case maxpd:
1671                 dummy.op = maxsd;
1672 
1673                 for (i = 0; i < 2; i++) {
1674                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1675                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1676                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1677                 }
1678 
1679                 break;
1680 
1681         case addpd:
1682                 dummy.op = addsd;
1683 
1684                 for (i = 0; i < 2; i++) {
1685                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1686                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1687                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1688                 }
1689 
1690                 break;
1691 
1692         case subpd:
1693                 dummy.op = subsd;
1694 
1695                 for (i = 0; i < 2; i++) {
1696                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1697                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1698                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1699                 }
1700 
1701                 break;
1702 
1703         case mulpd:
1704                 dummy.op = mulsd;
1705 
1706                 for (i = 0; i < 2; i++) {
1707                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1708                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1709                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1710                 }
1711 
1712                 break;
1713 
1714         case divpd:
1715                 dummy.op = divsd;
1716 
1717                 for (i = 0; i < 2; i++) {
1718                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1719                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1720                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1721                 }
1722 
1723                 break;
1724 
1725         case sqrtpd:
1726                 dummy.op = sqrtsd;
1727 
1728                 for (i = 0; i < 2; i++) {
1729                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1730                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1731                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1732                 }
1733 
1734                 break;
1735 
1736         case cvtpi2pd:
1737         case cvtdq2pd:
1738                 dummy.op = cvtsi2sd;
1739 
1740                 for (i = 0; i < 2; i++) {
1741                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1742                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1743                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1744                 }
1745 
1746                 break;
1747 
1748         case cvttpd2pi:
1749         case cvttpd2dq:
1750                 dummy.op = cvttsd2si;
1751 
1752                 for (i = 0; i < 2; i++) {
1753                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1754                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1755                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1756                 }
1757 
1758                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1759                 if (inst->op == cvttpd2dq)
1760                         inst->op1->l[1] = 0ll;
1761 
1762                 break;
1763 
1764         case cvtpd2pi:
1765         case cvtpd2dq:
1766                 dummy.op = cvtsd2si;
1767 
1768                 for (i = 0; i < 2; i++) {
1769                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1770                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1771                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1772                 }
1773 
1774                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1775                 if (inst->op == cvtpd2dq)
1776                         inst->op1->l[1] = 0ll;
1777 
1778                 break;
1779 
1780         case cvtps2pd:
1781                 dummy.op = cvtss2sd;
1782 
1783                 for (i = 0; i < 2; i++) {
1784                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1785                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1786                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1787                 }
1788 
1789                 break;
1790 
1791         case cvtpd2ps:
1792                 dummy.op = cvtsd2ss;
1793 
1794                 for (i = 0; i < 2; i++) {
1795                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1796                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1797                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1798                 }
1799 
1800                 /* zero the high 64 bits of the destination */
1801                 inst->op1->l[1] = 0ll;
1802 
1803         default:
1804                 break;
1805         }
1806 }