1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
#include <fenv.h>
#include <string.h>
#include <ucontext.h>
#if defined(__SUNPRO_C)
#include <sunmath.h>
#else
#include <sys/ieeefp.h>
#endif
#include "fex_handler.h"
#include "fenv_inlines.h"
  39 
  40 #if !defined(REG_PC)
  41 #define REG_PC  EIP
  42 #endif
  43 
  44 #if !defined(REG_PS)
  45 #define REG_PS  EFL
  46 #endif
  47 
  48 #ifdef __amd64
  49 #define regno(X)        ((X < 4)? REG_RAX - X : \
  50                         ((X > 4)? REG_RAX + 1 - X : REG_RSP))
  51 #else
  52 #define regno(X)        (EAX - X)
  53 #endif
  54 
  55 /*
  56  * Support for SSE instructions
  57  */
  58 
  59 /*
  60  * Decode an SSE instruction.  Fill in *inst and return the length of the
  61  * instruction in bytes.  Return 0 if the instruction is not recognized.
  62  */
  63 int
  64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  65 {
  66         unsigned char   *ip;
  67         char            *addr;
  68         int             i, dbl, simd, rex, modrm, sib, r;
  69 
  70         i = 0;
  71         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  72 
  73         /* look for pseudo-prefixes */
  74         dbl = 0;
  75         simd = SIMD;
  76         if (ip[i] == 0xF3) {
  77                 simd = 0;
  78                 i++;
  79         } else if (ip[i] == 0x66) {
  80                 dbl = DOUBLE;
  81                 i++;
  82         } else if (ip[i] == 0xF2) {
  83                 dbl = DOUBLE;
  84                 simd = 0;
  85                 i++;
  86         }
  87 
  88         /* look for AMD64 REX prefix */
  89         rex = 0;
  90         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  91                 rex = ip[i];
  92                 i++;
  93         }
  94 
  95         /* parse opcode */
  96         if (ip[i++] != 0x0F)
  97                 return 0;
  98         switch (ip[i++]) {
  99         case 0x2A:
 100                 inst->op = (int)cvtsi2ss + simd + dbl;
 101                 if (!simd)
 102                         inst->op = (int)inst->op + (rex & 8);
 103                 break;
 104 
 105         case 0x2C:
 106                 inst->op = (int)cvttss2si + simd + dbl;
 107                 if (!simd)
 108                         inst->op = (int)inst->op + (rex & 8);
 109                 break;
 110 
 111         case 0x2D:
 112                 inst->op = (int)cvtss2si + simd + dbl;
 113                 if (!simd)
 114                         inst->op = (int)inst->op + (rex & 8);
 115                 break;
 116 
 117         case 0x2E:
 118                 /* oddball: scalar instruction in a SIMD opcode group */
 119                 if (!simd)
 120                         return 0;
 121                 inst->op = (int)ucomiss + dbl;
 122                 break;
 123 
 124         case 0x2F:
 125                 /* oddball: scalar instruction in a SIMD opcode group */
 126                 if (!simd)
 127                         return 0;
 128                 inst->op = (int)comiss + dbl;
 129                 break;
 130 
 131         case 0x51:
 132                 inst->op = (int)sqrtss + simd + dbl;
 133                 break;
 134 
 135         case 0x58:
 136                 inst->op = (int)addss + simd + dbl;
 137                 break;
 138 
 139         case 0x59:
 140                 inst->op = (int)mulss + simd + dbl;
 141                 break;
 142 
 143         case 0x5A:
 144                 inst->op = (int)cvtss2sd + simd + dbl;
 145                 break;
 146 
 147         case 0x5B:
 148                 if (dbl) {
 149                         if (simd)
 150                                 inst->op = cvtps2dq;
 151                         else
 152                                 return 0;
 153                 } else {
 154                         inst->op = (simd)? cvtdq2ps : cvttps2dq;
 155                 }
 156                 break;
 157 
 158         case 0x5C:
 159                 inst->op = (int)subss + simd + dbl;
 160                 break;
 161 
 162         case 0x5D:
 163                 inst->op = (int)minss + simd + dbl;
 164                 break;
 165 
 166         case 0x5E:
 167                 inst->op = (int)divss + simd + dbl;
 168                 break;
 169 
 170         case 0x5F:
 171                 inst->op = (int)maxss + simd + dbl;
 172                 break;
 173 
 174         case 0xC2:
 175                 inst->op = (int)cmpss + simd + dbl;
 176                 break;
 177 
 178         case 0xE6:
 179                 if (simd) {
 180                         if (dbl)
 181                                 inst->op = cvttpd2dq;
 182                         else
 183                                 return 0;
 184                 } else {
 185                         inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
 186                 }
 187                 break;
 188 
 189         default:
 190                 return 0;
 191         }
 192 
 193         /* locate operands */
 194         modrm = ip[i++];
 195 
 196         if (inst->op == cvtss2si || inst->op == cvttss2si ||
 197             inst->op == cvtsd2si || inst->op == cvttsd2si ||
 198             inst->op == cvtss2siq || inst->op == cvttss2siq ||
 199             inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
 200                 /* op1 is a gp register */
 201                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 202                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 203         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
 204             inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
 205                 /* op1 is a mmx register */
 206 #ifdef __amd64
 207                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
 208                     fpchip_state.st[(modrm >> 3) & 7];
 209 #else
 210                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 211                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 212                     fpchip_state.state[7]);
 213 #endif
 214         } else {
 215                 /* op1 is a xmm register */
 216                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 217                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 218                     fp_reg_set.fpchip_state.xmm[r];
 219         }
 220 
 221         if ((modrm >> 6) == 3) {
 222                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
 223                     inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
 224                         /* op2 is a gp register */
 225                         r = ((rex & 1) << 3) | (modrm & 7);
 226                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
 227                             gregs[regno(r)];
 228                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 229                         /* op2 is a mmx register */
 230 #ifdef __amd64
 231                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 232                             fp_reg_set.fpchip_state.st[modrm & 7];
 233 #else
 234                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 235                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 236                             fpchip_state.state[7]);
 237 #endif
 238                 } else {
 239                         /* op2 is a xmm register */
 240                         r = ((rex & 1) << 3) | (modrm & 7);
 241                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 242                             fp_reg_set.fpchip_state.xmm[r];
 243                 }
 244         } else if ((modrm & 0xc7) == 0x05) {
 245 #ifdef __amd64
 246                 /* address of next instruction + offset */
 247                 r = i + 4;
 248                 if (inst->op == cmpss || inst->op == cmpps ||
 249                     inst->op == cmpsd || inst->op == cmppd)
 250                         r++;
 251                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 252 #else
 253                 /* absolute address */
 254                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 255 #endif
 256                 i += 4;
 257         } else {
 258                 /* complex address */
 259                 if ((modrm & 7) == 4) {
 260                         /* parse sib byte */
 261                         sib = ip[i++];
 262                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 263                                 /* start with absolute address */
 264                                 addr = (char *)(uintptr_t)(*(int *)(ip + i));
 265                                 i += 4;
 266                         } else {
 267                                 /* start with base */
 268                                 r = ((rex & 1) << 3) | (sib & 7);
 269                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 270                         }
 271                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);
 272                         if (r != 4) {
 273                                 /* add scaled index */
 274                                 addr += uap->uc_mcontext.gregs[regno(r)]
 275                                     << (sib >> 6);
 276                         }
 277                 } else {
 278                         r = ((rex & 1) << 3) | (modrm & 7);
 279                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 280                 }
 281 
 282                 /* add displacement, if any */
 283                 if ((modrm >> 6) == 1) {
 284                         addr += (char)ip[i++];
 285                 } else if ((modrm >> 6) == 2) {
 286                         addr += *(int *)(ip + i);
 287                         i += 4;
 288                 }
 289                 inst->op2 = (sseoperand_t *)addr;
 290         }
 291 
 292         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 293             inst->op == cmppd) {
 294                 /* get the immediate operand */
 295                 inst->imm = ip[i++];
 296         }
 297 
 298         return i;
 299 }
 300 
 301 static enum fp_class_type
 302 my_fp_classf(float *x)
 303 {
 304         int     i = *(int *)x & ~0x80000000;
 305 
 306         if (i < 0x7f800000) {
 307                 if (i < 0x00800000)
 308                         return ((i == 0)? fp_zero : fp_subnormal);
 309                 return fp_normal;
 310         }
 311         else if (i == 0x7f800000)
 312                 return fp_infinity;
 313         else if (i & 0x400000)
 314                 return fp_quiet;
 315         else
 316                 return fp_signaling;
 317 }
 318 
 319 static enum fp_class_type
 320 my_fp_class(double *x)
 321 {
 322         int     i = *(1+(int *)x) & ~0x80000000;
 323 
 324         if (i < 0x7ff00000) {
 325                 if (i < 0x00100000)
 326                         return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
 327                 return fp_normal;
 328         }
 329         else if (i == 0x7ff00000 && *(int *)x == 0)
 330                 return fp_infinity;
 331         else if (i & 0x80000)
 332                 return fp_quiet;
 333         else
 334                 return fp_signaling;
 335 }
 336 
 337 /*
 338  * Inspect a scalar SSE instruction that incurred an invalid operation
 339  * exception to determine which type of exception it was.
 340  */
 341 static enum fex_exception
 342 __fex_get_sse_invalid_type(sseinst_t *inst)
 343 {
 344         enum fp_class_type      t1, t2;
 345 
 346         /* check op2 for signaling nan */
 347         t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
 348             my_fp_classf(&inst->op2->f[0]);
 349         if (t2 == fp_signaling)
 350                 return fex_inv_snan;
 351 
 352         /* eliminate all single-operand instructions */
 353         switch (inst->op) {
 354         case cvtsd2ss:
 355         case cvtss2sd:
 356                 /* hmm, this shouldn't have happened */
 357                 return (enum fex_exception) -1;
 358 
 359         case sqrtss:
 360         case sqrtsd:
 361                 return fex_inv_sqrt;
 362 
 363         case cvtss2si:
 364         case cvtsd2si:
 365         case cvttss2si:
 366         case cvttsd2si:
 367         case cvtss2siq:
 368         case cvtsd2siq:
 369         case cvttss2siq:
 370         case cvttsd2siq:
 371                 return fex_inv_int;
 372         default:
 373                 break;
 374         }
 375 
 376         /* check op1 for signaling nan */
 377         t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
 378             my_fp_classf(&inst->op1->f[0]);
 379         if (t1 == fp_signaling)
 380                 return fex_inv_snan;
 381 
 382         /* check two-operand instructions for other cases */
 383         switch (inst->op) {
 384         case cmpss:
 385         case cmpsd:
 386         case minss:
 387         case minsd:
 388         case maxss:
 389         case maxsd:
 390         case comiss:
 391         case comisd:
 392                 return fex_inv_cmp;
 393 
 394         case addss:
 395         case addsd:
 396         case subss:
 397         case subsd:
 398                 if (t1 == fp_infinity && t2 == fp_infinity)
 399                         return fex_inv_isi;
 400                 break;
 401 
 402         case mulss:
 403         case mulsd:
 404                 if ((t1 == fp_zero && t2 == fp_infinity) ||
 405                     (t2 == fp_zero && t1 == fp_infinity))
 406                         return fex_inv_zmi;
 407                 break;
 408 
 409         case divss:
 410         case divsd:
 411                 if (t1 == fp_zero && t2 == fp_zero)
 412                         return fex_inv_zdz;
 413                 if (t1 == fp_infinity && t2 == fp_infinity)
 414                         return fex_inv_idi;
 415         default:
 416                 break;
 417         }
 418 
 419         return (enum fex_exception)-1;
 420 }
 421 
 422 /* inline templates */
 423 extern void sse_cmpeqss(float *, float *, int *);
 424 extern void sse_cmpltss(float *, float *, int *);
 425 extern void sse_cmpless(float *, float *, int *);
 426 extern void sse_cmpunordss(float *, float *, int *);
 427 extern void sse_minss(float *, float *, float *);
 428 extern void sse_maxss(float *, float *, float *);
 429 extern void sse_addss(float *, float *, float *);
 430 extern void sse_subss(float *, float *, float *);
 431 extern void sse_mulss(float *, float *, float *);
 432 extern void sse_divss(float *, float *, float *);
 433 extern void sse_sqrtss(float *, float *);
 434 extern void sse_ucomiss(float *, float *);
 435 extern void sse_comiss(float *, float *);
 436 extern void sse_cvtss2sd(float *, double *);
 437 extern void sse_cvtsi2ss(int *, float *);
 438 extern void sse_cvttss2si(float *, int *);
 439 extern void sse_cvtss2si(float *, int *);
 440 #ifdef __amd64
 441 extern void sse_cvtsi2ssq(long long *, float *);
 442 extern void sse_cvttss2siq(float *, long long *);
 443 extern void sse_cvtss2siq(float *, long long *);
 444 #endif
 445 extern void sse_cmpeqsd(double *, double *, long long *);
 446 extern void sse_cmpltsd(double *, double *, long long *);
 447 extern void sse_cmplesd(double *, double *, long long *);
 448 extern void sse_cmpunordsd(double *, double *, long long *);
 449 extern void sse_minsd(double *, double *, double *);
 450 extern void sse_maxsd(double *, double *, double *);
 451 extern void sse_addsd(double *, double *, double *);
 452 extern void sse_subsd(double *, double *, double *);
 453 extern void sse_mulsd(double *, double *, double *);
 454 extern void sse_divsd(double *, double *, double *);
 455 extern void sse_sqrtsd(double *, double *);
 456 extern void sse_ucomisd(double *, double *);
 457 extern void sse_comisd(double *, double *);
 458 extern void sse_cvtsd2ss(double *, float *);
 459 extern void sse_cvtsi2sd(int *, double *);
 460 extern void sse_cvttsd2si(double *, int *);
 461 extern void sse_cvtsd2si(double *, int *);
 462 #ifdef __amd64
 463 extern void sse_cvtsi2sdq(long long *, double *);
 464 extern void sse_cvttsd2siq(double *, long long *);
 465 extern void sse_cvtsd2siq(double *, long long *);
 466 #endif
 467 
 468 /*
 469  * Fill in *info with the operands, default untrapped result, and
 470  * flags produced by a scalar SSE instruction, and return the type
 471  * of trapped exception (if any).  On entry, the mxcsr must have
 472  * all exceptions masked and all flags clear.  The same conditions
 473  * will hold on exit.
 474  *
 475  * This routine does not work if the instruction specified by *inst
 476  * is not a scalar instruction.
 477  */
 478 enum fex_exception
 479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 480 {
 481         unsigned int    e, te, mxcsr, oldmxcsr, subnorm;
 482 
 483         /*
 484          * Perform the operation with traps disabled and check the
 485          * exception flags.  If the underflow trap was enabled, also
 486          * check for an exact subnormal result.
 487          */
 488         __fenv_getmxcsr(&oldmxcsr);
 489         subnorm = 0;
 490         if ((int)inst->op & DOUBLE) {
 491                 if (inst->op == cvtsi2sd) {
 492                         info->op1.type = fex_int;
 493                         info->op1.val.i = inst->op2->i[0];
 494                         info->op2.type = fex_nodata;
 495                 } else if (inst->op == cvtsi2sdq) {
 496                         info->op1.type = fex_llong;
 497                         info->op1.val.l = inst->op2->l[0];
 498                         info->op2.type = fex_nodata;
 499                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 500                     inst->op == cvttsd2si || inst->op == cvtsd2si ||
 501                     inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
 502                         info->op1.type = fex_double;
 503                         info->op1.val.d = inst->op2->d[0];
 504                         info->op2.type = fex_nodata;
 505                 } else {
 506                         info->op1.type = fex_double;
 507                         info->op1.val.d = inst->op1->d[0];
 508                         info->op2.type = fex_double;
 509                         info->op2.val.d = inst->op2->d[0];
 510                 }
 511                 info->res.type = fex_double;
 512                 switch (inst->op) {
 513                 case cmpsd:
 514                         info->op = fex_cmp;
 515                         info->res.type = fex_llong;
 516                         switch (inst->imm & 3) {
 517                         case 0:
 518                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 519                                     &info->res.val.l);
 520                                 break;
 521 
 522                         case 1:
 523                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 524                                     &info->res.val.l);
 525                                 break;
 526 
 527                         case 2:
 528                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 529                                     &info->res.val.l);
 530                                 break;
 531 
 532                         case 3:
 533                                 sse_cmpunordsd(&info->op1.val.d,
 534                                     &info->op2.val.d, &info->res.val.l);
 535                         }
 536                         if (inst->imm & 4)
 537                                 info->res.val.l ^= 0xffffffffffffffffull;
 538                         break;
 539 
 540                 case minsd:
 541                         info->op = fex_other;
 542                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 543                             &info->res.val.d);
 544                         break;
 545 
 546                 case maxsd:
 547                         info->op = fex_other;
 548                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 549                             &info->res.val.d);
 550                         break;
 551 
 552                 case addsd:
 553                         info->op = fex_add;
 554                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 555                             &info->res.val.d);
 556                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 557                                 subnorm = 1;
 558                         break;
 559 
 560                 case subsd:
 561                         info->op = fex_sub;
 562                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 563                             &info->res.val.d);
 564                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 565                                 subnorm = 1;
 566                         break;
 567 
 568                 case mulsd:
 569                         info->op = fex_mul;
 570                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 571                             &info->res.val.d);
 572                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 573                                 subnorm = 1;
 574                         break;
 575 
 576                 case divsd:
 577                         info->op = fex_div;
 578                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 579                             &info->res.val.d);
 580                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 581                                 subnorm = 1;
 582                         break;
 583 
 584                 case sqrtsd:
 585                         info->op = fex_sqrt;
 586                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 587                         break;
 588 
 589                 case cvtsd2ss:
 590                         info->op = fex_cnvt;
 591                         info->res.type = fex_float;
 592                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
 593                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 594                                 subnorm = 1;
 595                         break;
 596 
 597                 case cvtsi2sd:
 598                         info->op = fex_cnvt;
 599                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 600                         break;
 601 
 602                 case cvttsd2si:
 603                         info->op = fex_cnvt;
 604                         info->res.type = fex_int;
 605                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 606                         break;
 607 
 608                 case cvtsd2si:
 609                         info->op = fex_cnvt;
 610                         info->res.type = fex_int;
 611                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 612                         break;
 613 
 614 #ifdef __amd64
 615                 case cvtsi2sdq:
 616                         info->op = fex_cnvt;
 617                         sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
 618                         break;
 619 
 620                 case cvttsd2siq:
 621                         info->op = fex_cnvt;
 622                         info->res.type = fex_llong;
 623                         sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
 624                         break;
 625 
 626                 case cvtsd2siq:
 627                         info->op = fex_cnvt;
 628                         info->res.type = fex_llong;
 629                         sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
 630                         break;
 631 #endif
 632 
 633                 case ucomisd:
 634                         info->op = fex_cmp;
 635                         info->res.type = fex_nodata;
 636                         sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
 637                         break;
 638 
 639                 case comisd:
 640                         info->op = fex_cmp;
 641                         info->res.type = fex_nodata;
 642                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 643                         break;
 644                 default:
 645                         break;
 646                 }
 647         } else {
 648                 if (inst->op == cvtsi2ss) {
 649                         info->op1.type = fex_int;
 650                         info->op1.val.i = inst->op2->i[0];
 651                         info->op2.type = fex_nodata;
 652                 } else if (inst->op == cvtsi2ssq) {
 653                         info->op1.type = fex_llong;
 654                         info->op1.val.l = inst->op2->l[0];
 655                         info->op2.type = fex_nodata;
 656                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 657                     inst->op == cvttss2si || inst->op == cvtss2si ||
 658                     inst->op == cvttss2siq || inst->op == cvtss2siq) {
 659                         info->op1.type = fex_float;
 660                         info->op1.val.f = inst->op2->f[0];
 661                         info->op2.type = fex_nodata;
 662                 } else {
 663                         info->op1.type = fex_float;
 664                         info->op1.val.f = inst->op1->f[0];
 665                         info->op2.type = fex_float;
 666                         info->op2.val.f = inst->op2->f[0];
 667                 }
 668                 info->res.type = fex_float;
 669                 switch (inst->op) {
 670                 case cmpss:
 671                         info->op = fex_cmp;
 672                         info->res.type = fex_int;
 673                         switch (inst->imm & 3) {
 674                         case 0:
 675                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 676                                     &info->res.val.i);
 677                                 break;
 678 
 679                         case 1:
 680                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 681                                     &info->res.val.i);
 682                                 break;
 683 
 684                         case 2:
 685                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 686                                     &info->res.val.i);
 687                                 break;
 688 
 689                         case 3:
 690                                 sse_cmpunordss(&info->op1.val.f,
 691                                     &info->op2.val.f, &info->res.val.i);
 692                         }
 693                         if (inst->imm & 4)
 694                                 info->res.val.i ^= 0xffffffffu;
 695                         break;
 696 
 697                 case minss:
 698                         info->op = fex_other;
 699                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 700                             &info->res.val.f);
 701                         break;
 702 
 703                 case maxss:
 704                         info->op = fex_other;
 705                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 706                             &info->res.val.f);
 707                         break;
 708 
 709                 case addss:
 710                         info->op = fex_add;
 711                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 712                             &info->res.val.f);
 713                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 714                                 subnorm = 1;
 715                         break;
 716 
 717                 case subss:
 718                         info->op = fex_sub;
 719                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 720                             &info->res.val.f);
 721                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 722                                 subnorm = 1;
 723                         break;
 724 
 725                 case mulss:
 726                         info->op = fex_mul;
 727                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 728                             &info->res.val.f);
 729                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 730                                 subnorm = 1;
 731                         break;
 732 
 733                 case divss:
 734                         info->op = fex_div;
 735                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 736                             &info->res.val.f);
 737                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 738                                 subnorm = 1;
 739                         break;
 740 
 741                 case sqrtss:
 742                         info->op = fex_sqrt;
 743                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 744                         break;
 745 
 746                 case cvtss2sd:
 747                         info->op = fex_cnvt;
 748                         info->res.type = fex_double;
 749                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 750                         break;
 751 
 752                 case cvtsi2ss:
 753                         info->op = fex_cnvt;
 754                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 755                         break;
 756 
 757                 case cvttss2si:
 758                         info->op = fex_cnvt;
 759                         info->res.type = fex_int;
 760                         sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
 761                         break;
 762 
 763                 case cvtss2si:
 764                         info->op = fex_cnvt;
 765                         info->res.type = fex_int;
 766                         sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
 767                         break;
 768 
 769 #ifdef __amd64
 770                 case cvtsi2ssq:
 771                         info->op = fex_cnvt;
 772                         sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
 773                         break;
 774 
 775                 case cvttss2siq:
 776                         info->op = fex_cnvt;
 777                         info->res.type = fex_llong;
 778                         sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
 779                         break;
 780 
 781                 case cvtss2siq:
 782                         info->op = fex_cnvt;
 783                         info->res.type = fex_llong;
 784                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 785                         break;
 786 #endif
 787 
 788                 case ucomiss:
 789                         info->op = fex_cmp;
 790                         info->res.type = fex_nodata;
 791                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 792                         break;
 793 
 794                 case comiss:
 795                         info->op = fex_cmp;
 796                         info->res.type = fex_nodata;
 797                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 798                         break;
 799                 default:
 800                         break;
 801                 }
 802         }
 803         __fenv_getmxcsr(&mxcsr);
 804         info->flags = mxcsr & 0x3d;
 805         __fenv_setmxcsr(&oldmxcsr);
 806 
 807         /* determine which exception would have been trapped */
 808         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
 809             >> 7) & 0x3d;
 810         e = mxcsr & te;
 811         if (e & FE_INVALID)
 812                 return __fex_get_sse_invalid_type(inst);
 813         if (e & FE_DIVBYZERO)
 814                 return fex_division;
 815         if (e & FE_OVERFLOW)
 816                 return fex_overflow;
 817         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 818                 return fex_underflow;
 819         if (e & FE_INEXACT)
 820                 return fex_inexact;
 821         return (enum fex_exception)-1;
 822 }
 823 
 824 /*
 825  * Emulate a SIMD SSE instruction to determine which exceptions occur
 826  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 827  * trapped exception that would occur if the i-th part of the SIMD
 828  * instruction were executed in isolation; set e[i] to -1 if no
 829  * trapped exception would occur in this part.  Also fill in info[i]
 830  * with the corresponding operands, default untrapped result, and
 831  * flags.
 832  *
 833  * This routine does not work if the instruction specified by *inst
 834  * is not a SIMD instruction.
 835  */
 836 void
 837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 838     fex_info_t *info)
 839 {
 840         sseinst_t       dummy;
 841         int             i;
 842 
 843         e[0] = e[1] = e[2] = e[3] = -1;
 844 
 845         /* perform each part of the SIMD operation */
 846         switch (inst->op) {
 847         case cmpps:
 848                 dummy.op = cmpss;
 849                 dummy.imm = inst->imm;
 850                 for (i = 0; i < 4; i++) {
 851                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 852                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 853                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 854                 }
 855                 break;
 856 
 857         case minps:
 858                 dummy.op = minss;
 859                 for (i = 0; i < 4; i++) {
 860                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 861                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 862                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 863                 }
 864                 break;
 865 
 866         case maxps:
 867                 dummy.op = maxss;
 868                 for (i = 0; i < 4; i++) {
 869                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 870                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 871                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 872                 }
 873                 break;
 874 
 875         case addps:
 876                 dummy.op = addss;
 877                 for (i = 0; i < 4; i++) {
 878                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 879                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 880                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 881                 }
 882                 break;
 883 
 884         case subps:
 885                 dummy.op = subss;
 886                 for (i = 0; i < 4; i++) {
 887                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 888                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 889                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 890                 }
 891                 break;
 892 
 893         case mulps:
 894                 dummy.op = mulss;
 895                 for (i = 0; i < 4; i++) {
 896                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 897                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 898                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 899                 }
 900                 break;
 901 
 902         case divps:
 903                 dummy.op = divss;
 904                 for (i = 0; i < 4; i++) {
 905                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 906                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 907                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 908                 }
 909                 break;
 910 
 911         case sqrtps:
 912                 dummy.op = sqrtss;
 913                 for (i = 0; i < 4; i++) {
 914                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 915                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 916                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 917                 }
 918                 break;
 919 
 920         case cvtdq2ps:
 921                 dummy.op = cvtsi2ss;
 922                 for (i = 0; i < 4; i++) {
 923                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 924                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 925                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 926                 }
 927                 break;
 928 
 929         case cvttps2dq:
 930                 dummy.op = cvttss2si;
 931                 for (i = 0; i < 4; i++) {
 932                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 933                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 934                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 935                 }
 936                 break;
 937 
 938         case cvtps2dq:
 939                 dummy.op = cvtss2si;
 940                 for (i = 0; i < 4; i++) {
 941                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 942                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 943                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 944                 }
 945                 break;
 946 
 947         case cvtpi2ps:
 948                 dummy.op = cvtsi2ss;
 949                 for (i = 0; i < 2; i++) {
 950                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 951                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 952                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 953                 }
 954                 break;
 955 
 956         case cvttps2pi:
 957                 dummy.op = cvttss2si;
 958                 for (i = 0; i < 2; i++) {
 959                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 960                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 961                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 962                 }
 963                 break;
 964 
 965         case cvtps2pi:
 966                 dummy.op = cvtss2si;
 967                 for (i = 0; i < 2; i++) {
 968                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 969                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 970                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 971                 }
 972                 break;
 973 
 974         case cmppd:
 975                 dummy.op = cmpsd;
 976                 dummy.imm = inst->imm;
 977                 for (i = 0; i < 2; i++) {
 978                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 979                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 980                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 981                 }
 982                 break;
 983 
 984         case minpd:
 985                 dummy.op = minsd;
 986                 for (i = 0; i < 2; i++) {
 987                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 988                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 989                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 990                 }
 991                 break;
 992 
 993         case maxpd:
 994                 dummy.op = maxsd;
 995                 for (i = 0; i < 2; i++) {
 996                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 997                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 998                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 999                 }
1000                 break;
1001 
1002         case addpd:
1003                 dummy.op = addsd;
1004                 for (i = 0; i < 2; i++) {
1005                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008                 }
1009                 break;
1010 
1011         case subpd:
1012                 dummy.op = subsd;
1013                 for (i = 0; i < 2; i++) {
1014                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017                 }
1018                 break;
1019 
1020         case mulpd:
1021                 dummy.op = mulsd;
1022                 for (i = 0; i < 2; i++) {
1023                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026                 }
1027                 break;
1028 
1029         case divpd:
1030                 dummy.op = divsd;
1031                 for (i = 0; i < 2; i++) {
1032                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035                 }
1036                 break;
1037 
1038         case sqrtpd:
1039                 dummy.op = sqrtsd;
1040                 for (i = 0; i < 2; i++) {
1041                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044                 }
1045                 break;
1046 
1047         case cvtpi2pd:
1048         case cvtdq2pd:
1049                 dummy.op = cvtsi2sd;
1050                 for (i = 0; i < 2; i++) {
1051                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054                 }
1055                 break;
1056 
1057         case cvttpd2pi:
1058         case cvttpd2dq:
1059                 dummy.op = cvttsd2si;
1060                 for (i = 0; i < 2; i++) {
1061                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064                 }
1065                 break;
1066 
1067         case cvtpd2pi:
1068         case cvtpd2dq:
1069                 dummy.op = cvtsd2si;
1070                 for (i = 0; i < 2; i++) {
1071                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074                 }
1075                 break;
1076 
1077         case cvtps2pd:
1078                 dummy.op = cvtss2sd;
1079                 for (i = 0; i < 2; i++) {
1080                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083                 }
1084                 break;
1085 
1086         case cvtpd2ps:
1087                 dummy.op = cvtsd2ss;
1088                 for (i = 0; i < 2; i++) {
1089                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092                 }
1093         default:
1094                 break;
1095         }
1096 }
1097 
1098 /*
1099  * Store the result value from *info in the destination of the scalar
1100  * SSE instruction specified by *inst.  If no result is given but the
1101  * exception is underflow or overflow, supply the default trapped result.
1102  *
1103  * This routine does not work if the instruction specified by *inst
1104  * is not a scalar instruction.
1105  */
1106 void
1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108     fex_info_t *info)
1109 {
1110         int             i = 0;
1111         long long       l = 0L;;
1112         float           f = 0.0, fscl;
1113         double          d = 0.0L, dscl;
1114 
1115         /* for compares that write eflags, just set the flags
1116            to indicate "unordered" */
1117         if (inst->op == ucomiss || inst->op == comiss ||
1118             inst->op == ucomisd || inst->op == comisd) {
1119                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120                 return;
1121         }
1122 
1123         /* if info doesn't specify a result value, try to generate
1124            the default trapped result */
1125         if (info->res.type == fex_nodata) {
1126                 /* set scale factors for exponent wrapping */
1127                 switch (e) {
1128                 case fex_overflow:
1129                         fscl = 1.262177448e-29f; /* 2^-96 */
1130                         dscl = 6.441148769597133308e-232; /* 2^-768 */
1131                         break;
1132 
1133                 case fex_underflow:
1134                         fscl = 7.922816251e+28f; /* 2^96 */
1135                         dscl = 1.552518092300708935e+231; /* 2^768 */
1136                         break;
1137 
1138                 default:
1139                         (void) __fex_get_sse_op(uap, inst, info);
1140                         if (info->res.type == fex_nodata)
1141                                 return;
1142                         goto stuff;
1143                 }
1144 
1145                 /* generate the wrapped result */
1146                 if (inst->op == cvtsd2ss) {
1147                         info->op1.type = fex_double;
1148                         info->op1.val.d = inst->op2->d[0];
1149                         info->op2.type = fex_nodata;
1150                         info->res.type = fex_float;
1151                         info->res.val.f = (float)(fscl * (fscl *
1152                             info->op1.val.d));
1153                 } else if ((int)inst->op & DOUBLE) {
1154                         info->op1.type = fex_double;
1155                         info->op1.val.d = inst->op1->d[0];
1156                         info->op2.type = fex_double;
1157                         info->op2.val.d = inst->op2->d[0];
1158                         info->res.type = fex_double;
1159                         switch (inst->op) {
1160                         case addsd:
1161                                 info->res.val.d = dscl * (dscl *
1162                                     info->op1.val.d + dscl * info->op2.val.d);
1163                                 break;
1164 
1165                         case subsd:
1166                                 info->res.val.d = dscl * (dscl *
1167                                     info->op1.val.d - dscl * info->op2.val.d);
1168                                 break;
1169 
1170                         case mulsd:
1171                                 info->res.val.d = (dscl * info->op1.val.d) *
1172                                     (dscl * info->op2.val.d);
1173                                 break;
1174 
1175                         case divsd:
1176                                 info->res.val.d = (dscl * info->op1.val.d) /
1177                                     (info->op2.val.d / dscl);
1178                                 break;
1179 
1180                         default:
1181                                 return;
1182                         }
1183                 } else {
1184                         info->op1.type = fex_float;
1185                         info->op1.val.f = inst->op1->f[0];
1186                         info->op2.type = fex_float;
1187                         info->op2.val.f = inst->op2->f[0];
1188                         info->res.type = fex_float;
1189                         switch (inst->op) {
1190                         case addss:
1191                                 info->res.val.f = fscl * (fscl *
1192                                     info->op1.val.f + fscl * info->op2.val.f);
1193                                 break;
1194 
1195                         case subss:
1196                                 info->res.val.f = fscl * (fscl *
1197                                     info->op1.val.f - fscl * info->op2.val.f);
1198                                 break;
1199 
1200                         case mulss:
1201                                 info->res.val.f = (fscl * info->op1.val.f) *
1202                                     (fscl * info->op2.val.f);
1203                                 break;
1204 
1205                         case divss:
1206                                 info->res.val.f = (fscl * info->op1.val.f) /
1207                                     (info->op2.val.f / fscl);
1208                                 break;
1209 
1210                         default:
1211                                 return;
1212                         }
1213                 }
1214         }
1215 
1216         /* put the result in the destination */
1217 stuff:
1218         if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219             || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220                 switch (info->res.type) {
1221                 case fex_int:
1222                         i = info->res.val.i;
1223                         break;
1224 
1225                 case fex_llong:
1226                         i = info->res.val.l;
1227                         break;
1228 
1229                 case fex_float:
1230                         i = info->res.val.f;
1231                         break;
1232 
1233                 case fex_double:
1234                         i = info->res.val.d;
1235                         break;
1236 
1237                 case fex_ldouble:
1238                         i = info->res.val.q;
1239                         break;
1240 
1241                 default:
1242                         break;
1243                 }
1244                 inst->op1->i[0] = i;
1245         } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246             inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247             inst->op == cvtsd2siq) {
1248                 switch (info->res.type) {
1249                 case fex_int:
1250                         l = info->res.val.i;
1251                         break;
1252 
1253                 case fex_llong:
1254                         l = info->res.val.l;
1255                         break;
1256 
1257                 case fex_float:
1258                         l = info->res.val.f;
1259                         break;
1260 
1261                 case fex_double:
1262                         l = info->res.val.d;
1263                         break;
1264 
1265                 case fex_ldouble:
1266                         l = info->res.val.q;
1267                         break;
1268 
1269                 default:
1270                         break;
1271                 }
1272                 inst->op1->l[0] = l;
1273         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274             inst->op == cvtss2sd) {
1275                 switch (info->res.type) {
1276                 case fex_int:
1277                         d = info->res.val.i;
1278                         break;
1279 
1280                 case fex_llong:
1281                         d = info->res.val.l;
1282                         break;
1283 
1284                 case fex_float:
1285                         d = info->res.val.f;
1286                         break;
1287 
1288                 case fex_double:
1289                         d = info->res.val.d;
1290                         break;
1291 
1292                 case fex_ldouble:
1293                         d = info->res.val.q;
1294                         break;
1295 
1296                 default:
1297                         break;
1298                 }
1299                 inst->op1->d[0] = d;
1300         } else {
1301                 switch (info->res.type) {
1302                 case fex_int:
1303                         f = info->res.val.i;
1304                         break;
1305 
1306                 case fex_llong:
1307                         f = info->res.val.l;
1308                         break;
1309 
1310                 case fex_float:
1311                         f = info->res.val.f;
1312                         break;
1313 
1314                 case fex_double:
1315                         f = info->res.val.d;
1316                         break;
1317 
1318                 case fex_ldouble:
1319                         f = info->res.val.q;
1320                         break;
1321 
1322                 default:
1323                         break;
1324                 }
1325                 inst->op1->f[0] = f;
1326         }
1327 }
1328 
1329 /*
1330  * Store the results from a SIMD instruction.  For each i, store
1331  * the result value from info[i] in the i-th part of the destination
1332  * of the SIMD SSE instruction specified by *inst.  If no result
1333  * is given but the exception indicated by e[i] is underflow or
1334  * overflow, supply the default trapped result.
1335  *
1336  * This routine does not work if the instruction specified by *inst
1337  * is not a SIMD instruction.
1338  */
1339 void
1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341     fex_info_t *info)
1342 {
1343         sseinst_t       dummy;
1344         int             i;
1345 
1346         /* store each part */
1347         switch (inst->op) {
1348         case cmpps:
1349                 dummy.op = cmpss;
1350                 dummy.imm = inst->imm;
1351                 for (i = 0; i < 4; i++) {
1352                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355                 }
1356                 break;
1357 
1358         case minps:
1359                 dummy.op = minss;
1360                 for (i = 0; i < 4; i++) {
1361                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364                 }
1365                 break;
1366 
1367         case maxps:
1368                 dummy.op = maxss;
1369                 for (i = 0; i < 4; i++) {
1370                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373                 }
1374                 break;
1375 
1376         case addps:
1377                 dummy.op = addss;
1378                 for (i = 0; i < 4; i++) {
1379                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382                 }
1383                 break;
1384 
1385         case subps:
1386                 dummy.op = subss;
1387                 for (i = 0; i < 4; i++) {
1388                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391                 }
1392                 break;
1393 
1394         case mulps:
1395                 dummy.op = mulss;
1396                 for (i = 0; i < 4; i++) {
1397                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400                 }
1401                 break;
1402 
1403         case divps:
1404                 dummy.op = divss;
1405                 for (i = 0; i < 4; i++) {
1406                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409                 }
1410                 break;
1411 
1412         case sqrtps:
1413                 dummy.op = sqrtss;
1414                 for (i = 0; i < 4; i++) {
1415                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418                 }
1419                 break;
1420 
1421         case cvtdq2ps:
1422                 dummy.op = cvtsi2ss;
1423                 for (i = 0; i < 4; i++) {
1424                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427                 }
1428                 break;
1429 
1430         case cvttps2dq:
1431                 dummy.op = cvttss2si;
1432                 for (i = 0; i < 4; i++) {
1433                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436                 }
1437                 break;
1438 
1439         case cvtps2dq:
1440                 dummy.op = cvtss2si;
1441                 for (i = 0; i < 4; i++) {
1442                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445                 }
1446                 break;
1447 
1448         case cvtpi2ps:
1449                 dummy.op = cvtsi2ss;
1450                 for (i = 0; i < 2; i++) {
1451                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454                 }
1455                 break;
1456 
1457         case cvttps2pi:
1458                 dummy.op = cvttss2si;
1459                 for (i = 0; i < 2; i++) {
1460                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463                 }
1464                 break;
1465 
1466         case cvtps2pi:
1467                 dummy.op = cvtss2si;
1468                 for (i = 0; i < 2; i++) {
1469                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472                 }
1473                 break;
1474 
1475         case cmppd:
1476                 dummy.op = cmpsd;
1477                 dummy.imm = inst->imm;
1478                 for (i = 0; i < 2; i++) {
1479                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482                 }
1483                 break;
1484 
1485         case minpd:
1486                 dummy.op = minsd;
1487                 for (i = 0; i < 2; i++) {
1488                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491                 }
1492                 break;
1493 
1494         case maxpd:
1495                 dummy.op = maxsd;
1496                 for (i = 0; i < 2; i++) {
1497                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500                 }
1501                 break;
1502 
1503         case addpd:
1504                 dummy.op = addsd;
1505                 for (i = 0; i < 2; i++) {
1506                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509                 }
1510                 break;
1511 
1512         case subpd:
1513                 dummy.op = subsd;
1514                 for (i = 0; i < 2; i++) {
1515                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518                 }
1519                 break;
1520 
1521         case mulpd:
1522                 dummy.op = mulsd;
1523                 for (i = 0; i < 2; i++) {
1524                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527                 }
1528                 break;
1529 
1530         case divpd:
1531                 dummy.op = divsd;
1532                 for (i = 0; i < 2; i++) {
1533                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536                 }
1537                 break;
1538 
1539         case sqrtpd:
1540                 dummy.op = sqrtsd;
1541                 for (i = 0; i < 2; i++) {
1542                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545                 }
1546                 break;
1547 
1548         case cvtpi2pd:
1549         case cvtdq2pd:
1550                 dummy.op = cvtsi2sd;
1551                 for (i = 0; i < 2; i++) {
1552                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555                 }
1556                 break;
1557 
1558         case cvttpd2pi:
1559         case cvttpd2dq:
1560                 dummy.op = cvttsd2si;
1561                 for (i = 0; i < 2; i++) {
1562                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565                 }
1566                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1567                 if (inst->op == cvttpd2dq)
1568                         inst->op1->l[1] = 0ll;
1569                 break;
1570 
1571         case cvtpd2pi:
1572         case cvtpd2dq:
1573                 dummy.op = cvtsd2si;
1574                 for (i = 0; i < 2; i++) {
1575                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578                 }
1579                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1580                 if (inst->op == cvtpd2dq)
1581                         inst->op1->l[1] = 0ll;
1582                 break;
1583 
1584         case cvtps2pd:
1585                 dummy.op = cvtss2sd;
1586                 for (i = 0; i < 2; i++) {
1587                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590                 }
1591                 break;
1592 
1593         case cvtpd2ps:
1594                 dummy.op = cvtsd2ss;
1595                 for (i = 0; i < 2; i++) {
1596                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599                 }
1600                 /* zero the high 64 bits of the destination */
1601                 inst->op1->l[1] = 0ll;
1602 
1603         default:
1604                 break;
1605         }
1606 }
1607