1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include "fenv_synonyms.h"
  31 #include <ucontext.h>
  32 #include <fenv.h>
  33 #if defined(__SUNPRO_C)
  34 #include <sunmath.h>
  35 #else
  36 #include <sys/ieeefp.h>
  37 #endif
  38 #include "fex_handler.h"
  39 #include "fenv_inlines.h"
  40 
  41 #if !defined(REG_PC)
  42 #define REG_PC  EIP
  43 #endif
  44 
  45 #if !defined(REG_PS)
  46 #define REG_PS  EFL
  47 #endif
  48 
  49 #ifdef __amd64
  50 #define regno(X)        ((X < 4)? REG_RAX - X : \
  51                         ((X > 4)? REG_RAX + 1 - X : REG_RSP))
  52 #else
  53 #define regno(X)        (EAX - X)
  54 #endif
  55 
  56 /*
  57  * Support for SSE instructions
  58  */
  59 
  60 /*
  61  * Decode an SSE instruction.  Fill in *inst and return the length of the
  62  * instruction in bytes.  Return 0 if the instruction is not recognized.
  63  */
  64 int
  65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  66 {
  67         unsigned char   *ip;
  68         char            *addr;
  69         int             i, dbl, simd, rex, modrm, sib, r;
  70 
  71         i = 0;
  72         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  73 
  74         /* look for pseudo-prefixes */
  75         dbl = 0;
  76         simd = SIMD;
  77         if (ip[i] == 0xF3) {
  78                 simd = 0;
  79                 i++;
  80         } else if (ip[i] == 0x66) {
  81                 dbl = DOUBLE;
  82                 i++;
  83         } else if (ip[i] == 0xF2) {
  84                 dbl = DOUBLE;
  85                 simd = 0;
  86                 i++;
  87         }
  88 
  89         /* look for AMD64 REX prefix */
  90         rex = 0;
  91         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  92                 rex = ip[i];
  93                 i++;
  94         }
  95 
  96         /* parse opcode */
  97         if (ip[i++] != 0x0F)
  98                 return 0;
  99         switch (ip[i++]) {
 100         case 0x2A:
 101                 inst->op = (int)cvtsi2ss + simd + dbl;
 102                 if (!simd)
 103                         inst->op = (int)inst->op + (rex & 8);
 104                 break;
 105 
 106         case 0x2C:
 107                 inst->op = (int)cvttss2si + simd + dbl;
 108                 if (!simd)
 109                         inst->op = (int)inst->op + (rex & 8);
 110                 break;
 111 
 112         case 0x2D:
 113                 inst->op = (int)cvtss2si + simd + dbl;
 114                 if (!simd)
 115                         inst->op = (int)inst->op + (rex & 8);
 116                 break;
 117 
 118         case 0x2E:
 119                 /* oddball: scalar instruction in a SIMD opcode group */
 120                 if (!simd)
 121                         return 0;
 122                 inst->op = (int)ucomiss + dbl;
 123                 break;
 124 
 125         case 0x2F:
 126                 /* oddball: scalar instruction in a SIMD opcode group */
 127                 if (!simd)
 128                         return 0;
 129                 inst->op = (int)comiss + dbl;
 130                 break;
 131 
 132         case 0x51:
 133                 inst->op = (int)sqrtss + simd + dbl;
 134                 break;
 135 
 136         case 0x58:
 137                 inst->op = (int)addss + simd + dbl;
 138                 break;
 139 
 140         case 0x59:
 141                 inst->op = (int)mulss + simd + dbl;
 142                 break;
 143 
 144         case 0x5A:
 145                 inst->op = (int)cvtss2sd + simd + dbl;
 146                 break;
 147 
 148         case 0x5B:
 149                 if (dbl) {
 150                         if (simd)
 151                                 inst->op = cvtps2dq;
 152                         else
 153                                 return 0;
 154                 } else {
 155                         inst->op = (simd)? cvtdq2ps : cvttps2dq;
 156                 }
 157                 break;
 158 
 159         case 0x5C:
 160                 inst->op = (int)subss + simd + dbl;
 161                 break;
 162 
 163         case 0x5D:
 164                 inst->op = (int)minss + simd + dbl;
 165                 break;
 166 
 167         case 0x5E:
 168                 inst->op = (int)divss + simd + dbl;
 169                 break;
 170 
 171         case 0x5F:
 172                 inst->op = (int)maxss + simd + dbl;
 173                 break;
 174 
 175         case 0xC2:
 176                 inst->op = (int)cmpss + simd + dbl;
 177                 break;
 178 
 179         case 0xE6:
 180                 if (simd) {
 181                         if (dbl)
 182                                 inst->op = cvttpd2dq;
 183                         else
 184                                 return 0;
 185                 } else {
 186                         inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
 187                 }
 188                 break;
 189 
 190         default:
 191                 return 0;
 192         }
 193 
 194         /* locate operands */
 195         modrm = ip[i++];
 196 
 197         if (inst->op == cvtss2si || inst->op == cvttss2si ||
 198             inst->op == cvtsd2si || inst->op == cvttsd2si ||
 199             inst->op == cvtss2siq || inst->op == cvttss2siq ||
 200             inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
 201                 /* op1 is a gp register */
 202                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 203                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 204         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
 205             inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
 206                 /* op1 is a mmx register */
 207 #ifdef __amd64
 208                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
 209                     fpchip_state.st[(modrm >> 3) & 7];
 210 #else
 211                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 212                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 213                     fpchip_state.state[7]);
 214 #endif
 215         } else {
 216                 /* op1 is a xmm register */
 217                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 218                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 219                     fp_reg_set.fpchip_state.xmm[r];
 220         }
 221 
 222         if ((modrm >> 6) == 3) {
 223                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
 224                     inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
 225                         /* op2 is a gp register */
 226                         r = ((rex & 1) << 3) | (modrm & 7);
 227                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
 228                             gregs[regno(r)];
 229                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 230                         /* op2 is a mmx register */
 231 #ifdef __amd64
 232                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 233                             fp_reg_set.fpchip_state.st[modrm & 7];
 234 #else
 235                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 236                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 237                             fpchip_state.state[7]);
 238 #endif
 239                 } else {
 240                         /* op2 is a xmm register */
 241                         r = ((rex & 1) << 3) | (modrm & 7);
 242                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 243                             fp_reg_set.fpchip_state.xmm[r];
 244                 }
 245         } else if ((modrm & 0xc7) == 0x05) {
 246 #ifdef __amd64
 247                 /* address of next instruction + offset */
 248                 r = i + 4;
 249                 if (inst->op == cmpss || inst->op == cmpps ||
 250                     inst->op == cmpsd || inst->op == cmppd)
 251                         r++;
 252                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 253 #else
 254                 /* absolute address */
 255                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 256 #endif
 257                 i += 4;
 258         } else {
 259                 /* complex address */
 260                 if ((modrm & 7) == 4) {
 261                         /* parse sib byte */
 262                         sib = ip[i++];
 263                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 264                                 /* start with absolute address */
 265                                 addr = (char *)(uintptr_t)(*(int *)(ip + i));
 266                                 i += 4;
 267                         } else {
 268                                 /* start with base */
 269                                 r = ((rex & 1) << 3) | (sib & 7);
 270                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 271                         }
 272                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);
 273                         if (r != 4) {
 274                                 /* add scaled index */
 275                                 addr += uap->uc_mcontext.gregs[regno(r)]
 276                                     << (sib >> 6);
 277                         }
 278                 } else {
 279                         r = ((rex & 1) << 3) | (modrm & 7);
 280                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 281                 }
 282 
 283                 /* add displacement, if any */
 284                 if ((modrm >> 6) == 1) {
 285                         addr += (char)ip[i++];
 286                 } else if ((modrm >> 6) == 2) {
 287                         addr += *(int *)(ip + i);
 288                         i += 4;
 289                 }
 290                 inst->op2 = (sseoperand_t *)addr;
 291         }
 292 
 293         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 294             inst->op == cmppd) {
 295                 /* get the immediate operand */
 296                 inst->imm = ip[i++];
 297         }
 298 
 299         return i;
 300 }
 301 
 302 static enum fp_class_type
 303 my_fp_classf(float *x)
 304 {
 305         int     i = *(int *)x & ~0x80000000;
 306 
 307         if (i < 0x7f800000) {
 308                 if (i < 0x00800000)
 309                         return ((i == 0)? fp_zero : fp_subnormal);
 310                 return fp_normal;
 311         }
 312         else if (i == 0x7f800000)
 313                 return fp_infinity;
 314         else if (i & 0x400000)
 315                 return fp_quiet;
 316         else
 317                 return fp_signaling;
 318 }
 319 
 320 static enum fp_class_type
 321 my_fp_class(double *x)
 322 {
 323         int     i = *(1+(int *)x) & ~0x80000000;
 324 
 325         if (i < 0x7ff00000) {
 326                 if (i < 0x00100000)
 327                         return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
 328                 return fp_normal;
 329         }
 330         else if (i == 0x7ff00000 && *(int *)x == 0)
 331                 return fp_infinity;
 332         else if (i & 0x80000)
 333                 return fp_quiet;
 334         else
 335                 return fp_signaling;
 336 }
 337 
 338 /*
 339  * Inspect a scalar SSE instruction that incurred an invalid operation
 340  * exception to determine which type of exception it was.
 341  */
 342 static enum fex_exception
 343 __fex_get_sse_invalid_type(sseinst_t *inst)
 344 {
 345         enum fp_class_type      t1, t2;
 346 
 347         /* check op2 for signaling nan */
 348         t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
 349             my_fp_classf(&inst->op2->f[0]);
 350         if (t2 == fp_signaling)
 351                 return fex_inv_snan;
 352 
 353         /* eliminate all single-operand instructions */
 354         switch (inst->op) {
 355         case cvtsd2ss:
 356         case cvtss2sd:
 357                 /* hmm, this shouldn't have happened */
 358                 return (enum fex_exception) -1;
 359 
 360         case sqrtss:
 361         case sqrtsd:
 362                 return fex_inv_sqrt;
 363 
 364         case cvtss2si:
 365         case cvtsd2si:
 366         case cvttss2si:
 367         case cvttsd2si:
 368         case cvtss2siq:
 369         case cvtsd2siq:
 370         case cvttss2siq:
 371         case cvttsd2siq:
 372                 return fex_inv_int;
 373         default:
 374                 break;
 375         }
 376 
 377         /* check op1 for signaling nan */
 378         t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
 379             my_fp_classf(&inst->op1->f[0]);
 380         if (t1 == fp_signaling)
 381                 return fex_inv_snan;
 382 
 383         /* check two-operand instructions for other cases */
 384         switch (inst->op) {
 385         case cmpss:
 386         case cmpsd:
 387         case minss:
 388         case minsd:
 389         case maxss:
 390         case maxsd:
 391         case comiss:
 392         case comisd:
 393                 return fex_inv_cmp;
 394 
 395         case addss:
 396         case addsd:
 397         case subss:
 398         case subsd:
 399                 if (t1 == fp_infinity && t2 == fp_infinity)
 400                         return fex_inv_isi;
 401                 break;
 402 
 403         case mulss:
 404         case mulsd:
 405                 if ((t1 == fp_zero && t2 == fp_infinity) ||
 406                     (t2 == fp_zero && t1 == fp_infinity))
 407                         return fex_inv_zmi;
 408                 break;
 409 
 410         case divss:
 411         case divsd:
 412                 if (t1 == fp_zero && t2 == fp_zero)
 413                         return fex_inv_zdz;
 414                 if (t1 == fp_infinity && t2 == fp_infinity)
 415                         return fex_inv_idi;
 416         default:
 417                 break;
 418         }
 419 
 420         return (enum fex_exception)-1;
 421 }
 422 
 423 /* inline templates */
 424 extern void sse_cmpeqss(float *, float *, int *);
 425 extern void sse_cmpltss(float *, float *, int *);
 426 extern void sse_cmpless(float *, float *, int *);
 427 extern void sse_cmpunordss(float *, float *, int *);
 428 extern void sse_minss(float *, float *, float *);
 429 extern void sse_maxss(float *, float *, float *);
 430 extern void sse_addss(float *, float *, float *);
 431 extern void sse_subss(float *, float *, float *);
 432 extern void sse_mulss(float *, float *, float *);
 433 extern void sse_divss(float *, float *, float *);
 434 extern void sse_sqrtss(float *, float *);
 435 extern void sse_ucomiss(float *, float *);
 436 extern void sse_comiss(float *, float *);
 437 extern void sse_cvtss2sd(float *, double *);
 438 extern void sse_cvtsi2ss(int *, float *);
 439 extern void sse_cvttss2si(float *, int *);
 440 extern void sse_cvtss2si(float *, int *);
 441 #ifdef __amd64
 442 extern void sse_cvtsi2ssq(long long *, float *);
 443 extern void sse_cvttss2siq(float *, long long *);
 444 extern void sse_cvtss2siq(float *, long long *);
 445 #endif
 446 extern void sse_cmpeqsd(double *, double *, long long *);
 447 extern void sse_cmpltsd(double *, double *, long long *);
 448 extern void sse_cmplesd(double *, double *, long long *);
 449 extern void sse_cmpunordsd(double *, double *, long long *);
 450 extern void sse_minsd(double *, double *, double *);
 451 extern void sse_maxsd(double *, double *, double *);
 452 extern void sse_addsd(double *, double *, double *);
 453 extern void sse_subsd(double *, double *, double *);
 454 extern void sse_mulsd(double *, double *, double *);
 455 extern void sse_divsd(double *, double *, double *);
 456 extern void sse_sqrtsd(double *, double *);
 457 extern void sse_ucomisd(double *, double *);
 458 extern void sse_comisd(double *, double *);
 459 extern void sse_cvtsd2ss(double *, float *);
 460 extern void sse_cvtsi2sd(int *, double *);
 461 extern void sse_cvttsd2si(double *, int *);
 462 extern void sse_cvtsd2si(double *, int *);
 463 #ifdef __amd64
 464 extern void sse_cvtsi2sdq(long long *, double *);
 465 extern void sse_cvttsd2siq(double *, long long *);
 466 extern void sse_cvtsd2siq(double *, long long *);
 467 #endif
 468 
 469 /*
 470  * Fill in *info with the operands, default untrapped result, and
 471  * flags produced by a scalar SSE instruction, and return the type
 472  * of trapped exception (if any).  On entry, the mxcsr must have
 473  * all exceptions masked and all flags clear.  The same conditions
 474  * will hold on exit.
 475  *
 476  * This routine does not work if the instruction specified by *inst
 477  * is not a scalar instruction.
 478  */
 479 enum fex_exception
 480 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 481 {
 482         unsigned int    e, te, mxcsr, oldmxcsr, subnorm;
 483 
 484         /*
 485          * Perform the operation with traps disabled and check the
 486          * exception flags.  If the underflow trap was enabled, also
 487          * check for an exact subnormal result.
 488          */
 489         __fenv_getmxcsr(&oldmxcsr);
 490         subnorm = 0;
 491         if ((int)inst->op & DOUBLE) {
 492                 if (inst->op == cvtsi2sd) {
 493                         info->op1.type = fex_int;
 494                         info->op1.val.i = inst->op2->i[0];
 495                         info->op2.type = fex_nodata;
 496                 } else if (inst->op == cvtsi2sdq) {
 497                         info->op1.type = fex_llong;
 498                         info->op1.val.l = inst->op2->l[0];
 499                         info->op2.type = fex_nodata;
 500                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 501                     inst->op == cvttsd2si || inst->op == cvtsd2si ||
 502                     inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
 503                         info->op1.type = fex_double;
 504                         info->op1.val.d = inst->op2->d[0];
 505                         info->op2.type = fex_nodata;
 506                 } else {
 507                         info->op1.type = fex_double;
 508                         info->op1.val.d = inst->op1->d[0];
 509                         info->op2.type = fex_double;
 510                         info->op2.val.d = inst->op2->d[0];
 511                 }
 512                 info->res.type = fex_double;
 513                 switch (inst->op) {
 514                 case cmpsd:
 515                         info->op = fex_cmp;
 516                         info->res.type = fex_llong;
 517                         switch (inst->imm & 3) {
 518                         case 0:
 519                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 520                                     &info->res.val.l);
 521                                 break;
 522 
 523                         case 1:
 524                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 525                                     &info->res.val.l);
 526                                 break;
 527 
 528                         case 2:
 529                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 530                                     &info->res.val.l);
 531                                 break;
 532 
 533                         case 3:
 534                                 sse_cmpunordsd(&info->op1.val.d,
 535                                     &info->op2.val.d, &info->res.val.l);
 536                         }
 537                         if (inst->imm & 4)
 538                                 info->res.val.l ^= 0xffffffffffffffffull;
 539                         break;
 540 
 541                 case minsd:
 542                         info->op = fex_other;
 543                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 544                             &info->res.val.d);
 545                         break;
 546 
 547                 case maxsd:
 548                         info->op = fex_other;
 549                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 550                             &info->res.val.d);
 551                         break;
 552 
 553                 case addsd:
 554                         info->op = fex_add;
 555                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 556                             &info->res.val.d);
 557                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 558                                 subnorm = 1;
 559                         break;
 560 
 561                 case subsd:
 562                         info->op = fex_sub;
 563                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 564                             &info->res.val.d);
 565                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 566                                 subnorm = 1;
 567                         break;
 568 
 569                 case mulsd:
 570                         info->op = fex_mul;
 571                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 572                             &info->res.val.d);
 573                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 574                                 subnorm = 1;
 575                         break;
 576 
 577                 case divsd:
 578                         info->op = fex_div;
 579                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 580                             &info->res.val.d);
 581                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 582                                 subnorm = 1;
 583                         break;
 584 
 585                 case sqrtsd:
 586                         info->op = fex_sqrt;
 587                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 588                         break;
 589 
 590                 case cvtsd2ss:
 591                         info->op = fex_cnvt;
 592                         info->res.type = fex_float;
 593                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
 594                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 595                                 subnorm = 1;
 596                         break;
 597 
 598                 case cvtsi2sd:
 599                         info->op = fex_cnvt;
 600                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 601                         break;
 602 
 603                 case cvttsd2si:
 604                         info->op = fex_cnvt;
 605                         info->res.type = fex_int;
 606                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 607                         break;
 608 
 609                 case cvtsd2si:
 610                         info->op = fex_cnvt;
 611                         info->res.type = fex_int;
 612                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 613                         break;
 614 
 615 #ifdef __amd64
 616                 case cvtsi2sdq:
 617                         info->op = fex_cnvt;
 618                         sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
 619                         break;
 620 
 621                 case cvttsd2siq:
 622                         info->op = fex_cnvt;
 623                         info->res.type = fex_llong;
 624                         sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
 625                         break;
 626 
 627                 case cvtsd2siq:
 628                         info->op = fex_cnvt;
 629                         info->res.type = fex_llong;
 630                         sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
 631                         break;
 632 #endif
 633 
 634                 case ucomisd:
 635                         info->op = fex_cmp;
 636                         info->res.type = fex_nodata;
 637                         sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
 638                         break;
 639 
 640                 case comisd:
 641                         info->op = fex_cmp;
 642                         info->res.type = fex_nodata;
 643                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 644                         break;
 645                 default:
 646                         break;
 647                 }
 648         } else {
 649                 if (inst->op == cvtsi2ss) {
 650                         info->op1.type = fex_int;
 651                         info->op1.val.i = inst->op2->i[0];
 652                         info->op2.type = fex_nodata;
 653                 } else if (inst->op == cvtsi2ssq) {
 654                         info->op1.type = fex_llong;
 655                         info->op1.val.l = inst->op2->l[0];
 656                         info->op2.type = fex_nodata;
 657                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 658                     inst->op == cvttss2si || inst->op == cvtss2si ||
 659                     inst->op == cvttss2siq || inst->op == cvtss2siq) {
 660                         info->op1.type = fex_float;
 661                         info->op1.val.f = inst->op2->f[0];
 662                         info->op2.type = fex_nodata;
 663                 } else {
 664                         info->op1.type = fex_float;
 665                         info->op1.val.f = inst->op1->f[0];
 666                         info->op2.type = fex_float;
 667                         info->op2.val.f = inst->op2->f[0];
 668                 }
 669                 info->res.type = fex_float;
 670                 switch (inst->op) {
 671                 case cmpss:
 672                         info->op = fex_cmp;
 673                         info->res.type = fex_int;
 674                         switch (inst->imm & 3) {
 675                         case 0:
 676                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 677                                     &info->res.val.i);
 678                                 break;
 679 
 680                         case 1:
 681                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 682                                     &info->res.val.i);
 683                                 break;
 684 
 685                         case 2:
 686                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 687                                     &info->res.val.i);
 688                                 break;
 689 
 690                         case 3:
 691                                 sse_cmpunordss(&info->op1.val.f,
 692                                     &info->op2.val.f, &info->res.val.i);
 693                         }
 694                         if (inst->imm & 4)
 695                                 info->res.val.i ^= 0xffffffffu;
 696                         break;
 697 
 698                 case minss:
 699                         info->op = fex_other;
 700                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 701                             &info->res.val.f);
 702                         break;
 703 
 704                 case maxss:
 705                         info->op = fex_other;
 706                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 707                             &info->res.val.f);
 708                         break;
 709 
 710                 case addss:
 711                         info->op = fex_add;
 712                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 713                             &info->res.val.f);
 714                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 715                                 subnorm = 1;
 716                         break;
 717 
 718                 case subss:
 719                         info->op = fex_sub;
 720                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 721                             &info->res.val.f);
 722                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 723                                 subnorm = 1;
 724                         break;
 725 
 726                 case mulss:
 727                         info->op = fex_mul;
 728                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 729                             &info->res.val.f);
 730                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 731                                 subnorm = 1;
 732                         break;
 733 
 734                 case divss:
 735                         info->op = fex_div;
 736                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 737                             &info->res.val.f);
 738                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 739                                 subnorm = 1;
 740                         break;
 741 
 742                 case sqrtss:
 743                         info->op = fex_sqrt;
 744                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 745                         break;
 746 
 747                 case cvtss2sd:
 748                         info->op = fex_cnvt;
 749                         info->res.type = fex_double;
 750                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 751                         break;
 752 
 753                 case cvtsi2ss:
 754                         info->op = fex_cnvt;
 755                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 756                         break;
 757 
 758                 case cvttss2si:
 759                         info->op = fex_cnvt;
 760                         info->res.type = fex_int;
 761                         sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
 762                         break;
 763 
 764                 case cvtss2si:
 765                         info->op = fex_cnvt;
 766                         info->res.type = fex_int;
 767                         sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
 768                         break;
 769 
 770 #ifdef __amd64
 771                 case cvtsi2ssq:
 772                         info->op = fex_cnvt;
 773                         sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
 774                         break;
 775 
 776                 case cvttss2siq:
 777                         info->op = fex_cnvt;
 778                         info->res.type = fex_llong;
 779                         sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
 780                         break;
 781 
 782                 case cvtss2siq:
 783                         info->op = fex_cnvt;
 784                         info->res.type = fex_llong;
 785                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 786                         break;
 787 #endif
 788 
 789                 case ucomiss:
 790                         info->op = fex_cmp;
 791                         info->res.type = fex_nodata;
 792                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 793                         break;
 794 
 795                 case comiss:
 796                         info->op = fex_cmp;
 797                         info->res.type = fex_nodata;
 798                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 799                         break;
 800                 default:
 801                         break;
 802                 }
 803         }
 804         __fenv_getmxcsr(&mxcsr);
 805         info->flags = mxcsr & 0x3d;
 806         __fenv_setmxcsr(&oldmxcsr);
 807 
 808         /* determine which exception would have been trapped */
 809         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
 810             >> 7) & 0x3d;
 811         e = mxcsr & te;
 812         if (e & FE_INVALID)
 813                 return __fex_get_sse_invalid_type(inst);
 814         if (e & FE_DIVBYZERO)
 815                 return fex_division;
 816         if (e & FE_OVERFLOW)
 817                 return fex_overflow;
 818         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 819                 return fex_underflow;
 820         if (e & FE_INEXACT)
 821                 return fex_inexact;
 822         return (enum fex_exception)-1;
 823 }
 824 
 825 /*
 826  * Emulate a SIMD SSE instruction to determine which exceptions occur
 827  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 828  * trapped exception that would occur if the i-th part of the SIMD
 829  * instruction were executed in isolation; set e[i] to -1 if no
 830  * trapped exception would occur in this part.  Also fill in info[i]
 831  * with the corresponding operands, default untrapped result, and
 832  * flags.
 833  *
 834  * This routine does not work if the instruction specified by *inst
 835  * is not a SIMD instruction.
 836  */
 837 void
 838 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 839     fex_info_t *info)
 840 {
 841         sseinst_t       dummy;
 842         int             i;
 843 
 844         e[0] = e[1] = e[2] = e[3] = -1;
 845 
 846         /* perform each part of the SIMD operation */
 847         switch (inst->op) {
 848         case cmpps:
 849                 dummy.op = cmpss;
 850                 dummy.imm = inst->imm;
 851                 for (i = 0; i < 4; i++) {
 852                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 853                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 854                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 855                 }
 856                 break;
 857 
 858         case minps:
 859                 dummy.op = minss;
 860                 for (i = 0; i < 4; i++) {
 861                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 862                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 863                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 864                 }
 865                 break;
 866 
 867         case maxps:
 868                 dummy.op = maxss;
 869                 for (i = 0; i < 4; i++) {
 870                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 871                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 872                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 873                 }
 874                 break;
 875 
 876         case addps:
 877                 dummy.op = addss;
 878                 for (i = 0; i < 4; i++) {
 879                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 880                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 881                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 882                 }
 883                 break;
 884 
 885         case subps:
 886                 dummy.op = subss;
 887                 for (i = 0; i < 4; i++) {
 888                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 889                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 890                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 891                 }
 892                 break;
 893 
 894         case mulps:
 895                 dummy.op = mulss;
 896                 for (i = 0; i < 4; i++) {
 897                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 898                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 899                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 900                 }
 901                 break;
 902 
 903         case divps:
 904                 dummy.op = divss;
 905                 for (i = 0; i < 4; i++) {
 906                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 907                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 908                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 909                 }
 910                 break;
 911 
 912         case sqrtps:
 913                 dummy.op = sqrtss;
 914                 for (i = 0; i < 4; i++) {
 915                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 916                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 917                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 918                 }
 919                 break;
 920 
 921         case cvtdq2ps:
 922                 dummy.op = cvtsi2ss;
 923                 for (i = 0; i < 4; i++) {
 924                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 925                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 926                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 927                 }
 928                 break;
 929 
 930         case cvttps2dq:
 931                 dummy.op = cvttss2si;
 932                 for (i = 0; i < 4; i++) {
 933                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 934                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 935                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 936                 }
 937                 break;
 938 
 939         case cvtps2dq:
 940                 dummy.op = cvtss2si;
 941                 for (i = 0; i < 4; i++) {
 942                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 943                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 944                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 945                 }
 946                 break;
 947 
 948         case cvtpi2ps:
 949                 dummy.op = cvtsi2ss;
 950                 for (i = 0; i < 2; i++) {
 951                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 952                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 953                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 954                 }
 955                 break;
 956 
 957         case cvttps2pi:
 958                 dummy.op = cvttss2si;
 959                 for (i = 0; i < 2; i++) {
 960                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 961                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 962                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 963                 }
 964                 break;
 965 
 966         case cvtps2pi:
 967                 dummy.op = cvtss2si;
 968                 for (i = 0; i < 2; i++) {
 969                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 970                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 971                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 972                 }
 973                 break;
 974 
 975         case cmppd:
 976                 dummy.op = cmpsd;
 977                 dummy.imm = inst->imm;
 978                 for (i = 0; i < 2; i++) {
 979                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 980                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 981                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 982                 }
 983                 break;
 984 
 985         case minpd:
 986                 dummy.op = minsd;
 987                 for (i = 0; i < 2; i++) {
 988                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 989                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 990                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 991                 }
 992                 break;
 993 
 994         case maxpd:
 995                 dummy.op = maxsd;
 996                 for (i = 0; i < 2; i++) {
 997                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 998                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 999                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000                 }
1001                 break;
1002 
1003         case addpd:
1004                 dummy.op = addsd;
1005                 for (i = 0; i < 2; i++) {
1006                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1007                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1008                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1009                 }
1010                 break;
1011 
1012         case subpd:
1013                 dummy.op = subsd;
1014                 for (i = 0; i < 2; i++) {
1015                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1016                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1017                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1018                 }
1019                 break;
1020 
1021         case mulpd:
1022                 dummy.op = mulsd;
1023                 for (i = 0; i < 2; i++) {
1024                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1025                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1026                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1027                 }
1028                 break;
1029 
1030         case divpd:
1031                 dummy.op = divsd;
1032                 for (i = 0; i < 2; i++) {
1033                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1034                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1035                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1036                 }
1037                 break;
1038 
1039         case sqrtpd:
1040                 dummy.op = sqrtsd;
1041                 for (i = 0; i < 2; i++) {
1042                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1043                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1044                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1045                 }
1046                 break;
1047 
1048         case cvtpi2pd:
1049         case cvtdq2pd:
1050                 dummy.op = cvtsi2sd;
1051                 for (i = 0; i < 2; i++) {
1052                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1053                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055                 }
1056                 break;
1057 
1058         case cvttpd2pi:
1059         case cvttpd2dq:
1060                 dummy.op = cvttsd2si;
1061                 for (i = 0; i < 2; i++) {
1062                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1063                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1064                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1065                 }
1066                 break;
1067 
1068         case cvtpd2pi:
1069         case cvtpd2dq:
1070                 dummy.op = cvtsd2si;
1071                 for (i = 0; i < 2; i++) {
1072                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1073                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1074                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1075                 }
1076                 break;
1077 
1078         case cvtps2pd:
1079                 dummy.op = cvtss2sd;
1080                 for (i = 0; i < 2; i++) {
1081                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1082                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1083                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1084                 }
1085                 break;
1086 
1087         case cvtpd2ps:
1088                 dummy.op = cvtsd2ss;
1089                 for (i = 0; i < 2; i++) {
1090                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1091                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1092                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1093                 }
1094         default:
1095                 break;
1096         }
1097 }
1098 
1099 /*
1100  * Store the result value from *info in the destination of the scalar
1101  * SSE instruction specified by *inst.  If no result is given but the
1102  * exception is underflow or overflow, supply the default trapped result.
1103  *
1104  * This routine does not work if the instruction specified by *inst
1105  * is not a scalar instruction.
1106  */
1107 void
1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1109     fex_info_t *info)
1110 {
1111         int             i = 0;
1112         long long       l = 0L;;
1113         float           f = 0.0, fscl;
1114         double          d = 0.0L, dscl;
1115 
1116         /* for compares that write eflags, just set the flags
1117            to indicate "unordered" */
1118         if (inst->op == ucomiss || inst->op == comiss ||
1119             inst->op == ucomisd || inst->op == comisd) {
1120                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1121                 return;
1122         }
1123 
1124         /* if info doesn't specify a result value, try to generate
1125            the default trapped result */
1126         if (info->res.type == fex_nodata) {
1127                 /* set scale factors for exponent wrapping */
1128                 switch (e) {
1129                 case fex_overflow:
1130                         fscl = 1.262177448e-29f; /* 2^-96 */
1131                         dscl = 6.441148769597133308e-232; /* 2^-768 */
1132                         break;
1133 
1134                 case fex_underflow:
1135                         fscl = 7.922816251e+28f; /* 2^96 */
1136                         dscl = 1.552518092300708935e+231; /* 2^768 */
1137                         break;
1138 
1139                 default:
1140                         (void) __fex_get_sse_op(uap, inst, info);
1141                         if (info->res.type == fex_nodata)
1142                                 return;
1143                         goto stuff;
1144                 }
1145 
1146                 /* generate the wrapped result */
1147                 if (inst->op == cvtsd2ss) {
1148                         info->op1.type = fex_double;
1149                         info->op1.val.d = inst->op2->d[0];
1150                         info->op2.type = fex_nodata;
1151                         info->res.type = fex_float;
1152                         info->res.val.f = (float)(fscl * (fscl *
1153                             info->op1.val.d));
1154                 } else if ((int)inst->op & DOUBLE) {
1155                         info->op1.type = fex_double;
1156                         info->op1.val.d = inst->op1->d[0];
1157                         info->op2.type = fex_double;
1158                         info->op2.val.d = inst->op2->d[0];
1159                         info->res.type = fex_double;
1160                         switch (inst->op) {
1161                         case addsd:
1162                                 info->res.val.d = dscl * (dscl *
1163                                     info->op1.val.d + dscl * info->op2.val.d);
1164                                 break;
1165 
1166                         case subsd:
1167                                 info->res.val.d = dscl * (dscl *
1168                                     info->op1.val.d - dscl * info->op2.val.d);
1169                                 break;
1170 
1171                         case mulsd:
1172                                 info->res.val.d = (dscl * info->op1.val.d) *
1173                                     (dscl * info->op2.val.d);
1174                                 break;
1175 
1176                         case divsd:
1177                                 info->res.val.d = (dscl * info->op1.val.d) /
1178                                     (info->op2.val.d / dscl);
1179                                 break;
1180 
1181                         default:
1182                                 return;
1183                         }
1184                 } else {
1185                         info->op1.type = fex_float;
1186                         info->op1.val.f = inst->op1->f[0];
1187                         info->op2.type = fex_float;
1188                         info->op2.val.f = inst->op2->f[0];
1189                         info->res.type = fex_float;
1190                         switch (inst->op) {
1191                         case addss:
1192                                 info->res.val.f = fscl * (fscl *
1193                                     info->op1.val.f + fscl * info->op2.val.f);
1194                                 break;
1195 
1196                         case subss:
1197                                 info->res.val.f = fscl * (fscl *
1198                                     info->op1.val.f - fscl * info->op2.val.f);
1199                                 break;
1200 
1201                         case mulss:
1202                                 info->res.val.f = (fscl * info->op1.val.f) *
1203                                     (fscl * info->op2.val.f);
1204                                 break;
1205 
1206                         case divss:
1207                                 info->res.val.f = (fscl * info->op1.val.f) /
1208                                     (info->op2.val.f / fscl);
1209                                 break;
1210 
1211                         default:
1212                                 return;
1213                         }
1214                 }
1215         }
1216 
1217         /* put the result in the destination */
1218 stuff:
1219         if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1220             || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1221                 switch (info->res.type) {
1222                 case fex_int:
1223                         i = info->res.val.i;
1224                         break;
1225 
1226                 case fex_llong:
1227                         i = info->res.val.l;
1228                         break;
1229 
1230                 case fex_float:
1231                         i = info->res.val.f;
1232                         break;
1233 
1234                 case fex_double:
1235                         i = info->res.val.d;
1236                         break;
1237 
1238                 case fex_ldouble:
1239                         i = info->res.val.q;
1240                         break;
1241 
1242                 default:
1243                         break;
1244                 }
1245                 inst->op1->i[0] = i;
1246         } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1247             inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1248             inst->op == cvtsd2siq) {
1249                 switch (info->res.type) {
1250                 case fex_int:
1251                         l = info->res.val.i;
1252                         break;
1253 
1254                 case fex_llong:
1255                         l = info->res.val.l;
1256                         break;
1257 
1258                 case fex_float:
1259                         l = info->res.val.f;
1260                         break;
1261 
1262                 case fex_double:
1263                         l = info->res.val.d;
1264                         break;
1265 
1266                 case fex_ldouble:
1267                         l = info->res.val.q;
1268                         break;
1269 
1270                 default:
1271                         break;
1272                 }
1273                 inst->op1->l[0] = l;
1274         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1275             inst->op == cvtss2sd) {
1276                 switch (info->res.type) {
1277                 case fex_int:
1278                         d = info->res.val.i;
1279                         break;
1280 
1281                 case fex_llong:
1282                         d = info->res.val.l;
1283                         break;
1284 
1285                 case fex_float:
1286                         d = info->res.val.f;
1287                         break;
1288 
1289                 case fex_double:
1290                         d = info->res.val.d;
1291                         break;
1292 
1293                 case fex_ldouble:
1294                         d = info->res.val.q;
1295                         break;
1296 
1297                 default:
1298                         break;
1299                 }
1300                 inst->op1->d[0] = d;
1301         } else {
1302                 switch (info->res.type) {
1303                 case fex_int:
1304                         f = info->res.val.i;
1305                         break;
1306 
1307                 case fex_llong:
1308                         f = info->res.val.l;
1309                         break;
1310 
1311                 case fex_float:
1312                         f = info->res.val.f;
1313                         break;
1314 
1315                 case fex_double:
1316                         f = info->res.val.d;
1317                         break;
1318 
1319                 case fex_ldouble:
1320                         f = info->res.val.q;
1321                         break;
1322 
1323                 default:
1324                         break;
1325                 }
1326                 inst->op1->f[0] = f;
1327         }
1328 }
1329 
1330 /*
1331  * Store the results from a SIMD instruction.  For each i, store
1332  * the result value from info[i] in the i-th part of the destination
1333  * of the SIMD SSE instruction specified by *inst.  If no result
1334  * is given but the exception indicated by e[i] is underflow or
1335  * overflow, supply the default trapped result.
1336  *
1337  * This routine does not work if the instruction specified by *inst
1338  * is not a SIMD instruction.
1339  */
1340 void
1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1342     fex_info_t *info)
1343 {
1344         sseinst_t       dummy;
1345         int             i;
1346 
1347         /* store each part */
1348         switch (inst->op) {
1349         case cmpps:
1350                 dummy.op = cmpss;
1351                 dummy.imm = inst->imm;
1352                 for (i = 0; i < 4; i++) {
1353                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1354                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1355                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1356                 }
1357                 break;
1358 
1359         case minps:
1360                 dummy.op = minss;
1361                 for (i = 0; i < 4; i++) {
1362                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1363                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1364                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1365                 }
1366                 break;
1367 
1368         case maxps:
1369                 dummy.op = maxss;
1370                 for (i = 0; i < 4; i++) {
1371                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1372                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1373                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1374                 }
1375                 break;
1376 
1377         case addps:
1378                 dummy.op = addss;
1379                 for (i = 0; i < 4; i++) {
1380                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1381                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1382                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1383                 }
1384                 break;
1385 
1386         case subps:
1387                 dummy.op = subss;
1388                 for (i = 0; i < 4; i++) {
1389                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1390                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1391                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1392                 }
1393                 break;
1394 
1395         case mulps:
1396                 dummy.op = mulss;
1397                 for (i = 0; i < 4; i++) {
1398                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1399                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1400                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1401                 }
1402                 break;
1403 
1404         case divps:
1405                 dummy.op = divss;
1406                 for (i = 0; i < 4; i++) {
1407                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1408                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1409                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1410                 }
1411                 break;
1412 
1413         case sqrtps:
1414                 dummy.op = sqrtss;
1415                 for (i = 0; i < 4; i++) {
1416                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1417                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1418                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1419                 }
1420                 break;
1421 
1422         case cvtdq2ps:
1423                 dummy.op = cvtsi2ss;
1424                 for (i = 0; i < 4; i++) {
1425                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1426                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1427                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1428                 }
1429                 break;
1430 
1431         case cvttps2dq:
1432                 dummy.op = cvttss2si;
1433                 for (i = 0; i < 4; i++) {
1434                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1435                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1436                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1437                 }
1438                 break;
1439 
1440         case cvtps2dq:
1441                 dummy.op = cvtss2si;
1442                 for (i = 0; i < 4; i++) {
1443                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1444                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1445                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1446                 }
1447                 break;
1448 
1449         case cvtpi2ps:
1450                 dummy.op = cvtsi2ss;
1451                 for (i = 0; i < 2; i++) {
1452                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1453                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1454                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1455                 }
1456                 break;
1457 
1458         case cvttps2pi:
1459                 dummy.op = cvttss2si;
1460                 for (i = 0; i < 2; i++) {
1461                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1462                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1463                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1464                 }
1465                 break;
1466 
1467         case cvtps2pi:
1468                 dummy.op = cvtss2si;
1469                 for (i = 0; i < 2; i++) {
1470                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1471                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1472                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1473                 }
1474                 break;
1475 
1476         case cmppd:
1477                 dummy.op = cmpsd;
1478                 dummy.imm = inst->imm;
1479                 for (i = 0; i < 2; i++) {
1480                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1481                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1482                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1483                 }
1484                 break;
1485 
1486         case minpd:
1487                 dummy.op = minsd;
1488                 for (i = 0; i < 2; i++) {
1489                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1490                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1491                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1492                 }
1493                 break;
1494 
1495         case maxpd:
1496                 dummy.op = maxsd;
1497                 for (i = 0; i < 2; i++) {
1498                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1499                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1500                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1501                 }
1502                 break;
1503 
1504         case addpd:
1505                 dummy.op = addsd;
1506                 for (i = 0; i < 2; i++) {
1507                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1508                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1509                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1510                 }
1511                 break;
1512 
1513         case subpd:
1514                 dummy.op = subsd;
1515                 for (i = 0; i < 2; i++) {
1516                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1517                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1518                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1519                 }
1520                 break;
1521 
1522         case mulpd:
1523                 dummy.op = mulsd;
1524                 for (i = 0; i < 2; i++) {
1525                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1526                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1527                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1528                 }
1529                 break;
1530 
1531         case divpd:
1532                 dummy.op = divsd;
1533                 for (i = 0; i < 2; i++) {
1534                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1535                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1536                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1537                 }
1538                 break;
1539 
1540         case sqrtpd:
1541                 dummy.op = sqrtsd;
1542                 for (i = 0; i < 2; i++) {
1543                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1544                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1545                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1546                 }
1547                 break;
1548 
1549         case cvtpi2pd:
1550         case cvtdq2pd:
1551                 dummy.op = cvtsi2sd;
1552                 for (i = 0; i < 2; i++) {
1553                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1554                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1555                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1556                 }
1557                 break;
1558 
1559         case cvttpd2pi:
1560         case cvttpd2dq:
1561                 dummy.op = cvttsd2si;
1562                 for (i = 0; i < 2; i++) {
1563                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1564                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1565                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566                 }
1567                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1568                 if (inst->op == cvttpd2dq)
1569                         inst->op1->l[1] = 0ll;
1570                 break;
1571 
1572         case cvtpd2pi:
1573         case cvtpd2dq:
1574                 dummy.op = cvtsd2si;
1575                 for (i = 0; i < 2; i++) {
1576                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1577                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1578                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1579                 }
1580                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1581                 if (inst->op == cvtpd2dq)
1582                         inst->op1->l[1] = 0ll;
1583                 break;
1584 
1585         case cvtps2pd:
1586                 dummy.op = cvtss2sd;
1587                 for (i = 0; i < 2; i++) {
1588                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1589                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1590                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1591                 }
1592                 break;
1593 
1594         case cvtpd2ps:
1595                 dummy.op = cvtsd2ss;
1596                 for (i = 0; i < 2; i++) {
1597                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1598                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1599                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1600                 }
1601                 /* zero the high 64 bits of the destination */
1602                 inst->op1->l[1] = 0ll;
1603 
1604         default:
1605                 break;
1606         }
1607 }
1608