1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include "fenv_synonyms.h"
  31 #include <ucontext.h>
  32 #include <fenv.h>
  33 #if defined(__SUNPRO_C)
  34 #include <sunmath.h>
  35 #else
  36 #include <sys/ieeefp.h>
  37 #endif
  38 #include "fex_handler.h"
  39 #include "fenv_inlines.h"
  40 
  41 #if !defined(REG_PC)
  42 #define REG_PC  EIP
  43 #endif
  44 
  45 #if !defined(REG_PS)
  46 #define REG_PS  EFL
  47 #endif
  48 
  49 #ifdef __amd64
  50 #define regno(X)        ((X < 4)? REG_RAX - X : \
  51                         ((X > 4)? REG_RAX + 1 - X : REG_RSP))
  52 #else
  53 #define regno(X)        (EAX - X)
  54 #endif
  55 
  56 /*
  57  * Support for SSE instructions
  58  */
  59 
  60 /*
  61  * Decode an SSE instruction.  Fill in *inst and return the length of the
  62  * instruction in bytes.  Return 0 if the instruction is not recognized.
  63  */
  64 int
  65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  66 {
  67         unsigned char   *ip;
  68         char            *addr;
  69         int             i, dbl, simd, rex, modrm, sib, r;
  70 
  71         i = 0;
  72         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  73 
  74         /* look for pseudo-prefixes */
  75         dbl = 0;
  76         simd = SIMD;
  77         if (ip[i] == 0xF3) {
  78                 simd = 0;
  79                 i++;
  80         } else if (ip[i] == 0x66) {
  81                 dbl = DOUBLE;
  82                 i++;
  83         } else if (ip[i] == 0xF2) {
  84                 dbl = DOUBLE;
  85                 simd = 0;
  86                 i++;
  87         }
  88 
  89         /* look for AMD64 REX prefix */
  90         rex = 0;
  91         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  92                 rex = ip[i];
  93                 i++;
  94         }
  95 
  96         /* parse opcode */
  97         if (ip[i++] != 0x0F)
  98                 return 0;
  99         switch (ip[i++]) {
 100         case 0x2A:
 101                 inst->op = (int)cvtsi2ss + simd + dbl;
 102                 if (!simd)
 103                         inst->op = (int)inst->op + (rex & 8);
 104                 break;
 105 
 106         case 0x2C:
 107                 inst->op = (int)cvttss2si + simd + dbl;
 108                 if (!simd)
 109                         inst->op = (int)inst->op + (rex & 8);
 110                 break;
 111 
 112         case 0x2D:
 113                 inst->op = (int)cvtss2si + simd + dbl;
 114                 if (!simd)
 115                         inst->op = (int)inst->op + (rex & 8);
 116                 break;
 117 
 118         case 0x2E:
 119                 /* oddball: scalar instruction in a SIMD opcode group */
 120                 if (!simd)
 121                         return 0;
 122                 inst->op = (int)ucomiss + dbl;
 123                 break;
 124 
 125         case 0x2F:
 126                 /* oddball: scalar instruction in a SIMD opcode group */
 127                 if (!simd)
 128                         return 0;
 129                 inst->op = (int)comiss + dbl;
 130                 break;
 131 
 132         case 0x51:
 133                 inst->op = (int)sqrtss + simd + dbl;
 134                 break;
 135 
 136         case 0x58:
 137                 inst->op = (int)addss + simd + dbl;
 138                 break;
 139 
 140         case 0x59:
 141                 inst->op = (int)mulss + simd + dbl;
 142                 break;
 143 
 144         case 0x5A:
 145                 inst->op = (int)cvtss2sd + simd + dbl;
 146                 break;
 147 
 148         case 0x5B:
 149                 if (dbl) {
 150                         if (simd)
 151                                 inst->op = cvtps2dq;
 152                         else
 153                                 return 0;
 154                 } else {
 155                         inst->op = (simd)? cvtdq2ps : cvttps2dq;
 156                 }
 157                 break;
 158 
 159         case 0x5C:
 160                 inst->op = (int)subss + simd + dbl;
 161                 break;
 162 
 163         case 0x5D:
 164                 inst->op = (int)minss + simd + dbl;
 165                 break;
 166 
 167         case 0x5E:
 168                 inst->op = (int)divss + simd + dbl;
 169                 break;
 170 
 171         case 0x5F:
 172                 inst->op = (int)maxss + simd + dbl;
 173                 break;
 174 
 175         case 0xC2:
 176                 inst->op = (int)cmpss + simd + dbl;
 177                 break;
 178 
 179         case 0xE6:
 180                 if (simd) {
 181                         if (dbl)
 182                                 inst->op = cvttpd2dq;
 183                         else
 184                                 return 0;
 185                 } else {
 186                         inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
 187                 }
 188                 break;
 189 
 190         default:
 191                 return 0;
 192         }
 193 
 194         /* locate operands */
 195         modrm = ip[i++];
 196 
 197         if (inst->op == cvtss2si || inst->op == cvttss2si ||
 198             inst->op == cvtsd2si || inst->op == cvttsd2si ||
 199             inst->op == cvtss2siq || inst->op == cvttss2siq ||
 200             inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
 201                 /* op1 is a gp register */
 202                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 203                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 204         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
 205             inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
 206                 /* op1 is a mmx register */
 207 #ifdef __amd64
 208                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
 209                     fpchip_state.st[(modrm >> 3) & 7];
 210 #else
 211                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 212                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 213                     fpchip_state.state[7]);
 214 #endif
 215         } else {
 216                 /* op1 is a xmm register */
 217                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 218                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 219                     fp_reg_set.fpchip_state.xmm[r];
 220         }
 221 
 222         if ((modrm >> 6) == 3) {
 223                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
 224                     inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
 225                         /* op2 is a gp register */
 226                         r = ((rex & 1) << 3) | (modrm & 7);
 227                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
 228                             gregs[regno(r)];
 229                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 230                         /* op2 is a mmx register */
 231 #ifdef __amd64
 232                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 233                             fp_reg_set.fpchip_state.st[modrm & 7];
 234 #else
 235                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 236                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 237                             fpchip_state.state[7]);
 238 #endif
 239                 } else {
 240                         /* op2 is a xmm register */
 241                         r = ((rex & 1) << 3) | (modrm & 7);
 242                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 243                             fp_reg_set.fpchip_state.xmm[r];
 244                 }
 245         } else if ((modrm & 0xc7) == 0x05) {
 246 #if defined(__amd64)
 247                 /* address of next instruction + offset */
 248                 r = i + 4;
 249                 if (inst->op == cmpss || inst->op == cmpps ||
 250                     inst->op == cmpsd || inst->op == cmppd)
 251                         r++;
 252                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 253 #else
 254                 /* absolute address */
 255                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 256 #endif
 257                 i += 4;
 258         } else {
 259                 /* complex address */
 260                 if ((modrm & 7) == 4) {
 261                         /* parse sib byte */
 262                         sib = ip[i++];
 263                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 264                                 /* start with absolute address */
 265                                 addr = (char *)(uintptr_t)(ip + i);
 266                                 i += 4;
 267                         } else {
 268                                 /* start with base */
 269                                 r = ((rex & 1) << 3) | (sib & 7);
 270                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 271                         }
 272                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);
 273                         if (r != 4) {
 274                                 /* add scaled index */
 275                                 addr += uap->uc_mcontext.gregs[regno(r)]
 276                                     << (sib >> 6);
 277                         }
 278                 } else {
 279                         r = ((rex & 1) << 3) | (modrm & 7);
 280                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 281                 }
 282 
 283                 /* add displacement, if any */
 284                 if ((modrm >> 6) == 1) {
 285                         addr += (char)ip[i++];
 286                 } else if ((modrm >> 6) == 2) {
 287                         addr += *(int *)(ip + i);
 288                         i += 4;
 289                 }
 290                 inst->op2 = (sseoperand_t *)addr;
 291         }
 292 
 293         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 294             inst->op == cmppd) {
 295                 /* get the immediate operand */
 296                 inst->imm = ip[i++];
 297         }
 298 
 299         return i;
 300 }
 301 
 302 static enum fp_class_type
 303 my_fp_classf(float *x)
 304 {
 305         int     i = *(int *)x & ~0x80000000;
 306 
 307         if (i < 0x7f800000) {
 308                 if (i < 0x00800000)
 309                         return ((i == 0)? fp_zero : fp_subnormal);
 310                 return fp_normal;
 311         }
 312         else if (i == 0x7f800000)
 313                 return fp_infinity;
 314         else if (i & 0x400000)
 315                 return fp_quiet;
 316         else
 317                 return fp_signaling;
 318 }
 319 
 320 static enum fp_class_type
 321 my_fp_class(double *x)
 322 {
 323         int     i = *(1+(int *)x) & ~0x80000000;
 324 
 325         if (i < 0x7ff00000) {
 326                 if (i < 0x00100000)
 327                         return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
 328                 return fp_normal;
 329         }
 330         else if (i == 0x7ff00000 && *(int *)x == 0)
 331                 return fp_infinity;
 332         else if (i & 0x80000)
 333                 return fp_quiet;
 334         else
 335                 return fp_signaling;
 336 }
 337 
 338 /*
 339  * Inspect a scalar SSE instruction that incurred an invalid operation
 340  * exception to determine which type of exception it was.
 341  */
 342 static enum fex_exception
 343 __fex_get_sse_invalid_type(sseinst_t *inst)
 344 {
 345         enum fp_class_type      t1, t2;
 346 
 347         /* check op2 for signaling nan */
 348         t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
 349             my_fp_classf(&inst->op2->f[0]);
 350         if (t2 == fp_signaling)
 351                 return fex_inv_snan;
 352 
 353         /* eliminate all single-operand instructions */
 354         switch (inst->op) {
 355         case cvtsd2ss:
 356         case cvtss2sd:
 357                 /* hmm, this shouldn't have happened */
 358                 return (enum fex_exception) -1;
 359 
 360         case sqrtss:
 361         case sqrtsd:
 362                 return fex_inv_sqrt;
 363 
 364         case cvtss2si:
 365         case cvtsd2si:
 366         case cvttss2si:
 367         case cvttsd2si:
 368         case cvtss2siq:
 369         case cvtsd2siq:
 370         case cvttss2siq:
 371         case cvttsd2siq:
 372                 return fex_inv_int;
 373         }
 374 
 375         /* check op1 for signaling nan */
 376         t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
 377             my_fp_classf(&inst->op1->f[0]);
 378         if (t1 == fp_signaling)
 379                 return fex_inv_snan;
 380 
 381         /* check two-operand instructions for other cases */
 382         switch (inst->op) {
 383         case cmpss:
 384         case cmpsd:
 385         case minss:
 386         case minsd:
 387         case maxss:
 388         case maxsd:
 389         case comiss:
 390         case comisd:
 391                 return fex_inv_cmp;
 392 
 393         case addss:
 394         case addsd:
 395         case subss:
 396         case subsd:
 397                 if (t1 == fp_infinity && t2 == fp_infinity)
 398                         return fex_inv_isi;
 399                 break;
 400 
 401         case mulss:
 402         case mulsd:
 403                 if ((t1 == fp_zero && t2 == fp_infinity) ||
 404                     (t2 == fp_zero && t1 == fp_infinity))
 405                         return fex_inv_zmi;
 406                 break;
 407 
 408         case divss:
 409         case divsd:
 410                 if (t1 == fp_zero && t2 == fp_zero)
 411                         return fex_inv_zdz;
 412                 if (t1 == fp_infinity && t2 == fp_infinity)
 413                         return fex_inv_idi;
 414         }
 415 
 416         return (enum fex_exception)-1;
 417 }
 418 
 419 /* inline templates */
 420 extern void sse_cmpeqss(float *, float *, int *);
 421 extern void sse_cmpltss(float *, float *, int *);
 422 extern void sse_cmpless(float *, float *, int *);
 423 extern void sse_cmpunordss(float *, float *, int *);
 424 extern void sse_minss(float *, float *, float *);
 425 extern void sse_maxss(float *, float *, float *);
 426 extern void sse_addss(float *, float *, float *);
 427 extern void sse_subss(float *, float *, float *);
 428 extern void sse_mulss(float *, float *, float *);
 429 extern void sse_divss(float *, float *, float *);
 430 extern void sse_sqrtss(float *, float *);
 431 extern void sse_ucomiss(float *, float *);
 432 extern void sse_comiss(float *, float *);
 433 extern void sse_cvtss2sd(float *, double *);
 434 extern void sse_cvtsi2ss(int *, float *);
 435 extern void sse_cvttss2si(float *, int *);
 436 extern void sse_cvtss2si(float *, int *);
 437 #ifdef __amd64
 438 extern void sse_cvtsi2ssq(long long *, float *);
 439 extern void sse_cvttss2siq(float *, long long *);
 440 extern void sse_cvtss2siq(float *, long long *);
 441 #endif
 442 extern void sse_cmpeqsd(double *, double *, long long *);
 443 extern void sse_cmpltsd(double *, double *, long long *);
 444 extern void sse_cmplesd(double *, double *, long long *);
 445 extern void sse_cmpunordsd(double *, double *, long long *);
 446 extern void sse_minsd(double *, double *, double *);
 447 extern void sse_maxsd(double *, double *, double *);
 448 extern void sse_addsd(double *, double *, double *);
 449 extern void sse_subsd(double *, double *, double *);
 450 extern void sse_mulsd(double *, double *, double *);
 451 extern void sse_divsd(double *, double *, double *);
 452 extern void sse_sqrtsd(double *, double *);
 453 extern void sse_ucomisd(double *, double *);
 454 extern void sse_comisd(double *, double *);
 455 extern void sse_cvtsd2ss(double *, float *);
 456 extern void sse_cvtsi2sd(int *, double *);
 457 extern void sse_cvttsd2si(double *, int *);
 458 extern void sse_cvtsd2si(double *, int *);
 459 #ifdef __amd64
 460 extern void sse_cvtsi2sdq(long long *, double *);
 461 extern void sse_cvttsd2siq(double *, long long *);
 462 extern void sse_cvtsd2siq(double *, long long *);
 463 #endif
 464 
 465 /*
 466  * Fill in *info with the operands, default untrapped result, and
 467  * flags produced by a scalar SSE instruction, and return the type
 468  * of trapped exception (if any).  On entry, the mxcsr must have
 469  * all exceptions masked and all flags clear.  The same conditions
 470  * will hold on exit.
 471  *
 472  * This routine does not work if the instruction specified by *inst
 473  * is not a scalar instruction.
 474  */
 475 enum fex_exception
 476 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 477 {
 478         unsigned int    e, te, mxcsr, oldmxcsr, subnorm;
 479 
 480         /*
 481          * Perform the operation with traps disabled and check the
 482          * exception flags.  If the underflow trap was enabled, also
 483          * check for an exact subnormal result.
 484          */
 485         __fenv_getmxcsr(&oldmxcsr);
 486         subnorm = 0;
 487         if ((int)inst->op & DOUBLE) {
 488                 if (inst->op == cvtsi2sd) {
 489                         info->op1.type = fex_int;
 490                         info->op1.val.i = inst->op2->i[0];
 491                         info->op2.type = fex_nodata;
 492                 } else if (inst->op == cvtsi2sdq) {
 493                         info->op1.type = fex_llong;
 494                         info->op1.val.l = inst->op2->l[0];
 495                         info->op2.type = fex_nodata;
 496                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 497                     inst->op == cvttsd2si || inst->op == cvtsd2si ||
 498                     inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
 499                         info->op1.type = fex_double;
 500                         info->op1.val.d = inst->op2->d[0];
 501                         info->op2.type = fex_nodata;
 502                 } else {
 503                         info->op1.type = fex_double;
 504                         info->op1.val.d = inst->op1->d[0];
 505                         info->op2.type = fex_double;
 506                         info->op2.val.d = inst->op2->d[0];
 507                 }
 508                 info->res.type = fex_double;
 509                 switch (inst->op) {
 510                 case cmpsd:
 511                         info->op = fex_cmp;
 512                         info->res.type = fex_llong;
 513                         switch (inst->imm & 3) {
 514                         case 0:
 515                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 516                                     &info->res.val.l);
 517                                 break;
 518 
 519                         case 1:
 520                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 521                                     &info->res.val.l);
 522                                 break;
 523 
 524                         case 2:
 525                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 526                                     &info->res.val.l);
 527                                 break;
 528 
 529                         case 3:
 530                                 sse_cmpunordsd(&info->op1.val.d,
 531                                     &info->op2.val.d, &info->res.val.l);
 532                         }
 533                         if (inst->imm & 4)
 534                                 info->res.val.l ^= 0xffffffffffffffffull;
 535                         break;
 536 
 537                 case minsd:
 538                         info->op = fex_other;
 539                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 540                             &info->res.val.d);
 541                         break;
 542 
 543                 case maxsd:
 544                         info->op = fex_other;
 545                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 546                             &info->res.val.d);
 547                         break;
 548 
 549                 case addsd:
 550                         info->op = fex_add;
 551                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 552                             &info->res.val.d);
 553                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 554                                 subnorm = 1;
 555                         break;
 556 
 557                 case subsd:
 558                         info->op = fex_sub;
 559                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 560                             &info->res.val.d);
 561                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 562                                 subnorm = 1;
 563                         break;
 564 
 565                 case mulsd:
 566                         info->op = fex_mul;
 567                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 568                             &info->res.val.d);
 569                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 570                                 subnorm = 1;
 571                         break;
 572 
 573                 case divsd:
 574                         info->op = fex_div;
 575                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 576                             &info->res.val.d);
 577                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 578                                 subnorm = 1;
 579                         break;
 580 
 581                 case sqrtsd:
 582                         info->op = fex_sqrt;
 583                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 584                         break;
 585 
 586                 case cvtsd2ss:
 587                         info->op = fex_cnvt;
 588                         info->res.type = fex_float;
 589                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
 590                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 591                                 subnorm = 1;
 592                         break;
 593 
 594                 case cvtsi2sd:
 595                         info->op = fex_cnvt;
 596                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 597                         break;
 598 
 599                 case cvttsd2si:
 600                         info->op = fex_cnvt;
 601                         info->res.type = fex_int;
 602                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 603                         break;
 604 
 605                 case cvtsd2si:
 606                         info->op = fex_cnvt;
 607                         info->res.type = fex_int;
 608                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 609                         break;
 610 
 611 #ifdef __amd64
 612                 case cvtsi2sdq:
 613                         info->op = fex_cnvt;
 614                         sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
 615                         break;
 616 
 617                 case cvttsd2siq:
 618                         info->op = fex_cnvt;
 619                         info->res.type = fex_llong;
 620                         sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
 621                         break;
 622 
 623                 case cvtsd2siq:
 624                         info->op = fex_cnvt;
 625                         info->res.type = fex_llong;
 626                         sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
 627                         break;
 628 #endif
 629 
 630                 case ucomisd:
 631                         info->op = fex_cmp;
 632                         info->res.type = fex_nodata;
 633                         sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
 634                         break;
 635 
 636                 case comisd:
 637                         info->op = fex_cmp;
 638                         info->res.type = fex_nodata;
 639                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 640                         break;
 641                 }
 642         } else {
 643                 if (inst->op == cvtsi2ss) {
 644                         info->op1.type = fex_int;
 645                         info->op1.val.i = inst->op2->i[0];
 646                         info->op2.type = fex_nodata;
 647                 } else if (inst->op == cvtsi2ssq) {
 648                         info->op1.type = fex_llong;
 649                         info->op1.val.l = inst->op2->l[0];
 650                         info->op2.type = fex_nodata;
 651                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 652                     inst->op == cvttss2si || inst->op == cvtss2si ||
 653                     inst->op == cvttss2siq || inst->op == cvtss2siq) {
 654                         info->op1.type = fex_float;
 655                         info->op1.val.f = inst->op2->f[0];
 656                         info->op2.type = fex_nodata;
 657                 } else {
 658                         info->op1.type = fex_float;
 659                         info->op1.val.f = inst->op1->f[0];
 660                         info->op2.type = fex_float;
 661                         info->op2.val.f = inst->op2->f[0];
 662                 }
 663                 info->res.type = fex_float;
 664                 switch (inst->op) {
 665                 case cmpss:
 666                         info->op = fex_cmp;
 667                         info->res.type = fex_int;
 668                         switch (inst->imm & 3) {
 669                         case 0:
 670                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 671                                     &info->res.val.i);
 672                                 break;
 673 
 674                         case 1:
 675                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 676                                     &info->res.val.i);
 677                                 break;
 678 
 679                         case 2:
 680                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 681                                     &info->res.val.i);
 682                                 break;
 683 
 684                         case 3:
 685                                 sse_cmpunordss(&info->op1.val.f,
 686                                     &info->op2.val.f, &info->res.val.i);
 687                         }
 688                         if (inst->imm & 4)
 689                                 info->res.val.i ^= 0xffffffffu;
 690                         break;
 691 
 692                 case minss:
 693                         info->op = fex_other;
 694                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 695                             &info->res.val.f);
 696                         break;
 697 
 698                 case maxss:
 699                         info->op = fex_other;
 700                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 701                             &info->res.val.f);
 702                         break;
 703 
 704                 case addss:
 705                         info->op = fex_add;
 706                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 707                             &info->res.val.f);
 708                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 709                                 subnorm = 1;
 710                         break;
 711 
 712                 case subss:
 713                         info->op = fex_sub;
 714                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 715                             &info->res.val.f);
 716                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 717                                 subnorm = 1;
 718                         break;
 719 
 720                 case mulss:
 721                         info->op = fex_mul;
 722                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 723                             &info->res.val.f);
 724                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 725                                 subnorm = 1;
 726                         break;
 727 
 728                 case divss:
 729                         info->op = fex_div;
 730                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 731                             &info->res.val.f);
 732                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 733                                 subnorm = 1;
 734                         break;
 735 
 736                 case sqrtss:
 737                         info->op = fex_sqrt;
 738                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 739                         break;
 740 
 741                 case cvtss2sd:
 742                         info->op = fex_cnvt;
 743                         info->res.type = fex_double;
 744                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 745                         break;
 746 
 747                 case cvtsi2ss:
 748                         info->op = fex_cnvt;
 749                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 750                         break;
 751 
 752                 case cvttss2si:
 753                         info->op = fex_cnvt;
 754                         info->res.type = fex_int;
 755                         sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
 756                         break;
 757 
 758                 case cvtss2si:
 759                         info->op = fex_cnvt;
 760                         info->res.type = fex_int;
 761                         sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
 762                         break;
 763 
 764 #ifdef __amd64
 765                 case cvtsi2ssq:
 766                         info->op = fex_cnvt;
 767                         sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
 768                         break;
 769 
 770                 case cvttss2siq:
 771                         info->op = fex_cnvt;
 772                         info->res.type = fex_llong;
 773                         sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
 774                         break;
 775 
 776                 case cvtss2siq:
 777                         info->op = fex_cnvt;
 778                         info->res.type = fex_llong;
 779                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 780                         break;
 781 #endif
 782 
 783                 case ucomiss:
 784                         info->op = fex_cmp;
 785                         info->res.type = fex_nodata;
 786                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 787                         break;
 788 
 789                 case comiss:
 790                         info->op = fex_cmp;
 791                         info->res.type = fex_nodata;
 792                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 793                         break;
 794                 }
 795         }
 796         __fenv_getmxcsr(&mxcsr);
 797         info->flags = mxcsr & 0x3d;
 798         __fenv_setmxcsr(&oldmxcsr);
 799 
 800         /* determine which exception would have been trapped */
 801         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
 802             >> 7) & 0x3d;
 803         e = mxcsr & te;
 804         if (e & FE_INVALID)
 805                 return __fex_get_sse_invalid_type(inst);
 806         if (e & FE_DIVBYZERO)
 807                 return fex_division;
 808         if (e & FE_OVERFLOW)
 809                 return fex_overflow;
 810         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 811                 return fex_underflow;
 812         if (e & FE_INEXACT)
 813                 return fex_inexact;
 814         return (enum fex_exception)-1;
 815 }
 816 
 817 /*
 818  * Emulate a SIMD SSE instruction to determine which exceptions occur
 819  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 820  * trapped exception that would occur if the i-th part of the SIMD
 821  * instruction were executed in isolation; set e[i] to -1 if no
 822  * trapped exception would occur in this part.  Also fill in info[i]
 823  * with the corresponding operands, default untrapped result, and
 824  * flags.
 825  *
 826  * This routine does not work if the instruction specified by *inst
 827  * is not a SIMD instruction.
 828  */
 829 void
 830 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 831     fex_info_t *info)
 832 {
 833         sseinst_t       dummy;
 834         int             i;
 835 
 836         e[0] = e[1] = e[2] = e[3] = -1;
 837 
 838         /* perform each part of the SIMD operation */
 839         switch (inst->op) {
 840         case cmpps:
 841                 dummy.op = cmpss;
 842                 dummy.imm = inst->imm;
 843                 for (i = 0; i < 4; i++) {
 844                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 845                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 846                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 847                 }
 848                 break;
 849 
 850         case minps:
 851                 dummy.op = minss;
 852                 for (i = 0; i < 4; i++) {
 853                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 854                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 855                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 856                 }
 857                 break;
 858 
 859         case maxps:
 860                 dummy.op = maxss;
 861                 for (i = 0; i < 4; i++) {
 862                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 863                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 864                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 865                 }
 866                 break;
 867 
 868         case addps:
 869                 dummy.op = addss;
 870                 for (i = 0; i < 4; i++) {
 871                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 872                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 873                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 874                 }
 875                 break;
 876 
 877         case subps:
 878                 dummy.op = subss;
 879                 for (i = 0; i < 4; i++) {
 880                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 881                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 882                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 883                 }
 884                 break;
 885 
 886         case mulps:
 887                 dummy.op = mulss;
 888                 for (i = 0; i < 4; i++) {
 889                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 890                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 891                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 892                 }
 893                 break;
 894 
 895         case divps:
 896                 dummy.op = divss;
 897                 for (i = 0; i < 4; i++) {
 898                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 899                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 900                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 901                 }
 902                 break;
 903 
 904         case sqrtps:
 905                 dummy.op = sqrtss;
 906                 for (i = 0; i < 4; i++) {
 907                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 908                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 909                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 910                 }
 911                 break;
 912 
 913         case cvtdq2ps:
 914                 dummy.op = cvtsi2ss;
 915                 for (i = 0; i < 4; i++) {
 916                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 917                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 918                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 919                 }
 920                 break;
 921 
 922         case cvttps2dq:
 923                 dummy.op = cvttss2si;
 924                 for (i = 0; i < 4; i++) {
 925                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 926                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 927                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 928                 }
 929                 break;
 930 
 931         case cvtps2dq:
 932                 dummy.op = cvtss2si;
 933                 for (i = 0; i < 4; i++) {
 934                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 935                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 936                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 937                 }
 938                 break;
 939 
 940         case cvtpi2ps:
 941                 dummy.op = cvtsi2ss;
 942                 for (i = 0; i < 2; i++) {
 943                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 944                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 945                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 946                 }
 947                 break;
 948 
 949         case cvttps2pi:
 950                 dummy.op = cvttss2si;
 951                 for (i = 0; i < 2; i++) {
 952                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 953                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 954                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 955                 }
 956                 break;
 957 
 958         case cvtps2pi:
 959                 dummy.op = cvtss2si;
 960                 for (i = 0; i < 2; i++) {
 961                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 962                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 963                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 964                 }
 965                 break;
 966 
 967         case cmppd:
 968                 dummy.op = cmpsd;
 969                 dummy.imm = inst->imm;
 970                 for (i = 0; i < 2; i++) {
 971                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 972                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 973                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 974                 }
 975                 break;
 976 
 977         case minpd:
 978                 dummy.op = minsd;
 979                 for (i = 0; i < 2; i++) {
 980                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 981                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 982                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 983                 }
 984                 break;
 985 
 986         case maxpd:
 987                 dummy.op = maxsd;
 988                 for (i = 0; i < 2; i++) {
 989                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 990                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 991                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 992                 }
 993                 break;
 994 
 995         case addpd:
 996                 dummy.op = addsd;
 997                 for (i = 0; i < 2; i++) {
 998                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 999                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1000                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1001                 }
1002                 break;
1003 
1004         case subpd:
1005                 dummy.op = subsd;
1006                 for (i = 0; i < 2; i++) {
1007                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1008                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1009                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1010                 }
1011                 break;
1012 
1013         case mulpd:
1014                 dummy.op = mulsd;
1015                 for (i = 0; i < 2; i++) {
1016                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1017                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1018                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1019                 }
1020                 break;
1021 
1022         case divpd:
1023                 dummy.op = divsd;
1024                 for (i = 0; i < 2; i++) {
1025                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1026                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1027                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1028                 }
1029                 break;
1030 
1031         case sqrtpd:
1032                 dummy.op = sqrtsd;
1033                 for (i = 0; i < 2; i++) {
1034                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1035                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1036                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1037                 }
1038                 break;
1039 
1040         case cvtpi2pd:
1041         case cvtdq2pd:
1042                 dummy.op = cvtsi2sd;
1043                 for (i = 0; i < 2; i++) {
1044                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1045                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1046                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1047                 }
1048                 break;
1049 
1050         case cvttpd2pi:
1051         case cvttpd2dq:
1052                 dummy.op = cvttsd2si;
1053                 for (i = 0; i < 2; i++) {
1054                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1055                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1056                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1057                 }
1058                 break;
1059 
1060         case cvtpd2pi:
1061         case cvtpd2dq:
1062                 dummy.op = cvtsd2si;
1063                 for (i = 0; i < 2; i++) {
1064                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1065                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1066                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1067                 }
1068                 break;
1069 
1070         case cvtps2pd:
1071                 dummy.op = cvtss2sd;
1072                 for (i = 0; i < 2; i++) {
1073                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1074                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1075                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1076                 }
1077                 break;
1078 
1079         case cvtpd2ps:
1080                 dummy.op = cvtsd2ss;
1081                 for (i = 0; i < 2; i++) {
1082                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1083                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1084                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1085                 }
1086         }
1087 }
1088 
1089 /*
1090  * Store the result value from *info in the destination of the scalar
1091  * SSE instruction specified by *inst.  If no result is given but the
1092  * exception is underflow or overflow, supply the default trapped result.
1093  *
1094  * This routine does not work if the instruction specified by *inst
1095  * is not a scalar instruction.
1096  */
1097 void
1098 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1099     fex_info_t *info)
1100 {
1101         int             i;
1102         long long       l;
1103         float           f, fscl;
1104         double          d, dscl;
1105 
1106         /* for compares that write eflags, just set the flags
1107            to indicate "unordered" */
1108         if (inst->op == ucomiss || inst->op == comiss ||
1109             inst->op == ucomisd || inst->op == comisd) {
1110                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1111                 return;
1112         }
1113 
1114         /* if info doesn't specify a result value, try to generate
1115            the default trapped result */
1116         if (info->res.type == fex_nodata) {
1117                 /* set scale factors for exponent wrapping */
1118                 switch (e) {
1119                 case fex_overflow:
1120                         fscl = 1.262177448e-29f; /* 2^-96 */
1121                         dscl = 6.441148769597133308e-232; /* 2^-768 */
1122                         break;
1123 
1124                 case fex_underflow:
1125                         fscl = 7.922816251e+28f; /* 2^96 */
1126                         dscl = 1.552518092300708935e+231; /* 2^768 */
1127                         break;
1128 
1129                 default:
1130                         (void) __fex_get_sse_op(uap, inst, info);
1131                         if (info->res.type == fex_nodata)
1132                                 return;
1133                         goto stuff;
1134                 }
1135 
1136                 /* generate the wrapped result */
1137                 if (inst->op == cvtsd2ss) {
1138                         info->op1.type = fex_double;
1139                         info->op1.val.d = inst->op2->d[0];
1140                         info->op2.type = fex_nodata;
1141                         info->res.type = fex_float;
1142                         info->res.val.f = (float)(fscl * (fscl *
1143                             info->op1.val.d));
1144                 } else if ((int)inst->op & DOUBLE) {
1145                         info->op1.type = fex_double;
1146                         info->op1.val.d = inst->op1->d[0];
1147                         info->op2.type = fex_double;
1148                         info->op2.val.d = inst->op2->d[0];
1149                         info->res.type = fex_double;
1150                         switch (inst->op) {
1151                         case addsd:
1152                                 info->res.val.d = dscl * (dscl *
1153                                     info->op1.val.d + dscl * info->op2.val.d);
1154                                 break;
1155 
1156                         case subsd:
1157                                 info->res.val.d = dscl * (dscl *
1158                                     info->op1.val.d - dscl * info->op2.val.d);
1159                                 break;
1160 
1161                         case mulsd:
1162                                 info->res.val.d = (dscl * info->op1.val.d) *
1163                                     (dscl * info->op2.val.d);
1164                                 break;
1165 
1166                         case divsd:
1167                                 info->res.val.d = (dscl * info->op1.val.d) /
1168                                     (info->op2.val.d / dscl);
1169                                 break;
1170 
1171                         default:
1172                                 return;
1173                         }
1174                 } else {
1175                         info->op1.type = fex_float;
1176                         info->op1.val.f = inst->op1->f[0];
1177                         info->op2.type = fex_float;
1178                         info->op2.val.f = inst->op2->f[0];
1179                         info->res.type = fex_float;
1180                         switch (inst->op) {
1181                         case addss:
1182                                 info->res.val.f = fscl * (fscl *
1183                                     info->op1.val.f + fscl * info->op2.val.f);
1184                                 break;
1185 
1186                         case subss:
1187                                 info->res.val.f = fscl * (fscl *
1188                                     info->op1.val.f - fscl * info->op2.val.f);
1189                                 break;
1190 
1191                         case mulss:
1192                                 info->res.val.f = (fscl * info->op1.val.f) *
1193                                     (fscl * info->op2.val.f);
1194                                 break;
1195 
1196                         case divss:
1197                                 info->res.val.f = (fscl * info->op1.val.f) /
1198                                     (info->op2.val.f / fscl);
1199                                 break;
1200 
1201                         default:
1202                                 return;
1203                         }
1204                 }
1205         }
1206 
1207         /* put the result in the destination */
1208 stuff:
1209         if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1210             || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1211                 switch (info->res.type) {
1212                 case fex_int:
1213                         i = info->res.val.i;
1214                         break;
1215 
1216                 case fex_llong:
1217                         i = info->res.val.l;
1218                         break;
1219 
1220                 case fex_float:
1221                         i = info->res.val.f;
1222                         break;
1223 
1224                 case fex_double:
1225                         i = info->res.val.d;
1226                         break;
1227 
1228                 case fex_ldouble:
1229                         i = info->res.val.q;
1230                         break;
1231                 }
1232                 inst->op1->i[0] = i;
1233         } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1234             inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1235             inst->op == cvtsd2siq) {
1236                 switch (info->res.type) {
1237                 case fex_int:
1238                         l = info->res.val.i;
1239                         break;
1240 
1241                 case fex_llong:
1242                         l = info->res.val.l;
1243                         break;
1244 
1245                 case fex_float:
1246                         l = info->res.val.f;
1247                         break;
1248 
1249                 case fex_double:
1250                         l = info->res.val.d;
1251                         break;
1252 
1253                 case fex_ldouble:
1254                         l = info->res.val.q;
1255                         break;
1256                 }
1257                 inst->op1->l[0] = l;
1258         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1259             inst->op == cvtss2sd) {
1260                 switch (info->res.type) {
1261                 case fex_int:
1262                         d = info->res.val.i;
1263                         break;
1264 
1265                 case fex_llong:
1266                         d = info->res.val.l;
1267                         break;
1268 
1269                 case fex_float:
1270                         d = info->res.val.f;
1271                         break;
1272 
1273                 case fex_double:
1274                         d = info->res.val.d;
1275                         break;
1276 
1277                 case fex_ldouble:
1278                         d = info->res.val.q;
1279                         break;
1280                 }
1281                 inst->op1->d[0] = d;
1282         } else {
1283                 switch (info->res.type) {
1284                 case fex_int:
1285                         f = info->res.val.i;
1286                         break;
1287 
1288                 case fex_llong:
1289                         f = info->res.val.l;
1290                         break;
1291 
1292                 case fex_float:
1293                         f = info->res.val.f;
1294                         break;
1295 
1296                 case fex_double:
1297                         f = info->res.val.d;
1298                         break;
1299 
1300                 case fex_ldouble:
1301                         f = info->res.val.q;
1302                         break;
1303                 }
1304                 inst->op1->f[0] = f;
1305         }
1306 }
1307 
1308 /*
1309  * Store the results from a SIMD instruction.  For each i, store
1310  * the result value from info[i] in the i-th part of the destination
1311  * of the SIMD SSE instruction specified by *inst.  If no result
1312  * is given but the exception indicated by e[i] is underflow or
1313  * overflow, supply the default trapped result.
1314  *
1315  * This routine does not work if the instruction specified by *inst
1316  * is not a SIMD instruction.
1317  */
1318 void
1319 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1320     fex_info_t *info)
1321 {
1322         sseinst_t       dummy;
1323         int             i;
1324 
1325         /* store each part */
1326         switch (inst->op) {
1327         case cmpps:
1328                 dummy.op = cmpss;
1329                 dummy.imm = inst->imm;
1330                 for (i = 0; i < 4; i++) {
1331                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1332                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1333                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1334                 }
1335                 break;
1336 
1337         case minps:
1338                 dummy.op = minss;
1339                 for (i = 0; i < 4; i++) {
1340                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1341                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1342                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1343                 }
1344                 break;
1345 
1346         case maxps:
1347                 dummy.op = maxss;
1348                 for (i = 0; i < 4; i++) {
1349                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1350                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1351                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1352                 }
1353                 break;
1354 
1355         case addps:
1356                 dummy.op = addss;
1357                 for (i = 0; i < 4; i++) {
1358                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1359                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1360                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1361                 }
1362                 break;
1363 
1364         case subps:
1365                 dummy.op = subss;
1366                 for (i = 0; i < 4; i++) {
1367                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1368                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1369                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1370                 }
1371                 break;
1372 
1373         case mulps:
1374                 dummy.op = mulss;
1375                 for (i = 0; i < 4; i++) {
1376                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1377                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1378                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1379                 }
1380                 break;
1381 
1382         case divps:
1383                 dummy.op = divss;
1384                 for (i = 0; i < 4; i++) {
1385                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1386                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1387                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1388                 }
1389                 break;
1390 
1391         case sqrtps:
1392                 dummy.op = sqrtss;
1393                 for (i = 0; i < 4; i++) {
1394                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1395                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1396                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1397                 }
1398                 break;
1399 
1400         case cvtdq2ps:
1401                 dummy.op = cvtsi2ss;
1402                 for (i = 0; i < 4; i++) {
1403                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1404                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1405                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1406                 }
1407                 break;
1408 
1409         case cvttps2dq:
1410                 dummy.op = cvttss2si;
1411                 for (i = 0; i < 4; i++) {
1412                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1413                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1414                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1415                 }
1416                 break;
1417 
1418         case cvtps2dq:
1419                 dummy.op = cvtss2si;
1420                 for (i = 0; i < 4; i++) {
1421                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1422                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1423                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1424                 }
1425                 break;
1426 
1427         case cvtpi2ps:
1428                 dummy.op = cvtsi2ss;
1429                 for (i = 0; i < 2; i++) {
1430                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1431                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1432                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1433                 }
1434                 break;
1435 
1436         case cvttps2pi:
1437                 dummy.op = cvttss2si;
1438                 for (i = 0; i < 2; i++) {
1439                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1440                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1441                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1442                 }
1443                 break;
1444 
1445         case cvtps2pi:
1446                 dummy.op = cvtss2si;
1447                 for (i = 0; i < 2; i++) {
1448                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1449                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1450                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1451                 }
1452                 break;
1453 
1454         case cmppd:
1455                 dummy.op = cmpsd;
1456                 dummy.imm = inst->imm;
1457                 for (i = 0; i < 2; i++) {
1458                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1459                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1460                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1461                 }
1462                 break;
1463 
1464         case minpd:
1465                 dummy.op = minsd;
1466                 for (i = 0; i < 2; i++) {
1467                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1468                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1469                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1470                 }
1471                 break;
1472 
1473         case maxpd:
1474                 dummy.op = maxsd;
1475                 for (i = 0; i < 2; i++) {
1476                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1477                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1478                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1479                 }
1480                 break;
1481 
1482         case addpd:
1483                 dummy.op = addsd;
1484                 for (i = 0; i < 2; i++) {
1485                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1486                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1487                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1488                 }
1489                 break;
1490 
1491         case subpd:
1492                 dummy.op = subsd;
1493                 for (i = 0; i < 2; i++) {
1494                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1495                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1496                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1497                 }
1498                 break;
1499 
1500         case mulpd:
1501                 dummy.op = mulsd;
1502                 for (i = 0; i < 2; i++) {
1503                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1504                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1505                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1506                 }
1507                 break;
1508 
1509         case divpd:
1510                 dummy.op = divsd;
1511                 for (i = 0; i < 2; i++) {
1512                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1513                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1514                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1515                 }
1516                 break;
1517 
1518         case sqrtpd:
1519                 dummy.op = sqrtsd;
1520                 for (i = 0; i < 2; i++) {
1521                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1522                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1523                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1524                 }
1525                 break;
1526 
1527         case cvtpi2pd:
1528         case cvtdq2pd:
1529                 dummy.op = cvtsi2sd;
1530                 for (i = 0; i < 2; i++) {
1531                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1532                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1533                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1534                 }
1535                 break;
1536 
1537         case cvttpd2pi:
1538         case cvttpd2dq:
1539                 dummy.op = cvttsd2si;
1540                 for (i = 0; i < 2; i++) {
1541                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1542                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1543                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544                 }
1545                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1546                 if (inst->op == cvttpd2dq)
1547                         inst->op1->l[1] = 0ll;
1548                 break;
1549 
1550         case cvtpd2pi:
1551         case cvtpd2dq:
1552                 dummy.op = cvtsd2si;
1553                 for (i = 0; i < 2; i++) {
1554                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1555                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1556                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1557                 }
1558                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1559                 if (inst->op == cvtpd2dq)
1560                         inst->op1->l[1] = 0ll;
1561                 break;
1562 
1563         case cvtps2pd:
1564                 dummy.op = cvtss2sd;
1565                 for (i = 0; i < 2; i++) {
1566                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1568                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569                 }
1570                 break;
1571 
1572         case cvtpd2ps:
1573                 dummy.op = cvtsd2ss;
1574                 for (i = 0; i < 2; i++) {
1575                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1576                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578                 }
1579                 /* zero the high 64 bits of the destination */
1580                 inst->op1->l[1] = 0ll;
1581         }
1582 }