Print this page
11210 libm should be cstyle(1ONBLD) clean


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */

  25 /*
  26  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27  * Use is subject to license terms.
  28  */
  29 
  30 #include <ucontext.h>
  31 #include <fenv.h>
  32 #if defined(__SUNPRO_C)
  33 #include <sunmath.h>
  34 #else
  35 #include <sys/ieeefp.h>
  36 #endif
  37 #include "fex_handler.h"
  38 #include "fenv_inlines.h"
  39 
  40 #if !defined(REG_PC)
  41 #define REG_PC  EIP
  42 #endif
  43 
  44 #if !defined(REG_PS)
  45 #define REG_PS  EFL
  46 #endif
  47 
  48 #ifdef __amd64
  49 #define regno(X)        ((X < 4)? REG_RAX - X : \
  50                         ((X > 4)? REG_RAX + 1 - X : REG_RSP))
  51 #else
  52 #define regno(X)        (EAX - X)
  53 #endif
  54 
  55 /*
  56  * Support for SSE instructions
  57  */
  58 
  59 /*
  60  * Decode an SSE instruction.  Fill in *inst and return the length of the
  61  * instruction in bytes.  Return 0 if the instruction is not recognized.
  62  */
  63 int
  64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  65 {
  66         unsigned char   *ip;
  67         char            *addr;
  68         int             i, dbl, simd, rex, modrm, sib, r;
  69 
  70         i = 0;
  71         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  72 
  73         /* look for pseudo-prefixes */
  74         dbl = 0;
  75         simd = SIMD;

  76         if (ip[i] == 0xF3) {
  77                 simd = 0;
  78                 i++;
  79         } else if (ip[i] == 0x66) {
  80                 dbl = DOUBLE;
  81                 i++;
  82         } else if (ip[i] == 0xF2) {
  83                 dbl = DOUBLE;
  84                 simd = 0;
  85                 i++;
  86         }
  87 
  88         /* look for AMD64 REX prefix */
  89         rex = 0;

  90         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  91                 rex = ip[i];
  92                 i++;
  93         }
  94 
  95         /* parse opcode */
  96         if (ip[i++] != 0x0F)
  97                 return 0;

  98         switch (ip[i++]) {
  99         case 0x2A:
 100                 inst->op = (int)cvtsi2ss + simd + dbl;

 101                 if (!simd)
 102                         inst->op = (int)inst->op + (rex & 8);

 103                 break;
 104 
 105         case 0x2C:
 106                 inst->op = (int)cvttss2si + simd + dbl;

 107                 if (!simd)
 108                         inst->op = (int)inst->op + (rex & 8);

 109                 break;
 110 
 111         case 0x2D:
 112                 inst->op = (int)cvtss2si + simd + dbl;

 113                 if (!simd)
 114                         inst->op = (int)inst->op + (rex & 8);

 115                 break;
 116 
 117         case 0x2E:

 118                 /* oddball: scalar instruction in a SIMD opcode group */
 119                 if (!simd)
 120                         return 0;

 121                 inst->op = (int)ucomiss + dbl;
 122                 break;
 123 
 124         case 0x2F:

 125                 /* oddball: scalar instruction in a SIMD opcode group */
 126                 if (!simd)
 127                         return 0;

 128                 inst->op = (int)comiss + dbl;
 129                 break;
 130 
 131         case 0x51:
 132                 inst->op = (int)sqrtss + simd + dbl;
 133                 break;
 134 
 135         case 0x58:
 136                 inst->op = (int)addss + simd + dbl;
 137                 break;
 138 
 139         case 0x59:
 140                 inst->op = (int)mulss + simd + dbl;
 141                 break;
 142 
 143         case 0x5A:
 144                 inst->op = (int)cvtss2sd + simd + dbl;
 145                 break;
 146 
 147         case 0x5B:

 148                 if (dbl) {
 149                         if (simd)
 150                                 inst->op = cvtps2dq;
 151                         else
 152                                 return 0;
 153                 } else {
 154                         inst->op = (simd)? cvtdq2ps : cvttps2dq;
 155                 }

 156                 break;
 157 
 158         case 0x5C:
 159                 inst->op = (int)subss + simd + dbl;
 160                 break;
 161 
 162         case 0x5D:
 163                 inst->op = (int)minss + simd + dbl;
 164                 break;
 165 
 166         case 0x5E:
 167                 inst->op = (int)divss + simd + dbl;
 168                 break;
 169 
 170         case 0x5F:
 171                 inst->op = (int)maxss + simd + dbl;
 172                 break;
 173 
 174         case 0xC2:
 175                 inst->op = (int)cmpss + simd + dbl;
 176                 break;
 177 
 178         case 0xE6:

 179                 if (simd) {
 180                         if (dbl)
 181                                 inst->op = cvttpd2dq;
 182                         else
 183                                 return 0;
 184                 } else {
 185                         inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
 186                 }

 187                 break;
 188 
 189         default:
 190                 return 0;
 191         }
 192 
 193         /* locate operands */
 194         modrm = ip[i++];
 195 
 196         if (inst->op == cvtss2si || inst->op == cvttss2si ||
 197             inst->op == cvtsd2si || inst->op == cvttsd2si ||
 198             inst->op == cvtss2siq || inst->op == cvttss2siq ||
 199             inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
 200                 /* op1 is a gp register */
 201                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 202                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 203         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
 204             inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
 205                 /* op1 is a mmx register */
 206 #ifdef __amd64
 207                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
 208                     fpchip_state.st[(modrm >> 3) & 7];

 209 #else
 210                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 211                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 212                     fpchip_state.state[7]);
 213 #endif
 214         } else {
 215                 /* op1 is a xmm register */
 216                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 217                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 218                     fp_reg_set.fpchip_state.xmm[r];

 219         }
 220 
 221         if ((modrm >> 6) == 3) {
 222                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
 223                     inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
 224                         /* op2 is a gp register */
 225                         r = ((rex & 1) << 3) | (modrm & 7);
 226                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
 227                             gregs[regno(r)];
 228                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 229                         /* op2 is a mmx register */
 230 #ifdef __amd64
 231                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 232                             fp_reg_set.fpchip_state.st[modrm & 7];

 233 #else
 234                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 235                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 236                             fpchip_state.state[7]);
 237 #endif
 238                 } else {
 239                         /* op2 is a xmm register */
 240                         r = ((rex & 1) << 3) | (modrm & 7);
 241                         inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 242                             fp_reg_set.fpchip_state.xmm[r];

 243                 }
 244         } else if ((modrm & 0xc7) == 0x05) {
 245 #ifdef __amd64
 246                 /* address of next instruction + offset */
 247                 r = i + 4;
 248                 if (inst->op == cmpss || inst->op == cmpps ||
 249                     inst->op == cmpsd || inst->op == cmppd)

 250                         r++;

 251                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 252 #else
 253                 /* absolute address */
 254                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 255 #endif
 256                 i += 4;
 257         } else {
 258                 /* complex address */
 259                 if ((modrm & 7) == 4) {
 260                         /* parse sib byte */
 261                         sib = ip[i++];

 262                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 263                                 /* start with absolute address */
 264                                 addr = (char *)(uintptr_t)(*(int *)(ip + i));
 265                                 i += 4;
 266                         } else {
 267                                 /* start with base */
 268                                 r = ((rex & 1) << 3) | (sib & 7);
 269                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 270                         }

 271                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);

 272                         if (r != 4) {
 273                                 /* add scaled index */
 274                                 addr += uap->uc_mcontext.gregs[regno(r)]
 275                                     << (sib >> 6);
 276                         }
 277                 } else {
 278                         r = ((rex & 1) << 3) | (modrm & 7);
 279                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 280                 }
 281 
 282                 /* add displacement, if any */
 283                 if ((modrm >> 6) == 1) {
 284                         addr += (char)ip[i++];
 285                 } else if ((modrm >> 6) == 2) {
 286                         addr += *(int *)(ip + i);
 287                         i += 4;
 288                 }

 289                 inst->op2 = (sseoperand_t *)addr;
 290         }
 291 
 292         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 293             inst->op == cmppd) {
 294                 /* get the immediate operand */
 295                 inst->imm = ip[i++];
 296         }
 297 
 298         return i;
 299 }
 300 
 301 static enum fp_class_type
 302 my_fp_classf(float *x)
 303 {
 304         int     i = *(int *)x & ~0x80000000;
 305 
 306         if (i < 0x7f800000) {
 307                 if (i < 0x00800000)
 308                         return ((i == 0)? fp_zero : fp_subnormal);
 309                 return fp_normal;







 310         }
 311         else if (i == 0x7f800000)
 312                 return fp_infinity;
 313         else if (i & 0x400000)
 314                 return fp_quiet;
 315         else
 316                 return fp_signaling;
 317 }
 318 
 319 static enum fp_class_type
 320 my_fp_class(double *x)
 321 {
 322         int     i = *(1+(int *)x) & ~0x80000000;
 323 
 324         if (i < 0x7ff00000) {
 325                 if (i < 0x00100000)
 326                         return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
 327                 return fp_normal;








 328         }
 329         else if (i == 0x7ff00000 && *(int *)x == 0)
 330                 return fp_infinity;
 331         else if (i & 0x80000)
 332                 return fp_quiet;
 333         else
 334                 return fp_signaling;
 335 }
 336 
 337 /*
 338  * Inspect a scalar SSE instruction that incurred an invalid operation
 339  * exception to determine which type of exception it was.
 340  */
 341 static enum fex_exception
 342 __fex_get_sse_invalid_type(sseinst_t *inst)
 343 {
 344         enum fp_class_type      t1, t2;
 345 
 346         /* check op2 for signaling nan */
 347         t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
 348             my_fp_classf(&inst->op2->f[0]);

 349         if (t2 == fp_signaling)
 350                 return fex_inv_snan;
 351 
 352         /* eliminate all single-operand instructions */
 353         switch (inst->op) {
 354         case cvtsd2ss:
 355         case cvtss2sd:
 356                 /* hmm, this shouldn't have happened */
 357                 return (enum fex_exception) -1;
 358 
 359         case sqrtss:
 360         case sqrtsd:
 361                 return fex_inv_sqrt;
 362 
 363         case cvtss2si:
 364         case cvtsd2si:
 365         case cvttss2si:
 366         case cvttsd2si:
 367         case cvtss2siq:
 368         case cvtsd2siq:
 369         case cvttss2siq:
 370         case cvttsd2siq:
 371                 return fex_inv_int;
 372         default:
 373                 break;
 374         }
 375 
 376         /* check op1 for signaling nan */
 377         t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
 378             my_fp_classf(&inst->op1->f[0]);

 379         if (t1 == fp_signaling)
 380                 return fex_inv_snan;
 381 
 382         /* check two-operand instructions for other cases */
 383         switch (inst->op) {
 384         case cmpss:
 385         case cmpsd:
 386         case minss:
 387         case minsd:
 388         case maxss:
 389         case maxsd:
 390         case comiss:
 391         case comisd:
 392                 return fex_inv_cmp;
 393 
 394         case addss:
 395         case addsd:
 396         case subss:
 397         case subsd:

 398                 if (t1 == fp_infinity && t2 == fp_infinity)
 399                         return fex_inv_isi;

 400                 break;
 401 
 402         case mulss:
 403         case mulsd:
 404                 if ((t1 == fp_zero && t2 == fp_infinity) ||
 405                     (t2 == fp_zero && t1 == fp_infinity))
 406                         return fex_inv_zmi;


 407                 break;
 408 
 409         case divss:
 410         case divsd:

 411                 if (t1 == fp_zero && t2 == fp_zero)
 412                         return fex_inv_zdz;

 413                 if (t1 == fp_infinity && t2 == fp_infinity)
 414                         return fex_inv_idi;

 415         default:
 416                 break;
 417         }
 418 
 419         return (enum fex_exception)-1;
 420 }
 421 
 422 /* inline templates */
     /*
      * These routines are only declared here; their definitions are not
      * part of this file.  Judging from the calls in __fex_get_sse_op,
      * each one takes pointers to its source operand(s) first and, where
      * it produces a value, a pointer to the location that receives the
      * result last.  The ucomis and comis routines take two sources and
      * store no result.
      *
      * The first group operates on single precision data and the second
      * on double precision data.  The variants that use long long in
      * place of int are declared only when __amd64 is defined.
      */
 423 extern void sse_cmpeqss(float *, float *, int *);
 424 extern void sse_cmpltss(float *, float *, int *);
 425 extern void sse_cmpless(float *, float *, int *);
 426 extern void sse_cmpunordss(float *, float *, int *);
 427 extern void sse_minss(float *, float *, float *);
 428 extern void sse_maxss(float *, float *, float *);
 429 extern void sse_addss(float *, float *, float *);
 430 extern void sse_subss(float *, float *, float *);
 431 extern void sse_mulss(float *, float *, float *);
 432 extern void sse_divss(float *, float *, float *);
 433 extern void sse_sqrtss(float *, float *);
 434 extern void sse_ucomiss(float *, float *);
 435 extern void sse_comiss(float *, float *);
 436 extern void sse_cvtss2sd(float *, double *);
 437 extern void sse_cvtsi2ss(int *, float *);
 438 extern void sse_cvttss2si(float *, int *);
 439 extern void sse_cvtss2si(float *, int *);

 440 #ifdef __amd64
 441 extern void sse_cvtsi2ssq(long long *, float *);
 442 extern void sse_cvttss2siq(float *, long long *);
 443 extern void sse_cvtss2siq(float *, long long *);
 444 #endif

 445 extern void sse_cmpeqsd(double *, double *, long long *);
 446 extern void sse_cmpltsd(double *, double *, long long *);
 447 extern void sse_cmplesd(double *, double *, long long *);
 448 extern void sse_cmpunordsd(double *, double *, long long *);
 449 extern void sse_minsd(double *, double *, double *);
 450 extern void sse_maxsd(double *, double *, double *);
 451 extern void sse_addsd(double *, double *, double *);
 452 extern void sse_subsd(double *, double *, double *);
 453 extern void sse_mulsd(double *, double *, double *);
 454 extern void sse_divsd(double *, double *, double *);
 455 extern void sse_sqrtsd(double *, double *);
 456 extern void sse_ucomisd(double *, double *);
 457 extern void sse_comisd(double *, double *);
 458 extern void sse_cvtsd2ss(double *, float *);
 459 extern void sse_cvtsi2sd(int *, double *);
 460 extern void sse_cvttsd2si(double *, int *);
 461 extern void sse_cvtsd2si(double *, int *);

 462 #ifdef __amd64
 463 extern void sse_cvtsi2sdq(long long *, double *);
 464 extern void sse_cvttsd2siq(double *, long long *);
 465 extern void sse_cvtsd2siq(double *, long long *);
 466 #endif
 467 
 468 /*
 469  * Fill in *info with the operands, default untrapped result, and
 470  * flags produced by a scalar SSE instruction, and return the type
 471  * of trapped exception (if any).  On entry, the mxcsr must have
 472  * all exceptions masked and all flags clear.  The same conditions
 473  * will hold on exit.
 474  *
 475  * This routine does not work if the instruction specified by *inst
 476  * is not a scalar instruction.
      *
      * uap supplies the interrupted context; its saved mxcsr determines
      * which exceptions count as trapped.  inst is the decoded
      * instruction, with op1 and op2 pointing at its operands.  The
      * operation is re-executed on copies of the operands held in *info,
      * so the operands referenced by *inst are only read.
      *
      * Returns (enum fex_exception)-1 when no trapped exception is
      * identified.
 477  */
 478 enum fex_exception
 479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 480 {
 481         unsigned int    e, te, mxcsr, oldmxcsr, subnorm;
 482 
 483         /*
 484          * Perform the operation with traps disabled and check the
 485          * exception flags.  If the underflow trap was enabled, also
 486          * check for an exact subnormal result.
 487          */
             /* remember the mxcsr on entry so it can be put back afterwards */
 488         __fenv_getmxcsr(&oldmxcsr);
 489         subnorm = 0;

 490         if ((int)inst->op & DOUBLE) {
                     /*
                      * Copy the source operand(s) into *info.  Conversions
                      * from integer and the single-source operations take
                      * their source from inst->op2; everything else uses
                      * both inst->op1 and inst->op2.
                      */
 491                 if (inst->op == cvtsi2sd) {
 492                         info->op1.type = fex_int;
 493                         info->op1.val.i = inst->op2->i[0];
 494                         info->op2.type = fex_nodata;
 495                 } else if (inst->op == cvtsi2sdq) {
 496                         info->op1.type = fex_llong;
 497                         info->op1.val.l = inst->op2->l[0];
 498                         info->op2.type = fex_nodata;
 499                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 500                     inst->op == cvttsd2si || inst->op == cvtsd2si ||
 501                     inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
 502                         info->op1.type = fex_double;
 503                         info->op1.val.d = inst->op2->d[0];
 504                         info->op2.type = fex_nodata;
 505                 } else {
 506                         info->op1.type = fex_double;
 507                         info->op1.val.d = inst->op1->d[0];
 508                         info->op2.type = fex_double;
 509                         info->op2.val.d = inst->op2->d[0];
 510                 }

                     /* default result type; some cases below override it */
 511                 info->res.type = fex_double;

 512                 switch (inst->op) {
 513                 case cmpsd:
 514                         info->op = fex_cmp;
 515                         info->res.type = fex_llong;

                             /* low two bits of the immediate pick the predicate */
 516                         switch (inst->imm & 3) {
 517                         case 0:
 518                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 519                                     &info->res.val.l);
 520                                 break;
 521 
 522                         case 1:
 523                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 524                                     &info->res.val.l);
 525                                 break;
 526 
 527                         case 2:
 528                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 529                                     &info->res.val.l);
 530                                 break;
 531 
 532                         case 3:
 533                                 sse_cmpunordsd(&info->op1.val.d,
 534                                     &info->op2.val.d, &info->res.val.l);
 535                         }

                             /* bit 2 of the immediate inverts the result mask */
 536                         if (inst->imm & 4)
 537                                 info->res.val.l ^= 0xffffffffffffffffull;

 538                         break;
 539 
 540                 case minsd:
 541                         info->op = fex_other;
 542                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 543                             &info->res.val.d);
 544                         break;
 545 
 546                 case maxsd:
 547                         info->op = fex_other;
 548                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 549                             &info->res.val.d);
 550                         break;
 551 
 552                 case addsd:
 553                         info->op = fex_add;
 554                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 555                             &info->res.val.d);

                             /* note a subnormal result for the underflow check */
 556                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 557                                 subnorm = 1;

 558                         break;
 559 
 560                 case subsd:
 561                         info->op = fex_sub;
 562                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 563                             &info->res.val.d);

 564                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 565                                 subnorm = 1;

 566                         break;
 567 
 568                 case mulsd:
 569                         info->op = fex_mul;
 570                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 571                             &info->res.val.d);

 572                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 573                                 subnorm = 1;

 574                         break;
 575 
 576                 case divsd:
 577                         info->op = fex_div;
 578                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 579                             &info->res.val.d);

 580                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 581                                 subnorm = 1;

 582                         break;
 583 
 584                 case sqrtsd:
 585                         info->op = fex_sqrt;
 586                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 587                         break;
 588 
 589                 case cvtsd2ss:
 590                         info->op = fex_cnvt;
 591                         info->res.type = fex_float;
 592                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);

                             /* the result is single precision, so classify it as such */
 593                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 594                                 subnorm = 1;

 595                         break;
 596 
 597                 case cvtsi2sd:
 598                         info->op = fex_cnvt;
 599                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 600                         break;
 601 
 602                 case cvttsd2si:
 603                         info->op = fex_cnvt;
 604                         info->res.type = fex_int;
 605                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 606                         break;
 607 
 608                 case cvtsd2si:
 609                         info->op = fex_cnvt;
 610                         info->res.type = fex_int;
 611                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 612                         break;
 613 
 614 #ifdef __amd64
                     /*
                      * NOTE(review): this listing jumps from line 614 to line
                      * 637 here, so the cases guarded by this #ifdef, its
                      * matching #endif, and the start of the case that ends
                      * with the break below are not visible.
                      */


 637                         break;
 638 
 639                 case comisd:
 640                         info->op = fex_cmp;
 641                         info->res.type = fex_nodata;
 642                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 643                         break;
 644                 default:
 645                         break;
 646                 }
 647         } else {
                     /* single precision: same operand selection as above */
 648                 if (inst->op == cvtsi2ss) {
 649                         info->op1.type = fex_int;
 650                         info->op1.val.i = inst->op2->i[0];
 651                         info->op2.type = fex_nodata;
 652                 } else if (inst->op == cvtsi2ssq) {
 653                         info->op1.type = fex_llong;
 654                         info->op1.val.l = inst->op2->l[0];
 655                         info->op2.type = fex_nodata;
 656                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 657                     inst->op == cvttss2si || inst->op == cvtss2si ||
 658                     inst->op == cvttss2siq || inst->op == cvtss2siq) {
 659                         info->op1.type = fex_float;
 660                         info->op1.val.f = inst->op2->f[0];
 661                         info->op2.type = fex_nodata;
 662                 } else {
 663                         info->op1.type = fex_float;
 664                         info->op1.val.f = inst->op1->f[0];
 665                         info->op2.type = fex_float;
 666                         info->op2.val.f = inst->op2->f[0];
 667                 }

                     /* default result type; some cases below override it */
 668                 info->res.type = fex_float;

 669                 switch (inst->op) {
 670                 case cmpss:
 671                         info->op = fex_cmp;
 672                         info->res.type = fex_int;

                             /* low two bits of the immediate pick the predicate */
 673                         switch (inst->imm & 3) {
 674                         case 0:
 675                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 676                                     &info->res.val.i);
 677                                 break;
 678 
 679                         case 1:
 680                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 681                                     &info->res.val.i);
 682                                 break;
 683 
 684                         case 2:
 685                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 686                                     &info->res.val.i);
 687                                 break;
 688 
 689                         case 3:
 690                                 sse_cmpunordss(&info->op1.val.f,
 691                                     &info->op2.val.f, &info->res.val.i);
 692                         }

                             /* bit 2 of the immediate inverts the result mask */
 693                         if (inst->imm & 4)
 694                                 info->res.val.i ^= 0xffffffffu;

 695                         break;
 696 
 697                 case minss:
 698                         info->op = fex_other;
 699                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 700                             &info->res.val.f);
 701                         break;
 702 
 703                 case maxss:
 704                         info->op = fex_other;
 705                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 706                             &info->res.val.f);
 707                         break;
 708 
 709                 case addss:
 710                         info->op = fex_add;
 711                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 712                             &info->res.val.f);

                             /* note a subnormal result for the underflow check */
 713                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 714                                 subnorm = 1;

 715                         break;
 716 
 717                 case subss:
 718                         info->op = fex_sub;
 719                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 720                             &info->res.val.f);

 721                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 722                                 subnorm = 1;

 723                         break;
 724 
 725                 case mulss:
 726                         info->op = fex_mul;
 727                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 728                             &info->res.val.f);

 729                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 730                                 subnorm = 1;

 731                         break;
 732 
 733                 case divss:
 734                         info->op = fex_div;
 735                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 736                             &info->res.val.f);

 737                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 738                                 subnorm = 1;

 739                         break;
 740 
 741                 case sqrtss:
 742                         info->op = fex_sqrt;
 743                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 744                         break;
 745 
 746                 case cvtss2sd:
 747                         info->op = fex_cnvt;
 748                         info->res.type = fex_double;
 749                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 750                         break;
 751 
 752                 case cvtsi2ss:
 753                         info->op = fex_cnvt;
 754                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 755                         break;
 756 
 757                 case cvttss2si:
 758                         info->op = fex_cnvt;
                     /*
                      * NOTE(review): this listing jumps from line 758 to line
                      * 783 here; the rest of this case, the cases that follow
                      * it, and the #ifdef matching the #endif below are not
                      * visible.
                      */


 783                         info->res.type = fex_llong;
 784                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 785                         break;
 786 #endif
 787 
 788                 case ucomiss:
 789                         info->op = fex_cmp;
 790                         info->res.type = fex_nodata;
 791                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 792                         break;
 793 
 794                 case comiss:
 795                         info->op = fex_cmp;
 796                         info->res.type = fex_nodata;
 797                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 798                         break;
 799                 default:
 800                         break;
 801                 }
 802         }

             /*
              * Collect the flags the operation raised, then put back the
              * mxcsr saved on entry.  The 0x3d mask keeps bits 0 and 2-5
              * and drops bit 1 (the denormal-operand flag on x86).
              */
 803         __fenv_getmxcsr(&mxcsr);
 804         info->flags = mxcsr & 0x3d;
 805         __fenv_setmxcsr(&oldmxcsr);
 806 
 807         /* determine which exception would have been trapped */
             /*
              * The context's mxcsr is shifted right by 7 to line its mask
              * bits up with the flag bits and then inverted, so te has a
              * bit set for each exception whose mask bit is clear.
              */
 808         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
 809             >> 7) & 0x3d;
 810         e = mxcsr & te;

             /* report the first match in this fixed order of tests */
 811         if (e & FE_INVALID)
 812                 return __fex_get_sse_invalid_type(inst);

 813         if (e & FE_DIVBYZERO)
 814                 return fex_division;

 815         if (e & FE_OVERFLOW)
 816                 return fex_overflow;

             /* a subnormal result counts as underflow when that trap is enabled */
 817         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 818                 return fex_underflow;

 819         if (e & FE_INEXACT)
 820                 return fex_inexact;
 821         return (enum fex_exception)-1;

 822 }
 823 
 824 /*
 825  * Emulate a SIMD SSE instruction to determine which exceptions occur
 826  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 827  * trapped exception that would occur if the i-th part of the SIMD
 828  * instruction were executed in isolation; set e[i] to -1 if no
 829  * trapped exception would occur in this part.  Also fill in info[i]
 830  * with the corresponding operands, default untrapped result, and
 831  * flags.
 832  *
 833  * This routine does not work if the instruction specified by *inst
 834  * is not a SIMD instruction.
 835  */
 836 void
 837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 838     fex_info_t *info)
 839 {
 840         sseinst_t       dummy;
 841         int             i;
 842 
 843         e[0] = e[1] = e[2] = e[3] = -1;
 844 
 845         /* perform each part of the SIMD operation */
 846         switch (inst->op) {
 847         case cmpps:
 848                 dummy.op = cmpss;
 849                 dummy.imm = inst->imm;

 850                 for (i = 0; i < 4; i++) {
 851                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 852                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 853                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 854                 }

 855                 break;
 856 
 857         case minps:
 858                 dummy.op = minss;

 859                 for (i = 0; i < 4; i++) {
 860                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 861                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 862                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 863                 }

 864                 break;
 865 
 866         case maxps:
 867                 dummy.op = maxss;

 868                 for (i = 0; i < 4; i++) {
 869                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 870                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 871                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 872                 }

 873                 break;
 874 
 875         case addps:
 876                 dummy.op = addss;

 877                 for (i = 0; i < 4; i++) {
 878                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 879                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 880                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 881                 }

 882                 break;
 883 
 884         case subps:
 885                 dummy.op = subss;

 886                 for (i = 0; i < 4; i++) {
 887                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 888                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 889                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 890                 }

 891                 break;
 892 
 893         case mulps:
 894                 dummy.op = mulss;

 895                 for (i = 0; i < 4; i++) {
 896                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 897                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 898                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 899                 }

 900                 break;
 901 
 902         case divps:
 903                 dummy.op = divss;

 904                 for (i = 0; i < 4; i++) {
 905                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 906                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 907                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 908                 }

 909                 break;
 910 
 911         case sqrtps:
 912                 dummy.op = sqrtss;

 913                 for (i = 0; i < 4; i++) {
 914                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 915                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 916                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 917                 }

 918                 break;
 919 
 920         case cvtdq2ps:
 921                 dummy.op = cvtsi2ss;

 922                 for (i = 0; i < 4; i++) {
 923                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 924                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 925                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 926                 }

 927                 break;
 928 
 929         case cvttps2dq:
 930                 dummy.op = cvttss2si;

 931                 for (i = 0; i < 4; i++) {
 932                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 933                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 934                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 935                 }

 936                 break;
 937 
 938         case cvtps2dq:
 939                 dummy.op = cvtss2si;

 940                 for (i = 0; i < 4; i++) {
 941                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 942                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 943                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 944                 }

 945                 break;
 946 
 947         case cvtpi2ps:
 948                 dummy.op = cvtsi2ss;

 949                 for (i = 0; i < 2; i++) {
 950                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 951                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 952                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 953                 }

 954                 break;
 955 
 956         case cvttps2pi:
 957                 dummy.op = cvttss2si;

 958                 for (i = 0; i < 2; i++) {
 959                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 960                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 961                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 962                 }

 963                 break;
 964 
 965         case cvtps2pi:
 966                 dummy.op = cvtss2si;

 967                 for (i = 0; i < 2; i++) {
 968                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 969                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 970                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 971                 }

 972                 break;
 973 
 974         case cmppd:
 975                 dummy.op = cmpsd;
 976                 dummy.imm = inst->imm;

 977                 for (i = 0; i < 2; i++) {
 978                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 979                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 980                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 981                 }

 982                 break;
 983 
 984         case minpd:
 985                 dummy.op = minsd;

 986                 for (i = 0; i < 2; i++) {
 987                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 988                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 989                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 990                 }

 991                 break;
 992 
 993         case maxpd:
 994                 dummy.op = maxsd;

 995                 for (i = 0; i < 2; i++) {
 996                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 997                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 998                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 999                 }

1000                 break;
1001 
1002         case addpd:
1003                 dummy.op = addsd;

1004                 for (i = 0; i < 2; i++) {
1005                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008                 }

1009                 break;
1010 
1011         case subpd:
1012                 dummy.op = subsd;

1013                 for (i = 0; i < 2; i++) {
1014                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017                 }

1018                 break;
1019 
1020         case mulpd:
1021                 dummy.op = mulsd;

1022                 for (i = 0; i < 2; i++) {
1023                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026                 }

1027                 break;
1028 
1029         case divpd:
1030                 dummy.op = divsd;

1031                 for (i = 0; i < 2; i++) {
1032                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035                 }

1036                 break;
1037 
1038         case sqrtpd:
1039                 dummy.op = sqrtsd;

1040                 for (i = 0; i < 2; i++) {
1041                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044                 }

1045                 break;
1046 
1047         case cvtpi2pd:
1048         case cvtdq2pd:
1049                 dummy.op = cvtsi2sd;

1050                 for (i = 0; i < 2; i++) {
1051                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054                 }

1055                 break;
1056 
1057         case cvttpd2pi:
1058         case cvttpd2dq:
1059                 dummy.op = cvttsd2si;

1060                 for (i = 0; i < 2; i++) {
1061                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064                 }

1065                 break;
1066 
1067         case cvtpd2pi:
1068         case cvtpd2dq:
1069                 dummy.op = cvtsd2si;

1070                 for (i = 0; i < 2; i++) {
1071                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074                 }

1075                 break;
1076 
1077         case cvtps2pd:
1078                 dummy.op = cvtss2sd;

1079                 for (i = 0; i < 2; i++) {
1080                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083                 }

1084                 break;
1085 
1086         case cvtpd2ps:
1087                 dummy.op = cvtsd2ss;

1088                 for (i = 0; i < 2; i++) {
1089                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092                 }

1093         default:
1094                 break;
1095         }
1096 }
1097 
1098 /*
1099  * Store the result value from *info in the destination of the scalar
1100  * SSE instruction specified by *inst.  If no result is given but the
1101  * exception is underflow or overflow, supply the default trapped result.
1102  *
1103  * This routine does not work if the instruction specified by *inst
1104  * is not a scalar instruction.
1105  */
1106 void
1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108     fex_info_t *info)
1109 {
1110         int             i = 0;
1111         long long       l = 0L;;
1112         float           f = 0.0, fscl;
1113         double          d = 0.0L, dscl;
1114 
1115         /* for compares that write eflags, just set the flags
1116            to indicate "unordered" */
1117         if (inst->op == ucomiss || inst->op == comiss ||
1118             inst->op == ucomisd || inst->op == comisd) {


1119                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120                 return;
1121         }
1122 
1123         /* if info doesn't specify a result value, try to generate
1124            the default trapped result */


1125         if (info->res.type == fex_nodata) {
1126                 /* set scale factors for exponent wrapping */
1127                 switch (e) {
1128                 case fex_overflow:
1129                         fscl = 1.262177448e-29f; /* 2^-96 */
1130                         dscl = 6.441148769597133308e-232; /* 2^-768 */
1131                         break;
1132 
1133                 case fex_underflow:
1134                         fscl = 7.922816251e+28f; /* 2^96 */
1135                         dscl = 1.552518092300708935e+231; /* 2^768 */
1136                         break;
1137 
1138                 default:
1139                         (void) __fex_get_sse_op(uap, inst, info);

1140                         if (info->res.type == fex_nodata)
1141                                 return;

1142                         goto stuff;
1143                 }
1144 
1145                 /* generate the wrapped result */
1146                 if (inst->op == cvtsd2ss) {
1147                         info->op1.type = fex_double;
1148                         info->op1.val.d = inst->op2->d[0];
1149                         info->op2.type = fex_nodata;
1150                         info->res.type = fex_float;
1151                         info->res.val.f = (float)(fscl * (fscl *
1152                             info->op1.val.d));
1153                 } else if ((int)inst->op & DOUBLE) {
1154                         info->op1.type = fex_double;
1155                         info->op1.val.d = inst->op1->d[0];
1156                         info->op2.type = fex_double;
1157                         info->op2.val.d = inst->op2->d[0];
1158                         info->res.type = fex_double;

1159                         switch (inst->op) {
1160                         case addsd:
1161                                 info->res.val.d = dscl * (dscl *
1162                                     info->op1.val.d + dscl * info->op2.val.d);
1163                                 break;
1164 
1165                         case subsd:
1166                                 info->res.val.d = dscl * (dscl *
1167                                     info->op1.val.d - dscl * info->op2.val.d);
1168                                 break;
1169 
1170                         case mulsd:
1171                                 info->res.val.d = (dscl * info->op1.val.d) *
1172                                     (dscl * info->op2.val.d);
1173                                 break;
1174 
1175                         case divsd:
1176                                 info->res.val.d = (dscl * info->op1.val.d) /
1177                                     (info->op2.val.d / dscl);
1178                                 break;
1179 
1180                         default:
1181                                 return;
1182                         }
1183                 } else {
1184                         info->op1.type = fex_float;
1185                         info->op1.val.f = inst->op1->f[0];
1186                         info->op2.type = fex_float;
1187                         info->op2.val.f = inst->op2->f[0];
1188                         info->res.type = fex_float;

1189                         switch (inst->op) {
1190                         case addss:
1191                                 info->res.val.f = fscl * (fscl *
1192                                     info->op1.val.f + fscl * info->op2.val.f);
1193                                 break;
1194 
1195                         case subss:
1196                                 info->res.val.f = fscl * (fscl *
1197                                     info->op1.val.f - fscl * info->op2.val.f);
1198                                 break;
1199 
1200                         case mulss:
1201                                 info->res.val.f = (fscl * info->op1.val.f) *
1202                                     (fscl * info->op2.val.f);
1203                                 break;
1204 
1205                         case divss:
1206                                 info->res.val.f = (fscl * info->op1.val.f) /
1207                                     (info->op2.val.f / fscl);
1208                                 break;
1209 
1210                         default:
1211                                 return;
1212                         }
1213                 }
1214         }
1215 
1216         /* put the result in the destination */
1217 stuff:
1218         if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219             || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220                 switch (info->res.type) {
1221                 case fex_int:
1222                         i = info->res.val.i;
1223                         break;
1224 
1225                 case fex_llong:
1226                         i = info->res.val.l;
1227                         break;
1228 
1229                 case fex_float:
1230                         i = info->res.val.f;
1231                         break;
1232 
1233                 case fex_double:
1234                         i = info->res.val.d;
1235                         break;
1236 
1237                 case fex_ldouble:
1238                         i = info->res.val.q;
1239                         break;
1240 
1241                 default:
1242                         break;
1243                 }

1244                 inst->op1->i[0] = i;
1245         } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246             inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247             inst->op == cvtsd2siq) {
1248                 switch (info->res.type) {
1249                 case fex_int:
1250                         l = info->res.val.i;
1251                         break;
1252 
1253                 case fex_llong:
1254                         l = info->res.val.l;
1255                         break;
1256 
1257                 case fex_float:
1258                         l = info->res.val.f;
1259                         break;
1260 
1261                 case fex_double:
1262                         l = info->res.val.d;
1263                         break;
1264 
1265                 case fex_ldouble:
1266                         l = info->res.val.q;
1267                         break;
1268 
1269                 default:
1270                         break;
1271                 }

1272                 inst->op1->l[0] = l;
1273         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274             inst->op == cvtss2sd) {
1275                 switch (info->res.type) {
1276                 case fex_int:
1277                         d = info->res.val.i;
1278                         break;
1279 
1280                 case fex_llong:
1281                         d = info->res.val.l;
1282                         break;
1283 
1284                 case fex_float:
1285                         d = info->res.val.f;
1286                         break;
1287 
1288                 case fex_double:
1289                         d = info->res.val.d;
1290                         break;
1291 
1292                 case fex_ldouble:
1293                         d = info->res.val.q;
1294                         break;
1295 
1296                 default:
1297                         break;
1298                 }

1299                 inst->op1->d[0] = d;
1300         } else {
1301                 switch (info->res.type) {
1302                 case fex_int:
1303                         f = info->res.val.i;
1304                         break;
1305 
1306                 case fex_llong:
1307                         f = info->res.val.l;
1308                         break;
1309 
1310                 case fex_float:
1311                         f = info->res.val.f;
1312                         break;
1313 
1314                 case fex_double:
1315                         f = info->res.val.d;
1316                         break;
1317 
1318                 case fex_ldouble:
1319                         f = info->res.val.q;
1320                         break;
1321 
1322                 default:
1323                         break;
1324                 }

1325                 inst->op1->f[0] = f;
1326         }
1327 }
1328 
1329 /*
1330  * Store the results from a SIMD instruction.  For each i, store
1331  * the result value from info[i] in the i-th part of the destination
1332  * of the SIMD SSE instruction specified by *inst.  If no result
1333  * is given but the exception indicated by e[i] is underflow or
1334  * overflow, supply the default trapped result.
1335  *
1336  * This routine does not work if the instruction specified by *inst
1337  * is not a SIMD instruction.
1338  */
1339 void
1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341     fex_info_t *info)
1342 {
1343         sseinst_t       dummy;
1344         int             i;
1345 
1346         /* store each part */
1347         switch (inst->op) {
1348         case cmpps:
1349                 dummy.op = cmpss;
1350                 dummy.imm = inst->imm;

1351                 for (i = 0; i < 4; i++) {
1352                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355                 }

1356                 break;
1357 
1358         case minps:
1359                 dummy.op = minss;

1360                 for (i = 0; i < 4; i++) {
1361                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364                 }

1365                 break;
1366 
1367         case maxps:
1368                 dummy.op = maxss;

1369                 for (i = 0; i < 4; i++) {
1370                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373                 }

1374                 break;
1375 
1376         case addps:
1377                 dummy.op = addss;

1378                 for (i = 0; i < 4; i++) {
1379                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382                 }

1383                 break;
1384 
1385         case subps:
1386                 dummy.op = subss;

1387                 for (i = 0; i < 4; i++) {
1388                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391                 }

1392                 break;
1393 
1394         case mulps:
1395                 dummy.op = mulss;

1396                 for (i = 0; i < 4; i++) {
1397                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400                 }

1401                 break;
1402 
1403         case divps:
1404                 dummy.op = divss;

1405                 for (i = 0; i < 4; i++) {
1406                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409                 }

1410                 break;
1411 
1412         case sqrtps:
1413                 dummy.op = sqrtss;

1414                 for (i = 0; i < 4; i++) {
1415                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418                 }

1419                 break;
1420 
1421         case cvtdq2ps:
1422                 dummy.op = cvtsi2ss;

1423                 for (i = 0; i < 4; i++) {
1424                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427                 }

1428                 break;
1429 
1430         case cvttps2dq:
1431                 dummy.op = cvttss2si;

1432                 for (i = 0; i < 4; i++) {
1433                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436                 }

1437                 break;
1438 
1439         case cvtps2dq:
1440                 dummy.op = cvtss2si;

1441                 for (i = 0; i < 4; i++) {
1442                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445                 }

1446                 break;
1447 
1448         case cvtpi2ps:
1449                 dummy.op = cvtsi2ss;

1450                 for (i = 0; i < 2; i++) {
1451                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454                 }

1455                 break;
1456 
1457         case cvttps2pi:
1458                 dummy.op = cvttss2si;

1459                 for (i = 0; i < 2; i++) {
1460                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463                 }

1464                 break;
1465 
1466         case cvtps2pi:
1467                 dummy.op = cvtss2si;

1468                 for (i = 0; i < 2; i++) {
1469                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472                 }

1473                 break;
1474 
1475         case cmppd:
1476                 dummy.op = cmpsd;
1477                 dummy.imm = inst->imm;

1478                 for (i = 0; i < 2; i++) {
1479                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482                 }

1483                 break;
1484 
1485         case minpd:
1486                 dummy.op = minsd;

1487                 for (i = 0; i < 2; i++) {
1488                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491                 }

1492                 break;
1493 
1494         case maxpd:
1495                 dummy.op = maxsd;

1496                 for (i = 0; i < 2; i++) {
1497                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500                 }

1501                 break;
1502 
1503         case addpd:
1504                 dummy.op = addsd;

1505                 for (i = 0; i < 2; i++) {
1506                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509                 }

1510                 break;
1511 
1512         case subpd:
1513                 dummy.op = subsd;

1514                 for (i = 0; i < 2; i++) {
1515                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518                 }

1519                 break;
1520 
1521         case mulpd:
1522                 dummy.op = mulsd;

1523                 for (i = 0; i < 2; i++) {
1524                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527                 }

1528                 break;
1529 
1530         case divpd:
1531                 dummy.op = divsd;

1532                 for (i = 0; i < 2; i++) {
1533                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536                 }

1537                 break;
1538 
1539         case sqrtpd:
1540                 dummy.op = sqrtsd;

1541                 for (i = 0; i < 2; i++) {
1542                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545                 }

1546                 break;
1547 
1548         case cvtpi2pd:
1549         case cvtdq2pd:
1550                 dummy.op = cvtsi2sd;

1551                 for (i = 0; i < 2; i++) {
1552                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555                 }

1556                 break;
1557 
1558         case cvttpd2pi:
1559         case cvttpd2dq:
1560                 dummy.op = cvttsd2si;

1561                 for (i = 0; i < 2; i++) {
1562                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565                 }

1566                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1567                 if (inst->op == cvttpd2dq)
1568                         inst->op1->l[1] = 0ll;

1569                 break;
1570 
1571         case cvtpd2pi:
1572         case cvtpd2dq:
1573                 dummy.op = cvtsd2si;

1574                 for (i = 0; i < 2; i++) {
1575                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578                 }

1579                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1580                 if (inst->op == cvtpd2dq)
1581                         inst->op1->l[1] = 0ll;

1582                 break;
1583 
1584         case cvtps2pd:
1585                 dummy.op = cvtss2sd;

1586                 for (i = 0; i < 2; i++) {
1587                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590                 }

1591                 break;
1592 
1593         case cvtpd2ps:
1594                 dummy.op = cvtsd2ss;

1595                 for (i = 0; i < 2; i++) {
1596                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599                 }

1600                 /* zero the high 64 bits of the destination */
1601                 inst->op1->l[1] = 0ll;
1602 
1603         default:
1604                 break;
1605         }
1606 }
1607 


   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 
  22 /*
  23  * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24  */
  25 
  26 /*
  27  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  28  * Use is subject to license terms.
  29  */
  30 
  31 #include <ucontext.h>
  32 #include <fenv.h>
  33 #if defined(__SUNPRO_C)
  34 #include <sunmath.h>
  35 #else
  36 #include <sys/ieeefp.h>
  37 #endif
  38 #include "fex_handler.h"
  39 #include "fenv_inlines.h"
  40 
  41 #if !defined(REG_PC)
  42 #define REG_PC          EIP
  43 #endif
  44 
  45 #if !defined(REG_PS)
  46 #define REG_PS          EFL
  47 #endif
  48 
  49 #ifdef __amd64
  50 #define regno(X)        ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \
  51         REG_RSP))
  52 #else
  53 #define regno(X)        (EAX - X)
  54 #endif
  55 
  56 /*
  57  * Support for SSE instructions
  58  */
  59 
  60 /*
  61  * Decode an SSE instruction.  Fill in *inst and return the length of the
  62  * instruction in bytes.  Return 0 if the instruction is not recognized.
      *
      * The bytes are read starting at the address held in
      * uap->uc_mcontext.gregs[REG_PC], in this order: at most one F3/66/F2
      * prefix, an optional REX byte (0x40-0x4F), the 0x0F escape, one opcode
      * byte, a ModRM byte, then any SIB byte and displacement, and finally
      * an immediate byte for the cmpss/cmpps/cmpsd/cmppd opcodes only.
      *
      * On success inst->op, inst->op1 and inst->op2 are set; inst->imm is
      * set only for the four cmp* opcodes.  op1, and op2 when ModRM selects
      * a register, point into the register state saved in *uap; otherwise
      * op2 is the computed memory address of the operand.
      *
      * Every path that returns 0 does so before anything is stored in *inst.
  63  */
  64 int
  65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  66 {
  67         unsigned char *ip;
  68         char *addr;
  69         int i, dbl, simd, rex, modrm, sib, r;
  70 
             /* i is the offset of the next undecoded byte; it is the return value */
  71         i = 0;
  72         ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  73 
  74         /* look for pseudo-prefixes */
  75         dbl = 0;
  76         simd = SIMD;
  77 
             /*
              * F3 clears the SIMD flag, 66 sets the DOUBLE flag, F2 does both.
              * With no prefix the defaults above (SIMD, not DOUBLE) stand.
              */
  78         if (ip[i] == 0xF3) {
  79                 simd = 0;
  80                 i++;
  81         } else if (ip[i] == 0x66) {
  82                 dbl = DOUBLE;
  83                 i++;
  84         } else if (ip[i] == 0xF2) {
  85                 dbl = DOUBLE;
  86                 simd = 0;
  87                 i++;
  88         }
  89 
  90         /* look for AMD64 REX prefix */
  91         rex = 0;
  92 
  93         if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  94                 rex = ip[i];
  95                 i++;
  96         }
  97 
  98         /* parse opcode */
  99         if (ip[i++] != 0x0F)
 100                 return (0);
 101 
             /*
              * For most opcodes the operation is chosen by adding the simd
              * and dbl flag values to the base (…ss) enumerator.  For the
              * three integer conversions, a non-SIMD form also adds the REX
              * bit with value 8 (presumably selecting the …q variants --
              * confirm against the enum in fex_handler.h).
              */
 102         switch (ip[i++]) {
 103         case 0x2A:
 104                 inst->op = (int)cvtsi2ss + simd + dbl;
 105 
 106                 if (!simd)
 107                         inst->op = (int)inst->op + (rex & 8);
 108 
 109                 break;
 110 
 111         case 0x2C:
 112                 inst->op = (int)cvttss2si + simd + dbl;
 113 
 114                 if (!simd)
 115                         inst->op = (int)inst->op + (rex & 8);
 116 
 117                 break;
 118 
 119         case 0x2D:
 120                 inst->op = (int)cvtss2si + simd + dbl;
 121 
 122                 if (!simd)
 123                         inst->op = (int)inst->op + (rex & 8);
 124 
 125                 break;
 126 
 127         case 0x2E:
 128 
 129                 /* oddball: scalar instruction in a SIMD opcode group */
 130                 if (!simd)
 131                         return (0);
 132 
 133                 inst->op = (int)ucomiss + dbl;
 134                 break;
 135 
 136         case 0x2F:
 137 
 138                 /* oddball: scalar instruction in a SIMD opcode group */
 139                 if (!simd)
 140                         return (0);
 141 
 142                 inst->op = (int)comiss + dbl;
 143                 break;
 144 
 145         case 0x51:
 146                 inst->op = (int)sqrtss + simd + dbl;
 147                 break;
 148 
 149         case 0x58:
 150                 inst->op = (int)addss + simd + dbl;
 151                 break;
 152 
 153         case 0x59:
 154                 inst->op = (int)mulss + simd + dbl;
 155                 break;
 156 
 157         case 0x5A:
 158                 inst->op = (int)cvtss2sd + simd + dbl;
 159                 break;
 160 
 161         case 0x5B:
 162 
                     /*
                      * Does not follow the additive scheme: 66 prefix gives
                      * cvtps2dq, no prefix cvtdq2ps, F3 cvttps2dq; F2 is
                      * rejected.
                      */
 163                 if (dbl) {
 164                         if (simd)
 165                                 inst->op = cvtps2dq;
 166                         else
 167                                 return (0);
 168                 } else {
 169                         inst->op = (simd) ? cvtdq2ps : cvttps2dq;
 170                 }
 171 
 172                 break;
 173 
 174         case 0x5C:
 175                 inst->op = (int)subss + simd + dbl;
 176                 break;
 177 
 178         case 0x5D:
 179                 inst->op = (int)minss + simd + dbl;
 180                 break;
 181 
 182         case 0x5E:
 183                 inst->op = (int)divss + simd + dbl;
 184                 break;
 185 
 186         case 0x5F:
 187                 inst->op = (int)maxss + simd + dbl;
 188                 break;
 189 
 190         case 0xC2:
 191                 inst->op = (int)cmpss + simd + dbl;
 192                 break;
 193 
 194         case 0xE6:
 195 
                     /*
                      * Also decoded by hand: 66 prefix gives cvttpd2dq, F2
                      * cvtpd2dq, F3 cvtdq2pd; the unprefixed form is rejected.
                      */
 196                 if (simd) {
 197                         if (dbl)
 198                                 inst->op = cvttpd2dq;
 199                         else
 200                                 return (0);
 201                 } else {
 202                         inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
 203                 }
 204 
 205                 break;
 206 
 207         default:
 208                 return (0);
 209         }
 210 
 211         /* locate operands */
 212         modrm = ip[i++];
 213 
             /*
              * op1 comes from ModRM bits 5:3, widened to four bits by REX
              * bit 2 for gp and xmm registers (the mmx case ignores REX).
              */
 214         if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
 215             cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
 216             inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
 217             cvttsd2siq) {
 218                 /* op1 is a gp register */
 219                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 220                 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 221         } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
 222             cvtpd2pi || inst->op == cvttpd2pi) {
 223                 /* op1 is a mmx register */
 224 #ifdef __amd64
 225                 inst->op1 = (sseoperand_t *)
 226                     &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
 227                     .st[(modrm >> 3) & 7];
 228 #else
                     /* registers are addressed as 10-byte slots from state[7] */
 229                 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 230                     (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
 231                     .state[7]);
 232 #endif
 233         } else {
 234                 /* op1 is a xmm register */
 235                 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 236                 inst->op1 =
 237                     (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 238                     .fpchip_state.xmm[r];
 239         }
 240 
             /*
              * op2 comes from ModRM bits 2:0 when the top two bits are 11
              * (register form, widened by REX bit 0); any other value of the
              * top two bits means op2 is in memory.
              */
 241         if ((modrm >> 6) == 3) {
 242                 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
 243                     cvtsi2ssq || inst->op == cvtsi2sdq) {
 244                         /* op2 is a gp register */
 245                         r = ((rex & 1) << 3) | (modrm & 7);
 246                         inst->op2 =
 247                             (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 248                 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 249                         /* op2 is a mmx register */
 250 #ifdef __amd64
 251                         inst->op2 =
 252                             (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 253                             .fpchip_state.st[modrm & 7];
 254 #else
 255                         inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 256                             (char *)&uap->uc_mcontext.fpregs.fp_reg_set
 257                             .fpchip_state.state[7]);
 258 #endif
 259                 } else {
 260                         /* op2 is a xmm register */
 261                         r = ((rex & 1) << 3) | (modrm & 7);
 262                         inst->op2 =
 263                             (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
 264                             .fpchip_state.xmm[r];
 265                 }
 266         } else if ((modrm & 0xc7) == 0x05) {
                     /* top bits 00 with low bits 101: a bare 32-bit displacement */
 267 #ifdef __amd64
 268                 /* address of next instruction + offset */
 269                 r = i + 4;
 270 
                     /* the cmp* forms carry one more byte (the immediate) */
 271                 if (inst->op == cmpss || inst->op == cmpps || inst->op ==
 272                     cmpsd || inst->op == cmppd)
 273                         r++;
 274 
 275                 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 276 #else
 277                 /* absolute address */
 278                 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 279 #endif
 280                 i += 4;
 281         } else {
 282                 /* complex address */
 283                 if ((modrm & 7) == 4) {
 284                         /* parse sib byte */
 285                         sib = ip[i++];
 286 
 287                         if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 288                                 /* start with absolute address */
 289                                 addr = (char *)(uintptr_t)(*(int *)(ip + i));
 290                                 i += 4;
 291                         } else {
 292                                 /* start with base */
 293                                 r = ((rex & 1) << 3) | (sib & 7);
 294                                 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 295                         }
 296 
                             /* index is SIB bits 5:3 widened by REX bit 1 */
 297                         r = ((rex & 2) << 2) | ((sib >> 3) & 7);
 298 
                             /* index number 4 means there is no index register */
 299                         if (r != 4) {
 300                                 /* add scaled index */
 301                                 addr += uap->uc_mcontext.gregs[regno(r)] <<
 302                                     (sib >> 6);
 303                         }
 304                 } else {
 305                         r = ((rex & 1) << 3) | (modrm & 7);
 306                         addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 307                 }
 308 
 309                 /* add displacement, if any */
 310                 if ((modrm >> 6) == 1) {
                             /*
                              * NOTE(review): the one-byte displacement is only
                              * sign-extended if plain char is signed for this
                              * build -- confirm.
                              */
 311                         addr += (char)ip[i++];
 312                 } else if ((modrm >> 6) == 2) {
 313                         addr += *(int *)(ip + i);
 314                         i += 4;
 315                 }
 316 
 317                 inst->op2 = (sseoperand_t *)addr;
 318         }
 319 
 320         if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 321             inst->op == cmppd) {
 322                 /* get the immediate operand */
 323                 inst->imm = ip[i++];
 324         }
 325 
 326         return (i);
 327 }
 328 
 329 static enum fp_class_type
 330 my_fp_classf(float *x)
 331 {
 332         int i = *(int *)x & ~0x80000000;
 333 
 334         if (i < 0x7f800000) {
 335                 if (i < 0x00800000)
 336                         return ((i == 0) ? fp_zero : fp_subnormal);
 337 
 338                 return (fp_normal);
 339         } else if (i == 0x7f800000) {
 340                 return (fp_infinity);
 341         } else if (i & 0x400000) {
 342                 return (fp_quiet);
 343         } else {
 344                 return (fp_signaling);
 345         }






 346 }
 347 
 348 static enum fp_class_type
 349 my_fp_class(double *x)
 350 {
 351         int i = *(1 + (int *)x) & ~0x80000000;
 352 
 353         if (i < 0x7ff00000) {
 354                 if (i < 0x00100000)
 355                         return (((i | *(int *)x) == 0) ? fp_zero :
 356                             fp_subnormal);
 357 
 358                 return (fp_normal);
 359         } else if (i == 0x7ff00000 && *(int *)x == 0) {
 360                 return (fp_infinity);
 361         } else if (i & 0x80000) {
 362                 return (fp_quiet);
 363         } else {
 364                 return (fp_signaling);
 365         }






 366 }
 367 
 368 /*
 369  * Inspect a scalar SSE instruction that incurred an invalid operation
 370  * exception to determine which type of exception it was.
 371  */
 372 static enum fex_exception
 373 __fex_get_sse_invalid_type(sseinst_t *inst)
 374 {
 375         enum fp_class_type t1, t2;
 376 
 377         /* check op2 for signaling nan */
 378         t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
 379             my_fp_classf(&inst->op2->f[0]);
 380 
 381         if (t2 == fp_signaling)
 382                 return (fex_inv_snan);
 383 
 384         /* eliminate all single-operand instructions */
 385         switch (inst->op) {
 386         case cvtsd2ss:
 387         case cvtss2sd:
 388                 /* hmm, this shouldn't have happened */
 389                 return ((enum fex_exception)-1);
 390 
 391         case sqrtss:
 392         case sqrtsd:
 393                 return (fex_inv_sqrt);
 394 
 395         case cvtss2si:
 396         case cvtsd2si:
 397         case cvttss2si:
 398         case cvttsd2si:
 399         case cvtss2siq:
 400         case cvtsd2siq:
 401         case cvttss2siq:
 402         case cvttsd2siq:
 403                 return (fex_inv_int);
 404         default:
 405                 break;
 406         }
 407 
 408         /* check op1 for signaling nan */
 409         t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
 410             my_fp_classf(&inst->op1->f[0]);
 411 
 412         if (t1 == fp_signaling)
 413                 return (fex_inv_snan);
 414 
 415         /* check two-operand instructions for other cases */
 416         switch (inst->op) {
 417         case cmpss:
 418         case cmpsd:
 419         case minss:
 420         case minsd:
 421         case maxss:
 422         case maxsd:
 423         case comiss:
 424         case comisd:
 425                 return (fex_inv_cmp);
 426 
 427         case addss:
 428         case addsd:
 429         case subss:
 430         case subsd:
 431 
 432                 if (t1 == fp_infinity && t2 == fp_infinity)
 433                         return (fex_inv_isi);
 434 
 435                 break;
 436 
 437         case mulss:
 438         case mulsd:
 439 
 440                 if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
 441                     t1 == fp_infinity))
 442                         return (fex_inv_zmi);
 443 
 444                 break;
 445 
 446         case divss:
 447         case divsd:
 448 
 449                 if (t1 == fp_zero && t2 == fp_zero)
 450                         return (fex_inv_zdz);
 451 
 452                 if (t1 == fp_infinity && t2 == fp_infinity)
 453                         return (fex_inv_idi);
 454 
 455         default:
 456                 break;
 457         }
 458 
 459         return ((enum fex_exception)-1);
 460 }
 461 
 462 /* inline templates */
     /*
      * Prototypes for the helpers that execute one scalar SSE operation
      * each; they are only declared here.  As used by the caller in this
      * file, every argument is a pointer: the operand(s) come first and,
      * where the prototype has one, the location receiving the result comes
      * last.  The two-argument sqrt and cvt forms take (operand, result);
      * the ucomis and comis forms take two operands and have no result
      * argument.
      */

     /* single precision; the cmp forms store their result through an int * */
 463 extern void sse_cmpeqss(float *, float *, int *);
 464 extern void sse_cmpltss(float *, float *, int *);
 465 extern void sse_cmpless(float *, float *, int *);
 466 extern void sse_cmpunordss(float *, float *, int *);
 467 extern void sse_minss(float *, float *, float *);
 468 extern void sse_maxss(float *, float *, float *);
 469 extern void sse_addss(float *, float *, float *);
 470 extern void sse_subss(float *, float *, float *);
 471 extern void sse_mulss(float *, float *, float *);
 472 extern void sse_divss(float *, float *, float *);
 473 extern void sse_sqrtss(float *, float *);
 474 extern void sse_ucomiss(float *, float *);
 475 extern void sse_comiss(float *, float *);
 476 extern void sse_cvtss2sd(float *, double *);
 477 extern void sse_cvtsi2ss(int *, float *);
 478 extern void sse_cvttss2si(float *, int *);
 479 extern void sse_cvtss2si(float *, int *);
 480 
     /* long long integer conversions, declared only when __amd64 is defined */
 481 #ifdef __amd64
 482 extern void sse_cvtsi2ssq(long long *, float *);
 483 extern void sse_cvttss2siq(float *, long long *);
 484 extern void sse_cvtss2siq(float *, long long *);
 485 #endif
 486 
     /* double precision; the cmp forms store their result through a long long * */
 487 extern void sse_cmpeqsd(double *, double *, long long *);
 488 extern void sse_cmpltsd(double *, double *, long long *);
 489 extern void sse_cmplesd(double *, double *, long long *);
 490 extern void sse_cmpunordsd(double *, double *, long long *);
 491 extern void sse_minsd(double *, double *, double *);
 492 extern void sse_maxsd(double *, double *, double *);
 493 extern void sse_addsd(double *, double *, double *);
 494 extern void sse_subsd(double *, double *, double *);
 495 extern void sse_mulsd(double *, double *, double *);
 496 extern void sse_divsd(double *, double *, double *);
 497 extern void sse_sqrtsd(double *, double *);
 498 extern void sse_ucomisd(double *, double *);
 499 extern void sse_comisd(double *, double *);
 500 extern void sse_cvtsd2ss(double *, float *);
 501 extern void sse_cvtsi2sd(int *, double *);
 502 extern void sse_cvttsd2si(double *, int *);
 503 extern void sse_cvtsd2si(double *, int *);
 504 
     /* long long integer conversions, declared only when __amd64 is defined */
 505 #ifdef __amd64
 506 extern void sse_cvtsi2sdq(long long *, double *);
 507 extern void sse_cvttsd2siq(double *, long long *);
 508 extern void sse_cvtsd2siq(double *, long long *);
 509 #endif
 510 
 511 /*
 512  * Fill in *info with the operands, default untrapped result, and
 513  * flags produced by a scalar SSE instruction, and return the type
 514  * of trapped exception (if any).  On entry, the mxcsr must have
 515  * all exceptions masked and all flags clear.  The same conditions
 516  * will hold on exit.
 517  *
 518  * This routine does not work if the instruction specified by *inst
 519  * is not a scalar instruction.
 520  */
 521 enum fex_exception
 522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 523 {
 524         unsigned int e, te, mxcsr, oldmxcsr, subnorm;
 525 
 526         /*
 527          * Perform the operation with traps disabled and check the
 528          * exception flags.  If the underflow trap was enabled, also
 529          * check for an exact subnormal result.
 530          */
 531         __fenv_getmxcsr(&oldmxcsr);
 532         subnorm = 0;
 533 
 534         if ((int)inst->op & DOUBLE) {
 535                 if (inst->op == cvtsi2sd) {
 536                         info->op1.type = fex_int;
 537                         info->op1.val.i = inst->op2->i[0];
 538                         info->op2.type = fex_nodata;
 539                 } else if (inst->op == cvtsi2sdq) {
 540                         info->op1.type = fex_llong;
 541                         info->op1.val.l = inst->op2->l[0];
 542                         info->op2.type = fex_nodata;
 543                 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 544                     inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
 545                     cvttsd2siq || inst->op == cvtsd2siq) {
 546                         info->op1.type = fex_double;
 547                         info->op1.val.d = inst->op2->d[0];
 548                         info->op2.type = fex_nodata;
 549                 } else {
 550                         info->op1.type = fex_double;
 551                         info->op1.val.d = inst->op1->d[0];
 552                         info->op2.type = fex_double;
 553                         info->op2.val.d = inst->op2->d[0];
 554                 }
 555 
 556                 info->res.type = fex_double;
 557 
 558                 switch (inst->op) {
 559                 case cmpsd:
 560                         info->op = fex_cmp;
 561                         info->res.type = fex_llong;
 562 
 563                         switch (inst->imm & 3) {
 564                         case 0:
 565                                 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 566                                     &info->res.val.l);
 567                                 break;
 568 
 569                         case 1:
 570                                 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 571                                     &info->res.val.l);
 572                                 break;
 573 
 574                         case 2:
 575                                 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 576                                     &info->res.val.l);
 577                                 break;
 578 
 579                         case 3:
 580                                 sse_cmpunordsd(&info->op1.val.d,
 581                                     &info->op2.val.d, &info->res.val.l);
 582                         }
 583 
 584                         if (inst->imm & 4)
 585                                 info->res.val.l ^= 0xffffffffffffffffull;
 586 
 587                         break;
 588 
 589                 case minsd:
 590                         info->op = fex_other;
 591                         sse_minsd(&info->op1.val.d, &info->op2.val.d,
 592                             &info->res.val.d);
 593                         break;
 594 
 595                 case maxsd:
 596                         info->op = fex_other;
 597                         sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 598                             &info->res.val.d);
 599                         break;
 600 
 601                 case addsd:
 602                         info->op = fex_add;
 603                         sse_addsd(&info->op1.val.d, &info->op2.val.d,
 604                             &info->res.val.d);
 605 
 606                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 607                                 subnorm = 1;
 608 
 609                         break;
 610 
 611                 case subsd:
 612                         info->op = fex_sub;
 613                         sse_subsd(&info->op1.val.d, &info->op2.val.d,
 614                             &info->res.val.d);
 615 
 616                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 617                                 subnorm = 1;
 618 
 619                         break;
 620 
 621                 case mulsd:
 622                         info->op = fex_mul;
 623                         sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 624                             &info->res.val.d);
 625 
 626                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 627                                 subnorm = 1;
 628 
 629                         break;
 630 
 631                 case divsd:
 632                         info->op = fex_div;
 633                         sse_divsd(&info->op1.val.d, &info->op2.val.d,
 634                             &info->res.val.d);
 635 
 636                         if (my_fp_class(&info->res.val.d) == fp_subnormal)
 637                                 subnorm = 1;
 638 
 639                         break;
 640 
 641                 case sqrtsd:
 642                         info->op = fex_sqrt;
 643                         sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 644                         break;
 645 
 646                 case cvtsd2ss:
 647                         info->op = fex_cnvt;
 648                         info->res.type = fex_float;
 649                         sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
 650 
 651                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 652                                 subnorm = 1;
 653 
 654                         break;
 655 
 656                 case cvtsi2sd:
 657                         info->op = fex_cnvt;
 658                         sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 659                         break;
 660 
 661                 case cvttsd2si:
 662                         info->op = fex_cnvt;
 663                         info->res.type = fex_int;
 664                         sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
 665                         break;
 666 
 667                 case cvtsd2si:
 668                         info->op = fex_cnvt;
 669                         info->res.type = fex_int;
 670                         sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
 671                         break;
 672 
 673 #ifdef __amd64


 696                         break;
 697 
 698                 case comisd:
 699                         info->op = fex_cmp;
 700                         info->res.type = fex_nodata;
 701                         sse_comisd(&info->op1.val.d, &info->op2.val.d);
 702                         break;
 703                 default:
 704                         break;
 705                 }
 706         } else {
 707                 if (inst->op == cvtsi2ss) {
 708                         info->op1.type = fex_int;
 709                         info->op1.val.i = inst->op2->i[0];
 710                         info->op2.type = fex_nodata;
 711                 } else if (inst->op == cvtsi2ssq) {
 712                         info->op1.type = fex_llong;
 713                         info->op1.val.l = inst->op2->l[0];
 714                         info->op2.type = fex_nodata;
 715                 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 716                     inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
 717                     cvttss2siq || inst->op == cvtss2siq) {
 718                         info->op1.type = fex_float;
 719                         info->op1.val.f = inst->op2->f[0];
 720                         info->op2.type = fex_nodata;
 721                 } else {
 722                         info->op1.type = fex_float;
 723                         info->op1.val.f = inst->op1->f[0];
 724                         info->op2.type = fex_float;
 725                         info->op2.val.f = inst->op2->f[0];
 726                 }
 727 
 728                 info->res.type = fex_float;
 729 
 730                 switch (inst->op) {
 731                 case cmpss:
 732                         info->op = fex_cmp;
 733                         info->res.type = fex_int;
 734 
 735                         switch (inst->imm & 3) {
 736                         case 0:
 737                                 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 738                                     &info->res.val.i);
 739                                 break;
 740 
 741                         case 1:
 742                                 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 743                                     &info->res.val.i);
 744                                 break;
 745 
 746                         case 2:
 747                                 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 748                                     &info->res.val.i);
 749                                 break;
 750 
 751                         case 3:
 752                                 sse_cmpunordss(&info->op1.val.f,
 753                                     &info->op2.val.f, &info->res.val.i);
 754                         }
 755 
 756                         if (inst->imm & 4)
 757                                 info->res.val.i ^= 0xffffffffu;
 758 
 759                         break;
 760 
 761                 case minss:
 762                         info->op = fex_other;
 763                         sse_minss(&info->op1.val.f, &info->op2.val.f,
 764                             &info->res.val.f);
 765                         break;
 766 
 767                 case maxss:
 768                         info->op = fex_other;
 769                         sse_maxss(&info->op1.val.f, &info->op2.val.f,
 770                             &info->res.val.f);
 771                         break;
 772 
 773                 case addss:
 774                         info->op = fex_add;
 775                         sse_addss(&info->op1.val.f, &info->op2.val.f,
 776                             &info->res.val.f);
 777 
 778                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 779                                 subnorm = 1;
 780 
 781                         break;
 782 
 783                 case subss:
 784                         info->op = fex_sub;
 785                         sse_subss(&info->op1.val.f, &info->op2.val.f,
 786                             &info->res.val.f);
 787 
 788                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 789                                 subnorm = 1;
 790 
 791                         break;
 792 
 793                 case mulss:
 794                         info->op = fex_mul;
 795                         sse_mulss(&info->op1.val.f, &info->op2.val.f,
 796                             &info->res.val.f);
 797 
 798                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 799                                 subnorm = 1;
 800 
 801                         break;
 802 
 803                 case divss:
 804                         info->op = fex_div;
 805                         sse_divss(&info->op1.val.f, &info->op2.val.f,
 806                             &info->res.val.f);
 807 
 808                         if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 809                                 subnorm = 1;
 810 
 811                         break;
 812 
 813                 case sqrtss:
 814                         info->op = fex_sqrt;
 815                         sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 816                         break;
 817 
 818                 case cvtss2sd:
 819                         info->op = fex_cnvt;
 820                         info->res.type = fex_double;
 821                         sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
 822                         break;
 823 
 824                 case cvtsi2ss:
 825                         info->op = fex_cnvt;
 826                         sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
 827                         break;
 828 
 829                 case cvttss2si:
 830                         info->op = fex_cnvt;


 855                         info->res.type = fex_llong;
 856                         sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
 857                         break;
 858 #endif
 859 
 860                 case ucomiss:
 861                         info->op = fex_cmp;
 862                         info->res.type = fex_nodata;
 863                         sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
 864                         break;
 865 
 866                 case comiss:
 867                         info->op = fex_cmp;
 868                         info->res.type = fex_nodata;
 869                         sse_comiss(&info->op1.val.f, &info->op2.val.f);
 870                         break;
 871                 default:
 872                         break;
 873                 }
 874         }
 875 
 876         __fenv_getmxcsr(&mxcsr);
 877         info->flags = mxcsr & 0x3d;
 878         __fenv_setmxcsr(&oldmxcsr);
 879 
 880         /* determine which exception would have been trapped */
 881         te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
 882             0x3d;
 883         e = mxcsr & te;
 884 
 885         if (e & FE_INVALID)
 886                 return (__fex_get_sse_invalid_type(inst));
 887 
 888         if (e & FE_DIVBYZERO)
 889                 return (fex_division);
 890 
 891         if (e & FE_OVERFLOW)
 892                 return (fex_overflow);
 893 
 894         if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 895                 return (fex_underflow);
 896 
 897         if (e & FE_INEXACT)
 898                 return (fex_inexact);
 899 
 900         return ((enum fex_exception)-1);
 901 }
 902 
 903 /*
 904  * Emulate a SIMD SSE instruction to determine which exceptions occur
 905  * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 906  * trapped exception that would occur if the i-th part of the SIMD
 907  * instruction were executed in isolation; set e[i] to -1 if no
 908  * trapped exception would occur in this part.  Also fill in info[i]
 909  * with the corresponding operands, default untrapped result, and
 910  * flags.
 911  *
 912  * This routine does not work if the instruction specified by *inst
 913  * is not a SIMD instruction.
 914  */
 915 void
 916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 917     fex_info_t *info)
 918 {
 919         sseinst_t dummy;
 920         int i;
 921 
 922         e[0] = e[1] = e[2] = e[3] = -1;
 923 
 924         /* perform each part of the SIMD operation */
 925         switch (inst->op) {
 926         case cmpps:
 927                 dummy.op = cmpss;
 928                 dummy.imm = inst->imm;
 929 
 930                 for (i = 0; i < 4; i++) {
 931                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 932                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 933                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 934                 }
 935 
 936                 break;
 937 
 938         case minps:
 939                 dummy.op = minss;
 940 
 941                 for (i = 0; i < 4; i++) {
 942                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 943                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 944                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 945                 }
 946 
 947                 break;
 948 
 949         case maxps:
 950                 dummy.op = maxss;
 951 
 952                 for (i = 0; i < 4; i++) {
 953                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 954                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 955                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 956                 }
 957 
 958                 break;
 959 
 960         case addps:
 961                 dummy.op = addss;
 962 
 963                 for (i = 0; i < 4; i++) {
 964                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 965                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 966                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 967                 }
 968 
 969                 break;
 970 
 971         case subps:
 972                 dummy.op = subss;
 973 
 974                 for (i = 0; i < 4; i++) {
 975                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 976                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 977                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 978                 }
 979 
 980                 break;
 981 
 982         case mulps:
 983                 dummy.op = mulss;
 984 
 985                 for (i = 0; i < 4; i++) {
 986                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 987                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 988                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 989                 }
 990 
 991                 break;
 992 
 993         case divps:
 994                 dummy.op = divss;
 995 
 996                 for (i = 0; i < 4; i++) {
 997                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 998                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 999                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000                 }
1001 
1002                 break;
1003 
1004         case sqrtps:
1005                 dummy.op = sqrtss;
1006 
1007                 for (i = 0; i < 4; i++) {
1008                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1009                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1010                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1011                 }
1012 
1013                 break;
1014 
1015         case cvtdq2ps:
1016                 dummy.op = cvtsi2ss;
1017 
1018                 for (i = 0; i < 4; i++) {
1019                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1020                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1021                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1022                 }
1023 
1024                 break;
1025 
1026         case cvttps2dq:
1027                 dummy.op = cvttss2si;
1028 
1029                 for (i = 0; i < 4; i++) {
1030                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1031                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1032                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1033                 }
1034 
1035                 break;
1036 
1037         case cvtps2dq:
1038                 dummy.op = cvtss2si;
1039 
1040                 for (i = 0; i < 4; i++) {
1041                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1042                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1043                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044                 }
1045 
1046                 break;
1047 
1048         case cvtpi2ps:
1049                 dummy.op = cvtsi2ss;
1050 
1051                 for (i = 0; i < 2; i++) {
1052                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1053                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055                 }
1056 
1057                 break;
1058 
1059         case cvttps2pi:
1060                 dummy.op = cvttss2si;
1061 
1062                 for (i = 0; i < 2; i++) {
1063                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1064                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1065                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1066                 }
1067 
1068                 break;
1069 
1070         case cvtps2pi:
1071                 dummy.op = cvtss2si;
1072 
1073                 for (i = 0; i < 2; i++) {
1074                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1075                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1076                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1077                 }
1078 
1079                 break;
1080 
1081         case cmppd:
1082                 dummy.op = cmpsd;
1083                 dummy.imm = inst->imm;
1084 
1085                 for (i = 0; i < 2; i++) {
1086                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1087                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1088                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1089                 }
1090 
1091                 break;
1092 
1093         case minpd:
1094                 dummy.op = minsd;
1095 
1096                 for (i = 0; i < 2; i++) {
1097                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1098                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1099                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1100                 }
1101 
1102                 break;
1103 
1104         case maxpd:
1105                 dummy.op = maxsd;
1106 
1107                 for (i = 0; i < 2; i++) {
1108                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1109                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1110                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1111                 }
1112 
1113                 break;
1114 
1115         case addpd:
1116                 dummy.op = addsd;
1117 
1118                 for (i = 0; i < 2; i++) {
1119                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1120                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1121                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1122                 }
1123 
1124                 break;
1125 
1126         case subpd:
1127                 dummy.op = subsd;
1128 
1129                 for (i = 0; i < 2; i++) {
1130                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1131                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1132                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1133                 }
1134 
1135                 break;
1136 
1137         case mulpd:
1138                 dummy.op = mulsd;
1139 
1140                 for (i = 0; i < 2; i++) {
1141                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1142                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1143                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1144                 }
1145 
1146                 break;
1147 
1148         case divpd:
1149                 dummy.op = divsd;
1150 
1151                 for (i = 0; i < 2; i++) {
1152                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1153                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1154                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1155                 }
1156 
1157                 break;
1158 
1159         case sqrtpd:
1160                 dummy.op = sqrtsd;
1161 
1162                 for (i = 0; i < 2; i++) {
1163                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1164                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1165                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1166                 }
1167 
1168                 break;
1169 
1170         case cvtpi2pd:
1171         case cvtdq2pd:
1172                 dummy.op = cvtsi2sd;
1173 
1174                 for (i = 0; i < 2; i++) {
1175                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1176                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1177                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1178                 }
1179 
1180                 break;
1181 
1182         case cvttpd2pi:
1183         case cvttpd2dq:
1184                 dummy.op = cvttsd2si;
1185 
1186                 for (i = 0; i < 2; i++) {
1187                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1188                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1189                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1190                 }
1191 
1192                 break;
1193 
1194         case cvtpd2pi:
1195         case cvtpd2dq:
1196                 dummy.op = cvtsd2si;
1197 
1198                 for (i = 0; i < 2; i++) {
1199                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1200                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1201                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1202                 }
1203 
1204                 break;
1205 
1206         case cvtps2pd:
1207                 dummy.op = cvtss2sd;
1208 
1209                 for (i = 0; i < 2; i++) {
1210                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1211                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1212                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1213                 }
1214 
1215                 break;
1216 
1217         case cvtpd2ps:
1218                 dummy.op = cvtsd2ss;
1219 
1220                 for (i = 0; i < 2; i++) {
1221                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1222                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1223                         e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1224                 }
1225 
1226         default:
1227                 break;
1228         }
1229 }
1230 
1231 /*
1232  * Store the result value from *info in the destination of the scalar
1233  * SSE instruction specified by *inst.  If no result is given but the
1234  * exception is underflow or overflow, supply the default trapped result.
1235  *
1236  * This routine does not work if the instruction specified by *inst
1237  * is not a scalar instruction.
1238  */
1239 void
1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1241     fex_info_t *info)
1242 {
1243         int i = 0;
1244         long long l = 0L;
1245         float f = 0.0, fscl;
1246         double d = 0.0L, dscl;
1247 
1248         /*
1249          * for compares that write eflags, just set the flags
1250          * to indicate "unordered"
1251          */
1252         if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
1253             inst->op == comisd) {
1254                 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1255                 return;
1256         }
1257 
1258         /*
1259          * if info doesn't specify a result value, try to generate
1260          * the default trapped result
1261          */
1262         if (info->res.type == fex_nodata) {
1263                 /* set scale factors for exponent wrapping */
1264                 switch (e) {
1265                 case fex_overflow:
1266                         fscl = 1.262177448e-29f;                /* 2^-96 */
1267                         dscl = 6.441148769597133308e-232;       /* 2^-768 */
1268                         break;
1269 
1270                 case fex_underflow:
1271                         fscl = 7.922816251e+28f;                /* 2^96 */
1272                         dscl = 1.552518092300708935e+231;       /* 2^768 */
1273                         break;
1274 
1275                 default:
1276                         (void) __fex_get_sse_op(uap, inst, info);
1277 
1278                         if (info->res.type == fex_nodata)
1279                                 return;
1280 
1281                         goto stuff;
1282                 }
1283 
1284                 /* generate the wrapped result */
1285                 if (inst->op == cvtsd2ss) {
1286                         info->op1.type = fex_double;
1287                         info->op1.val.d = inst->op2->d[0];
1288                         info->op2.type = fex_nodata;
1289                         info->res.type = fex_float;
1290                         info->res.val.f = (float)(fscl * (fscl *
1291                             info->op1.val.d));
1292                 } else if ((int)inst->op & DOUBLE) {
1293                         info->op1.type = fex_double;
1294                         info->op1.val.d = inst->op1->d[0];
1295                         info->op2.type = fex_double;
1296                         info->op2.val.d = inst->op2->d[0];
1297                         info->res.type = fex_double;
1298 
1299                         switch (inst->op) {
1300                         case addsd:
1301                                 info->res.val.d = dscl * (dscl *
1302                                     info->op1.val.d + dscl * info->op2.val.d);
1303                                 break;
1304 
1305                         case subsd:
1306                                 info->res.val.d = dscl * (dscl *
1307                                     info->op1.val.d - dscl * info->op2.val.d);
1308                                 break;
1309 
1310                         case mulsd:
1311                                 info->res.val.d = (dscl * info->op1.val.d) *
1312                                     (dscl * info->op2.val.d);
1313                                 break;
1314 
1315                         case divsd:
1316                                 info->res.val.d = (dscl * info->op1.val.d) /
1317                                     (info->op2.val.d / dscl);
1318                                 break;
1319 
1320                         default:
1321                                 return;
1322                         }
1323                 } else {
1324                         info->op1.type = fex_float;
1325                         info->op1.val.f = inst->op1->f[0];
1326                         info->op2.type = fex_float;
1327                         info->op2.val.f = inst->op2->f[0];
1328                         info->res.type = fex_float;
1329 
1330                         switch (inst->op) {
1331                         case addss:
1332                                 info->res.val.f = fscl * (fscl *
1333                                     info->op1.val.f + fscl * info->op2.val.f);
1334                                 break;
1335 
1336                         case subss:
1337                                 info->res.val.f = fscl * (fscl *
1338                                     info->op1.val.f - fscl * info->op2.val.f);
1339                                 break;
1340 
1341                         case mulss:
1342                                 info->res.val.f = (fscl * info->op1.val.f) *
1343                                     (fscl * info->op2.val.f);
1344                                 break;
1345 
1346                         case divss:
1347                                 info->res.val.f = (fscl * info->op1.val.f) /
1348                                     (info->op2.val.f / fscl);
1349                                 break;
1350 
1351                         default:
1352                                 return;
1353                         }
1354                 }
1355         }
1356 
1357         /* put the result in the destination */
1358 stuff:
1359         if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
1360             cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1361                 switch (info->res.type) {
1362                 case fex_int:
1363                         i = info->res.val.i;
1364                         break;
1365 
1366                 case fex_llong:
1367                         i = info->res.val.l;
1368                         break;
1369 
1370                 case fex_float:
1371                         i = info->res.val.f;
1372                         break;
1373 
1374                 case fex_double:
1375                         i = info->res.val.d;
1376                         break;
1377 
1378                 case fex_ldouble:
1379                         i = info->res.val.q;
1380                         break;
1381 
1382                 default:
1383                         break;
1384                 }
1385 
1386                 inst->op1->i[0] = i;
1387         } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
1388             cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {

1389                 switch (info->res.type) {
1390                 case fex_int:
1391                         l = info->res.val.i;
1392                         break;
1393 
1394                 case fex_llong:
1395                         l = info->res.val.l;
1396                         break;
1397 
1398                 case fex_float:
1399                         l = info->res.val.f;
1400                         break;
1401 
1402                 case fex_double:
1403                         l = info->res.val.d;
1404                         break;
1405 
1406                 case fex_ldouble:
1407                         l = info->res.val.q;
1408                         break;
1409 
1410                 default:
1411                         break;
1412                 }
1413 
1414                 inst->op1->l[0] = l;
1415         } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1416             inst->op == cvtss2sd) {
1417                 switch (info->res.type) {
1418                 case fex_int:
1419                         d = info->res.val.i;
1420                         break;
1421 
1422                 case fex_llong:
1423                         d = info->res.val.l;
1424                         break;
1425 
1426                 case fex_float:
1427                         d = info->res.val.f;
1428                         break;
1429 
1430                 case fex_double:
1431                         d = info->res.val.d;
1432                         break;
1433 
1434                 case fex_ldouble:
1435                         d = info->res.val.q;
1436                         break;
1437 
1438                 default:
1439                         break;
1440                 }
1441 
1442                 inst->op1->d[0] = d;
1443         } else {
1444                 switch (info->res.type) {
1445                 case fex_int:
1446                         f = info->res.val.i;
1447                         break;
1448 
1449                 case fex_llong:
1450                         f = info->res.val.l;
1451                         break;
1452 
1453                 case fex_float:
1454                         f = info->res.val.f;
1455                         break;
1456 
1457                 case fex_double:
1458                         f = info->res.val.d;
1459                         break;
1460 
1461                 case fex_ldouble:
1462                         f = info->res.val.q;
1463                         break;
1464 
1465                 default:
1466                         break;
1467                 }
1468 
1469                 inst->op1->f[0] = f;
1470         }
1471 }
1472 
1473 /*
1474  * Store the results from a SIMD instruction.  For each i, store
1475  * the result value from info[i] in the i-th part of the destination
1476  * of the SIMD SSE instruction specified by *inst.  If no result
1477  * is given but the exception indicated by e[i] is underflow or
1478  * overflow, supply the default trapped result.
1479  *
1480  * This routine does not work if the instruction specified by *inst
1481  * is not a SIMD instruction.
1482  */
1483 void
1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1485     fex_info_t *info)
1486 {
1487         sseinst_t dummy;
1488         int i;
1489 
1490         /* store each part */
1491         switch (inst->op) {
1492         case cmpps:
1493                 dummy.op = cmpss;
1494                 dummy.imm = inst->imm;
1495 
1496                 for (i = 0; i < 4; i++) {
1497                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1498                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1499                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500                 }
1501 
1502                 break;
1503 
1504         case minps:
1505                 dummy.op = minss;
1506 
1507                 for (i = 0; i < 4; i++) {
1508                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1509                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1510                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1511                 }
1512 
1513                 break;
1514 
1515         case maxps:
1516                 dummy.op = maxss;
1517 
1518                 for (i = 0; i < 4; i++) {
1519                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1520                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1521                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1522                 }
1523 
1524                 break;
1525 
1526         case addps:
1527                 dummy.op = addss;
1528 
1529                 for (i = 0; i < 4; i++) {
1530                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1531                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1532                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1533                 }
1534 
1535                 break;
1536 
1537         case subps:
1538                 dummy.op = subss;
1539 
1540                 for (i = 0; i < 4; i++) {
1541                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1542                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1543                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544                 }
1545 
1546                 break;
1547 
1548         case mulps:
1549                 dummy.op = mulss;
1550 
1551                 for (i = 0; i < 4; i++) {
1552                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1553                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1554                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555                 }
1556 
1557                 break;
1558 
1559         case divps:
1560                 dummy.op = divss;
1561 
1562                 for (i = 0; i < 4; i++) {
1563                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1564                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1565                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566                 }
1567 
1568                 break;
1569 
1570         case sqrtps:
1571                 dummy.op = sqrtss;
1572 
1573                 for (i = 0; i < 4; i++) {
1574                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1575                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1576                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1577                 }
1578 
1579                 break;
1580 
1581         case cvtdq2ps:
1582                 dummy.op = cvtsi2ss;
1583 
1584                 for (i = 0; i < 4; i++) {
1585                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1586                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1587                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1588                 }
1589 
1590                 break;
1591 
1592         case cvttps2dq:
1593                 dummy.op = cvttss2si;
1594 
1595                 for (i = 0; i < 4; i++) {
1596                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1597                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1598                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599                 }
1600 
1601                 break;
1602 
1603         case cvtps2dq:
1604                 dummy.op = cvtss2si;
1605 
1606                 for (i = 0; i < 4; i++) {
1607                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1608                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1609                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1610                 }
1611 
1612                 break;
1613 
1614         case cvtpi2ps:
1615                 dummy.op = cvtsi2ss;
1616 
1617                 for (i = 0; i < 2; i++) {
1618                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1619                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1620                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1621                 }
1622 
1623                 break;
1624 
1625         case cvttps2pi:
1626                 dummy.op = cvttss2si;
1627 
1628                 for (i = 0; i < 2; i++) {
1629                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1630                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1631                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1632                 }
1633 
1634                 break;
1635 
1636         case cvtps2pi:
1637                 dummy.op = cvtss2si;
1638 
1639                 for (i = 0; i < 2; i++) {
1640                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1641                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1642                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1643                 }
1644 
1645                 break;
1646 
1647         case cmppd:
1648                 dummy.op = cmpsd;
1649                 dummy.imm = inst->imm;
1650 
1651                 for (i = 0; i < 2; i++) {
1652                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1653                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1654                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1655                 }
1656 
1657                 break;
1658 
1659         case minpd:
1660                 dummy.op = minsd;
1661 
1662                 for (i = 0; i < 2; i++) {
1663                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1664                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1665                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1666                 }
1667 
1668                 break;
1669 
1670         case maxpd:
1671                 dummy.op = maxsd;
1672 
1673                 for (i = 0; i < 2; i++) {
1674                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1675                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1676                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1677                 }
1678 
1679                 break;
1680 
1681         case addpd:
1682                 dummy.op = addsd;
1683 
1684                 for (i = 0; i < 2; i++) {
1685                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1686                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1687                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1688                 }
1689 
1690                 break;
1691 
1692         case subpd:
1693                 dummy.op = subsd;
1694 
1695                 for (i = 0; i < 2; i++) {
1696                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1697                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1698                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1699                 }
1700 
1701                 break;
1702 
1703         case mulpd:
1704                 dummy.op = mulsd;
1705 
1706                 for (i = 0; i < 2; i++) {
1707                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1708                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1709                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1710                 }
1711 
1712                 break;
1713 
1714         case divpd:
1715                 dummy.op = divsd;
1716 
1717                 for (i = 0; i < 2; i++) {
1718                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1719                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1720                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1721                 }
1722 
1723                 break;
1724 
1725         case sqrtpd:
1726                 dummy.op = sqrtsd;
1727 
1728                 for (i = 0; i < 2; i++) {
1729                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1730                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1731                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1732                 }
1733 
1734                 break;
1735 
1736         case cvtpi2pd:
1737         case cvtdq2pd:
1738                 dummy.op = cvtsi2sd;
1739 
1740                 for (i = 0; i < 2; i++) {
1741                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1742                         dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1743                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1744                 }
1745 
1746                 break;
1747 
1748         case cvttpd2pi:
1749         case cvttpd2dq:
1750                 dummy.op = cvttsd2si;
1751 
1752                 for (i = 0; i < 2; i++) {
1753                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1754                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1755                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1756                 }
1757 
1758                 /* for cvttpd2dq, zero the high 64 bits of the destination */
1759                 if (inst->op == cvttpd2dq)
1760                         inst->op1->l[1] = 0ll;
1761 
1762                 break;
1763 
1764         case cvtpd2pi:
1765         case cvtpd2dq:
1766                 dummy.op = cvtsd2si;
1767 
1768                 for (i = 0; i < 2; i++) {
1769                         dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1770                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1771                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1772                 }
1773 
1774                 /* for cvtpd2dq, zero the high 64 bits of the destination */
1775                 if (inst->op == cvtpd2dq)
1776                         inst->op1->l[1] = 0ll;
1777 
1778                 break;
1779 
1780         case cvtps2pd:
1781                 dummy.op = cvtss2sd;
1782 
1783                 for (i = 0; i < 2; i++) {
1784                         dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1785                         dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1786                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1787                 }
1788 
1789                 break;
1790 
1791         case cvtpd2ps:
1792                 dummy.op = cvtsd2ss;
1793 
1794                 for (i = 0; i < 2; i++) {
1795                         dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1796                         dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1797                         __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1798                 }
1799 
1800                 /* zero the high 64 bits of the destination */
1801                 inst->op1->l[1] = 0ll;
1802 
1803         default:
1804                 break;
1805         }
1806 }