Print this page
11210 libm should be cstyle(1ONBLD) clean

Split Close
Expand all
Collapse all
          --- old/usr/src/lib/libm/common/m9x/__fex_sse.c
          +++ new/usr/src/lib/libm/common/m9x/__fex_sse.c
↓ open down ↓ 14 lines elided ↑ open up ↑
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
  21   21  
  22   22  /*
  23   23   * Copyright 2011 Nexenta Systems, Inc.  All rights reserved.
  24   24   */
       25 +
  25   26  /*
  26   27   * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
  27   28   * Use is subject to license terms.
  28   29   */
  29   30  
  30   31  #include <ucontext.h>
  31   32  #include <fenv.h>
  32   33  #if defined(__SUNPRO_C)
  33   34  #include <sunmath.h>
  34   35  #else
  35   36  #include <sys/ieeefp.h>
  36   37  #endif
  37   38  #include "fex_handler.h"
  38   39  #include "fenv_inlines.h"
  39   40  
  40   41  #if !defined(REG_PC)
  41      -#define REG_PC  EIP
       42 +#define REG_PC          EIP
  42   43  #endif
  43   44  
  44   45  #if !defined(REG_PS)
  45      -#define REG_PS  EFL
       46 +#define REG_PS          EFL
  46   47  #endif
  47   48  
  48   49  #ifdef __amd64
  49      -#define regno(X)        ((X < 4)? REG_RAX - X : \
  50      -                        ((X > 4)? REG_RAX + 1 - X : REG_RSP))
       50 +#define regno(X)        ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \
       51 +        REG_RSP))
  51   52  #else
  52      -#define regno(X)        (EAX - X)
       53 +#define regno(X)        (EAX - X)
  53   54  #endif
  54   55  
  55   56  /*
  56   57   * Support for SSE instructions
  57   58   */
  58   59  
  59   60  /*
  60   61   * Decode an SSE instruction.  Fill in *inst and return the length of the
  61   62   * instruction in bytes.  Return 0 if the instruction is not recognized.
  62   63   */
  63   64  int
  64   65  __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
  65   66  {
  66      -        unsigned char   *ip;
  67      -        char            *addr;
  68      -        int             i, dbl, simd, rex, modrm, sib, r;
       67 +        unsigned char *ip;
       68 +        char *addr;
       69 +        int i, dbl, simd, rex, modrm, sib, r;
  69   70  
  70   71          i = 0;
  71   72          ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
  72   73  
  73   74          /* look for pseudo-prefixes */
  74   75          dbl = 0;
  75   76          simd = SIMD;
       77 +
  76   78          if (ip[i] == 0xF3) {
  77   79                  simd = 0;
  78   80                  i++;
  79   81          } else if (ip[i] == 0x66) {
  80   82                  dbl = DOUBLE;
  81   83                  i++;
  82   84          } else if (ip[i] == 0xF2) {
  83   85                  dbl = DOUBLE;
  84   86                  simd = 0;
  85   87                  i++;
  86   88          }
  87   89  
  88   90          /* look for AMD64 REX prefix */
  89   91          rex = 0;
       92 +
  90   93          if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
  91   94                  rex = ip[i];
  92   95                  i++;
  93   96          }
  94   97  
  95   98          /* parse opcode */
  96   99          if (ip[i++] != 0x0F)
  97      -                return 0;
      100 +                return (0);
      101 +
  98  102          switch (ip[i++]) {
  99  103          case 0x2A:
 100  104                  inst->op = (int)cvtsi2ss + simd + dbl;
      105 +
 101  106                  if (!simd)
 102  107                          inst->op = (int)inst->op + (rex & 8);
      108 +
 103  109                  break;
 104  110  
 105  111          case 0x2C:
 106  112                  inst->op = (int)cvttss2si + simd + dbl;
      113 +
 107  114                  if (!simd)
 108  115                          inst->op = (int)inst->op + (rex & 8);
      116 +
 109  117                  break;
 110  118  
 111  119          case 0x2D:
 112  120                  inst->op = (int)cvtss2si + simd + dbl;
      121 +
 113  122                  if (!simd)
 114  123                          inst->op = (int)inst->op + (rex & 8);
      124 +
 115  125                  break;
 116  126  
 117  127          case 0x2E:
      128 +
 118  129                  /* oddball: scalar instruction in a SIMD opcode group */
 119  130                  if (!simd)
 120      -                        return 0;
      131 +                        return (0);
      132 +
 121  133                  inst->op = (int)ucomiss + dbl;
 122  134                  break;
 123  135  
 124  136          case 0x2F:
      137 +
 125  138                  /* oddball: scalar instruction in a SIMD opcode group */
 126  139                  if (!simd)
 127      -                        return 0;
      140 +                        return (0);
      141 +
 128  142                  inst->op = (int)comiss + dbl;
 129  143                  break;
 130  144  
 131  145          case 0x51:
 132  146                  inst->op = (int)sqrtss + simd + dbl;
 133  147                  break;
 134  148  
 135  149          case 0x58:
 136  150                  inst->op = (int)addss + simd + dbl;
 137  151                  break;
 138  152  
 139  153          case 0x59:
 140  154                  inst->op = (int)mulss + simd + dbl;
 141  155                  break;
 142  156  
 143  157          case 0x5A:
 144  158                  inst->op = (int)cvtss2sd + simd + dbl;
 145  159                  break;
 146  160  
 147  161          case 0x5B:
      162 +
 148  163                  if (dbl) {
 149  164                          if (simd)
 150  165                                  inst->op = cvtps2dq;
 151  166                          else
 152      -                                return 0;
      167 +                                return (0);
 153  168                  } else {
 154      -                        inst->op = (simd)? cvtdq2ps : cvttps2dq;
      169 +                        inst->op = (simd) ? cvtdq2ps : cvttps2dq;
 155  170                  }
      171 +
 156  172                  break;
 157  173  
 158  174          case 0x5C:
 159  175                  inst->op = (int)subss + simd + dbl;
 160  176                  break;
 161  177  
 162  178          case 0x5D:
 163  179                  inst->op = (int)minss + simd + dbl;
 164  180                  break;
 165  181  
↓ open down ↓ 3 lines elided ↑ open up ↑
 169  185  
 170  186          case 0x5F:
 171  187                  inst->op = (int)maxss + simd + dbl;
 172  188                  break;
 173  189  
 174  190          case 0xC2:
 175  191                  inst->op = (int)cmpss + simd + dbl;
 176  192                  break;
 177  193  
 178  194          case 0xE6:
      195 +
 179  196                  if (simd) {
 180  197                          if (dbl)
 181  198                                  inst->op = cvttpd2dq;
 182  199                          else
 183      -                                return 0;
      200 +                                return (0);
 184  201                  } else {
 185      -                        inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
      202 +                        inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
 186  203                  }
      204 +
 187  205                  break;
 188  206  
 189  207          default:
 190      -                return 0;
      208 +                return (0);
 191  209          }
 192  210  
 193  211          /* locate operands */
 194  212          modrm = ip[i++];
 195  213  
 196      -        if (inst->op == cvtss2si || inst->op == cvttss2si ||
 197      -            inst->op == cvtsd2si || inst->op == cvttsd2si ||
 198      -            inst->op == cvtss2siq || inst->op == cvttss2siq ||
 199      -            inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
      214 +        if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
      215 +            cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
      216 +            inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
      217 +            cvttsd2siq) {
 200  218                  /* op1 is a gp register */
 201  219                  r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 202  220                  inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 203      -        } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
 204      -            inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
      221 +        } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
      222 +            cvtpd2pi || inst->op == cvttpd2pi) {
 205  223                  /* op1 is a mmx register */
 206  224  #ifdef __amd64
 207      -                inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
 208      -                    fpchip_state.st[(modrm >> 3) & 7];
      225 +                inst->op1 = (sseoperand_t *)
      226 +                    &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
      227 +                    .st[(modrm >> 3) & 7];
 209  228  #else
 210  229                  inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
 211      -                    (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 212      -                    fpchip_state.state[7]);
      230 +                    (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
      231 +                    .state[7]);
 213  232  #endif
 214  233          } else {
 215  234                  /* op1 is a xmm register */
 216  235                  r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
 217      -                inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 218      -                    fp_reg_set.fpchip_state.xmm[r];
      236 +                inst->op1 =
      237 +                    (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
      238 +                    .fpchip_state.xmm[r];
 219  239          }
 220  240  
 221  241          if ((modrm >> 6) == 3) {
 222      -                if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
 223      -                    inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
      242 +                if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
      243 +                    cvtsi2ssq || inst->op == cvtsi2sdq) {
 224  244                          /* op2 is a gp register */
 225  245                          r = ((rex & 1) << 3) | (modrm & 7);
 226      -                        inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
 227      -                            gregs[regno(r)];
      246 +                        inst->op2 =
      247 +                            (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
 228  248                  } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
 229  249                          /* op2 is a mmx register */
 230  250  #ifdef __amd64
 231      -                        inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 232      -                            fp_reg_set.fpchip_state.st[modrm & 7];
      251 +                        inst->op2 =
      252 +                            (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
      253 +                            .fpchip_state.st[modrm & 7];
 233  254  #else
 234  255                          inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
 235      -                            (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
 236      -                            fpchip_state.state[7]);
      256 +                            (char *)&uap->uc_mcontext.fpregs.fp_reg_set
      257 +                            .fpchip_state.state[7]);
 237  258  #endif
 238  259                  } else {
 239  260                          /* op2 is a xmm register */
 240  261                          r = ((rex & 1) << 3) | (modrm & 7);
 241      -                        inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
 242      -                            fp_reg_set.fpchip_state.xmm[r];
      262 +                        inst->op2 =
      263 +                            (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
      264 +                            .fpchip_state.xmm[r];
 243  265                  }
 244  266          } else if ((modrm & 0xc7) == 0x05) {
 245  267  #ifdef __amd64
 246  268                  /* address of next instruction + offset */
 247  269                  r = i + 4;
 248      -                if (inst->op == cmpss || inst->op == cmpps ||
 249      -                    inst->op == cmpsd || inst->op == cmppd)
      270 +
      271 +                if (inst->op == cmpss || inst->op == cmpps || inst->op ==
      272 +                    cmpsd || inst->op == cmppd)
 250  273                          r++;
      274 +
 251  275                  inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
 252  276  #else
 253  277                  /* absolute address */
 254  278                  inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
 255  279  #endif
 256  280                  i += 4;
 257  281          } else {
 258  282                  /* complex address */
 259  283                  if ((modrm & 7) == 4) {
 260  284                          /* parse sib byte */
 261  285                          sib = ip[i++];
      286 +
 262  287                          if ((sib & 7) == 5 && (modrm >> 6) == 0) {
 263  288                                  /* start with absolute address */
 264  289                                  addr = (char *)(uintptr_t)(*(int *)(ip + i));
 265  290                                  i += 4;
 266  291                          } else {
 267  292                                  /* start with base */
 268  293                                  r = ((rex & 1) << 3) | (sib & 7);
 269  294                                  addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 270  295                          }
      296 +
 271  297                          r = ((rex & 2) << 2) | ((sib >> 3) & 7);
      298 +
 272  299                          if (r != 4) {
 273  300                                  /* add scaled index */
 274      -                                addr += uap->uc_mcontext.gregs[regno(r)]
 275      -                                    << (sib >> 6);
      301 +                                addr += uap->uc_mcontext.gregs[regno(r)] <<
      302 +                                    (sib >> 6);
 276  303                          }
 277  304                  } else {
 278  305                          r = ((rex & 1) << 3) | (modrm & 7);
 279  306                          addr = (char *)uap->uc_mcontext.gregs[regno(r)];
 280  307                  }
 281  308  
 282  309                  /* add displacement, if any */
 283  310                  if ((modrm >> 6) == 1) {
 284  311                          addr += (char)ip[i++];
 285  312                  } else if ((modrm >> 6) == 2) {
 286  313                          addr += *(int *)(ip + i);
 287  314                          i += 4;
 288  315                  }
      316 +
 289  317                  inst->op2 = (sseoperand_t *)addr;
 290  318          }
 291  319  
 292  320          if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
 293  321              inst->op == cmppd) {
 294  322                  /* get the immediate operand */
 295  323                  inst->imm = ip[i++];
 296  324          }
 297  325  
 298      -        return i;
      326 +        return (i);
 299  327  }
 300  328  
 301  329  static enum fp_class_type
 302  330  my_fp_classf(float *x)
 303  331  {
 304      -        int     i = *(int *)x & ~0x80000000;
      332 +        int i = *(int *)x & ~0x80000000;
 305  333  
 306  334          if (i < 0x7f800000) {
 307  335                  if (i < 0x00800000)
 308      -                        return ((i == 0)? fp_zero : fp_subnormal);
 309      -                return fp_normal;
      336 +                        return ((i == 0) ? fp_zero : fp_subnormal);
      337 +
      338 +                return (fp_normal);
      339 +        } else if (i == 0x7f800000) {
      340 +                return (fp_infinity);
      341 +        } else if (i & 0x400000) {
      342 +                return (fp_quiet);
      343 +        } else {
      344 +                return (fp_signaling);
 310  345          }
 311      -        else if (i == 0x7f800000)
 312      -                return fp_infinity;
 313      -        else if (i & 0x400000)
 314      -                return fp_quiet;
 315      -        else
 316      -                return fp_signaling;
 317  346  }
 318  347  
 319  348  static enum fp_class_type
 320  349  my_fp_class(double *x)
 321  350  {
 322      -        int     i = *(1+(int *)x) & ~0x80000000;
      351 +        int i = *(1 + (int *)x) & ~0x80000000;
 323  352  
 324  353          if (i < 0x7ff00000) {
 325  354                  if (i < 0x00100000)
 326      -                        return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
 327      -                return fp_normal;
      355 +                        return (((i | *(int *)x) == 0) ? fp_zero :
      356 +                            fp_subnormal);
      357 +
      358 +                return (fp_normal);
      359 +        } else if (i == 0x7ff00000 && *(int *)x == 0) {
      360 +                return (fp_infinity);
      361 +        } else if (i & 0x80000) {
      362 +                return (fp_quiet);
      363 +        } else {
      364 +                return (fp_signaling);
 328  365          }
 329      -        else if (i == 0x7ff00000 && *(int *)x == 0)
 330      -                return fp_infinity;
 331      -        else if (i & 0x80000)
 332      -                return fp_quiet;
 333      -        else
 334      -                return fp_signaling;
 335  366  }
 336  367  
 337  368  /*
 338  369   * Inspect a scalar SSE instruction that incurred an invalid operation
 339  370   * exception to determine which type of exception it was.
 340  371   */
 341  372  static enum fex_exception
 342  373  __fex_get_sse_invalid_type(sseinst_t *inst)
 343  374  {
 344      -        enum fp_class_type      t1, t2;
      375 +        enum fp_class_type t1, t2;
 345  376  
 346  377          /* check op2 for signaling nan */
 347      -        t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
      378 +        t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
 348  379              my_fp_classf(&inst->op2->f[0]);
      380 +
 349  381          if (t2 == fp_signaling)
 350      -                return fex_inv_snan;
      382 +                return (fex_inv_snan);
 351  383  
 352  384          /* eliminate all single-operand instructions */
 353  385          switch (inst->op) {
 354  386          case cvtsd2ss:
 355  387          case cvtss2sd:
 356  388                  /* hmm, this shouldn't have happened */
 357      -                return (enum fex_exception) -1;
      389 +                return ((enum fex_exception)-1);
 358  390  
 359  391          case sqrtss:
 360  392          case sqrtsd:
 361      -                return fex_inv_sqrt;
      393 +                return (fex_inv_sqrt);
 362  394  
 363  395          case cvtss2si:
 364  396          case cvtsd2si:
 365  397          case cvttss2si:
 366  398          case cvttsd2si:
 367  399          case cvtss2siq:
 368  400          case cvtsd2siq:
 369  401          case cvttss2siq:
 370  402          case cvttsd2siq:
 371      -                return fex_inv_int;
      403 +                return (fex_inv_int);
 372  404          default:
 373  405                  break;
 374  406          }
 375  407  
 376  408          /* check op1 for signaling nan */
 377      -        t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
      409 +        t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
 378  410              my_fp_classf(&inst->op1->f[0]);
      411 +
 379  412          if (t1 == fp_signaling)
 380      -                return fex_inv_snan;
      413 +                return (fex_inv_snan);
 381  414  
 382  415          /* check two-operand instructions for other cases */
 383  416          switch (inst->op) {
 384  417          case cmpss:
 385  418          case cmpsd:
 386  419          case minss:
 387  420          case minsd:
 388  421          case maxss:
 389  422          case maxsd:
 390  423          case comiss:
 391  424          case comisd:
 392      -                return fex_inv_cmp;
      425 +                return (fex_inv_cmp);
 393  426  
 394  427          case addss:
 395  428          case addsd:
 396  429          case subss:
 397  430          case subsd:
      431 +
 398  432                  if (t1 == fp_infinity && t2 == fp_infinity)
 399      -                        return fex_inv_isi;
      433 +                        return (fex_inv_isi);
      434 +
 400  435                  break;
 401  436  
 402  437          case mulss:
 403  438          case mulsd:
 404      -                if ((t1 == fp_zero && t2 == fp_infinity) ||
 405      -                    (t2 == fp_zero && t1 == fp_infinity))
 406      -                        return fex_inv_zmi;
      439 +
      440 +                if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
      441 +                    t1 == fp_infinity))
      442 +                        return (fex_inv_zmi);
      443 +
 407  444                  break;
 408  445  
 409  446          case divss:
 410  447          case divsd:
      448 +
 411  449                  if (t1 == fp_zero && t2 == fp_zero)
 412      -                        return fex_inv_zdz;
      450 +                        return (fex_inv_zdz);
      451 +
 413  452                  if (t1 == fp_infinity && t2 == fp_infinity)
 414      -                        return fex_inv_idi;
      453 +                        return (fex_inv_idi);
      454 +
 415  455          default:
 416  456                  break;
 417  457          }
 418  458  
 419      -        return (enum fex_exception)-1;
      459 +        return ((enum fex_exception)-1);
 420  460  }
 421  461  
 422  462  /* inline templates */
 423  463  extern void sse_cmpeqss(float *, float *, int *);
 424  464  extern void sse_cmpltss(float *, float *, int *);
 425  465  extern void sse_cmpless(float *, float *, int *);
 426  466  extern void sse_cmpunordss(float *, float *, int *);
 427  467  extern void sse_minss(float *, float *, float *);
 428  468  extern void sse_maxss(float *, float *, float *);
 429  469  extern void sse_addss(float *, float *, float *);
 430  470  extern void sse_subss(float *, float *, float *);
 431  471  extern void sse_mulss(float *, float *, float *);
 432  472  extern void sse_divss(float *, float *, float *);
 433  473  extern void sse_sqrtss(float *, float *);
 434  474  extern void sse_ucomiss(float *, float *);
 435  475  extern void sse_comiss(float *, float *);
 436  476  extern void sse_cvtss2sd(float *, double *);
 437  477  extern void sse_cvtsi2ss(int *, float *);
 438  478  extern void sse_cvttss2si(float *, int *);
 439  479  extern void sse_cvtss2si(float *, int *);
      480 +
 440  481  #ifdef __amd64
 441  482  extern void sse_cvtsi2ssq(long long *, float *);
 442  483  extern void sse_cvttss2siq(float *, long long *);
 443  484  extern void sse_cvtss2siq(float *, long long *);
 444  485  #endif
      486 +
 445  487  extern void sse_cmpeqsd(double *, double *, long long *);
 446  488  extern void sse_cmpltsd(double *, double *, long long *);
 447  489  extern void sse_cmplesd(double *, double *, long long *);
 448  490  extern void sse_cmpunordsd(double *, double *, long long *);
 449  491  extern void sse_minsd(double *, double *, double *);
 450  492  extern void sse_maxsd(double *, double *, double *);
 451  493  extern void sse_addsd(double *, double *, double *);
 452  494  extern void sse_subsd(double *, double *, double *);
 453  495  extern void sse_mulsd(double *, double *, double *);
 454  496  extern void sse_divsd(double *, double *, double *);
 455  497  extern void sse_sqrtsd(double *, double *);
 456  498  extern void sse_ucomisd(double *, double *);
 457  499  extern void sse_comisd(double *, double *);
 458  500  extern void sse_cvtsd2ss(double *, float *);
 459  501  extern void sse_cvtsi2sd(int *, double *);
 460  502  extern void sse_cvttsd2si(double *, int *);
 461  503  extern void sse_cvtsd2si(double *, int *);
      504 +
 462  505  #ifdef __amd64
 463  506  extern void sse_cvtsi2sdq(long long *, double *);
 464  507  extern void sse_cvttsd2siq(double *, long long *);
 465  508  extern void sse_cvtsd2siq(double *, long long *);
 466  509  #endif
 467  510  
 468  511  /*
 469  512   * Fill in *info with the operands, default untrapped result, and
 470  513   * flags produced by a scalar SSE instruction, and return the type
 471  514   * of trapped exception (if any).  On entry, the mxcsr must have
 472  515   * all exceptions masked and all flags clear.  The same conditions
 473  516   * will hold on exit.
 474  517   *
 475  518   * This routine does not work if the instruction specified by *inst
 476  519   * is not a scalar instruction.
 477  520   */
 478  521  enum fex_exception
 479  522  __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
 480  523  {
 481      -        unsigned int    e, te, mxcsr, oldmxcsr, subnorm;
      524 +        unsigned int e, te, mxcsr, oldmxcsr, subnorm;
 482  525  
 483  526          /*
 484  527           * Perform the operation with traps disabled and check the
 485  528           * exception flags.  If the underflow trap was enabled, also
 486  529           * check for an exact subnormal result.
 487  530           */
 488  531          __fenv_getmxcsr(&oldmxcsr);
 489  532          subnorm = 0;
      533 +
 490  534          if ((int)inst->op & DOUBLE) {
 491  535                  if (inst->op == cvtsi2sd) {
 492  536                          info->op1.type = fex_int;
 493  537                          info->op1.val.i = inst->op2->i[0];
 494  538                          info->op2.type = fex_nodata;
 495  539                  } else if (inst->op == cvtsi2sdq) {
 496  540                          info->op1.type = fex_llong;
 497  541                          info->op1.val.l = inst->op2->l[0];
 498  542                          info->op2.type = fex_nodata;
 499  543                  } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
 500      -                    inst->op == cvttsd2si || inst->op == cvtsd2si ||
 501      -                    inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
      544 +                    inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
      545 +                    cvttsd2siq || inst->op == cvtsd2siq) {
 502  546                          info->op1.type = fex_double;
 503  547                          info->op1.val.d = inst->op2->d[0];
 504  548                          info->op2.type = fex_nodata;
 505  549                  } else {
 506  550                          info->op1.type = fex_double;
 507  551                          info->op1.val.d = inst->op1->d[0];
 508  552                          info->op2.type = fex_double;
 509  553                          info->op2.val.d = inst->op2->d[0];
 510  554                  }
      555 +
 511  556                  info->res.type = fex_double;
      557 +
 512  558                  switch (inst->op) {
 513  559                  case cmpsd:
 514  560                          info->op = fex_cmp;
 515  561                          info->res.type = fex_llong;
      562 +
 516  563                          switch (inst->imm & 3) {
 517  564                          case 0:
 518  565                                  sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
 519  566                                      &info->res.val.l);
 520  567                                  break;
 521  568  
 522  569                          case 1:
 523  570                                  sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
 524  571                                      &info->res.val.l);
 525  572                                  break;
 526  573  
 527  574                          case 2:
 528  575                                  sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
 529  576                                      &info->res.val.l);
 530  577                                  break;
 531  578  
 532  579                          case 3:
 533  580                                  sse_cmpunordsd(&info->op1.val.d,
 534  581                                      &info->op2.val.d, &info->res.val.l);
 535  582                          }
      583 +
 536  584                          if (inst->imm & 4)
 537  585                                  info->res.val.l ^= 0xffffffffffffffffull;
      586 +
 538  587                          break;
 539  588  
 540  589                  case minsd:
 541  590                          info->op = fex_other;
 542  591                          sse_minsd(&info->op1.val.d, &info->op2.val.d,
 543  592                              &info->res.val.d);
 544  593                          break;
 545  594  
 546  595                  case maxsd:
 547  596                          info->op = fex_other;
 548  597                          sse_maxsd(&info->op1.val.d, &info->op2.val.d,
 549  598                              &info->res.val.d);
 550  599                          break;
 551  600  
 552  601                  case addsd:
 553  602                          info->op = fex_add;
 554  603                          sse_addsd(&info->op1.val.d, &info->op2.val.d,
 555  604                              &info->res.val.d);
      605 +
 556  606                          if (my_fp_class(&info->res.val.d) == fp_subnormal)
 557  607                                  subnorm = 1;
      608 +
 558  609                          break;
 559  610  
 560  611                  case subsd:
 561  612                          info->op = fex_sub;
 562  613                          sse_subsd(&info->op1.val.d, &info->op2.val.d,
 563  614                              &info->res.val.d);
      615 +
 564  616                          if (my_fp_class(&info->res.val.d) == fp_subnormal)
 565  617                                  subnorm = 1;
      618 +
 566  619                          break;
 567  620  
 568  621                  case mulsd:
 569  622                          info->op = fex_mul;
 570  623                          sse_mulsd(&info->op1.val.d, &info->op2.val.d,
 571  624                              &info->res.val.d);
      625 +
 572  626                          if (my_fp_class(&info->res.val.d) == fp_subnormal)
 573  627                                  subnorm = 1;
      628 +
 574  629                          break;
 575  630  
 576  631                  case divsd:
 577  632                          info->op = fex_div;
 578  633                          sse_divsd(&info->op1.val.d, &info->op2.val.d,
 579  634                              &info->res.val.d);
      635 +
 580  636                          if (my_fp_class(&info->res.val.d) == fp_subnormal)
 581  637                                  subnorm = 1;
      638 +
 582  639                          break;
 583  640  
 584  641                  case sqrtsd:
 585  642                          info->op = fex_sqrt;
 586  643                          sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
 587  644                          break;
 588  645  
 589  646                  case cvtsd2ss:
 590  647                          info->op = fex_cnvt;
 591  648                          info->res.type = fex_float;
 592  649                          sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
      650 +
 593  651                          if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 594  652                                  subnorm = 1;
      653 +
 595  654                          break;
 596  655  
 597  656                  case cvtsi2sd:
 598  657                          info->op = fex_cnvt;
 599  658                          sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
 600  659                          break;
 601  660  
 602  661                  case cvttsd2si:
 603  662                          info->op = fex_cnvt;
 604  663                          info->res.type = fex_int;
↓ open down ↓ 42 lines elided ↑ open up ↑
 647  706          } else {
 648  707                  if (inst->op == cvtsi2ss) {
 649  708                          info->op1.type = fex_int;
 650  709                          info->op1.val.i = inst->op2->i[0];
 651  710                          info->op2.type = fex_nodata;
 652  711                  } else if (inst->op == cvtsi2ssq) {
 653  712                          info->op1.type = fex_llong;
 654  713                          info->op1.val.l = inst->op2->l[0];
 655  714                          info->op2.type = fex_nodata;
 656  715                  } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
 657      -                    inst->op == cvttss2si || inst->op == cvtss2si ||
 658      -                    inst->op == cvttss2siq || inst->op == cvtss2siq) {
      716 +                    inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
      717 +                    cvttss2siq || inst->op == cvtss2siq) {
 659  718                          info->op1.type = fex_float;
 660  719                          info->op1.val.f = inst->op2->f[0];
 661  720                          info->op2.type = fex_nodata;
 662  721                  } else {
 663  722                          info->op1.type = fex_float;
 664  723                          info->op1.val.f = inst->op1->f[0];
 665  724                          info->op2.type = fex_float;
 666  725                          info->op2.val.f = inst->op2->f[0];
 667  726                  }
      727 +
 668  728                  info->res.type = fex_float;
      729 +
 669  730                  switch (inst->op) {
 670  731                  case cmpss:
 671  732                          info->op = fex_cmp;
 672  733                          info->res.type = fex_int;
      734 +
 673  735                          switch (inst->imm & 3) {
 674  736                          case 0:
 675  737                                  sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
 676  738                                      &info->res.val.i);
 677  739                                  break;
 678  740  
 679  741                          case 1:
 680  742                                  sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
 681  743                                      &info->res.val.i);
 682  744                                  break;
 683  745  
 684  746                          case 2:
 685  747                                  sse_cmpless(&info->op1.val.f, &info->op2.val.f,
 686  748                                      &info->res.val.i);
 687  749                                  break;
 688  750  
 689  751                          case 3:
 690  752                                  sse_cmpunordss(&info->op1.val.f,
 691  753                                      &info->op2.val.f, &info->res.val.i);
 692  754                          }
      755 +
 693  756                          if (inst->imm & 4)
 694  757                                  info->res.val.i ^= 0xffffffffu;
      758 +
 695  759                          break;
 696  760  
 697  761                  case minss:
 698  762                          info->op = fex_other;
 699  763                          sse_minss(&info->op1.val.f, &info->op2.val.f,
 700  764                              &info->res.val.f);
 701  765                          break;
 702  766  
 703  767                  case maxss:
 704  768                          info->op = fex_other;
 705  769                          sse_maxss(&info->op1.val.f, &info->op2.val.f,
 706  770                              &info->res.val.f);
 707  771                          break;
 708  772  
 709  773                  case addss:
 710  774                          info->op = fex_add;
 711  775                          sse_addss(&info->op1.val.f, &info->op2.val.f,
 712  776                              &info->res.val.f);
      777 +
 713  778                          if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 714  779                                  subnorm = 1;
      780 +
 715  781                          break;
 716  782  
 717  783                  case subss:
 718  784                          info->op = fex_sub;
 719  785                          sse_subss(&info->op1.val.f, &info->op2.val.f,
 720  786                              &info->res.val.f);
      787 +
 721  788                          if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 722  789                                  subnorm = 1;
      790 +
 723  791                          break;
 724  792  
 725  793                  case mulss:
 726  794                          info->op = fex_mul;
 727  795                          sse_mulss(&info->op1.val.f, &info->op2.val.f,
 728  796                              &info->res.val.f);
      797 +
 729  798                          if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 730  799                                  subnorm = 1;
      800 +
 731  801                          break;
 732  802  
 733  803                  case divss:
 734  804                          info->op = fex_div;
 735  805                          sse_divss(&info->op1.val.f, &info->op2.val.f,
 736  806                              &info->res.val.f);
      807 +
 737  808                          if (my_fp_classf(&info->res.val.f) == fp_subnormal)
 738  809                                  subnorm = 1;
      810 +
 739  811                          break;
 740  812  
 741  813                  case sqrtss:
 742  814                          info->op = fex_sqrt;
 743  815                          sse_sqrtss(&info->op1.val.f, &info->res.val.f);
 744  816                          break;
 745  817  
 746  818                  case cvtss2sd:
 747  819                          info->op = fex_cnvt;
 748  820                          info->res.type = fex_double;
↓ open down ↓ 44 lines elided ↑ open up ↑
 793  865  
 794  866                  case comiss:
 795  867                          info->op = fex_cmp;
 796  868                          info->res.type = fex_nodata;
 797  869                          sse_comiss(&info->op1.val.f, &info->op2.val.f);
 798  870                          break;
 799  871                  default:
 800  872                          break;
 801  873                  }
 802  874          }
      875 +
 803  876          __fenv_getmxcsr(&mxcsr);
 804  877          info->flags = mxcsr & 0x3d;
 805  878          __fenv_setmxcsr(&oldmxcsr);
 806  879  
 807  880          /* determine which exception would have been trapped */
 808      -        te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
 809      -            >> 7) & 0x3d;
      881 +        te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
      882 +            0x3d;
 810  883          e = mxcsr & te;
      884 +
 811  885          if (e & FE_INVALID)
 812      -                return __fex_get_sse_invalid_type(inst);
      886 +                return (__fex_get_sse_invalid_type(inst));
      887 +
 813  888          if (e & FE_DIVBYZERO)
 814      -                return fex_division;
      889 +                return (fex_division);
      890 +
 815  891          if (e & FE_OVERFLOW)
 816      -                return fex_overflow;
      892 +                return (fex_overflow);
      893 +
 817  894          if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
 818      -                return fex_underflow;
      895 +                return (fex_underflow);
      896 +
 819  897          if (e & FE_INEXACT)
 820      -                return fex_inexact;
 821      -        return (enum fex_exception)-1;
      898 +                return (fex_inexact);
      899 +
      900 +        return ((enum fex_exception)-1);
 822  901  }
 823  902  
 824  903  /*
 825  904   * Emulate a SIMD SSE instruction to determine which exceptions occur
 826  905   * in each part.  For i = 0, 1, 2, and 3, set e[i] to indicate the
 827  906   * trapped exception that would occur if the i-th part of the SIMD
 828  907   * instruction were executed in isolation; set e[i] to -1 if no
 829  908   * trapped exception would occur in this part.  Also fill in info[i]
 830  909   * with the corresponding operands, default untrapped result, and
 831  910   * flags.
 832  911   *
 833  912   * This routine does not work if the instruction specified by *inst
 834  913   * is not a SIMD instruction.
 835  914   */
 836  915  void
 837  916  __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
 838  917      fex_info_t *info)
 839  918  {
 840      -        sseinst_t       dummy;
 841      -        int             i;
      919 +        sseinst_t dummy;
      920 +        int i;
 842  921  
 843  922          e[0] = e[1] = e[2] = e[3] = -1;
 844  923  
 845  924          /* perform each part of the SIMD operation */
 846  925          switch (inst->op) {
 847  926          case cmpps:
 848  927                  dummy.op = cmpss;
 849  928                  dummy.imm = inst->imm;
      929 +
 850  930                  for (i = 0; i < 4; i++) {
 851  931                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 852  932                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 853  933                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 854  934                  }
      935 +
 855  936                  break;
 856  937  
 857  938          case minps:
 858  939                  dummy.op = minss;
      940 +
 859  941                  for (i = 0; i < 4; i++) {
 860  942                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 861  943                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 862  944                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 863  945                  }
      946 +
 864  947                  break;
 865  948  
 866  949          case maxps:
 867  950                  dummy.op = maxss;
      951 +
 868  952                  for (i = 0; i < 4; i++) {
 869  953                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 870  954                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 871  955                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 872  956                  }
      957 +
 873  958                  break;
 874  959  
 875  960          case addps:
 876  961                  dummy.op = addss;
      962 +
 877  963                  for (i = 0; i < 4; i++) {
 878  964                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 879  965                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 880  966                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 881  967                  }
      968 +
 882  969                  break;
 883  970  
 884  971          case subps:
 885  972                  dummy.op = subss;
      973 +
 886  974                  for (i = 0; i < 4; i++) {
 887  975                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 888  976                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 889  977                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 890  978                  }
      979 +
 891  980                  break;
 892  981  
 893  982          case mulps:
 894  983                  dummy.op = mulss;
      984 +
 895  985                  for (i = 0; i < 4; i++) {
 896  986                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 897  987                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 898  988                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 899  989                  }
      990 +
 900  991                  break;
 901  992  
 902  993          case divps:
 903  994                  dummy.op = divss;
      995 +
 904  996                  for (i = 0; i < 4; i++) {
 905  997                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 906  998                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 907  999                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 908 1000                  }
     1001 +
 909 1002                  break;
 910 1003  
 911 1004          case sqrtps:
 912 1005                  dummy.op = sqrtss;
     1006 +
 913 1007                  for (i = 0; i < 4; i++) {
 914 1008                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 915 1009                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 916 1010                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 917 1011                  }
     1012 +
 918 1013                  break;
 919 1014  
 920 1015          case cvtdq2ps:
 921 1016                  dummy.op = cvtsi2ss;
     1017 +
 922 1018                  for (i = 0; i < 4; i++) {
 923 1019                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 924 1020                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 925 1021                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 926 1022                  }
     1023 +
 927 1024                  break;
 928 1025  
 929 1026          case cvttps2dq:
 930 1027                  dummy.op = cvttss2si;
     1028 +
 931 1029                  for (i = 0; i < 4; i++) {
 932 1030                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 933 1031                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 934 1032                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 935 1033                  }
     1034 +
 936 1035                  break;
 937 1036  
 938 1037          case cvtps2dq:
 939 1038                  dummy.op = cvtss2si;
     1039 +
 940 1040                  for (i = 0; i < 4; i++) {
 941 1041                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 942 1042                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 943 1043                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 944 1044                  }
     1045 +
 945 1046                  break;
 946 1047  
 947 1048          case cvtpi2ps:
 948 1049                  dummy.op = cvtsi2ss;
     1050 +
 949 1051                  for (i = 0; i < 2; i++) {
 950 1052                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
 951 1053                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
 952 1054                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 953 1055                  }
     1056 +
 954 1057                  break;
 955 1058  
 956 1059          case cvttps2pi:
 957 1060                  dummy.op = cvttss2si;
     1061 +
 958 1062                  for (i = 0; i < 2; i++) {
 959 1063                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 960 1064                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 961 1065                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 962 1066                  }
     1067 +
 963 1068                  break;
 964 1069  
 965 1070          case cvtps2pi:
 966 1071                  dummy.op = cvtss2si;
     1072 +
 967 1073                  for (i = 0; i < 2; i++) {
 968 1074                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
 969 1075                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
 970 1076                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 971 1077                  }
     1078 +
 972 1079                  break;
 973 1080  
 974 1081          case cmppd:
 975 1082                  dummy.op = cmpsd;
 976 1083                  dummy.imm = inst->imm;
     1084 +
 977 1085                  for (i = 0; i < 2; i++) {
 978 1086                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 979 1087                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 980 1088                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 981 1089                  }
     1090 +
 982 1091                  break;
 983 1092  
 984 1093          case minpd:
 985 1094                  dummy.op = minsd;
     1095 +
 986 1096                  for (i = 0; i < 2; i++) {
 987 1097                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 988 1098                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 989 1099                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 990 1100                  }
     1101 +
 991 1102                  break;
 992 1103  
 993 1104          case maxpd:
 994 1105                  dummy.op = maxsd;
     1106 +
 995 1107                  for (i = 0; i < 2; i++) {
 996 1108                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
 997 1109                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
 998 1110                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
 999 1111                  }
     1112 +
1000 1113                  break;
1001 1114  
1002 1115          case addpd:
1003 1116                  dummy.op = addsd;
     1117 +
1004 1118                  for (i = 0; i < 2; i++) {
1005 1119                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006 1120                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007 1121                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008 1122                  }
     1123 +
1009 1124                  break;
1010 1125  
1011 1126          case subpd:
1012 1127                  dummy.op = subsd;
     1128 +
1013 1129                  for (i = 0; i < 2; i++) {
1014 1130                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015 1131                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016 1132                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017 1133                  }
     1134 +
1018 1135                  break;
1019 1136  
1020 1137          case mulpd:
1021 1138                  dummy.op = mulsd;
     1139 +
1022 1140                  for (i = 0; i < 2; i++) {
1023 1141                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024 1142                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025 1143                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026 1144                  }
     1145 +
1027 1146                  break;
1028 1147  
1029 1148          case divpd:
1030 1149                  dummy.op = divsd;
     1150 +
1031 1151                  for (i = 0; i < 2; i++) {
1032 1152                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033 1153                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034 1154                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035 1155                  }
     1156 +
1036 1157                  break;
1037 1158  
1038 1159          case sqrtpd:
1039 1160                  dummy.op = sqrtsd;
     1161 +
1040 1162                  for (i = 0; i < 2; i++) {
1041 1163                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042 1164                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043 1165                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 1166                  }
     1167 +
1045 1168                  break;
1046 1169  
1047 1170          case cvtpi2pd:
1048 1171          case cvtdq2pd:
1049 1172                  dummy.op = cvtsi2sd;
     1173 +
1050 1174                  for (i = 0; i < 2; i++) {
1051 1175                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052 1176                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053 1177                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054 1178                  }
     1179 +
1055 1180                  break;
1056 1181  
1057 1182          case cvttpd2pi:
1058 1183          case cvttpd2dq:
1059 1184                  dummy.op = cvttsd2si;
     1185 +
1060 1186                  for (i = 0; i < 2; i++) {
1061 1187                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062 1188                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063 1189                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064 1190                  }
     1191 +
1065 1192                  break;
1066 1193  
1067 1194          case cvtpd2pi:
1068 1195          case cvtpd2dq:
1069 1196                  dummy.op = cvtsd2si;
     1197 +
1070 1198                  for (i = 0; i < 2; i++) {
1071 1199                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072 1200                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073 1201                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074 1202                  }
     1203 +
1075 1204                  break;
1076 1205  
1077 1206          case cvtps2pd:
1078 1207                  dummy.op = cvtss2sd;
     1208 +
1079 1209                  for (i = 0; i < 2; i++) {
1080 1210                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081 1211                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082 1212                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083 1213                  }
     1214 +
1084 1215                  break;
1085 1216  
1086 1217          case cvtpd2ps:
1087 1218                  dummy.op = cvtsd2ss;
     1219 +
1088 1220                  for (i = 0; i < 2; i++) {
1089 1221                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090 1222                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091 1223                          e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092 1224                  }
     1225 +
1093 1226          default:
1094 1227                  break;
1095 1228          }
1096 1229  }
1097 1230  
1098 1231  /*
1099 1232   * Store the result value from *info in the destination of the scalar
1100 1233   * SSE instruction specified by *inst.  If no result is given but the
1101 1234   * exception is underflow or overflow, supply the default trapped result.
1102 1235   *
1103 1236   * This routine does not work if the instruction specified by *inst
1104 1237   * is not a scalar instruction.
1105 1238   */
1106 1239  void
1107 1240  __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108 1241      fex_info_t *info)
1109 1242  {
1110      -        int             i = 0;
1111      -        long long       l = 0L;;
1112      -        float           f = 0.0, fscl;
1113      -        double          d = 0.0L, dscl;
1114      -
1115      -        /* for compares that write eflags, just set the flags
1116      -           to indicate "unordered" */
1117      -        if (inst->op == ucomiss || inst->op == comiss ||
1118      -            inst->op == ucomisd || inst->op == comisd) {
     1243 +        int i = 0;
     1244 +        long long l = 0L;
     1245 +        float f = 0.0, fscl;
     1246 +        double d = 0.0L, dscl;
     1247 +
     1248 +        /*
     1249 +         * for compares that write eflags, just set the flags
     1250 +         * to indicate "unordered"
     1251 +         */
     1252 +        if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
     1253 +            inst->op == comisd) {
1119 1254                  uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120 1255                  return;
1121 1256          }
1122 1257  
1123      -        /* if info doesn't specify a result value, try to generate
1124      -           the default trapped result */
     1258 +        /*
     1259 +         * if info doesn't specify a result value, try to generate
     1260 +         * the default trapped result
     1261 +         */
1125 1262          if (info->res.type == fex_nodata) {
1126 1263                  /* set scale factors for exponent wrapping */
1127 1264                  switch (e) {
1128 1265                  case fex_overflow:
1129      -                        fscl = 1.262177448e-29f; /* 2^-96 */
1130      -                        dscl = 6.441148769597133308e-232; /* 2^-768 */
     1266 +                        fscl = 1.262177448e-29f;                /* 2^-96 */
     1267 +                        dscl = 6.441148769597133308e-232;       /* 2^-768 */
1131 1268                          break;
1132 1269  
1133 1270                  case fex_underflow:
1134      -                        fscl = 7.922816251e+28f; /* 2^96 */
1135      -                        dscl = 1.552518092300708935e+231; /* 2^768 */
     1271 +                        fscl = 7.922816251e+28f;                /* 2^96 */
     1272 +                        dscl = 1.552518092300708935e+231;       /* 2^768 */
1136 1273                          break;
1137 1274  
1138 1275                  default:
1139 1276                          (void) __fex_get_sse_op(uap, inst, info);
     1277 +
1140 1278                          if (info->res.type == fex_nodata)
1141 1279                                  return;
     1280 +
1142 1281                          goto stuff;
1143 1282                  }
1144 1283  
1145 1284                  /* generate the wrapped result */
1146 1285                  if (inst->op == cvtsd2ss) {
1147 1286                          info->op1.type = fex_double;
1148 1287                          info->op1.val.d = inst->op2->d[0];
1149 1288                          info->op2.type = fex_nodata;
1150 1289                          info->res.type = fex_float;
1151 1290                          info->res.val.f = (float)(fscl * (fscl *
1152 1291                              info->op1.val.d));
1153 1292                  } else if ((int)inst->op & DOUBLE) {
1154 1293                          info->op1.type = fex_double;
1155 1294                          info->op1.val.d = inst->op1->d[0];
1156 1295                          info->op2.type = fex_double;
1157 1296                          info->op2.val.d = inst->op2->d[0];
1158 1297                          info->res.type = fex_double;
     1298 +
1159 1299                          switch (inst->op) {
1160 1300                          case addsd:
1161 1301                                  info->res.val.d = dscl * (dscl *
1162 1302                                      info->op1.val.d + dscl * info->op2.val.d);
1163 1303                                  break;
1164 1304  
1165 1305                          case subsd:
1166 1306                                  info->res.val.d = dscl * (dscl *
1167 1307                                      info->op1.val.d - dscl * info->op2.val.d);
1168 1308                                  break;
↓ open down ↓ 10 lines elided ↑ open up ↑
1179 1319  
1180 1320                          default:
1181 1321                                  return;
1182 1322                          }
1183 1323                  } else {
1184 1324                          info->op1.type = fex_float;
1185 1325                          info->op1.val.f = inst->op1->f[0];
1186 1326                          info->op2.type = fex_float;
1187 1327                          info->op2.val.f = inst->op2->f[0];
1188 1328                          info->res.type = fex_float;
     1329 +
1189 1330                          switch (inst->op) {
1190 1331                          case addss:
1191 1332                                  info->res.val.f = fscl * (fscl *
1192 1333                                      info->op1.val.f + fscl * info->op2.val.f);
1193 1334                                  break;
1194 1335  
1195 1336                          case subss:
1196 1337                                  info->res.val.f = fscl * (fscl *
1197 1338                                      info->op1.val.f - fscl * info->op2.val.f);
1198 1339                                  break;
↓ open down ↓ 9 lines elided ↑ open up ↑
1208 1349                                  break;
1209 1350  
1210 1351                          default:
1211 1352                                  return;
1212 1353                          }
1213 1354                  }
1214 1355          }
1215 1356  
1216 1357          /* put the result in the destination */
1217 1358  stuff:
1218      -        if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219      -            || inst->op == cvttsd2si || inst->op == cvtsd2si) {
     1359 +        if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
     1360 +            cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220 1361                  switch (info->res.type) {
1221 1362                  case fex_int:
1222 1363                          i = info->res.val.i;
1223 1364                          break;
1224 1365  
1225 1366                  case fex_llong:
1226 1367                          i = info->res.val.l;
1227 1368                          break;
1228 1369  
1229 1370                  case fex_float:
↓ open down ↓ 4 lines elided ↑ open up ↑
1234 1375                          i = info->res.val.d;
1235 1376                          break;
1236 1377  
1237 1378                  case fex_ldouble:
1238 1379                          i = info->res.val.q;
1239 1380                          break;
1240 1381  
1241 1382                  default:
1242 1383                          break;
1243 1384                  }
     1385 +
1244 1386                  inst->op1->i[0] = i;
1245      -        } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246      -            inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247      -            inst->op == cvtsd2siq) {
     1387 +        } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
     1388 +            cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
1248 1389                  switch (info->res.type) {
1249 1390                  case fex_int:
1250 1391                          l = info->res.val.i;
1251 1392                          break;
1252 1393  
1253 1394                  case fex_llong:
1254 1395                          l = info->res.val.l;
1255 1396                          break;
1256 1397  
1257 1398                  case fex_float:
↓ open down ↓ 4 lines elided ↑ open up ↑
1262 1403                          l = info->res.val.d;
1263 1404                          break;
1264 1405  
1265 1406                  case fex_ldouble:
1266 1407                          l = info->res.val.q;
1267 1408                          break;
1268 1409  
1269 1410                  default:
1270 1411                          break;
1271 1412                  }
     1413 +
1272 1414                  inst->op1->l[0] = l;
1273 1415          } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274 1416              inst->op == cvtss2sd) {
1275 1417                  switch (info->res.type) {
1276 1418                  case fex_int:
1277 1419                          d = info->res.val.i;
1278 1420                          break;
1279 1421  
1280 1422                  case fex_llong:
1281 1423                          d = info->res.val.l;
↓ open down ↓ 7 lines elided ↑ open up ↑
1289 1431                          d = info->res.val.d;
1290 1432                          break;
1291 1433  
1292 1434                  case fex_ldouble:
1293 1435                          d = info->res.val.q;
1294 1436                          break;
1295 1437  
1296 1438                  default:
1297 1439                          break;
1298 1440                  }
     1441 +
1299 1442                  inst->op1->d[0] = d;
1300 1443          } else {
1301 1444                  switch (info->res.type) {
1302 1445                  case fex_int:
1303 1446                          f = info->res.val.i;
1304 1447                          break;
1305 1448  
1306 1449                  case fex_llong:
1307 1450                          f = info->res.val.l;
1308 1451                          break;
↓ open down ↓ 6 lines elided ↑ open up ↑
1315 1458                          f = info->res.val.d;
1316 1459                          break;
1317 1460  
1318 1461                  case fex_ldouble:
1319 1462                          f = info->res.val.q;
1320 1463                          break;
1321 1464  
1322 1465                  default:
1323 1466                          break;
1324 1467                  }
     1468 +
1325 1469                  inst->op1->f[0] = f;
1326 1470          }
1327 1471  }
1328 1472  
1329 1473  /*
1330 1474   * Store the results from a SIMD instruction.  For each i, store
1331 1475   * the result value from info[i] in the i-th part of the destination
1332 1476   * of the SIMD SSE instruction specified by *inst.  If no result
1333 1477   * is given but the exception indicated by e[i] is underflow or
1334 1478   * overflow, supply the default trapped result.
1335 1479   *
1336 1480   * This routine does not work if the instruction specified by *inst
1337 1481   * is not a SIMD instruction.
1338 1482   */
1339 1483  void
1340 1484  __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341 1485      fex_info_t *info)
1342 1486  {
1343      -        sseinst_t       dummy;
1344      -        int             i;
     1487 +        sseinst_t dummy;
     1488 +        int i;
1345 1489  
1346 1490          /* store each part */
1347 1491          switch (inst->op) {
1348 1492          case cmpps:
1349 1493                  dummy.op = cmpss;
1350 1494                  dummy.imm = inst->imm;
     1495 +
1351 1496                  for (i = 0; i < 4; i++) {
1352 1497                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353 1498                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354 1499                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355 1500                  }
     1501 +
1356 1502                  break;
1357 1503  
1358 1504          case minps:
1359 1505                  dummy.op = minss;
     1506 +
1360 1507                  for (i = 0; i < 4; i++) {
1361 1508                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362 1509                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363 1510                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364 1511                  }
     1512 +
1365 1513                  break;
1366 1514  
1367 1515          case maxps:
1368 1516                  dummy.op = maxss;
     1517 +
1369 1518                  for (i = 0; i < 4; i++) {
1370 1519                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371 1520                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372 1521                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373 1522                  }
     1523 +
1374 1524                  break;
1375 1525  
1376 1526          case addps:
1377 1527                  dummy.op = addss;
     1528 +
1378 1529                  for (i = 0; i < 4; i++) {
1379 1530                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380 1531                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381 1532                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382 1533                  }
     1534 +
1383 1535                  break;
1384 1536  
1385 1537          case subps:
1386 1538                  dummy.op = subss;
     1539 +
1387 1540                  for (i = 0; i < 4; i++) {
1388 1541                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389 1542                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390 1543                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391 1544                  }
     1545 +
1392 1546                  break;
1393 1547  
1394 1548          case mulps:
1395 1549                  dummy.op = mulss;
     1550 +
1396 1551                  for (i = 0; i < 4; i++) {
1397 1552                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398 1553                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399 1554                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400 1555                  }
     1556 +
1401 1557                  break;
1402 1558  
1403 1559          case divps:
1404 1560                  dummy.op = divss;
     1561 +
1405 1562                  for (i = 0; i < 4; i++) {
1406 1563                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407 1564                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408 1565                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409 1566                  }
     1567 +
1410 1568                  break;
1411 1569  
1412 1570          case sqrtps:
1413 1571                  dummy.op = sqrtss;
     1572 +
1414 1573                  for (i = 0; i < 4; i++) {
1415 1574                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416 1575                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417 1576                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418 1577                  }
     1578 +
1419 1579                  break;
1420 1580  
1421 1581          case cvtdq2ps:
1422 1582                  dummy.op = cvtsi2ss;
     1583 +
1423 1584                  for (i = 0; i < 4; i++) {
1424 1585                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425 1586                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426 1587                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427 1588                  }
     1589 +
1428 1590                  break;
1429 1591  
1430 1592          case cvttps2dq:
1431 1593                  dummy.op = cvttss2si;
     1594 +
1432 1595                  for (i = 0; i < 4; i++) {
1433 1596                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434 1597                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435 1598                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436 1599                  }
     1600 +
1437 1601                  break;
1438 1602  
1439 1603          case cvtps2dq:
1440 1604                  dummy.op = cvtss2si;
     1605 +
1441 1606                  for (i = 0; i < 4; i++) {
1442 1607                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443 1608                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444 1609                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445 1610                  }
     1611 +
1446 1612                  break;
1447 1613  
1448 1614          case cvtpi2ps:
1449 1615                  dummy.op = cvtsi2ss;
     1616 +
1450 1617                  for (i = 0; i < 2; i++) {
1451 1618                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452 1619                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453 1620                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454 1621                  }
     1622 +
1455 1623                  break;
1456 1624  
1457 1625          case cvttps2pi:
1458 1626                  dummy.op = cvttss2si;
     1627 +
1459 1628                  for (i = 0; i < 2; i++) {
1460 1629                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461 1630                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462 1631                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463 1632                  }
     1633 +
1464 1634                  break;
1465 1635  
1466 1636          case cvtps2pi:
1467 1637                  dummy.op = cvtss2si;
     1638 +
1468 1639                  for (i = 0; i < 2; i++) {
1469 1640                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470 1641                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471 1642                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472 1643                  }
     1644 +
1473 1645                  break;
1474 1646  
1475 1647          case cmppd:
1476 1648                  dummy.op = cmpsd;
1477 1649                  dummy.imm = inst->imm;
     1650 +
1478 1651                  for (i = 0; i < 2; i++) {
1479 1652                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480 1653                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481 1654                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482 1655                  }
     1656 +
1483 1657                  break;
1484 1658  
1485 1659          case minpd:
1486 1660                  dummy.op = minsd;
     1661 +
1487 1662                  for (i = 0; i < 2; i++) {
1488 1663                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489 1664                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490 1665                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491 1666                  }
     1667 +
1492 1668                  break;
1493 1669  
1494 1670          case maxpd:
1495 1671                  dummy.op = maxsd;
     1672 +
1496 1673                  for (i = 0; i < 2; i++) {
1497 1674                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498 1675                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499 1676                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 1677                  }
     1678 +
1501 1679                  break;
1502 1680  
1503 1681          case addpd:
1504 1682                  dummy.op = addsd;
     1683 +
1505 1684                  for (i = 0; i < 2; i++) {
1506 1685                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507 1686                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508 1687                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509 1688                  }
     1689 +
1510 1690                  break;
1511 1691  
1512 1692          case subpd:
1513 1693                  dummy.op = subsd;
     1694 +
1514 1695                  for (i = 0; i < 2; i++) {
1515 1696                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516 1697                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517 1698                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518 1699                  }
     1700 +
1519 1701                  break;
1520 1702  
1521 1703          case mulpd:
1522 1704                  dummy.op = mulsd;
     1705 +
1523 1706                  for (i = 0; i < 2; i++) {
1524 1707                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525 1708                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526 1709                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527 1710                  }
     1711 +
1528 1712                  break;
1529 1713  
1530 1714          case divpd:
1531 1715                  dummy.op = divsd;
     1716 +
1532 1717                  for (i = 0; i < 2; i++) {
1533 1718                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534 1719                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535 1720                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536 1721                  }
     1722 +
1537 1723                  break;
1538 1724  
1539 1725          case sqrtpd:
1540 1726                  dummy.op = sqrtsd;
     1727 +
1541 1728                  for (i = 0; i < 2; i++) {
1542 1729                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543 1730                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544 1731                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545 1732                  }
     1733 +
1546 1734                  break;
1547 1735  
1548 1736          case cvtpi2pd:
1549 1737          case cvtdq2pd:
1550 1738                  dummy.op = cvtsi2sd;
     1739 +
1551 1740                  for (i = 0; i < 2; i++) {
1552 1741                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553 1742                          dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554 1743                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 1744                  }
     1745 +
1556 1746                  break;
1557 1747  
1558 1748          case cvttpd2pi:
1559 1749          case cvttpd2dq:
1560 1750                  dummy.op = cvttsd2si;
     1751 +
1561 1752                  for (i = 0; i < 2; i++) {
1562 1753                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563 1754                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564 1755                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565 1756                  }
     1757 +
1566 1758                  /* for cvttpd2dq, zero the high 64 bits of the destination */
1567 1759                  if (inst->op == cvttpd2dq)
1568 1760                          inst->op1->l[1] = 0ll;
     1761 +
1569 1762                  break;
1570 1763  
1571 1764          case cvtpd2pi:
1572 1765          case cvtpd2dq:
1573 1766                  dummy.op = cvtsd2si;
     1767 +
1574 1768                  for (i = 0; i < 2; i++) {
1575 1769                          dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576 1770                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 1771                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 1772                  }
     1773 +
1579 1774                  /* for cvtpd2dq, zero the high 64 bits of the destination */
1580 1775                  if (inst->op == cvtpd2dq)
1581 1776                          inst->op1->l[1] = 0ll;
     1777 +
1582 1778                  break;
1583 1779  
1584 1780          case cvtps2pd:
1585 1781                  dummy.op = cvtss2sd;
     1782 +
1586 1783                  for (i = 0; i < 2; i++) {
1587 1784                          dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588 1785                          dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589 1786                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590 1787                  }
     1788 +
1591 1789                  break;
1592 1790  
1593 1791          case cvtpd2ps:
1594 1792                  dummy.op = cvtsd2ss;
     1793 +
1595 1794                  for (i = 0; i < 2; i++) {
1596 1795                          dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597 1796                          dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598 1797                          __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 1798                  }
     1799 +
1600 1800                  /* zero the high 64 bits of the destination */
1601 1801                  inst->op1->l[1] = 0ll;
1602 1802  
1603 1803          default:
1604 1804                  break;
1605 1805          }
1606 1806  }
1607      -
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX