Print this page
11210 libm should be cstyle(1ONBLD) clean
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/__fex_sse.c
+++ new/usr/src/lib/libm/common/m9x/__fex_sse.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
↓ open down ↓ |
14 lines elided |
↑ open up ↑ |
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 +
25 26 /*
26 27 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 28 * Use is subject to license terms.
28 29 */
29 30
30 31 #include <ucontext.h>
31 32 #include <fenv.h>
32 33 #if defined(__SUNPRO_C)
33 34 #include <sunmath.h>
34 35 #else
35 36 #include <sys/ieeefp.h>
36 37 #endif
37 38 #include "fex_handler.h"
38 39 #include "fenv_inlines.h"
39 40
40 41 #if !defined(REG_PC)
41 -#define REG_PC EIP
42 +#define REG_PC EIP
42 43 #endif
43 44
44 45 #if !defined(REG_PS)
45 -#define REG_PS EFL
46 +#define REG_PS EFL
46 47 #endif
47 48
48 49 #ifdef __amd64
49 -#define regno(X) ((X < 4)? REG_RAX - X : \
50 - ((X > 4)? REG_RAX + 1 - X : REG_RSP))
50 +#define regno(X) ((X < 4) ? REG_RAX - X : ((X > 4) ? REG_RAX + 1 - X : \
51 + REG_RSP))
51 52 #else
52 -#define regno(X) (EAX - X)
53 +#define regno(X) (EAX - X)
53 54 #endif
54 55
55 56 /*
56 57 * Support for SSE instructions
57 58 */
58 59
59 60 /*
60 61 * Decode an SSE instruction. Fill in *inst and return the length of the
61 62 * instruction in bytes. Return 0 if the instruction is not recognized.
62 63 */
63 64 int
64 65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
65 66 {
66 - unsigned char *ip;
67 - char *addr;
68 - int i, dbl, simd, rex, modrm, sib, r;
67 + unsigned char *ip;
68 + char *addr;
69 + int i, dbl, simd, rex, modrm, sib, r;
69 70
70 71 i = 0;
71 72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
72 73
73 74 /* look for pseudo-prefixes */
74 75 dbl = 0;
75 76 simd = SIMD;
77 +
76 78 if (ip[i] == 0xF3) {
77 79 simd = 0;
78 80 i++;
79 81 } else if (ip[i] == 0x66) {
80 82 dbl = DOUBLE;
81 83 i++;
82 84 } else if (ip[i] == 0xF2) {
83 85 dbl = DOUBLE;
84 86 simd = 0;
85 87 i++;
86 88 }
87 89
88 90 /* look for AMD64 REX prefix */
89 91 rex = 0;
92 +
90 93 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
91 94 rex = ip[i];
92 95 i++;
93 96 }
94 97
95 98 /* parse opcode */
96 99 if (ip[i++] != 0x0F)
97 - return 0;
100 + return (0);
101 +
98 102 switch (ip[i++]) {
99 103 case 0x2A:
100 104 inst->op = (int)cvtsi2ss + simd + dbl;
105 +
101 106 if (!simd)
102 107 inst->op = (int)inst->op + (rex & 8);
108 +
103 109 break;
104 110
105 111 case 0x2C:
106 112 inst->op = (int)cvttss2si + simd + dbl;
113 +
107 114 if (!simd)
108 115 inst->op = (int)inst->op + (rex & 8);
116 +
109 117 break;
110 118
111 119 case 0x2D:
112 120 inst->op = (int)cvtss2si + simd + dbl;
121 +
113 122 if (!simd)
114 123 inst->op = (int)inst->op + (rex & 8);
124 +
115 125 break;
116 126
117 127 case 0x2E:
128 +
118 129 /* oddball: scalar instruction in a SIMD opcode group */
119 130 if (!simd)
120 - return 0;
131 + return (0);
132 +
121 133 inst->op = (int)ucomiss + dbl;
122 134 break;
123 135
124 136 case 0x2F:
137 +
125 138 /* oddball: scalar instruction in a SIMD opcode group */
126 139 if (!simd)
127 - return 0;
140 + return (0);
141 +
128 142 inst->op = (int)comiss + dbl;
129 143 break;
130 144
131 145 case 0x51:
132 146 inst->op = (int)sqrtss + simd + dbl;
133 147 break;
134 148
135 149 case 0x58:
136 150 inst->op = (int)addss + simd + dbl;
137 151 break;
138 152
139 153 case 0x59:
140 154 inst->op = (int)mulss + simd + dbl;
141 155 break;
142 156
143 157 case 0x5A:
144 158 inst->op = (int)cvtss2sd + simd + dbl;
145 159 break;
146 160
147 161 case 0x5B:
162 +
148 163 if (dbl) {
149 164 if (simd)
150 165 inst->op = cvtps2dq;
151 166 else
152 - return 0;
167 + return (0);
153 168 } else {
154 - inst->op = (simd)? cvtdq2ps : cvttps2dq;
169 + inst->op = (simd) ? cvtdq2ps : cvttps2dq;
155 170 }
171 +
156 172 break;
157 173
158 174 case 0x5C:
159 175 inst->op = (int)subss + simd + dbl;
160 176 break;
161 177
162 178 case 0x5D:
163 179 inst->op = (int)minss + simd + dbl;
164 180 break;
165 181
166 182 case 0x5E:
167 183 inst->op = (int)divss + simd + dbl;
168 184 break;
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
169 185
170 186 case 0x5F:
171 187 inst->op = (int)maxss + simd + dbl;
172 188 break;
173 189
174 190 case 0xC2:
175 191 inst->op = (int)cmpss + simd + dbl;
176 192 break;
177 193
178 194 case 0xE6:
195 +
179 196 if (simd) {
180 197 if (dbl)
181 198 inst->op = cvttpd2dq;
182 199 else
183 - return 0;
200 + return (0);
184 201 } else {
185 - inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
202 + inst->op = (dbl) ? cvtpd2dq : cvtdq2pd;
186 203 }
204 +
187 205 break;
188 206
189 207 default:
190 - return 0;
208 + return (0);
191 209 }
192 210
193 211 /* locate operands */
194 212 modrm = ip[i++];
195 213
196 - if (inst->op == cvtss2si || inst->op == cvttss2si ||
197 - inst->op == cvtsd2si || inst->op == cvttsd2si ||
198 - inst->op == cvtss2siq || inst->op == cvttss2siq ||
199 - inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
214 + if (inst->op == cvtss2si || inst->op == cvttss2si || inst->op ==
215 + cvtsd2si || inst->op == cvttsd2si || inst->op == cvtss2siq ||
216 + inst->op == cvttss2siq || inst->op == cvtsd2siq || inst->op ==
217 + cvttsd2siq) {
200 218 /* op1 is a gp register */
201 219 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
202 220 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
203 - } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
204 - inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
221 + } else if (inst->op == cvtps2pi || inst->op == cvttps2pi || inst->op ==
222 + cvtpd2pi || inst->op == cvttpd2pi) {
205 223 /* op1 is an MMX register */
206 224 #ifdef __amd64
207 - inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
208 - fpchip_state.st[(modrm >> 3) & 7];
225 + inst->op1 = (sseoperand_t *)
226 + &uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
227 + .st[(modrm >> 3) & 7];
209 228 #else
210 229 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
211 - (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
212 - fpchip_state.state[7]);
230 + (char *)&uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state
231 + .state[7]);
213 232 #endif
214 233 } else {
215 234 /* op1 is an XMM register */
216 235 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
217 - inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
218 - fp_reg_set.fpchip_state.xmm[r];
236 + inst->op1 =
237 + (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
238 + .fpchip_state.xmm[r];
219 239 }
220 240
221 241 if ((modrm >> 6) == 3) {
222 - if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
223 - inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
242 + if (inst->op == cvtsi2ss || inst->op == cvtsi2sd || inst->op ==
243 + cvtsi2ssq || inst->op == cvtsi2sdq) {
224 244 /* op2 is a gp register */
225 245 r = ((rex & 1) << 3) | (modrm & 7);
226 - inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
227 - gregs[regno(r)];
246 + inst->op2 =
247 + (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
228 248 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
229 249 /* op2 is an MMX register */
230 250 #ifdef __amd64
231 - inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
232 - fp_reg_set.fpchip_state.st[modrm & 7];
251 + inst->op2 =
252 + (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
253 + .fpchip_state.st[modrm & 7];
233 254 #else
234 255 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
235 - (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
236 - fpchip_state.state[7]);
256 + (char *)&uap->uc_mcontext.fpregs.fp_reg_set
257 + .fpchip_state.state[7]);
237 258 #endif
238 259 } else {
239 260 /* op2 is an XMM register */
240 261 r = ((rex & 1) << 3) | (modrm & 7);
241 - inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
242 - fp_reg_set.fpchip_state.xmm[r];
262 + inst->op2 =
263 + (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set
264 + .fpchip_state.xmm[r];
243 265 }
244 266 } else if ((modrm & 0xc7) == 0x05) {
245 267 #ifdef __amd64
246 268 /* address of next instruction + offset */
247 269 r = i + 4;
248 - if (inst->op == cmpss || inst->op == cmpps ||
249 - inst->op == cmpsd || inst->op == cmppd)
270 +
271 + if (inst->op == cmpss || inst->op == cmpps || inst->op ==
272 + cmpsd || inst->op == cmppd)
250 273 r++;
274 +
251 275 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
252 276 #else
253 277 /* absolute address */
254 278 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
255 279 #endif
256 280 i += 4;
257 281 } else {
258 282 /* complex address */
259 283 if ((modrm & 7) == 4) {
260 284 /* parse sib byte */
261 285 sib = ip[i++];
286 +
262 287 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
263 288 /* start with absolute address */
264 289 addr = (char *)(uintptr_t)(*(int *)(ip + i));
265 290 i += 4;
266 291 } else {
267 292 /* start with base */
268 293 r = ((rex & 1) << 3) | (sib & 7);
269 294 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
270 295 }
296 +
271 297 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
298 +
272 299 if (r != 4) {
273 300 /* add scaled index */
274 - addr += uap->uc_mcontext.gregs[regno(r)]
275 - << (sib >> 6);
301 + addr += uap->uc_mcontext.gregs[regno(r)] <<
302 + (sib >> 6);
276 303 }
277 304 } else {
278 305 r = ((rex & 1) << 3) | (modrm & 7);
279 306 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
280 307 }
281 308
282 309 /* add displacement, if any */
283 310 if ((modrm >> 6) == 1) {
284 311 addr += (char)ip[i++];
285 312 } else if ((modrm >> 6) == 2) {
286 313 addr += *(int *)(ip + i);
287 314 i += 4;
288 315 }
316 +
289 317 inst->op2 = (sseoperand_t *)addr;
290 318 }
291 319
292 320 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
293 321 inst->op == cmppd) {
294 322 /* get the immediate operand */
295 323 inst->imm = ip[i++];
296 324 }
297 325
298 - return i;
326 + return (i);
299 327 }
300 328
301 329 static enum fp_class_type
302 330 my_fp_classf(float *x)
303 331 {
304 - int i = *(int *)x & ~0x80000000;
332 + int i = *(int *)x & ~0x80000000;
305 333
306 334 if (i < 0x7f800000) {
307 335 if (i < 0x00800000)
308 - return ((i == 0)? fp_zero : fp_subnormal);
309 - return fp_normal;
336 + return ((i == 0) ? fp_zero : fp_subnormal);
337 +
338 + return (fp_normal);
339 + } else if (i == 0x7f800000) {
340 + return (fp_infinity);
341 + } else if (i & 0x400000) {
342 + return (fp_quiet);
343 + } else {
344 + return (fp_signaling);
310 345 }
311 - else if (i == 0x7f800000)
312 - return fp_infinity;
313 - else if (i & 0x400000)
314 - return fp_quiet;
315 - else
316 - return fp_signaling;
317 346 }
318 347
319 348 static enum fp_class_type
320 349 my_fp_class(double *x)
321 350 {
322 - int i = *(1+(int *)x) & ~0x80000000;
351 + int i = *(1 + (int *)x) & ~0x80000000;
323 352
324 353 if (i < 0x7ff00000) {
325 354 if (i < 0x00100000)
326 - return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
327 - return fp_normal;
355 + return (((i | *(int *)x) == 0) ? fp_zero :
356 + fp_subnormal);
357 +
358 + return (fp_normal);
359 + } else if (i == 0x7ff00000 && *(int *)x == 0) {
360 + return (fp_infinity);
361 + } else if (i & 0x80000) {
362 + return (fp_quiet);
363 + } else {
364 + return (fp_signaling);
328 365 }
329 - else if (i == 0x7ff00000 && *(int *)x == 0)
330 - return fp_infinity;
331 - else if (i & 0x80000)
332 - return fp_quiet;
333 - else
334 - return fp_signaling;
335 366 }
336 367
337 368 /*
338 369 * Inspect a scalar SSE instruction that incurred an invalid operation
339 370 * exception to determine which type of exception it was.
340 371 */
341 372 static enum fex_exception
342 373 __fex_get_sse_invalid_type(sseinst_t *inst)
343 374 {
344 - enum fp_class_type t1, t2;
375 + enum fp_class_type t1, t2;
345 376
346 377 /* check op2 for signaling NaN */
347 - t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
378 + t2 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op2->d[0]) :
348 379 my_fp_classf(&inst->op2->f[0]);
380 +
349 381 if (t2 == fp_signaling)
350 - return fex_inv_snan;
382 + return (fex_inv_snan);
351 383
352 384 /* eliminate all single-operand instructions */
353 385 switch (inst->op) {
354 386 case cvtsd2ss:
355 387 case cvtss2sd:
356 388 /* hmm, this shouldn't have happened */
357 - return (enum fex_exception) -1;
389 + return ((enum fex_exception)-1);
358 390
359 391 case sqrtss:
360 392 case sqrtsd:
361 - return fex_inv_sqrt;
393 + return (fex_inv_sqrt);
362 394
363 395 case cvtss2si:
364 396 case cvtsd2si:
365 397 case cvttss2si:
366 398 case cvttsd2si:
367 399 case cvtss2siq:
368 400 case cvtsd2siq:
369 401 case cvttss2siq:
370 402 case cvttsd2siq:
371 - return fex_inv_int;
403 + return (fex_inv_int);
372 404 default:
373 405 break;
374 406 }
375 407
376 408 /* check op1 for signaling NaN */
377 - t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
409 + t1 = ((int)inst->op & DOUBLE) ? my_fp_class(&inst->op1->d[0]) :
378 410 my_fp_classf(&inst->op1->f[0]);
411 +
379 412 if (t1 == fp_signaling)
380 - return fex_inv_snan;
413 + return (fex_inv_snan);
381 414
382 415 /* check two-operand instructions for other cases */
383 416 switch (inst->op) {
384 417 case cmpss:
385 418 case cmpsd:
386 419 case minss:
387 420 case minsd:
388 421 case maxss:
389 422 case maxsd:
390 423 case comiss:
391 424 case comisd:
392 - return fex_inv_cmp;
425 + return (fex_inv_cmp);
393 426
394 427 case addss:
395 428 case addsd:
396 429 case subss:
397 430 case subsd:
431 +
398 432 if (t1 == fp_infinity && t2 == fp_infinity)
399 - return fex_inv_isi;
433 + return (fex_inv_isi);
434 +
400 435 break;
401 436
402 437 case mulss:
403 438 case mulsd:
404 - if ((t1 == fp_zero && t2 == fp_infinity) ||
405 - (t2 == fp_zero && t1 == fp_infinity))
406 - return fex_inv_zmi;
439 +
440 + if ((t1 == fp_zero && t2 == fp_infinity) || (t2 == fp_zero &&
441 + t1 == fp_infinity))
442 + return (fex_inv_zmi);
443 +
407 444 break;
408 445
409 446 case divss:
410 447 case divsd:
448 +
411 449 if (t1 == fp_zero && t2 == fp_zero)
412 - return fex_inv_zdz;
450 + return (fex_inv_zdz);
451 +
413 452 if (t1 == fp_infinity && t2 == fp_infinity)
414 - return fex_inv_idi;
453 + return (fex_inv_idi);
454 +
415 455 default:
416 456 break;
417 457 }
418 458
419 - return (enum fex_exception)-1;
459 + return ((enum fex_exception)-1);
420 460 }
421 461
422 462 /* inline templates */
423 463 extern void sse_cmpeqss(float *, float *, int *);
424 464 extern void sse_cmpltss(float *, float *, int *);
425 465 extern void sse_cmpless(float *, float *, int *);
426 466 extern void sse_cmpunordss(float *, float *, int *);
427 467 extern void sse_minss(float *, float *, float *);
428 468 extern void sse_maxss(float *, float *, float *);
429 469 extern void sse_addss(float *, float *, float *);
430 470 extern void sse_subss(float *, float *, float *);
431 471 extern void sse_mulss(float *, float *, float *);
432 472 extern void sse_divss(float *, float *, float *);
433 473 extern void sse_sqrtss(float *, float *);
434 474 extern void sse_ucomiss(float *, float *);
435 475 extern void sse_comiss(float *, float *);
436 476 extern void sse_cvtss2sd(float *, double *);
437 477 extern void sse_cvtsi2ss(int *, float *);
438 478 extern void sse_cvttss2si(float *, int *);
439 479 extern void sse_cvtss2si(float *, int *);
480 +
440 481 #ifdef __amd64
441 482 extern void sse_cvtsi2ssq(long long *, float *);
442 483 extern void sse_cvttss2siq(float *, long long *);
443 484 extern void sse_cvtss2siq(float *, long long *);
444 485 #endif
486 +
445 487 extern void sse_cmpeqsd(double *, double *, long long *);
446 488 extern void sse_cmpltsd(double *, double *, long long *);
447 489 extern void sse_cmplesd(double *, double *, long long *);
448 490 extern void sse_cmpunordsd(double *, double *, long long *);
449 491 extern void sse_minsd(double *, double *, double *);
450 492 extern void sse_maxsd(double *, double *, double *);
451 493 extern void sse_addsd(double *, double *, double *);
452 494 extern void sse_subsd(double *, double *, double *);
453 495 extern void sse_mulsd(double *, double *, double *);
454 496 extern void sse_divsd(double *, double *, double *);
455 497 extern void sse_sqrtsd(double *, double *);
456 498 extern void sse_ucomisd(double *, double *);
457 499 extern void sse_comisd(double *, double *);
458 500 extern void sse_cvtsd2ss(double *, float *);
459 501 extern void sse_cvtsi2sd(int *, double *);
460 502 extern void sse_cvttsd2si(double *, int *);
461 503 extern void sse_cvtsd2si(double *, int *);
504 +
462 505 #ifdef __amd64
463 506 extern void sse_cvtsi2sdq(long long *, double *);
464 507 extern void sse_cvttsd2siq(double *, long long *);
465 508 extern void sse_cvtsd2siq(double *, long long *);
466 509 #endif
467 510
468 511 /*
469 512 * Fill in *info with the operands, default untrapped result, and
470 513 * flags produced by a scalar SSE instruction, and return the type
471 514 * of trapped exception (if any). On entry, the mxcsr must have
472 515 * all exceptions masked and all flags clear. The same conditions
473 516 * will hold on exit.
474 517 *
475 518 * This routine does not work if the instruction specified by *inst
476 519 * is not a scalar instruction.
477 520 */
478 521 enum fex_exception
479 522 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
480 523 {
481 - unsigned int e, te, mxcsr, oldmxcsr, subnorm;
524 + unsigned int e, te, mxcsr, oldmxcsr, subnorm;
482 525
483 526 /*
484 527 * Perform the operation with traps disabled and check the
485 528 * exception flags. If the underflow trap was enabled, also
486 529 * check for an exact subnormal result.
487 530 */
488 531 __fenv_getmxcsr(&oldmxcsr);
489 532 subnorm = 0;
533 +
490 534 if ((int)inst->op & DOUBLE) {
491 535 if (inst->op == cvtsi2sd) {
492 536 info->op1.type = fex_int;
493 537 info->op1.val.i = inst->op2->i[0];
494 538 info->op2.type = fex_nodata;
495 539 } else if (inst->op == cvtsi2sdq) {
496 540 info->op1.type = fex_llong;
497 541 info->op1.val.l = inst->op2->l[0];
498 542 info->op2.type = fex_nodata;
499 543 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
500 - inst->op == cvttsd2si || inst->op == cvtsd2si ||
501 - inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
544 + inst->op == cvttsd2si || inst->op == cvtsd2si || inst->op ==
545 + cvttsd2siq || inst->op == cvtsd2siq) {
502 546 info->op1.type = fex_double;
503 547 info->op1.val.d = inst->op2->d[0];
504 548 info->op2.type = fex_nodata;
505 549 } else {
506 550 info->op1.type = fex_double;
507 551 info->op1.val.d = inst->op1->d[0];
508 552 info->op2.type = fex_double;
509 553 info->op2.val.d = inst->op2->d[0];
510 554 }
555 +
511 556 info->res.type = fex_double;
557 +
512 558 switch (inst->op) {
513 559 case cmpsd:
514 560 info->op = fex_cmp;
515 561 info->res.type = fex_llong;
562 +
516 563 switch (inst->imm & 3) {
517 564 case 0:
518 565 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
519 566 &info->res.val.l);
520 567 break;
521 568
522 569 case 1:
523 570 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
524 571 &info->res.val.l);
525 572 break;
526 573
527 574 case 2:
528 575 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
529 576 &info->res.val.l);
530 577 break;
531 578
532 579 case 3:
533 580 sse_cmpunordsd(&info->op1.val.d,
534 581 &info->op2.val.d, &info->res.val.l);
535 582 }
583 +
536 584 if (inst->imm & 4)
537 585 info->res.val.l ^= 0xffffffffffffffffull;
586 +
538 587 break;
539 588
540 589 case minsd:
541 590 info->op = fex_other;
542 591 sse_minsd(&info->op1.val.d, &info->op2.val.d,
543 592 &info->res.val.d);
544 593 break;
545 594
546 595 case maxsd:
547 596 info->op = fex_other;
548 597 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
549 598 &info->res.val.d);
550 599 break;
551 600
552 601 case addsd:
553 602 info->op = fex_add;
554 603 sse_addsd(&info->op1.val.d, &info->op2.val.d,
555 604 &info->res.val.d);
605 +
556 606 if (my_fp_class(&info->res.val.d) == fp_subnormal)
557 607 subnorm = 1;
608 +
558 609 break;
559 610
560 611 case subsd:
561 612 info->op = fex_sub;
562 613 sse_subsd(&info->op1.val.d, &info->op2.val.d,
563 614 &info->res.val.d);
615 +
564 616 if (my_fp_class(&info->res.val.d) == fp_subnormal)
565 617 subnorm = 1;
618 +
566 619 break;
567 620
568 621 case mulsd:
569 622 info->op = fex_mul;
570 623 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
571 624 &info->res.val.d);
625 +
572 626 if (my_fp_class(&info->res.val.d) == fp_subnormal)
573 627 subnorm = 1;
628 +
574 629 break;
575 630
576 631 case divsd:
577 632 info->op = fex_div;
578 633 sse_divsd(&info->op1.val.d, &info->op2.val.d,
579 634 &info->res.val.d);
635 +
580 636 if (my_fp_class(&info->res.val.d) == fp_subnormal)
581 637 subnorm = 1;
638 +
582 639 break;
583 640
584 641 case sqrtsd:
585 642 info->op = fex_sqrt;
586 643 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
587 644 break;
588 645
589 646 case cvtsd2ss:
590 647 info->op = fex_cnvt;
591 648 info->res.type = fex_float;
592 649 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
650 +
593 651 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
594 652 subnorm = 1;
653 +
595 654 break;
596 655
597 656 case cvtsi2sd:
598 657 info->op = fex_cnvt;
599 658 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
600 659 break;
601 660
602 661 case cvttsd2si:
603 662 info->op = fex_cnvt;
604 663 info->res.type = fex_int;
605 664 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
606 665 break;
607 666
608 667 case cvtsd2si:
609 668 info->op = fex_cnvt;
610 669 info->res.type = fex_int;
611 670 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
612 671 break;
613 672
614 673 #ifdef __amd64
615 674 case cvtsi2sdq:
616 675 info->op = fex_cnvt;
617 676 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
618 677 break;
619 678
620 679 case cvttsd2siq:
621 680 info->op = fex_cnvt;
622 681 info->res.type = fex_llong;
623 682 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
624 683 break;
625 684
626 685 case cvtsd2siq:
627 686 info->op = fex_cnvt;
628 687 info->res.type = fex_llong;
629 688 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
630 689 break;
631 690 #endif
632 691
633 692 case ucomisd:
634 693 info->op = fex_cmp;
635 694 info->res.type = fex_nodata;
636 695 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
637 696 break;
638 697
639 698 case comisd:
640 699 info->op = fex_cmp;
641 700 info->res.type = fex_nodata;
642 701 sse_comisd(&info->op1.val.d, &info->op2.val.d);
643 702 break;
644 703 default:
645 704 break;
646 705 }
↓ open down ↓ |
42 lines elided |
↑ open up ↑ |
647 706 } else {
648 707 if (inst->op == cvtsi2ss) {
649 708 info->op1.type = fex_int;
650 709 info->op1.val.i = inst->op2->i[0];
651 710 info->op2.type = fex_nodata;
652 711 } else if (inst->op == cvtsi2ssq) {
653 712 info->op1.type = fex_llong;
654 713 info->op1.val.l = inst->op2->l[0];
655 714 info->op2.type = fex_nodata;
656 715 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
657 - inst->op == cvttss2si || inst->op == cvtss2si ||
658 - inst->op == cvttss2siq || inst->op == cvtss2siq) {
716 + inst->op == cvttss2si || inst->op == cvtss2si || inst->op ==
717 + cvttss2siq || inst->op == cvtss2siq) {
659 718 info->op1.type = fex_float;
660 719 info->op1.val.f = inst->op2->f[0];
661 720 info->op2.type = fex_nodata;
662 721 } else {
663 722 info->op1.type = fex_float;
664 723 info->op1.val.f = inst->op1->f[0];
665 724 info->op2.type = fex_float;
666 725 info->op2.val.f = inst->op2->f[0];
667 726 }
727 +
668 728 info->res.type = fex_float;
729 +
669 730 switch (inst->op) {
670 731 case cmpss:
671 732 info->op = fex_cmp;
672 733 info->res.type = fex_int;
734 +
673 735 switch (inst->imm & 3) {
674 736 case 0:
675 737 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
676 738 &info->res.val.i);
677 739 break;
678 740
679 741 case 1:
680 742 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
681 743 &info->res.val.i);
682 744 break;
683 745
684 746 case 2:
685 747 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
686 748 &info->res.val.i);
687 749 break;
688 750
689 751 case 3:
690 752 sse_cmpunordss(&info->op1.val.f,
691 753 &info->op2.val.f, &info->res.val.i);
692 754 }
755 +
693 756 if (inst->imm & 4)
694 757 info->res.val.i ^= 0xffffffffu;
758 +
695 759 break;
696 760
697 761 case minss:
698 762 info->op = fex_other;
699 763 sse_minss(&info->op1.val.f, &info->op2.val.f,
700 764 &info->res.val.f);
701 765 break;
702 766
703 767 case maxss:
704 768 info->op = fex_other;
705 769 sse_maxss(&info->op1.val.f, &info->op2.val.f,
706 770 &info->res.val.f);
707 771 break;
708 772
709 773 case addss:
710 774 info->op = fex_add;
711 775 sse_addss(&info->op1.val.f, &info->op2.val.f,
712 776 &info->res.val.f);
777 +
713 778 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
714 779 subnorm = 1;
780 +
715 781 break;
716 782
717 783 case subss:
718 784 info->op = fex_sub;
719 785 sse_subss(&info->op1.val.f, &info->op2.val.f,
720 786 &info->res.val.f);
787 +
721 788 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
722 789 subnorm = 1;
790 +
723 791 break;
724 792
725 793 case mulss:
726 794 info->op = fex_mul;
727 795 sse_mulss(&info->op1.val.f, &info->op2.val.f,
728 796 &info->res.val.f);
797 +
729 798 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
730 799 subnorm = 1;
800 +
731 801 break;
732 802
733 803 case divss:
734 804 info->op = fex_div;
735 805 sse_divss(&info->op1.val.f, &info->op2.val.f,
736 806 &info->res.val.f);
807 +
737 808 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
738 809 subnorm = 1;
810 +
739 811 break;
740 812
741 813 case sqrtss:
742 814 info->op = fex_sqrt;
743 815 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
744 816 break;
745 817
746 818 case cvtss2sd:
747 819 info->op = fex_cnvt;
748 820 info->res.type = fex_double;
749 821 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
750 822 break;
751 823
752 824 case cvtsi2ss:
753 825 info->op = fex_cnvt;
754 826 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
755 827 break;
756 828
757 829 case cvttss2si:
758 830 info->op = fex_cnvt;
759 831 info->res.type = fex_int;
760 832 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
761 833 break;
762 834
763 835 case cvtss2si:
764 836 info->op = fex_cnvt;
765 837 info->res.type = fex_int;
766 838 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
767 839 break;
768 840
769 841 #ifdef __amd64
770 842 case cvtsi2ssq:
771 843 info->op = fex_cnvt;
772 844 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
773 845 break;
774 846
775 847 case cvttss2siq:
776 848 info->op = fex_cnvt;
777 849 info->res.type = fex_llong;
778 850 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
779 851 break;
780 852
781 853 case cvtss2siq:
782 854 info->op = fex_cnvt;
783 855 info->res.type = fex_llong;
784 856 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
785 857 break;
786 858 #endif
787 859
788 860 case ucomiss:
789 861 info->op = fex_cmp;
790 862 info->res.type = fex_nodata;
791 863 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
792 864 break;
↓ open down ↓ |
44 lines elided |
↑ open up ↑ |
793 865
794 866 case comiss:
795 867 info->op = fex_cmp;
796 868 info->res.type = fex_nodata;
797 869 sse_comiss(&info->op1.val.f, &info->op2.val.f);
798 870 break;
799 871 default:
800 872 break;
801 873 }
802 874 }
875 +
803 876 __fenv_getmxcsr(&mxcsr);
804 877 info->flags = mxcsr & 0x3d;
805 878 __fenv_setmxcsr(&oldmxcsr);
806 879
807 880 /* determine which exception would have been trapped */
808 - te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
809 - >> 7) & 0x3d;
881 + te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr >> 7) &
882 + 0x3d;
810 883 e = mxcsr & te;
884 +
811 885 if (e & FE_INVALID)
812 - return __fex_get_sse_invalid_type(inst);
886 + return (__fex_get_sse_invalid_type(inst));
887 +
813 888 if (e & FE_DIVBYZERO)
814 - return fex_division;
889 + return (fex_division);
890 +
815 891 if (e & FE_OVERFLOW)
816 - return fex_overflow;
892 + return (fex_overflow);
893 +
817 894 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
818 - return fex_underflow;
895 + return (fex_underflow);
896 +
819 897 if (e & FE_INEXACT)
820 - return fex_inexact;
821 - return (enum fex_exception)-1;
898 + return (fex_inexact);
899 +
900 + return ((enum fex_exception)-1);
822 901 }
823 902
824 903 /*
825 904 * Emulate a SIMD SSE instruction to determine which exceptions occur
826 905 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
827 906 * trapped exception that would occur if the i-th part of the SIMD
828 907 * instruction were executed in isolation; set e[i] to -1 if no
829 908 * trapped exception would occur in this part. Also fill in info[i]
830 909 * with the corresponding operands, default untrapped result, and
831 910 * flags.
832 911 *
833 912 * This routine does not work if the instruction specified by *inst
834 913 * is not a SIMD instruction.
835 914 */
836 915 void
837 916 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
838 917 fex_info_t *info)
839 918 {
840 - sseinst_t dummy;
841 - int i;
919 + sseinst_t dummy;
920 + int i;
842 921
843 922 e[0] = e[1] = e[2] = e[3] = -1;
844 923
845 924 /* perform each part of the SIMD operation */
846 925 switch (inst->op) {
847 926 case cmpps:
848 927 dummy.op = cmpss;
849 928 dummy.imm = inst->imm;
929 +
850 930 for (i = 0; i < 4; i++) {
851 931 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
852 932 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
853 933 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
854 934 }
935 +
855 936 break;
856 937
857 938 case minps:
858 939 dummy.op = minss;
940 +
859 941 for (i = 0; i < 4; i++) {
860 942 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
861 943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
862 944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
863 945 }
946 +
864 947 break;
865 948
866 949 case maxps:
867 950 dummy.op = maxss;
951 +
868 952 for (i = 0; i < 4; i++) {
869 953 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
870 954 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
871 955 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
872 956 }
957 +
873 958 break;
874 959
875 960 case addps:
876 961 dummy.op = addss;
962 +
877 963 for (i = 0; i < 4; i++) {
878 964 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
879 965 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
880 966 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
881 967 }
968 +
882 969 break;
883 970
884 971 case subps:
885 972 dummy.op = subss;
973 +
886 974 for (i = 0; i < 4; i++) {
887 975 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
888 976 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
889 977 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
890 978 }
979 +
891 980 break;
892 981
893 982 case mulps:
894 983 dummy.op = mulss;
984 +
895 985 for (i = 0; i < 4; i++) {
896 986 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
897 987 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
898 988 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
899 989 }
990 +
900 991 break;
901 992
902 993 case divps:
903 994 dummy.op = divss;
995 +
904 996 for (i = 0; i < 4; i++) {
905 997 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
906 998 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
907 999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
908 1000 }
1001 +
909 1002 break;
910 1003
911 1004 case sqrtps:
912 1005 dummy.op = sqrtss;
1006 +
913 1007 for (i = 0; i < 4; i++) {
914 1008 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
915 1009 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
916 1010 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
917 1011 }
1012 +
918 1013 break;
919 1014
920 1015 case cvtdq2ps:
921 1016 dummy.op = cvtsi2ss;
1017 +
922 1018 for (i = 0; i < 4; i++) {
923 1019 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
924 1020 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
925 1021 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
926 1022 }
1023 +
927 1024 break;
928 1025
929 1026 case cvttps2dq:
930 1027 dummy.op = cvttss2si;
1028 +
931 1029 for (i = 0; i < 4; i++) {
932 1030 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
933 1031 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
934 1032 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
935 1033 }
1034 +
936 1035 break;
937 1036
938 1037 case cvtps2dq:
939 1038 dummy.op = cvtss2si;
1039 +
940 1040 for (i = 0; i < 4; i++) {
941 1041 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
942 1042 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
943 1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
944 1044 }
1045 +
945 1046 break;
946 1047
947 1048 case cvtpi2ps:
948 1049 dummy.op = cvtsi2ss;
1050 +
949 1051 for (i = 0; i < 2; i++) {
950 1052 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
951 1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
952 1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
953 1055 }
1056 +
954 1057 break;
955 1058
956 1059 case cvttps2pi:
957 1060 dummy.op = cvttss2si;
1061 +
958 1062 for (i = 0; i < 2; i++) {
959 1063 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
960 1064 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
961 1065 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
962 1066 }
1067 +
963 1068 break;
964 1069
965 1070 case cvtps2pi:
966 1071 dummy.op = cvtss2si;
1072 +
967 1073 for (i = 0; i < 2; i++) {
968 1074 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
969 1075 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
970 1076 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
971 1077 }
1078 +
972 1079 break;
973 1080
974 1081 case cmppd:
975 1082 dummy.op = cmpsd;
976 1083 dummy.imm = inst->imm;
1084 +
977 1085 for (i = 0; i < 2; i++) {
978 1086 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
979 1087 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
980 1088 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
981 1089 }
1090 +
982 1091 break;
983 1092
984 1093 case minpd:
985 1094 dummy.op = minsd;
1095 +
986 1096 for (i = 0; i < 2; i++) {
987 1097 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
988 1098 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
989 1099 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
990 1100 }
1101 +
991 1102 break;
992 1103
993 1104 case maxpd:
994 1105 dummy.op = maxsd;
1106 +
995 1107 for (i = 0; i < 2; i++) {
996 1108 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
997 1109 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
998 1110 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
999 1111 }
1112 +
1000 1113 break;
1001 1114
1002 1115 case addpd:
1003 1116 dummy.op = addsd;
1117 +
1004 1118 for (i = 0; i < 2; i++) {
1005 1119 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1006 1120 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1007 1121 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1008 1122 }
1123 +
1009 1124 break;
1010 1125
1011 1126 case subpd:
1012 1127 dummy.op = subsd;
1128 +
1013 1129 for (i = 0; i < 2; i++) {
1014 1130 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1015 1131 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1016 1132 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1017 1133 }
1134 +
1018 1135 break;
1019 1136
1020 1137 case mulpd:
1021 1138 dummy.op = mulsd;
1139 +
1022 1140 for (i = 0; i < 2; i++) {
1023 1141 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1024 1142 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1025 1143 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1026 1144 }
1145 +
1027 1146 break;
1028 1147
1029 1148 case divpd:
1030 1149 dummy.op = divsd;
1150 +
1031 1151 for (i = 0; i < 2; i++) {
1032 1152 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1033 1153 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1034 1154 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1035 1155 }
1156 +
1036 1157 break;
1037 1158
1038 1159 case sqrtpd:
1039 1160 dummy.op = sqrtsd;
1161 +
1040 1162 for (i = 0; i < 2; i++) {
1041 1163 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1042 1164 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1043 1165 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1044 1166 }
1167 +
1045 1168 break;
1046 1169
1047 1170 case cvtpi2pd:
1048 1171 case cvtdq2pd:
1049 1172 dummy.op = cvtsi2sd;
1173 +
1050 1174 for (i = 0; i < 2; i++) {
1051 1175 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1052 1176 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1053 1177 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1054 1178 }
1179 +
1055 1180 break;
1056 1181
1057 1182 case cvttpd2pi:
1058 1183 case cvttpd2dq:
1059 1184 dummy.op = cvttsd2si;
1185 +
1060 1186 for (i = 0; i < 2; i++) {
1061 1187 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1062 1188 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1063 1189 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1064 1190 }
1191 +
1065 1192 break;
1066 1193
1067 1194 case cvtpd2pi:
1068 1195 case cvtpd2dq:
1069 1196 dummy.op = cvtsd2si;
1197 +
1070 1198 for (i = 0; i < 2; i++) {
1071 1199 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1072 1200 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1073 1201 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1074 1202 }
1203 +
1075 1204 break;
1076 1205
1077 1206 case cvtps2pd:
1078 1207 dummy.op = cvtss2sd;
1208 +
1079 1209 for (i = 0; i < 2; i++) {
1080 1210 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1081 1211 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1082 1212 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1083 1213 }
1214 +
1084 1215 break;
1085 1216
1086 1217 case cvtpd2ps:
1087 1218 dummy.op = cvtsd2ss;
1219 +
1088 1220 for (i = 0; i < 2; i++) {
1089 1221 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1090 1222 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1091 1223 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1092 1224 }
1225 +
1093 1226 default:
1094 1227 break;
1095 1228 }
1096 1229 }
1097 1230
1098 1231 /*
1099 1232 * Store the result value from *info in the destination of the scalar
1100 1233 * SSE instruction specified by *inst. If no result is given but the
1101 1234 * exception is underflow or overflow, supply the default trapped result.
1102 1235 *
1103 1236 * This routine does not work if the instruction specified by *inst
1104 1237 * is not a scalar instruction.
 *
 * Flow, as implemented in the body below:
 *   1. For ucomiss/comiss/ucomisd/comisd nothing is stored through
 *      inst->op1; 0x45 is OR-ed into the saved gregs[REG_PS] of *uap and
 *      the routine returns.
 *   2. If info->res.type is fex_nodata, a result is synthesized.  For
 *      fex_overflow/fex_underflow a scaled ("wrapped") result is computed
 *      for cvtsd2ss, add/sub/mul/div (sd and ss forms); any other opcode
 *      returns without storing anything.  For every other exception,
 *      __fex_get_sse_op() is called and the routine returns if info still
 *      carries no result.
 *   3. At the "stuff" label the result is converted from whatever type
 *      info->res carries to the destination type selected by inst->op and
 *      written to element 0 of inst->op1 (i[0], l[0], d[0] or f[0]).
1105 1238 */
1106 1239 void
1107 1240 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1108 1241 fex_info_t *info)
1109 1242 {
1110 - int i = 0;
1111 - long long l = 0L;;
1112 - float f = 0.0, fscl;
1113 - double d = 0.0L, dscl;
1114 -
1115 - /* for compares that write eflags, just set the flags
1116 - to indicate "unordered" */
1117 - if (inst->op == ucomiss || inst->op == comiss ||
1118 - inst->op == ucomisd || inst->op == comisd) {
1243 + int i = 0;
1244 + long long l = 0L;
1245 + float f = 0.0, fscl;
1246 + double d = 0.0L, dscl;
/*
 * NOTE(review): 0.0L is a long double literal used to initialize a double;
 * the stored value is still zero, but plain 0.0 would state the intent.
 * fscl and dscl are only assigned in the overflow/underflow cases below,
 * and are only read on the paths that follow those cases.
 */
1247 +
1248 + /*
1249 + * for compares that write eflags, just set the flags
1250 + * to indicate "unordered"
1251 + */
1252 + if (inst->op == ucomiss || inst->op == comiss || inst->op == ucomisd ||
1253 + inst->op == comisd) {
/* 0x45 is bits 0, 2 and 6: CF, PF and ZF in the x86 flags register */
1119 1254 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1120 1255 return;
1121 1256 }
1122 1257
1123 - /* if info doesn't specify a result value, try to generate
1124 - the default trapped result */
1258 + /*
1259 + * if info doesn't specify a result value, try to generate
1260 + * the default trapped result
1261 + */
1125 1262 if (info->res.type == fex_nodata) {
1126 1263 /* set scale factors for exponent wrapping */
1127 1264 switch (e) {
1128 1265 case fex_overflow:
1129 - fscl = 1.262177448e-29f; /* 2^-96 */
1130 - dscl = 6.441148769597133308e-232; /* 2^-768 */
1266 + fscl = 1.262177448e-29f; /* 2^-96 */
1267 + dscl = 6.441148769597133308e-232; /* 2^-768 */
1131 1268 break;
1132 1269
1133 1270 case fex_underflow:
1134 - fscl = 7.922816251e+28f; /* 2^96 */
1135 - dscl = 1.552518092300708935e+231; /* 2^768 */
1271 + fscl = 7.922816251e+28f; /* 2^96 */
1272 + dscl = 1.552518092300708935e+231; /* 2^768 */
1136 1273 break;
1137 1274
1138 1275 default:
/*
 * Any other exception: hand info to __fex_get_sse_op() and re-check
 * info->res.type afterwards.  The function's return value is
 * deliberately discarded; the scale factors are never used on this path.
 */
1139 1276 (void) __fex_get_sse_op(uap, inst, info);
1277 +
1140 1278 if (info->res.type == fex_nodata)
1141 1279 return;
1280 +
1142 1281 goto stuff;
1143 1282 }
1144 1283
1145 1284 /* generate the wrapped result */
/*
 * Every formula below applies the scale factor twice, so the net scaling
 * is fscl^2 (2^-192 on overflow, 2^192 on underflow) for float results
 * and dscl^2 (2^-1536 / 2^1536) for double results.
 */
1146 1285 if (inst->op == cvtsd2ss) {
/* the double being converted is read from op2; there is no second operand */
1147 1286 info->op1.type = fex_double;
1148 1287 info->op1.val.d = inst->op2->d[0];
1149 1288 info->op2.type = fex_nodata;
1150 1289 info->res.type = fex_float;
1151 1290 info->res.val.f = (float)(fscl * (fscl *
1152 1291 info->op1.val.d));
1153 1292 } else if ((int)inst->op & DOUBLE) {
1154 1293 info->op1.type = fex_double;
1155 1294 info->op1.val.d = inst->op1->d[0];
1156 1295 info->op2.type = fex_double;
1157 1296 info->op2.val.d = inst->op2->d[0];
1158 1297 info->res.type = fex_double;
1298 +
1159 1299 switch (inst->op) {
1160 1300 case addsd:
1161 1301 info->res.val.d = dscl * (dscl *
1162 1302 info->op1.val.d + dscl * info->op2.val.d);
1163 1303 break;
1164 1304
1165 1305 case subsd:
1166 1306 info->res.val.d = dscl * (dscl *
1167 1307 info->op1.val.d - dscl * info->op2.val.d);
1168 1308 break;
1169 1309
1170 1310 case mulsd:
1171 1311 info->res.val.d = (dscl * info->op1.val.d) *
1172 1312 (dscl * info->op2.val.d);
1173 1313 break;
1174 1314
1175 1315 case divsd:
1176 1316 info->res.val.d = (dscl * info->op1.val.d) /
1177 1317 (info->op2.val.d / dscl);
1178 1318 break;
↓ open down ↓ |
10 lines elided |
↑ open up ↑ |
1179 1319
/*
 * Any other double-precision op: return without storing a result.  Note
 * that info's operand fields and res.type have already been overwritten
 * above by the time this return is taken.
 */
1180 1320 default:
1181 1321 return;
1182 1322 }
1183 1323 } else {
1184 1324 info->op1.type = fex_float;
1185 1325 info->op1.val.f = inst->op1->f[0];
1186 1326 info->op2.type = fex_float;
1187 1327 info->op2.val.f = inst->op2->f[0];
1188 1328 info->res.type = fex_float;
1329 +
1189 1330 switch (inst->op) {
1190 1331 case addss:
1191 1332 info->res.val.f = fscl * (fscl *
1192 1333 info->op1.val.f + fscl * info->op2.val.f);
1193 1334 break;
1194 1335
1195 1336 case subss:
1196 1337 info->res.val.f = fscl * (fscl *
1197 1338 info->op1.val.f - fscl * info->op2.val.f);
1198 1339 break;
1199 1340
1200 1341 case mulss:
1201 1342 info->res.val.f = (fscl * info->op1.val.f) *
1202 1343 (fscl * info->op2.val.f);
1203 1344 break;
1204 1345
1205 1346 case divss:
1206 1347 info->res.val.f = (fscl * info->op1.val.f) /
1207 1348 (info->op2.val.f / fscl);
↓ open down ↓ |
9 lines elided |
↑ open up ↑ |
1208 1349 break;
1209 1350
/* any other single-precision op: return without storing a result */
1210 1351 default:
1211 1352 return;
1212 1353 }
1213 1354 }
1214 1355 }
1215 1356
1216 1357 /* put the result in the destination */
1217 1358 stuff:
/* int destinations: the converted result is stored in op1->i[0] */
1218 - if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1219 - || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1359 + if (inst->op == cmpss || inst->op == cvttss2si || inst->op ==
1360 + cvtss2si || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1220 1361 switch (info->res.type) {
1221 1362 case fex_int:
1222 1363 i = info->res.val.i;
1223 1364 break;
1224 1365
1225 1366 case fex_llong:
1226 1367 i = info->res.val.l;
1227 1368 break;
1228 1369
1229 1370 case fex_float:
1230 1371 i = info->res.val.f;
1231 1372 break;
1232 1373
1233 1374 case fex_double:
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
1234 1375 i = info->res.val.d;
1235 1376 break;
1236 1377
1237 1378 case fex_ldouble:
1238 1379 i = info->res.val.q;
1239 1380 break;
1240 1381
/*
 * Any other res.type leaves i at its initial 0, which is then stored.
 * The three conversion switches further down behave the same way with
 * l, d and f.
 */
1241 1382 default:
1242 1383 break;
1243 1384 }
1385 +
1244 1386 inst->op1->i[0] = i;
1245 - } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1246 - inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1247 - inst->op == cvtsd2siq) {
1387 + } else if (inst->op == cmpsd || inst->op == cvttss2siq || inst->op ==
1388 + cvtss2siq || inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
/* long long destinations: the converted result is stored in op1->l[0] */
1248 1389 switch (info->res.type) {
1249 1390 case fex_int:
1250 1391 l = info->res.val.i;
1251 1392 break;
1252 1393
1253 1394 case fex_llong:
1254 1395 l = info->res.val.l;
1255 1396 break;
1256 1397
1257 1398 case fex_float:
1258 1399 l = info->res.val.f;
1259 1400 break;
1260 1401
1261 1402 case fex_double:
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
1262 1403 l = info->res.val.d;
1263 1404 break;
1264 1405
1265 1406 case fex_ldouble:
1266 1407 l = info->res.val.q;
1267 1408 break;
1268 1409
1269 1410 default:
1270 1411 break;
1271 1412 }
1413 +
1272 1414 inst->op1->l[0] = l;
1273 1415 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1274 1416 inst->op == cvtss2sd) {
/*
 * double destinations: ops carrying the DOUBLE bit, except cvtsd2ss
 * (which is handled by the float branch below), plus cvtss2sd, which is
 * tested for explicitly; the result is stored in op1->d[0]
 */
1275 1417 switch (info->res.type) {
1276 1418 case fex_int:
1277 1419 d = info->res.val.i;
1278 1420 break;
1279 1421
1280 1422 case fex_llong:
1281 1423 d = info->res.val.l;
1282 1424 break;
1283 1425
1284 1426 case fex_float:
1285 1427 d = info->res.val.f;
1286 1428 break;
1287 1429
1288 1430 case fex_double:
↓ open down ↓ |
7 lines elided |
↑ open up ↑ |
1289 1431 d = info->res.val.d;
1290 1432 break;
1291 1433
1292 1434 case fex_ldouble:
1293 1435 d = info->res.val.q;
1294 1436 break;
1295 1437
1296 1438 default:
1297 1439 break;
1298 1440 }
1441 +
1299 1442 inst->op1->d[0] = d;
1300 1443 } else {
/* everything else has a float destination: store in op1->f[0] */
1301 1444 switch (info->res.type) {
1302 1445 case fex_int:
1303 1446 f = info->res.val.i;
1304 1447 break;
1305 1448
1306 1449 case fex_llong:
1307 1450 f = info->res.val.l;
1308 1451 break;
1309 1452
1310 1453 case fex_float:
1311 1454 f = info->res.val.f;
1312 1455 break;
1313 1456
1314 1457 case fex_double:
↓ open down ↓ |
6 lines elided |
↑ open up ↑ |
1315 1458 f = info->res.val.d;
1316 1459 break;
1317 1460
1318 1461 case fex_ldouble:
1319 1462 f = info->res.val.q;
1320 1463 break;
1321 1464
1322 1465 default:
1323 1466 break;
1324 1467 }
1468 +
1325 1469 inst->op1->f[0] = f;
1326 1470 }
1327 1471 }
1328 1472
1329 1473 /*
1330 1474 * Store the results from a SIMD instruction. For each i, store
1331 1475 * the result value from info[i] in the i-th part of the destination
1332 1476 * of the SIMD SSE instruction specified by *inst. If no result
1333 1477 * is given but the exception indicated by e[i] is underflow or
1334 1478 * overflow, supply the default trapped result.
1335 1479 *
1336 1480 * This routine does not work if the instruction specified by *inst
1337 1481 * is not a SIMD instruction.
 *
 * Implementation: every packed opcode is mapped to a scalar opcode in a
 * local sseinst_t (dummy).  For each element i, dummy.op1 and dummy.op2
 * are pointed at the i-th element of inst->op1 and inst->op2 (indexed as
 * f[], d[] or i[] depending on the opcode) and __fex_st_sse_result() is
 * called with e[i] and &info[i].  Four elements are processed for the
 * cmpps..sqrtps group and the *dq <-> ps conversions, two for all other
 * cases.  Opcodes not listed in the switch are ignored.
 *
 * NOTE(review): dummy is not initialized as a whole; only op, op1 and op2
 * (plus imm for cmpps/cmppd) are assigned before use.  Presumably those
 * are the only fields the scalar routines consume for these opcodes;
 * verify against __fex_get_sse_op(), which __fex_st_sse_result() may call.
1338 1482 */
1339 1483 void
1340 1484 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1341 1485 fex_info_t *info)
1342 1486 {
1343 - sseinst_t dummy;
1344 - int i;
1487 + sseinst_t dummy;
1488 + int i;
1345 1489
1346 1490 /* store each part */
1347 1491 switch (inst->op) {
1348 1492 case cmpps:
1349 1493 dummy.op = cmpss;
/* imm is forwarded only for the compare cases (cmpps here, cmppd below) */
1350 1494 dummy.imm = inst->imm;
1495 +
1351 1496 for (i = 0; i < 4; i++) {
1352 1497 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1353 1498 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1354 1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1355 1500 }
1501 +
1356 1502 break;
1357 1503
1358 1504 case minps:
1359 1505 dummy.op = minss;
1506 +
1360 1507 for (i = 0; i < 4; i++) {
1361 1508 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1362 1509 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1363 1510 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1364 1511 }
1512 +
1365 1513 break;
1366 1514
1367 1515 case maxps:
1368 1516 dummy.op = maxss;
1517 +
1369 1518 for (i = 0; i < 4; i++) {
1370 1519 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1371 1520 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1372 1521 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1373 1522 }
1523 +
1374 1524 break;
1375 1525
1376 1526 case addps:
1377 1527 dummy.op = addss;
1528 +
1378 1529 for (i = 0; i < 4; i++) {
1379 1530 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1380 1531 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1381 1532 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1382 1533 }
1534 +
1383 1535 break;
1384 1536
1385 1537 case subps:
1386 1538 dummy.op = subss;
1539 +
1387 1540 for (i = 0; i < 4; i++) {
1388 1541 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1389 1542 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1390 1543 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1391 1544 }
1545 +
1392 1546 break;
1393 1547
1394 1548 case mulps:
1395 1549 dummy.op = mulss;
1550 +
1396 1551 for (i = 0; i < 4; i++) {
1397 1552 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1398 1553 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1399 1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1400 1555 }
1556 +
1401 1557 break;
1402 1558
1403 1559 case divps:
1404 1560 dummy.op = divss;
1561 +
1405 1562 for (i = 0; i < 4; i++) {
1406 1563 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1407 1564 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1408 1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1409 1566 }
1567 +
1410 1568 break;
1411 1569
1412 1570 case sqrtps:
1413 1571 dummy.op = sqrtss;
1572 +
1414 1573 for (i = 0; i < 4; i++) {
1415 1574 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1416 1575 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1417 1576 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1418 1577 }
1578 +
1419 1579 break;
1420 1580
/*
 * Conversions: the destination (op1) and source (op2) are indexed with
 * different element types, f[] for floats, i[] for ints, d[] for doubles.
 */
1421 1581 case cvtdq2ps:
1422 1582 dummy.op = cvtsi2ss;
1583 +
1423 1584 for (i = 0; i < 4; i++) {
1424 1585 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1425 1586 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1426 1587 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1427 1588 }
1589 +
1428 1590 break;
1429 1591
1430 1592 case cvttps2dq:
1431 1593 dummy.op = cvttss2si;
1594 +
1432 1595 for (i = 0; i < 4; i++) {
1433 1596 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1434 1597 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1435 1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1436 1599 }
1600 +
1437 1601 break;
1438 1602
1439 1603 case cvtps2dq:
1440 1604 dummy.op = cvtss2si;
1605 +
1441 1606 for (i = 0; i < 4; i++) {
1442 1607 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1443 1608 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1444 1609 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1445 1610 }
1611 +
1446 1612 break;
1447 1613
/* from here on every case processes two elements instead of four */
1448 1614 case cvtpi2ps:
1449 1615 dummy.op = cvtsi2ss;
1616 +
1450 1617 for (i = 0; i < 2; i++) {
1451 1618 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1452 1619 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1453 1620 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1454 1621 }
1622 +
1455 1623 break;
1456 1624
1457 1625 case cvttps2pi:
1458 1626 dummy.op = cvttss2si;
1627 +
1459 1628 for (i = 0; i < 2; i++) {
1460 1629 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1461 1630 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1462 1631 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1463 1632 }
1633 +
1464 1634 break;
1465 1635
1466 1636 case cvtps2pi:
1467 1637 dummy.op = cvtss2si;
1638 +
1468 1639 for (i = 0; i < 2; i++) {
1469 1640 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1470 1641 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1471 1642 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1472 1643 }
1644 +
1473 1645 break;
1474 1646
1475 1647 case cmppd:
1476 1648 dummy.op = cmpsd;
1477 1649 dummy.imm = inst->imm;
1650 +
1478 1651 for (i = 0; i < 2; i++) {
1479 1652 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1480 1653 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1481 1654 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1482 1655 }
1656 +
1483 1657 break;
1484 1658
1485 1659 case minpd:
1486 1660 dummy.op = minsd;
1661 +
1487 1662 for (i = 0; i < 2; i++) {
1488 1663 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1489 1664 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1490 1665 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1491 1666 }
1667 +
1492 1668 break;
1493 1669
1494 1670 case maxpd:
1495 1671 dummy.op = maxsd;
1672 +
1496 1673 for (i = 0; i < 2; i++) {
1497 1674 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1498 1675 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1499 1676 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1500 1677 }
1678 +
1501 1679 break;
1502 1680
1503 1681 case addpd:
1504 1682 dummy.op = addsd;
1683 +
1505 1684 for (i = 0; i < 2; i++) {
1506 1685 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1507 1686 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1508 1687 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1509 1688 }
1689 +
1510 1690 break;
1511 1691
1512 1692 case subpd:
1513 1693 dummy.op = subsd;
1694 +
1514 1695 for (i = 0; i < 2; i++) {
1515 1696 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1516 1697 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1517 1698 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1518 1699 }
1700 +
1519 1701 break;
1520 1702
1521 1703 case mulpd:
1522 1704 dummy.op = mulsd;
1705 +
1523 1706 for (i = 0; i < 2; i++) {
1524 1707 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1525 1708 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1526 1709 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1527 1710 }
1711 +
1528 1712 break;
1529 1713
1530 1714 case divpd:
1531 1715 dummy.op = divsd;
1716 +
1532 1717 for (i = 0; i < 2; i++) {
1533 1718 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1534 1719 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1535 1720 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1536 1721 }
1722 +
1537 1723 break;
1538 1724
1539 1725 case sqrtpd:
1540 1726 dummy.op = sqrtsd;
1727 +
1541 1728 for (i = 0; i < 2; i++) {
1542 1729 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1543 1730 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1544 1731 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1545 1732 }
1733 +
1546 1734 break;
1547 1735
/* the pi and dq forms share one body; both convert two ints to doubles */
1548 1736 case cvtpi2pd:
1549 1737 case cvtdq2pd:
1550 1738 dummy.op = cvtsi2sd;
1739 +
1551 1740 for (i = 0; i < 2; i++) {
1552 1741 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1553 1742 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1554 1743 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1555 1744 }
1745 +
1556 1746 break;
1557 1747
1558 1748 case cvttpd2pi:
1559 1749 case cvttpd2dq:
1560 1750 dummy.op = cvttsd2si;
1751 +
1561 1752 for (i = 0; i < 2; i++) {
1562 1753 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1563 1754 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1564 1755 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1565 1756 }
1757 +
1566 1758 /* for cvttpd2dq, zero the high 64 bits of the destination */
1567 1759 if (inst->op == cvttpd2dq)
1568 1760 inst->op1->l[1] = 0ll;
1761 +
1569 1762 break;
1570 1763
1571 1764 case cvtpd2pi:
1572 1765 case cvtpd2dq:
1573 1766 dummy.op = cvtsd2si;
1767 +
1574 1768 for (i = 0; i < 2; i++) {
1575 1769 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1576 1770 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 1771 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 1772 }
1773 +
1579 1774 /* for cvtpd2dq, zero the high 64 bits of the destination */
1580 1775 if (inst->op == cvtpd2dq)
1581 1776 inst->op1->l[1] = 0ll;
1777 +
1582 1778 break;
1583 1779
1584 1780 case cvtps2pd:
1585 1781 dummy.op = cvtss2sd;
1782 +
1586 1783 for (i = 0; i < 2; i++) {
1587 1784 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1588 1785 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1589 1786 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1590 1787 }
1788 +
1591 1789 break;
1592 1790
1593 1791 case cvtpd2ps:
1594 1792 dummy.op = cvtsd2ss;
1793 +
1595 1794 for (i = 0; i < 2; i++) {
1596 1795 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1597 1796 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1598 1797 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1599 1798 }
1799 +
1600 1800 /* zero the high 64 bits of the destination */
1601 1801 inst->op1->l[1] = 0ll;
1602 1802
/*
 * NOTE(review): the cvtpd2ps case has no break and falls through into
 * default.  Since default only breaks, behavior is unaffected, but an
 * explicit break (or a fallthrough marker) would make the intent clear.
 */
1603 1803 default:
1604 1804 break;
1605 1805 }
1606 1806 }
1607 -
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX