Print this page
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/__fex_sse.c
+++ new/usr/src/lib/libm/common/m9x/__fex_sse.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 30 #include "fenv_synonyms.h"
31 31 #include <ucontext.h>
32 32 #include <fenv.h>
33 33 #if defined(__SUNPRO_C)
34 34 #include <sunmath.h>
35 35 #else
36 36 #include <sys/ieeefp.h>
37 37 #endif
38 38 #include "fex_handler.h"
39 39 #include "fenv_inlines.h"
40 40
/*
 * Fall back to the i386 register index names when the generic
 * program-counter and processor-status names are not provided.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts a general purpose register number, as assembled from
 * the ModRM/SIB/REX fields by __fex_parse_sse, into an index into
 * uc_mcontext.gregs[].  On amd64, register number 4 maps to REG_RSP;
 * numbers below 4 count down from REG_RAX and numbers above 4 count
 * down from REG_RAX + 1.  On i386 the numbers simply count down from EAX.
 *
 * The argument is fully parenthesized so that expressions such as
 * regno(a | b) expand correctly.  Note that the amd64 form evaluates
 * its argument more than once, so the argument must be free of side
 * effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4) ? REG_RAX - (X) : \
			(((X) > 4) ? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55 55
56 56 /*
57 57 * Support for SSE instructions
58 58 */
59 59
60 60 /*
61 61 * Decode an SSE instruction. Fill in *inst and return the length of the
62 62 * instruction in bytes. Return 0 if the instruction is not recognized.
63 63 */
64 64 int
65 65 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 66 {
67 67 unsigned char *ip;
68 68 char *addr;
69 69 int i, dbl, simd, rex, modrm, sib, r;
70 70
71 71 i = 0;
72 72 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73 73
74 74 /* look for pseudo-prefixes */
75 75 dbl = 0;
76 76 simd = SIMD;
77 77 if (ip[i] == 0xF3) {
78 78 simd = 0;
79 79 i++;
80 80 } else if (ip[i] == 0x66) {
81 81 dbl = DOUBLE;
82 82 i++;
83 83 } else if (ip[i] == 0xF2) {
84 84 dbl = DOUBLE;
85 85 simd = 0;
86 86 i++;
87 87 }
88 88
89 89 /* look for AMD64 REX prefix */
90 90 rex = 0;
91 91 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
92 92 rex = ip[i];
93 93 i++;
94 94 }
95 95
96 96 /* parse opcode */
97 97 if (ip[i++] != 0x0F)
98 98 return 0;
99 99 switch (ip[i++]) {
100 100 case 0x2A:
101 101 inst->op = (int)cvtsi2ss + simd + dbl;
102 102 if (!simd)
103 103 inst->op = (int)inst->op + (rex & 8);
104 104 break;
105 105
106 106 case 0x2C:
107 107 inst->op = (int)cvttss2si + simd + dbl;
108 108 if (!simd)
109 109 inst->op = (int)inst->op + (rex & 8);
110 110 break;
111 111
112 112 case 0x2D:
113 113 inst->op = (int)cvtss2si + simd + dbl;
114 114 if (!simd)
115 115 inst->op = (int)inst->op + (rex & 8);
116 116 break;
117 117
118 118 case 0x2E:
119 119 /* oddball: scalar instruction in a SIMD opcode group */
120 120 if (!simd)
121 121 return 0;
122 122 inst->op = (int)ucomiss + dbl;
123 123 break;
124 124
125 125 case 0x2F:
126 126 /* oddball: scalar instruction in a SIMD opcode group */
127 127 if (!simd)
128 128 return 0;
129 129 inst->op = (int)comiss + dbl;
130 130 break;
131 131
132 132 case 0x51:
133 133 inst->op = (int)sqrtss + simd + dbl;
134 134 break;
135 135
136 136 case 0x58:
137 137 inst->op = (int)addss + simd + dbl;
138 138 break;
139 139
140 140 case 0x59:
141 141 inst->op = (int)mulss + simd + dbl;
142 142 break;
143 143
144 144 case 0x5A:
145 145 inst->op = (int)cvtss2sd + simd + dbl;
146 146 break;
147 147
148 148 case 0x5B:
149 149 if (dbl) {
150 150 if (simd)
151 151 inst->op = cvtps2dq;
152 152 else
153 153 return 0;
154 154 } else {
155 155 inst->op = (simd)? cvtdq2ps : cvttps2dq;
156 156 }
157 157 break;
158 158
159 159 case 0x5C:
160 160 inst->op = (int)subss + simd + dbl;
161 161 break;
162 162
163 163 case 0x5D:
164 164 inst->op = (int)minss + simd + dbl;
165 165 break;
166 166
167 167 case 0x5E:
168 168 inst->op = (int)divss + simd + dbl;
169 169 break;
170 170
171 171 case 0x5F:
172 172 inst->op = (int)maxss + simd + dbl;
173 173 break;
174 174
175 175 case 0xC2:
176 176 inst->op = (int)cmpss + simd + dbl;
177 177 break;
178 178
179 179 case 0xE6:
180 180 if (simd) {
181 181 if (dbl)
182 182 inst->op = cvttpd2dq;
183 183 else
184 184 return 0;
185 185 } else {
186 186 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
187 187 }
188 188 break;
189 189
190 190 default:
191 191 return 0;
192 192 }
193 193
194 194 /* locate operands */
195 195 modrm = ip[i++];
196 196
197 197 if (inst->op == cvtss2si || inst->op == cvttss2si ||
198 198 inst->op == cvtsd2si || inst->op == cvttsd2si ||
199 199 inst->op == cvtss2siq || inst->op == cvttss2siq ||
200 200 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
201 201 /* op1 is a gp register */
202 202 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
203 203 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
204 204 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
205 205 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
206 206 /* op1 is a mmx register */
207 207 #ifdef __amd64
208 208 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
209 209 fpchip_state.st[(modrm >> 3) & 7];
210 210 #else
211 211 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
212 212 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
213 213 fpchip_state.state[7]);
214 214 #endif
215 215 } else {
216 216 /* op1 is a xmm register */
217 217 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
218 218 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
219 219 fp_reg_set.fpchip_state.xmm[r];
220 220 }
221 221
222 222 if ((modrm >> 6) == 3) {
223 223 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
224 224 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
225 225 /* op2 is a gp register */
226 226 r = ((rex & 1) << 3) | (modrm & 7);
227 227 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
228 228 gregs[regno(r)];
229 229 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
230 230 /* op2 is a mmx register */
231 231 #ifdef __amd64
232 232 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
233 233 fp_reg_set.fpchip_state.st[modrm & 7];
234 234 #else
235 235 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
↓ open down ↓ |
235 lines elided |
↑ open up ↑ |
236 236 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
237 237 fpchip_state.state[7]);
238 238 #endif
239 239 } else {
240 240 /* op2 is a xmm register */
241 241 r = ((rex & 1) << 3) | (modrm & 7);
242 242 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
243 243 fp_reg_set.fpchip_state.xmm[r];
244 244 }
245 245 } else if ((modrm & 0xc7) == 0x05) {
246 -#if defined(__amd64)
246 +#ifdef __amd64
247 247 /* address of next instruction + offset */
248 248 r = i + 4;
249 249 if (inst->op == cmpss || inst->op == cmpps ||
250 250 inst->op == cmpsd || inst->op == cmppd)
251 251 r++;
252 252 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
253 253 #else
254 254 /* absolute address */
255 255 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
256 256 #endif
257 257 i += 4;
258 258 } else {
259 259 /* complex address */
260 260 if ((modrm & 7) == 4) {
261 261 /* parse sib byte */
262 262 sib = ip[i++];
263 263 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
264 264 /* start with absolute address */
265 - addr = (char *)(uintptr_t)(ip + i);
265 + addr = (char *)(uintptr_t)(*(int *)(ip + i));
266 266 i += 4;
267 267 } else {
268 268 /* start with base */
269 269 r = ((rex & 1) << 3) | (sib & 7);
270 270 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
271 271 }
272 272 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
273 273 if (r != 4) {
274 274 /* add scaled index */
275 275 addr += uap->uc_mcontext.gregs[regno(r)]
276 276 << (sib >> 6);
277 277 }
278 278 } else {
279 279 r = ((rex & 1) << 3) | (modrm & 7);
280 280 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
281 281 }
282 282
283 283 /* add displacement, if any */
284 284 if ((modrm >> 6) == 1) {
285 285 addr += (char)ip[i++];
286 286 } else if ((modrm >> 6) == 2) {
287 287 addr += *(int *)(ip + i);
288 288 i += 4;
289 289 }
290 290 inst->op2 = (sseoperand_t *)addr;
291 291 }
292 292
293 293 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
294 294 inst->op == cmppd) {
295 295 /* get the immediate operand */
296 296 inst->imm = ip[i++];
297 297 }
298 298
299 299 return i;
300 300 }
301 301
302 302 static enum fp_class_type
303 303 my_fp_classf(float *x)
304 304 {
305 305 int i = *(int *)x & ~0x80000000;
306 306
307 307 if (i < 0x7f800000) {
308 308 if (i < 0x00800000)
309 309 return ((i == 0)? fp_zero : fp_subnormal);
310 310 return fp_normal;
311 311 }
312 312 else if (i == 0x7f800000)
313 313 return fp_infinity;
314 314 else if (i & 0x400000)
315 315 return fp_quiet;
316 316 else
317 317 return fp_signaling;
318 318 }
319 319
320 320 static enum fp_class_type
321 321 my_fp_class(double *x)
322 322 {
323 323 int i = *(1+(int *)x) & ~0x80000000;
324 324
325 325 if (i < 0x7ff00000) {
326 326 if (i < 0x00100000)
327 327 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
328 328 return fp_normal;
329 329 }
330 330 else if (i == 0x7ff00000 && *(int *)x == 0)
331 331 return fp_infinity;
332 332 else if (i & 0x80000)
333 333 return fp_quiet;
334 334 else
335 335 return fp_signaling;
336 336 }
337 337
338 338 /*
339 339 * Inspect a scalar SSE instruction that incurred an invalid operation
340 340 * exception to determine which type of exception it was.
341 341 */
342 342 static enum fex_exception
343 343 __fex_get_sse_invalid_type(sseinst_t *inst)
344 344 {
345 345 enum fp_class_type t1, t2;
346 346
347 347 /* check op2 for signaling nan */
348 348 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
349 349 my_fp_classf(&inst->op2->f[0]);
350 350 if (t2 == fp_signaling)
351 351 return fex_inv_snan;
352 352
353 353 /* eliminate all single-operand instructions */
354 354 switch (inst->op) {
355 355 case cvtsd2ss:
356 356 case cvtss2sd:
357 357 /* hmm, this shouldn't have happened */
358 358 return (enum fex_exception) -1;
359 359
360 360 case sqrtss:
361 361 case sqrtsd:
362 362 return fex_inv_sqrt;
↓ open down ↓ |
87 lines elided |
↑ open up ↑ |
363 363
364 364 case cvtss2si:
365 365 case cvtsd2si:
366 366 case cvttss2si:
367 367 case cvttsd2si:
368 368 case cvtss2siq:
369 369 case cvtsd2siq:
370 370 case cvttss2siq:
371 371 case cvttsd2siq:
372 372 return fex_inv_int;
373 + default:
374 + break;
373 375 }
374 376
375 377 /* check op1 for signaling nan */
376 378 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
377 379 my_fp_classf(&inst->op1->f[0]);
378 380 if (t1 == fp_signaling)
379 381 return fex_inv_snan;
380 382
381 383 /* check two-operand instructions for other cases */
382 384 switch (inst->op) {
383 385 case cmpss:
384 386 case cmpsd:
385 387 case minss:
386 388 case minsd:
387 389 case maxss:
388 390 case maxsd:
389 391 case comiss:
390 392 case comisd:
391 393 return fex_inv_cmp;
392 394
393 395 case addss:
394 396 case addsd:
395 397 case subss:
396 398 case subsd:
397 399 if (t1 == fp_infinity && t2 == fp_infinity)
398 400 return fex_inv_isi;
399 401 break;
400 402
401 403 case mulss:
402 404 case mulsd:
403 405 if ((t1 == fp_zero && t2 == fp_infinity) ||
↓ open down ↓ |
21 lines elided |
↑ open up ↑ |
404 406 (t2 == fp_zero && t1 == fp_infinity))
405 407 return fex_inv_zmi;
406 408 break;
407 409
408 410 case divss:
409 411 case divsd:
410 412 if (t1 == fp_zero && t2 == fp_zero)
411 413 return fex_inv_zdz;
412 414 if (t1 == fp_infinity && t2 == fp_infinity)
413 415 return fex_inv_idi;
416 + default:
417 + break;
414 418 }
415 419
416 420 return (enum fex_exception)-1;
417 421 }
418 422
/*
 * inline templates
 *
 * Parameter names follow the way each routine is called from
 * __fex_get_sse_op: a and b are the two source operands, src is a
 * lone source operand, and res receives the result.  The ucomis and
 * comis routines take no result argument.
 */

/* single precision */
extern void sse_cmpeqss(float *a, float *b, int *res);
extern void sse_cmpltss(float *a, float *b, int *res);
extern void sse_cmpless(float *a, float *b, int *res);
extern void sse_cmpunordss(float *a, float *b, int *res);
extern void sse_minss(float *a, float *b, float *res);
extern void sse_maxss(float *a, float *b, float *res);
extern void sse_addss(float *a, float *b, float *res);
extern void sse_subss(float *a, float *b, float *res);
extern void sse_mulss(float *a, float *b, float *res);
extern void sse_divss(float *a, float *b, float *res);
extern void sse_sqrtss(float *src, float *res);
extern void sse_ucomiss(float *a, float *b);
extern void sse_comiss(float *a, float *b);
extern void sse_cvtss2sd(float *src, double *res);
extern void sse_cvtsi2ss(int *src, float *res);
extern void sse_cvttss2si(float *src, int *res);
extern void sse_cvtss2si(float *src, int *res);
#ifdef __amd64
extern void sse_cvtsi2ssq(long long *src, float *res);
extern void sse_cvttss2siq(float *src, long long *res);
extern void sse_cvtss2siq(float *src, long long *res);
#endif

/* double precision */
extern void sse_cmpeqsd(double *a, double *b, long long *res);
extern void sse_cmpltsd(double *a, double *b, long long *res);
extern void sse_cmplesd(double *a, double *b, long long *res);
extern void sse_cmpunordsd(double *a, double *b, long long *res);
extern void sse_minsd(double *a, double *b, double *res);
extern void sse_maxsd(double *a, double *b, double *res);
extern void sse_addsd(double *a, double *b, double *res);
extern void sse_subsd(double *a, double *b, double *res);
extern void sse_mulsd(double *a, double *b, double *res);
extern void sse_divsd(double *a, double *b, double *res);
extern void sse_sqrtsd(double *src, double *res);
extern void sse_ucomisd(double *a, double *b);
extern void sse_comisd(double *a, double *b);
extern void sse_cvtsd2ss(double *src, float *res);
extern void sse_cvtsi2sd(int *src, double *res);
extern void sse_cvttsd2si(double *src, int *res);
extern void sse_cvtsd2si(double *src, int *res);
#ifdef __amd64
extern void sse_cvtsi2sdq(long long *src, double *res);
extern void sse_cvttsd2siq(double *src, long long *res);
extern void sse_cvtsd2siq(double *src, long long *res);
#endif
464 468
465 469 /*
466 470 * Fill in *info with the operands, default untrapped result, and
467 471 * flags produced by a scalar SSE instruction, and return the type
468 472 * of trapped exception (if any). On entry, the mxcsr must have
469 473 * all exceptions masked and all flags clear. The same conditions
470 474 * will hold on exit.
471 475 *
472 476 * This routine does not work if the instruction specified by *inst
473 477 * is not a scalar instruction.
474 478 */
475 479 enum fex_exception
476 480 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
477 481 {
478 482 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
479 483
480 484 /*
481 485 * Perform the operation with traps disabled and check the
482 486 * exception flags. If the underflow trap was enabled, also
483 487 * check for an exact subnormal result.
484 488 */
485 489 __fenv_getmxcsr(&oldmxcsr);
486 490 subnorm = 0;
487 491 if ((int)inst->op & DOUBLE) {
488 492 if (inst->op == cvtsi2sd) {
489 493 info->op1.type = fex_int;
490 494 info->op1.val.i = inst->op2->i[0];
491 495 info->op2.type = fex_nodata;
492 496 } else if (inst->op == cvtsi2sdq) {
493 497 info->op1.type = fex_llong;
494 498 info->op1.val.l = inst->op2->l[0];
495 499 info->op2.type = fex_nodata;
496 500 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
497 501 inst->op == cvttsd2si || inst->op == cvtsd2si ||
498 502 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
499 503 info->op1.type = fex_double;
500 504 info->op1.val.d = inst->op2->d[0];
501 505 info->op2.type = fex_nodata;
502 506 } else {
503 507 info->op1.type = fex_double;
504 508 info->op1.val.d = inst->op1->d[0];
505 509 info->op2.type = fex_double;
506 510 info->op2.val.d = inst->op2->d[0];
507 511 }
508 512 info->res.type = fex_double;
509 513 switch (inst->op) {
510 514 case cmpsd:
511 515 info->op = fex_cmp;
512 516 info->res.type = fex_llong;
513 517 switch (inst->imm & 3) {
514 518 case 0:
515 519 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
516 520 &info->res.val.l);
517 521 break;
518 522
519 523 case 1:
520 524 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
521 525 &info->res.val.l);
522 526 break;
523 527
524 528 case 2:
525 529 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
526 530 &info->res.val.l);
527 531 break;
528 532
529 533 case 3:
530 534 sse_cmpunordsd(&info->op1.val.d,
531 535 &info->op2.val.d, &info->res.val.l);
532 536 }
533 537 if (inst->imm & 4)
534 538 info->res.val.l ^= 0xffffffffffffffffull;
535 539 break;
536 540
537 541 case minsd:
538 542 info->op = fex_other;
539 543 sse_minsd(&info->op1.val.d, &info->op2.val.d,
540 544 &info->res.val.d);
541 545 break;
542 546
543 547 case maxsd:
544 548 info->op = fex_other;
545 549 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
546 550 &info->res.val.d);
547 551 break;
548 552
549 553 case addsd:
550 554 info->op = fex_add;
551 555 sse_addsd(&info->op1.val.d, &info->op2.val.d,
552 556 &info->res.val.d);
553 557 if (my_fp_class(&info->res.val.d) == fp_subnormal)
554 558 subnorm = 1;
555 559 break;
556 560
557 561 case subsd:
558 562 info->op = fex_sub;
559 563 sse_subsd(&info->op1.val.d, &info->op2.val.d,
560 564 &info->res.val.d);
561 565 if (my_fp_class(&info->res.val.d) == fp_subnormal)
562 566 subnorm = 1;
563 567 break;
564 568
565 569 case mulsd:
566 570 info->op = fex_mul;
567 571 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
568 572 &info->res.val.d);
569 573 if (my_fp_class(&info->res.val.d) == fp_subnormal)
570 574 subnorm = 1;
571 575 break;
572 576
573 577 case divsd:
574 578 info->op = fex_div;
575 579 sse_divsd(&info->op1.val.d, &info->op2.val.d,
576 580 &info->res.val.d);
577 581 if (my_fp_class(&info->res.val.d) == fp_subnormal)
578 582 subnorm = 1;
579 583 break;
580 584
581 585 case sqrtsd:
582 586 info->op = fex_sqrt;
583 587 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
584 588 break;
585 589
586 590 case cvtsd2ss:
587 591 info->op = fex_cnvt;
588 592 info->res.type = fex_float;
589 593 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
590 594 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
591 595 subnorm = 1;
592 596 break;
593 597
594 598 case cvtsi2sd:
595 599 info->op = fex_cnvt;
596 600 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
597 601 break;
598 602
599 603 case cvttsd2si:
600 604 info->op = fex_cnvt;
601 605 info->res.type = fex_int;
602 606 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
603 607 break;
604 608
605 609 case cvtsd2si:
606 610 info->op = fex_cnvt;
607 611 info->res.type = fex_int;
608 612 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
609 613 break;
610 614
611 615 #ifdef __amd64
612 616 case cvtsi2sdq:
613 617 info->op = fex_cnvt;
614 618 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
615 619 break;
616 620
617 621 case cvttsd2siq:
618 622 info->op = fex_cnvt;
619 623 info->res.type = fex_llong;
620 624 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
621 625 break;
622 626
623 627 case cvtsd2siq:
624 628 info->op = fex_cnvt;
625 629 info->res.type = fex_llong;
626 630 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
627 631 break;
628 632 #endif
629 633
630 634 case ucomisd:
↓ open down ↓ |
207 lines elided |
↑ open up ↑ |
631 635 info->op = fex_cmp;
632 636 info->res.type = fex_nodata;
633 637 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
634 638 break;
635 639
636 640 case comisd:
637 641 info->op = fex_cmp;
638 642 info->res.type = fex_nodata;
639 643 sse_comisd(&info->op1.val.d, &info->op2.val.d);
640 644 break;
645 + default:
646 + break;
641 647 }
642 648 } else {
643 649 if (inst->op == cvtsi2ss) {
644 650 info->op1.type = fex_int;
645 651 info->op1.val.i = inst->op2->i[0];
646 652 info->op2.type = fex_nodata;
647 653 } else if (inst->op == cvtsi2ssq) {
648 654 info->op1.type = fex_llong;
649 655 info->op1.val.l = inst->op2->l[0];
650 656 info->op2.type = fex_nodata;
651 657 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
652 658 inst->op == cvttss2si || inst->op == cvtss2si ||
653 659 inst->op == cvttss2siq || inst->op == cvtss2siq) {
654 660 info->op1.type = fex_float;
655 661 info->op1.val.f = inst->op2->f[0];
656 662 info->op2.type = fex_nodata;
657 663 } else {
658 664 info->op1.type = fex_float;
659 665 info->op1.val.f = inst->op1->f[0];
660 666 info->op2.type = fex_float;
661 667 info->op2.val.f = inst->op2->f[0];
662 668 }
663 669 info->res.type = fex_float;
664 670 switch (inst->op) {
665 671 case cmpss:
666 672 info->op = fex_cmp;
667 673 info->res.type = fex_int;
668 674 switch (inst->imm & 3) {
669 675 case 0:
670 676 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
671 677 &info->res.val.i);
672 678 break;
673 679
674 680 case 1:
675 681 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
676 682 &info->res.val.i);
677 683 break;
678 684
679 685 case 2:
680 686 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
681 687 &info->res.val.i);
682 688 break;
683 689
684 690 case 3:
685 691 sse_cmpunordss(&info->op1.val.f,
686 692 &info->op2.val.f, &info->res.val.i);
687 693 }
688 694 if (inst->imm & 4)
689 695 info->res.val.i ^= 0xffffffffu;
690 696 break;
691 697
692 698 case minss:
693 699 info->op = fex_other;
694 700 sse_minss(&info->op1.val.f, &info->op2.val.f,
695 701 &info->res.val.f);
696 702 break;
697 703
698 704 case maxss:
699 705 info->op = fex_other;
700 706 sse_maxss(&info->op1.val.f, &info->op2.val.f,
701 707 &info->res.val.f);
702 708 break;
703 709
704 710 case addss:
705 711 info->op = fex_add;
706 712 sse_addss(&info->op1.val.f, &info->op2.val.f,
707 713 &info->res.val.f);
708 714 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
709 715 subnorm = 1;
710 716 break;
711 717
712 718 case subss:
713 719 info->op = fex_sub;
714 720 sse_subss(&info->op1.val.f, &info->op2.val.f,
715 721 &info->res.val.f);
716 722 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
717 723 subnorm = 1;
718 724 break;
719 725
720 726 case mulss:
721 727 info->op = fex_mul;
722 728 sse_mulss(&info->op1.val.f, &info->op2.val.f,
723 729 &info->res.val.f);
724 730 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
725 731 subnorm = 1;
726 732 break;
727 733
728 734 case divss:
729 735 info->op = fex_div;
730 736 sse_divss(&info->op1.val.f, &info->op2.val.f,
731 737 &info->res.val.f);
732 738 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
733 739 subnorm = 1;
734 740 break;
735 741
736 742 case sqrtss:
737 743 info->op = fex_sqrt;
738 744 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
739 745 break;
740 746
741 747 case cvtss2sd:
742 748 info->op = fex_cnvt;
743 749 info->res.type = fex_double;
744 750 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
745 751 break;
746 752
747 753 case cvtsi2ss:
748 754 info->op = fex_cnvt;
749 755 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
750 756 break;
751 757
752 758 case cvttss2si:
753 759 info->op = fex_cnvt;
754 760 info->res.type = fex_int;
755 761 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
756 762 break;
757 763
758 764 case cvtss2si:
759 765 info->op = fex_cnvt;
760 766 info->res.type = fex_int;
761 767 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
762 768 break;
763 769
764 770 #ifdef __amd64
765 771 case cvtsi2ssq:
766 772 info->op = fex_cnvt;
767 773 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
768 774 break;
769 775
770 776 case cvttss2siq:
771 777 info->op = fex_cnvt;
772 778 info->res.type = fex_llong;
773 779 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
774 780 break;
775 781
776 782 case cvtss2siq:
777 783 info->op = fex_cnvt;
778 784 info->res.type = fex_llong;
779 785 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
780 786 break;
781 787 #endif
782 788
783 789 case ucomiss:
↓ open down ↓ |
133 lines elided |
↑ open up ↑ |
784 790 info->op = fex_cmp;
785 791 info->res.type = fex_nodata;
786 792 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
787 793 break;
788 794
789 795 case comiss:
790 796 info->op = fex_cmp;
791 797 info->res.type = fex_nodata;
792 798 sse_comiss(&info->op1.val.f, &info->op2.val.f);
793 799 break;
800 + default:
801 + break;
794 802 }
795 803 }
796 804 __fenv_getmxcsr(&mxcsr);
797 805 info->flags = mxcsr & 0x3d;
798 806 __fenv_setmxcsr(&oldmxcsr);
799 807
800 808 /* determine which exception would have been trapped */
801 809 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
802 810 >> 7) & 0x3d;
803 811 e = mxcsr & te;
804 812 if (e & FE_INVALID)
805 813 return __fex_get_sse_invalid_type(inst);
806 814 if (e & FE_DIVBYZERO)
807 815 return fex_division;
808 816 if (e & FE_OVERFLOW)
809 817 return fex_overflow;
810 818 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
811 819 return fex_underflow;
812 820 if (e & FE_INEXACT)
813 821 return fex_inexact;
814 822 return (enum fex_exception)-1;
815 823 }
816 824
817 825 /*
818 826 * Emulate a SIMD SSE instruction to determine which exceptions occur
819 827 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
820 828 * trapped exception that would occur if the i-th part of the SIMD
821 829 * instruction were executed in isolation; set e[i] to -1 if no
822 830 * trapped exception would occur in this part. Also fill in info[i]
823 831 * with the corresponding operands, default untrapped result, and
824 832 * flags.
825 833 *
826 834 * This routine does not work if the instruction specified by *inst
827 835 * is not a SIMD instruction.
828 836 */
829 837 void
830 838 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
831 839 fex_info_t *info)
832 840 {
833 841 sseinst_t dummy;
834 842 int i;
835 843
836 844 e[0] = e[1] = e[2] = e[3] = -1;
837 845
838 846 /* perform each part of the SIMD operation */
839 847 switch (inst->op) {
840 848 case cmpps:
841 849 dummy.op = cmpss;
842 850 dummy.imm = inst->imm;
843 851 for (i = 0; i < 4; i++) {
844 852 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
845 853 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
846 854 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
847 855 }
848 856 break;
849 857
850 858 case minps:
851 859 dummy.op = minss;
852 860 for (i = 0; i < 4; i++) {
853 861 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
854 862 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
855 863 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
856 864 }
857 865 break;
858 866
859 867 case maxps:
860 868 dummy.op = maxss;
861 869 for (i = 0; i < 4; i++) {
862 870 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
863 871 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
864 872 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
865 873 }
866 874 break;
867 875
868 876 case addps:
869 877 dummy.op = addss;
870 878 for (i = 0; i < 4; i++) {
871 879 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
872 880 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
873 881 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
874 882 }
875 883 break;
876 884
877 885 case subps:
878 886 dummy.op = subss;
879 887 for (i = 0; i < 4; i++) {
880 888 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
881 889 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
882 890 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
883 891 }
884 892 break;
885 893
886 894 case mulps:
887 895 dummy.op = mulss;
888 896 for (i = 0; i < 4; i++) {
889 897 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
890 898 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
891 899 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
892 900 }
893 901 break;
894 902
895 903 case divps:
896 904 dummy.op = divss;
897 905 for (i = 0; i < 4; i++) {
898 906 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
899 907 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
900 908 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
901 909 }
902 910 break;
903 911
904 912 case sqrtps:
905 913 dummy.op = sqrtss;
906 914 for (i = 0; i < 4; i++) {
907 915 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
908 916 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
909 917 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
910 918 }
911 919 break;
912 920
913 921 case cvtdq2ps:
914 922 dummy.op = cvtsi2ss;
915 923 for (i = 0; i < 4; i++) {
916 924 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
917 925 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
918 926 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
919 927 }
920 928 break;
921 929
922 930 case cvttps2dq:
923 931 dummy.op = cvttss2si;
924 932 for (i = 0; i < 4; i++) {
925 933 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
926 934 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
927 935 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
928 936 }
929 937 break;
930 938
931 939 case cvtps2dq:
932 940 dummy.op = cvtss2si;
933 941 for (i = 0; i < 4; i++) {
934 942 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
935 943 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
936 944 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
937 945 }
938 946 break;
939 947
940 948 case cvtpi2ps:
941 949 dummy.op = cvtsi2ss;
942 950 for (i = 0; i < 2; i++) {
943 951 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
944 952 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
945 953 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
946 954 }
947 955 break;
948 956
949 957 case cvttps2pi:
950 958 dummy.op = cvttss2si;
951 959 for (i = 0; i < 2; i++) {
952 960 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
953 961 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
954 962 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
955 963 }
956 964 break;
957 965
958 966 case cvtps2pi:
959 967 dummy.op = cvtss2si;
960 968 for (i = 0; i < 2; i++) {
961 969 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
962 970 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
963 971 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
964 972 }
965 973 break;
966 974
967 975 case cmppd:
968 976 dummy.op = cmpsd;
969 977 dummy.imm = inst->imm;
970 978 for (i = 0; i < 2; i++) {
971 979 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
972 980 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
973 981 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
974 982 }
975 983 break;
976 984
977 985 case minpd:
978 986 dummy.op = minsd;
979 987 for (i = 0; i < 2; i++) {
980 988 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
981 989 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
982 990 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
983 991 }
984 992 break;
985 993
986 994 case maxpd:
987 995 dummy.op = maxsd;
988 996 for (i = 0; i < 2; i++) {
989 997 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
990 998 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
991 999 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
992 1000 }
993 1001 break;
994 1002
995 1003 case addpd:
996 1004 dummy.op = addsd;
997 1005 for (i = 0; i < 2; i++) {
998 1006 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
999 1007 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1000 1008 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1001 1009 }
1002 1010 break;
1003 1011
1004 1012 case subpd:
1005 1013 dummy.op = subsd;
1006 1014 for (i = 0; i < 2; i++) {
1007 1015 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1008 1016 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1009 1017 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1010 1018 }
1011 1019 break;
1012 1020
1013 1021 case mulpd:
1014 1022 dummy.op = mulsd;
1015 1023 for (i = 0; i < 2; i++) {
1016 1024 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1017 1025 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1018 1026 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1019 1027 }
1020 1028 break;
1021 1029
1022 1030 case divpd:
1023 1031 dummy.op = divsd;
1024 1032 for (i = 0; i < 2; i++) {
1025 1033 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1026 1034 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1027 1035 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1028 1036 }
1029 1037 break;
1030 1038
1031 1039 case sqrtpd:
1032 1040 dummy.op = sqrtsd;
1033 1041 for (i = 0; i < 2; i++) {
1034 1042 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1035 1043 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1036 1044 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1037 1045 }
1038 1046 break;
1039 1047
1040 1048 case cvtpi2pd:
1041 1049 case cvtdq2pd:
1042 1050 dummy.op = cvtsi2sd;
1043 1051 for (i = 0; i < 2; i++) {
1044 1052 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1045 1053 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1046 1054 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1047 1055 }
1048 1056 break;
1049 1057
1050 1058 case cvttpd2pi:
1051 1059 case cvttpd2dq:
1052 1060 dummy.op = cvttsd2si;
1053 1061 for (i = 0; i < 2; i++) {
1054 1062 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1055 1063 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1056 1064 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1057 1065 }
1058 1066 break;
1059 1067
1060 1068 case cvtpd2pi:
1061 1069 case cvtpd2dq:
1062 1070 dummy.op = cvtsd2si;
1063 1071 for (i = 0; i < 2; i++) {
1064 1072 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1065 1073 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1066 1074 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1067 1075 }
1068 1076 break;
1069 1077
1070 1078 case cvtps2pd:
1071 1079 dummy.op = cvtss2sd;
1072 1080 for (i = 0; i < 2; i++) {
1073 1081 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1074 1082 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1075 1083 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
↓ open down ↓ |
272 lines elided |
↑ open up ↑ |
1076 1084 }
1077 1085 break;
1078 1086
1079 1087 case cvtpd2ps:
1080 1088 dummy.op = cvtsd2ss;
1081 1089 for (i = 0; i < 2; i++) {
1082 1090 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1083 1091 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1084 1092 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1085 1093 }
1094 + default:
1095 + break;
1086 1096 }
1087 1097 }
1088 1098
1089 1099 /*
1090 1100 * Store the result value from *info in the destination of the scalar
1091 1101 * SSE instruction specified by *inst. If no result is given but the
1092 1102 * exception is underflow or overflow, supply the default trapped result.
1093 1103 *
1094 1104 * This routine does not work if the instruction specified by *inst
1095 1105 * is not a scalar instruction.
1096 1106 */
1097 1107 void
1098 1108 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1099 1109 fex_info_t *info)
1100 1110 {
1101 - int i;
1102 - long long l;
1103 - float f, fscl;
1104 - double d, dscl;
1111 + int i = 0;
1112 + long long l = 0L;;
1113 + float f = 0.0, fscl;
1114 + double d = 0.0L, dscl;
1105 1115
1106 1116 /* for compares that write eflags, just set the flags
1107 1117 to indicate "unordered" */
1108 1118 if (inst->op == ucomiss || inst->op == comiss ||
1109 1119 inst->op == ucomisd || inst->op == comisd) {
1110 1120 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1111 1121 return;
1112 1122 }
1113 1123
1114 1124 /* if info doesn't specify a result value, try to generate
1115 1125 the default trapped result */
1116 1126 if (info->res.type == fex_nodata) {
1117 1127 /* set scale factors for exponent wrapping */
1118 1128 switch (e) {
1119 1129 case fex_overflow:
1120 1130 fscl = 1.262177448e-29f; /* 2^-96 */
1121 1131 dscl = 6.441148769597133308e-232; /* 2^-768 */
1122 1132 break;
1123 1133
1124 1134 case fex_underflow:
1125 1135 fscl = 7.922816251e+28f; /* 2^96 */
1126 1136 dscl = 1.552518092300708935e+231; /* 2^768 */
1127 1137 break;
1128 1138
1129 1139 default:
1130 1140 (void) __fex_get_sse_op(uap, inst, info);
1131 1141 if (info->res.type == fex_nodata)
1132 1142 return;
1133 1143 goto stuff;
1134 1144 }
1135 1145
1136 1146 /* generate the wrapped result */
1137 1147 if (inst->op == cvtsd2ss) {
1138 1148 info->op1.type = fex_double;
1139 1149 info->op1.val.d = inst->op2->d[0];
1140 1150 info->op2.type = fex_nodata;
1141 1151 info->res.type = fex_float;
1142 1152 info->res.val.f = (float)(fscl * (fscl *
1143 1153 info->op1.val.d));
1144 1154 } else if ((int)inst->op & DOUBLE) {
1145 1155 info->op1.type = fex_double;
1146 1156 info->op1.val.d = inst->op1->d[0];
1147 1157 info->op2.type = fex_double;
1148 1158 info->op2.val.d = inst->op2->d[0];
1149 1159 info->res.type = fex_double;
1150 1160 switch (inst->op) {
1151 1161 case addsd:
1152 1162 info->res.val.d = dscl * (dscl *
1153 1163 info->op1.val.d + dscl * info->op2.val.d);
1154 1164 break;
1155 1165
1156 1166 case subsd:
1157 1167 info->res.val.d = dscl * (dscl *
1158 1168 info->op1.val.d - dscl * info->op2.val.d);
1159 1169 break;
1160 1170
1161 1171 case mulsd:
1162 1172 info->res.val.d = (dscl * info->op1.val.d) *
1163 1173 (dscl * info->op2.val.d);
1164 1174 break;
1165 1175
1166 1176 case divsd:
1167 1177 info->res.val.d = (dscl * info->op1.val.d) /
1168 1178 (info->op2.val.d / dscl);
1169 1179 break;
1170 1180
1171 1181 default:
1172 1182 return;
1173 1183 }
1174 1184 } else {
1175 1185 info->op1.type = fex_float;
1176 1186 info->op1.val.f = inst->op1->f[0];
1177 1187 info->op2.type = fex_float;
1178 1188 info->op2.val.f = inst->op2->f[0];
1179 1189 info->res.type = fex_float;
1180 1190 switch (inst->op) {
1181 1191 case addss:
1182 1192 info->res.val.f = fscl * (fscl *
1183 1193 info->op1.val.f + fscl * info->op2.val.f);
1184 1194 break;
1185 1195
1186 1196 case subss:
1187 1197 info->res.val.f = fscl * (fscl *
1188 1198 info->op1.val.f - fscl * info->op2.val.f);
1189 1199 break;
1190 1200
1191 1201 case mulss:
1192 1202 info->res.val.f = (fscl * info->op1.val.f) *
1193 1203 (fscl * info->op2.val.f);
1194 1204 break;
1195 1205
1196 1206 case divss:
1197 1207 info->res.val.f = (fscl * info->op1.val.f) /
1198 1208 (info->op2.val.f / fscl);
1199 1209 break;
1200 1210
1201 1211 default:
1202 1212 return;
1203 1213 }
1204 1214 }
1205 1215 }
1206 1216
1207 1217 /* put the result in the destination */
1208 1218 stuff:
1209 1219 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1210 1220 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1211 1221 switch (info->res.type) {
1212 1222 case fex_int:
1213 1223 i = info->res.val.i;
1214 1224 break;
1215 1225
1216 1226 case fex_llong:
1217 1227 i = info->res.val.l;
1218 1228 break;
1219 1229
1220 1230 case fex_float:
↓ open down ↓ |
106 lines elided |
↑ open up ↑ |
1221 1231 i = info->res.val.f;
1222 1232 break;
1223 1233
1224 1234 case fex_double:
1225 1235 i = info->res.val.d;
1226 1236 break;
1227 1237
1228 1238 case fex_ldouble:
1229 1239 i = info->res.val.q;
1230 1240 break;
1241 +
1242 + default:
1243 + break;
1231 1244 }
1232 1245 inst->op1->i[0] = i;
1233 1246 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1234 1247 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1235 1248 inst->op == cvtsd2siq) {
1236 1249 switch (info->res.type) {
1237 1250 case fex_int:
1238 1251 l = info->res.val.i;
1239 1252 break;
1240 1253
1241 1254 case fex_llong:
1242 1255 l = info->res.val.l;
1243 1256 break;
1244 1257
1245 1258 case fex_float:
↓ open down ↓ |
5 lines elided |
↑ open up ↑ |
1246 1259 l = info->res.val.f;
1247 1260 break;
1248 1261
1249 1262 case fex_double:
1250 1263 l = info->res.val.d;
1251 1264 break;
1252 1265
1253 1266 case fex_ldouble:
1254 1267 l = info->res.val.q;
1255 1268 break;
1269 +
1270 + default:
1271 + break;
1256 1272 }
1257 1273 inst->op1->l[0] = l;
1258 1274 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1259 1275 inst->op == cvtss2sd) {
1260 1276 switch (info->res.type) {
1261 1277 case fex_int:
1262 1278 d = info->res.val.i;
1263 1279 break;
1264 1280
1265 1281 case fex_llong:
1266 1282 d = info->res.val.l;
1267 1283 break;
1268 1284
1269 1285 case fex_float:
↓ open down ↓ |
4 lines elided |
↑ open up ↑ |
1270 1286 d = info->res.val.f;
1271 1287 break;
1272 1288
1273 1289 case fex_double:
1274 1290 d = info->res.val.d;
1275 1291 break;
1276 1292
1277 1293 case fex_ldouble:
1278 1294 d = info->res.val.q;
1279 1295 break;
1296 +
1297 + default:
1298 + break;
1280 1299 }
1281 1300 inst->op1->d[0] = d;
1282 1301 } else {
1283 1302 switch (info->res.type) {
1284 1303 case fex_int:
1285 1304 f = info->res.val.i;
1286 1305 break;
1287 1306
1288 1307 case fex_llong:
1289 1308 f = info->res.val.l;
1290 1309 break;
1291 1310
1292 1311 case fex_float:
↓ open down ↓ |
3 lines elided |
↑ open up ↑ |
1293 1312 f = info->res.val.f;
1294 1313 break;
1295 1314
1296 1315 case fex_double:
1297 1316 f = info->res.val.d;
1298 1317 break;
1299 1318
1300 1319 case fex_ldouble:
1301 1320 f = info->res.val.q;
1302 1321 break;
1322 +
1323 + default:
1324 + break;
1303 1325 }
1304 1326 inst->op1->f[0] = f;
1305 1327 }
1306 1328 }
1307 1329
1308 1330 /*
1309 1331 * Store the results from a SIMD instruction. For each i, store
1310 1332 * the result value from info[i] in the i-th part of the destination
1311 1333 * of the SIMD SSE instruction specified by *inst. If no result
1312 1334 * is given but the exception indicated by e[i] is underflow or
1313 1335 * overflow, supply the default trapped result.
1314 1336 *
1315 1337 * This routine does not work if the instruction specified by *inst
1316 1338 * is not a SIMD instruction.
1317 1339 */
1318 1340 void
1319 1341 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1320 1342 fex_info_t *info)
1321 1343 {
1322 1344 sseinst_t dummy;
1323 1345 int i;
1324 1346
1325 1347 /* store each part */
1326 1348 switch (inst->op) {
1327 1349 case cmpps:
1328 1350 dummy.op = cmpss;
1329 1351 dummy.imm = inst->imm;
1330 1352 for (i = 0; i < 4; i++) {
1331 1353 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1332 1354 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1333 1355 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1334 1356 }
1335 1357 break;
1336 1358
1337 1359 case minps:
1338 1360 dummy.op = minss;
1339 1361 for (i = 0; i < 4; i++) {
1340 1362 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1341 1363 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1342 1364 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1343 1365 }
1344 1366 break;
1345 1367
1346 1368 case maxps:
1347 1369 dummy.op = maxss;
1348 1370 for (i = 0; i < 4; i++) {
1349 1371 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1350 1372 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1351 1373 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1352 1374 }
1353 1375 break;
1354 1376
1355 1377 case addps:
1356 1378 dummy.op = addss;
1357 1379 for (i = 0; i < 4; i++) {
1358 1380 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1359 1381 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1360 1382 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1361 1383 }
1362 1384 break;
1363 1385
1364 1386 case subps:
1365 1387 dummy.op = subss;
1366 1388 for (i = 0; i < 4; i++) {
1367 1389 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1368 1390 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1369 1391 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1370 1392 }
1371 1393 break;
1372 1394
1373 1395 case mulps:
1374 1396 dummy.op = mulss;
1375 1397 for (i = 0; i < 4; i++) {
1376 1398 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1377 1399 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1378 1400 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1379 1401 }
1380 1402 break;
1381 1403
1382 1404 case divps:
1383 1405 dummy.op = divss;
1384 1406 for (i = 0; i < 4; i++) {
1385 1407 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1386 1408 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1387 1409 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1388 1410 }
1389 1411 break;
1390 1412
1391 1413 case sqrtps:
1392 1414 dummy.op = sqrtss;
1393 1415 for (i = 0; i < 4; i++) {
1394 1416 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1395 1417 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1396 1418 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1397 1419 }
1398 1420 break;
1399 1421
1400 1422 case cvtdq2ps:
1401 1423 dummy.op = cvtsi2ss;
1402 1424 for (i = 0; i < 4; i++) {
1403 1425 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1404 1426 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1405 1427 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1406 1428 }
1407 1429 break;
1408 1430
1409 1431 case cvttps2dq:
1410 1432 dummy.op = cvttss2si;
1411 1433 for (i = 0; i < 4; i++) {
1412 1434 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1413 1435 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1414 1436 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1415 1437 }
1416 1438 break;
1417 1439
1418 1440 case cvtps2dq:
1419 1441 dummy.op = cvtss2si;
1420 1442 for (i = 0; i < 4; i++) {
1421 1443 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1422 1444 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1423 1445 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1424 1446 }
1425 1447 break;
1426 1448
1427 1449 case cvtpi2ps:
1428 1450 dummy.op = cvtsi2ss;
1429 1451 for (i = 0; i < 2; i++) {
1430 1452 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1431 1453 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1432 1454 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1433 1455 }
1434 1456 break;
1435 1457
1436 1458 case cvttps2pi:
1437 1459 dummy.op = cvttss2si;
1438 1460 for (i = 0; i < 2; i++) {
1439 1461 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1440 1462 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1441 1463 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1442 1464 }
1443 1465 break;
1444 1466
1445 1467 case cvtps2pi:
1446 1468 dummy.op = cvtss2si;
1447 1469 for (i = 0; i < 2; i++) {
1448 1470 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1449 1471 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1450 1472 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1451 1473 }
1452 1474 break;
1453 1475
1454 1476 case cmppd:
1455 1477 dummy.op = cmpsd;
1456 1478 dummy.imm = inst->imm;
1457 1479 for (i = 0; i < 2; i++) {
1458 1480 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1459 1481 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1460 1482 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1461 1483 }
1462 1484 break;
1463 1485
1464 1486 case minpd:
1465 1487 dummy.op = minsd;
1466 1488 for (i = 0; i < 2; i++) {
1467 1489 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1468 1490 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1469 1491 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1470 1492 }
1471 1493 break;
1472 1494
1473 1495 case maxpd:
1474 1496 dummy.op = maxsd;
1475 1497 for (i = 0; i < 2; i++) {
1476 1498 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1477 1499 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1478 1500 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1479 1501 }
1480 1502 break;
1481 1503
1482 1504 case addpd:
1483 1505 dummy.op = addsd;
1484 1506 for (i = 0; i < 2; i++) {
1485 1507 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1486 1508 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1487 1509 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1488 1510 }
1489 1511 break;
1490 1512
1491 1513 case subpd:
1492 1514 dummy.op = subsd;
1493 1515 for (i = 0; i < 2; i++) {
1494 1516 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1495 1517 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1496 1518 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1497 1519 }
1498 1520 break;
1499 1521
1500 1522 case mulpd:
1501 1523 dummy.op = mulsd;
1502 1524 for (i = 0; i < 2; i++) {
1503 1525 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1504 1526 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1505 1527 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1506 1528 }
1507 1529 break;
1508 1530
1509 1531 case divpd:
1510 1532 dummy.op = divsd;
1511 1533 for (i = 0; i < 2; i++) {
1512 1534 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1513 1535 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1514 1536 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1515 1537 }
1516 1538 break;
1517 1539
1518 1540 case sqrtpd:
1519 1541 dummy.op = sqrtsd;
1520 1542 for (i = 0; i < 2; i++) {
1521 1543 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1522 1544 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1523 1545 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1524 1546 }
1525 1547 break;
1526 1548
1527 1549 case cvtpi2pd:
1528 1550 case cvtdq2pd:
1529 1551 dummy.op = cvtsi2sd;
1530 1552 for (i = 0; i < 2; i++) {
1531 1553 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1532 1554 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1533 1555 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1534 1556 }
1535 1557 break;
1536 1558
1537 1559 case cvttpd2pi:
1538 1560 case cvttpd2dq:
1539 1561 dummy.op = cvttsd2si;
1540 1562 for (i = 0; i < 2; i++) {
1541 1563 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1542 1564 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1543 1565 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1544 1566 }
1545 1567 /* for cvttpd2dq, zero the high 64 bits of the destination */
1546 1568 if (inst->op == cvttpd2dq)
1547 1569 inst->op1->l[1] = 0ll;
1548 1570 break;
1549 1571
1550 1572 case cvtpd2pi:
1551 1573 case cvtpd2dq:
1552 1574 dummy.op = cvtsd2si;
1553 1575 for (i = 0; i < 2; i++) {
1554 1576 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1555 1577 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1556 1578 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1557 1579 }
1558 1580 /* for cvtpd2dq, zero the high 64 bits of the destination */
1559 1581 if (inst->op == cvtpd2dq)
1560 1582 inst->op1->l[1] = 0ll;
1561 1583 break;
1562 1584
1563 1585 case cvtps2pd:
1564 1586 dummy.op = cvtss2sd;
1565 1587 for (i = 0; i < 2; i++) {
1566 1588 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1567 1589 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1568 1590 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1569 1591 }
1570 1592 break;
↓ open down ↓ |
258 lines elided |
↑ open up ↑ |
1571 1593
1572 1594 case cvtpd2ps:
1573 1595 dummy.op = cvtsd2ss;
1574 1596 for (i = 0; i < 2; i++) {
1575 1597 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1576 1598 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1577 1599 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1578 1600 }
1579 1601 /* zero the high 64 bits of the destination */
1580 1602 inst->op1->l[1] = 0ll;
1603 +
1604 + default:
1605 + break;
1581 1606 }
1582 1607 }
1608 +
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX