Print this page
5261 libm should stop using synonyms.h
5298 fabs is 0-sized, confuses dis(1) and others
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Gordon Ross <gwr@nexenta.com>
Split |
Close |
Expand all |
Collapse all |
--- old/usr/src/lib/libm/common/m9x/__fex_sse.c
+++ new/usr/src/lib/libm/common/m9x/__fex_sse.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
↓ open down ↓ |
19 lines elided |
↑ open up ↑ |
20 20 */
21 21
22 22 /*
23 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24 24 */
25 25 /*
26 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27 27 * Use is subject to license terms.
28 28 */
29 29
30 -#include "fenv_synonyms.h"
31 30 #include <ucontext.h>
32 31 #include <fenv.h>
33 32 #if defined(__SUNPRO_C)
34 33 #include <sunmath.h>
35 34 #else
36 35 #include <sys/ieeefp.h>
37 36 #endif
38 37 #include "fex_handler.h"
39 38 #include "fenv_inlines.h"
40 39
/*
 * Default the program counter and processor status register indices to
 * EIP and EFL when the system headers have not already defined them.
 */
#if !defined(REG_PC)
#define	REG_PC	EIP
#endif

#if !defined(REG_PS)
#define	REG_PS	EFL
#endif

/*
 * regno(X) converts a general-purpose register number, as assembled from
 * the ModRM/SIB (and REX) bits of an instruction, into the corresponding
 * index into uc_mcontext.gregs[].  On amd64, numbers below 4 count down
 * from REG_RAX, number 4 maps to REG_RSP, and numbers above 4 count down
 * from REG_RAX + 1.  Elsewhere the index is simply EAX - X.
 *
 * The argument is fully parenthesized so that an expression argument such
 * as regno(a | b) expands correctly.  Note that X is still evaluated more
 * than once, so it must not have side effects.
 */
#ifdef __amd64
#define	regno(X)	(((X) < 4)? REG_RAX - (X) : \
			(((X) > 4)? REG_RAX + 1 - (X) : REG_RSP))
#else
#define	regno(X)	(EAX - (X))
#endif
55 54
56 55 /*
57 56 * Support for SSE instructions
58 57 */
59 58
60 59 /*
61 60 * Decode an SSE instruction. Fill in *inst and return the length of the
62 61 * instruction in bytes. Return 0 if the instruction is not recognized.
63 62 */
64 63 int
65 64 __fex_parse_sse(ucontext_t *uap, sseinst_t *inst)
66 65 {
67 66 unsigned char *ip;
68 67 char *addr;
69 68 int i, dbl, simd, rex, modrm, sib, r;
70 69
71 70 i = 0;
72 71 ip = (unsigned char *)uap->uc_mcontext.gregs[REG_PC];
73 72
74 73 /* look for pseudo-prefixes */
75 74 dbl = 0;
76 75 simd = SIMD;
77 76 if (ip[i] == 0xF3) {
78 77 simd = 0;
79 78 i++;
80 79 } else if (ip[i] == 0x66) {
81 80 dbl = DOUBLE;
82 81 i++;
83 82 } else if (ip[i] == 0xF2) {
84 83 dbl = DOUBLE;
85 84 simd = 0;
86 85 i++;
87 86 }
88 87
89 88 /* look for AMD64 REX prefix */
90 89 rex = 0;
91 90 if (ip[i] >= 0x40 && ip[i] <= 0x4F) {
92 91 rex = ip[i];
93 92 i++;
94 93 }
95 94
96 95 /* parse opcode */
97 96 if (ip[i++] != 0x0F)
98 97 return 0;
99 98 switch (ip[i++]) {
100 99 case 0x2A:
101 100 inst->op = (int)cvtsi2ss + simd + dbl;
102 101 if (!simd)
103 102 inst->op = (int)inst->op + (rex & 8);
104 103 break;
105 104
106 105 case 0x2C:
107 106 inst->op = (int)cvttss2si + simd + dbl;
108 107 if (!simd)
109 108 inst->op = (int)inst->op + (rex & 8);
110 109 break;
111 110
112 111 case 0x2D:
113 112 inst->op = (int)cvtss2si + simd + dbl;
114 113 if (!simd)
115 114 inst->op = (int)inst->op + (rex & 8);
116 115 break;
117 116
118 117 case 0x2E:
119 118 /* oddball: scalar instruction in a SIMD opcode group */
120 119 if (!simd)
121 120 return 0;
122 121 inst->op = (int)ucomiss + dbl;
123 122 break;
124 123
125 124 case 0x2F:
126 125 /* oddball: scalar instruction in a SIMD opcode group */
127 126 if (!simd)
128 127 return 0;
129 128 inst->op = (int)comiss + dbl;
130 129 break;
131 130
132 131 case 0x51:
133 132 inst->op = (int)sqrtss + simd + dbl;
134 133 break;
135 134
136 135 case 0x58:
137 136 inst->op = (int)addss + simd + dbl;
138 137 break;
139 138
140 139 case 0x59:
141 140 inst->op = (int)mulss + simd + dbl;
142 141 break;
143 142
144 143 case 0x5A:
145 144 inst->op = (int)cvtss2sd + simd + dbl;
146 145 break;
147 146
148 147 case 0x5B:
149 148 if (dbl) {
150 149 if (simd)
151 150 inst->op = cvtps2dq;
152 151 else
153 152 return 0;
154 153 } else {
155 154 inst->op = (simd)? cvtdq2ps : cvttps2dq;
156 155 }
157 156 break;
158 157
159 158 case 0x5C:
160 159 inst->op = (int)subss + simd + dbl;
161 160 break;
162 161
163 162 case 0x5D:
164 163 inst->op = (int)minss + simd + dbl;
165 164 break;
166 165
167 166 case 0x5E:
168 167 inst->op = (int)divss + simd + dbl;
169 168 break;
170 169
171 170 case 0x5F:
172 171 inst->op = (int)maxss + simd + dbl;
173 172 break;
174 173
175 174 case 0xC2:
176 175 inst->op = (int)cmpss + simd + dbl;
177 176 break;
178 177
179 178 case 0xE6:
180 179 if (simd) {
181 180 if (dbl)
182 181 inst->op = cvttpd2dq;
183 182 else
184 183 return 0;
185 184 } else {
186 185 inst->op = (dbl)? cvtpd2dq : cvtdq2pd;
187 186 }
188 187 break;
189 188
190 189 default:
191 190 return 0;
192 191 }
193 192
194 193 /* locate operands */
195 194 modrm = ip[i++];
196 195
197 196 if (inst->op == cvtss2si || inst->op == cvttss2si ||
198 197 inst->op == cvtsd2si || inst->op == cvttsd2si ||
199 198 inst->op == cvtss2siq || inst->op == cvttss2siq ||
200 199 inst->op == cvtsd2siq || inst->op == cvttsd2siq) {
201 200 /* op1 is a gp register */
202 201 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
203 202 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.gregs[regno(r)];
204 203 } else if (inst->op == cvtps2pi || inst->op == cvttps2pi ||
205 204 inst->op == cvtpd2pi || inst->op == cvttpd2pi) {
206 205 /* op1 is a mmx register */
207 206 #ifdef __amd64
208 207 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.fp_reg_set.
209 208 fpchip_state.st[(modrm >> 3) & 7];
210 209 #else
211 210 inst->op1 = (sseoperand_t *)(10 * ((modrm >> 3) & 7) +
212 211 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
213 212 fpchip_state.state[7]);
214 213 #endif
215 214 } else {
216 215 /* op1 is a xmm register */
217 216 r = ((rex & 4) << 1) | ((modrm >> 3) & 7);
218 217 inst->op1 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
219 218 fp_reg_set.fpchip_state.xmm[r];
220 219 }
221 220
222 221 if ((modrm >> 6) == 3) {
223 222 if (inst->op == cvtsi2ss || inst->op == cvtsi2sd ||
224 223 inst->op == cvtsi2ssq || inst->op == cvtsi2sdq) {
225 224 /* op2 is a gp register */
226 225 r = ((rex & 1) << 3) | (modrm & 7);
227 226 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.
228 227 gregs[regno(r)];
229 228 } else if (inst->op == cvtpi2ps || inst->op == cvtpi2pd) {
230 229 /* op2 is a mmx register */
231 230 #ifdef __amd64
232 231 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
233 232 fp_reg_set.fpchip_state.st[modrm & 7];
234 233 #else
235 234 inst->op2 = (sseoperand_t *)(10 * (modrm & 7) +
236 235 (char *)&uap->uc_mcontext.fpregs.fp_reg_set.
237 236 fpchip_state.state[7]);
238 237 #endif
239 238 } else {
240 239 /* op2 is a xmm register */
241 240 r = ((rex & 1) << 3) | (modrm & 7);
242 241 inst->op2 = (sseoperand_t *)&uap->uc_mcontext.fpregs.
243 242 fp_reg_set.fpchip_state.xmm[r];
244 243 }
245 244 } else if ((modrm & 0xc7) == 0x05) {
246 245 #ifdef __amd64
247 246 /* address of next instruction + offset */
248 247 r = i + 4;
249 248 if (inst->op == cmpss || inst->op == cmpps ||
250 249 inst->op == cmpsd || inst->op == cmppd)
251 250 r++;
252 251 inst->op2 = (sseoperand_t *)(ip + r + *(int *)(ip + i));
253 252 #else
254 253 /* absolute address */
255 254 inst->op2 = (sseoperand_t *)(*(int *)(ip + i));
256 255 #endif
257 256 i += 4;
258 257 } else {
259 258 /* complex address */
260 259 if ((modrm & 7) == 4) {
261 260 /* parse sib byte */
262 261 sib = ip[i++];
263 262 if ((sib & 7) == 5 && (modrm >> 6) == 0) {
264 263 /* start with absolute address */
265 264 addr = (char *)(uintptr_t)(*(int *)(ip + i));
266 265 i += 4;
267 266 } else {
268 267 /* start with base */
269 268 r = ((rex & 1) << 3) | (sib & 7);
270 269 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
271 270 }
272 271 r = ((rex & 2) << 2) | ((sib >> 3) & 7);
273 272 if (r != 4) {
274 273 /* add scaled index */
275 274 addr += uap->uc_mcontext.gregs[regno(r)]
276 275 << (sib >> 6);
277 276 }
278 277 } else {
279 278 r = ((rex & 1) << 3) | (modrm & 7);
280 279 addr = (char *)uap->uc_mcontext.gregs[regno(r)];
281 280 }
282 281
283 282 /* add displacement, if any */
284 283 if ((modrm >> 6) == 1) {
285 284 addr += (char)ip[i++];
286 285 } else if ((modrm >> 6) == 2) {
287 286 addr += *(int *)(ip + i);
288 287 i += 4;
289 288 }
290 289 inst->op2 = (sseoperand_t *)addr;
291 290 }
292 291
293 292 if (inst->op == cmpss || inst->op == cmpps || inst->op == cmpsd ||
294 293 inst->op == cmppd) {
295 294 /* get the immediate operand */
296 295 inst->imm = ip[i++];
297 296 }
298 297
299 298 return i;
300 299 }
301 300
302 301 static enum fp_class_type
303 302 my_fp_classf(float *x)
304 303 {
305 304 int i = *(int *)x & ~0x80000000;
306 305
307 306 if (i < 0x7f800000) {
308 307 if (i < 0x00800000)
309 308 return ((i == 0)? fp_zero : fp_subnormal);
310 309 return fp_normal;
311 310 }
312 311 else if (i == 0x7f800000)
313 312 return fp_infinity;
314 313 else if (i & 0x400000)
315 314 return fp_quiet;
316 315 else
317 316 return fp_signaling;
318 317 }
319 318
320 319 static enum fp_class_type
321 320 my_fp_class(double *x)
322 321 {
323 322 int i = *(1+(int *)x) & ~0x80000000;
324 323
325 324 if (i < 0x7ff00000) {
326 325 if (i < 0x00100000)
327 326 return (((i | *(int *)x) == 0)? fp_zero : fp_subnormal);
328 327 return fp_normal;
329 328 }
330 329 else if (i == 0x7ff00000 && *(int *)x == 0)
331 330 return fp_infinity;
332 331 else if (i & 0x80000)
333 332 return fp_quiet;
334 333 else
335 334 return fp_signaling;
336 335 }
337 336
338 337 /*
339 338 * Inspect a scalar SSE instruction that incurred an invalid operation
340 339 * exception to determine which type of exception it was.
341 340 */
342 341 static enum fex_exception
343 342 __fex_get_sse_invalid_type(sseinst_t *inst)
344 343 {
345 344 enum fp_class_type t1, t2;
346 345
347 346 /* check op2 for signaling nan */
348 347 t2 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op2->d[0]) :
349 348 my_fp_classf(&inst->op2->f[0]);
350 349 if (t2 == fp_signaling)
351 350 return fex_inv_snan;
352 351
353 352 /* eliminate all single-operand instructions */
354 353 switch (inst->op) {
355 354 case cvtsd2ss:
356 355 case cvtss2sd:
357 356 /* hmm, this shouldn't have happened */
358 357 return (enum fex_exception) -1;
359 358
360 359 case sqrtss:
361 360 case sqrtsd:
362 361 return fex_inv_sqrt;
363 362
364 363 case cvtss2si:
365 364 case cvtsd2si:
366 365 case cvttss2si:
367 366 case cvttsd2si:
368 367 case cvtss2siq:
369 368 case cvtsd2siq:
370 369 case cvttss2siq:
371 370 case cvttsd2siq:
372 371 return fex_inv_int;
373 372 default:
374 373 break;
375 374 }
376 375
377 376 /* check op1 for signaling nan */
378 377 t1 = ((int)inst->op & DOUBLE)? my_fp_class(&inst->op1->d[0]) :
379 378 my_fp_classf(&inst->op1->f[0]);
380 379 if (t1 == fp_signaling)
381 380 return fex_inv_snan;
382 381
383 382 /* check two-operand instructions for other cases */
384 383 switch (inst->op) {
385 384 case cmpss:
386 385 case cmpsd:
387 386 case minss:
388 387 case minsd:
389 388 case maxss:
390 389 case maxsd:
391 390 case comiss:
392 391 case comisd:
393 392 return fex_inv_cmp;
394 393
395 394 case addss:
396 395 case addsd:
397 396 case subss:
398 397 case subsd:
399 398 if (t1 == fp_infinity && t2 == fp_infinity)
400 399 return fex_inv_isi;
401 400 break;
402 401
403 402 case mulss:
404 403 case mulsd:
405 404 if ((t1 == fp_zero && t2 == fp_infinity) ||
406 405 (t2 == fp_zero && t1 == fp_infinity))
407 406 return fex_inv_zmi;
408 407 break;
409 408
410 409 case divss:
411 410 case divsd:
412 411 if (t1 == fp_zero && t2 == fp_zero)
413 412 return fex_inv_zdz;
414 413 if (t1 == fp_infinity && t2 == fp_infinity)
415 414 return fex_inv_idi;
416 415 default:
417 416 break;
418 417 }
419 418
420 419 return (enum fex_exception)-1;
421 420 }
422 421
/*
 * Inline templates.  Each one executes a single scalar SSE instruction
 * on the values its pointer arguments refer to; where there is a
 * result, it is stored through the last argument.
 */

/* single precision: compares, arithmetic, and conversions */
extern void sse_cmpeqss(float *op1, float *op2, int *res);
extern void sse_cmpltss(float *op1, float *op2, int *res);
extern void sse_cmpless(float *op1, float *op2, int *res);
extern void sse_cmpunordss(float *op1, float *op2, int *res);
extern void sse_minss(float *op1, float *op2, float *res);
extern void sse_maxss(float *op1, float *op2, float *res);
extern void sse_addss(float *op1, float *op2, float *res);
extern void sse_subss(float *op1, float *op2, float *res);
extern void sse_mulss(float *op1, float *op2, float *res);
extern void sse_divss(float *op1, float *op2, float *res);
extern void sse_sqrtss(float *op1, float *res);
extern void sse_ucomiss(float *op1, float *op2);
extern void sse_comiss(float *op1, float *op2);
extern void sse_cvtss2sd(float *op1, double *res);
extern void sse_cvtsi2ss(int *op1, float *res);
extern void sse_cvttss2si(float *op1, int *res);
extern void sse_cvtss2si(float *op1, int *res);
#ifdef __amd64
extern void sse_cvtsi2ssq(long long *op1, float *res);
extern void sse_cvttss2siq(float *op1, long long *res);
extern void sse_cvtss2siq(float *op1, long long *res);
#endif

/* double precision: compares, arithmetic, and conversions */
extern void sse_cmpeqsd(double *op1, double *op2, long long *res);
extern void sse_cmpltsd(double *op1, double *op2, long long *res);
extern void sse_cmplesd(double *op1, double *op2, long long *res);
extern void sse_cmpunordsd(double *op1, double *op2, long long *res);
extern void sse_minsd(double *op1, double *op2, double *res);
extern void sse_maxsd(double *op1, double *op2, double *res);
extern void sse_addsd(double *op1, double *op2, double *res);
extern void sse_subsd(double *op1, double *op2, double *res);
extern void sse_mulsd(double *op1, double *op2, double *res);
extern void sse_divsd(double *op1, double *op2, double *res);
extern void sse_sqrtsd(double *op1, double *res);
extern void sse_ucomisd(double *op1, double *op2);
extern void sse_comisd(double *op1, double *op2);
extern void sse_cvtsd2ss(double *op1, float *res);
extern void sse_cvtsi2sd(int *op1, double *res);
extern void sse_cvttsd2si(double *op1, int *res);
extern void sse_cvtsd2si(double *op1, int *res);
#ifdef __amd64
extern void sse_cvtsi2sdq(long long *op1, double *res);
extern void sse_cvttsd2siq(double *op1, long long *res);
extern void sse_cvtsd2siq(double *op1, long long *res);
#endif
468 467
469 468 /*
470 469 * Fill in *info with the operands, default untrapped result, and
471 470 * flags produced by a scalar SSE instruction, and return the type
472 471 * of trapped exception (if any). On entry, the mxcsr must have
473 472 * all exceptions masked and all flags clear. The same conditions
474 473 * will hold on exit.
475 474 *
476 475 * This routine does not work if the instruction specified by *inst
477 476 * is not a scalar instruction.
478 477 */
479 478 enum fex_exception
480 479 __fex_get_sse_op(ucontext_t *uap, sseinst_t *inst, fex_info_t *info)
481 480 {
482 481 unsigned int e, te, mxcsr, oldmxcsr, subnorm;
483 482
484 483 /*
485 484 * Perform the operation with traps disabled and check the
486 485 * exception flags. If the underflow trap was enabled, also
487 486 * check for an exact subnormal result.
488 487 */
489 488 __fenv_getmxcsr(&oldmxcsr);
490 489 subnorm = 0;
491 490 if ((int)inst->op & DOUBLE) {
492 491 if (inst->op == cvtsi2sd) {
493 492 info->op1.type = fex_int;
494 493 info->op1.val.i = inst->op2->i[0];
495 494 info->op2.type = fex_nodata;
496 495 } else if (inst->op == cvtsi2sdq) {
497 496 info->op1.type = fex_llong;
498 497 info->op1.val.l = inst->op2->l[0];
499 498 info->op2.type = fex_nodata;
500 499 } else if (inst->op == sqrtsd || inst->op == cvtsd2ss ||
501 500 inst->op == cvttsd2si || inst->op == cvtsd2si ||
502 501 inst->op == cvttsd2siq || inst->op == cvtsd2siq) {
503 502 info->op1.type = fex_double;
504 503 info->op1.val.d = inst->op2->d[0];
505 504 info->op2.type = fex_nodata;
506 505 } else {
507 506 info->op1.type = fex_double;
508 507 info->op1.val.d = inst->op1->d[0];
509 508 info->op2.type = fex_double;
510 509 info->op2.val.d = inst->op2->d[0];
511 510 }
512 511 info->res.type = fex_double;
513 512 switch (inst->op) {
514 513 case cmpsd:
515 514 info->op = fex_cmp;
516 515 info->res.type = fex_llong;
517 516 switch (inst->imm & 3) {
518 517 case 0:
519 518 sse_cmpeqsd(&info->op1.val.d, &info->op2.val.d,
520 519 &info->res.val.l);
521 520 break;
522 521
523 522 case 1:
524 523 sse_cmpltsd(&info->op1.val.d, &info->op2.val.d,
525 524 &info->res.val.l);
526 525 break;
527 526
528 527 case 2:
529 528 sse_cmplesd(&info->op1.val.d, &info->op2.val.d,
530 529 &info->res.val.l);
531 530 break;
532 531
533 532 case 3:
534 533 sse_cmpunordsd(&info->op1.val.d,
535 534 &info->op2.val.d, &info->res.val.l);
536 535 }
537 536 if (inst->imm & 4)
538 537 info->res.val.l ^= 0xffffffffffffffffull;
539 538 break;
540 539
541 540 case minsd:
542 541 info->op = fex_other;
543 542 sse_minsd(&info->op1.val.d, &info->op2.val.d,
544 543 &info->res.val.d);
545 544 break;
546 545
547 546 case maxsd:
548 547 info->op = fex_other;
549 548 sse_maxsd(&info->op1.val.d, &info->op2.val.d,
550 549 &info->res.val.d);
551 550 break;
552 551
553 552 case addsd:
554 553 info->op = fex_add;
555 554 sse_addsd(&info->op1.val.d, &info->op2.val.d,
556 555 &info->res.val.d);
557 556 if (my_fp_class(&info->res.val.d) == fp_subnormal)
558 557 subnorm = 1;
559 558 break;
560 559
561 560 case subsd:
562 561 info->op = fex_sub;
563 562 sse_subsd(&info->op1.val.d, &info->op2.val.d,
564 563 &info->res.val.d);
565 564 if (my_fp_class(&info->res.val.d) == fp_subnormal)
566 565 subnorm = 1;
567 566 break;
568 567
569 568 case mulsd:
570 569 info->op = fex_mul;
571 570 sse_mulsd(&info->op1.val.d, &info->op2.val.d,
572 571 &info->res.val.d);
573 572 if (my_fp_class(&info->res.val.d) == fp_subnormal)
574 573 subnorm = 1;
575 574 break;
576 575
577 576 case divsd:
578 577 info->op = fex_div;
579 578 sse_divsd(&info->op1.val.d, &info->op2.val.d,
580 579 &info->res.val.d);
581 580 if (my_fp_class(&info->res.val.d) == fp_subnormal)
582 581 subnorm = 1;
583 582 break;
584 583
585 584 case sqrtsd:
586 585 info->op = fex_sqrt;
587 586 sse_sqrtsd(&info->op1.val.d, &info->res.val.d);
588 587 break;
589 588
590 589 case cvtsd2ss:
591 590 info->op = fex_cnvt;
592 591 info->res.type = fex_float;
593 592 sse_cvtsd2ss(&info->op1.val.d, &info->res.val.f);
594 593 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
595 594 subnorm = 1;
596 595 break;
597 596
598 597 case cvtsi2sd:
599 598 info->op = fex_cnvt;
600 599 sse_cvtsi2sd(&info->op1.val.i, &info->res.val.d);
601 600 break;
602 601
603 602 case cvttsd2si:
604 603 info->op = fex_cnvt;
605 604 info->res.type = fex_int;
606 605 sse_cvttsd2si(&info->op1.val.d, &info->res.val.i);
607 606 break;
608 607
609 608 case cvtsd2si:
610 609 info->op = fex_cnvt;
611 610 info->res.type = fex_int;
612 611 sse_cvtsd2si(&info->op1.val.d, &info->res.val.i);
613 612 break;
614 613
615 614 #ifdef __amd64
616 615 case cvtsi2sdq:
617 616 info->op = fex_cnvt;
618 617 sse_cvtsi2sdq(&info->op1.val.l, &info->res.val.d);
619 618 break;
620 619
621 620 case cvttsd2siq:
622 621 info->op = fex_cnvt;
623 622 info->res.type = fex_llong;
624 623 sse_cvttsd2siq(&info->op1.val.d, &info->res.val.l);
625 624 break;
626 625
627 626 case cvtsd2siq:
628 627 info->op = fex_cnvt;
629 628 info->res.type = fex_llong;
630 629 sse_cvtsd2siq(&info->op1.val.d, &info->res.val.l);
631 630 break;
632 631 #endif
633 632
634 633 case ucomisd:
635 634 info->op = fex_cmp;
636 635 info->res.type = fex_nodata;
637 636 sse_ucomisd(&info->op1.val.d, &info->op2.val.d);
638 637 break;
639 638
640 639 case comisd:
641 640 info->op = fex_cmp;
642 641 info->res.type = fex_nodata;
643 642 sse_comisd(&info->op1.val.d, &info->op2.val.d);
644 643 break;
645 644 default:
646 645 break;
647 646 }
648 647 } else {
649 648 if (inst->op == cvtsi2ss) {
650 649 info->op1.type = fex_int;
651 650 info->op1.val.i = inst->op2->i[0];
652 651 info->op2.type = fex_nodata;
653 652 } else if (inst->op == cvtsi2ssq) {
654 653 info->op1.type = fex_llong;
655 654 info->op1.val.l = inst->op2->l[0];
656 655 info->op2.type = fex_nodata;
657 656 } else if (inst->op == sqrtss || inst->op == cvtss2sd ||
658 657 inst->op == cvttss2si || inst->op == cvtss2si ||
659 658 inst->op == cvttss2siq || inst->op == cvtss2siq) {
660 659 info->op1.type = fex_float;
661 660 info->op1.val.f = inst->op2->f[0];
662 661 info->op2.type = fex_nodata;
663 662 } else {
664 663 info->op1.type = fex_float;
665 664 info->op1.val.f = inst->op1->f[0];
666 665 info->op2.type = fex_float;
667 666 info->op2.val.f = inst->op2->f[0];
668 667 }
669 668 info->res.type = fex_float;
670 669 switch (inst->op) {
671 670 case cmpss:
672 671 info->op = fex_cmp;
673 672 info->res.type = fex_int;
674 673 switch (inst->imm & 3) {
675 674 case 0:
676 675 sse_cmpeqss(&info->op1.val.f, &info->op2.val.f,
677 676 &info->res.val.i);
678 677 break;
679 678
680 679 case 1:
681 680 sse_cmpltss(&info->op1.val.f, &info->op2.val.f,
682 681 &info->res.val.i);
683 682 break;
684 683
685 684 case 2:
686 685 sse_cmpless(&info->op1.val.f, &info->op2.val.f,
687 686 &info->res.val.i);
688 687 break;
689 688
690 689 case 3:
691 690 sse_cmpunordss(&info->op1.val.f,
692 691 &info->op2.val.f, &info->res.val.i);
693 692 }
694 693 if (inst->imm & 4)
695 694 info->res.val.i ^= 0xffffffffu;
696 695 break;
697 696
698 697 case minss:
699 698 info->op = fex_other;
700 699 sse_minss(&info->op1.val.f, &info->op2.val.f,
701 700 &info->res.val.f);
702 701 break;
703 702
704 703 case maxss:
705 704 info->op = fex_other;
706 705 sse_maxss(&info->op1.val.f, &info->op2.val.f,
707 706 &info->res.val.f);
708 707 break;
709 708
710 709 case addss:
711 710 info->op = fex_add;
712 711 sse_addss(&info->op1.val.f, &info->op2.val.f,
713 712 &info->res.val.f);
714 713 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
715 714 subnorm = 1;
716 715 break;
717 716
718 717 case subss:
719 718 info->op = fex_sub;
720 719 sse_subss(&info->op1.val.f, &info->op2.val.f,
721 720 &info->res.val.f);
722 721 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
723 722 subnorm = 1;
724 723 break;
725 724
726 725 case mulss:
727 726 info->op = fex_mul;
728 727 sse_mulss(&info->op1.val.f, &info->op2.val.f,
729 728 &info->res.val.f);
730 729 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
731 730 subnorm = 1;
732 731 break;
733 732
734 733 case divss:
735 734 info->op = fex_div;
736 735 sse_divss(&info->op1.val.f, &info->op2.val.f,
737 736 &info->res.val.f);
738 737 if (my_fp_classf(&info->res.val.f) == fp_subnormal)
739 738 subnorm = 1;
740 739 break;
741 740
742 741 case sqrtss:
743 742 info->op = fex_sqrt;
744 743 sse_sqrtss(&info->op1.val.f, &info->res.val.f);
745 744 break;
746 745
747 746 case cvtss2sd:
748 747 info->op = fex_cnvt;
749 748 info->res.type = fex_double;
750 749 sse_cvtss2sd(&info->op1.val.f, &info->res.val.d);
751 750 break;
752 751
753 752 case cvtsi2ss:
754 753 info->op = fex_cnvt;
755 754 sse_cvtsi2ss(&info->op1.val.i, &info->res.val.f);
756 755 break;
757 756
758 757 case cvttss2si:
759 758 info->op = fex_cnvt;
760 759 info->res.type = fex_int;
761 760 sse_cvttss2si(&info->op1.val.f, &info->res.val.i);
762 761 break;
763 762
764 763 case cvtss2si:
765 764 info->op = fex_cnvt;
766 765 info->res.type = fex_int;
767 766 sse_cvtss2si(&info->op1.val.f, &info->res.val.i);
768 767 break;
769 768
770 769 #ifdef __amd64
771 770 case cvtsi2ssq:
772 771 info->op = fex_cnvt;
773 772 sse_cvtsi2ssq(&info->op1.val.l, &info->res.val.f);
774 773 break;
775 774
776 775 case cvttss2siq:
777 776 info->op = fex_cnvt;
778 777 info->res.type = fex_llong;
779 778 sse_cvttss2siq(&info->op1.val.f, &info->res.val.l);
780 779 break;
781 780
782 781 case cvtss2siq:
783 782 info->op = fex_cnvt;
784 783 info->res.type = fex_llong;
785 784 sse_cvtss2siq(&info->op1.val.f, &info->res.val.l);
786 785 break;
787 786 #endif
788 787
789 788 case ucomiss:
790 789 info->op = fex_cmp;
791 790 info->res.type = fex_nodata;
792 791 sse_ucomiss(&info->op1.val.f, &info->op2.val.f);
793 792 break;
794 793
795 794 case comiss:
796 795 info->op = fex_cmp;
797 796 info->res.type = fex_nodata;
798 797 sse_comiss(&info->op1.val.f, &info->op2.val.f);
799 798 break;
800 799 default:
801 800 break;
802 801 }
803 802 }
804 803 __fenv_getmxcsr(&mxcsr);
805 804 info->flags = mxcsr & 0x3d;
806 805 __fenv_setmxcsr(&oldmxcsr);
807 806
808 807 /* determine which exception would have been trapped */
809 808 te = ~(uap->uc_mcontext.fpregs.fp_reg_set.fpchip_state.mxcsr
810 809 >> 7) & 0x3d;
811 810 e = mxcsr & te;
812 811 if (e & FE_INVALID)
813 812 return __fex_get_sse_invalid_type(inst);
814 813 if (e & FE_DIVBYZERO)
815 814 return fex_division;
816 815 if (e & FE_OVERFLOW)
817 816 return fex_overflow;
818 817 if ((e & FE_UNDERFLOW) || (subnorm && (te & FE_UNDERFLOW)))
819 818 return fex_underflow;
820 819 if (e & FE_INEXACT)
821 820 return fex_inexact;
822 821 return (enum fex_exception)-1;
823 822 }
824 823
825 824 /*
826 825 * Emulate a SIMD SSE instruction to determine which exceptions occur
827 826 * in each part. For i = 0, 1, 2, and 3, set e[i] to indicate the
828 827 * trapped exception that would occur if the i-th part of the SIMD
829 828 * instruction were executed in isolation; set e[i] to -1 if no
830 829 * trapped exception would occur in this part. Also fill in info[i]
831 830 * with the corresponding operands, default untrapped result, and
832 831 * flags.
833 832 *
834 833 * This routine does not work if the instruction specified by *inst
835 834 * is not a SIMD instruction.
836 835 */
837 836 void
838 837 __fex_get_simd_op(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
839 838 fex_info_t *info)
840 839 {
841 840 sseinst_t dummy;
842 841 int i;
843 842
844 843 e[0] = e[1] = e[2] = e[3] = -1;
845 844
846 845 /* perform each part of the SIMD operation */
847 846 switch (inst->op) {
848 847 case cmpps:
849 848 dummy.op = cmpss;
850 849 dummy.imm = inst->imm;
851 850 for (i = 0; i < 4; i++) {
852 851 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
853 852 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
854 853 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
855 854 }
856 855 break;
857 856
858 857 case minps:
859 858 dummy.op = minss;
860 859 for (i = 0; i < 4; i++) {
861 860 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
862 861 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
863 862 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
864 863 }
865 864 break;
866 865
867 866 case maxps:
868 867 dummy.op = maxss;
869 868 for (i = 0; i < 4; i++) {
870 869 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
871 870 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
872 871 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
873 872 }
874 873 break;
875 874
876 875 case addps:
877 876 dummy.op = addss;
878 877 for (i = 0; i < 4; i++) {
879 878 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
880 879 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
881 880 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
882 881 }
883 882 break;
884 883
885 884 case subps:
886 885 dummy.op = subss;
887 886 for (i = 0; i < 4; i++) {
888 887 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
889 888 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
890 889 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
891 890 }
892 891 break;
893 892
894 893 case mulps:
895 894 dummy.op = mulss;
896 895 for (i = 0; i < 4; i++) {
897 896 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
898 897 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
899 898 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
900 899 }
901 900 break;
902 901
903 902 case divps:
904 903 dummy.op = divss;
905 904 for (i = 0; i < 4; i++) {
906 905 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
907 906 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
908 907 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
909 908 }
910 909 break;
911 910
912 911 case sqrtps:
913 912 dummy.op = sqrtss;
914 913 for (i = 0; i < 4; i++) {
915 914 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
916 915 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
917 916 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
918 917 }
919 918 break;
920 919
921 920 case cvtdq2ps:
922 921 dummy.op = cvtsi2ss;
923 922 for (i = 0; i < 4; i++) {
924 923 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
925 924 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
926 925 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
927 926 }
928 927 break;
929 928
930 929 case cvttps2dq:
931 930 dummy.op = cvttss2si;
932 931 for (i = 0; i < 4; i++) {
933 932 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
934 933 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
935 934 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
936 935 }
937 936 break;
938 937
939 938 case cvtps2dq:
940 939 dummy.op = cvtss2si;
941 940 for (i = 0; i < 4; i++) {
942 941 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
943 942 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
944 943 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
945 944 }
946 945 break;
947 946
948 947 case cvtpi2ps:
949 948 dummy.op = cvtsi2ss;
950 949 for (i = 0; i < 2; i++) {
951 950 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
952 951 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
953 952 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
954 953 }
955 954 break;
956 955
957 956 case cvttps2pi:
958 957 dummy.op = cvttss2si;
959 958 for (i = 0; i < 2; i++) {
960 959 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
961 960 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
962 961 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
963 962 }
964 963 break;
965 964
966 965 case cvtps2pi:
967 966 dummy.op = cvtss2si;
968 967 for (i = 0; i < 2; i++) {
969 968 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
970 969 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
971 970 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
972 971 }
973 972 break;
974 973
975 974 case cmppd:
976 975 dummy.op = cmpsd;
977 976 dummy.imm = inst->imm;
978 977 for (i = 0; i < 2; i++) {
979 978 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
980 979 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
981 980 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
982 981 }
983 982 break;
984 983
985 984 case minpd:
986 985 dummy.op = minsd;
987 986 for (i = 0; i < 2; i++) {
988 987 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
989 988 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
990 989 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
991 990 }
992 991 break;
993 992
994 993 case maxpd:
995 994 dummy.op = maxsd;
996 995 for (i = 0; i < 2; i++) {
997 996 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
998 997 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
999 998 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1000 999 }
1001 1000 break;
1002 1001
1003 1002 case addpd:
1004 1003 dummy.op = addsd;
1005 1004 for (i = 0; i < 2; i++) {
1006 1005 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1007 1006 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1008 1007 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1009 1008 }
1010 1009 break;
1011 1010
1012 1011 case subpd:
1013 1012 dummy.op = subsd;
1014 1013 for (i = 0; i < 2; i++) {
1015 1014 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1016 1015 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1017 1016 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1018 1017 }
1019 1018 break;
1020 1019
1021 1020 case mulpd:
1022 1021 dummy.op = mulsd;
1023 1022 for (i = 0; i < 2; i++) {
1024 1023 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1025 1024 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1026 1025 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1027 1026 }
1028 1027 break;
1029 1028
1030 1029 case divpd:
1031 1030 dummy.op = divsd;
1032 1031 for (i = 0; i < 2; i++) {
1033 1032 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1034 1033 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1035 1034 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1036 1035 }
1037 1036 break;
1038 1037
1039 1038 case sqrtpd:
1040 1039 dummy.op = sqrtsd;
1041 1040 for (i = 0; i < 2; i++) {
1042 1041 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1043 1042 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1044 1043 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1045 1044 }
1046 1045 break;
1047 1046
1048 1047 case cvtpi2pd:
1049 1048 case cvtdq2pd:
1050 1049 dummy.op = cvtsi2sd;
1051 1050 for (i = 0; i < 2; i++) {
1052 1051 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1053 1052 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1054 1053 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1055 1054 }
1056 1055 break;
1057 1056
1058 1057 case cvttpd2pi:
1059 1058 case cvttpd2dq:
1060 1059 dummy.op = cvttsd2si;
1061 1060 for (i = 0; i < 2; i++) {
1062 1061 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1063 1062 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1064 1063 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1065 1064 }
1066 1065 break;
1067 1066
1068 1067 case cvtpd2pi:
1069 1068 case cvtpd2dq:
1070 1069 dummy.op = cvtsd2si;
1071 1070 for (i = 0; i < 2; i++) {
1072 1071 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1073 1072 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1074 1073 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1075 1074 }
1076 1075 break;
1077 1076
1078 1077 case cvtps2pd:
1079 1078 dummy.op = cvtss2sd;
1080 1079 for (i = 0; i < 2; i++) {
1081 1080 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1082 1081 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1083 1082 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1084 1083 }
1085 1084 break;
1086 1085
1087 1086 case cvtpd2ps:
1088 1087 dummy.op = cvtsd2ss;
1089 1088 for (i = 0; i < 2; i++) {
1090 1089 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1091 1090 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1092 1091 e[i] = __fex_get_sse_op(uap, &dummy, &info[i]);
1093 1092 }
1094 1093 default:
1095 1094 break;
1096 1095 }
1097 1096 }
1098 1097
1099 1098 /*
1100 1099 * Store the result value from *info in the destination of the scalar
1101 1100 * SSE instruction specified by *inst. If no result is given but the
1102 1101 * exception is underflow or overflow, supply the default trapped result.
1103 1102 *
1104 1103 * This routine does not work if the instruction specified by *inst
1105 1104 * is not a scalar instruction.
1106 1105 */
1107 1106 void
1108 1107 __fex_st_sse_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception e,
1109 1108 fex_info_t *info)
1110 1109 {
1111 1110 int i = 0;
1112 1111 long long l = 0L;;
1113 1112 float f = 0.0, fscl;
1114 1113 double d = 0.0L, dscl;
1115 1114
1116 1115 /* for compares that write eflags, just set the flags
1117 1116 to indicate "unordered" */
1118 1117 if (inst->op == ucomiss || inst->op == comiss ||
1119 1118 inst->op == ucomisd || inst->op == comisd) {
1120 1119 uap->uc_mcontext.gregs[REG_PS] |= 0x45;
1121 1120 return;
1122 1121 }
1123 1122
1124 1123 /* if info doesn't specify a result value, try to generate
1125 1124 the default trapped result */
1126 1125 if (info->res.type == fex_nodata) {
1127 1126 /* set scale factors for exponent wrapping */
1128 1127 switch (e) {
1129 1128 case fex_overflow:
1130 1129 fscl = 1.262177448e-29f; /* 2^-96 */
1131 1130 dscl = 6.441148769597133308e-232; /* 2^-768 */
1132 1131 break;
1133 1132
1134 1133 case fex_underflow:
1135 1134 fscl = 7.922816251e+28f; /* 2^96 */
1136 1135 dscl = 1.552518092300708935e+231; /* 2^768 */
1137 1136 break;
1138 1137
1139 1138 default:
1140 1139 (void) __fex_get_sse_op(uap, inst, info);
1141 1140 if (info->res.type == fex_nodata)
1142 1141 return;
1143 1142 goto stuff;
1144 1143 }
1145 1144
1146 1145 /* generate the wrapped result */
1147 1146 if (inst->op == cvtsd2ss) {
1148 1147 info->op1.type = fex_double;
1149 1148 info->op1.val.d = inst->op2->d[0];
1150 1149 info->op2.type = fex_nodata;
1151 1150 info->res.type = fex_float;
1152 1151 info->res.val.f = (float)(fscl * (fscl *
1153 1152 info->op1.val.d));
1154 1153 } else if ((int)inst->op & DOUBLE) {
1155 1154 info->op1.type = fex_double;
1156 1155 info->op1.val.d = inst->op1->d[0];
1157 1156 info->op2.type = fex_double;
1158 1157 info->op2.val.d = inst->op2->d[0];
1159 1158 info->res.type = fex_double;
1160 1159 switch (inst->op) {
1161 1160 case addsd:
1162 1161 info->res.val.d = dscl * (dscl *
1163 1162 info->op1.val.d + dscl * info->op2.val.d);
1164 1163 break;
1165 1164
1166 1165 case subsd:
1167 1166 info->res.val.d = dscl * (dscl *
1168 1167 info->op1.val.d - dscl * info->op2.val.d);
1169 1168 break;
1170 1169
1171 1170 case mulsd:
1172 1171 info->res.val.d = (dscl * info->op1.val.d) *
1173 1172 (dscl * info->op2.val.d);
1174 1173 break;
1175 1174
1176 1175 case divsd:
1177 1176 info->res.val.d = (dscl * info->op1.val.d) /
1178 1177 (info->op2.val.d / dscl);
1179 1178 break;
1180 1179
1181 1180 default:
1182 1181 return;
1183 1182 }
1184 1183 } else {
1185 1184 info->op1.type = fex_float;
1186 1185 info->op1.val.f = inst->op1->f[0];
1187 1186 info->op2.type = fex_float;
1188 1187 info->op2.val.f = inst->op2->f[0];
1189 1188 info->res.type = fex_float;
1190 1189 switch (inst->op) {
1191 1190 case addss:
1192 1191 info->res.val.f = fscl * (fscl *
1193 1192 info->op1.val.f + fscl * info->op2.val.f);
1194 1193 break;
1195 1194
1196 1195 case subss:
1197 1196 info->res.val.f = fscl * (fscl *
1198 1197 info->op1.val.f - fscl * info->op2.val.f);
1199 1198 break;
1200 1199
1201 1200 case mulss:
1202 1201 info->res.val.f = (fscl * info->op1.val.f) *
1203 1202 (fscl * info->op2.val.f);
1204 1203 break;
1205 1204
1206 1205 case divss:
1207 1206 info->res.val.f = (fscl * info->op1.val.f) /
1208 1207 (info->op2.val.f / fscl);
1209 1208 break;
1210 1209
1211 1210 default:
1212 1211 return;
1213 1212 }
1214 1213 }
1215 1214 }
1216 1215
1217 1216 /* put the result in the destination */
1218 1217 stuff:
1219 1218 if (inst->op == cmpss || inst->op == cvttss2si || inst->op == cvtss2si
1220 1219 || inst->op == cvttsd2si || inst->op == cvtsd2si) {
1221 1220 switch (info->res.type) {
1222 1221 case fex_int:
1223 1222 i = info->res.val.i;
1224 1223 break;
1225 1224
1226 1225 case fex_llong:
1227 1226 i = info->res.val.l;
1228 1227 break;
1229 1228
1230 1229 case fex_float:
1231 1230 i = info->res.val.f;
1232 1231 break;
1233 1232
1234 1233 case fex_double:
1235 1234 i = info->res.val.d;
1236 1235 break;
1237 1236
1238 1237 case fex_ldouble:
1239 1238 i = info->res.val.q;
1240 1239 break;
1241 1240
1242 1241 default:
1243 1242 break;
1244 1243 }
1245 1244 inst->op1->i[0] = i;
1246 1245 } else if (inst->op == cmpsd || inst->op == cvttss2siq ||
1247 1246 inst->op == cvtss2siq || inst->op == cvttsd2siq ||
1248 1247 inst->op == cvtsd2siq) {
1249 1248 switch (info->res.type) {
1250 1249 case fex_int:
1251 1250 l = info->res.val.i;
1252 1251 break;
1253 1252
1254 1253 case fex_llong:
1255 1254 l = info->res.val.l;
1256 1255 break;
1257 1256
1258 1257 case fex_float:
1259 1258 l = info->res.val.f;
1260 1259 break;
1261 1260
1262 1261 case fex_double:
1263 1262 l = info->res.val.d;
1264 1263 break;
1265 1264
1266 1265 case fex_ldouble:
1267 1266 l = info->res.val.q;
1268 1267 break;
1269 1268
1270 1269 default:
1271 1270 break;
1272 1271 }
1273 1272 inst->op1->l[0] = l;
1274 1273 } else if ((((int)inst->op & DOUBLE) && inst->op != cvtsd2ss) ||
1275 1274 inst->op == cvtss2sd) {
1276 1275 switch (info->res.type) {
1277 1276 case fex_int:
1278 1277 d = info->res.val.i;
1279 1278 break;
1280 1279
1281 1280 case fex_llong:
1282 1281 d = info->res.val.l;
1283 1282 break;
1284 1283
1285 1284 case fex_float:
1286 1285 d = info->res.val.f;
1287 1286 break;
1288 1287
1289 1288 case fex_double:
1290 1289 d = info->res.val.d;
1291 1290 break;
1292 1291
1293 1292 case fex_ldouble:
1294 1293 d = info->res.val.q;
1295 1294 break;
1296 1295
1297 1296 default:
1298 1297 break;
1299 1298 }
1300 1299 inst->op1->d[0] = d;
1301 1300 } else {
1302 1301 switch (info->res.type) {
1303 1302 case fex_int:
1304 1303 f = info->res.val.i;
1305 1304 break;
1306 1305
1307 1306 case fex_llong:
1308 1307 f = info->res.val.l;
1309 1308 break;
1310 1309
1311 1310 case fex_float:
1312 1311 f = info->res.val.f;
1313 1312 break;
1314 1313
1315 1314 case fex_double:
1316 1315 f = info->res.val.d;
1317 1316 break;
1318 1317
1319 1318 case fex_ldouble:
1320 1319 f = info->res.val.q;
1321 1320 break;
1322 1321
1323 1322 default:
1324 1323 break;
1325 1324 }
1326 1325 inst->op1->f[0] = f;
1327 1326 }
1328 1327 }
1329 1328
1330 1329 /*
1331 1330 * Store the results from a SIMD instruction. For each i, store
1332 1331 * the result value from info[i] in the i-th part of the destination
1333 1332 * of the SIMD SSE instruction specified by *inst. If no result
1334 1333 * is given but the exception indicated by e[i] is underflow or
1335 1334 * overflow, supply the default trapped result.
1336 1335 *
1337 1336 * This routine does not work if the instruction specified by *inst
1338 1337 * is not a SIMD instruction.
1339 1338 */
1340 1339 void
1341 1340 __fex_st_simd_result(ucontext_t *uap, sseinst_t *inst, enum fex_exception *e,
1342 1341 fex_info_t *info)
1343 1342 {
1344 1343 sseinst_t dummy;
1345 1344 int i;
1346 1345
1347 1346 /* store each part */
1348 1347 switch (inst->op) {
1349 1348 case cmpps:
1350 1349 dummy.op = cmpss;
1351 1350 dummy.imm = inst->imm;
1352 1351 for (i = 0; i < 4; i++) {
1353 1352 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1354 1353 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1355 1354 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1356 1355 }
1357 1356 break;
1358 1357
1359 1358 case minps:
1360 1359 dummy.op = minss;
1361 1360 for (i = 0; i < 4; i++) {
1362 1361 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1363 1362 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1364 1363 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1365 1364 }
1366 1365 break;
1367 1366
1368 1367 case maxps:
1369 1368 dummy.op = maxss;
1370 1369 for (i = 0; i < 4; i++) {
1371 1370 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1372 1371 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1373 1372 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1374 1373 }
1375 1374 break;
1376 1375
1377 1376 case addps:
1378 1377 dummy.op = addss;
1379 1378 for (i = 0; i < 4; i++) {
1380 1379 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1381 1380 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1382 1381 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1383 1382 }
1384 1383 break;
1385 1384
1386 1385 case subps:
1387 1386 dummy.op = subss;
1388 1387 for (i = 0; i < 4; i++) {
1389 1388 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1390 1389 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1391 1390 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1392 1391 }
1393 1392 break;
1394 1393
1395 1394 case mulps:
1396 1395 dummy.op = mulss;
1397 1396 for (i = 0; i < 4; i++) {
1398 1397 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1399 1398 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1400 1399 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1401 1400 }
1402 1401 break;
1403 1402
1404 1403 case divps:
1405 1404 dummy.op = divss;
1406 1405 for (i = 0; i < 4; i++) {
1407 1406 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1408 1407 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1409 1408 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1410 1409 }
1411 1410 break;
1412 1411
1413 1412 case sqrtps:
1414 1413 dummy.op = sqrtss;
1415 1414 for (i = 0; i < 4; i++) {
1416 1415 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1417 1416 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1418 1417 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1419 1418 }
1420 1419 break;
1421 1420
1422 1421 case cvtdq2ps:
1423 1422 dummy.op = cvtsi2ss;
1424 1423 for (i = 0; i < 4; i++) {
1425 1424 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1426 1425 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1427 1426 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1428 1427 }
1429 1428 break;
1430 1429
1431 1430 case cvttps2dq:
1432 1431 dummy.op = cvttss2si;
1433 1432 for (i = 0; i < 4; i++) {
1434 1433 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1435 1434 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1436 1435 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1437 1436 }
1438 1437 break;
1439 1438
1440 1439 case cvtps2dq:
1441 1440 dummy.op = cvtss2si;
1442 1441 for (i = 0; i < 4; i++) {
1443 1442 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1444 1443 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1445 1444 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1446 1445 }
1447 1446 break;
1448 1447
1449 1448 case cvtpi2ps:
1450 1449 dummy.op = cvtsi2ss;
1451 1450 for (i = 0; i < 2; i++) {
1452 1451 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1453 1452 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1454 1453 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1455 1454 }
1456 1455 break;
1457 1456
1458 1457 case cvttps2pi:
1459 1458 dummy.op = cvttss2si;
1460 1459 for (i = 0; i < 2; i++) {
1461 1460 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1462 1461 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1463 1462 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1464 1463 }
1465 1464 break;
1466 1465
1467 1466 case cvtps2pi:
1468 1467 dummy.op = cvtss2si;
1469 1468 for (i = 0; i < 2; i++) {
1470 1469 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1471 1470 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1472 1471 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1473 1472 }
1474 1473 break;
1475 1474
1476 1475 case cmppd:
1477 1476 dummy.op = cmpsd;
1478 1477 dummy.imm = inst->imm;
1479 1478 for (i = 0; i < 2; i++) {
1480 1479 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1481 1480 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1482 1481 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1483 1482 }
1484 1483 break;
1485 1484
1486 1485 case minpd:
1487 1486 dummy.op = minsd;
1488 1487 for (i = 0; i < 2; i++) {
1489 1488 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1490 1489 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1491 1490 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1492 1491 }
1493 1492 break;
1494 1493
1495 1494 case maxpd:
1496 1495 dummy.op = maxsd;
1497 1496 for (i = 0; i < 2; i++) {
1498 1497 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1499 1498 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1500 1499 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1501 1500 }
1502 1501 break;
1503 1502
1504 1503 case addpd:
1505 1504 dummy.op = addsd;
1506 1505 for (i = 0; i < 2; i++) {
1507 1506 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1508 1507 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1509 1508 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1510 1509 }
1511 1510 break;
1512 1511
1513 1512 case subpd:
1514 1513 dummy.op = subsd;
1515 1514 for (i = 0; i < 2; i++) {
1516 1515 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1517 1516 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1518 1517 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1519 1518 }
1520 1519 break;
1521 1520
1522 1521 case mulpd:
1523 1522 dummy.op = mulsd;
1524 1523 for (i = 0; i < 2; i++) {
1525 1524 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1526 1525 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1527 1526 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1528 1527 }
1529 1528 break;
1530 1529
1531 1530 case divpd:
1532 1531 dummy.op = divsd;
1533 1532 for (i = 0; i < 2; i++) {
1534 1533 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1535 1534 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1536 1535 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1537 1536 }
1538 1537 break;
1539 1538
1540 1539 case sqrtpd:
1541 1540 dummy.op = sqrtsd;
1542 1541 for (i = 0; i < 2; i++) {
1543 1542 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1544 1543 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1545 1544 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1546 1545 }
1547 1546 break;
1548 1547
1549 1548 case cvtpi2pd:
1550 1549 case cvtdq2pd:
1551 1550 dummy.op = cvtsi2sd;
1552 1551 for (i = 0; i < 2; i++) {
1553 1552 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1554 1553 dummy.op2 = (sseoperand_t *)&inst->op2->i[i];
1555 1554 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1556 1555 }
1557 1556 break;
1558 1557
1559 1558 case cvttpd2pi:
1560 1559 case cvttpd2dq:
1561 1560 dummy.op = cvttsd2si;
1562 1561 for (i = 0; i < 2; i++) {
1563 1562 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1564 1563 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1565 1564 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1566 1565 }
1567 1566 /* for cvttpd2dq, zero the high 64 bits of the destination */
1568 1567 if (inst->op == cvttpd2dq)
1569 1568 inst->op1->l[1] = 0ll;
1570 1569 break;
1571 1570
1572 1571 case cvtpd2pi:
1573 1572 case cvtpd2dq:
1574 1573 dummy.op = cvtsd2si;
1575 1574 for (i = 0; i < 2; i++) {
1576 1575 dummy.op1 = (sseoperand_t *)&inst->op1->i[i];
1577 1576 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1578 1577 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1579 1578 }
1580 1579 /* for cvtpd2dq, zero the high 64 bits of the destination */
1581 1580 if (inst->op == cvtpd2dq)
1582 1581 inst->op1->l[1] = 0ll;
1583 1582 break;
1584 1583
1585 1584 case cvtps2pd:
1586 1585 dummy.op = cvtss2sd;
1587 1586 for (i = 0; i < 2; i++) {
1588 1587 dummy.op1 = (sseoperand_t *)&inst->op1->d[i];
1589 1588 dummy.op2 = (sseoperand_t *)&inst->op2->f[i];
1590 1589 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1591 1590 }
1592 1591 break;
1593 1592
1594 1593 case cvtpd2ps:
1595 1594 dummy.op = cvtsd2ss;
1596 1595 for (i = 0; i < 2; i++) {
1597 1596 dummy.op1 = (sseoperand_t *)&inst->op1->f[i];
1598 1597 dummy.op2 = (sseoperand_t *)&inst->op2->d[i];
1599 1598 __fex_st_sse_result(uap, &dummy, e[i], &info[i]);
1600 1599 }
1601 1600 /* zero the high 64 bits of the destination */
1602 1601 inst->op1->l[1] = 0ll;
1603 1602
1604 1603 default:
1605 1604 break;
1606 1605 }
1607 1606 }
1608 1607
↓ open down ↓ |
1568 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX