1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 Free Software Foundation, Inc.
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "c-common.h"
39 #include "except.h"
40 #include "function.h"
41 #include "recog.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "basic-block.h"
46 #include "ggc.h"
47 #include "target.h"
48 #include "target-def.h"
49 #include "langhooks.h"
50 #include "cgraph.h"
51 #include "gimple.h"
52 #include "dwarf2.h"
53 #include "df.h"
54 #include "tm-constrs.h"
55 #include "params.h"
56 #include "cselib.h"
57
58 static int x86_builtin_vectorization_cost (bool);
59 static rtx legitimize_dllimport_symbol (rtx, bool);
60
61 #ifndef CHECK_STACK_LIMIT
62 #define CHECK_STACK_LIMIT (-1)
63 #endif
64
/* Return the index of the given mode in the multiply and divide cost tables.  */
66 #define MODE_INDEX(mode) \
67 ((mode) == QImode ? 0 \
68 : (mode) == HImode ? 1 \
69 : (mode) == SImode ? 2 \
70 : (mode) == DImode ? 3 \
71 : 4)
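/* As an illustration (derived from the macro above, not used by the tables
   themselves): MODE_INDEX (SImode) evaluates to 2 and selects the SImode
   entry of the multiply and divide cost arrays below, while any mode other
   than QI/HI/SI/DImode falls through to the "other" slot at index 4.  */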
72
73 /* Processor costs (relative to an add) */
74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 #define COSTS_N_BYTES(N) ((N) * 2)
76
77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
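/* Note (descriptive only): DUMMY_STRINGOP_ALGS is a placeholder initializer
   that fills whichever of the two memcpy/memset descriptor slots a given
   cost table does not tune, defaulting that variant to a libcall.  */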
78
79 const
80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 COSTS_N_BYTES (2), /* variable shift costs */
84 COSTS_N_BYTES (3), /* constant shift costs */
85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 0, /* cost of multiply per each bit set */
91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 COSTS_N_BYTES (3), /* HI */
93 COSTS_N_BYTES (3), /* SI */
94 COSTS_N_BYTES (3), /* DI */
95 COSTS_N_BYTES (5)}, /* other */
96 COSTS_N_BYTES (3), /* cost of movsx */
97 COSTS_N_BYTES (3), /* cost of movzx */
98 0, /* "large" insn */
99 2, /* MOVE_RATIO */
100 2, /* cost for loading QImode using movzbl */
101 {2, 2, 2}, /* cost of loading integer registers
102 in QImode, HImode and SImode.
103 Relative to reg-reg move (2). */
104 {2, 2, 2}, /* cost of storing integer registers */
105 2, /* cost of reg,reg fld/fst */
106 {2, 2, 2}, /* cost of loading fp registers
107 in SFmode, DFmode and XFmode */
108 {2, 2, 2}, /* cost of storing fp registers
109 in SFmode, DFmode and XFmode */
110 3, /* cost of moving MMX register */
111 {3, 3}, /* cost of loading MMX registers
112 in SImode and DImode */
113 {3, 3}, /* cost of storing MMX registers
114 in SImode and DImode */
115 3, /* cost of moving SSE register */
116 {3, 3, 3}, /* cost of loading SSE registers
117 in SImode, DImode and TImode */
118 {3, 3, 3}, /* cost of storing SSE registers
119 in SImode, DImode and TImode */
120 3, /* MMX or SSE register to integer */
121 0, /* size of l1 cache */
122 0, /* size of l2 cache */
123 0, /* size of prefetch block */
124 0, /* number of parallel prefetches */
125 2, /* Branch cost */
126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 1, /* scalar_stmt_cost. */
137 1, /* scalar load_cost. */
138 1, /* scalar_store_cost. */
139 1, /* vec_stmt_cost. */
140 1, /* vec_to_scalar_cost. */
141 1, /* scalar_to_vec_cost. */
142 1, /* vec_align_load_cost. */
143 1, /* vec_unalign_load_cost. */
144 1, /* vec_store_cost. */
145 1, /* cond_taken_branch_cost. */
146 1, /* cond_not_taken_branch_cost. */
147 };
148
149 /* Processor costs (relative to an add) */
150 static const
151 struct processor_costs i386_cost = { /* 386 specific costs */
152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 COSTS_N_INSNS (3), /* variable shift costs */
155 COSTS_N_INSNS (2), /* constant shift costs */
156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 COSTS_N_INSNS (6), /* HI */
158 COSTS_N_INSNS (6), /* SI */
159 COSTS_N_INSNS (6), /* DI */
160 COSTS_N_INSNS (6)}, /* other */
161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 COSTS_N_INSNS (23), /* HI */
164 COSTS_N_INSNS (23), /* SI */
165 COSTS_N_INSNS (23), /* DI */
166 COSTS_N_INSNS (23)}, /* other */
167 COSTS_N_INSNS (3), /* cost of movsx */
168 COSTS_N_INSNS (2), /* cost of movzx */
169 15, /* "large" insn */
170 3, /* MOVE_RATIO */
171 4, /* cost for loading QImode using movzbl */
172 {2, 4, 2}, /* cost of loading integer registers
173 in QImode, HImode and SImode.
174 Relative to reg-reg move (2). */
175 {2, 4, 2}, /* cost of storing integer registers */
176 2, /* cost of reg,reg fld/fst */
177 {8, 8, 8}, /* cost of loading fp registers
178 in SFmode, DFmode and XFmode */
179 {8, 8, 8}, /* cost of storing fp registers
180 in SFmode, DFmode and XFmode */
181 2, /* cost of moving MMX register */
182 {4, 8}, /* cost of loading MMX registers
183 in SImode and DImode */
184 {4, 8}, /* cost of storing MMX registers
185 in SImode and DImode */
186 2, /* cost of moving SSE register */
187 {4, 8, 16}, /* cost of loading SSE registers
188 in SImode, DImode and TImode */
189 {4, 8, 16}, /* cost of storing SSE registers
190 in SImode, DImode and TImode */
191 3, /* MMX or SSE register to integer */
192 0, /* size of l1 cache */
193 0, /* size of l2 cache */
194 0, /* size of prefetch block */
195 0, /* number of parallel prefetches */
196 1, /* Branch cost */
197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 DUMMY_STRINGOP_ALGS},
205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 DUMMY_STRINGOP_ALGS},
207 1, /* scalar_stmt_cost. */
208 1, /* scalar load_cost. */
209 1, /* scalar_store_cost. */
210 1, /* vec_stmt_cost. */
211 1, /* vec_to_scalar_cost. */
212 1, /* scalar_to_vec_cost. */
213 1, /* vec_align_load_cost. */
214 2, /* vec_unalign_load_cost. */
215 1, /* vec_store_cost. */
216 3, /* cond_taken_branch_cost. */
217 1, /* cond_not_taken_branch_cost. */
218 };
219
220 static const
221 struct processor_costs i486_cost = { /* 486 specific costs */
222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 COSTS_N_INSNS (3), /* variable shift costs */
225 COSTS_N_INSNS (2), /* constant shift costs */
226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 COSTS_N_INSNS (12), /* HI */
228 COSTS_N_INSNS (12), /* SI */
229 COSTS_N_INSNS (12), /* DI */
230 COSTS_N_INSNS (12)}, /* other */
231 1, /* cost of multiply per each bit set */
232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 COSTS_N_INSNS (40), /* HI */
234 COSTS_N_INSNS (40), /* SI */
235 COSTS_N_INSNS (40), /* DI */
236 COSTS_N_INSNS (40)}, /* other */
237 COSTS_N_INSNS (3), /* cost of movsx */
238 COSTS_N_INSNS (2), /* cost of movzx */
239 15, /* "large" insn */
240 3, /* MOVE_RATIO */
241 4, /* cost for loading QImode using movzbl */
242 {2, 4, 2}, /* cost of loading integer registers
243 in QImode, HImode and SImode.
244 Relative to reg-reg move (2). */
245 {2, 4, 2}, /* cost of storing integer registers */
246 2, /* cost of reg,reg fld/fst */
247 {8, 8, 8}, /* cost of loading fp registers
248 in SFmode, DFmode and XFmode */
249 {8, 8, 8}, /* cost of storing fp registers
250 in SFmode, DFmode and XFmode */
251 2, /* cost of moving MMX register */
252 {4, 8}, /* cost of loading MMX registers
253 in SImode and DImode */
254 {4, 8}, /* cost of storing MMX registers
255 in SImode and DImode */
256 2, /* cost of moving SSE register */
257 {4, 8, 16}, /* cost of loading SSE registers
258 in SImode, DImode and TImode */
259 {4, 8, 16}, /* cost of storing SSE registers
260 in SImode, DImode and TImode */
261 3, /* MMX or SSE register to integer */
262 4, /* size of l1 cache. 486 has 8kB cache
263 shared for code and data, so 4kB is
264 not really precise. */
265 4, /* size of l2 cache */
266 0, /* size of prefetch block */
267 0, /* number of parallel prefetches */
268 1, /* Branch cost */
269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 DUMMY_STRINGOP_ALGS},
277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 DUMMY_STRINGOP_ALGS},
279 1, /* scalar_stmt_cost. */
280 1, /* scalar load_cost. */
281 1, /* scalar_store_cost. */
282 1, /* vec_stmt_cost. */
283 1, /* vec_to_scalar_cost. */
284 1, /* scalar_to_vec_cost. */
285 1, /* vec_align_load_cost. */
286 2, /* vec_unalign_load_cost. */
287 1, /* vec_store_cost. */
288 3, /* cond_taken_branch_cost. */
289 1, /* cond_not_taken_branch_cost. */
290 };
291
292 static const
293 struct processor_costs pentium_cost = {
294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 COSTS_N_INSNS (4), /* variable shift costs */
297 COSTS_N_INSNS (1), /* constant shift costs */
298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 COSTS_N_INSNS (11), /* HI */
300 COSTS_N_INSNS (11), /* SI */
301 COSTS_N_INSNS (11), /* DI */
302 COSTS_N_INSNS (11)}, /* other */
303 0, /* cost of multiply per each bit set */
304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 COSTS_N_INSNS (25), /* HI */
306 COSTS_N_INSNS (25), /* SI */
307 COSTS_N_INSNS (25), /* DI */
308 COSTS_N_INSNS (25)}, /* other */
309 COSTS_N_INSNS (3), /* cost of movsx */
310 COSTS_N_INSNS (2), /* cost of movzx */
311 8, /* "large" insn */
312 6, /* MOVE_RATIO */
313 6, /* cost for loading QImode using movzbl */
314 {2, 4, 2}, /* cost of loading integer registers
315 in QImode, HImode and SImode.
316 Relative to reg-reg move (2). */
317 {2, 4, 2}, /* cost of storing integer registers */
318 2, /* cost of reg,reg fld/fst */
319 {2, 2, 6}, /* cost of loading fp registers
320 in SFmode, DFmode and XFmode */
321 {4, 4, 6}, /* cost of storing fp registers
322 in SFmode, DFmode and XFmode */
323 8, /* cost of moving MMX register */
324 {8, 8}, /* cost of loading MMX registers
325 in SImode and DImode */
326 {8, 8}, /* cost of storing MMX registers
327 in SImode and DImode */
328 2, /* cost of moving SSE register */
329 {4, 8, 16}, /* cost of loading SSE registers
330 in SImode, DImode and TImode */
331 {4, 8, 16}, /* cost of storing SSE registers
332 in SImode, DImode and TImode */
333 3, /* MMX or SSE register to integer */
334 8, /* size of l1 cache. */
335 8, /* size of l2 cache */
336 0, /* size of prefetch block */
337 0, /* number of parallel prefetches */
338 2, /* Branch cost */
339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 DUMMY_STRINGOP_ALGS},
347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 DUMMY_STRINGOP_ALGS},
349 1, /* scalar_stmt_cost. */
350 1, /* scalar load_cost. */
351 1, /* scalar_store_cost. */
352 1, /* vec_stmt_cost. */
353 1, /* vec_to_scalar_cost. */
354 1, /* scalar_to_vec_cost. */
355 1, /* vec_align_load_cost. */
356 2, /* vec_unalign_load_cost. */
357 1, /* vec_store_cost. */
358 3, /* cond_taken_branch_cost. */
359 1, /* cond_not_taken_branch_cost. */
360 };
361
362 static const
363 struct processor_costs pentiumpro_cost = {
364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 COSTS_N_INSNS (1), /* variable shift costs */
367 COSTS_N_INSNS (1), /* constant shift costs */
368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 COSTS_N_INSNS (4), /* HI */
370 COSTS_N_INSNS (4), /* SI */
371 COSTS_N_INSNS (4), /* DI */
372 COSTS_N_INSNS (4)}, /* other */
373 0, /* cost of multiply per each bit set */
374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 COSTS_N_INSNS (17), /* HI */
376 COSTS_N_INSNS (17), /* SI */
377 COSTS_N_INSNS (17), /* DI */
378 COSTS_N_INSNS (17)}, /* other */
379 COSTS_N_INSNS (1), /* cost of movsx */
380 COSTS_N_INSNS (1), /* cost of movzx */
381 8, /* "large" insn */
382 6, /* MOVE_RATIO */
383 2, /* cost for loading QImode using movzbl */
384 {4, 4, 4}, /* cost of loading integer registers
385 in QImode, HImode and SImode.
386 Relative to reg-reg move (2). */
387 {2, 2, 2}, /* cost of storing integer registers */
388 2, /* cost of reg,reg fld/fst */
389 {2, 2, 6}, /* cost of loading fp registers
390 in SFmode, DFmode and XFmode */
391 {4, 4, 6}, /* cost of storing fp registers
392 in SFmode, DFmode and XFmode */
393 2, /* cost of moving MMX register */
394 {2, 2}, /* cost of loading MMX registers
395 in SImode and DImode */
396 {2, 2}, /* cost of storing MMX registers
397 in SImode and DImode */
398 2, /* cost of moving SSE register */
399 {2, 2, 8}, /* cost of loading SSE registers
400 in SImode, DImode and TImode */
401 {2, 2, 8}, /* cost of storing SSE registers
402 in SImode, DImode and TImode */
403 3, /* MMX or SSE register to integer */
404 8, /* size of l1 cache. */
405 256, /* size of l2 cache */
406 32, /* size of prefetch block */
407 6, /* number of parallel prefetches */
408 2, /* Branch cost */
409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
/* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes
   (we ensure the alignment).  For small blocks an inline loop is still a
   noticeable win; for bigger blocks either rep movsl or rep movsb is the
   way to go.  Rep movsb apparently has a more expensive startup time in the
   CPU, but after 4K the difference is down in the noise.  */
420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 DUMMY_STRINGOP_ALGS},
423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 DUMMY_STRINGOP_ALGS},
426 1, /* scalar_stmt_cost. */
427 1, /* scalar load_cost. */
428 1, /* scalar_store_cost. */
429 1, /* vec_stmt_cost. */
430 1, /* vec_to_scalar_cost. */
431 1, /* scalar_to_vec_cost. */
432 1, /* vec_align_load_cost. */
433 2, /* vec_unalign_load_cost. */
434 1, /* vec_store_cost. */
435 3, /* cond_taken_branch_cost. */
436 1, /* cond_not_taken_branch_cost. */
437 };
438
439 static const
440 struct processor_costs geode_cost = {
441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 COSTS_N_INSNS (2), /* variable shift costs */
444 COSTS_N_INSNS (1), /* constant shift costs */
445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 COSTS_N_INSNS (4), /* HI */
447 COSTS_N_INSNS (7), /* SI */
448 COSTS_N_INSNS (7), /* DI */
449 COSTS_N_INSNS (7)}, /* other */
450 0, /* cost of multiply per each bit set */
451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 COSTS_N_INSNS (23), /* HI */
453 COSTS_N_INSNS (39), /* SI */
454 COSTS_N_INSNS (39), /* DI */
455 COSTS_N_INSNS (39)}, /* other */
456 COSTS_N_INSNS (1), /* cost of movsx */
457 COSTS_N_INSNS (1), /* cost of movzx */
458 8, /* "large" insn */
459 4, /* MOVE_RATIO */
460 1, /* cost for loading QImode using movzbl */
461 {1, 1, 1}, /* cost of loading integer registers
462 in QImode, HImode and SImode.
463 Relative to reg-reg move (2). */
464 {1, 1, 1}, /* cost of storing integer registers */
465 1, /* cost of reg,reg fld/fst */
466 {1, 1, 1}, /* cost of loading fp registers
467 in SFmode, DFmode and XFmode */
468 {4, 6, 6}, /* cost of storing fp registers
469 in SFmode, DFmode and XFmode */
470
471 1, /* cost of moving MMX register */
472 {1, 1}, /* cost of loading MMX registers
473 in SImode and DImode */
474 {1, 1}, /* cost of storing MMX registers
475 in SImode and DImode */
476 1, /* cost of moving SSE register */
477 {1, 1, 1}, /* cost of loading SSE registers
478 in SImode, DImode and TImode */
479 {1, 1, 1}, /* cost of storing SSE registers
480 in SImode, DImode and TImode */
481 1, /* MMX or SSE register to integer */
482 64, /* size of l1 cache. */
483 128, /* size of l2 cache. */
484 32, /* size of prefetch block */
485 1, /* number of parallel prefetches */
486 1, /* Branch cost */
487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 DUMMY_STRINGOP_ALGS},
495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 DUMMY_STRINGOP_ALGS},
497 1, /* scalar_stmt_cost. */
498 1, /* scalar load_cost. */
499 1, /* scalar_store_cost. */
500 1, /* vec_stmt_cost. */
501 1, /* vec_to_scalar_cost. */
502 1, /* scalar_to_vec_cost. */
503 1, /* vec_align_load_cost. */
504 2, /* vec_unalign_load_cost. */
505 1, /* vec_store_cost. */
506 3, /* cond_taken_branch_cost. */
507 1, /* cond_not_taken_branch_cost. */
508 };
509
510 static const
511 struct processor_costs k6_cost = {
512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 COSTS_N_INSNS (1), /* variable shift costs */
515 COSTS_N_INSNS (1), /* constant shift costs */
516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 COSTS_N_INSNS (3), /* HI */
518 COSTS_N_INSNS (3), /* SI */
519 COSTS_N_INSNS (3), /* DI */
520 COSTS_N_INSNS (3)}, /* other */
521 0, /* cost of multiply per each bit set */
522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 COSTS_N_INSNS (18), /* HI */
524 COSTS_N_INSNS (18), /* SI */
525 COSTS_N_INSNS (18), /* DI */
526 COSTS_N_INSNS (18)}, /* other */
527 COSTS_N_INSNS (2), /* cost of movsx */
528 COSTS_N_INSNS (2), /* cost of movzx */
529 8, /* "large" insn */
530 4, /* MOVE_RATIO */
531 3, /* cost for loading QImode using movzbl */
532 {4, 5, 4}, /* cost of loading integer registers
533 in QImode, HImode and SImode.
534 Relative to reg-reg move (2). */
535 {2, 3, 2}, /* cost of storing integer registers */
536 4, /* cost of reg,reg fld/fst */
537 {6, 6, 6}, /* cost of loading fp registers
538 in SFmode, DFmode and XFmode */
539 {4, 4, 4}, /* cost of storing fp registers
540 in SFmode, DFmode and XFmode */
541 2, /* cost of moving MMX register */
542 {2, 2}, /* cost of loading MMX registers
543 in SImode and DImode */
544 {2, 2}, /* cost of storing MMX registers
545 in SImode and DImode */
546 2, /* cost of moving SSE register */
547 {2, 2, 8}, /* cost of loading SSE registers
548 in SImode, DImode and TImode */
549 {2, 2, 8}, /* cost of storing SSE registers
550 in SImode, DImode and TImode */
551 6, /* MMX or SSE register to integer */
552 32, /* size of l1 cache. */
553 32, /* size of l2 cache. Some models
554 have integrated l2 cache, but
555 optimizing for k6 is not important
556 enough to worry about that. */
557 32, /* size of prefetch block */
558 1, /* number of parallel prefetches */
559 1, /* Branch cost */
560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 DUMMY_STRINGOP_ALGS},
568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 DUMMY_STRINGOP_ALGS},
570 1, /* scalar_stmt_cost. */
571 1, /* scalar load_cost. */
572 1, /* scalar_store_cost. */
573 1, /* vec_stmt_cost. */
574 1, /* vec_to_scalar_cost. */
575 1, /* scalar_to_vec_cost. */
576 1, /* vec_align_load_cost. */
577 2, /* vec_unalign_load_cost. */
578 1, /* vec_store_cost. */
579 3, /* cond_taken_branch_cost. */
580 1, /* cond_not_taken_branch_cost. */
581 };
582
583 static const
584 struct processor_costs athlon_cost = {
585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 COSTS_N_INSNS (1), /* variable shift costs */
588 COSTS_N_INSNS (1), /* constant shift costs */
589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 COSTS_N_INSNS (5), /* HI */
591 COSTS_N_INSNS (5), /* SI */
592 COSTS_N_INSNS (5), /* DI */
593 COSTS_N_INSNS (5)}, /* other */
594 0, /* cost of multiply per each bit set */
595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 COSTS_N_INSNS (26), /* HI */
597 COSTS_N_INSNS (42), /* SI */
598 COSTS_N_INSNS (74), /* DI */
599 COSTS_N_INSNS (74)}, /* other */
600 COSTS_N_INSNS (1), /* cost of movsx */
601 COSTS_N_INSNS (1), /* cost of movzx */
602 8, /* "large" insn */
603 9, /* MOVE_RATIO */
604 4, /* cost for loading QImode using movzbl */
605 {3, 4, 3}, /* cost of loading integer registers
606 in QImode, HImode and SImode.
607 Relative to reg-reg move (2). */
608 {3, 4, 3}, /* cost of storing integer registers */
609 4, /* cost of reg,reg fld/fst */
610 {4, 4, 12}, /* cost of loading fp registers
611 in SFmode, DFmode and XFmode */
612 {6, 6, 8}, /* cost of storing fp registers
613 in SFmode, DFmode and XFmode */
614 2, /* cost of moving MMX register */
615 {4, 4}, /* cost of loading MMX registers
616 in SImode and DImode */
617 {4, 4}, /* cost of storing MMX registers
618 in SImode and DImode */
619 2, /* cost of moving SSE register */
620 {4, 4, 6}, /* cost of loading SSE registers
621 in SImode, DImode and TImode */
622 {4, 4, 5}, /* cost of storing SSE registers
623 in SImode, DImode and TImode */
624 5, /* MMX or SSE register to integer */
625 64, /* size of l1 cache. */
626 256, /* size of l2 cache. */
627 64, /* size of prefetch block */
628 6, /* number of parallel prefetches */
629 5, /* Branch cost */
630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* For some reason, Athlon deals better with the REP prefix (relative to
   loops) than K8 does.  Alignment becomes important after 8 bytes for
   memcpy and 128 bytes for memset.  */
639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 DUMMY_STRINGOP_ALGS},
641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 DUMMY_STRINGOP_ALGS},
643 1, /* scalar_stmt_cost. */
644 1, /* scalar load_cost. */
645 1, /* scalar_store_cost. */
646 1, /* vec_stmt_cost. */
647 1, /* vec_to_scalar_cost. */
648 1, /* scalar_to_vec_cost. */
649 1, /* vec_align_load_cost. */
650 2, /* vec_unalign_load_cost. */
651 1, /* vec_store_cost. */
652 3, /* cond_taken_branch_cost. */
653 1, /* cond_not_taken_branch_cost. */
654 };
655
656 static const
657 struct processor_costs k8_cost = {
658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 COSTS_N_INSNS (1), /* variable shift costs */
661 COSTS_N_INSNS (1), /* constant shift costs */
662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 COSTS_N_INSNS (4), /* HI */
664 COSTS_N_INSNS (3), /* SI */
665 COSTS_N_INSNS (4), /* DI */
666 COSTS_N_INSNS (5)}, /* other */
667 0, /* cost of multiply per each bit set */
668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 COSTS_N_INSNS (26), /* HI */
670 COSTS_N_INSNS (42), /* SI */
671 COSTS_N_INSNS (74), /* DI */
672 COSTS_N_INSNS (74)}, /* other */
673 COSTS_N_INSNS (1), /* cost of movsx */
674 COSTS_N_INSNS (1), /* cost of movzx */
675 8, /* "large" insn */
676 9, /* MOVE_RATIO */
677 4, /* cost for loading QImode using movzbl */
678 {3, 4, 3}, /* cost of loading integer registers
679 in QImode, HImode and SImode.
680 Relative to reg-reg move (2). */
681 {3, 4, 3}, /* cost of storing integer registers */
682 4, /* cost of reg,reg fld/fst */
683 {4, 4, 12}, /* cost of loading fp registers
684 in SFmode, DFmode and XFmode */
685 {6, 6, 8}, /* cost of storing fp registers
686 in SFmode, DFmode and XFmode */
687 2, /* cost of moving MMX register */
688 {3, 3}, /* cost of loading MMX registers
689 in SImode and DImode */
690 {4, 4}, /* cost of storing MMX registers
691 in SImode and DImode */
692 2, /* cost of moving SSE register */
693 {4, 3, 6}, /* cost of loading SSE registers
694 in SImode, DImode and TImode */
695 {4, 4, 5}, /* cost of storing SSE registers
696 in SImode, DImode and TImode */
697 5, /* MMX or SSE register to integer */
698 64, /* size of l1 cache. */
699 512, /* size of l2 cache. */
700 64, /* size of prefetch block */
/* New AMD processors never drop prefetches; if they cannot be performed
   immediately, they are queued.  We set the number of simultaneous prefetches
   to a large constant to reflect this (it is probably not a good idea to
   leave the number of prefetches completely unlimited, as their execution
   also takes some time).  */
706 100, /* number of parallel prefetches */
707 3, /* Branch cost */
708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
/* K8 has an optimized REP instruction for medium-sized blocks, but for very
   small blocks it is better to use a loop.  For large blocks, a libcall can
   do nontemporal accesses and beat inline code considerably.  */
717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 4, /* scalar_stmt_cost. */
723 2, /* scalar load_cost. */
724 2, /* scalar_store_cost. */
725 5, /* vec_stmt_cost. */
726 0, /* vec_to_scalar_cost. */
727 2, /* scalar_to_vec_cost. */
728 2, /* vec_align_load_cost. */
729 3, /* vec_unalign_load_cost. */
730 3, /* vec_store_cost. */
731 3, /* cond_taken_branch_cost. */
732 2, /* cond_not_taken_branch_cost. */
733 };
734
735 struct processor_costs amdfam10_cost = {
736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 COSTS_N_INSNS (1), /* variable shift costs */
739 COSTS_N_INSNS (1), /* constant shift costs */
740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 COSTS_N_INSNS (4), /* HI */
742 COSTS_N_INSNS (3), /* SI */
743 COSTS_N_INSNS (4), /* DI */
744 COSTS_N_INSNS (5)}, /* other */
745 0, /* cost of multiply per each bit set */
746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 COSTS_N_INSNS (35), /* HI */
748 COSTS_N_INSNS (51), /* SI */
749 COSTS_N_INSNS (83), /* DI */
750 COSTS_N_INSNS (83)}, /* other */
751 COSTS_N_INSNS (1), /* cost of movsx */
752 COSTS_N_INSNS (1), /* cost of movzx */
753 8, /* "large" insn */
754 9, /* MOVE_RATIO */
755 4, /* cost for loading QImode using movzbl */
756 {3, 4, 3}, /* cost of loading integer registers
757 in QImode, HImode and SImode.
758 Relative to reg-reg move (2). */
759 {3, 4, 3}, /* cost of storing integer registers */
760 4, /* cost of reg,reg fld/fst */
761 {4, 4, 12}, /* cost of loading fp registers
762 in SFmode, DFmode and XFmode */
763 {6, 6, 8}, /* cost of storing fp registers
764 in SFmode, DFmode and XFmode */
765 2, /* cost of moving MMX register */
766 {3, 3}, /* cost of loading MMX registers
767 in SImode and DImode */
768 {4, 4}, /* cost of storing MMX registers
769 in SImode and DImode */
770 2, /* cost of moving SSE register */
771 {4, 4, 3}, /* cost of loading SSE registers
772 in SImode, DImode and TImode */
773 {4, 4, 5}, /* cost of storing SSE registers
774 in SImode, DImode and TImode */
775 3, /* MMX or SSE register to integer */
/* On K8:
     MOVD reg64, xmmreg   Double   FSTORE   4
     MOVD reg32, xmmreg   Double   FSTORE   4
   On AMDFAM10:
     MOVD reg64, xmmreg   Double   FADD     3
                                   1/1      1/1
     MOVD reg32, xmmreg   Double   FADD     3
                                   1/1      1/1  */
784 64, /* size of l1 cache. */
785 512, /* size of l2 cache. */
786 64, /* size of prefetch block */
/* New AMD processors never drop prefetches; if they cannot be performed
   immediately, they are queued.  We set the number of simultaneous prefetches
   to a large constant to reflect this (it is probably not a good idea to
   leave the number of prefetches completely unlimited, as their execution
   also takes some time).  */
792 100, /* number of parallel prefetches */
793 2, /* Branch cost */
794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800
/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
   very small blocks it is better to use a loop.  For large blocks, a libcall
   can do nontemporal accesses and beat inline code considerably.  */
804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 4, /* scalar_stmt_cost. */
810 2, /* scalar load_cost. */
811 2, /* scalar_store_cost. */
812 6, /* vec_stmt_cost. */
813 0, /* vec_to_scalar_cost. */
814 2, /* scalar_to_vec_cost. */
815 2, /* vec_align_load_cost. */
816 2, /* vec_unalign_load_cost. */
817 2, /* vec_store_cost. */
818 2, /* cond_taken_branch_cost. */
819 1, /* cond_not_taken_branch_cost. */
820 };
821
822 static const
823 struct processor_costs pentium4_cost = {
824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 COSTS_N_INSNS (4), /* variable shift costs */
827 COSTS_N_INSNS (4), /* constant shift costs */
828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 COSTS_N_INSNS (15), /* HI */
830 COSTS_N_INSNS (15), /* SI */
831 COSTS_N_INSNS (15), /* DI */
832 COSTS_N_INSNS (15)}, /* other */
833 0, /* cost of multiply per each bit set */
834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 COSTS_N_INSNS (56), /* HI */
836 COSTS_N_INSNS (56), /* SI */
837 COSTS_N_INSNS (56), /* DI */
838 COSTS_N_INSNS (56)}, /* other */
839 COSTS_N_INSNS (1), /* cost of movsx */
840 COSTS_N_INSNS (1), /* cost of movzx */
841 16, /* "large" insn */
842 6, /* MOVE_RATIO */
843 2, /* cost for loading QImode using movzbl */
844 {4, 5, 4}, /* cost of loading integer registers
845 in QImode, HImode and SImode.
846 Relative to reg-reg move (2). */
847 {2, 3, 2}, /* cost of storing integer registers */
848 2, /* cost of reg,reg fld/fst */
849 {2, 2, 6}, /* cost of loading fp registers
850 in SFmode, DFmode and XFmode */
851 {4, 4, 6}, /* cost of storing fp registers
852 in SFmode, DFmode and XFmode */
853 2, /* cost of moving MMX register */
854 {2, 2}, /* cost of loading MMX registers
855 in SImode and DImode */
856 {2, 2}, /* cost of storing MMX registers
857 in SImode and DImode */
858 12, /* cost of moving SSE register */
859 {12, 12, 12}, /* cost of loading SSE registers
860 in SImode, DImode and TImode */
861 {2, 2, 8}, /* cost of storing SSE registers
862 in SImode, DImode and TImode */
863 10, /* MMX or SSE register to integer */
864 8, /* size of l1 cache. */
865 256, /* size of l2 cache. */
866 64, /* size of prefetch block */
867 6, /* number of parallel prefetches */
868 2, /* Branch cost */
869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 DUMMY_STRINGOP_ALGS},
877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 {-1, libcall}}},
879 DUMMY_STRINGOP_ALGS},
880 1, /* scalar_stmt_cost. */
881 1, /* scalar load_cost. */
882 1, /* scalar_store_cost. */
883 1, /* vec_stmt_cost. */
884 1, /* vec_to_scalar_cost. */
885 1, /* scalar_to_vec_cost. */
886 1, /* vec_align_load_cost. */
887 2, /* vec_unalign_load_cost. */
888 1, /* vec_store_cost. */
889 3, /* cond_taken_branch_cost. */
890 1, /* cond_not_taken_branch_cost. */
891 };
892
893 static const
894 struct processor_costs nocona_cost = {
895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 COSTS_N_INSNS (1), /* variable shift costs */
898 COSTS_N_INSNS (1), /* constant shift costs */
899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 COSTS_N_INSNS (10), /* HI */
901 COSTS_N_INSNS (10), /* SI */
902 COSTS_N_INSNS (10), /* DI */
903 COSTS_N_INSNS (10)}, /* other */
904 0, /* cost of multiply per each bit set */
905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 COSTS_N_INSNS (66), /* HI */
907 COSTS_N_INSNS (66), /* SI */
908 COSTS_N_INSNS (66), /* DI */
909 COSTS_N_INSNS (66)}, /* other */
910 COSTS_N_INSNS (1), /* cost of movsx */
911 COSTS_N_INSNS (1), /* cost of movzx */
912 16, /* "large" insn */
913 17, /* MOVE_RATIO */
914 4, /* cost for loading QImode using movzbl */
915 {4, 4, 4}, /* cost of loading integer registers
916 in QImode, HImode and SImode.
917 Relative to reg-reg move (2). */
918 {4, 4, 4}, /* cost of storing integer registers */
919 3, /* cost of reg,reg fld/fst */
920 {12, 12, 12}, /* cost of loading fp registers
921 in SFmode, DFmode and XFmode */
922 {4, 4, 4}, /* cost of storing fp registers
923 in SFmode, DFmode and XFmode */
924 6, /* cost of moving MMX register */
925 {12, 12}, /* cost of loading MMX registers
926 in SImode and DImode */
927 {12, 12}, /* cost of storing MMX registers
928 in SImode and DImode */
929 6, /* cost of moving SSE register */
930 {12, 12, 12}, /* cost of loading SSE registers
931 in SImode, DImode and TImode */
932 {12, 12, 12}, /* cost of storing SSE registers
933 in SImode, DImode and TImode */
934 8, /* MMX or SSE register to integer */
935 8, /* size of l1 cache. */
936 1024, /* size of l2 cache. */
937 128, /* size of prefetch block */
938 8, /* number of parallel prefetches */
939 1, /* Branch cost */
940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 {100000, unrolled_loop}, {-1, libcall}}}},
949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 {-1, libcall}}},
951 {libcall, {{24, loop}, {64, unrolled_loop},
952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 1, /* scalar_stmt_cost. */
954 1, /* scalar load_cost. */
955 1, /* scalar_store_cost. */
956 1, /* vec_stmt_cost. */
957 1, /* vec_to_scalar_cost. */
958 1, /* scalar_to_vec_cost. */
959 1, /* vec_align_load_cost. */
960 2, /* vec_unalign_load_cost. */
961 1, /* vec_store_cost. */
962 3, /* cond_taken_branch_cost. */
963 1, /* cond_not_taken_branch_cost. */
964 };
965
966 static const
967 struct processor_costs core2_cost = {
968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 COSTS_N_INSNS (1), /* variable shift costs */
971 COSTS_N_INSNS (1), /* constant shift costs */
972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 COSTS_N_INSNS (3), /* HI */
974 COSTS_N_INSNS (3), /* SI */
975 COSTS_N_INSNS (3), /* DI */
976 COSTS_N_INSNS (3)}, /* other */
977 0, /* cost of multiply per each bit set */
978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 COSTS_N_INSNS (22), /* HI */
980 COSTS_N_INSNS (22), /* SI */
981 COSTS_N_INSNS (22), /* DI */
982 COSTS_N_INSNS (22)}, /* other */
983 COSTS_N_INSNS (1), /* cost of movsx */
984 COSTS_N_INSNS (1), /* cost of movzx */
985 8, /* "large" insn */
986 16, /* MOVE_RATIO */
987 2, /* cost for loading QImode using movzbl */
988 {6, 6, 6}, /* cost of loading integer registers
989 in QImode, HImode and SImode.
990 Relative to reg-reg move (2). */
991 {4, 4, 4}, /* cost of storing integer registers */
992 2, /* cost of reg,reg fld/fst */
993 {6, 6, 6}, /* cost of loading fp registers
994 in SFmode, DFmode and XFmode */
995 {4, 4, 4}, /* cost of storing fp registers
996 in SFmode, DFmode and XFmode */
997 2, /* cost of moving MMX register */
998 {6, 6}, /* cost of loading MMX registers
999 in SImode and DImode */
1000 {4, 4}, /* cost of storing MMX registers
1001 in SImode and DImode */
1002 2, /* cost of moving SSE register */
1003 {6, 6, 6}, /* cost of loading SSE registers
1004 in SImode, DImode and TImode */
1005 {4, 4, 4}, /* cost of storing SSE registers
1006 in SImode, DImode and TImode */
1007 2, /* MMX or SSE register to integer */
1008 32, /* size of l1 cache. */
1009 2048, /* size of l2 cache. */
1010 128, /* size of prefetch block */
1011 8, /* number of parallel prefetches */
1012 3, /* Branch cost */
1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 2, /* vec_unalign_load_cost. */
1034 1, /* vec_store_cost. */
1035 3, /* cond_taken_branch_cost. */
1036 1, /* cond_not_taken_branch_cost. */
1037 };
1038
1039 /* Generic64 should produce code tuned for Nocona and K8. */
1040 static const
1041 struct processor_costs generic64_cost = {
1042 COSTS_N_INSNS (1), /* cost of an add instruction */
/* On all chips taken into consideration, lea is 2 cycles or more.  With
   this cost, however, our current implementation of synth_mult results in
   the use of unnecessary temporary registers, causing a regression on
   several SPECfp benchmarks.  */
1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 COSTS_N_INSNS (4), /* HI */
1052 COSTS_N_INSNS (3), /* SI */
1053 COSTS_N_INSNS (4), /* DI */
1054 COSTS_N_INSNS (2)}, /* other */
1055 0, /* cost of multiply per each bit set */
1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 COSTS_N_INSNS (26), /* HI */
1058 COSTS_N_INSNS (42), /* SI */
1059 COSTS_N_INSNS (74), /* DI */
1060 COSTS_N_INSNS (74)}, /* other */
1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 8, /* "large" insn */
1064 17, /* MOVE_RATIO */
1065 4, /* cost for loading QImode using movzbl */
1066 {4, 4, 4}, /* cost of loading integer registers
1067 in QImode, HImode and SImode.
1068 Relative to reg-reg move (2). */
1069 {4, 4, 4}, /* cost of storing integer registers */
1070 4, /* cost of reg,reg fld/fst */
1071 {12, 12, 12}, /* cost of loading fp registers
1072 in SFmode, DFmode and XFmode */
1073 {6, 6, 8}, /* cost of storing fp registers
1074 in SFmode, DFmode and XFmode */
1075 2, /* cost of moving MMX register */
1076 {8, 8}, /* cost of loading MMX registers
1077 in SImode and DImode */
1078 {8, 8}, /* cost of storing MMX registers
1079 in SImode and DImode */
1080 2, /* cost of moving SSE register */
1081 {8, 8, 8}, /* cost of loading SSE registers
1082 in SImode, DImode and TImode */
1083 {8, 8, 8}, /* cost of storing SSE registers
1084 in SImode, DImode and TImode */
1085 5, /* MMX or SSE register to integer */
1086 32, /* size of l1 cache. */
1087 512, /* size of l2 cache. */
1088 64, /* size of prefetch block */
1089 6, /* number of parallel prefetches */
/* Benchmarks show large regressions on the K8 sixtrack benchmark when this
   value is increased to the perhaps more appropriate value of 5.  */
1092 3, /* Branch cost */
1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 {DUMMY_STRINGOP_ALGS,
1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 {DUMMY_STRINGOP_ALGS,
1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1, /* scalar_stmt_cost. */
1104 1, /* scalar load_cost. */
1105 1, /* scalar_store_cost. */
1106 1, /* vec_stmt_cost. */
1107 1, /* vec_to_scalar_cost. */
1108 1, /* scalar_to_vec_cost. */
1109 1, /* vec_align_load_cost. */
1110 2, /* vec_unalign_load_cost. */
1111 1, /* vec_store_cost. */
1112 3, /* cond_taken_branch_cost. */
1113 1, /* cond_not_taken_branch_cost. */
1114 };
1115
1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1117 static const
1118 struct processor_costs generic32_cost = {
1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 COSTS_N_INSNS (4), /* HI */
1125 COSTS_N_INSNS (3), /* SI */
1126 COSTS_N_INSNS (4), /* DI */
1127 COSTS_N_INSNS (2)}, /* other */
1128 0, /* cost of multiply per each bit set */
1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 COSTS_N_INSNS (26), /* HI */
1131 COSTS_N_INSNS (42), /* SI */
1132 COSTS_N_INSNS (74), /* DI */
1133 COSTS_N_INSNS (74)}, /* other */
1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 8, /* "large" insn */
1137 17, /* MOVE_RATIO */
1138 4, /* cost for loading QImode using movzbl */
1139 {4, 4, 4}, /* cost of loading integer registers
1140 in QImode, HImode and SImode.
1141 Relative to reg-reg move (2). */
1142 {4, 4, 4}, /* cost of storing integer registers */
1143 4, /* cost of reg,reg fld/fst */
1144 {12, 12, 12}, /* cost of loading fp registers
1145 in SFmode, DFmode and XFmode */
1146 {6, 6, 8}, /* cost of storing fp registers
1147 in SFmode, DFmode and XFmode */
1148 2, /* cost of moving MMX register */
1149 {8, 8}, /* cost of loading MMX registers
1150 in SImode and DImode */
1151 {8, 8}, /* cost of storing MMX registers
1152 in SImode and DImode */
1153 2, /* cost of moving SSE register */
1154 {8, 8, 8}, /* cost of loading SSE registers
1155 in SImode, DImode and TImode */
1156 {8, 8, 8}, /* cost of storing SSE registers
1157 in SImode, DImode and TImode */
1158 5, /* MMX or SSE register to integer */
1159 32, /* size of l1 cache. */
1160 256, /* size of l2 cache. */
1161 64, /* size of prefetch block */
1162 6, /* number of parallel prefetches */
1163 3, /* Branch cost */
1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 DUMMY_STRINGOP_ALGS},
1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 DUMMY_STRINGOP_ALGS},
1174 1, /* scalar_stmt_cost. */
1175 1, /* scalar load_cost. */
1176 1, /* scalar_store_cost. */
1177 1, /* vec_stmt_cost. */
1178 1, /* vec_to_scalar_cost. */
1179 1, /* scalar_to_vec_cost. */
1180 1, /* vec_align_load_cost. */
1181 2, /* vec_unalign_load_cost. */
1182 1, /* vec_store_cost. */
1183 3, /* cond_taken_branch_cost. */
1184 1, /* cond_not_taken_branch_cost. */
1185 };
1186
1187 const struct processor_costs *ix86_cost = &pentium_cost;
1188
1189 /* Processor feature/optimization bitmasks. */
1190 #define m_386 (1<<PROCESSOR_I386)
1191 #define m_486 (1<<PROCESSOR_I486)
1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1197
1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 #define m_K6 (1<<PROCESSOR_K6)
1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 #define m_K8 (1<<PROCESSOR_K8)
1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1206
1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209
/* Generic instruction choice should be the common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
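/* A minimal sketch of how the masks above are consumed (the actual code lives
   in the option-handling part of this file): the processor selected by -mtune
   (ix86_tune, set up during option handling) is turned into a single bit and
   tested against each initializer below, roughly

     ix86_tune_features[X86_TUNE_SCHEDULE]
       = (initial_ix86_tune_features[X86_TUNE_SCHEDULE]
          & (1 << ix86_tune)) != 0;

   so an entry such as m_AMD_MULTIPLE | m_CORE2 | m_GENERIC enables that
   tuning for every processor whose bit the mask contains.  */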
1213
1214 /* Feature tests against the various tunings. */
1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1216
1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 based on the processor mask. */
1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
   negatively, so enabling it for Generic64 seems like a good code size
   tradeoff.  We can't enable it for 32-bit generic because it does not
   work well with PPro-based chips.  */
1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225
1226 /* X86_TUNE_PUSH_MEMORY */
1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1229
1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 m_486 | m_PENT,
1232
1233 /* X86_TUNE_UNROLL_STRLEN */
1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235
1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
   on simulation results, but after the P4 was made no performance benefit
   was observed with branch hints; they also increase the code size.
   As a result, icc never generates branch hints.  */
1243 0,
1244
1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1246 ~m_386,
1247
1248 /* X86_TUNE_USE_SAHF */
1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1251
1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 partial dependencies. */
1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
   register stalls on the Generic32 compilation setting as well.  However,
   in the current implementation partial register stalls are not eliminated
   very well - they can be introduced via subregs synthesized by combine
   and can happen in caller/callee saving sequences.  Because this option
   pays back little on PPro-based chips and conflicts with the partial
   register dependencies used by Athlon/P4-based chips, it is better to
   leave it off for generic32 for now.  */
1265 m_PPRO,
1266
1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 m_CORE2 | m_GENERIC,
1269
1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 m_386 | m_486 | m_K6_GEODE,
1272
1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275
1276 /* X86_TUNE_USE_MOV0 */
1277 m_K6,
1278
1279 /* X86_TUNE_USE_CLTD */
1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281
1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1283 m_PENT4,
1284
1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1286 m_PPRO,
1287
1288 /* X86_TUNE_READ_MODIFY_WRITE */
1289 ~m_PENT,
1290
1291 /* X86_TUNE_READ_MODIFY */
1292 ~(m_PENT | m_PPRO),
1293
1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 | m_GENERIC /* | m_PENT4 ? */,
1297
1298 /* X86_TUNE_FAST_PREFIX */
1299 ~(m_PENT | m_486 | m_386),
1300
1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 m_386 | m_PENT4 | m_NOCONA,
1303
1304 /* X86_TUNE_QIMODE_MATH */
1305 ~0,
1306
1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 might be considered for Generic32 if our scheme for avoiding partial
1310 stalls was more effective. */
1311 ~m_PPRO,
1312
1313 /* X86_TUNE_PROMOTE_QI_REGS */
1314 0,
1315
1316 /* X86_TUNE_PROMOTE_HI_REGS */
1317 m_PPRO,
1318
1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321
1322 /* X86_TUNE_ADD_ESP_8 */
1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325
1326 /* X86_TUNE_SUB_ESP_4 */
1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328
1329 /* X86_TUNE_SUB_ESP_8 */
1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332
1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 for DFmode copies */
1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 | m_GENERIC | m_GEODE),
1337
1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
   conflict here between PPro/Pentium4-based chips that treat 128-bit
   SSE registers as single units and K8-based chips that divide SSE
   registers into two 64-bit halves.  This knob promotes all store
   destinations to be 128-bit, to allow register renaming on 128-bit SSE
   units, but usually results in one extra microop on 64-bit SSE units.
   Experimental results show that disabling this option on P4 brings over a
   20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
   regression that can be partly masked by careful scheduling of moves.  */
1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351
1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353 m_AMDFAM10,
1354
/* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
   are resolved on SSE register parts instead of whole registers, so we may
   maintain just the lower part of scalar values in the proper format,
   leaving the upper part undefined.  */
1359 m_ATHLON_K8,
1360
1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1362 m_AMD_MULTIPLE,
1363
1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 m_PPRO | m_PENT4 | m_NOCONA,
1366
1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369
1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372
1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375
1376 /* X86_TUNE_SHIFT1 */
1377 ~m_486,
1378
1379 /* X86_TUNE_USE_FFREEP */
1380 m_AMD_MULTIPLE,
1381
1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1384
1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386 ~(m_AMDFAM10),
1387
1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 than 4 branch instructions in the 16 byte window. */
1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391
1392 /* X86_TUNE_SCHEDULE */
1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394
1395 /* X86_TUNE_USE_BT */
1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397
1398 /* X86_TUNE_USE_INCDEC */
1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400
1401 /* X86_TUNE_PAD_RETURNS */
1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403
1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406
1407 /* X86_TUNE_SHORTEN_X87_SSE */
1408 ~m_K8,
1409
1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1411 m_K8 | m_GENERIC64,
1412
/* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
   HImode and SImode multiplies, but the 386 and 486 do HImode multiplies
   faster.  */
1415 ~(m_386 | m_486),
1416
1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 vector path on AMD machines. */
1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420
1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1422 machines. */
1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1424
1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 than a MOV. */
1427 m_PENT,
1428
1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 but one byte longer. */
1431 m_PENT,
1432
/* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
   operand that cannot be represented using a modRM byte.  The XOR
   replacement is long decoded, so this split helps here as well.  */
1436 m_K6,
1437
1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439 from FP to FP. */
1440 m_AMDFAM10 | m_GENERIC,
1441
1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 from integer to FP. */
1444 m_AMDFAM10,
1445
1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 with a subsequent conditional jump instruction into a single
1448 compare-and-branch uop. */
1449 m_CORE2,
1450 };
1451
1452 /* Feature tests against the various architecture variations. */
1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1454
1455 /* Feature tests against the various architecture variations, used to create
1456 ix86_arch_features based on the processor mask. */
1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 ~(m_386 | m_486 | m_PENT | m_K6),
1460
1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1462 ~m_386,
1463
1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465 ~(m_386 | m_486),
1466
1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1468 ~m_386,
1469
1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 ~m_386,
1472 };
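/* Illustrative sketch, not extra build logic: each entry above is a mask of
   m_* processor bits, and override_options later derives the per-processor
   feature array roughly as

     ix86_arch_mask = 1u << ix86_arch;
     ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);

   so X86_ARCH_CMOVE, for example, ends up false when ix86_arch selects a
   386, 486, Pentium or K6 and true for anything newer.  The tuning table
   above (initial_ix86_tune_features) is filled from ix86_tune in the same
   way.  */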
1473
1474 static const unsigned int x86_accumulate_outgoing_args
1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476
1477 static const unsigned int x86_arch_always_fancy_math_387
1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 | m_NOCONA | m_CORE2 | m_GENERIC;
1480
1481 static enum stringop_alg stringop_alg = no_stringop;
1482
1483 /* In case the average insn count for single function invocation is
1484 lower than this constant, emit fast (but longer) prologue and
1485 epilogue code. */
1486 #define FAST_PROLOGUE_INSN_COUNT 20
1487
1488 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492
1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1495
1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497 {
1498 /* ax, dx, cx, bx */
1499 AREG, DREG, CREG, BREG,
1500 /* si, di, bp, sp */
1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502 /* FP registers */
1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505 /* arg pointer */
1506 NON_Q_REGS,
1507 /* flags, fpsr, fpcr, frame */
1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509 /* SSE registers */
1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511 SSE_REGS, SSE_REGS,
1512 /* MMX registers */
1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514 MMX_REGS, MMX_REGS,
1515 /* REX registers */
1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 /* SSE REX registers */
1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 SSE_REGS, SSE_REGS,
1521 };
1522
1523 /* The "default" register map used in 32bit mode. */
1524
1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526 {
1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1534 };
1535
1536 /* The "default" register map used in 64bit mode. */
1537
1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 {
1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 8, 9, 10, 11, 12, 13, 14, 15, /* extended integer registers */
1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547 };
1548
1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 The SVR4 reference port C compiler uses the following register numbers
1551 in its Dwarf output code:
1552 0 for %eax (gcc regno = 0)
1553 1 for %ecx (gcc regno = 2)
1554 2 for %edx (gcc regno = 1)
1555 3 for %ebx (gcc regno = 3)
1556 4 for %esp (gcc regno = 7)
1557 5 for %ebp (gcc regno = 6)
1558 6 for %esi (gcc regno = 4)
1559 7 for %edi (gcc regno = 5)
1560 The following three DWARF register numbers are never generated by
1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 believes these numbers have these meanings.
1563 8 for %eip (no gcc equivalent)
1564 9 for %eflags (gcc regno = 17)
1565 10 for %trapno (no gcc equivalent)
1566 It is not at all clear how we should number the FP stack registers
1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 a bit less brain dead with respect to floating-point then we would
1569 have a precedent to follow with respect to DWARF register numbers
1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 broken with respect to FP registers that it is hardly worth thinking
1572 of it as something to strive for compatibility with.
1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 seem to believe that DWARF register number 11 is associated with
1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 register numbers don't seem to be associated with anything in
1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 stand that it should say that a variable lives in %st(0) (when
1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 but SDB still prints garbage when asked for the value of the
1581 variable in question (via a `/' command).
1582 (Also note that the labels SDB prints for various FP stack regs
1583 when doing an `x' command are all wrong.)
1584 Note that these problems generally don't affect the native SVR4
1585 C compiler because it doesn't allow the use of -O with -g and
1586 because when it is *not* optimizing, it allocates a memory
1587 location for each floating-point variable, and the memory
1588 location is what gets described in the DWARF AT_location
1589 attribute for the variable in question.
1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 do something sensible here and we use the following DWARF
1592 register numbers. Note that these are all stack-top-relative
1593 numbers.
1594 11 for %st(0) (gcc regno = 8)
1595 12 for %st(1) (gcc regno = 9)
1596 13 for %st(2) (gcc regno = 10)
1597 14 for %st(3) (gcc regno = 11)
1598 15 for %st(4) (gcc regno = 12)
1599 16 for %st(5) (gcc regno = 13)
1600 17 for %st(6) (gcc regno = 14)
1601 18 for %st(7) (gcc regno = 15)
1602 */
1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 {
1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1612 };
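/* For illustration: indexing the table above with a gcc register number
   yields the SVR4 DWARF number described in the comment, e.g.

     svr4_dbx_register_map[1] == 2    gcc regno 1 (%edx)   -> DWARF regno 2
     svr4_dbx_register_map[2] == 1    gcc regno 2 (%ecx)   -> DWARF regno 1
     svr4_dbx_register_map[8] == 11   gcc regno 8 (%st(0)) -> DWARF regno 11

   while -1 marks registers that have no DWARF number in this map.  */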
1613
1614 /* Test and compare insns in i386.md store the information needed to
1615 generate branch and scc insns here. */
1616
1617 rtx ix86_compare_op0 = NULL_RTX;
1618 rtx ix86_compare_op1 = NULL_RTX;
1619 rtx ix86_compare_emitted = NULL_RTX;
1620
1621 /* Define parameter passing and return registers. */
1622
1623 static int const x86_64_int_parameter_registers[6] =
1624 {
1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1626 };
1627
1628 static int const x86_64_ms_abi_int_parameter_registers[4] =
1629 {
1630 CX_REG, DX_REG, R8_REG, R9_REG
1631 };
1632
1633 static int const x86_64_int_return_registers[4] =
1634 {
1635 AX_REG, DX_REG, DI_REG, SI_REG
1636 };
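/* For illustration (a sketch of how the arrays above are used): under the
   SysV x86-64 calling convention the first six integer arguments travel in
   %rdi, %rsi, %rdx, %rcx, %r8 and %r9 in that order, the Microsoft x64 ABI
   uses only %rcx, %rdx, %r8 and %r9, and integer results come back in the
   registers listed in x86_64_int_return_registers, starting with %rax.  */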
1637
1638 /* Define the structure for the machine field in struct function. */
1639
1640 struct stack_local_entry GTY(())
1641 {
1642 unsigned short mode;
1643 unsigned short n;
1644 rtx rtl;
1645 struct stack_local_entry *next;
1646 };
1647
1648 /* Structure describing stack frame layout.
1649 Stack grows downward:
1650
1651 [arguments]
1652 <- ARG_POINTER
1653 saved pc
1654
1655 saved frame pointer if frame_pointer_needed
1656 <- HARD_FRAME_POINTER
1657 [-msave-args]
1658
1659 [padding0]
1660
1661 [saved regs]
1662
1663 [padding05]
1664
1665 [saved SSE regs]
1666
1667 [padding1] \
1668 )
1669 [va_arg registers] (
1670 > to_allocate <- FRAME_POINTER
1671 [frame] (
1672 )
1673 [padding2] /
1674 */
1675 struct ix86_frame
1676 {
1677 int nmsave_args;
1678 int padding0;
1679 int nsseregs;
1680 int padding05;
1681 int nregs;
1682 int padding1;
1683 int va_arg_size;
1684 HOST_WIDE_INT frame;
1685 int padding2;
1686 int outgoing_arguments_size;
1687 int red_zone_size;
1688
1689 HOST_WIDE_INT to_allocate;
1690 /* The offsets relative to ARG_POINTER. */
1691 HOST_WIDE_INT frame_pointer_offset;
1692 HOST_WIDE_INT hard_frame_pointer_offset;
1693 HOST_WIDE_INT stack_pointer_offset;
1694
1695 /* When save_regs_using_mov is set, emit prologue using
1696 move instead of push instructions. */
1697 bool save_regs_using_mov;
1698 };
1699
1700 /* Code model option. */
1701 enum cmodel ix86_cmodel;
1702 /* Asm dialect. */
1703 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1704 /* TLS dialects. */
1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1706
1707 /* Which unit we are generating floating point math for. */
1708 enum fpmath_unit ix86_fpmath;
1709
1710 /* Which cpu are we scheduling for. */
1711 enum attr_cpu ix86_schedule;
1712
1713 /* Which cpu are we optimizing for. */
1714 enum processor_type ix86_tune;
1715
1716 /* Which instruction set architecture to use. */
1717 enum processor_type ix86_arch;
1718
1719 /* True if the SSE prefetch instruction is not a NOP. */
1720 int x86_prefetch_sse;
1721
1722 /* ix86_regparm_string as a number */
1723 static int ix86_regparm;
1724
1725 /* -mstackrealign option */
1726 extern int ix86_force_align_arg_pointer;
1727 static const char ix86_force_align_arg_pointer_string[]
1728 = "force_align_arg_pointer";
1729
1730 static rtx (*ix86_gen_leave) (void);
1731 static rtx (*ix86_gen_pop1) (rtx);
1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1738
1739 /* Preferred alignment for stack boundary in bits. */
1740 unsigned int ix86_preferred_stack_boundary;
1741
1742 /* Alignment for incoming stack boundary in bits specified at
1743 command line. */
1744 static unsigned int ix86_user_incoming_stack_boundary;
1745
1746 /* Default alignment for incoming stack boundary in bits. */
1747 static unsigned int ix86_default_incoming_stack_boundary;
1748
1749 /* Alignment for incoming stack boundary in bits. */
1750 unsigned int ix86_incoming_stack_boundary;
1751
1752 /* Values 1-5: see jump.c */
1753 int ix86_branch_cost;
1754
1755 /* Calling abi specific va_list type nodes. */
1756 static GTY(()) tree sysv_va_list_type_node;
1757 static GTY(()) tree ms_va_list_type_node;
1758
1759 /* Variables which are this size or smaller are put in the data/bss
1760 or ldata/lbss sections. */
1761
1762 int ix86_section_threshold = 65536;
1763
1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1765 char internal_label_prefix[16];
1766 int internal_label_prefix_len;
1767
1768 /* Fence to use after loop using movnt. */
1769 tree x86_mfence;
1770
1771 static int ix86_nsaved_args (void);
1772
1773 /* Register class used for passing a given 64-bit part of the argument.
1774 These represent classes as documented by the psABI, with the exception of
1775 the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1776 uses SF or DFmode moves instead of DImode moves to avoid reformatting penalties.
1777 
1778 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1779 whenever possible (i.e. when the upper half contains only padding). */
1780 enum x86_64_reg_class
1781 {
1782 X86_64_NO_CLASS,
1783 X86_64_INTEGER_CLASS,
1784 X86_64_INTEGERSI_CLASS,
1785 X86_64_SSE_CLASS,
1786 X86_64_SSESF_CLASS,
1787 X86_64_SSEDF_CLASS,
1788 X86_64_SSEUP_CLASS,
1789 X86_64_X87_CLASS,
1790 X86_64_X87UP_CLASS,
1791 X86_64_COMPLEX_X87_CLASS,
1792 X86_64_MEMORY_CLASS
1793 };
1794
1795 #define MAX_CLASSES 4
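/* Worked example (a hedged sketch of the psABI classification these values
   model, not code the compiler runs): a 16-byte

     struct { double d; long l; };

   passed by value gets its first eightbyte classified as an SSE class
   (SSEDF here, so a DFmode move is used) and its second eightbyte as
   X86_64_INTEGER_CLASS, so it is passed in one XMM register and one
   general-purpose register; an aggregate that cannot be described with at
   most MAX_CLASSES eightbytes is classified X86_64_MEMORY_CLASS and passed
   on the stack.  */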
1796
1797 /* Table of constants used by fldpi, fldln2, etc.... */
1798 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1799 static bool ext_80387_constants_init = 0;
1800
1801
1802 static struct machine_function * ix86_init_machine_status (void);
1803 static rtx ix86_function_value (const_tree, const_tree, bool);
1804 static int ix86_function_regparm (const_tree, const_tree);
1805 static void ix86_compute_frame_layout (struct ix86_frame *);
1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1807 rtx, rtx, int);
1808 static void ix86_add_new_builtins (int);
1809
1810 enum ix86_function_specific_strings
1811 {
1812 IX86_FUNCTION_SPECIFIC_ARCH,
1813 IX86_FUNCTION_SPECIFIC_TUNE,
1814 IX86_FUNCTION_SPECIFIC_FPMATH,
1815 IX86_FUNCTION_SPECIFIC_MAX
1816 };
1817
1818 static char *ix86_target_string (int, int, const char *, const char *,
1819 const char *, bool);
1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1821 static void ix86_function_specific_save (struct cl_target_option *);
1822 static void ix86_function_specific_restore (struct cl_target_option *);
1823 static void ix86_function_specific_print (FILE *, int,
1824 struct cl_target_option *);
1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1827 static bool ix86_can_inline_p (tree, tree);
1828 static void ix86_set_current_function (tree);
1829
1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
1831
1832
1833 /* The svr4 ABI for the i386 says that records and unions are returned
1834 in memory. */
1835 #ifndef DEFAULT_PCC_STRUCT_RETURN
1836 #define DEFAULT_PCC_STRUCT_RETURN 1
1837 #endif
1838
1839 /* Whether -mtune= or -march= were specified */
1840 static int ix86_tune_defaulted;
1841 static int ix86_arch_specified;
1842
1843 /* Bit flags that specify the ISA we are compiling for. */
1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1845
1846 /* A mask of ix86_isa_flags that includes bit X if X
1847 was set or cleared on the command line. */
1848 static int ix86_isa_flags_explicit;
1849
1850 /* Define a set of ISAs which are available when a given ISA is
1851 enabled. MMX and SSE ISAs are handled separately. */
1852
1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1854 #define OPTION_MASK_ISA_3DNOW_SET \
1855 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1856
1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1858 #define OPTION_MASK_ISA_SSE2_SET \
1859 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1860 #define OPTION_MASK_ISA_SSE3_SET \
1861 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1862 #define OPTION_MASK_ISA_SSSE3_SET \
1863 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1864 #define OPTION_MASK_ISA_SSE4_1_SET \
1865 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1866 #define OPTION_MASK_ISA_SSE4_2_SET \
1867 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1868 #define OPTION_MASK_ISA_AVX_SET \
1869 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1870 #define OPTION_MASK_ISA_FMA_SET \
1871 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1872
1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1874 as -msse4.2. */
1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1876
1877 #define OPTION_MASK_ISA_SSE4A_SET \
1878 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1879 #define OPTION_MASK_ISA_SSE5_SET \
1880 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1881
1882 /* AES and PCLMUL need SSE2 because they use XMM registers. */
1883 #define OPTION_MASK_ISA_AES_SET \
1884 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1885 #define OPTION_MASK_ISA_PCLMUL_SET \
1886 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1887
1888 #define OPTION_MASK_ISA_ABM_SET \
1889 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1893
1894 /* Define a set of ISAs which aren't available when a given ISA is
1895 disabled. MMX and SSE ISAs are handled separately. */
1896
1897 #define OPTION_MASK_ISA_MMX_UNSET \
1898 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1899 #define OPTION_MASK_ISA_3DNOW_UNSET \
1900 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1902
1903 #define OPTION_MASK_ISA_SSE_UNSET \
1904 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1905 #define OPTION_MASK_ISA_SSE2_UNSET \
1906 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1907 #define OPTION_MASK_ISA_SSE3_UNSET \
1908 (OPTION_MASK_ISA_SSE3 \
1909 | OPTION_MASK_ISA_SSSE3_UNSET \
1910 | OPTION_MASK_ISA_SSE4A_UNSET )
1911 #define OPTION_MASK_ISA_SSSE3_UNSET \
1912 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1914 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1916 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1917 #define OPTION_MASK_ISA_AVX_UNSET \
1918 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1920
1921 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1922 as -mno-sse4.1. */
1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1924
1925 #define OPTION_MASK_ISA_SSE4A_UNSET \
1926 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
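/* Illustrative expansion of the masks above (an example only): the SET
   masks chain downwards, so

     OPTION_MASK_ISA_SSE3_SET == OPTION_MASK_ISA_SSE3
                                 | OPTION_MASK_ISA_SSE2
                                 | OPTION_MASK_ISA_SSE

   and -msse3 therefore also turns on SSE2 and SSE.  The UNSET masks chain
   upwards: OPTION_MASK_ISA_SSE2_UNSET also covers SSE3, SSSE3, SSE4.1,
   SSE4.2, AVX, FMA, SSE4A and SSE5, so -mno-sse2 disables every ISA that
   depends on SSE2.  */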
1934
1935 /* Vectorization library interface and handlers. */
1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1939
1940 /* Processor target table, indexed by processor number */
1941 struct ptt
1942 {
1943 const struct processor_costs *cost; /* Processor costs */
1944 const int align_loop; /* Default alignments. */
1945 const int align_loop_max_skip;
1946 const int align_jump;
1947 const int align_jump_max_skip;
1948 const int align_func;
1949 };
1950
1951 static const struct ptt processor_target_table[PROCESSOR_max] =
1952 {
1953 {&i386_cost, 4, 3, 4, 3, 4},
1954 {&i486_cost, 16, 15, 16, 15, 16},
1955 {&pentium_cost, 16, 7, 16, 7, 16},
1956 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1957 {&geode_cost, 0, 0, 0, 0, 0},
1958 {&k6_cost, 32, 7, 32, 7, 32},
1959 {&athlon_cost, 16, 7, 16, 7, 16},
1960 {&pentium4_cost, 0, 0, 0, 0, 0},
1961 {&k8_cost, 16, 7, 16, 7, 16},
1962 {&nocona_cost, 0, 0, 0, 0, 0},
1963 {&core2_cost, 16, 10, 16, 10, 16},
1964 {&generic32_cost, 16, 7, 16, 7, 16},
1965 {&generic64_cost, 16, 10, 16, 10, 16},
1966 {&amdfam10_cost, 32, 24, 32, 7, 32}
1967 };
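/* Example of how one row above is consumed (illustration only): when tuning
   for K8 the row {&k8_cost, 16, 7, 16, 7, 16} selects the k8_cost model and
   asks for loops and jumps aligned to 16 bytes with at most 7 bytes of
   padding skipped, and for functions aligned to 16 bytes; override_options
   copies these values into ix86_cost, align_loops, align_jumps and
   align_functions when the user has not set them explicitly.  */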
1968
1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1970 {
1971 "generic",
1972 "i386",
1973 "i486",
1974 "pentium",
1975 "pentium-mmx",
1976 "pentiumpro",
1977 "pentium2",
1978 "pentium3",
1979 "pentium4",
1980 "pentium-m",
1981 "prescott",
1982 "nocona",
1983 "core2",
1984 "geode",
1985 "k6",
1986 "k6-2",
1987 "k6-3",
1988 "athlon",
1989 "athlon-4",
1990 "k8",
1991 "amdfam10"
1992 };
1993
1994 /* Implement TARGET_HANDLE_OPTION. */
1995
1996 static bool
1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1998 {
1999 switch (code)
2000 {
2001 case OPT_mmmx:
2002 if (value)
2003 {
2004 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2006 }
2007 else
2008 {
2009 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2010 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2011 }
2012 return true;
2013
2014 case OPT_m3dnow:
2015 if (value)
2016 {
2017 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2019 }
2020 else
2021 {
2022 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2024 }
2025 return true;
2026
2027 case OPT_m3dnowa:
2028 return false;
2029
2030 case OPT_msse:
2031 if (value)
2032 {
2033 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2035 }
2036 else
2037 {
2038 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2039 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2040 }
2041 return true;
2042
2043 case OPT_msse2:
2044 if (value)
2045 {
2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2048 }
2049 else
2050 {
2051 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2052 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2053 }
2054 return true;
2055
2056 case OPT_msse3:
2057 if (value)
2058 {
2059 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2061 }
2062 else
2063 {
2064 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2065 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2066 }
2067 return true;
2068
2069 case OPT_mssse3:
2070 if (value)
2071 {
2072 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2074 }
2075 else
2076 {
2077 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2078 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2079 }
2080 return true;
2081
2082 case OPT_msse4_1:
2083 if (value)
2084 {
2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2087 }
2088 else
2089 {
2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2092 }
2093 return true;
2094
2095 case OPT_msse4_2:
2096 if (value)
2097 {
2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2100 }
2101 else
2102 {
2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2105 }
2106 return true;
2107
2108 case OPT_mavx:
2109 if (value)
2110 {
2111 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2113 }
2114 else
2115 {
2116 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2118 }
2119 return true;
2120
2121 case OPT_mfma:
2122 if (value)
2123 {
2124 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2126 }
2127 else
2128 {
2129 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2131 }
2132 return true;
2133
2134 case OPT_msse4:
2135 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2137 return true;
2138
2139 case OPT_mno_sse4:
2140 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2142 return true;
2143
2144 case OPT_msse4a:
2145 if (value)
2146 {
2147 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2149 }
2150 else
2151 {
2152 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2153 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2154 }
2155 return true;
2156
2157 case OPT_msse5:
2158 if (value)
2159 {
2160 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2162 }
2163 else
2164 {
2165 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2166 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2167 }
2168 return true;
2169
2170 case OPT_mabm:
2171 if (value)
2172 {
2173 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2175 }
2176 else
2177 {
2178 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2179 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2180 }
2181 return true;
2182
2183 case OPT_mpopcnt:
2184 if (value)
2185 {
2186 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2188 }
2189 else
2190 {
2191 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2192 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2193 }
2194 return true;
2195
2196 case OPT_msahf:
2197 if (value)
2198 {
2199 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2201 }
2202 else
2203 {
2204 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2205 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2206 }
2207 return true;
2208
2209 case OPT_mcx16:
2210 if (value)
2211 {
2212 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2214 }
2215 else
2216 {
2217 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2218 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2219 }
2220 return true;
2221
2222 case OPT_maes:
2223 if (value)
2224 {
2225 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2227 }
2228 else
2229 {
2230 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2232 }
2233 return true;
2234
2235 case OPT_mpclmul:
2236 if (value)
2237 {
2238 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2240 }
2241 else
2242 {
2243 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2245 }
2246 return true;
2247
2248 default:
2249 return true;
2250 }
2251 }
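/* Worked example (illustration, assuming no other ISA bits are on by
   default): for "-msse4.1 -mno-ssse3", OPT_msse4_1 first ORs in
   OPTION_MASK_ISA_SSE4_1_SET (SSE4.1, SSSE3, SSE3, SSE2, SSE); OPT_mssse3
   with value == 0 then clears OPTION_MASK_ISA_SSSE3_UNSET (SSSE3, SSE4.1,
   SSE4.2, AVX, FMA), leaving SSE3 | SSE2 | SSE in ix86_isa_flags, while
   ix86_isa_flags_explicit records every bit touched either way.  */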
2252
2253 /* Return a string that documents the current -m options. The caller is
2254 responsible for freeing the string. */
2255
2256 static char *
2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2258 const char *fpmath, bool add_nl_p)
2259 {
2260 struct ix86_target_opts
2261 {
2262 const char *option; /* option string */
2263 int mask; /* isa mask options */
2264 };
2265
2266 /* This table is ordered so that options like -msse5 or -msse4.2 that imply
2267 preceding options will match those first. */
2268 static struct ix86_target_opts isa_opts[] =
2269 {
2270 { "-m64", OPTION_MASK_ISA_64BIT },
2271 { "-msse5", OPTION_MASK_ISA_SSE5 },
2272 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2273 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2274 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2275 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2276 { "-msse3", OPTION_MASK_ISA_SSE3 },
2277 { "-msse2", OPTION_MASK_ISA_SSE2 },
2278 { "-msse", OPTION_MASK_ISA_SSE },
2279 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2280 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2281 { "-mmmx", OPTION_MASK_ISA_MMX },
2282 { "-mabm", OPTION_MASK_ISA_ABM },
2283 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2284 { "-maes", OPTION_MASK_ISA_AES },
2285 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2286 };
2287
2288 /* Flag options. */
2289 static struct ix86_target_opts flag_opts[] =
2290 {
2291 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2292 { "-m80387", MASK_80387 },
2293 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2294 { "-malign-double", MASK_ALIGN_DOUBLE },
2295 { "-mcld", MASK_CLD },
2296 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2297 { "-mieee-fp", MASK_IEEE_FP },
2298 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2299 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2300 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2301 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2302 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2303 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2304 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2305 { "-mno-red-zone", MASK_NO_RED_ZONE },
2306 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2307 { "-mrecip", MASK_RECIP },
2308 { "-mrtd", MASK_RTD },
2309 { "-msseregparm", MASK_SSEREGPARM },
2310 { "-mstack-arg-probe", MASK_STACK_PROBE },
2311 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2312 };
2313
2314 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2315
2316 char isa_other[40];
2317 char target_other[40];
2318 unsigned num = 0;
2319 unsigned i, j;
2320 char *ret;
2321 char *ptr;
2322 size_t len;
2323 size_t line_len;
2324 size_t sep_len;
2325
2326 memset (opts, '\0', sizeof (opts));
2327
2328 /* Add -march= option. */
2329 if (arch)
2330 {
2331 opts[num][0] = "-march=";
2332 opts[num++][1] = arch;
2333 }
2334
2335 /* Add -mtune= option. */
2336 if (tune)
2337 {
2338 opts[num][0] = "-mtune=";
2339 opts[num++][1] = tune;
2340 }
2341
2342 /* Pick out the options in isa options. */
2343 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2344 {
2345 if ((isa & isa_opts[i].mask) != 0)
2346 {
2347 opts[num++][0] = isa_opts[i].option;
2348 isa &= ~ isa_opts[i].mask;
2349 }
2350 }
2351
2352 if (isa && add_nl_p)
2353 {
2354 opts[num++][0] = isa_other;
2355 sprintf (isa_other, "(other isa: 0x%x)", isa);
2356 }
2357
2358 /* Add flag options. */
2359 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2360 {
2361 if ((flags & flag_opts[i].mask) != 0)
2362 {
2363 opts[num++][0] = flag_opts[i].option;
2364 flags &= ~ flag_opts[i].mask;
2365 }
2366 }
2367
2368 if (flags && add_nl_p)
2369 {
2370 opts[num++][0] = target_other;
2371 sprintf (target_other, "(other flags: 0x%x)", flags);
2372 }
2373
2374 /* Add -fpmath= option. */
2375 if (fpmath)
2376 {
2377 opts[num][0] = "-mfpmath=";
2378 opts[num++][1] = fpmath;
2379 }
2380
2381 /* Any options? */
2382 if (num == 0)
2383 return NULL;
2384
2385 gcc_assert (num < ARRAY_SIZE (opts));
2386
2387 /* Size the string. */
2388 len = 0;
2389 sep_len = (add_nl_p) ? 3 : 1;
2390 for (i = 0; i < num; i++)
2391 {
2392 len += sep_len;
2393 for (j = 0; j < 2; j++)
2394 if (opts[i][j])
2395 len += strlen (opts[i][j]);
2396 }
2397
2398 /* Build the string. */
2399 ret = ptr = (char *) xmalloc (len);
2400 line_len = 0;
2401
2402 for (i = 0; i < num; i++)
2403 {
2404 size_t len2[2];
2405
2406 for (j = 0; j < 2; j++)
2407 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2408
2409 if (i != 0)
2410 {
2411 *ptr++ = ' ';
2412 line_len++;
2413
2414 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2415 {
2416 *ptr++ = '\\';
2417 *ptr++ = '\n';
2418 line_len = 0;
2419 }
2420 }
2421
2422 for (j = 0; j < 2; j++)
2423 if (opts[i][j])
2424 {
2425 memcpy (ptr, opts[i][j], len2[j]);
2426 ptr += len2[j];
2427 line_len += len2[j];
2428 }
2429 }
2430
2431 *ptr = '\0';
2432 gcc_assert (ret + len >= ptr);
2433
2434 return ret;
2435 }
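/* Hypothetical call, for illustration only:

     char *s = ix86_target_string (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2
                                   | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX,
                                   MASK_80387, "core2", "generic", "sse", false);

   returns roughly
     "-march=core2 -mtune=generic -msse3 -msse2 -msse -mmmx -m80387 -mfpmath=sse"
   and the caller must free the buffer.  */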
2436
2437 /* Function that is callable from the debugger to print the current
2438 options. */
2439 void
2440 ix86_debug_options (void)
2441 {
2442 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2443 ix86_arch_string, ix86_tune_string,
2444 ix86_fpmath_string, true);
2445
2446 if (opts)
2447 {
2448 fprintf (stderr, "%s\n\n", opts);
2449 free (opts);
2450 }
2451 else
2452 fprintf (stderr, "<no options>\n\n");
2453
2454 return;
2455 }
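/* Example debugger usage (assumed gdb syntax, while stopped inside cc1):

     (gdb) call ix86_debug_options ()

   prints the reconstructed option string to stderr, or "<no options>" when
   nothing is set.  */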
2456
2457 /* Sometimes certain combinations of command options do not make
2458 sense on a particular target machine. You can define a macro
2459 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2460 defined, is executed once just after all the command options have
2461 been parsed.
2462
2463 Don't use this macro to turn on various extra optimizations for
2464 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2465
2466 void
2467 override_options (bool main_args_p)
2468 {
2469 int i;
2470 unsigned int ix86_arch_mask, ix86_tune_mask;
2471 const char *prefix;
2472 const char *suffix;
2473 const char *sw;
2474
2475 /* Comes from final.c -- no real reason to change it. */
2476 #define MAX_CODE_ALIGN 16
2477
2478 enum pta_flags
2479 {
2480 PTA_SSE = 1 << 0,
2481 PTA_SSE2 = 1 << 1,
2482 PTA_SSE3 = 1 << 2,
2483 PTA_MMX = 1 << 3,
2484 PTA_PREFETCH_SSE = 1 << 4,
2485 PTA_3DNOW = 1 << 5,
2486 PTA_3DNOW_A = 1 << 6,
2487 PTA_64BIT = 1 << 7,
2488 PTA_SSSE3 = 1 << 8,
2489 PTA_CX16 = 1 << 9,
2490 PTA_POPCNT = 1 << 10,
2491 PTA_ABM = 1 << 11,
2492 PTA_SSE4A = 1 << 12,
2493 PTA_NO_SAHF = 1 << 13,
2494 PTA_SSE4_1 = 1 << 14,
2495 PTA_SSE4_2 = 1 << 15,
2496 PTA_SSE5 = 1 << 16,
2497 PTA_AES = 1 << 17,
2498 PTA_PCLMUL = 1 << 18,
2499 PTA_AVX = 1 << 19,
2500 PTA_FMA = 1 << 20
2501 };
2502
2503 static struct pta
2504 {
2505 const char *const name; /* processor name or nickname. */
2506 const enum processor_type processor;
2507 const enum attr_cpu schedule;
2508 const unsigned /*enum pta_flags*/ flags;
2509 }
2510 const processor_alias_table[] =
2511 {
2512 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2513 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2514 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2515 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2516 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2517 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2518 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2519 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2520 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2521 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2522 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2523 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2524 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2525 PTA_MMX | PTA_SSE},
2526 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2527 PTA_MMX | PTA_SSE},
2528 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2529 PTA_MMX | PTA_SSE | PTA_SSE2},
2530 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2531 PTA_MMX |PTA_SSE | PTA_SSE2},
2532 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2533 PTA_MMX | PTA_SSE | PTA_SSE2},
2534 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2535 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2536 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2537 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2538 | PTA_CX16 | PTA_NO_SAHF},
2539 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2540 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2541 | PTA_SSSE3 | PTA_CX16},
2542 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE},
2544 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2545 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2546 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2547 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2548 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2549 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2550 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2551 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2552 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2553 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2554 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2555 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2557 {"x86-64", PROCESSOR_K8, CPU_K8,
2558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2559 {"k8", PROCESSOR_K8, CPU_K8,
2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561 | PTA_SSE2 | PTA_NO_SAHF},
2562 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2565 {"opteron", PROCESSOR_K8, CPU_K8,
2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567 | PTA_SSE2 | PTA_NO_SAHF},
2568 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2571 {"athlon64", PROCESSOR_K8, CPU_K8,
2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573 | PTA_SSE2 | PTA_NO_SAHF},
2574 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2577 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2578 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2579 | PTA_SSE2 | PTA_NO_SAHF},
2580 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2581 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2582 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2583 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2584 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2585 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2586 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2587 0 /* flags are only used for -march switch. */ },
2588 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2589 PTA_64BIT /* flags are only used for -march switch. */ },
2590 };
2591
2592 int const pta_size = ARRAY_SIZE (processor_alias_table);
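  /* Example of how the alias table drives the defaults below (illustration
     only): -march=core2 matches the "core2" entry, so ix86_arch becomes
     PROCESSOR_CORE2 and ix86_schedule CPU_CORE2, and the PTA_MMX, PTA_SSE,
     PTA_SSE2, PTA_SSE3, PTA_SSSE3 and PTA_CX16 bits switch on the matching
     OPTION_MASK_ISA_* flags unless the user overrode them explicitly.  */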
2593
2594 /* Set up prefix/suffix so the error messages refer to either the command
2595 line argument, or the attribute(target). */
2596 if (main_args_p)
2597 {
2598 prefix = "-m";
2599 suffix = "";
2600 sw = "switch";
2601 }
2602 else
2603 {
2604 prefix = "option(\"";
2605 suffix = "\")";
2606 sw = "attribute";
2607 }
2608
2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2610 SUBTARGET_OVERRIDE_OPTIONS;
2611 #endif
2612
2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2614 SUBSUBTARGET_OVERRIDE_OPTIONS;
2615 #endif
2616
2617 /* -fPIC is the default for x86_64. */
2618 if (TARGET_MACHO && TARGET_64BIT)
2619 flag_pic = 2;
2620
2621 /* Set the default values for switches whose default depends on TARGET_64BIT
2622 in case they weren't overwritten by command line options. */
2623 if (TARGET_64BIT)
2624 {
2625 /* Mach-O doesn't support omitting the frame pointer for now. */
2626 if (flag_omit_frame_pointer == 2)
2627 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2628 if (flag_asynchronous_unwind_tables == 2)
2629 flag_asynchronous_unwind_tables = 1;
2630 if (flag_pcc_struct_return == 2)
2631 flag_pcc_struct_return = 0;
2632 }
2633 else
2634 {
2635 if (flag_omit_frame_pointer == 2)
2636 flag_omit_frame_pointer = 0;
2637 if (flag_asynchronous_unwind_tables == 2)
2638 flag_asynchronous_unwind_tables = 0;
2639 if (flag_pcc_struct_return == 2)
2640 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2641 }
2642
2643 /* Need to check -mtune=generic first. */
2644 if (ix86_tune_string)
2645 {
2646 if (!strcmp (ix86_tune_string, "generic")
2647 || !strcmp (ix86_tune_string, "i686")
2648 /* As special support for cross compilers we read -mtune=native
2649 as -mtune=generic. With native compilers we won't see the
2650 -mtune=native, as it was changed by the driver. */
2651 || !strcmp (ix86_tune_string, "native"))
2652 {
2653 if (TARGET_64BIT)
2654 ix86_tune_string = "generic64";
2655 else
2656 ix86_tune_string = "generic32";
2657 }
2658 /* If this call is for setting the option attribute, allow the
2659 generic32/generic64 that was previously set. */
2660 else if (!main_args_p
2661 && (!strcmp (ix86_tune_string, "generic32")
2662 || !strcmp (ix86_tune_string, "generic64")))
2663 ;
2664 else if (!strncmp (ix86_tune_string, "generic", 7))
2665 error ("bad value (%s) for %stune=%s %s",
2666 ix86_tune_string, prefix, suffix, sw);
2667 }
2668 else
2669 {
2670 if (ix86_arch_string)
2671 ix86_tune_string = ix86_arch_string;
2672 if (!ix86_tune_string)
2673 {
2674 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2675 ix86_tune_defaulted = 1;
2676 }
2677
2678 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2679 need to use a sensible tune option. */
2680 if (!strcmp (ix86_tune_string, "generic")
2681 || !strcmp (ix86_tune_string, "x86-64")
2682 || !strcmp (ix86_tune_string, "i686"))
2683 {
2684 if (TARGET_64BIT)
2685 ix86_tune_string = "generic64";
2686 else
2687 ix86_tune_string = "generic32";
2688 }
2689 }
2690 if (ix86_stringop_string)
2691 {
2692 if (!strcmp (ix86_stringop_string, "rep_byte"))
2693 stringop_alg = rep_prefix_1_byte;
2694 else if (!strcmp (ix86_stringop_string, "libcall"))
2695 stringop_alg = libcall;
2696 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2697 stringop_alg = rep_prefix_4_byte;
2698 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2699 && TARGET_64BIT)
2700 /* rep; movq isn't available in 32-bit code. */
2701 stringop_alg = rep_prefix_8_byte;
2702 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2703 stringop_alg = loop_1_byte;
2704 else if (!strcmp (ix86_stringop_string, "loop"))
2705 stringop_alg = loop;
2706 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2707 stringop_alg = unrolled_loop;
2708 else
2709 error ("bad value (%s) for %sstringop-strategy=%s %s",
2710 ix86_stringop_string, prefix, suffix, sw);
2711 }
2712 if (!strcmp (ix86_tune_string, "x86-64"))
2713 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2714 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2715 prefix, suffix, prefix, suffix, prefix, suffix);
2716
2717 if (!ix86_arch_string)
2718 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2719 else
2720 ix86_arch_specified = 1;
2721
2722 if (!strcmp (ix86_arch_string, "generic"))
2723 error ("generic CPU can be used only for %stune=%s %s",
2724 prefix, suffix, sw);
2725 if (!strncmp (ix86_arch_string, "generic", 7))
2726 error ("bad value (%s) for %sarch=%s %s",
2727 ix86_arch_string, prefix, suffix, sw);
2728
2729 if (ix86_cmodel_string != 0)
2730 {
2731 if (!strcmp (ix86_cmodel_string, "small"))
2732 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2733 else if (!strcmp (ix86_cmodel_string, "medium"))
2734 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2735 else if (!strcmp (ix86_cmodel_string, "large"))
2736 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2737 else if (flag_pic)
2738 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2739 else if (!strcmp (ix86_cmodel_string, "32"))
2740 ix86_cmodel = CM_32;
2741 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2742 ix86_cmodel = CM_KERNEL;
2743 else
2744 error ("bad value (%s) for %scmodel=%s %s",
2745 ix86_cmodel_string, prefix, suffix, sw);
2746 }
2747 else
2748 {
2749 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2750 use of rip-relative addressing. This eliminates fixups that
2751 would otherwise be needed if this object is to be placed in a
2752 DLL, and is essentially just as efficient as direct addressing. */
2753 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2754 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2755 else if (TARGET_64BIT)
2756 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2757 else
2758 ix86_cmodel = CM_32;
2759 }
2760 if (ix86_asm_string != 0)
2761 {
2762 if (! TARGET_MACHO
2763 && !strcmp (ix86_asm_string, "intel"))
2764 ix86_asm_dialect = ASM_INTEL;
2765 else if (!strcmp (ix86_asm_string, "att"))
2766 ix86_asm_dialect = ASM_ATT;
2767 else
2768 error ("bad value (%s) for %sasm=%s %s",
2769 ix86_asm_string, prefix, suffix, sw);
2770 }
2771 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2772 error ("code model %qs not supported in the %s bit mode",
2773 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2774 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2775 sorry ("%i-bit mode not compiled in",
2776 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2777
2778 for (i = 0; i < pta_size; i++)
2779 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2780 {
2781 ix86_schedule = processor_alias_table[i].schedule;
2782 ix86_arch = processor_alias_table[i].processor;
2783 /* Default cpu tuning to the architecture. */
2784 ix86_tune = ix86_arch;
2785
2786 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2787 error ("CPU you selected does not support x86-64 "
2788 "instruction set");
2789
2790 if (processor_alias_table[i].flags & PTA_MMX
2791 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2792 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2793 if (processor_alias_table[i].flags & PTA_3DNOW
2794 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2795 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2796 if (processor_alias_table[i].flags & PTA_3DNOW_A
2797 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2798 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2799 if (processor_alias_table[i].flags & PTA_SSE
2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2801 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2802 if (processor_alias_table[i].flags & PTA_SSE2
2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2804 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2805 if (processor_alias_table[i].flags & PTA_SSE3
2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2807 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2808 if (processor_alias_table[i].flags & PTA_SSSE3
2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2810 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2811 if (processor_alias_table[i].flags & PTA_SSE4_1
2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2814 if (processor_alias_table[i].flags & PTA_SSE4_2
2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2817 if (processor_alias_table[i].flags & PTA_AVX
2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2819 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2820 if (processor_alias_table[i].flags & PTA_FMA
2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2822 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2823 if (processor_alias_table[i].flags & PTA_SSE4A
2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2825 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2826 if (processor_alias_table[i].flags & PTA_SSE5
2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2828 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2829 if (processor_alias_table[i].flags & PTA_ABM
2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2831 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2832 if (processor_alias_table[i].flags & PTA_CX16
2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2834 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2835 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2836 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2837 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2838 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2839 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2840 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2841 if (processor_alias_table[i].flags & PTA_AES
2842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2843 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2844 if (processor_alias_table[i].flags & PTA_PCLMUL
2845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2846 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2847 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2848 x86_prefetch_sse = true;
2849
2850 break;
2851 }
2852
2853 if (i == pta_size)
2854 error ("bad value (%s) for %sarch=%s %s",
2855 ix86_arch_string, prefix, suffix, sw);
2856
2857 ix86_arch_mask = 1u << ix86_arch;
2858 for (i = 0; i < X86_ARCH_LAST; ++i)
2859 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2860
2861 for (i = 0; i < pta_size; i++)
2862 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2863 {
2864 ix86_schedule = processor_alias_table[i].schedule;
2865 ix86_tune = processor_alias_table[i].processor;
2866 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2867 {
2868 if (ix86_tune_defaulted)
2869 {
2870 ix86_tune_string = "x86-64";
2871 for (i = 0; i < pta_size; i++)
2872 if (! strcmp (ix86_tune_string,
2873 processor_alias_table[i].name))
2874 break;
2875 ix86_schedule = processor_alias_table[i].schedule;
2876 ix86_tune = processor_alias_table[i].processor;
2877 }
2878 else
2879 error ("CPU you selected does not support x86-64 "
2880 "instruction set");
2881 }
2882
2883 /* Intel CPUs have always interpreted SSE prefetch instructions as
2884 NOPs; so, we can enable SSE prefetch instructions even when
2885 -mtune (rather than -march) points us to a processor that has them.
2886 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2887 higher processors. */
2888 if (TARGET_CMOVE
2889 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2890 x86_prefetch_sse = true;
2891 break;
2892 }
2893 if (i == pta_size)
2894 error ("bad value (%s) for %stune=%s %s",
2895 ix86_tune_string, prefix, suffix, sw);
2896
2897 ix86_tune_mask = 1u << ix86_tune;
2898 for (i = 0; i < X86_TUNE_LAST; ++i)
2899 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2900
2901 if (optimize_size)
2902 ix86_cost = &ix86_size_cost;
2903 else
2904 ix86_cost = processor_target_table[ix86_tune].cost;
2905
2906 /* Arrange to set up i386_stack_locals for all functions. */
2907 init_machine_status = ix86_init_machine_status;
2908
2909 /* Validate -mregparm= value. */
2910 if (ix86_regparm_string)
2911 {
2912 if (TARGET_64BIT)
2913 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2914 i = atoi (ix86_regparm_string);
2915 if (i < 0 || i > REGPARM_MAX)
2916 error ("%sregparm=%d%s is not between 0 and %d",
2917 prefix, i, suffix, REGPARM_MAX);
2918 else
2919 ix86_regparm = i;
2920 }
2921 if (TARGET_64BIT)
2922 ix86_regparm = REGPARM_MAX;
2923
2924 /* If the user has provided any of the -malign-* options,
2925 warn and use that value only if -falign-* is not set.
2926 Remove this code in GCC 3.2 or later. */
2927 if (ix86_align_loops_string)
2928 {
2929 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2930 prefix, suffix, suffix);
2931 if (align_loops == 0)
2932 {
2933 i = atoi (ix86_align_loops_string);
2934 if (i < 0 || i > MAX_CODE_ALIGN)
2935 error ("%salign-loops=%d%s is not between 0 and %d",
2936 prefix, i, suffix, MAX_CODE_ALIGN);
2937 else
2938 align_loops = 1 << i;
2939 }
2940 }
2941
2942 if (ix86_align_jumps_string)
2943 {
2944 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2945 prefix, suffix, suffix);
2946 if (align_jumps == 0)
2947 {
2948 i = atoi (ix86_align_jumps_string);
2949 if (i < 0 || i > MAX_CODE_ALIGN)
2950 error ("%salign-loops=%d%s is not between 0 and %d",
2951 prefix, i, suffix, MAX_CODE_ALIGN);
2952 else
2953 align_jumps = 1 << i;
2954 }
2955 }
2956
2957 if (ix86_align_funcs_string)
2958 {
2959 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2960 prefix, suffix, suffix);
2961 if (align_functions == 0)
2962 {
2963 i = atoi (ix86_align_funcs_string);
2964 if (i < 0 || i > MAX_CODE_ALIGN)
2965 error ("%salign-loops=%d%s is not between 0 and %d",
2966 prefix, i, suffix, MAX_CODE_ALIGN);
2967 else
2968 align_functions = 1 << i;
2969 }
2970 }
2971
2972 /* Default align_* from the processor table. */
2973 if (align_loops == 0)
2974 {
2975 align_loops = processor_target_table[ix86_tune].align_loop;
2976 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2977 }
2978 if (align_jumps == 0)
2979 {
2980 align_jumps = processor_target_table[ix86_tune].align_jump;
2981 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2982 }
2983 if (align_functions == 0)
2984 {
2985 align_functions = processor_target_table[ix86_tune].align_func;
2986 }
2987
2988 /* Validate -mbranch-cost= value, or provide default. */
2989 ix86_branch_cost = ix86_cost->branch_cost;
2990 if (ix86_branch_cost_string)
2991 {
2992 i = atoi (ix86_branch_cost_string);
2993 if (i < 0 || i > 5)
2994 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2995 else
2996 ix86_branch_cost = i;
2997 }
2998 if (ix86_section_threshold_string)
2999 {
3000 i = atoi (ix86_section_threshold_string);
3001 if (i < 0)
3002 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3003 else
3004 ix86_section_threshold = i;
3005 }
3006
3007 if (ix86_tls_dialect_string)
3008 {
3009 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3010 ix86_tls_dialect = TLS_DIALECT_GNU;
3011 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3012 ix86_tls_dialect = TLS_DIALECT_GNU2;
3013 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3014 ix86_tls_dialect = TLS_DIALECT_SUN;
3015 else
3016 error ("bad value (%s) for %stls-dialect=%s %s",
3017 ix86_tls_dialect_string, prefix, suffix, sw);
3018 }
3019
3020 if (ix87_precision_string)
3021 {
3022 i = atoi (ix87_precision_string);
3023 if (i != 32 && i != 64 && i != 80)
3024 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
3025 }
3026
3027 if (TARGET_64BIT)
3028 {
3029 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3030
3031 /* Enable by default the SSE and MMX builtins. Do allow the user to
3032 explicitly disable any of these. In particular, disabling SSE and
3033 MMX for kernel code is extremely useful. */
3034 if (!ix86_arch_specified)
3035 ix86_isa_flags
3036 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3037 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3038
3039 if (TARGET_RTD)
3040 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix);
3041 }
3042 else
3043 {
3044 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3045
3046 if (!ix86_arch_specified)
3047 ix86_isa_flags
3048 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3049
3050 /* The i386 ABI does not specify a red zone. It still makes sense to use it
3051 when the programmer takes care to keep the stack from being destroyed. */
3052 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3053 target_flags |= MASK_NO_RED_ZONE;
3054 }
3055
3056 /* Keep nonleaf frame pointers. */
3057 if (flag_omit_frame_pointer)
3058 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3059 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3060 flag_omit_frame_pointer = 1;
3061
3062 /* If we're doing fast math, we don't care about comparison order
3063 wrt NaNs. This lets us use a shorter comparison sequence. */
3064 if (flag_finite_math_only)
3065 target_flags &= ~MASK_IEEE_FP;
3066
3067 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3068 since the insns won't need emulation. */
3069 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3070 target_flags &= ~MASK_NO_FANCY_MATH_387;
3071
3072 /* Likewise, if the target doesn't have a 387, or we've specified
3073 software floating point, don't use 387 inline intrinsics. */
3074 if (!TARGET_80387)
3075 target_flags |= MASK_NO_FANCY_MATH_387;
3076
3077 /* Turn on MMX builtins for -msse. */
3078 if (TARGET_SSE)
3079 {
3080 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3081 x86_prefetch_sse = true;
3082 }
3083
3084 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3085 if (TARGET_SSE4_2 || TARGET_ABM)
3086 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3087
3088 if (!TARGET_64BIT && TARGET_SAVE_ARGS)
3089 error ("-msave-args makes no sense in the 32-bit mode");
3090
3091 /* Validate -mpreferred-stack-boundary= value or default it to
3092 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3093 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3094 if (ix86_preferred_stack_boundary_string)
3095 {
3096 i = atoi (ix86_preferred_stack_boundary_string);
3097 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3098 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3099 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3100 else
3101 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3102 }
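  /* Worked example of the arithmetic above (illustration):
     -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
     = 16 * 8 = 128 bits, i.e. 16-byte stack alignment; the smallest
     accepted value is 4 (16 bytes) in 64-bit mode and 2 (4 bytes) in
     32-bit mode.  */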
3103
3104 /* Set the default value for -mstackrealign. */
3105 if (ix86_force_align_arg_pointer == -1)
3106 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3107
3108 /* Validate -mincoming-stack-boundary= value or default it to
3109 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3110 if (ix86_force_align_arg_pointer)
3111 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3112 else
3113 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3114 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3115 if (ix86_incoming_stack_boundary_string)
3116 {
3117 i = atoi (ix86_incoming_stack_boundary_string);
3118 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3119 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3120 i, TARGET_64BIT ? 4 : 2);
3121 else
3122 {
3123 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3124 ix86_incoming_stack_boundary
3125 = ix86_user_incoming_stack_boundary;
3126 }
3127 }
3128
3129 /* Accept -msseregparm only if at least SSE support is enabled. */
3130 if (TARGET_SSEREGPARM
3131 && ! TARGET_SSE)
3132 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3133
3134 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3135 if (ix86_fpmath_string != 0)
3136 {
3137 if (! strcmp (ix86_fpmath_string, "387"))
3138 ix86_fpmath = FPMATH_387;
3139 else if (! strcmp (ix86_fpmath_string, "sse"))
3140 {
3141 if (!TARGET_SSE)
3142 {
3143 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3144 ix86_fpmath = FPMATH_387;
3145 }
3146 else
3147 ix86_fpmath = FPMATH_SSE;
3148 }
3149 else if (! strcmp (ix86_fpmath_string, "387,sse")
3150 || ! strcmp (ix86_fpmath_string, "387+sse")
3151 || ! strcmp (ix86_fpmath_string, "sse,387")
3152 || ! strcmp (ix86_fpmath_string, "sse+387")
3153 || ! strcmp (ix86_fpmath_string, "both"))
3154 {
3155 if (!TARGET_SSE)
3156 {
3157 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3158 ix86_fpmath = FPMATH_387;
3159 }
3160 else if (!TARGET_80387)
3161 {
3162 warning (0, "387 instruction set disabled, using SSE arithmetics");
3163 ix86_fpmath = FPMATH_SSE;
3164 }
3165 else
3166 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3167 }
3168 else
3169 error ("bad value (%s) for %sfpmath=%s %s",
3170 ix86_fpmath_string, prefix, suffix, sw);
3171 }
3172
3173 /* If the i387 is disabled, then do not return values in it. */
3174 if (!TARGET_80387)
3175 target_flags &= ~MASK_FLOAT_RETURNS;
3176
3177 /* Use external vectorized library in vectorizing intrinsics. */
3178 if (ix86_veclibabi_string)
3179 {
3180 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3181 ix86_veclib_handler = ix86_veclibabi_svml;
3182 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3183 ix86_veclib_handler = ix86_veclibabi_acml;
3184 else
3185 error ("unknown vectorization library ABI type (%s) for "
3186 "%sveclibabi=%s %s", ix86_veclibabi_string,
3187 prefix, suffix, sw);
3188 }
3189
3190 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3191 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3192 && !optimize_size)
3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3194
3195 /* ??? Unwind info is not correct around the CFG unless either a frame
3196 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3197 unwind info generation to be aware of the CFG and propagating states
3198 around edges. */
3199 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3200 || flag_exceptions || flag_non_call_exceptions)
3201 && flag_omit_frame_pointer
3202 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3203 {
3204 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3205 warning (0, "unwind tables currently require either a frame pointer "
3206 "or %saccumulate-outgoing-args%s for correctness",
3207 prefix, suffix);
3208 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3209 }
3210
3211 /* If stack probes are required, the space used for large function
3212 arguments on the stack must also be probed, so enable
3213 -maccumulate-outgoing-args so this happens in the prologue. */
3214 if (TARGET_STACK_PROBE
3215 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3216 {
3217 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3218 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3219 "for correctness", prefix, suffix);
3220 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3221 }
3222
  /* For sane SSE instruction set generation we need the fcomi instruction.
     It is safe to enable all CMOVE instructions.  */
3225 if (TARGET_SSE)
3226 TARGET_CMOVE = 1;
3227
3228 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3229 {
3230 char *p;
3231 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3232 p = strchr (internal_label_prefix, 'X');
3233 internal_label_prefix_len = p - internal_label_prefix;
3234 *p = '\0';
3235 }
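  /* For example, on a typical ELF configuration this generates a label such
     as "*.LX0"; everything up to (but not including) the 'X' -- here "*.L" --
     becomes the internal label prefix.  */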
3236
  /* When a scheduling description is not available, disable the scheduler
     pass so that it won't slow down compilation and make x87 code slower.  */
3239 if (!TARGET_SCHEDULE)
3240 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3241
3242 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3243 set_param_value ("simultaneous-prefetches",
3244 ix86_cost->simultaneous_prefetches);
3245 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3246 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3247 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3248 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3249 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3250 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3251
3252 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3253 can be optimized to ap = __builtin_next_arg (0). */
3254 if (!TARGET_64BIT)
3255 targetm.expand_builtin_va_start = NULL;
3256
3257 if (TARGET_64BIT)
3258 {
3259 ix86_gen_leave = gen_leave_rex64;
3260 ix86_gen_pop1 = gen_popdi1;
3261 ix86_gen_add3 = gen_adddi3;
3262 ix86_gen_sub3 = gen_subdi3;
3263 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3264 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3265 ix86_gen_monitor = gen_sse3_monitor64;
3266 ix86_gen_andsp = gen_anddi3;
3267 }
3268 else
3269 {
3270 ix86_gen_leave = gen_leave;
3271 ix86_gen_pop1 = gen_popsi1;
3272 ix86_gen_add3 = gen_addsi3;
3273 ix86_gen_sub3 = gen_subsi3;
3274 ix86_gen_sub3_carry = gen_subsi3_carry;
3275 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3276 ix86_gen_monitor = gen_sse3_monitor;
3277 ix86_gen_andsp = gen_andsi3;
3278 }
3279
3280 #ifdef USE_IX86_CLD
3281 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3282 if (!TARGET_64BIT)
3283 target_flags |= MASK_CLD & ~target_flags_explicit;
3284 #endif
3285
  /* Save the initial options in case the user uses function-specific options.  */
3287 if (main_args_p)
3288 target_option_default_node = target_option_current_node
3289 = build_target_option_node ();
3290 }
3291
3292 /* Update register usage after having seen the compiler flags. */
3293
3294 void
3295 ix86_conditional_register_usage (void)
3296 {
3297 int i;
3298 unsigned int j;
3299
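  /* A value greater than 1 in fixed_regs[] or call_used_regs[] encodes a
     mode-dependent default: per the test below, 2 means the register is
     fixed (or call-used) only for 32-bit code, 3 only for 64-bit code.  */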
3300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3301 {
3302 if (fixed_regs[i] > 1)
3303 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3304 if (call_used_regs[i] > 1)
3305 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3306 }
3307
3308 /* The PIC register, if it exists, is fixed. */
3309 j = PIC_OFFSET_TABLE_REGNUM;
3310 if (j != INVALID_REGNUM)
3311 fixed_regs[j] = call_used_regs[j] = 1;
3312
3313 /* The MS_ABI changes the set of call-used registers. */
3314 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3315 {
3316 call_used_regs[SI_REG] = 0;
3317 call_used_regs[DI_REG] = 0;
3318 call_used_regs[XMM6_REG] = 0;
3319 call_used_regs[XMM7_REG] = 0;
3320 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3321 call_used_regs[i] = 0;
3322 }
3323
3324 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3325 other call-clobbered regs for 64-bit. */
3326 if (TARGET_64BIT)
3327 {
3328 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3329
3330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3331 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3332 && call_used_regs[i])
3333 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3334 }
3335
3336 /* If MMX is disabled, squash the registers. */
3337 if (! TARGET_MMX)
3338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3339 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3340 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3341
3342 /* If SSE is disabled, squash the registers. */
3343 if (! TARGET_SSE)
3344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3345 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3347
3348 /* If the FPU is disabled, squash the registers. */
3349 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3351 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3353
3354 /* If 32-bit, squash the 64-bit registers. */
3355 if (! TARGET_64BIT)
3356 {
3357 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3358 reg_names[i] = "";
3359 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3360 reg_names[i] = "";
3361 }
3362 }
3363
3364
3365 /* Save the current options */
3366
3367 static void
3368 ix86_function_specific_save (struct cl_target_option *ptr)
3369 {
3370 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3371 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3372 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3373 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3374 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3375
3376 ptr->arch = ix86_arch;
3377 ptr->schedule = ix86_schedule;
3378 ptr->tune = ix86_tune;
3379 ptr->fpmath = ix86_fpmath;
3380 ptr->branch_cost = ix86_branch_cost;
3381 ptr->tune_defaulted = ix86_tune_defaulted;
3382 ptr->arch_specified = ix86_arch_specified;
3383 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3384 ptr->target_flags_explicit = target_flags_explicit;
3385 }
3386
3387 /* Restore the current options */
3388
3389 static void
3390 ix86_function_specific_restore (struct cl_target_option *ptr)
3391 {
3392 enum processor_type old_tune = ix86_tune;
3393 enum processor_type old_arch = ix86_arch;
3394 unsigned int ix86_arch_mask, ix86_tune_mask;
3395 int i;
3396
3397 ix86_arch = ptr->arch;
3398 ix86_schedule = ptr->schedule;
3399 ix86_tune = ptr->tune;
3400 ix86_fpmath = ptr->fpmath;
3401 ix86_branch_cost = ptr->branch_cost;
3402 ix86_tune_defaulted = ptr->tune_defaulted;
3403 ix86_arch_specified = ptr->arch_specified;
3404 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3405 target_flags_explicit = ptr->target_flags_explicit;
3406
3407 /* Recreate the arch feature tests if the arch changed */
3408 if (old_arch != ix86_arch)
3409 {
3410 ix86_arch_mask = 1u << ix86_arch;
3411 for (i = 0; i < X86_ARCH_LAST; ++i)
3412 ix86_arch_features[i]
3413 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3414 }
3415
3416 /* Recreate the tune optimization tests */
3417 if (old_tune != ix86_tune)
3418 {
3419 ix86_tune_mask = 1u << ix86_tune;
3420 for (i = 0; i < X86_TUNE_LAST; ++i)
3421 ix86_tune_features[i]
3422 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3423 }
3424 }
3425
3426 /* Print the current options */
3427
3428 static void
3429 ix86_function_specific_print (FILE *file, int indent,
3430 struct cl_target_option *ptr)
3431 {
3432 char *target_string
3433 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3434 NULL, NULL, NULL, false);
3435
3436 fprintf (file, "%*sarch = %d (%s)\n",
3437 indent, "",
3438 ptr->arch,
3439 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3440 ? cpu_names[ptr->arch]
3441 : "<unknown>"));
3442
3443 fprintf (file, "%*stune = %d (%s)\n",
3444 indent, "",
3445 ptr->tune,
3446 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3447 ? cpu_names[ptr->tune]
3448 : "<unknown>"));
3449
3450 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3451 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3452 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3453 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3454
3455 if (target_string)
3456 {
3457 fprintf (file, "%*s%s\n", indent, "", target_string);
3458 free (target_string);
3459 }
3460 }
3461
3462
/* Inner function to process the attribute ((target (...))): take an argument
   and set the current options from that argument.  If the argument is a list,
   recursively process each element of the list.  */
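/* For example, a declaration along the lines of

     int foo (void) __attribute__ ((target ("sse4.1,fpmath=sse")));

   (purely illustrative) ends up here with the string "sse4.1,fpmath=sse",
   which is split on commas and matched entry by entry against the attrs[]
   table below.  */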
3466
3467 static bool
3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3469 {
3470 char *next_optstr;
3471 bool ret = true;
3472
3473 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3474 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3476 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3477
3478 enum ix86_opt_type
3479 {
3480 ix86_opt_unknown,
3481 ix86_opt_yes,
3482 ix86_opt_no,
3483 ix86_opt_str,
3484 ix86_opt_isa
3485 };
3486
3487 static const struct
3488 {
3489 const char *string;
3490 size_t len;
3491 enum ix86_opt_type type;
3492 int opt;
3493 int mask;
3494 } attrs[] = {
3495 /* isa options */
3496 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3497 IX86_ATTR_ISA ("abm", OPT_mabm),
3498 IX86_ATTR_ISA ("aes", OPT_maes),
3499 IX86_ATTR_ISA ("avx", OPT_mavx),
3500 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3501 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3502 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3503 IX86_ATTR_ISA ("sse", OPT_msse),
3504 IX86_ATTR_ISA ("sse2", OPT_msse2),
3505 IX86_ATTR_ISA ("sse3", OPT_msse3),
3506 IX86_ATTR_ISA ("sse4", OPT_msse4),
3507 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3508 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3509 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3510 IX86_ATTR_ISA ("sse5", OPT_msse5),
3511 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3512
3513 /* string options */
3514 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3515 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3516 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3517
3518 /* flag options */
3519 IX86_ATTR_YES ("cld",
3520 OPT_mcld,
3521 MASK_CLD),
3522
3523 IX86_ATTR_NO ("fancy-math-387",
3524 OPT_mfancy_math_387,
3525 MASK_NO_FANCY_MATH_387),
3526
3527 IX86_ATTR_NO ("fused-madd",
3528 OPT_mfused_madd,
3529 MASK_NO_FUSED_MADD),
3530
3531 IX86_ATTR_YES ("ieee-fp",
3532 OPT_mieee_fp,
3533 MASK_IEEE_FP),
3534
3535 IX86_ATTR_YES ("inline-all-stringops",
3536 OPT_minline_all_stringops,
3537 MASK_INLINE_ALL_STRINGOPS),
3538
3539 IX86_ATTR_YES ("inline-stringops-dynamically",
3540 OPT_minline_stringops_dynamically,
3541 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3542
3543 IX86_ATTR_NO ("align-stringops",
3544 OPT_mno_align_stringops,
3545 MASK_NO_ALIGN_STRINGOPS),
3546
3547 IX86_ATTR_YES ("recip",
3548 OPT_mrecip,
3549 MASK_RECIP),
3550
3551 };
3552
3553 /* If this is a list, recurse to get the options. */
3554 if (TREE_CODE (args) == TREE_LIST)
3555 {
3556 bool ret = true;
3557
3558 for (; args; args = TREE_CHAIN (args))
3559 if (TREE_VALUE (args)
3560 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3561 ret = false;
3562
3563 return ret;
3564 }
3565
3566 else if (TREE_CODE (args) != STRING_CST)
3567 gcc_unreachable ();
3568
3569 /* Handle multiple arguments separated by commas. */
3570 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3571
3572 while (next_optstr && *next_optstr != '\0')
3573 {
3574 char *p = next_optstr;
3575 char *orig_p = p;
3576 char *comma = strchr (next_optstr, ',');
3577 const char *opt_string;
3578 size_t len, opt_len;
3579 int opt;
3580 bool opt_set_p;
3581 char ch;
3582 unsigned i;
3583 enum ix86_opt_type type = ix86_opt_unknown;
3584 int mask = 0;
3585
3586 if (comma)
3587 {
3588 *comma = '\0';
3589 len = comma - next_optstr;
3590 next_optstr = comma + 1;
3591 }
3592 else
3593 {
3594 len = strlen (p);
3595 next_optstr = NULL;
3596 }
3597
3598 /* Recognize no-xxx. */
3599 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3600 {
3601 opt_set_p = false;
3602 p += 3;
3603 len -= 3;
3604 }
3605 else
3606 opt_set_p = true;
3607
3608 /* Find the option. */
3609 ch = *p;
3610 opt = N_OPTS;
3611 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3612 {
3613 type = attrs[i].type;
3614 opt_len = attrs[i].len;
3615 if (ch == attrs[i].string[0]
3616 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3617 && memcmp (p, attrs[i].string, opt_len) == 0)
3618 {
3619 opt = attrs[i].opt;
3620 mask = attrs[i].mask;
3621 opt_string = attrs[i].string;
3622 break;
3623 }
3624 }
3625
3626 /* Process the option. */
3627 if (opt == N_OPTS)
3628 {
3629 error ("attribute(target(\"%s\")) is unknown", orig_p);
3630 ret = false;
3631 }
3632
3633 else if (type == ix86_opt_isa)
3634 ix86_handle_option (opt, p, opt_set_p);
3635
3636 else if (type == ix86_opt_yes || type == ix86_opt_no)
3637 {
3638 if (type == ix86_opt_no)
3639 opt_set_p = !opt_set_p;
3640
3641 if (opt_set_p)
3642 target_flags |= mask;
3643 else
3644 target_flags &= ~mask;
3645 }
3646
3647 else if (type == ix86_opt_str)
3648 {
3649 if (p_strings[opt])
3650 {
3651 error ("option(\"%s\") was already specified", opt_string);
3652 ret = false;
3653 }
3654 else
3655 p_strings[opt] = xstrdup (p + opt_len);
3656 }
3657
3658 else
3659 gcc_unreachable ();
3660 }
3661
3662 return ret;
3663 }
3664
3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3666
3667 tree
3668 ix86_valid_target_attribute_tree (tree args)
3669 {
3670 const char *orig_arch_string = ix86_arch_string;
3671 const char *orig_tune_string = ix86_tune_string;
3672 const char *orig_fpmath_string = ix86_fpmath_string;
3673 int orig_tune_defaulted = ix86_tune_defaulted;
3674 int orig_arch_specified = ix86_arch_specified;
3675 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3676 tree t = NULL_TREE;
3677 int i;
3678 struct cl_target_option *def
3679 = TREE_TARGET_OPTION (target_option_default_node);
3680
3681 /* Process each of the options on the chain. */
3682 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3683 return NULL_TREE;
3684
  /* If the changed options are different from the default, rerun
     override_options, and then save the options away.  The string options
     are attribute options, and will be undone when we copy the save
     structure.  */
3688 if (ix86_isa_flags != def->ix86_isa_flags
3689 || target_flags != def->target_flags
3690 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3691 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3692 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3693 {
3694 /* If we are using the default tune= or arch=, undo the string assigned,
3695 and use the default. */
3696 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3697 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3698 else if (!orig_arch_specified)
3699 ix86_arch_string = NULL;
3700
3701 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3702 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3703 else if (orig_tune_defaulted)
3704 ix86_tune_string = NULL;
3705
3706 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3707 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3708 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3709 else if (!TARGET_64BIT && TARGET_SSE)
3710 ix86_fpmath_string = "sse,387";
3711
3712 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3713 override_options (false);
3714
3715 /* Add any builtin functions with the new isa if any. */
3716 ix86_add_new_builtins (ix86_isa_flags);
3717
3718 /* Save the current options unless we are validating options for
3719 #pragma. */
3720 t = build_target_option_node ();
3721
3722 ix86_arch_string = orig_arch_string;
3723 ix86_tune_string = orig_tune_string;
3724 ix86_fpmath_string = orig_fpmath_string;
3725
3726 /* Free up memory allocated to hold the strings */
3727 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3728 if (option_strings[i])
3729 free (option_strings[i]);
3730 }
3731
3732 return t;
3733 }
3734
3735 /* Hook to validate attribute((target("string"))). */
3736
3737 static bool
3738 ix86_valid_target_attribute_p (tree fndecl,
3739 tree ARG_UNUSED (name),
3740 tree args,
3741 int ARG_UNUSED (flags))
3742 {
3743 struct cl_target_option cur_target;
3744 bool ret = true;
3745 tree old_optimize = build_optimization_node ();
3746 tree new_target, new_optimize;
3747 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3748
3749 /* If the function changed the optimization levels as well as setting target
3750 options, start with the optimizations specified. */
3751 if (func_optimize && func_optimize != old_optimize)
3752 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3753
3754 /* The target attributes may also change some optimization flags, so update
3755 the optimization options if necessary. */
3756 cl_target_option_save (&cur_target);
3757 new_target = ix86_valid_target_attribute_tree (args);
3758 new_optimize = build_optimization_node ();
3759
3760 if (!new_target)
3761 ret = false;
3762
3763 else if (fndecl)
3764 {
3765 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3766
3767 if (old_optimize != new_optimize)
3768 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3769 }
3770
3771 cl_target_option_restore (&cur_target);
3772
3773 if (old_optimize != new_optimize)
3774 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3775
3776 return ret;
3777 }
3778
3779
3780 /* Hook to determine if one function can safely inline another. */
3781
3782 static bool
3783 ix86_can_inline_p (tree caller, tree callee)
3784 {
3785 bool ret = false;
3786 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3787 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3788
3789 /* If callee has no option attributes, then it is ok to inline. */
3790 if (!callee_tree)
3791 ret = true;
3792
3793 /* If caller has no option attributes, but callee does then it is not ok to
3794 inline. */
3795 else if (!caller_tree)
3796 ret = false;
3797
3798 else
3799 {
3800 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3801 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3802
      /* The callee's ISA options should be a subset of the caller's, i.e. an
	 SSE5 function can inline an SSE2 function but an SSE2 function can't
	 inline an SSE5 function.  */
3806 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3807 != callee_opts->ix86_isa_flags)
3808 ret = false;
3809
3810 /* See if we have the same non-isa options. */
3811 else if (caller_opts->target_flags != callee_opts->target_flags)
3812 ret = false;
3813
3814 /* See if arch, tune, etc. are the same. */
3815 else if (caller_opts->arch != callee_opts->arch)
3816 ret = false;
3817
3818 else if (caller_opts->tune != callee_opts->tune)
3819 ret = false;
3820
3821 else if (caller_opts->fpmath != callee_opts->fpmath)
3822 ret = false;
3823
3824 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3825 ret = false;
3826
3827 else
3828 ret = true;
3829 }
3830
3831 return ret;
3832 }
3833
3834
3835 /* Remember the last target of ix86_set_current_function. */
3836 static GTY(()) tree ix86_previous_fndecl;
3837
3838 /* Establish appropriate back-end context for processing the function
3839 FNDECL. The argument might be NULL to indicate processing at top
3840 level, outside of any function scope. */
3841 static void
3842 ix86_set_current_function (tree fndecl)
3843 {
3844 /* Only change the context if the function changes. This hook is called
3845 several times in the course of compiling a function, and we don't want to
3846 slow things down too much or call target_reinit when it isn't safe. */
3847 if (fndecl && fndecl != ix86_previous_fndecl)
3848 {
3849 tree old_tree = (ix86_previous_fndecl
3850 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3851 : NULL_TREE);
3852
3853 tree new_tree = (fndecl
3854 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3855 : NULL_TREE);
3856
3857 ix86_previous_fndecl = fndecl;
3858 if (old_tree == new_tree)
3859 ;
3860
3861 else if (new_tree)
3862 {
3863 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3864 target_reinit ();
3865 }
3866
3867 else if (old_tree)
3868 {
3869 struct cl_target_option *def
3870 = TREE_TARGET_OPTION (target_option_current_node);
3871
3872 cl_target_option_restore (def);
3873 target_reinit ();
3874 }
3875 }
3876 }
3877
3878
3879 /* Return true if this goes in large data/bss. */
3880
3881 static bool
3882 ix86_in_large_data_p (tree exp)
3883 {
3884 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3885 return false;
3886
3887 /* Functions are never large data. */
3888 if (TREE_CODE (exp) == FUNCTION_DECL)
3889 return false;
3890
3891 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3892 {
3893 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3894 if (strcmp (section, ".ldata") == 0
3895 || strcmp (section, ".lbss") == 0)
3896 return true;
3897 return false;
3898 }
3899 else
3900 {
3901 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3902
3903 /* If this is an incomplete type with size 0, then we can't put it
3904 in data because it might be too big when completed. */
3905 if (!size || size > ix86_section_threshold)
3906 return true;
3907 }
3908
3909 return false;
3910 }
3911
3912 /* Switch to the appropriate section for output of DECL.
3913 DECL is either a `VAR_DECL' node or a constant of some sort.
3914 RELOC indicates whether forming the initial value of DECL requires
3915 link-time relocations. */
3916
3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3918 ATTRIBUTE_UNUSED;
3919
3920 static section *
3921 x86_64_elf_select_section (tree decl, int reloc,
3922 unsigned HOST_WIDE_INT align)
3923 {
3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925 && ix86_in_large_data_p (decl))
3926 {
3927 const char *sname = NULL;
3928 unsigned int flags = SECTION_WRITE;
3929 switch (categorize_decl_for_section (decl, reloc))
3930 {
3931 case SECCAT_DATA:
3932 sname = ".ldata";
3933 break;
3934 case SECCAT_DATA_REL:
3935 sname = ".ldata.rel";
3936 break;
3937 case SECCAT_DATA_REL_LOCAL:
3938 sname = ".ldata.rel.local";
3939 break;
3940 case SECCAT_DATA_REL_RO:
3941 sname = ".ldata.rel.ro";
3942 break;
3943 case SECCAT_DATA_REL_RO_LOCAL:
3944 sname = ".ldata.rel.ro.local";
3945 break;
3946 case SECCAT_BSS:
3947 sname = ".lbss";
3948 flags |= SECTION_BSS;
3949 break;
3950 case SECCAT_RODATA:
3951 case SECCAT_RODATA_MERGE_STR:
3952 case SECCAT_RODATA_MERGE_STR_INIT:
3953 case SECCAT_RODATA_MERGE_CONST:
3954 sname = ".lrodata";
3955 flags = 0;
3956 break;
3957 case SECCAT_SRODATA:
3958 case SECCAT_SDATA:
3959 case SECCAT_SBSS:
3960 gcc_unreachable ();
3961 case SECCAT_TEXT:
3962 case SECCAT_TDATA:
3963 case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
3966 break;
3967 case SECCAT_EMUTLS_VAR:
3968 case SECCAT_EMUTLS_TMPL:
3969 gcc_unreachable ();
3970 }
3971 if (sname)
3972 {
3973 /* We might get called with string constants, but get_named_section
3974 doesn't like them as they are not DECLs. Also, we need to set
3975 flags in that case. */
3976 if (!DECL_P (decl))
3977 return get_section (sname, flags, NULL);
3978 return get_named_section (decl, sname, reloc);
3979 }
3980 }
3981 return default_elf_select_section (decl, reloc, align);
3982 }
3983
3984 /* Build up a unique section name, expressed as a
3985 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3986 RELOC indicates whether the initial value of EXP requires
3987 link-time relocations. */
3988
3989 static void ATTRIBUTE_UNUSED
3990 x86_64_elf_unique_section (tree decl, int reloc)
3991 {
3992 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3993 && ix86_in_large_data_p (decl))
3994 {
3995 const char *prefix = NULL;
3996 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3997 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3998
3999 switch (categorize_decl_for_section (decl, reloc))
4000 {
4001 case SECCAT_DATA:
4002 case SECCAT_DATA_REL:
4003 case SECCAT_DATA_REL_LOCAL:
4004 case SECCAT_DATA_REL_RO:
4005 case SECCAT_DATA_REL_RO_LOCAL:
4006 prefix = one_only ? ".ld" : ".ldata";
4007 break;
4008 case SECCAT_BSS:
4009 prefix = one_only ? ".lb" : ".lbss";
4010 break;
4011 case SECCAT_RODATA:
4012 case SECCAT_RODATA_MERGE_STR:
4013 case SECCAT_RODATA_MERGE_STR_INIT:
4014 case SECCAT_RODATA_MERGE_CONST:
4015 prefix = one_only ? ".lr" : ".lrodata";
4016 break;
4017 case SECCAT_SRODATA:
4018 case SECCAT_SDATA:
4019 case SECCAT_SBSS:
4020 gcc_unreachable ();
4021 case SECCAT_TEXT:
4022 case SECCAT_TDATA:
4023 case SECCAT_TBSS:
	  /* We don't split these for the medium model.  Place them into
	     default sections and hope for the best.  */
4026 break;
4027 case SECCAT_EMUTLS_VAR:
4028 prefix = targetm.emutls.var_section;
4029 break;
4030 case SECCAT_EMUTLS_TMPL:
4031 prefix = targetm.emutls.tmpl_section;
4032 break;
4033 }
4034 if (prefix)
4035 {
4036 const char *name, *linkonce;
4037 char *string;
4038
4039 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4040 name = targetm.strip_name_encoding (name);
4041
4042 /* If we're using one_only, then there needs to be a .gnu.linkonce
4043 prefix to the section name. */
4044 linkonce = one_only ? ".gnu.linkonce" : "";
4045
4046 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4047
4048 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4049 return;
4050 }
4051 }
4052 default_unique_section (decl, reloc);
4053 }
4054
4055 #ifdef COMMON_ASM_OP
4056 /* This says how to output assembler code to declare an
4057 uninitialized external linkage data object.
4058
   For medium-model x86-64 we need to use the .largecomm directive for
   large objects.  */
4061 void
4062 x86_elf_aligned_common (FILE *file,
4063 const char *name, unsigned HOST_WIDE_INT size,
4064 int align)
4065 {
4066 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4067 && size > (unsigned int)ix86_section_threshold)
4068 fprintf (file, ".largecomm\t");
4069 else
4070 fprintf (file, "%s", COMMON_ASM_OP);
4071 assemble_name (file, name);
4072 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4073 size, align / BITS_PER_UNIT);
4074 }
4075 #endif
4076
4077 /* Utility function for targets to use in implementing
4078 ASM_OUTPUT_ALIGNED_BSS. */
4079
4080 void
4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4082 const char *name, unsigned HOST_WIDE_INT size,
4083 int align)
4084 {
4085 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4086 && size > (unsigned int)ix86_section_threshold)
4087 switch_to_section (get_named_section (decl, ".lbss", 0));
4088 else
4089 switch_to_section (bss_section);
4090 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4091 #ifdef ASM_DECLARE_OBJECT_NAME
4092 last_assemble_variable_decl = decl;
4093 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4094 #else
  /* Standard thing is just to output a label for the object.  */
4096 ASM_OUTPUT_LABEL (file, name);
4097 #endif /* ASM_DECLARE_OBJECT_NAME */
4098 ASM_OUTPUT_SKIP (file, size ? size : 1);
4099 }
4100
4101 void
4102 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4103 {
  /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
     make the problem of too few registers even worse.  */
4106 #ifdef INSN_SCHEDULING
4107 if (level > 1)
4108 flag_schedule_insns = 0;
4109 #endif
4110
4111 if (TARGET_MACHO)
4112 /* The Darwin libraries never set errno, so we might as well
4113 avoid calling them when that's the only reason we would. */
4114 flag_errno_math = 0;
4115
  /* The default values of these switches depend on TARGET_64BIT, which is
     not known at this moment.  Mark these values with 2 and let the user
     override them.  In case there is no command line option specifying
     them, we will set the defaults in override_options.  */
4120 if (optimize >= 1)
4121 flag_omit_frame_pointer = 2;
4122 flag_pcc_struct_return = 2;
4123 flag_asynchronous_unwind_tables = 2;
4124 flag_vect_cost_model = 1;
4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4126 SUBTARGET_OPTIMIZATION_OPTIONS;
4127 #endif
4128 }
4129
4130 /* Decide whether we can make a sibling call to a function. DECL is the
4131 declaration of the function being targeted by the call and EXP is the
4132 CALL_EXPR representing the call. */
4133
4134 static bool
4135 ix86_function_ok_for_sibcall (tree decl, tree exp)
4136 {
4137 tree type, decl_or_type;
4138 rtx a, b;
4139
4140 /* If we are generating position-independent code, we cannot sibcall
4141 optimize any indirect call, or a direct call to a global function,
4142 as the PLT requires %ebx be live. */
4143 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4144 return false;
4145
4146 /* If we need to align the outgoing stack, then sibcalling would
4147 unalign the stack, which may break the called function. */
4148 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4149 return false;
4150
4151 if (decl)
4152 {
4153 decl_or_type = decl;
4154 type = TREE_TYPE (decl);
4155 }
4156 else
4157 {
4158 /* We're looking at the CALL_EXPR, we need the type of the function. */
4159 type = CALL_EXPR_FN (exp); /* pointer expression */
4160 type = TREE_TYPE (type); /* pointer type */
4161 type = TREE_TYPE (type); /* function type */
4162 decl_or_type = type;
4163 }
4164
4165 /* Check that the return value locations are the same. Like
4166 if we are returning floats on the 80387 register stack, we cannot
4167 make a sibcall from a function that doesn't return a float to a
4168 function that does or, conversely, from a function that does return
4169 a float to a function that doesn't; the necessary stack adjustment
4170 would not be executed. This is also the place we notice
4171 differences in the return value ABI. Note that it is ok for one
4172 of the functions to have void return type as long as the return
4173 value of the other is passed in a register. */
4174 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4175 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4176 cfun->decl, false);
4177 if (STACK_REG_P (a) || STACK_REG_P (b))
4178 {
4179 if (!rtx_equal_p (a, b))
4180 return false;
4181 }
4182 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4183 ;
4184 else if (!rtx_equal_p (a, b))
4185 return false;
4186
4187 if (TARGET_64BIT)
4188 {
4189 /* The SYSV ABI has more call-clobbered registers;
4190 disallow sibcalls from MS to SYSV. */
4191 if (cfun->machine->call_abi == MS_ABI
4192 && ix86_function_type_abi (type) == SYSV_ABI)
4193 return false;
4194 }
4195 else
4196 {
4197 /* If this call is indirect, we'll need to be able to use a
4198 call-clobbered register for the address of the target function.
4199 Make sure that all such registers are not used for passing
4200 parameters. Note that DLLIMPORT functions are indirect. */
4201 if (!decl
4202 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4203 {
4204 if (ix86_function_regparm (type, NULL) >= 3)
4205 {
4206 /* ??? Need to count the actual number of registers to be used,
4207 not the possible number of registers. Fix later. */
4208 return false;
4209 }
4210 }
4211 }
4212
4213 /* Otherwise okay. That also includes certain types of indirect calls. */
4214 return true;
4215 }
4216
4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4218 calling convention attributes;
4219 arguments as in struct attribute_spec.handler. */
4220
4221 static tree
4222 ix86_handle_cconv_attribute (tree *node, tree name,
4223 tree args,
4224 int flags ATTRIBUTE_UNUSED,
4225 bool *no_add_attrs)
4226 {
4227 if (TREE_CODE (*node) != FUNCTION_TYPE
4228 && TREE_CODE (*node) != METHOD_TYPE
4229 && TREE_CODE (*node) != FIELD_DECL
4230 && TREE_CODE (*node) != TYPE_DECL)
4231 {
4232 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4233 IDENTIFIER_POINTER (name));
4234 *no_add_attrs = true;
4235 return NULL_TREE;
4236 }
4237
4238 /* Can combine regparm with all attributes but fastcall. */
4239 if (is_attribute_p ("regparm", name))
4240 {
4241 tree cst;
4242
4243 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4244 {
4245 error ("fastcall and regparm attributes are not compatible");
4246 }
4247
4248 cst = TREE_VALUE (args);
4249 if (TREE_CODE (cst) != INTEGER_CST)
4250 {
4251 warning (OPT_Wattributes,
4252 "%qs attribute requires an integer constant argument",
4253 IDENTIFIER_POINTER (name));
4254 *no_add_attrs = true;
4255 }
4256 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4257 {
4258 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4259 IDENTIFIER_POINTER (name), REGPARM_MAX);
4260 *no_add_attrs = true;
4261 }
4262
4263 return NULL_TREE;
4264 }
4265
4266 if (TARGET_64BIT)
4267 {
4268 /* Do not warn when emulating the MS ABI. */
4269 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4270 warning (OPT_Wattributes, "%qs attribute ignored",
4271 IDENTIFIER_POINTER (name));
4272 *no_add_attrs = true;
4273 return NULL_TREE;
4274 }
4275
4276 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4277 if (is_attribute_p ("fastcall", name))
4278 {
4279 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4280 {
4281 error ("fastcall and cdecl attributes are not compatible");
4282 }
4283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4284 {
4285 error ("fastcall and stdcall attributes are not compatible");
4286 }
4287 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4288 {
4289 error ("fastcall and regparm attributes are not compatible");
4290 }
4291 }
4292
4293 /* Can combine stdcall with fastcall (redundant), regparm and
4294 sseregparm. */
4295 else if (is_attribute_p ("stdcall", name))
4296 {
4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298 {
4299 error ("stdcall and cdecl attributes are not compatible");
4300 }
4301 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4302 {
4303 error ("stdcall and fastcall attributes are not compatible");
4304 }
4305 }
4306
4307 /* Can combine cdecl with regparm and sseregparm. */
4308 else if (is_attribute_p ("cdecl", name))
4309 {
4310 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4311 {
4312 error ("stdcall and cdecl attributes are not compatible");
4313 }
4314 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4315 {
4316 error ("fastcall and cdecl attributes are not compatible");
4317 }
4318 }
4319
4320 /* Can combine sseregparm with all attributes. */
4321
4322 return NULL_TREE;
4323 }
4324
4325 /* Return 0 if the attributes for two types are incompatible, 1 if they
4326 are compatible, and 2 if they are nearly compatible (which causes a
4327 warning to be generated). */
4328
4329 static int
4330 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4331 {
4332 /* Check for mismatch of non-default calling convention. */
4333 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4334
4335 if (TREE_CODE (type1) != FUNCTION_TYPE
4336 && TREE_CODE (type1) != METHOD_TYPE)
4337 return 1;
4338
4339 /* Check for mismatched fastcall/regparm types. */
4340 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4341 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4342 || (ix86_function_regparm (type1, NULL)
4343 != ix86_function_regparm (type2, NULL)))
4344 return 0;
4345
4346 /* Check for mismatched sseregparm types. */
4347 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4348 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4349 return 0;
4350
4351 /* Check for mismatched return types (cdecl vs stdcall). */
4352 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4353 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4354 return 0;
4355
4356 return 1;
4357 }
4358
4359 /* Return the regparm value for a function with the indicated TYPE and DECL.
4360 DECL may be NULL when calling function indirectly
4361 or considering a libcall. */
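/* For example, declaring a function with __attribute__ ((regparm (3)))
   asks for its first three integral arguments to be passed in registers
   (conventionally %eax, %edx and %ecx on IA-32) instead of on the stack.  */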
4362
4363 static int
4364 ix86_function_regparm (const_tree type, const_tree decl)
4365 {
4366 tree attr;
4367 int regparm;
4368
4369 static bool error_issued;
4370
4371 if (TARGET_64BIT)
4372 return (ix86_function_type_abi (type) == SYSV_ABI
4373 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4374
4375 regparm = ix86_regparm;
4376 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4377 if (attr)
4378 {
4379 regparm
4380 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4381
4382 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4383 {
	  /* We can't use regparm(3) for nested functions because these pass
	     the static chain pointer in the %ecx register.  */
4386 if (!error_issued && regparm == 3
4387 && decl_function_context (decl)
4388 && !DECL_NO_STATIC_CHAIN (decl))
4389 {
4390 error ("nested functions are limited to 2 register parameters");
4391 error_issued = true;
4392 return 0;
4393 }
4394 }
4395
4396 return regparm;
4397 }
4398
4399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4400 return 2;
4401
4402 /* Use register calling convention for local functions when possible. */
4403 if (decl
4404 && TREE_CODE (decl) == FUNCTION_DECL
4405 && optimize
4406 && (TARGET_64BIT || !flag_strict_calling_conventions)
4407 && !profile_flag)
4408 {
4409 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4410 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4411 if (i && i->local)
4412 {
4413 int local_regparm, globals = 0, regno;
4414 struct function *f;
4415
4416 /* Make sure no regparm register is taken by a
4417 fixed register variable. */
4418 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4419 if (fixed_regs[local_regparm])
4420 break;
4421
	  /* We can't use regparm(3) for nested functions as these pass the
	     static chain pointer in the third argument.  */
4424 if (local_regparm == 3
4425 && decl_function_context (decl)
4426 && !DECL_NO_STATIC_CHAIN (decl))
4427 local_regparm = 2;
4428
	  /* If the function realigns its stack pointer, the prologue will
	     clobber %ecx.  If we've already generated code for the callee,
	     the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
	     scanning the attributes for the self-realigning property.  */
4433 f = DECL_STRUCT_FUNCTION (decl);
	  /* Since the current internal arg pointer won't conflict with
	     parameter-passing regs, there is no need to change stack
	     realignment or to adjust the regparm number.

	     Each fixed register usage increases register pressure, so
	     fewer registers should be used for argument passing.  This
	     functionality can be overridden by an explicit regparm
	     value.  */
4442 for (regno = 0; regno <= DI_REG; regno++)
4443 if (fixed_regs[regno])
4444 globals++;
4445
4446 local_regparm
4447 = globals < local_regparm ? local_regparm - globals : 0;
4448
4449 if (local_regparm > regparm)
4450 regparm = local_regparm;
4451 }
4452 }
4453
4454 return regparm;
4455 }
4456
4457 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4458 DFmode (2) arguments in SSE registers for a function with the
4459 indicated TYPE and DECL. DECL may be NULL when calling function
4460 indirectly or considering a libcall. Otherwise return 0. */
4461
4462 static int
4463 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4464 {
4465 gcc_assert (!TARGET_64BIT);
4466
4467 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4468 by the sseregparm attribute. */
4469 if (TARGET_SSEREGPARM
4470 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4471 {
4472 if (!TARGET_SSE)
4473 {
4474 if (warn)
4475 {
4476 if (decl)
4477 error ("Calling %qD with attribute sseregparm without "
4478 "SSE/SSE2 enabled", decl);
4479 else
4480 error ("Calling %qT with attribute sseregparm without "
4481 "SSE/SSE2 enabled", type);
4482 }
4483 return 0;
4484 }
4485
4486 return 2;
4487 }
4488
4489 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4490 (and DFmode for SSE2) arguments in SSE registers. */
4491 if (decl && TARGET_SSE_MATH && optimize && !profile_flag &&
4492 (TARGET_64BIT || !flag_strict_calling_conventions))
4493 {
4494 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4495 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4496 if (i && i->local)
4497 return TARGET_SSE2 ? 2 : 1;
4498 }
4499
4500 return 0;
4501 }
4502
4503 /* Return true if EAX is live at the start of the function. Used by
4504 ix86_expand_prologue to determine if we need special help before
4505 calling allocate_stack_worker. */
4506
4507 static bool
4508 ix86_eax_live_at_start_p (void)
4509 {
4510 /* Cheat. Don't bother working forward from ix86_function_regparm
4511 to the function type to whether an actual argument is located in
4512 eax. Instead just look at cfg info, which is still close enough
4513 to correct at this point. This gives false positives for broken
4514 functions that might use uninitialized data that happens to be
4515 allocated in eax, but who cares? */
4516 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4517 }
4518
4519 /* Value is the number of bytes of arguments automatically
4520 popped when returning from a subroutine call.
4521 FUNDECL is the declaration node of the function (as a tree),
4522 FUNTYPE is the data type of the function (as a tree),
4523 or for a library call it is an identifier node for the subroutine name.
4524 SIZE is the number of bytes of arguments passed on the stack.
4525
4526 On the 80386, the RTD insn may be used to pop them if the number
4527 of args is fixed, but if the number is variable then the caller
4528 must pop them all. RTD can't be used for library calls now
4529 because the library is compiled with the Unix compiler.
4530 Use of RTD is a selectable option, since it is incompatible with
4531 standard Unix calling sequences. If the option is not selected,
4532 the caller must always pop the args.
4533
4534 The attribute stdcall is equivalent to RTD on a per module basis. */
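/* As an illustration: with -mrtd or the stdcall attribute, a call to a
   fixed-argument function taking two ints would return SIZE (8 here), so the
   callee pops its own arguments; a cdecl or variadic function yields 0 and
   leaves the popping to the caller.  */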
4535
4536 int
4537 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4538 {
4539 int rtd;
4540
4541 /* None of the 64-bit ABIs pop arguments. */
4542 if (TARGET_64BIT)
4543 return 0;
4544
4545 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4546
4547 /* Cdecl functions override -mrtd, and never pop the stack. */
4548 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4549 {
4550 /* Stdcall and fastcall functions will pop the stack if not
4551 variable args. */
4552 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4553 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4554 rtd = 1;
4555
4556 if (rtd && ! stdarg_p (funtype))
4557 return size;
4558 }
4559
4560 /* Lose any fake structure return argument if it is passed on the stack. */
4561 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4562 && !KEEP_AGGREGATE_RETURN_POINTER)
4563 {
4564 int nregs = ix86_function_regparm (funtype, fundecl);
4565 if (nregs == 0)
4566 return GET_MODE_SIZE (Pmode);
4567 }
4568
4569 return 0;
4570 }
4571
4572 /* Argument support functions. */
4573
4574 /* Return true when register may be used to pass function parameters. */
4575 bool
4576 ix86_function_arg_regno_p (int regno)
4577 {
4578 int i;
4579 const int *parm_regs;
4580
4581 if (!TARGET_64BIT)
4582 {
4583 if (TARGET_MACHO)
4584 return (regno < REGPARM_MAX
4585 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4586 else
4587 return (regno < REGPARM_MAX
4588 || (TARGET_MMX && MMX_REGNO_P (regno)
4589 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4590 || (TARGET_SSE && SSE_REGNO_P (regno)
4591 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4592 }
4593
4594 if (TARGET_MACHO)
4595 {
4596 if (SSE_REGNO_P (regno) && TARGET_SSE)
4597 return true;
4598 }
4599 else
4600 {
4601 if (TARGET_SSE && SSE_REGNO_P (regno)
4602 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4603 return true;
4604 }
4605
  /* TODO: This function should depend on the current function's ABI, but
     builtins.c would then need updating.  Therefore we use the
     default ABI.  */
4609
4610 /* RAX is used as hidden argument to va_arg functions. */
4611 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4612 return true;
4613
4614 if (DEFAULT_ABI == MS_ABI)
4615 parm_regs = x86_64_ms_abi_int_parameter_registers;
4616 else
4617 parm_regs = x86_64_int_parameter_registers;
4618 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4619 : X86_64_REGPARM_MAX); i++)
4620 if (regno == parm_regs[i])
4621 return true;
4622 return false;
4623 }
4624
4625 /* Return if we do not know how to pass TYPE solely in registers. */
4626
4627 static bool
4628 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4629 {
4630 if (must_pass_in_stack_var_size_or_pad (mode, type))
4631 return true;
4632
4633 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4634 The layout_type routine is crafty and tries to trick us into passing
4635 currently unsupported vector types on the stack by using TImode. */
4636 return (!TARGET_64BIT && mode == TImode
4637 && type && TREE_CODE (type) != VECTOR_TYPE);
4638 }
4639
/* Return the size, in bytes, of the area reserved for arguments passed
   in registers for the function represented by FNDECL, depending on the
   ABI format used.  */
4643 int
4644 ix86_reg_parm_stack_space (const_tree fndecl)
4645 {
4646 int call_abi = SYSV_ABI;
4647 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4648 call_abi = ix86_function_abi (fndecl);
4649 else
4650 call_abi = ix86_function_type_abi (fndecl);
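  /* The 32 bytes correspond to the "shadow space" (register parameter home
     area) that the MS x64 ABI requires the caller to reserve for the four
     register-passed arguments.  */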
4651 if (call_abi == MS_ABI)
4652 return 32;
4653 return 0;
4654 }
4655
/* Return SYSV_ABI or MS_ABI, depending on FNTYPE, specifying the call
   ABI used.  */
4658 int
4659 ix86_function_type_abi (const_tree fntype)
4660 {
4661 if (TARGET_64BIT && fntype != NULL)
4662 {
4663 int abi;
4664 if (DEFAULT_ABI == SYSV_ABI)
4665 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4666 else
4667 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4668
4669 return abi;
4670 }
4671 return DEFAULT_ABI;
4672 }
4673
4674 int
4675 ix86_function_abi (const_tree fndecl)
4676 {
4677 if (! fndecl)
4678 return DEFAULT_ABI;
4679 return ix86_function_type_abi (TREE_TYPE (fndecl));
4680 }
4681
/* Return SYSV_ABI or MS_ABI, depending on CFUN, specifying the call
   ABI used.  */
4684 int
4685 ix86_cfun_abi (void)
4686 {
4687 if (! cfun || ! TARGET_64BIT)
4688 return DEFAULT_ABI;
4689 return cfun->machine->call_abi;
4690 }
4691
4692 /* regclass.c */
4693 extern void init_regs (void);
4694
/* Implementation of the call ABI switching target hook.  The call register
   sets specific to FNDECL are established here.  See also
   CONDITIONAL_REGISTER_USAGE for more details.  */
4698 void
4699 ix86_call_abi_override (const_tree fndecl)
4700 {
4701 if (fndecl == NULL_TREE)
4702 cfun->machine->call_abi = DEFAULT_ABI;
4703 else
4704 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4705 }
4706
/* The MS and SYSV ABIs have different sets of call-used registers.  Avoid
   expensive re-initialization via init_regs each time we switch function
   context, since this is needed only during RTL expansion.  */
4710 static void
4711 ix86_maybe_switch_abi (void)
4712 {
4713 if (TARGET_64BIT &&
4714 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4715 reinit_regs ();
4716 }
4717
4718 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4719 for a call to a function whose data type is FNTYPE.
4720 For a library call, FNTYPE is 0. */
4721
4722 void
4723 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4724 tree fntype, /* tree ptr for function decl */
4725 rtx libname, /* SYMBOL_REF of library name or 0 */
4726 tree fndecl)
4727 {
4728 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4729 memset (cum, 0, sizeof (*cum));
4730
4731 if (fndecl)
4732 cum->call_abi = ix86_function_abi (fndecl);
4733 else
4734 cum->call_abi = ix86_function_type_abi (fntype);
4735 /* Set up the number of registers to use for passing arguments. */
4736
4737 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4738 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4739 "or subtarget optimization implying it");
4740 cum->nregs = ix86_regparm;
4741 if (TARGET_64BIT)
4742 {
4743 if (cum->call_abi != DEFAULT_ABI)
4744 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4745 : X64_REGPARM_MAX;
4746 }
4747 if (TARGET_SSE)
4748 {
4749 cum->sse_nregs = SSE_REGPARM_MAX;
4750 if (TARGET_64BIT)
4751 {
4752 if (cum->call_abi != DEFAULT_ABI)
4753 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4754 : X64_SSE_REGPARM_MAX;
4755 }
4756 }
4757 if (TARGET_MMX)
4758 cum->mmx_nregs = MMX_REGPARM_MAX;
4759 cum->warn_avx = true;
4760 cum->warn_sse = true;
4761 cum->warn_mmx = true;
4762
  /* Because the type might mismatch between caller and callee, we need to
     use the actual type of the function for local calls.
     FIXME: cgraph_analyze can be told to actually record whether a function
     uses va_start, so for local functions maybe_vaarg can be made more
     aggressive, helping K&R code.
     FIXME: once the type system is fixed, we won't need this code anymore.  */
4769 if (i && i->local)
4770 fntype = TREE_TYPE (fndecl);
4771 cum->maybe_vaarg = (fntype
4772 ? (!prototype_p (fntype) || stdarg_p (fntype))
4773 : !libname);
4774
4775 if (!TARGET_64BIT)
4776 {
4777 /* If there are variable arguments, then we won't pass anything
4778 in registers in 32-bit mode. */
4779 if (stdarg_p (fntype))
4780 {
4781 cum->nregs = 0;
4782 cum->sse_nregs = 0;
4783 cum->mmx_nregs = 0;
4784 cum->warn_avx = 0;
4785 cum->warn_sse = 0;
4786 cum->warn_mmx = 0;
4787 return;
4788 }
4789
4790 /* Use ecx and edx registers if function has fastcall attribute,
4791 else look for regparm information. */
4792 if (fntype)
4793 {
4794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4795 {
4796 cum->nregs = 2;
4797 cum->fastcall = 1;
4798 }
4799 else
4800 cum->nregs = ix86_function_regparm (fntype, fndecl);
4801 }
4802
4803 /* Set up the number of SSE registers used for passing SFmode
4804 and DFmode arguments. Warn for mismatching ABI. */
4805 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4806 }
4807 }
4808
4809 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4810 But in the case of vector types, it is some vector mode.
4811
4812 When we have only some of our vector isa extensions enabled, then there
4813 are some modes for which vector_mode_supported_p is false. For these
4814 modes, the generic vector support in gcc will choose some non-vector mode
4815 in order to implement the type. By computing the natural mode, we'll
4816 select the proper ABI location for the operand and not depend on whatever
4817 the middle-end decides to do with these vector types.
4818
   The middle-end can't deal with vector types larger than 16 bytes.  In
   this case, we return the original mode and warn about the ABI change if
   CUM isn't NULL.  */
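/* As an illustration, a 16-byte vector of 32-bit ints whose TYPE_MODE came
   back as a non-vector mode (because, say, the relevant ISA is disabled) is
   still mapped to V4SImode here, so the ABI location of the argument does not
   depend on how the middle-end chose to represent the type.  */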
4822
4823 static enum machine_mode
4824 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4825 {
4826 enum machine_mode mode = TYPE_MODE (type);
4827
4828 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4829 {
4830 HOST_WIDE_INT size = int_size_in_bytes (type);
4831 if ((size == 8 || size == 16 || size == 32)
4832 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4833 && TYPE_VECTOR_SUBPARTS (type) > 1)
4834 {
4835 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4836
4837 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4838 mode = MIN_MODE_VECTOR_FLOAT;
4839 else
4840 mode = MIN_MODE_VECTOR_INT;
4841
4842 /* Get the mode which has this inner mode and number of units. */
4843 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4844 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4845 && GET_MODE_INNER (mode) == innermode)
4846 {
4847 if (size == 32 && !TARGET_AVX)
4848 {
4849 static bool warnedavx;
4850
4851 if (cum
4852 && !warnedavx
4853 && cum->warn_avx)
4854 {
4855 warnedavx = true;
4856 warning (0, "AVX vector argument without AVX "
4857 "enabled changes the ABI");
4858 }
4859 return TYPE_MODE (type);
4860 }
4861 else
4862 return mode;
4863 }
4864
4865 gcc_unreachable ();
4866 }
4867 }
4868
4869 return mode;
4870 }
4871
4872 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4873 this may not agree with the mode that the type system has chosen for the
4874 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4875 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4876
4877 static rtx
4878 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4879 unsigned int regno)
4880 {
4881 rtx tmp;
4882
4883 if (orig_mode != BLKmode)
4884 tmp = gen_rtx_REG (orig_mode, regno);
4885 else
4886 {
4887 tmp = gen_rtx_REG (mode, regno);
4888 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4889 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4890 }
4891
4892 return tmp;
4893 }
4894
4895 /* x86-64 register passing implementation. See the x86-64 psABI for details.
4896 The goal of this code is to classify each eightbyte of an incoming argument
4897 by register class and assign registers accordingly. */
4898
4899 /* Return the union class of CLASS1 and CLASS2.
4900 See the x86-64 PS ABI for details. */
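/* Example of the merge rules below: for "union { long l; double d; }" the
   single eightbyte is classified both as INTEGER (from the long) and as
   SSEDF (from the double); rule #4 merges these to INTEGER, so the union
   is passed in a general purpose register. */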
4901
4902 static enum x86_64_reg_class
4903 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4904 {
4905 /* Rule #1: If both classes are equal, this is the resulting class. */
4906 if (class1 == class2)
4907 return class1;
4908
4909 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4910 the other class. */
4911 if (class1 == X86_64_NO_CLASS)
4912 return class2;
4913 if (class2 == X86_64_NO_CLASS)
4914 return class1;
4915
4916 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4917 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4918 return X86_64_MEMORY_CLASS;
4919
4920 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4921 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4922 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4923 return X86_64_INTEGERSI_CLASS;
4924 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4925 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4926 return X86_64_INTEGER_CLASS;
4927
4928 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4929 MEMORY is used. */
4930 if (class1 == X86_64_X87_CLASS
4931 || class1 == X86_64_X87UP_CLASS
4932 || class1 == X86_64_COMPLEX_X87_CLASS
4933 || class2 == X86_64_X87_CLASS
4934 || class2 == X86_64_X87UP_CLASS
4935 || class2 == X86_64_COMPLEX_X87_CLASS)
4936 return X86_64_MEMORY_CLASS;
4937
4938 /* Rule #6: Otherwise class SSE is used. */
4939 return X86_64_SSE_CLASS;
4940 }
4941
4942 /* Classify the argument of type TYPE and mode MODE.
4943 CLASSES will be filled by the register class used to pass each word
4944 of the operand. The number of words is returned. In case the parameter
4945 should be passed in memory, 0 is returned. As a special case for zero
4946 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4947
4948 BIT_OFFSET is used internally for handling records; it specifies the bit
4949 offset of the value within its containing object, modulo 256 to avoid overflow.
4950
4951 See the x86-64 PS ABI for details.
4952 */
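/* Worked example: "struct { double d; long l; }" occupies two eightbytes;
   this function returns 2 with classes[0] = X86_64_SSEDF_CLASS and
   classes[1] = X86_64_INTEGER_CLASS. A bare "long double" (XFmode) yields
   X86_64_X87_CLASS followed by X86_64_X87UP_CLASS, and a variable-sized
   aggregate yields 0, i.e. memory. */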
4953
4954 static int
4955 classify_argument (enum machine_mode mode, const_tree type,
4956 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4957 {
4958 HOST_WIDE_INT bytes =
4959 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4960 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4961
4962 /* Variable sized entities are always passed/returned in memory. */
4963 if (bytes < 0)
4964 return 0;
4965
4966 if (mode != VOIDmode
4967 && targetm.calls.must_pass_in_stack (mode, type))
4968 return 0;
4969
4970 if (type && AGGREGATE_TYPE_P (type))
4971 {
4972 int i;
4973 tree field;
4974 enum x86_64_reg_class subclasses[MAX_CLASSES];
4975
4976 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4977 if (bytes > 32)
4978 return 0;
4979
4980 for (i = 0; i < words; i++)
4981 classes[i] = X86_64_NO_CLASS;
4982
4983 /* Zero-sized arrays or structures are NO_CLASS. Since returning 0 would
4984 signal the memory class, handle this as a special case. */
4985 if (!words)
4986 {
4987 classes[0] = X86_64_NO_CLASS;
4988 return 1;
4989 }
4990
4991 /* Classify each field of record and merge classes. */
4992 switch (TREE_CODE (type))
4993 {
4994 case RECORD_TYPE:
4995 /* And now merge the fields of structure. */
4996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4997 {
4998 if (TREE_CODE (field) == FIELD_DECL)
4999 {
5000 int num;
5001
5002 if (TREE_TYPE (field) == error_mark_node)
5003 continue;
5004
5005 /* Bitfields are always classified as integer. Handle them
5006 early, since later code would consider them to be
5007 misaligned integers. */
5008 if (DECL_BIT_FIELD (field))
5009 {
5010 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5011 i < ((int_bit_position (field) + (bit_offset % 64))
5012 + tree_low_cst (DECL_SIZE (field), 0)
5013 + 63) / 8 / 8; i++)
5014 classes[i] =
5015 merge_classes (X86_64_INTEGER_CLASS,
5016 classes[i]);
5017 }
5018 else
5019 {
5020 type = TREE_TYPE (field);
5021
5022 /* Flexible array member is ignored. */
5023 if (TYPE_MODE (type) == BLKmode
5024 && TREE_CODE (type) == ARRAY_TYPE
5025 && TYPE_SIZE (type) == NULL_TREE
5026 && TYPE_DOMAIN (type) != NULL_TREE
5027 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5028 == NULL_TREE))
5029 {
5030 static bool warned;
5031
5032 if (!warned && warn_psabi)
5033 {
5034 warned = true;
5035 inform (input_location,
5036 "The ABI of passing struct with"
5037 " a flexible array member has"
5038 " changed in GCC 4.4");
5039 }
5040 continue;
5041 }
5042 num = classify_argument (TYPE_MODE (type), type,
5043 subclasses,
5044 (int_bit_position (field)
5045 + bit_offset) % 256);
5046 if (!num)
5047 return 0;
5048 for (i = 0; i < num; i++)
5049 {
5050 int pos =
5051 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5052 classes[i + pos] =
5053 merge_classes (subclasses[i], classes[i + pos]);
5054 }
5055 }
5056 }
5057 }
5058 break;
5059
5060 case ARRAY_TYPE:
5061 /* Arrays are handled as small records. */
5062 {
5063 int num;
5064 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5065 TREE_TYPE (type), subclasses, bit_offset);
5066 if (!num)
5067 return 0;
5068
5069 /* The partial classes are now full classes. */
5070 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5071 subclasses[0] = X86_64_SSE_CLASS;
5072 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5073 && !((bit_offset % 64) == 0 && bytes == 4))
5074 subclasses[0] = X86_64_INTEGER_CLASS;
5075
5076 for (i = 0; i < words; i++)
5077 classes[i] = subclasses[i % num];
5078
5079 break;
5080 }
5081 case UNION_TYPE:
5082 case QUAL_UNION_TYPE:
5083 /* Unions are similar to RECORD_TYPE, but the offset of each field is
5084 always 0. */
5085 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5086 {
5087 if (TREE_CODE (field) == FIELD_DECL)
5088 {
5089 int num;
5090
5091 if (TREE_TYPE (field) == error_mark_node)
5092 continue;
5093
5094 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5095 TREE_TYPE (field), subclasses,
5096 bit_offset);
5097 if (!num)
5098 return 0;
5099 for (i = 0; i < num; i++)
5100 classes[i] = merge_classes (subclasses[i], classes[i]);
5101 }
5102 }
5103 break;
5104
5105 default:
5106 gcc_unreachable ();
5107 }
5108
5109 if (words > 2)
5110 {
5111 /* When the size exceeds 16 bytes, everything should be passed in
5112 memory unless the first eightbyte is X86_64_SSE_CLASS and all
5113 the remaining ones are X86_64_SSEUP_CLASS; this is checked
5114 below. */
5115 if (classes[0] != X86_64_SSE_CLASS)
5116 return 0;
5117
5118 for (i = 1; i < words; i++)
5119 if (classes[i] != X86_64_SSEUP_CLASS)
5120 return 0;
5121 }
5122
5123 /* Final merger cleanup. */
5124 for (i = 0; i < words; i++)
5125 {
5126 /* If one class is MEMORY, everything should be passed in
5127 memory. */
5128 if (classes[i] == X86_64_MEMORY_CLASS)
5129 return 0;
5130
5131 /* X86_64_SSEUP_CLASS should always be preceded by
5132 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5133 if (classes[i] == X86_64_SSEUP_CLASS
5134 && classes[i - 1] != X86_64_SSE_CLASS
5135 && classes[i - 1] != X86_64_SSEUP_CLASS)
5136 {
5137 /* The first one should never be X86_64_SSEUP_CLASS. */
5138 gcc_assert (i != 0);
5139 classes[i] = X86_64_SSE_CLASS;
5140 }
5141
5142 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5143 everything should be passed in memory. */
5144 if (classes[i] == X86_64_X87UP_CLASS
5145 && (classes[i - 1] != X86_64_X87_CLASS))
5146 {
5147 static bool warned;
5148
5149 /* The first one should never be X86_64_X87UP_CLASS. */
5150 gcc_assert (i != 0);
5151 if (!warned && warn_psabi)
5152 {
5153 warned = true;
5154 inform (input_location,
5155 "The ABI of passing union with long double"
5156 " has changed in GCC 4.4");
5157 }
5158 return 0;
5159 }
5160 }
5161 return words;
5162 }
5163
5164 /* Compute the alignment needed. We align all types to their natural
5165 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
5166 if (mode != VOIDmode && mode != BLKmode)
5167 {
5168 int mode_alignment = GET_MODE_BITSIZE (mode);
5169
5170 if (mode == XFmode)
5171 mode_alignment = 128;
5172 else if (mode == XCmode)
5173 mode_alignment = 256;
5174 if (COMPLEX_MODE_P (mode))
5175 mode_alignment /= 2;
5176 /* Misaligned fields are always returned in memory. */
5177 if (bit_offset % mode_alignment)
5178 return 0;
5179 }
5180
5181 /* For V1xx modes, just use the base mode. */
5182 if (VECTOR_MODE_P (mode) && mode != V1DImode
5183 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5184 mode = GET_MODE_INNER (mode);
5185
5186 /* Classification of atomic types. */
5187 switch (mode)
5188 {
5189 case SDmode:
5190 case DDmode:
5191 classes[0] = X86_64_SSE_CLASS;
5192 return 1;
5193 case TDmode:
5194 classes[0] = X86_64_SSE_CLASS;
5195 classes[1] = X86_64_SSEUP_CLASS;
5196 return 2;
5197 case DImode:
5198 case SImode:
5199 case HImode:
5200 case QImode:
5201 case CSImode:
5202 case CHImode:
5203 case CQImode:
5204 {
5205 int size = (bit_offset % 64) + (int) GET_MODE_BITSIZE (mode);
5206
5207 if (size <= 32)
5208 {
5209 classes[0] = X86_64_INTEGERSI_CLASS;
5210 return 1;
5211 }
5212 else if (size <= 64)
5213 {
5214 classes[0] = X86_64_INTEGER_CLASS;
5215 return 1;
5216 }
5217 else if (size <= 64+32)
5218 {
5219 classes[0] = X86_64_INTEGER_CLASS;
5220 classes[1] = X86_64_INTEGERSI_CLASS;
5221 return 2;
5222 }
5223 else if (size <= 64+64)
5224 {
5225 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5226 return 2;
5227 }
5228 else
5229 gcc_unreachable ();
5230 }
5231 case CDImode:
5232 case TImode:
5233 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5234 return 2;
5235 case COImode:
5236 case OImode:
5237 /* OImode shouldn't be used directly. */
5238 gcc_unreachable ();
5239 case CTImode:
5240 return 0;
5241 case SFmode:
5242 if (!(bit_offset % 64))
5243 classes[0] = X86_64_SSESF_CLASS;
5244 else
5245 classes[0] = X86_64_SSE_CLASS;
5246 return 1;
5247 case DFmode:
5248 classes[0] = X86_64_SSEDF_CLASS;
5249 return 1;
5250 case XFmode:
5251 classes[0] = X86_64_X87_CLASS;
5252 classes[1] = X86_64_X87UP_CLASS;
5253 return 2;
5254 case TFmode:
5255 classes[0] = X86_64_SSE_CLASS;
5256 classes[1] = X86_64_SSEUP_CLASS;
5257 return 2;
5258 case SCmode:
5259 classes[0] = X86_64_SSE_CLASS;
5260 if (!(bit_offset % 64))
5261 return 1;
5262 else
5263 {
5264 static bool warned;
5265
5266 if (!warned && warn_psabi)
5267 {
5268 warned = true;
5269 inform (input_location,
5270 "The ABI of passing structure with complex float"
5271 " member has changed in GCC 4.4");
5272 }
5273 classes[1] = X86_64_SSESF_CLASS;
5274 return 2;
5275 }
5276 case DCmode:
5277 classes[0] = X86_64_SSEDF_CLASS;
5278 classes[1] = X86_64_SSEDF_CLASS;
5279 return 2;
5280 case XCmode:
5281 classes[0] = X86_64_COMPLEX_X87_CLASS;
5282 return 1;
5283 case TCmode:
5284 /* This mode is larger than 16 bytes. */
5285 return 0;
5286 case V8SFmode:
5287 case V8SImode:
5288 case V32QImode:
5289 case V16HImode:
5290 case V4DFmode:
5291 case V4DImode:
5292 classes[0] = X86_64_SSE_CLASS;
5293 classes[1] = X86_64_SSEUP_CLASS;
5294 classes[2] = X86_64_SSEUP_CLASS;
5295 classes[3] = X86_64_SSEUP_CLASS;
5296 return 4;
5297 case V4SFmode:
5298 case V4SImode:
5299 case V16QImode:
5300 case V8HImode:
5301 case V2DFmode:
5302 case V2DImode:
5303 classes[0] = X86_64_SSE_CLASS;
5304 classes[1] = X86_64_SSEUP_CLASS;
5305 return 2;
5306 case V1DImode:
5307 case V2SFmode:
5308 case V2SImode:
5309 case V4HImode:
5310 case V8QImode:
5311 classes[0] = X86_64_SSE_CLASS;
5312 return 1;
5313 case BLKmode:
5314 case VOIDmode:
5315 return 0;
5316 default:
5317 gcc_assert (VECTOR_MODE_P (mode));
5318
5319 if (bytes > 16)
5320 return 0;
5321
5322 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5323
5324 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5325 classes[0] = X86_64_INTEGERSI_CLASS;
5326 else
5327 classes[0] = X86_64_INTEGER_CLASS;
5328 classes[1] = X86_64_INTEGER_CLASS;
5329 return 1 + (bytes > 8);
5330 }
5331 }
5332
5333 /* Examine the argument and record in *INT_NREGS and *SSE_NREGS the number
5334 of registers required in each class. Return 0 iff the parameter should be passed in memory. */
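/* Continuing the example above, "struct { double d; long l; }" sets
   *INT_NREGS = 1 and *SSE_NREGS = 1 and returns nonzero, while a
   "long double" argument (X87 classes) returns 0 here unless IN_RETURN,
   i.e. it is passed on the stack but may still be returned in st(0). */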
5335 static int
5336 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5337 int *int_nregs, int *sse_nregs)
5338 {
5339 enum x86_64_reg_class regclass[MAX_CLASSES];
5340 int n = classify_argument (mode, type, regclass, 0);
5341
5342 *int_nregs = 0;
5343 *sse_nregs = 0;
5344 if (!n)
5345 return 0;
5346 for (n--; n >= 0; n--)
5347 switch (regclass[n])
5348 {
5349 case X86_64_INTEGER_CLASS:
5350 case X86_64_INTEGERSI_CLASS:
5351 (*int_nregs)++;
5352 break;
5353 case X86_64_SSE_CLASS:
5354 case X86_64_SSESF_CLASS:
5355 case X86_64_SSEDF_CLASS:
5356 (*sse_nregs)++;
5357 break;
5358 case X86_64_NO_CLASS:
5359 case X86_64_SSEUP_CLASS:
5360 break;
5361 case X86_64_X87_CLASS:
5362 case X86_64_X87UP_CLASS:
5363 if (!in_return)
5364 return 0;
5365 break;
5366 case X86_64_COMPLEX_X87_CLASS:
5367 return in_return ? 2 : 0;
5368 case X86_64_MEMORY_CLASS:
5369 gcc_unreachable ();
5370 }
5371 return 1;
5372 }
5373
5374 /* Construct a container for the argument, as used by the GCC interface. See
5375 FUNCTION_ARG for the detailed description. */
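/* For "struct { double d; long l; }", classified as SSEDF + INTEGER, the
   container built below is a two-element PARALLEL: a DFmode SSE register
   at byte offset 0 and a DImode integer register at byte offset 8. The
   simple single-register cases short-circuit before the PARALLEL is
   built. */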
5376
5377 static rtx
5378 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5379 const_tree type, int in_return, int nintregs, int nsseregs,
5380 const int *intreg, int sse_regno)
5381 {
5382 /* The following variables hold the static issued_error state. */
5383 static bool issued_sse_arg_error;
5384 static bool issued_sse_ret_error;
5385 static bool issued_x87_ret_error;
5386
5387 enum machine_mode tmpmode;
5388 int bytes =
5389 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5390 enum x86_64_reg_class regclass[MAX_CLASSES];
5391 int n;
5392 int i;
5393 int nexps = 0;
5394 int needed_sseregs, needed_intregs;
5395 rtx exp[MAX_CLASSES];
5396 rtx ret;
5397
5398 n = classify_argument (mode, type, regclass, 0);
5399 if (!n)
5400 return NULL;
5401 if (!examine_argument (mode, type, in_return, &needed_intregs,
5402 &needed_sseregs))
5403 return NULL;
5404 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5405 return NULL;
5406
5407 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5408 some less clueful developer tries to use floating-point anyway. */
5409 if (needed_sseregs && !TARGET_SSE)
5410 {
5411 if (in_return)
5412 {
5413 if (!issued_sse_ret_error)
5414 {
5415 error ("SSE register return with SSE disabled");
5416 issued_sse_ret_error = true;
5417 }
5418 }
5419 else if (!issued_sse_arg_error)
5420 {
5421 error ("SSE register argument with SSE disabled");
5422 issued_sse_arg_error = true;
5423 }
5424 return NULL;
5425 }
5426
5427 /* Likewise, error if the ABI requires us to return values in the
5428 x87 registers and the user specified -mno-80387. */
5429 if (!TARGET_80387 && in_return)
5430 for (i = 0; i < n; i++)
5431 if (regclass[i] == X86_64_X87_CLASS
5432 || regclass[i] == X86_64_X87UP_CLASS
5433 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5434 {
5435 if (!issued_x87_ret_error)
5436 {
5437 error ("x87 register return with x87 disabled");
5438 issued_x87_ret_error = true;
5439 }
5440 return NULL;
5441 }
5442
5443 /* First construct the simple cases. Avoid SCmode, since we want to use
5444 a single register to pass this type. */
5445 if (n == 1 && mode != SCmode)
5446 switch (regclass[0])
5447 {
5448 case X86_64_INTEGER_CLASS:
5449 case X86_64_INTEGERSI_CLASS:
5450 return gen_rtx_REG (mode, intreg[0]);
5451 case X86_64_SSE_CLASS:
5452 case X86_64_SSESF_CLASS:
5453 case X86_64_SSEDF_CLASS:
5454 if (mode != BLKmode)
5455 return gen_reg_or_parallel (mode, orig_mode,
5456 SSE_REGNO (sse_regno));
5457 break;
5458 case X86_64_X87_CLASS:
5459 case X86_64_COMPLEX_X87_CLASS:
5460 return gen_rtx_REG (mode, FIRST_STACK_REG);
5461 case X86_64_NO_CLASS:
5462 /* Zero sized array, struct or class. */
5463 return NULL;
5464 default:
5465 gcc_unreachable ();
5466 }
5467 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5468 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5469 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5470 if (n == 4
5471 && regclass[0] == X86_64_SSE_CLASS
5472 && regclass[1] == X86_64_SSEUP_CLASS
5473 && regclass[2] == X86_64_SSEUP_CLASS
5474 && regclass[3] == X86_64_SSEUP_CLASS
5475 && mode != BLKmode)
5476 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5477
5478 if (n == 2
5479 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5480 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5481 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5482 && regclass[1] == X86_64_INTEGER_CLASS
5483 && (mode == CDImode || mode == TImode || mode == TFmode)
5484 && intreg[0] + 1 == intreg[1])
5485 return gen_rtx_REG (mode, intreg[0]);
5486
5487 /* Otherwise figure out the entries of the PARALLEL. */
5488 for (i = 0; i < n; i++)
5489 {
5490 int pos;
5491
5492 switch (regclass[i])
5493 {
5494 case X86_64_NO_CLASS:
5495 break;
5496 case X86_64_INTEGER_CLASS:
5497 case X86_64_INTEGERSI_CLASS:
5498 /* Merge TImodes on aligned occasions here too. */
5499 if (i * 8 + 8 > bytes)
5500 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5501 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5502 tmpmode = SImode;
5503 else
5504 tmpmode = DImode;
5505 /* We've requested 24 bytes, for which we don't have a mode. Use DImode. */
5506 if (tmpmode == BLKmode)
5507 tmpmode = DImode;
5508 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5509 gen_rtx_REG (tmpmode, *intreg),
5510 GEN_INT (i*8));
5511 intreg++;
5512 break;
5513 case X86_64_SSESF_CLASS:
5514 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5515 gen_rtx_REG (SFmode,
5516 SSE_REGNO (sse_regno)),
5517 GEN_INT (i*8));
5518 sse_regno++;
5519 break;
5520 case X86_64_SSEDF_CLASS:
5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5522 gen_rtx_REG (DFmode,
5523 SSE_REGNO (sse_regno)),
5524 GEN_INT (i*8));
5525 sse_regno++;
5526 break;
5527 case X86_64_SSE_CLASS:
5528 pos = i;
5529 switch (n)
5530 {
5531 case 1:
5532 tmpmode = DImode;
5533 break;
5534 case 2:
5535 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5536 {
5537 tmpmode = TImode;
5538 i++;
5539 }
5540 else
5541 tmpmode = DImode;
5542 break;
5543 case 4:
5544 gcc_assert (i == 0
5545 && regclass[1] == X86_64_SSEUP_CLASS
5546 && regclass[2] == X86_64_SSEUP_CLASS
5547 && regclass[3] == X86_64_SSEUP_CLASS);
5548 tmpmode = OImode;
5549 i += 3;
5550 break;
5551 default:
5552 gcc_unreachable ();
5553 }
5554 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5555 gen_rtx_REG (tmpmode,
5556 SSE_REGNO (sse_regno)),
5557 GEN_INT (pos*8));
5558 sse_regno++;
5559 break;
5560 default:
5561 gcc_unreachable ();
5562 }
5563 }
5564
5565 /* Empty aligned struct, union or class. */
5566 if (nexps == 0)
5567 return NULL;
5568
5569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5570 for (i = 0; i < nexps; i++)
5571 XVECEXP (ret, 0, i) = exp [i];
5572 return ret;
5573 }
5574
5575 /* Update the data in CUM to advance over an argument of mode MODE
5576 and data type TYPE. (TYPE is null for libcalls where that information
5577 may not be available.) */
5578
5579 static void
5580 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5581 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5582 {
5583 switch (mode)
5584 {
5585 default:
5586 break;
5587
5588 case BLKmode:
5589 if (bytes < 0)
5590 break;
5591 /* FALLTHRU */
5592
5593 case DImode:
5594 case SImode:
5595 case HImode:
5596 case QImode:
5597 cum->words += words;
5598 cum->nregs -= words;
5599 cum->regno += words;
5600
5601 if (cum->nregs <= 0)
5602 {
5603 cum->nregs = 0;
5604 cum->regno = 0;
5605 }
5606 break;
5607
5608 case OImode:
5609 /* OImode shouldn't be used directly. */
5610 gcc_unreachable ();
5611
5612 case DFmode:
5613 if (cum->float_in_sse < 2)
5614 break;
5615 case SFmode:
5616 if (cum->float_in_sse < 1)
5617 break;
5618 /* FALLTHRU */
5619
5620 case V8SFmode:
5621 case V8SImode:
5622 case V32QImode:
5623 case V16HImode:
5624 case V4DFmode:
5625 case V4DImode:
5626 case TImode:
5627 case V16QImode:
5628 case V8HImode:
5629 case V4SImode:
5630 case V2DImode:
5631 case V4SFmode:
5632 case V2DFmode:
5633 if (!type || !AGGREGATE_TYPE_P (type))
5634 {
5635 cum->sse_words += words;
5636 cum->sse_nregs -= 1;
5637 cum->sse_regno += 1;
5638 if (cum->sse_nregs <= 0)
5639 {
5640 cum->sse_nregs = 0;
5641 cum->sse_regno = 0;
5642 }
5643 }
5644 break;
5645
5646 case V8QImode:
5647 case V4HImode:
5648 case V2SImode:
5649 case V2SFmode:
5650 case V1DImode:
5651 if (!type || !AGGREGATE_TYPE_P (type))
5652 {
5653 cum->mmx_words += words;
5654 cum->mmx_nregs -= 1;
5655 cum->mmx_regno += 1;
5656 if (cum->mmx_nregs <= 0)
5657 {
5658 cum->mmx_nregs = 0;
5659 cum->mmx_regno = 0;
5660 }
5661 }
5662 break;
5663 }
5664 }
5665
5666 static void
5667 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5668 tree type, HOST_WIDE_INT words, int named)
5669 {
5670 int int_nregs, sse_nregs;
5671
5672 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
5673 if (!named && VALID_AVX256_REG_MODE (mode))
5674 return;
5675
5676 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5677 cum->words += words;
5678 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5679 {
5680 cum->nregs -= int_nregs;
5681 cum->sse_nregs -= sse_nregs;
5682 cum->regno += int_nregs;
5683 cum->sse_regno += sse_nregs;
5684 }
5685 else
5686 cum->words += words;
5687 }
5688
5689 static void
5690 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5691 HOST_WIDE_INT words)
5692 {
5693 /* Otherwise, this should have been passed indirectly. */
5694 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5695
5696 cum->words += words;
5697 if (cum->nregs > 0)
5698 {
5699 cum->nregs -= 1;
5700 cum->regno += 1;
5701 }
5702 }
5703
5704 void
5705 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5706 tree type, int named)
5707 {
5708 HOST_WIDE_INT bytes, words;
5709
5710 if (mode == BLKmode)
5711 bytes = int_size_in_bytes (type);
5712 else
5713 bytes = GET_MODE_SIZE (mode);
5714 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5715
5716 if (type)
5717 mode = type_natural_mode (type, NULL);
5718
5719 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5720 function_arg_advance_ms_64 (cum, bytes, words);
5721 else if (TARGET_64BIT)
5722 function_arg_advance_64 (cum, mode, type, words, named);
5723 else
5724 function_arg_advance_32 (cum, mode, type, bytes, words);
5725 }
5726
5727 /* Define where to put the arguments to a function.
5728 Value is zero to push the argument on the stack,
5729 or a hard register in which to store the argument.
5730
5731 MODE is the argument's machine mode.
5732 TYPE is the data type of the argument (as a tree).
5733 This is null for libcalls where that information may
5734 not be available.
5735 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5736 the preceding args and about the function being called.
5737 NAMED is nonzero if this argument is a named parameter
5738 (otherwise it is an extra parameter matching an ellipsis). */
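/* As a 32-bit example: with __attribute__ ((regparm (3))) the first three
   integer arguments are passed in %eax, %edx and %ecx, while
   __attribute__ ((fastcall)) passes the first two DWORD-or-smaller
   non-aggregate arguments in %ecx and %edx (see the fastcall handling
   below). */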
5739
5740 static rtx
5741 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5742 enum machine_mode orig_mode, tree type,
5743 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5744 {
5745 static bool warnedsse, warnedmmx;
5746
5747 /* Avoid the AL settings for the Unix64 ABI. */
5748 if (mode == VOIDmode)
5749 return constm1_rtx;
5750
5751 switch (mode)
5752 {
5753 default:
5754 break;
5755
5756 case BLKmode:
5757 if (bytes < 0)
5758 break;
5759 /* FALLTHRU */
5760 case DImode:
5761 case SImode:
5762 case HImode:
5763 case QImode:
5764 if (words <= cum->nregs)
5765 {
5766 int regno = cum->regno;
5767
5768 /* Fastcall allocates the first two DWORD (SImode) or
5769 smaller arguments to ECX and EDX, provided the argument
5770 is not an aggregate type. */
5771 if (cum->fastcall)
5772 {
5773 if (mode == BLKmode
5774 || mode == DImode
5775 || (type && AGGREGATE_TYPE_P (type)))
5776 break;
5777
5778 /* ECX, not EAX, is the first allocated register. */
5779 if (regno == AX_REG)
5780 regno = CX_REG;
5781 }
5782 return gen_rtx_REG (mode, regno);
5783 }
5784 break;
5785
5786 case DFmode:
5787 if (cum->float_in_sse < 2)
5788 break;
5789 case SFmode:
5790 if (cum->float_in_sse < 1)
5791 break;
5792 /* FALLTHRU */
5793 case TImode:
5794 /* In 32-bit mode, we pass TImode in xmm registers. */
5795 case V16QImode:
5796 case V8HImode:
5797 case V4SImode:
5798 case V2DImode:
5799 case V4SFmode:
5800 case V2DFmode:
5801 if (!type || !AGGREGATE_TYPE_P (type))
5802 {
5803 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5804 {
5805 warnedsse = true;
5806 warning (0, "SSE vector argument without SSE enabled "
5807 "changes the ABI");
5808 }
5809 if (cum->sse_nregs)
5810 return gen_reg_or_parallel (mode, orig_mode,
5811 cum->sse_regno + FIRST_SSE_REG);
5812 }
5813 break;
5814
5815 case OImode:
5816 /* OImode shouldn't be used directly. */
5817 gcc_unreachable ();
5818
5819 case V8SFmode:
5820 case V8SImode:
5821 case V32QImode:
5822 case V16HImode:
5823 case V4DFmode:
5824 case V4DImode:
5825 if (!type || !AGGREGATE_TYPE_P (type))
5826 {
5827 if (cum->sse_nregs)
5828 return gen_reg_or_parallel (mode, orig_mode,
5829 cum->sse_regno + FIRST_SSE_REG);
5830 }
5831 break;
5832
5833 case V8QImode:
5834 case V4HImode:
5835 case V2SImode:
5836 case V2SFmode:
5837 case V1DImode:
5838 if (!type || !AGGREGATE_TYPE_P (type))
5839 {
5840 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5841 {
5842 warnedmmx = true;
5843 warning (0, "MMX vector argument without MMX enabled "
5844 "changes the ABI");
5845 }
5846 if (cum->mmx_nregs)
5847 return gen_reg_or_parallel (mode, orig_mode,
5848 cum->mmx_regno + FIRST_MMX_REG);
5849 }
5850 break;
5851 }
5852
5853 return NULL_RTX;
5854 }
5855
5856 static rtx
5857 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5858 enum machine_mode orig_mode, tree type, int named)
5859 {
5860 /* Handle the hidden AL argument containing the number of SSE registers
5861 used for varargs x86-64 functions. */
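/* Per the SysV x86-64 ABI, the caller of a varargs function loads an
   upper bound on the number of vector registers used into %al; e.g.
   printf ("%f", 1.0) passes 1.0 in %xmm0 and sets %al to 1. The value
   computed here is what the call expander loads into %al, and the
   callee's prologue uses it to decide how many SSE registers to dump
   into the register save area (see setup_incoming_varargs_64). */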
5862 if (mode == VOIDmode)
5863 return GEN_INT (cum->maybe_vaarg
5864 ? (cum->sse_nregs < 0
5865 ? (cum->call_abi == DEFAULT_ABI
5866 ? SSE_REGPARM_MAX
5867 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5868 : X64_SSE_REGPARM_MAX))
5869 : cum->sse_regno)
5870 : -1);
5871
5872 switch (mode)
5873 {
5874 default:
5875 break;
5876
5877 case V8SFmode:
5878 case V8SImode:
5879 case V32QImode:
5880 case V16HImode:
5881 case V4DFmode:
5882 case V4DImode:
5883 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
5884 if (!named)
5885 return NULL;
5886 break;
5887 }
5888
5889 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5890 cum->sse_nregs,
5891 &x86_64_int_parameter_registers [cum->regno],
5892 cum->sse_regno);
5893 }
5894
5895 static rtx
5896 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5897 enum machine_mode orig_mode, int named,
5898 HOST_WIDE_INT bytes)
5899 {
5900 unsigned int regno;
5901
5902 /* We need to add a clobber for MS_ABI->SYSV ABI calls in expand_call.
5903 We use the value -2 to specify that the current function call is MS ABI. */
5904 if (mode == VOIDmode)
5905 return GEN_INT (-2);
5906
5907 /* If we've run out of registers, it goes on the stack. */
5908 if (cum->nregs == 0)
5909 return NULL_RTX;
5910
5911 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5912
5913 /* Only floating point modes are passed in anything but integer regs. */
5914 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5915 {
5916 if (named)
5917 regno = cum->regno + FIRST_SSE_REG;
5918 else
5919 {
5920 rtx t1, t2;
5921
5922 /* Unnamed floating parameters are passed in both the
5923 SSE and integer registers. */
5924 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5925 t2 = gen_rtx_REG (mode, regno);
5926 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5927 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5928 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5929 }
5930 }
5931 /* Handle aggregate types passed in registers. */
5932 if (orig_mode == BLKmode)
5933 {
5934 if (bytes > 0 && bytes <= 8)
5935 mode = (bytes > 4 ? DImode : SImode);
5936 if (mode == BLKmode)
5937 mode = DImode;
5938 }
5939
5940 return gen_reg_or_parallel (mode, orig_mode, regno);
5941 }
5942
5943 rtx
5944 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5945 tree type, int named)
5946 {
5947 enum machine_mode mode = omode;
5948 HOST_WIDE_INT bytes, words;
5949
5950 if (mode == BLKmode)
5951 bytes = int_size_in_bytes (type);
5952 else
5953 bytes = GET_MODE_SIZE (mode);
5954 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5955
5956 /* To simplify the code below, represent vector types with a vector mode
5957 even if MMX/SSE are not active. */
5958 if (type && TREE_CODE (type) == VECTOR_TYPE)
5959 mode = type_natural_mode (type, cum);
5960
5961 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5962 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5963 else if (TARGET_64BIT)
5964 return function_arg_64 (cum, mode, omode, type, named);
5965 else
5966 return function_arg_32 (cum, mode, omode, type, bytes, words);
5967 }
5968
5969 /* A C expression that indicates when an argument must be passed by
5970 reference. If nonzero for an argument, a copy of that argument is
5971 made in memory and a pointer to the argument is passed instead of
5972 the argument itself. The pointer is passed in whatever way is
5973 appropriate for passing a pointer to that type. */
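/* For example, under the Microsoft x64 convention a 24-byte struct or a
   __m128 argument is replaced by a pointer to a copy made by the caller,
   while 1-, 2-, 4- and 8-byte values are passed directly; under the
   64-bit SysV ABI only variable-sized types take this path. */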
5974
5975 static bool
5976 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5977 enum machine_mode mode ATTRIBUTE_UNUSED,
5978 const_tree type, bool named ATTRIBUTE_UNUSED)
5979 {
5980 /* See Windows x64 Software Convention. */
5981 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5982 {
5983 int msize = (int) GET_MODE_SIZE (mode);
5984 if (type)
5985 {
5986 /* Arrays are passed by reference. */
5987 if (TREE_CODE (type) == ARRAY_TYPE)
5988 return true;
5989
5990 if (AGGREGATE_TYPE_P (type))
5991 {
5992 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5993 are passed by reference. */
5994 msize = int_size_in_bytes (type);
5995 }
5996 }
5997
5998 /* __m128 is passed by reference. */
5999 switch (msize) {
6000 case 1: case 2: case 4: case 8:
6001 break;
6002 default:
6003 return true;
6004 }
6005 }
6006 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6007 return 1;
6008
6009 return 0;
6010 }
6011
6012 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
6013 passing ABI. */
6014 static bool
6015 contains_aligned_value_p (tree type)
6016 {
6017 enum machine_mode mode = TYPE_MODE (type);
6018 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6019 || mode == TDmode
6020 || mode == TFmode
6021 || mode == TCmode)
6022 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6023 return true;
6024 if (TYPE_ALIGN (type) < 128)
6025 return false;
6026
6027 if (AGGREGATE_TYPE_P (type))
6028 {
6029 /* Walk the aggregates recursively. */
6030 switch (TREE_CODE (type))
6031 {
6032 case RECORD_TYPE:
6033 case UNION_TYPE:
6034 case QUAL_UNION_TYPE:
6035 {
6036 tree field;
6037
6038 /* Walk all the structure fields. */
6039 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6040 {
6041 if (TREE_CODE (field) == FIELD_DECL
6042 && contains_aligned_value_p (TREE_TYPE (field)))
6043 return true;
6044 }
6045 break;
6046 }
6047
6048 case ARRAY_TYPE:
6049 /* Just in case some language passes arrays by value. */
6050 if (contains_aligned_value_p (TREE_TYPE (type)))
6051 return true;
6052 break;
6053
6054 default:
6055 gcc_unreachable ();
6056 }
6057 }
6058 return false;
6059 }
6060
6061 /* Gives the alignment boundary, in bits, of an argument with the
6062 specified mode and type. */
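/* For example, in 32-bit mode a "double" argument is only 4-byte aligned
   on the stack (PARM_BOUNDARY), while an SSE type such as __m128, or an
   aggregate containing one, keeps its 128-bit alignment. In 64-bit mode
   the natural alignment is used, bounded below by PARM_BOUNDARY and
   above by BIGGEST_ALIGNMENT. */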
6063
6064 int
6065 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6066 {
6067 int align;
6068 if (type)
6069 {
6070 /* Since the canonical type is used for the call, convert TYPE to its
6071 canonical type if needed. */
6072 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6073 type = TYPE_CANONICAL (type);
6074 align = TYPE_ALIGN (type);
6075 }
6076 else
6077 align = GET_MODE_ALIGNMENT (mode);
6078 if (align < PARM_BOUNDARY)
6079 align = PARM_BOUNDARY;
6080 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6081 natural boundaries. */
6082 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6083 {
6084 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6085 make an exception for SSE modes since these require 128bit
6086 alignment.
6087
6088 The handling here differs from field_alignment. ICC aligns MMX
6089 arguments to 4 byte boundaries, while structure fields are aligned
6090 to 8 byte boundaries. */
6091 if (!type)
6092 {
6093 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6094 align = PARM_BOUNDARY;
6095 }
6096 else
6097 {
6098 if (!contains_aligned_value_p (type))
6099 align = PARM_BOUNDARY;
6100 }
6101 }
6102 if (align > BIGGEST_ALIGNMENT)
6103 align = BIGGEST_ALIGNMENT;
6104 return align;
6105 }
6106
6107 /* Return true if REGNO is a possible register number for a function value. */
6108
6109 bool
6110 ix86_function_value_regno_p (int regno)
6111 {
6112 switch (regno)
6113 {
6114 case 0:
6115 return true;
6116
6117 case FIRST_FLOAT_REG:
6118 /* TODO: The function should depend on current function ABI but
6119 builtins.c would need updating then. Therefore we use the
6120 default ABI. */
6121 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
6122 return false;
6123 return TARGET_FLOAT_RETURNS_IN_80387;
6124
6125 case FIRST_SSE_REG:
6126 return TARGET_SSE;
6127
6128 case FIRST_MMX_REG:
6129 if (TARGET_MACHO || TARGET_64BIT)
6130 return false;
6131 return TARGET_MMX;
6132 }
6133
6134 return false;
6135 }
6136
6137 /* Define how to find the value returned by a function.
6138 VALTYPE is the data type of the value (as a tree).
6139 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6140 otherwise, FUNC is 0. */
6141
6142 static rtx
6143 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6144 const_tree fntype, const_tree fn)
6145 {
6146 unsigned int regno;
6147
6148 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6149 we normally prevent this case when mmx is not available. However
6150 some ABIs may require the result to be returned like DImode. */
6151 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6152 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6153
6154 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6155 we prevent this case when sse is not available. However some ABIs
6156 may require the result to be returned like integer TImode. */
6157 else if (mode == TImode
6158 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6159 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6160
6161 /* 32-byte vector modes in %ymm0. */
6162 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6163 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6164
6165 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6166 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6167 regno = FIRST_FLOAT_REG;
6168 else
6169 /* Most things go in %eax. */
6170 regno = AX_REG;
6171
6172 /* Override FP return register with %xmm0 for local functions when
6173 SSE math is enabled or for functions with sseregparm attribute. */
6174 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6175 {
6176 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6177 if ((sse_level >= 1 && mode == SFmode)
6178 || (sse_level == 2 && mode == DFmode))
6179 regno = FIRST_SSE_REG;
6180 }
6181
6182 /* OImode shouldn't be used directly. */
6183 gcc_assert (mode != OImode);
6184
6185 return gen_rtx_REG (orig_mode, regno);
6186 }
6187
6188 static rtx
6189 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6190 const_tree valtype)
6191 {
6192 rtx ret;
6193
6194 /* Handle libcalls, which don't provide a type node. */
6195 if (valtype == NULL)
6196 {
6197 switch (mode)
6198 {
6199 case SFmode:
6200 case SCmode:
6201 case DFmode:
6202 case DCmode:
6203 case TFmode:
6204 case SDmode:
6205 case DDmode:
6206 case TDmode:
6207 return gen_rtx_REG (mode, FIRST_SSE_REG);
6208 case XFmode:
6209 case XCmode:
6210 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6211 case TCmode:
6212 return NULL;
6213 default:
6214 return gen_rtx_REG (mode, AX_REG);
6215 }
6216 }
6217
6218 ret = construct_container (mode, orig_mode, valtype, 1,
6219 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6220 x86_64_int_return_registers, 0);
6221
6222 /* For zero-sized structures, construct_container returns NULL, but we need
6223 to keep the rest of the compiler happy by returning a meaningful value. */
6224 if (!ret)
6225 ret = gen_rtx_REG (orig_mode, AX_REG);
6226
6227 return ret;
6228 }
6229
6230 static rtx
6231 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6232 {
6233 unsigned int regno = AX_REG;
6234
6235 if (TARGET_SSE)
6236 {
6237 switch (GET_MODE_SIZE (mode))
6238 {
6239 case 16:
6240 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6241 && !COMPLEX_MODE_P (mode))
6242 regno = FIRST_SSE_REG;
6243 break;
6244 case 8:
6245 case 4:
6246 if (mode == SFmode || mode == DFmode)
6247 regno = FIRST_SSE_REG;
6248 break;
6249 default:
6250 break;
6251 }
6252 }
6253 return gen_rtx_REG (orig_mode, regno);
6254 }
6255
6256 static rtx
6257 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6258 enum machine_mode orig_mode, enum machine_mode mode)
6259 {
6260 const_tree fn, fntype;
6261
6262 fn = NULL_TREE;
6263 if (fntype_or_decl && DECL_P (fntype_or_decl))
6264 fn = fntype_or_decl;
6265 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6266
6267 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6268 return function_value_ms_64 (orig_mode, mode);
6269 else if (TARGET_64BIT)
6270 return function_value_64 (orig_mode, mode, valtype);
6271 else
6272 return function_value_32 (orig_mode, mode, fntype, fn);
6273 }
6274
6275 static rtx
6276 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6277 bool outgoing ATTRIBUTE_UNUSED)
6278 {
6279 enum machine_mode mode, orig_mode;
6280
6281 orig_mode = TYPE_MODE (valtype);
6282 mode = type_natural_mode (valtype, NULL);
6283 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6284 }
6285
6286 rtx
6287 ix86_libcall_value (enum machine_mode mode)
6288 {
6289 return ix86_function_value_1 (NULL, NULL, mode, mode);
6290 }
6291
6292 /* Return true iff type is returned in memory. */
6293
6294 static int ATTRIBUTE_UNUSED
6295 return_in_memory_32 (const_tree type, enum machine_mode mode)
6296 {
6297 HOST_WIDE_INT size;
6298
6299 if (mode == BLKmode)
6300 return 1;
6301
6302 size = int_size_in_bytes (type);
6303
6304 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6305 return 0;
6306
6307 if (VECTOR_MODE_P (mode) || mode == TImode)
6308 {
6309 /* User-created vectors small enough to fit in EAX. */
6310 if (size < 8)
6311 return 0;
6312
6313 /* MMX/3dNow values are returned in MM0,
6314 except when it doesn't exist. */
6315 if (size == 8)
6316 return (TARGET_MMX ? 0 : 1);
6317
6318 /* SSE values are returned in XMM0, except when it doesn't exist. */
6319 if (size == 16)
6320 return (TARGET_SSE ? 0 : 1);
6321
6322 /* AVX values are returned in YMM0, except when it doesn't exist. */
6323 if (size == 32)
6324 return TARGET_AVX ? 0 : 1;
6325 }
6326
6327 if (mode == XFmode)
6328 return 0;
6329
6330 if (size > 12)
6331 return 1;
6332
6333 /* OImode shouldn't be used directly. */
6334 gcc_assert (mode != OImode);
6335
6336 return 0;
6337 }
6338
6339 static int ATTRIBUTE_UNUSED
6340 return_in_memory_64 (const_tree type, enum machine_mode mode)
6341 {
6342 int needed_intregs, needed_sseregs;
6343 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6344 }
6345
6346 static int ATTRIBUTE_UNUSED
6347 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6348 {
6349 HOST_WIDE_INT size = int_size_in_bytes (type);
6350
6351 /* __m128 is returned in xmm0. */
6352 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6353 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6354 return 0;
6355
6356 /* Otherwise, the size must be exactly 1, 2, 4, or 8 bytes. */
6357 return (size != 1 && size != 2 && size != 4 && size != 8);
6358 }
6359
6360 static bool
6361 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6362 {
6363 #ifdef SUBTARGET_RETURN_IN_MEMORY
6364 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6365 #else
6366 const enum machine_mode mode = type_natural_mode (type, NULL);
6367
6368 if (TARGET_64BIT)
6369 {
6370 if (ix86_function_type_abi (fntype) == MS_ABI)
6371 return return_in_memory_ms_64 (type, mode);
6372 else
6373 return return_in_memory_64 (type, mode);
6374 }
6375 else
6376 return return_in_memory_32 (type, mode);
6377 #endif
6378 }
6379
6380 /* Return true iff TYPE is returned in memory. This version is used
6381 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6382 but differs notably in that when MMX is available, 8-byte vectors
6383 are returned in memory, rather than in MMX registers. */
6384
6385 bool
6386 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6387 {
6388 int size;
6389 enum machine_mode mode = type_natural_mode (type, NULL);
6390
6391 if (TARGET_64BIT)
6392 return return_in_memory_64 (type, mode);
6393
6394 if (mode == BLKmode)
6395 return 1;
6396
6397 size = int_size_in_bytes (type);
6398
6399 if (VECTOR_MODE_P (mode))
6400 {
6401 /* Return in memory only if MMX registers *are* available. This
6402 seems backwards, but it is consistent with the existing
6403 Solaris x86 ABI. */
6404 if (size == 8)
6405 return TARGET_MMX;
6406 if (size == 16)
6407 return !TARGET_SSE;
6408 }
6409 else if (mode == TImode)
6410 return !TARGET_SSE;
6411 else if (mode == XFmode)
6412 return 0;
6413
6414 return size > 12;
6415 }
6416
6417 /* When returning SSE vector types, we have a choice of either
6418 (1) being ABI incompatible with a -march switch, or
6419 (2) generating an error.
6420 Given no good solution, I think the safest thing is one warning.
6421 The user won't be able to use -Werror, but....
6422
6423 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6424 called in response to actually generating a caller or callee that
6425 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6426 via aggregate_value_p for general type probing from tree-ssa. */
6427
6428 static rtx
6429 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6430 {
6431 static bool warnedsse, warnedmmx;
6432
6433 if (!TARGET_64BIT && type)
6434 {
6435 /* Look at the return type of the function, not the function type. */
6436 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6437
6438 if (!TARGET_SSE && !warnedsse)
6439 {
6440 if (mode == TImode
6441 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6442 {
6443 warnedsse = true;
6444 warning (0, "SSE vector return without SSE enabled "
6445 "changes the ABI");
6446 }
6447 }
6448
6449 if (!TARGET_MMX && !warnedmmx)
6450 {
6451 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6452 {
6453 warnedmmx = true;
6454 warning (0, "MMX vector return without MMX enabled "
6455 "changes the ABI");
6456 }
6457 }
6458 }
6459
6460 return NULL;
6461 }
6462
6463
6464 /* Create the va_list data type. */
6465
6466 /* Returns the calling convention specific va_list data type.
6467 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
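/* For the 64-bit SysV ABI the record built below corresponds to

     struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };

   and the public va_list type is a one-element array of it (see the
   return value below). */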
6468
6469 static tree
6470 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6471 {
6472 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6473
6474 /* For i386 we use a plain pointer to the argument area. */
6475 if (!TARGET_64BIT || abi == MS_ABI)
6476 return build_pointer_type (char_type_node);
6477
6478 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6479 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6480
6481 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6482 unsigned_type_node);
6483 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6484 unsigned_type_node);
6485 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6486 ptr_type_node);
6487 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6488 ptr_type_node);
6489
6490 va_list_gpr_counter_field = f_gpr;
6491 va_list_fpr_counter_field = f_fpr;
6492
6493 DECL_FIELD_CONTEXT (f_gpr) = record;
6494 DECL_FIELD_CONTEXT (f_fpr) = record;
6495 DECL_FIELD_CONTEXT (f_ovf) = record;
6496 DECL_FIELD_CONTEXT (f_sav) = record;
6497
6498 TREE_CHAIN (record) = type_decl;
6499 TYPE_NAME (record) = type_decl;
6500 TYPE_FIELDS (record) = f_gpr;
6501 TREE_CHAIN (f_gpr) = f_fpr;
6502 TREE_CHAIN (f_fpr) = f_ovf;
6503 TREE_CHAIN (f_ovf) = f_sav;
6504
6505 layout_type (record);
6506
6507 /* The correct type is an array type of one element. */
6508 return build_array_type (record, build_index_type (size_zero_node));
6509 }
6510
6511 /* Set up the builtin va_list data type and, for 64-bit, the additional
6512 calling convention specific va_list data types. */
6513
6514 static tree
6515 ix86_build_builtin_va_list (void)
6516 {
6517 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6518
6519 /* Initialize abi specific va_list builtin types. */
6520 if (TARGET_64BIT)
6521 {
6522 tree t;
6523 if (DEFAULT_ABI == MS_ABI)
6524 {
6525 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6526 if (TREE_CODE (t) != RECORD_TYPE)
6527 t = build_variant_type_copy (t);
6528 sysv_va_list_type_node = t;
6529 }
6530 else
6531 {
6532 t = ret;
6533 if (TREE_CODE (t) != RECORD_TYPE)
6534 t = build_variant_type_copy (t);
6535 sysv_va_list_type_node = t;
6536 }
6537 if (DEFAULT_ABI != MS_ABI)
6538 {
6539 t = ix86_build_builtin_va_list_abi (MS_ABI);
6540 if (TREE_CODE (t) != RECORD_TYPE)
6541 t = build_variant_type_copy (t);
6542 ms_va_list_type_node = t;
6543 }
6544 else
6545 {
6546 t = ret;
6547 if (TREE_CODE (t) != RECORD_TYPE)
6548 t = build_variant_type_copy (t);
6549 ms_va_list_type_node = t;
6550 }
6551 }
6552
6553 return ret;
6554 }
6555
6556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6557
6558 static void
6559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6560 {
6561 rtx save_area, mem;
6562 rtx label;
6563 rtx label_ref;
6564 rtx tmp_reg;
6565 rtx nsse_reg;
6566 alias_set_type set;
6567 int i;
6568 int regparm = ix86_regparm;
6569
6570 if (cum->call_abi != DEFAULT_ABI)
6571 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6572
6573 /* GPR size of varargs save area. */
6574 if (cfun->va_list_gpr_size)
6575 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6576 else
6577 ix86_varargs_gpr_size = 0;
6578
6579 /* FPR size of varargs save area. We don't need it if we don't pass
6580 anything in SSE registers. */
6581 if (cum->sse_nregs && cfun->va_list_fpr_size)
6582 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6583 else
6584 ix86_varargs_fpr_size = 0;
6585
6586 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6587 return;
6588
6589 save_area = frame_pointer_rtx;
6590 set = get_varargs_alias_set ();
6591
6592 for (i = cum->regno;
6593 i < regparm
6594 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6595 i++)
6596 {
6597 mem = gen_rtx_MEM (Pmode,
6598 plus_constant (save_area, i * UNITS_PER_WORD));
6599 MEM_NOTRAP_P (mem) = 1;
6600 set_mem_alias_set (mem, set);
6601 emit_move_insn (mem, gen_rtx_REG (Pmode,
6602 x86_64_int_parameter_registers[i]));
6603 }
6604
6605 if (ix86_varargs_fpr_size)
6606 {
6607 /* The stack must be 16-byte aligned for the FP register save area. */
6608 if (crtl->stack_alignment_needed < 128)
6609 crtl->stack_alignment_needed = 128;
6610
6611 /* Now emit code to save the SSE registers. The AX parameter contains the
6612 number of SSE parameter registers used to call this function. We use the
6613 sse_prologue_save insn template, which produces a computed jump across
6614 the SSE saves. We need some preparation work to get this working. */
6615
6616 label = gen_label_rtx ();
6617 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6618
6619 /* Compute the address to jump to:
6620 label - eax*4 + nnamed_sse_arguments*4, or
6621 label - eax*5 + nnamed_sse_arguments*5 for AVX. */
6622 tmp_reg = gen_reg_rtx (Pmode);
6623 nsse_reg = gen_reg_rtx (Pmode);
6624 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6625 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6626 gen_rtx_MULT (Pmode, nsse_reg,
6627 GEN_INT (4))));
6628
6629 /* vmovaps is one byte longer than movaps. */
6630 if (TARGET_AVX)
6631 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6632 gen_rtx_PLUS (Pmode, tmp_reg,
6633 nsse_reg)));
6634
6635 if (cum->sse_regno)
6636 emit_move_insn
6637 (nsse_reg,
6638 gen_rtx_CONST (DImode,
6639 gen_rtx_PLUS (DImode,
6640 label_ref,
6641 GEN_INT (cum->sse_regno
6642 * (TARGET_AVX ? 5 : 4)))));
6643 else
6644 emit_move_insn (nsse_reg, label_ref);
6645 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6646
6647 /* Compute the address of the memory block we save into. We always use a
6648 pointer pointing 127 bytes past the first byte to store; this is needed
6649 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6650 one-byte displacement. */
6651 tmp_reg = gen_reg_rtx (Pmode);
6652 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6653 plus_constant (save_area,
6654 ix86_varargs_gpr_size + 127)));
6655 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6656 MEM_NOTRAP_P (mem) = 1;
6657 set_mem_alias_set (mem, set);
6658 set_mem_align (mem, BITS_PER_WORD);
6659
6660 /* And finally do the dirty job! */
6661 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6662 GEN_INT (cum->sse_regno), label));
6663 }
6664 }
6665
6666 static void
6667 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6668 {
6669 alias_set_type set = get_varargs_alias_set ();
6670 int i;
6671
6672 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6673 {
6674 rtx reg, mem;
6675
6676 mem = gen_rtx_MEM (Pmode,
6677 plus_constant (virtual_incoming_args_rtx,
6678 i * UNITS_PER_WORD));
6679 MEM_NOTRAP_P (mem) = 1;
6680 set_mem_alias_set (mem, set);
6681
6682 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6683 emit_move_insn (mem, reg);
6684 }
6685 }
6686
6687 static void
6688 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6689 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6690 int no_rtl)
6691 {
6692 CUMULATIVE_ARGS next_cum;
6693 tree fntype;
6694
6695 /* This argument doesn't appear to be used anymore. Which is good,
6696 because the old code here didn't suppress rtl generation. */
6697 gcc_assert (!no_rtl);
6698
6699 if (!TARGET_64BIT)
6700 return;
6701
6702 fntype = TREE_TYPE (current_function_decl);
6703
6704 /* For varargs, we do not want to skip the dummy va_dcl argument.
6705 For stdargs, we do want to skip the last named argument. */
6706 next_cum = *cum;
6707 if (stdarg_p (fntype))
6708 function_arg_advance (&next_cum, mode, type, 1);
6709
6710 if (cum->call_abi == MS_ABI)
6711 setup_incoming_varargs_ms_64 (&next_cum);
6712 else
6713 setup_incoming_varargs_64 (&next_cum);
6714 }
6715
6716 /* Check whether TYPE is the char * kind of va_list. */
6717
6718 static bool
6719 is_va_list_char_pointer (tree type)
6720 {
6721 tree canonic;
6722
6723 /* For 32-bit it is always true. */
6724 if (!TARGET_64BIT)
6725 return true;
6726 canonic = ix86_canonical_va_list_type (type);
6727 return (canonic == ms_va_list_type_node
6728 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6729 }
6730
6731 /* Implement va_start. */
6732
6733 static void
6734 ix86_va_start (tree valist, rtx nextarg)
6735 {
6736 HOST_WIDE_INT words, n_gpr, n_fpr;
6737 tree f_gpr, f_fpr, f_ovf, f_sav;
6738 tree gpr, fpr, ovf, sav, t;
6739 tree type;
6740
6741 /* Only the 64-bit target needs something special. */
6742 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6743 {
6744 std_expand_builtin_va_start (valist, nextarg);
6745 return;
6746 }
6747
6748 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6749 f_fpr = TREE_CHAIN (f_gpr);
6750 f_ovf = TREE_CHAIN (f_fpr);
6751 f_sav = TREE_CHAIN (f_ovf);
6752
6753 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6754 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6755 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6756 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6757 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6758
6759 /* Count number of gp and fp argument registers used. */
6760 words = crtl->args.info.words;
6761 n_gpr = crtl->args.info.regno;
6762 n_fpr = crtl->args.info.sse_regno;
6763
6764 if (cfun->va_list_gpr_size)
6765 {
6766 type = TREE_TYPE (gpr);
6767 t = build2 (MODIFY_EXPR, type,
6768 gpr, build_int_cst (type, n_gpr * 8));
6769 TREE_SIDE_EFFECTS (t) = 1;
6770 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6771 }
6772
6773 if (TARGET_SSE && cfun->va_list_fpr_size)
6774 {
6775 type = TREE_TYPE (fpr);
6776 t = build2 (MODIFY_EXPR, type, fpr,
6777 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6778 TREE_SIDE_EFFECTS (t) = 1;
6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6780 }
6781
6782 /* Find the overflow area. */
6783 type = TREE_TYPE (ovf);
6784 t = make_tree (type, crtl->args.internal_arg_pointer);
6785 if (words != 0)
6786 t = build2 (POINTER_PLUS_EXPR, type, t,
6787 size_int (words * UNITS_PER_WORD));
6788 t = build2 (MODIFY_EXPR, type, ovf, t);
6789 TREE_SIDE_EFFECTS (t) = 1;
6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6791
6792 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6793 {
6794 /* Find the register save area.
6795 The function prologue saves it right above the stack frame. */
6796 type = TREE_TYPE (sav);
6797 t = make_tree (type, frame_pointer_rtx);
6798 if (!ix86_varargs_gpr_size)
6799 t = build2 (POINTER_PLUS_EXPR, type, t,
6800 size_int (-8 * X86_64_REGPARM_MAX));
6801 t = build2 (MODIFY_EXPR, type, sav, t);
6802 TREE_SIDE_EFFECTS (t) = 1;
6803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6804 }
6805 }
6806
6807 /* Implement va_arg. */
6808
6809 static tree
6810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6811 gimple_seq *post_p)
6812 {
6813 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6814 tree f_gpr, f_fpr, f_ovf, f_sav;
6815 tree gpr, fpr, ovf, sav, t;
6816 int size, rsize;
6817 tree lab_false, lab_over = NULL_TREE;
6818 tree addr, t2;
6819 rtx container;
6820 int indirect_p = 0;
6821 tree ptrtype;
6822 enum machine_mode nat_mode;
6823 int arg_boundary;
6824
6825 /* Only the 64-bit target needs something special. */
6826 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6827 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6828
6829 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6830 f_fpr = TREE_CHAIN (f_gpr);
6831 f_ovf = TREE_CHAIN (f_fpr);
6832 f_sav = TREE_CHAIN (f_ovf);
6833
6834 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6835 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6836 valist = build_va_arg_indirect_ref (valist);
6837 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6838 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6839 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6840
6841 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6842 if (indirect_p)
6843 type = build_pointer_type (type);
6844 size = int_size_in_bytes (type);
6845 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6846
6847 nat_mode = type_natural_mode (type, NULL);
6848 switch (nat_mode)
6849 {
6850 case V8SFmode:
6851 case V8SImode:
6852 case V32QImode:
6853 case V16HImode:
6854 case V4DFmode:
6855 case V4DImode:
6856 /* Unnamed 256-bit vector mode parameters are passed on the stack. */
6857 if (ix86_cfun_abi () == SYSV_ABI)
6858 {
6859 container = NULL;
6860 break;
6861 }
6862
6863 default:
6864 container = construct_container (nat_mode, TYPE_MODE (type),
6865 type, 0, X86_64_REGPARM_MAX,
6866 X86_64_SSE_REGPARM_MAX, intreg,
6867 0);
6868 break;
6869 }
6870
6871 /* Pull the value out of the saved registers. */
6872
6873 addr = create_tmp_var (ptr_type_node, "addr");
6874 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6875
6876 if (container)
6877 {
6878 int needed_intregs, needed_sseregs;
6879 bool need_temp;
6880 tree int_addr, sse_addr;
6881
6882 lab_false = create_artificial_label ();
6883 lab_over = create_artificial_label ();
6884
6885 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6886
6887 need_temp = (!REG_P (container)
6888 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6889 || TYPE_ALIGN (type) > 128));
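/* That is, a temporary is needed when the value is not a single register
   and its alignment exceeds that of the save-area slots: 8 bytes for
   integer registers, 16 bytes for SSE registers.  */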
6890
6891 /* If we are passing a structure, verify that it is a consecutive block
6892 in the register save area. If not, we need to do moves. */
6893 if (!need_temp && !REG_P (container))
6894 {
6895 /* Verify that all registers are strictly consecutive */
6896 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6897 {
6898 int i;
6899
6900 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6901 {
6902 rtx slot = XVECEXP (container, 0, i);
6903 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6904 || INTVAL (XEXP (slot, 1)) != i * 16)
6905 need_temp = 1;
6906 }
6907 }
6908 else
6909 {
6910 int i;
6911
6912 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6913 {
6914 rtx slot = XVECEXP (container, 0, i);
6915 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6916 || INTVAL (XEXP (slot, 1)) != i * 8)
6917 need_temp = 1;
6918 }
6919 }
6920 }
6921 if (!need_temp)
6922 {
6923 int_addr = addr;
6924 sse_addr = addr;
6925 }
6926 else
6927 {
6928 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6929 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6930 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6931 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6932 }
6933
6934 /* First ensure that we fit completely in registers. */
6935 if (needed_intregs)
6936 {
6937 t = build_int_cst (TREE_TYPE (gpr),
6938 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6939 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6940 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6941 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6942 gimplify_and_add (t, pre_p);
6943 }
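/* For example, with X86_64_REGPARM_MAX == 6 and needed_intregs == 2 the
   threshold is (6 - 2 + 1) * 8 == 40: once the gpr offset reaches 40,
   fewer than two integer registers remain, so we branch to the overflow
   path.  */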
6944 if (needed_sseregs)
6945 {
6946 t = build_int_cst (TREE_TYPE (fpr),
6947 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6948 + X86_64_REGPARM_MAX * 8);
6949 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6950 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6951 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6952 gimplify_and_add (t, pre_p);
6953 }
6954
6955 /* Compute index to start of area used for integer regs. */
6956 if (needed_intregs)
6957 {
6958 /* int_addr = gpr + sav; */
6959 t = fold_convert (sizetype, gpr);
6960 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6961 gimplify_assign (int_addr, t, pre_p);
6962 }
6963 if (needed_sseregs)
6964 {
6965 /* sse_addr = fpr + sav; */
6966 t = fold_convert (sizetype, fpr);
6967 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6968 gimplify_assign (sse_addr, t, pre_p);
6969 }
6970 if (need_temp)
6971 {
6972 int i;
6973 tree temp = create_tmp_var (type, "va_arg_tmp");
6974
6975 /* addr = &temp; */
6976 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6977 gimplify_assign (addr, t, pre_p);
6978
6979 for (i = 0; i < XVECLEN (container, 0); i++)
6980 {
6981 rtx slot = XVECEXP (container, 0, i);
6982 rtx reg = XEXP (slot, 0);
6983 enum machine_mode mode = GET_MODE (reg);
6984 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6985 tree addr_type = build_pointer_type (piece_type);
6986 tree daddr_type = build_pointer_type_for_mode (piece_type,
6987 ptr_mode, true);
6988 tree src_addr, src;
6989 int src_offset;
6990 tree dest_addr, dest;
6991
6992 if (SSE_REGNO_P (REGNO (reg)))
6993 {
6994 src_addr = sse_addr;
6995 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6996 }
6997 else
6998 {
6999 src_addr = int_addr;
7000 src_offset = REGNO (reg) * 8;
7001 }
7002 src_addr = fold_convert (addr_type, src_addr);
7003 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7004 size_int (src_offset));
7005 src = build_va_arg_indirect_ref (src_addr);
7006
7007 dest_addr = fold_convert (daddr_type, addr);
7008 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7009 size_int (INTVAL (XEXP (slot, 1))));
7010 dest = build_va_arg_indirect_ref (dest_addr);
7011
7012 gimplify_assign (dest, src, pre_p);
7013 }
7014 }
7015
7016 if (needed_intregs)
7017 {
7018 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7019 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7020 gimplify_assign (gpr, t, pre_p);
7021 }
7022
7023 if (needed_sseregs)
7024 {
7025 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7026 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7027 gimplify_assign (fpr, t, pre_p);
7028 }
7029
7030 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7031
7032 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7033 }
7034
7035 /* ... otherwise out of the overflow area. */
7036
7037 /* When we align a parameter on the stack for the caller, if its
7038 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7039 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. Match the callee here
7040 with the caller. */
7041 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7042 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7043 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7044
7045 /* Care for on-stack alignment if needed. */
7046 if (arg_boundary <= 64
7047 || integer_zerop (TYPE_SIZE (type)))
7048 t = ovf;
7049 else
7050 {
7051 HOST_WIDE_INT align = arg_boundary / 8;
7052 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7053 size_int (align - 1));
7054 t = fold_convert (sizetype, t);
7055 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7056 size_int (-align));
7057 t = fold_convert (TREE_TYPE (ovf), t);
7058 }
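/* For example, with a 32-byte boundary align == 32, so an overflow
   pointer ending in 0x28 is rounded up to (0x28 + 31) & -32 == 0x40.  */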
7059 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7060 gimplify_assign (addr, t, pre_p);
7061
7062 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7063 size_int (rsize * UNITS_PER_WORD));
7064 gimplify_assign (unshare_expr (ovf), t, pre_p);
7065
7066 if (container)
7067 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7068
7069 ptrtype = build_pointer_type (type);
7070 addr = fold_convert (ptrtype, addr);
7071
7072 if (indirect_p)
7073 addr = build_va_arg_indirect_ref (addr);
7074 return build_va_arg_indirect_ref (addr);
7075 }
7076
7077 /* Return nonzero if OPNUM's MEM should be matched
7078 in movabs* patterns. */
7079
7080 int
7081 ix86_check_movabs (rtx insn, int opnum)
7082 {
7083 rtx set, mem;
7084
7085 set = PATTERN (insn);
7086 if (GET_CODE (set) == PARALLEL)
7087 set = XVECEXP (set, 0, 0);
7088 gcc_assert (GET_CODE (set) == SET);
7089 mem = XEXP (set, opnum);
7090 while (GET_CODE (mem) == SUBREG)
7091 mem = SUBREG_REG (mem);
7092 gcc_assert (MEM_P (mem));
7093 return (volatile_ok || !MEM_VOLATILE_P (mem));
7094 }
7095
7096 /* Initialize the table of extra 80387 mathematical constants. */
7097
7098 static void
7099 init_ext_80387_constants (void)
7100 {
7101 static const char * cst[5] =
7102 {
7103 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7104 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7105 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7106 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7107 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7108 };
7109 int i;
7110
7111 for (i = 0; i < 5; i++)
7112 {
7113 real_from_string (&ext_80387_constants_table[i], cst[i]);
7114 /* Ensure each constant is rounded to XFmode precision. */
7115 real_convert (&ext_80387_constants_table[i],
7116 XFmode, &ext_80387_constants_table[i]);
7117 }
7118
7119 ext_80387_constants_init = 1;
7120 }
7121
7122 /* Return a code for the special 80387 instruction that loads constant X,
7123 0 if there is none, or -1 if X is not an X87 float constant. */
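/* The codes, matching standard_80387_constant_opcode below, are:
	1  fldz		(+0.0)
	2  fld1		(+1.0)
	3  fldlg2
	4  fldln2
	5  fldl2e
	6  fldl2t
	7  fldpi
	8  fldz;fchs	(-0.0)
	9  fld1;fchs	(-1.0)  */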
7124
7125 int
7126 standard_80387_constant_p (rtx x)
7127 {
7128 enum machine_mode mode = GET_MODE (x);
7129
7130 REAL_VALUE_TYPE r;
7131
7132 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7133 return -1;
7134
7135 if (x == CONST0_RTX (mode))
7136 return 1;
7137 if (x == CONST1_RTX (mode))
7138 return 2;
7139
7140 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7141
7142 /* For XFmode constants, try to find a special 80387 instruction when
7143 optimizing for size or on those CPUs that benefit from them. */
7144 if (mode == XFmode
7145 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7146 {
7147 int i;
7148
7149 if (! ext_80387_constants_init)
7150 init_ext_80387_constants ();
7151
7152 for (i = 0; i < 5; i++)
7153 if (real_identical (&r, &ext_80387_constants_table[i]))
7154 return i + 3;
7155 }
7156
7157 /* Load of the constant -0.0 or -1.0 will be split as
7158 fldz;fchs or fld1;fchs sequence. */
7159 if (real_isnegzero (&r))
7160 return 8;
7161 if (real_identical (&r, &dconstm1))
7162 return 9;
7163
7164 return 0;
7165 }
7166
7167 /* Return the opcode of the special instruction to be used to load
7168 the constant X. */
7169
7170 const char *
7171 standard_80387_constant_opcode (rtx x)
7172 {
7173 switch (standard_80387_constant_p (x))
7174 {
7175 case 1:
7176 return "fldz";
7177 case 2:
7178 return "fld1";
7179 case 3:
7180 return "fldlg2";
7181 case 4:
7182 return "fldln2";
7183 case 5:
7184 return "fldl2e";
7185 case 6:
7186 return "fldl2t";
7187 case 7:
7188 return "fldpi";
7189 case 8:
7190 case 9:
7191 return "#";
7192 default:
7193 gcc_unreachable ();
7194 }
7195 }
7196
7197 /* Return the CONST_DOUBLE representing the 80387 constant that is
7198 loaded by the specified special instruction. The argument IDX
7199 matches the return value from standard_80387_constant_p. */
7200
7201 rtx
7202 standard_80387_constant_rtx (int idx)
7203 {
7204 int i;
7205
7206 if (! ext_80387_constants_init)
7207 init_ext_80387_constants ();
7208
7209 switch (idx)
7210 {
7211 case 3:
7212 case 4:
7213 case 5:
7214 case 6:
7215 case 7:
7216 i = idx - 3;
7217 break;
7218
7219 default:
7220 gcc_unreachable ();
7221 }
7222
7223 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7224 XFmode);
7225 }
7226
7227 /* Return 1 if MODE is a valid vector mode for SSE. */
7228 static int
7229 standard_sse_mode_p (enum machine_mode mode)
7230 {
7231 switch (mode)
7232 {
7233 case V16QImode:
7234 case V8HImode:
7235 case V4SImode:
7236 case V2DImode:
7237 case V4SFmode:
7238 case V2DFmode:
7239 return 1;
7240
7241 default:
7242 return 0;
7243 }
7244 }
7245
7246 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in a 128-bit
7247 SSE mode and SSE2 is enabled, or 3 if X is in a 256-bit AVX mode and
7248 AVX is enabled; return a negative code if the required ISA is missing. */
7249
7250 int
7251 standard_sse_constant_p (rtx x)
7252 {
7253 enum machine_mode mode = GET_MODE (x);
7254
7255 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7256 return 1;
7257 if (vector_all_ones_operand (x, mode))
7258 {
7259 if (standard_sse_mode_p (mode))
7260 return TARGET_SSE2 ? 2 : -2;
7261 else if (VALID_AVX256_REG_MODE (mode))
7262 return TARGET_AVX ? 3 : -3;
7263 }
7264
7265 return 0;
7266 }
7267
7268 /* Return the opcode of the special instruction to be used to load
7269 the constant X. */
7270
7271 const char *
7272 standard_sse_constant_opcode (rtx insn, rtx x)
7273 {
7274 switch (standard_sse_constant_p (x))
7275 {
7276 case 1:
7277 switch (get_attr_mode (insn))
7278 {
7279 case MODE_V4SF:
7280 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7281 case MODE_V2DF:
7282 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7283 case MODE_TI:
7284 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7285 case MODE_V8SF:
7286 return "vxorps\t%x0, %x0, %x0";
7287 case MODE_V4DF:
7288 return "vxorpd\t%x0, %x0, %x0";
7289 case MODE_OI:
7290 return "vpxor\t%x0, %x0, %x0";
7291 default:
7292 gcc_unreachable ();
7293 }
7294 case 2:
7295 if (TARGET_AVX)
7296 switch (get_attr_mode (insn))
7297 {
7298 case MODE_V4SF:
7299 case MODE_V2DF:
7300 case MODE_TI:
7301 return "vpcmpeqd\t%0, %0, %0";
7302 break;
7303 default:
7304 gcc_unreachable ();
7305 }
7306 else
7307 return "pcmpeqd\t%0, %0";
7308 }
7309 gcc_unreachable ();
7310 }
7311
7312 /* Returns 1 if OP contains a symbol reference */
7313
7314 int
7315 symbolic_reference_mentioned_p (rtx op)
7316 {
7317 const char *fmt;
7318 int i;
7319
7320 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7321 return 1;
7322
7323 fmt = GET_RTX_FORMAT (GET_CODE (op));
7324 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7325 {
7326 if (fmt[i] == 'E')
7327 {
7328 int j;
7329
7330 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7331 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7332 return 1;
7333 }
7334
7335 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7336 return 1;
7337 }
7338
7339 return 0;
7340 }
7341
7342 /* Return 1 if it is appropriate to emit `ret' instructions in the
7343 body of a function. Do this only if the epilogue is simple, needing a
7344 couple of insns. Prior to reloading, we can't tell how many registers
7345 must be saved, so return 0 then. Return 0 if there is no frame
7346 marker to de-allocate. */
7347
7348 int
7349 ix86_can_use_return_insn_p (void)
7350 {
7351 struct ix86_frame frame;
7352
7353 if (! reload_completed || frame_pointer_needed)
7354 return 0;
7355
7356 /* Don't allow more than 32k bytes of pops, since that's all we can do
7357 with one instruction. */
7358 if (crtl->args.pops_args
7359 && crtl->args.size >= 32768)
7360 return 0;
7361
7362 ix86_compute_frame_layout (&frame);
7363 return (frame.to_allocate == 0 && frame.padding05 == 0
7364 && frame.nmsave_args == 0 && (frame.nregs + frame.nsseregs) == 0);
7365 }
7366
7367 /* Value should be nonzero if functions must have frame pointers.
7368 Zero means the frame pointer need not be set up (and parms may
7369 be accessed via the stack pointer) in functions that seem suitable. */
7370
7371 int
7372 ix86_frame_pointer_required (void)
7373 {
7374 /* If we accessed previous frames, then the generated code expects
7375 to be able to access the saved ebp value in our frame. */
7376 if (cfun->machine->accesses_prev_frame)
7377 return 1;
7378
7379 /* Several x86 OSes need a frame pointer for other reasons,
7380 usually pertaining to setjmp. */
7381 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7382 return 1;
7383
7384 if (TARGET_SAVE_ARGS)
7385 return 1;
7386
7387 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7388 the frame pointer by default. Turn it back on now if we've not
7389 got a leaf function. */
7390 if (TARGET_OMIT_LEAF_FRAME_POINTER
7391 && (!current_function_is_leaf
7392 || ix86_current_function_calls_tls_descriptor))
7393 return 1;
7394
7395 if (crtl->profile)
7396 return 1;
7397
7398 return 0;
7399 }
7400
7401 /* Record that the current function accesses previous call frames. */
7402
7403 void
7404 ix86_setup_frame_addresses (void)
7405 {
7406 cfun->machine->accesses_prev_frame = 1;
7407 }
7408
7409 #ifndef USE_HIDDEN_LINKONCE
7410 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7411 # define USE_HIDDEN_LINKONCE 1
7412 # else
7413 # define USE_HIDDEN_LINKONCE 0
7414 # endif
7415 #endif
7416
7417 static int pic_labels_used;
7418
7419 /* Fills in the label name that should be used for a pc thunk for
7420 the given register. */
7421
7422 static void
7423 get_pc_thunk_name (char name[32], unsigned int regno)
7424 {
7425 gcc_assert (!TARGET_64BIT);
7426
7427 if (USE_HIDDEN_LINKONCE)
7428 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7429 else
7430 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7431 }
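/* For example, with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx".  */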
7432
7433
7434 /* Emit the pc thunks used by -fpic code: each thunk loads its register
7435 with the return address of the caller and then returns. */
7436
7437 void
7438 ix86_file_end (void)
7439 {
7440 rtx xops[2];
7441 int regno;
7442
7443 for (regno = 0; regno < 8; ++regno)
7444 {
7445 char name[32];
7446
7447 if (! ((pic_labels_used >> regno) & 1))
7448 continue;
7449
7450 get_pc_thunk_name (name, regno);
7451
7452 #if TARGET_MACHO
7453 if (TARGET_MACHO)
7454 {
7455 switch_to_section (darwin_sections[text_coal_section]);
7456 fputs ("\t.weak_definition\t", asm_out_file);
7457 assemble_name (asm_out_file, name);
7458 fputs ("\n\t.private_extern\t", asm_out_file);
7459 assemble_name (asm_out_file, name);
7460 fputs ("\n", asm_out_file);
7461 ASM_OUTPUT_LABEL (asm_out_file, name);
7462 }
7463 else
7464 #endif
7465 if (USE_HIDDEN_LINKONCE)
7466 {
7467 tree decl;
7468
7469 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7470 error_mark_node);
7471 TREE_PUBLIC (decl) = 1;
7472 TREE_STATIC (decl) = 1;
7473 DECL_ONE_ONLY (decl) = 1;
7474
7475 (*targetm.asm_out.unique_section) (decl, 0);
7476 switch_to_section (get_named_section (decl, NULL, 0));
7477
7478 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7479 fputs ("\t.hidden\t", asm_out_file);
7480 assemble_name (asm_out_file, name);
7481 fputc ('\n', asm_out_file);
7482 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7483 }
7484 else
7485 {
7486 switch_to_section (text_section);
7487 ASM_OUTPUT_LABEL (asm_out_file, name);
7488 }
7489
7490 xops[0] = gen_rtx_REG (Pmode, regno);
7491 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7492 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7493 output_asm_insn ("ret", xops);
7494 }
7495
7496 if (NEED_INDICATE_EXEC_STACK)
7497 file_end_indicate_exec_stack ();
7498 }
7499
7500 /* Emit code for the SET_GOT patterns. */
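/* For plain -fpic code (without TARGET_DEEP_BRANCH_PREDICTION) the
   expansion is roughly:
	call	1f
   1:	pop	%reg
	add	$_GLOBAL_OFFSET_TABLE_+[.-1b], %reg
   With TARGET_DEEP_BRANCH_PREDICTION the call targets the pc thunk for
   %reg instead, and the add uses plain $_GLOBAL_OFFSET_TABLE_.  */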
7501
7502 const char *
7503 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7504 {
7505 rtx xops[3];
7506
7507 xops[0] = dest;
7508
7509 if (TARGET_VXWORKS_RTP && flag_pic)
7510 {
7511 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7512 xops[2] = gen_rtx_MEM (Pmode,
7513 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7514 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7515
7516 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7517 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7518 an unadorned address. */
7519 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7520 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7521 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7522 return "";
7523 }
7524
7525 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7526
7527 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7528 {
7529 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7530
7531 if (!flag_pic)
7532 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7533 else
7534 output_asm_insn ("call\t%a2", xops);
7535
7536 #if TARGET_MACHO
7537 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7538 is what will be referenced by the Mach-O PIC subsystem. */
7539 if (!label)
7540 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7541 #endif
7542
7543 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7544 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7545
7546 if (flag_pic)
7547 output_asm_insn ("pop%z0\t%0", xops);
7548 }
7549 else
7550 {
7551 char name[32];
7552 get_pc_thunk_name (name, REGNO (dest));
7553 pic_labels_used |= 1 << REGNO (dest);
7554
7555 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7556 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7557 output_asm_insn ("call\t%X2", xops);
7558 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7559 is what will be referenced by the Mach-O PIC subsystem. */
7560 #if TARGET_MACHO
7561 if (!label)
7562 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7563 else
7564 targetm.asm_out.internal_label (asm_out_file, "L",
7565 CODE_LABEL_NUMBER (label));
7566 #endif
7567 }
7568
7569 if (TARGET_MACHO)
7570 return "";
7571
7572 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7573 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7574 else
7575 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7576
7577 return "";
7578 }
7579
7580 /* Generate a "push" pattern for input ARG. */
7581
7582 static rtx
7583 gen_push (rtx arg)
7584 {
7585 return gen_rtx_SET (VOIDmode,
7586 gen_rtx_MEM (Pmode,
7587 gen_rtx_PRE_DEC (Pmode,
7588 stack_pointer_rtx)),
7589 arg);
7590 }
7591
7592 /* Return the number of an unused call-clobbered register available
7593 for the entire function, or INVALID_REGNUM if there is none. */
7594
7595 static unsigned int
7596 ix86_select_alt_pic_regnum (void)
7597 {
7598 if (current_function_is_leaf && !crtl->profile
7599 && !ix86_current_function_calls_tls_descriptor)
7600 {
7601 int i, drap;
7602 /* Can't use the same register for both PIC and DRAP. */
7603 if (crtl->drap_reg)
7604 drap = REGNO (crtl->drap_reg);
7605 else
7606 drap = -1;
7607 for (i = 2; i >= 0; --i)
7608 if (i != drap && !df_regs_ever_live_p (i))
7609 return i;
7610 }
7611
7612 return INVALID_REGNUM;
7613 }
7614
7615 /* Return 1 if we need to save REGNO. */
7616 static int
7617 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7618 {
7619 if (pic_offset_table_rtx
7620 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7621 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7622 || crtl->profile
7623 || crtl->calls_eh_return
7624 || crtl->uses_const_pool))
7625 {
7626 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7627 return 0;
7628 return 1;
7629 }
7630
7631 if (crtl->calls_eh_return && maybe_eh_return)
7632 {
7633 unsigned i;
7634 for (i = 0; ; i++)
7635 {
7636 unsigned test = EH_RETURN_DATA_REGNO (i);
7637 if (test == INVALID_REGNUM)
7638 break;
7639 if (test == regno)
7640 return 1;
7641 }
7642 }
7643
7644 if (crtl->drap_reg
7645 && regno == REGNO (crtl->drap_reg))
7646 return 1;
7647
7648 return (df_regs_ever_live_p (regno)
7649 && !call_used_regs[regno]
7650 && !fixed_regs[regno]
7651 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7652 }
7653
7654 /* Return the number of saved general-purpose registers. */
7655
7656 static int
7657 ix86_nsaved_regs (void)
7658 {
7659 int nregs = 0;
7660 int regno;
7661
7662 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7663 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7664 nregs ++;
7665 return nregs;
7666 }
7667
7668 /* Return the number of saved SSE registers. */
7669
7670 static int
7671 ix86_nsaved_sseregs (void)
7672 {
7673 int nregs = 0;
7674 int regno;
7675
7676 if (ix86_cfun_abi () != MS_ABI)
7677 return 0;
7678 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7679 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7680 nregs ++;
7681 return nregs;
7682 }
7683
7684 /* Given FROM and TO register numbers, say whether this elimination is
7685 allowed. If stack alignment is needed, we can only replace argument
7686 pointer with hard frame pointer, or replace frame pointer with stack
7687 pointer. Otherwise, frame pointer elimination is automatically
7688 handled and all other eliminations are valid. */
7689
7690 int
7691 ix86_can_eliminate (int from, int to)
7692 {
7693 if (stack_realign_fp)
7694 return ((from == ARG_POINTER_REGNUM
7695 && to == HARD_FRAME_POINTER_REGNUM)
7696 || (from == FRAME_POINTER_REGNUM
7697 && to == STACK_POINTER_REGNUM));
7698 else
7699 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7700 }
7701
7702 /* Return the offset between two registers, one to be eliminated, and the other
7703 its replacement, at the start of a routine. */
7704
7705 HOST_WIDE_INT
7706 ix86_initial_elimination_offset (int from, int to)
7707 {
7708 struct ix86_frame frame;
7709 ix86_compute_frame_layout (&frame);
7710
7711 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7712 return frame.hard_frame_pointer_offset;
7713 else if (from == FRAME_POINTER_REGNUM
7714 && to == HARD_FRAME_POINTER_REGNUM)
7715 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7716 else
7717 {
7718 gcc_assert (to == STACK_POINTER_REGNUM);
7719
7720 if (from == ARG_POINTER_REGNUM)
7721 return frame.stack_pointer_offset;
7722
7723 gcc_assert (from == FRAME_POINTER_REGNUM);
7724 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7725 }
7726 }
7727
7728 /* In a dynamically-aligned function, we can't know the offset from
7729 stack pointer to frame pointer, so we must ensure that setjmp
7730 eliminates fp against the hard fp (%ebp) rather than trying to
7731 index from %esp up to the top of the frame across a gap that is
7732 of unknown (at compile-time) size. */
7733 static rtx
7734 ix86_builtin_setjmp_frame_value (void)
7735 {
7736 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7737 }
7738
7739 /* Fill the ix86_frame structure FRAME describing the frame of the currently compiled function. */
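/* Roughly, the layout computed below looks like this, from higher
   addresses down to the final stack pointer:

	return address
	saved frame pointer		(if frame_pointer_needed)
		<- hard_frame_pointer_offset
	saved argument registers	(TARGET_SAVE_ARGS) and padding0
	saved general registers		(nregs words)
	padding05 and saved SSE registers (nsseregs * 16 bytes)
	va_arg register save area
	padding1
		<- frame_pointer_offset
	local variables			(get_frame_size ())
	outgoing arguments and padding2
		<- stack_pointer_offset  */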
7740
7741 static void
7742 ix86_compute_frame_layout (struct ix86_frame *frame)
7743 {
7744 HOST_WIDE_INT total_size;
7745 unsigned int stack_alignment_needed;
7746 HOST_WIDE_INT offset;
7747 unsigned int preferred_alignment;
7748 HOST_WIDE_INT size = get_frame_size ();
7749
7750 frame->nregs = ix86_nsaved_regs ();
7751 frame->nsseregs = ix86_nsaved_sseregs ();
7752 frame->nmsave_args = ix86_nsaved_args ();
7753 total_size = size;
7754
7755 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7756 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7757
7758 /* The MS ABI seems to require the stack alignment to always be 16, except
7759 in function prologues. */
7760 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7761 {
7762 preferred_alignment = 16;
7763 stack_alignment_needed = 16;
7764 crtl->preferred_stack_boundary = 128;
7765 crtl->stack_alignment_needed = 128;
7766 }
7767
7768 gcc_assert (!size || stack_alignment_needed);
7769 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7770 gcc_assert (preferred_alignment <= stack_alignment_needed);
7771
7772 /* During reload iteration the number of registers saved can change.
7773 Recompute the value as needed. Do not recompute when the number of
7774 registers didn't change, as reload makes multiple calls to this function
7775 and does not expect the decision to change within a single iteration. */
7776 if (!optimize_function_for_size_p (cfun)
7777 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7778 {
7779 int count = frame->nregs;
7780
7781 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7782 /* The fast prologue uses move instead of push to save registers. This
7783 is significantly longer, but also executes faster as modern hardware
7784 can execute the moves in parallel, but can't do that for push/pop.
7785
7786 Be careful about choosing which prologue to emit: when the function takes
7787 many instructions to execute we may use the slow version, and likewise
7788 when the function is known to be outside a hot spot (this is known with
7789 feedback only). Weight the size of the function by the number of registers
7790 to save, as it is cheap to use one or two push instructions but very
7791 slow to use many of them. */
7792 if (count)
7793 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7794 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7795 || (flag_branch_probabilities
7796 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7797 cfun->machine->use_fast_prologue_epilogue = false;
7798 else
7799 cfun->machine->use_fast_prologue_epilogue
7800 = !expensive_function_p (count);
7801 }
7802 if (TARGET_PROLOGUE_USING_MOVE
7803 && cfun->machine->use_fast_prologue_epilogue)
7804 frame->save_regs_using_mov = true;
7805 else
7806 frame->save_regs_using_mov = false;
7807
7808 if (TARGET_SAVE_ARGS)
7809 {
7810 cfun->machine->use_fast_prologue_epilogue = true;
7811 frame->save_regs_using_mov = true;
7812 }
7813
7814 /* Skip return address and saved base pointer. */
7815 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7816
7817 frame->hard_frame_pointer_offset = offset;
7818
7819 /* Set offset to aligned because the realigned frame starts from
7820 here. */
7821 if (stack_realign_fp)
7822 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7823
7824 /* Argument save area */
7825 if (TARGET_SAVE_ARGS)
7826 {
7827 offset += frame->nmsave_args * UNITS_PER_WORD;
7828 frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
7829 offset += frame->padding0;
7830 }
7831 else
7832 frame->padding0 = 0;
7833
7834 /* Register save area */
7835 offset += frame->nregs * UNITS_PER_WORD;
7836
7837 /* Align SSE reg save area. */
7838 if (frame->nsseregs)
7839 frame->padding05 = ((offset + 16 - 1) & -16) - offset;
7840 else
7841 frame->padding05 = 0;
7842
7843 /* SSE register save area. */
7844 offset += frame->padding05 + frame->nsseregs * 16;
7845
7846 /* Va-arg area */
7847 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7848 offset += frame->va_arg_size;
7849
7850 /* Align start of frame for local function. */
7851 frame->padding1 = ((offset + stack_alignment_needed - 1)
7852 & -stack_alignment_needed) - offset;
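/* For example, with stack_alignment_needed == 16 and offset == 20,
   padding1 == ((20 + 15) & -16) - 20 == 12 bytes.  */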
7853
7854 offset += frame->padding1;
7855
7856 /* Frame pointer points here. */
7857 frame->frame_pointer_offset = offset;
7858
7859 offset += size;
7860
7861 /* Add the outgoing arguments area. It can be skipped if we eliminated
7862 all the function calls as dead code.
7863 Skipping is however impossible when the function calls alloca, as the
7864 alloca expander assumes that the last crtl->outgoing_args_size bytes
7865 of the stack frame are unused. */
7866 if (ACCUMULATE_OUTGOING_ARGS
7867 && (!current_function_is_leaf || cfun->calls_alloca
7868 || ix86_current_function_calls_tls_descriptor))
7869 {
7870 offset += crtl->outgoing_args_size;
7871 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7872 }
7873 else
7874 frame->outgoing_arguments_size = 0;
7875
7876 /* Align stack boundary. Only needed if we're calling another function
7877 or using alloca. */
7878 if (!current_function_is_leaf || cfun->calls_alloca
7879 || ix86_current_function_calls_tls_descriptor)
7880 frame->padding2 = ((offset + preferred_alignment - 1)
7881 & -preferred_alignment) - offset;
7882 else
7883 frame->padding2 = 0;
7884
7885 offset += frame->padding2;
7886
7887 /* We've reached end of stack frame. */
7888 frame->stack_pointer_offset = offset;
7889
7890 /* Size prologue needs to allocate. */
7891 frame->to_allocate =
7892 (size + frame->padding1 + frame->padding2
7893 + frame->outgoing_arguments_size + frame->va_arg_size);
7894
7895 if (!TARGET_SAVE_ARGS
7896 && ((!frame->to_allocate && frame->nregs <= 1)
7897 || (TARGET_64BIT
7898 && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
7899 frame->save_regs_using_mov = false;
7900
7901 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7902 && current_function_sp_is_unchanging
7903 && current_function_is_leaf
7904 && !ix86_current_function_calls_tls_descriptor)
7905 {
7906 frame->red_zone_size = frame->to_allocate;
7907 if (frame->save_regs_using_mov)
7908 {
7909 frame->red_zone_size
7910 += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
7911 frame->red_zone_size += frame->padding0;
7912 }
7913 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7914 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7915 }
7916 else
7917 frame->red_zone_size = 0;
7918 frame->to_allocate -= frame->red_zone_size;
7919 frame->stack_pointer_offset -= frame->red_zone_size;
7920 #if 0
7921 fprintf (stderr, "\n");
7922 fprintf (stderr, "size: %ld\n", (long)size);
7923 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7924 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7925 fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args);
7926 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7927 fprintf (stderr, "padding05: %ld\n", (long)frame->padding0);
7928 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7929 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7930 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7931 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7932 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7933 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7934 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7935 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7936 (long)frame->hard_frame_pointer_offset);
7937 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7938 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7939 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7940 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7941 #endif
7942 }
7943
7944
7945 /* Emit code to save registers in the prologue. */
7946
7947 static void
7948 ix86_emit_save_regs (void)
7949 {
7950 unsigned int regno;
7951 rtx insn;
7952
7953 if (TARGET_SAVE_ARGS)
7954 {
7955 int i;
7956 int nsaved = ix86_nsaved_args ();
7957 int start = cfun->returns_struct;
7958 for (i = start; i < start + nsaved; i++)
7959 {
7960 regno = x86_64_int_parameter_registers[i];
7961 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7962 RTX_FRAME_RELATED_P (insn) = 1;
7963 }
7964 if (nsaved % 2 != 0)
7965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7966 GEN_INT (-UNITS_PER_WORD), -1);
7967 }
7968
7969 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7970 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7971 {
7972 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7973 RTX_FRAME_RELATED_P (insn) = 1;
7974 }
7975 }
7976
7977 /* Emit code to save registers using MOV insns. The first register
7978 is stored at POINTER + OFFSET. */
7979 static void
7980 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7981 {
7982 unsigned int regno;
7983 rtx insn;
7984
7985 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7986 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7987 {
7988 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7989 Pmode, offset),
7990 gen_rtx_REG (Pmode, regno));
7991 RTX_FRAME_RELATED_P (insn) = 1;
7992 offset += UNITS_PER_WORD;
7993 }
7994
7995 if (TARGET_SAVE_ARGS)
7996 {
7997 int i;
7998 int nsaved = ix86_nsaved_args ();
7999 int start = cfun->returns_struct;
8000 if (nsaved % 2 != 0)
8001 offset += UNITS_PER_WORD;
8002 for (i = start + nsaved - 1; i >= start; i--)
8003 {
8004 regno = x86_64_int_parameter_registers[i];
8005 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8006 Pmode, offset),
8007 gen_rtx_REG (Pmode, regno));
8008 RTX_FRAME_RELATED_P (insn) = 1;
8009 offset += UNITS_PER_WORD;
8010 }
8011 }
8012 }
8013
8014 /* Emit code to save SSE registers using MOV insns. The first register
8015 is stored at POINTER + OFFSET. */
8016 static void
8017 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8018 {
8019 unsigned int regno;
8020 rtx insn;
8021 rtx mem;
8022
8023 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8024 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8025 {
8026 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8027 set_mem_align (mem, 128);
8028 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8029 RTX_FRAME_RELATED_P (insn) = 1;
8030 offset += 16;
8031 }
8032 }
8033
8034 /* Expand prologue or epilogue stack adjustment.
8035 The pattern exists to put a dependency on all ebp-based memory accesses.
8036 STYLE should be negative if instructions should be marked as frame related,
8037 zero if the %r11 register is live and cannot be freely used, and positive
8038 otherwise. */
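/* For example, the prologue below allocates the frame with
   pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
			      GEN_INT (-allocate), -1);
   where the negative style marks the adjustment as frame related.  */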
8039
8040 static void
8041 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8042 {
8043 rtx insn;
8044
8045 if (! TARGET_64BIT)
8046 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8047 else if (x86_64_immediate_operand (offset, DImode))
8048 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8049 else
8050 {
8051 rtx r11;
8052 /* r11 is used by indirect sibcall return as well, set before the
8053 epilogue and used after the epilogue. ATM indirect sibcall
8054 shouldn't be used together with huge frame sizes in one
8055 function because of the frame_size check in sibcall.c. */
8056 gcc_assert (style);
8057 r11 = gen_rtx_REG (DImode, R11_REG);
8058 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8059 if (style < 0)
8060 RTX_FRAME_RELATED_P (insn) = 1;
8061 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8062 offset));
8063 }
8064 if (style < 0)
8065 RTX_FRAME_RELATED_P (insn) = 1;
8066 }
8067
8068 /* Find an available register to be used as the dynamic realign argument
8069 pointer register. Such a register will be written in the prologue and
8070 used at the beginning of the body, so it must not be
8071 1. a parameter passing register.
8072 2. the GOT pointer.
8073 We reuse the static-chain register if it is available. Otherwise, we
8074 use DI for i386 and R13 for x86-64. We chose R13 since it has a
8075 shorter encoding.
8076
8077 Return: the regno of the chosen register. */
8078
8079 static unsigned int
8080 find_drap_reg (void)
8081 {
8082 tree decl = cfun->decl;
8083
8084 if (TARGET_64BIT)
8085 {
8086 /* Use R13 for a nested function or a function that needs a static chain.
8087 Since a function with a tail call may use any caller-saved
8088 registers in the epilogue, DRAP must not use a caller-saved
8089 register in such a case. */
8090 if ((decl_function_context (decl)
8091 && !DECL_NO_STATIC_CHAIN (decl))
8092 || crtl->tail_call_emit)
8093 return R13_REG;
8094
8095 return R10_REG;
8096 }
8097 else
8098 {
8099 /* Use DI for a nested function or a function that needs a static chain.
8100 Since a function with a tail call may use any caller-saved
8101 registers in the epilogue, DRAP must not use a caller-saved
8102 register in such a case. */
8103 if ((decl_function_context (decl)
8104 && !DECL_NO_STATIC_CHAIN (decl))
8105 || crtl->tail_call_emit)
8106 return DI_REG;
8107
8108 /* Reuse static chain register if it isn't used for parameter
8109 passing. */
8110 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8111 && !lookup_attribute ("fastcall",
8112 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8113 return CX_REG;
8114 else
8115 return DI_REG;
8116 }
8117 }
8118
8119 /* Update incoming stack boundary and estimated stack alignment. */
8120
8121 static void
8122 ix86_update_stack_boundary (void)
8123 {
8124 /* Prefer the one specified at command line. */
8125 ix86_incoming_stack_boundary
8126 = (ix86_user_incoming_stack_boundary
8127 ? ix86_user_incoming_stack_boundary
8128 : ix86_default_incoming_stack_boundary);
8129
8130 /* Incoming stack alignment can be changed on individual functions
8131 via force_align_arg_pointer attribute. We use the smallest
8132 incoming stack boundary. */
8133 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8134 && lookup_attribute (ix86_force_align_arg_pointer_string,
8135 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8136 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8137
8138 /* The incoming stack frame has to be aligned at least at
8139 parm_stack_boundary. */
8140 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8141 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8142
8143 /* The stack at the entry of main is aligned by the runtime. We use the
8144 smallest incoming stack boundary. */
8145 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8146 && DECL_NAME (current_function_decl)
8147 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8148 && DECL_FILE_SCOPE_P (current_function_decl))
8149 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8150
8151 /* The x86_64 varargs register save area needs 16-byte stack
8152 alignment. */
8153 if (TARGET_64BIT
8154 && cfun->stdarg
8155 && crtl->stack_alignment_estimated < 128)
8156 crtl->stack_alignment_estimated = 128;
8157 }
8158
8159 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8160 needed or an rtx for DRAP otherwise. */
8161
8162 static rtx
8163 ix86_get_drap_rtx (void)
8164 {
8165 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8166 crtl->need_drap = true;
8167
8168 if (stack_realign_drap)
8169 {
8170 /* Assign DRAP to vDRAP and return vDRAP. */
8171 unsigned int regno = find_drap_reg ();
8172 rtx drap_vreg;
8173 rtx arg_ptr;
8174 rtx seq, insn;
8175
8176 arg_ptr = gen_rtx_REG (Pmode, regno);
8177 crtl->drap_reg = arg_ptr;
8178
8179 start_sequence ();
8180 drap_vreg = copy_to_reg (arg_ptr);
8181 seq = get_insns ();
8182 end_sequence ();
8183
8184 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8185 RTX_FRAME_RELATED_P (insn) = 1;
8186 return drap_vreg;
8187 }
8188 else
8189 return NULL;
8190 }
8191
8192 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8193
8194 static rtx
8195 ix86_internal_arg_pointer (void)
8196 {
8197 return virtual_incoming_args_rtx;
8198 }
8199
8200 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8201 This is called from dwarf2out.c to emit call frame instructions
8202 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8203 static void
8204 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8205 {
8206 rtx unspec = SET_SRC (pattern);
8207 gcc_assert (GET_CODE (unspec) == UNSPEC);
8208
8209 switch (index)
8210 {
8211 case UNSPEC_REG_SAVE:
8212 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8213 SET_DEST (pattern));
8214 break;
8215 case UNSPEC_DEF_CFA:
8216 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8217 INTVAL (XVECEXP (unspec, 0, 0)));
8218 break;
8219 default:
8220 gcc_unreachable ();
8221 }
8222 }
8223
8224 /* Finalize the stack_realign_needed flag, which guides the prologue/epilogue
8225 so that it is generated in the correct form. */
8226 static void
8227 ix86_finalize_stack_realign_flags (void)
8228 {
8229 /* Check whether stack realignment is really needed after reload, and
8230 record the result. */
8231 unsigned int incoming_stack_boundary
8232 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8233 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8234 unsigned int stack_realign = (incoming_stack_boundary
8235 < (current_function_is_leaf
8236 ? crtl->max_used_stack_slot_alignment
8237 : crtl->stack_alignment_needed));
8238
8239 if (crtl->stack_realign_finalized)
8240 {
8241 /* After stack_realign_needed is finalized, we can no longer
8242 change it. */
8243 gcc_assert (crtl->stack_realign_needed == stack_realign);
8244 }
8245 else
8246 {
8247 crtl->stack_realign_needed = stack_realign;
8248 crtl->stack_realign_finalized = true;
8249 }
8250 }
8251
8252 /* Expand the prologue into a bunch of separate insns. */
8253
8254 void
8255 ix86_expand_prologue (void)
8256 {
8257 rtx insn;
8258 bool pic_reg_used;
8259 struct ix86_frame frame;
8260 HOST_WIDE_INT allocate;
8261
8262 ix86_finalize_stack_realign_flags ();
8263
8264 /* DRAP should not coexist with stack_realign_fp */
8265 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8266
8267 ix86_compute_frame_layout (&frame);
8268
8269 /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8270 DRAP is needed and stack realignment is really needed after reload. */
8271 if (crtl->drap_reg && crtl->stack_realign_needed)
8272 {
8273 rtx x, y;
8274 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8275 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8276 ? 0 : UNITS_PER_WORD);
8277
8278 gcc_assert (stack_realign_drap);
8279
8280 /* Grab the argument pointer. */
8281 x = plus_constant (stack_pointer_rtx,
8282 (UNITS_PER_WORD + param_ptr_offset));
8283 y = crtl->drap_reg;
8284
8285 /* We only need to push the parameter pointer reg if it is a
8286 caller-saved reg. */
8287 if (!call_used_regs[REGNO (crtl->drap_reg)])
8288 {
8289 /* Push arg pointer reg */
8290 insn = emit_insn (gen_push (y));
8291 RTX_FRAME_RELATED_P (insn) = 1;
8292 }
8293
8294 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8295 RTX_FRAME_RELATED_P (insn) = 1;
8296
8297 /* Align the stack. */
8298 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8299 stack_pointer_rtx,
8300 GEN_INT (-align_bytes)));
8301 RTX_FRAME_RELATED_P (insn) = 1;
8302
8303 /* Replicate the return address on the stack so that the return
8304 address can be reached via the (argp - 1) slot. This is needed
8305 to implement the macro RETURN_ADDR_RTX and the intrinsic function
8306 expand_builtin_return_addr etc. */
8307 x = crtl->drap_reg;
8308 x = gen_frame_mem (Pmode,
8309 plus_constant (x, -UNITS_PER_WORD));
8310 insn = emit_insn (gen_push (x));
8311 RTX_FRAME_RELATED_P (insn) = 1;
8312 }
8313
8314 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8315 slower on all targets. Also sdb doesn't like it. */
8316
8317 if (frame_pointer_needed)
8318 {
8319 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8320 RTX_FRAME_RELATED_P (insn) = 1;
8321
8322 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8323 RTX_FRAME_RELATED_P (insn) = 1;
8324 }
8325
8326 if (stack_realign_fp)
8327 {
8328 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8329 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8330
8331 /* Align the stack. */
8332 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8333 stack_pointer_rtx,
8334 GEN_INT (-align_bytes)));
8335 RTX_FRAME_RELATED_P (insn) = 1;
8336 }
8337
8338 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05;
8339
8340 if (!frame.save_regs_using_mov)
8341 ix86_emit_save_regs ();
8342 else
8343 allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
8344 + frame.padding0;
8345
8346 /* When using the red zone we may start register saving before allocating
8347 the stack frame, saving one cycle of the prologue. However, avoid
8348 doing this if we are going to have to probe the stack, since
8349 at least on x86_64 the stack probe can turn into a call that clobbers
8350 a red zone location. */
8351 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8352 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8353 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8354 && !crtl->stack_realign_needed)
8355 ? hard_frame_pointer_rtx
8356 : stack_pointer_rtx,
8357 -(frame.nregs + frame.nmsave_args)
8358 * UNITS_PER_WORD - frame.padding0);
8359
8360 if (allocate == 0)
8361 ;
8362 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8363 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8364 GEN_INT (-allocate), -1);
8365 else
8366 {
8367 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8368 bool eax_live;
8369 rtx t;
8370
8371 if (cfun->machine->call_abi == MS_ABI)
8372 eax_live = false;
8373 else
8374 eax_live = ix86_eax_live_at_start_p ();
8375
8376 if (eax_live)
8377 {
8378 emit_insn (gen_push (eax));
8379 allocate -= UNITS_PER_WORD;
8380 }
8381
8382 emit_move_insn (eax, GEN_INT (allocate));
8383
8384 if (TARGET_64BIT)
8385 insn = gen_allocate_stack_worker_64 (eax, eax);
8386 else
8387 insn = gen_allocate_stack_worker_32 (eax, eax);
8388 insn = emit_insn (insn);
8389 RTX_FRAME_RELATED_P (insn) = 1;
8390 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8391 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8392 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8393 t, REG_NOTES (insn));
8394
8395 if (eax_live)
8396 {
8397 if (frame_pointer_needed)
8398 t = plus_constant (hard_frame_pointer_rtx,
8399 allocate
8400 - frame.to_allocate
8401 - frame.nregs * UNITS_PER_WORD);
8402 else
8403 t = plus_constant (stack_pointer_rtx, allocate);
8404 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8405 }
8406 }
8407
8408 if (frame.save_regs_using_mov
8409 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8410 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8411 {
8412 if (!TARGET_SAVE_ARGS &&
8413 (!frame_pointer_needed
8414 || !(frame.to_allocate + frame.padding05)
8415 || crtl->stack_realign_needed))
8416 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8417 frame.to_allocate
8418 + frame.nsseregs * 16 + frame.padding05);
8419 else
8420 /* XXX: Does this need help for SSE? */
8421 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8422 -(frame.nregs + frame.nmsave_args)
8423 * UNITS_PER_WORD - frame.padding0);
8424 }
8425 /* XXX: Do these need help for save-args? */
8426 if (!frame_pointer_needed
8427 || !(frame.to_allocate + frame.padding0)
8428 || crtl->stack_realign_needed)
8429 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8430 frame.to_allocate);
8431 else
8432 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8433 - frame.nregs * UNITS_PER_WORD
8434 - frame.nsseregs * 16
8435 - frame.padding05);
8436
8437 pic_reg_used = false;
8438 if (pic_offset_table_rtx
8439 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8440 || crtl->profile))
8441 {
8442 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8443
8444 if (alt_pic_reg_used != INVALID_REGNUM)
8445 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8446
8447 pic_reg_used = true;
8448 }
8449
8450 if (pic_reg_used)
8451 {
8452 if (TARGET_64BIT)
8453 {
8454 if (ix86_cmodel == CM_LARGE_PIC)
8455 {
8456 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8457 rtx label = gen_label_rtx ();
8458 emit_label (label);
8459 LABEL_PRESERVE_P (label) = 1;
8460 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8461 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8462 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8463 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8464 pic_offset_table_rtx, tmp_reg));
8465 }
8466 else
8467 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8468 }
8469 else
8470 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8471 }
8472
8473 /* In the pic_reg_used case, make sure that the got load isn't deleted
8474 when mcount needs it. Blockage to avoid call movement across mcount
8475 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8476 note. */
8477 if (crtl->profile && pic_reg_used)
8478 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8479
8480 if (crtl->drap_reg && !crtl->stack_realign_needed)
8481 {
8482 /* vDRAP is set up, but after reload it turns out stack realignment
8483 isn't necessary; here we emit the prologue to set up DRAP
8484 without the stack realignment adjustment. */
8485 int drap_bp_offset = UNITS_PER_WORD * 2;
8486 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8487 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8488 }
8489
8490 /* Prevent instructions from being scheduled into the register save push
8491 sequence when access to the redzone area is done through the frame pointer.
8492 The offset between the frame pointer and the stack pointer is calculated
8493 relative to the value of the stack pointer at the end of the function
8494 prologue, and moving instructions that access the redzone area via the frame
8495 pointer inside the push sequence violates this assumption. */
8496 if (frame_pointer_needed && frame.red_zone_size)
8497 emit_insn (gen_memory_blockage ());
8498
8499 /* Emit cld instruction if stringops are used in the function. */
8500 if (TARGET_CLD && ix86_current_function_needs_cld)
8501 emit_insn (gen_cld ());
8502 }
8503
8504 /* Emit code to restore saved registers using MOV insns. First register
8505 is restored from POINTER + OFFSET. */
8506 static void
8507 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8508 int maybe_eh_return)
8509 {
8510 int regno;
8511 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8512
8513 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8514 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8515 {
8516 /* Ensure that adjust_address won't be forced to produce a pointer
8517 out of the range allowed by the x86-64 instruction set. */
8518 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8519 {
8520 rtx r11;
8521
8522 r11 = gen_rtx_REG (DImode, R11_REG);
8523 emit_move_insn (r11, GEN_INT (offset));
8524 emit_insn (gen_adddi3 (r11, r11, pointer));
8525 base_address = gen_rtx_MEM (Pmode, r11);
8526 offset = 0;
8527 }
8528 emit_move_insn (gen_rtx_REG (Pmode, regno),
8529 adjust_address (base_address, Pmode, offset));
8530 offset += UNITS_PER_WORD;
8531 }
8532 }
8533
8534 /* Emit code to restore saved SSE registers using MOV insns. The first
8535 register is restored from POINTER + OFFSET. */
8536 static void
8537 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8538 int maybe_eh_return)
8539 {
8540 int regno;
8541 rtx base_address = gen_rtx_MEM (TImode, pointer);
8542 rtx mem;
8543
8544 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8545 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8546 {
8547 /* Ensure that adjust_address won't be forced to produce a pointer
8548 out of the range allowed by the x86-64 instruction set. */
8549 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8550 {
8551 rtx r11;
8552
8553 r11 = gen_rtx_REG (DImode, R11_REG);
8554 emit_move_insn (r11, GEN_INT (offset));
8555 emit_insn (gen_adddi3 (r11, r11, pointer));
8556 base_address = gen_rtx_MEM (TImode, r11);
8557 offset = 0;
8558 }
8559 mem = adjust_address (base_address, TImode, offset);
8560 set_mem_align (mem, 128);
8561 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8562 offset += 16;
8563 }
8564 }
8565
8566 /* Restore function stack, frame, and registers. */
8567
8568 void
8569 ix86_expand_epilogue (int style)
8570 {
8571 int regno;
8572 int sp_valid;
8573 struct ix86_frame frame;
8574 HOST_WIDE_INT offset;
8575
8576 ix86_finalize_stack_realign_flags ();
8577
8578 /* When stack is realigned, SP must be valid. */
8579 sp_valid = (!frame_pointer_needed
8580 || current_function_sp_is_unchanging
8581 || stack_realign_fp);
8582
8583 ix86_compute_frame_layout (&frame);
8584
8585 /* See the comment about red zone and frame
8586 pointer usage in ix86_expand_prologue. */
8587 if (frame_pointer_needed && frame.red_zone_size)
8588 emit_insn (gen_memory_blockage ());
8589
8590 /* Calculate start of saved registers relative to ebp. Special care
8591 must be taken for the normal return case of a function using
8592 eh_return: the eax and edx registers are marked as saved, but not
8593 restored along this path. */
8594 offset = frame.nregs + frame.nmsave_args;
8595 if (crtl->calls_eh_return && style != 2)
8596 offset -= 2;
8597 offset *= -UNITS_PER_WORD;
8598 offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0;
8599
8600 /* If we're only restoring one register and sp is not valid, then
8601 use a move instruction to restore the register, since it's
8602 less work than reloading sp and popping the register.
8603
8604 The default code results in a stack adjustment using an add/lea instruction,
8605 while this code results in a LEAVE instruction (or discrete equivalent),
8606 so it is profitable in some other cases as well, especially when there
8607 are no registers to restore. We also use this code when TARGET_USE_LEAVE
8608 is set and there is exactly one register to pop. This heuristic may need
8609 some tuning in the future. */
8610 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8611 || (TARGET_EPILOGUE_USING_MOVE
8612 && cfun->machine->use_fast_prologue_epilogue
8613 && ((frame.nregs + frame.nsseregs) > 1
8614 || (frame.to_allocate + frame.padding0) != 0))
8615 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8616 && (frame.to_allocate + frame.padding0) != 0)
8617 || (frame_pointer_needed && TARGET_USE_LEAVE
8618 && cfun->machine->use_fast_prologue_epilogue
8619 && (frame.nregs + frame.nsseregs) == 1)
8620 || crtl->calls_eh_return)
8621 {
8622 /* Restore registers. We can use ebp or esp to address the memory
8623 locations. If both are available, default to ebp, since offsets
8624 are known to be small. The only exception is esp pointing directly
8625 to the end of the block of saved registers, where we may simplify
8626 the addressing mode.
8627
8628 If we are realigning the stack with bp and sp, the register restores
8629 can't be addressed by bp; sp must be used instead. */
8630
8631 if (!frame_pointer_needed
8632 || (sp_valid && !(frame.to_allocate + frame.padding0))
8633 || stack_realign_fp)
8634 {
8635 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8636 frame.to_allocate, style == 2);
8637 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8638 frame.to_allocate
8639 + frame.nsseregs * 16
8640 + frame.padding05, style == 2);
8641 }
8642 else
8643 {
8644 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8645 offset, style == 2);
8646 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8647 offset
8648 + frame.nsseregs * 16
8649 + frame.padding05, style == 2);
8650 }
8651
8652 /* eh_return epilogues need %ecx added to the stack pointer. */
8653 if (style == 2)
8654 {
8655 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8656
8657 /* Stack align doesn't work with eh_return. */
8658 gcc_assert (!crtl->stack_realign_needed);
8659
8660 if (frame_pointer_needed)
8661 {
8662 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8663 tmp = plus_constant (tmp, UNITS_PER_WORD);
8664 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8665
8666 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8667 emit_move_insn (hard_frame_pointer_rtx, tmp);
8668
8669 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8670 const0_rtx, style);
8671 }
8672 else
8673 {
8674 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8675 tmp = plus_constant (tmp, (frame.to_allocate
8676 + (frame.nregs + frame.nmsave_args)
8677 * UNITS_PER_WORD
8678 + frame.nsseregs * 16
8679 + frame.padding05 + frame.padding0));
8680 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8681 }
8682 }
8683 else if (!frame_pointer_needed)
8684 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8685 GEN_INT (frame.to_allocate
8686 + (frame.nregs + frame.nmsave_args)
8687 * UNITS_PER_WORD
8688 + frame.nsseregs * 16
8689 + frame.padding05 + frame.padding0),
8690 style);
8691 /* If not an i386, mov & pop is faster than "leave". */
8692 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8693 || !cfun->machine->use_fast_prologue_epilogue)
8694 emit_insn ((*ix86_gen_leave) ());
8695 else
8696 {
8697 pro_epilogue_adjust_stack (stack_pointer_rtx,
8698 hard_frame_pointer_rtx,
8699 const0_rtx, style);
8700
8701 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8702 }
8703 }
8704 else
8705 {
8706 /* First step is to deallocate the stack frame so that we can
8707 pop the registers.
8708
8709 If we realign the stack with the frame pointer, then the stack
8710 pointer can't be recovered via lea $offset(%bp), %sp, because
8711 there is a padding area between bp and sp for the realignment.
8712 "add $to_allocate, %sp" must be used instead. */
8713 if (!sp_valid)
8714 {
8715 gcc_assert (frame_pointer_needed);
8716 gcc_assert (!stack_realign_fp);
8717 pro_epilogue_adjust_stack (stack_pointer_rtx,
8718 hard_frame_pointer_rtx,
8719 GEN_INT (offset), style);
8720 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8721 0, style == 2);
8722 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8723 GEN_INT (frame.nsseregs * 16 +
8724 frame.padding0), style);
8725 }
8726 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
8727 {
8728 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8729 frame.to_allocate,
8730 style == 2);
8731 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8732 GEN_INT (frame.to_allocate
8733 + frame.nsseregs * 16
8734 + frame.padding05), style);
8735 }
8736
8737 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8738 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8739 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8740
8741 /* XXX: Needs adjustment for SSE regs? */
8742 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8743 GEN_INT (frame.nmsave_args * UNITS_PER_WORD
8744 + frame.padding0), style);
8745 if (frame_pointer_needed)
8746 {
8747 /* Leave results in shorter dependency chains on CPUs that are
8748 able to grok it fast. */
8749 if (TARGET_USE_LEAVE)
8750 emit_insn ((*ix86_gen_leave) ());
8751 else
8752 {
8753 /* If stack realignment really happened, the stack
8754 pointer must be recovered from the hard frame pointer
8755 if we are not using leave. */
8756 if (stack_realign_fp)
8757 pro_epilogue_adjust_stack (stack_pointer_rtx,
8758 hard_frame_pointer_rtx,
8759 const0_rtx, style);
8760 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8761 }
8762 }
8763 }
8764
8765 if (crtl->drap_reg && crtl->stack_realign_needed)
8766 {
8767 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8768 ? 0 : UNITS_PER_WORD);
8769 gcc_assert (stack_realign_drap);
8770 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8771 crtl->drap_reg,
8772 GEN_INT (-(UNITS_PER_WORD
8773 + param_ptr_offset))));
8774 if (!call_used_regs[REGNO (crtl->drap_reg)])
8775 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8776
8777 }
8778
8779 /* Sibcall epilogues don't want a return instruction. */
8780 if (style == 0)
8781 return;
8782
8783 if (crtl->args.pops_args && crtl->args.size)
8784 {
8785 rtx popc = GEN_INT (crtl->args.pops_args);
8786
8787 /* i386 can only pop 64K bytes. If asked to pop more, pop
8788 return address, do explicit add, and jump indirectly to the
8789 caller. */
8790
8791 if (crtl->args.pops_args >= 65536)
8792 {
8793 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8794
8795 /* There is no "pascal" calling convention in any 64bit ABI. */
8796 gcc_assert (!TARGET_64BIT);
8797
8798 emit_insn (gen_popsi1 (ecx));
8799 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8800 emit_jump_insn (gen_return_indirect_internal (ecx));
8801 }
8802 else
8803 emit_jump_insn (gen_return_pop_internal (popc));
8804 }
8805 else
8806 emit_jump_insn (gen_return_internal ());
8807 }
8808
8809 /* Reset from the function's potential modifications. */
8810
8811 static void
8812 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8813 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8814 {
8815 if (pic_offset_table_rtx)
8816 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8817 #if TARGET_MACHO
8818 /* Mach-O doesn't support labels at the end of objects, so if
8819 it looks like we might want one, insert a NOP. */
8820 {
8821 rtx insn = get_last_insn ();
8822 while (insn
8823 && NOTE_P (insn)
8824 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8825 insn = PREV_INSN (insn);
8826 if (insn
8827 && (LABEL_P (insn)
8828 || (NOTE_P (insn)
8829 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8830 fputs ("\tnop\n", file);
8831 }
8832 #endif
8833
8834 }
8835
8836 /* Extract the parts of an RTL expression that is a valid memory address
8837 for an instruction.  Return 0 if the structure of the address is
8838 grossly off.  Return -1 if the address contains ASHIFT, so it is not
8839 strictly valid but is still used for computing the length of an lea insn. */
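/* As an illustrative sketch (registers and values hypothetical), a
   canonical address such as

     (plus (plus (mult (reg %eax) (const_int 4))
                 (reg %ebx))
           (const_int 12))

   decomposes into base = %ebx, index = %eax, scale = 4 and disp = 12,
   i.e. the operand of a [%ebx+%eax*4+12] memory reference.  */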
8840
8841 int
8842 ix86_decompose_address (rtx addr, struct ix86_address *out)
8843 {
8844 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8845 rtx base_reg, index_reg;
8846 HOST_WIDE_INT scale = 1;
8847 rtx scale_rtx = NULL_RTX;
8848 int retval = 1;
8849 enum ix86_address_seg seg = SEG_DEFAULT;
8850
8851 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8852 base = addr;
8853 else if (GET_CODE (addr) == PLUS)
8854 {
8855 rtx addends[4], op;
8856 int n = 0, i;
8857
8858 op = addr;
8859 do
8860 {
8861 if (n >= 4)
8862 return 0;
8863 addends[n++] = XEXP (op, 1);
8864 op = XEXP (op, 0);
8865 }
8866 while (GET_CODE (op) == PLUS);
8867 if (n >= 4)
8868 return 0;
8869 addends[n] = op;
8870
8871 for (i = n; i >= 0; --i)
8872 {
8873 op = addends[i];
8874 switch (GET_CODE (op))
8875 {
8876 case MULT:
8877 if (index)
8878 return 0;
8879 index = XEXP (op, 0);
8880 scale_rtx = XEXP (op, 1);
8881 break;
8882
8883 case UNSPEC:
8884 if (XINT (op, 1) == UNSPEC_TP
8885 && TARGET_TLS_DIRECT_SEG_REFS
8886 && seg == SEG_DEFAULT)
8887 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8888 else
8889 return 0;
8890 break;
8891
8892 case REG:
8893 case SUBREG:
8894 if (!base)
8895 base = op;
8896 else if (!index)
8897 index = op;
8898 else
8899 return 0;
8900 break;
8901
8902 case CONST:
8903 case CONST_INT:
8904 case SYMBOL_REF:
8905 case LABEL_REF:
8906 if (disp)
8907 return 0;
8908 disp = op;
8909 break;
8910
8911 default:
8912 return 0;
8913 }
8914 }
8915 }
8916 else if (GET_CODE (addr) == MULT)
8917 {
8918 index = XEXP (addr, 0); /* index*scale */
8919 scale_rtx = XEXP (addr, 1);
8920 }
8921 else if (GET_CODE (addr) == ASHIFT)
8922 {
8923 rtx tmp;
8924
8925 /* We're called for lea too, which implements ashift on occasion. */
8926 index = XEXP (addr, 0);
8927 tmp = XEXP (addr, 1);
8928 if (!CONST_INT_P (tmp))
8929 return 0;
8930 scale = INTVAL (tmp);
8931 if ((unsigned HOST_WIDE_INT) scale > 3)
8932 return 0;
8933 scale = 1 << scale;
8934 retval = -1;
8935 }
8936 else
8937 disp = addr; /* displacement */
8938
8939 /* Extract the integral value of scale. */
8940 if (scale_rtx)
8941 {
8942 if (!CONST_INT_P (scale_rtx))
8943 return 0;
8944 scale = INTVAL (scale_rtx);
8945 }
8946
8947 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8948 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8949
8950 /* Allow arg pointer and stack pointer as index if there is no scaling. */
8951 if (base_reg && index_reg && scale == 1
8952 && (index_reg == arg_pointer_rtx
8953 || index_reg == frame_pointer_rtx
8954 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8955 {
8956 rtx tmp;
8957 tmp = base, base = index, index = tmp;
8958 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8959 }
8960
8961 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8962 if ((base_reg == hard_frame_pointer_rtx
8963 || base_reg == frame_pointer_rtx
8964 || base_reg == arg_pointer_rtx) && !disp)
8965 disp = const0_rtx;
8966
8967 /* Special case: on K6, [%esi] causes the instruction to be vector
8968 decoded.  Avoid this by transforming it to [%esi+0].
8969 Reload calls address legitimization without cfun defined, so we need
8970 to test that cfun is non-NULL. */
8971 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8972 && base_reg && !index_reg && !disp
8973 && REG_P (base_reg)
8974 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8975 disp = const0_rtx;
8976
8977 /* Special case: encode reg+reg instead of reg*2. */
8978 if (!base && index && scale && scale == 2)
8979 base = index, base_reg = index_reg, scale = 1;
8980
8981 /* Special case: scaling cannot be encoded without base or displacement. */
8982 if (!base && !disp && index && scale != 1)
8983 disp = const0_rtx;
8984
8985 out->base = base;
8986 out->index = index;
8987 out->disp = disp;
8988 out->scale = scale;
8989 out->seg = seg;
8990
8991 return retval;
8992 }
8993
8994 /* Return the cost of the memory address X.
8995 For i386, it is better to use a complex address than to let gcc copy
8996 the address into a reg and make a new pseudo.  But not if the address
8997 requires two regs - that would mean more pseudos with longer
8998 lifetimes. */
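/* A worked example of the counting below (the pseudos are hypothetical):
   for an address like [pseudo1 + pseudo2*4] both the base and the index
   are pseudo registers, so the cost is 1 + 1 + 1 = 3, while [%ebx + 16]
   uses a single hard register and stays at the base cost of 1.  */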
8999 static int
9000 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
9001 {
9002 struct ix86_address parts;
9003 int cost = 1;
9004 int ok = ix86_decompose_address (x, &parts);
9005
9006 gcc_assert (ok);
9007
9008 if (parts.base && GET_CODE (parts.base) == SUBREG)
9009 parts.base = SUBREG_REG (parts.base);
9010 if (parts.index && GET_CODE (parts.index) == SUBREG)
9011 parts.index = SUBREG_REG (parts.index);
9012
9013 /* Attempt to minimize number of registers in the address. */
9014 if ((parts.base
9015 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9016 || (parts.index
9017 && (!REG_P (parts.index)
9018 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9019 cost++;
9020
9021 if (parts.base
9022 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9023 && parts.index
9024 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9025 && parts.base != parts.index)
9026 cost++;
9027
9028 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
9029 since its predecode logic can't detect the length of such instructions
9030 and decoding degenerates to vector decoding.  Increase the cost of such
9031 addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
9032 to split such addresses or even to refuse them altogether.
9033 
9034 The following addressing modes are affected:
9035 [base+scale*index]
9036 [scale*index+disp]
9037 [base+index]
9038 
9039 The first and last cases may be avoidable by explicitly coding the zero
9040 displacement in the memory address, but I don't have an AMD-K6 machine
9041 handy to check this theory. */
9042
9043 if (TARGET_K6
9044 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9045 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9046 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9047 cost += 10;
9048
9049 return cost;
9050 }
9051
9052 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
9053 this is used to form addresses of local data when -fPIC is in
9054 use. */
9055
9056 static bool
9057 darwin_local_data_pic (rtx disp)
9058 {
9059 return (GET_CODE (disp) == UNSPEC
9060 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9061 }
9062
9063 /* Determine if a given RTX is a valid constant. We already know this
9064 satisfies CONSTANT_P. */
9065
9066 bool
9067 legitimate_constant_p (rtx x)
9068 {
9069 switch (GET_CODE (x))
9070 {
9071 case CONST:
9072 x = XEXP (x, 0);
9073
9074 if (GET_CODE (x) == PLUS)
9075 {
9076 if (!CONST_INT_P (XEXP (x, 1)))
9077 return false;
9078 x = XEXP (x, 0);
9079 }
9080
9081 if (TARGET_MACHO && darwin_local_data_pic (x))
9082 return true;
9083
9084 /* Only some unspecs are valid as "constants". */
9085 if (GET_CODE (x) == UNSPEC)
9086 switch (XINT (x, 1))
9087 {
9088 case UNSPEC_GOT:
9089 case UNSPEC_GOTOFF:
9090 case UNSPEC_PLTOFF:
9091 return TARGET_64BIT;
9092 case UNSPEC_TPOFF:
9093 case UNSPEC_NTPOFF:
9094 x = XVECEXP (x, 0, 0);
9095 return (GET_CODE (x) == SYMBOL_REF
9096 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9097 case UNSPEC_DTPOFF:
9098 x = XVECEXP (x, 0, 0);
9099 return (GET_CODE (x) == SYMBOL_REF
9100 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9101 default:
9102 return false;
9103 }
9104
9105 /* We must have drilled down to a symbol. */
9106 if (GET_CODE (x) == LABEL_REF)
9107 return true;
9108 if (GET_CODE (x) != SYMBOL_REF)
9109 return false;
9110 /* FALLTHRU */
9111
9112 case SYMBOL_REF:
9113 /* TLS symbols are never valid. */
9114 if (SYMBOL_REF_TLS_MODEL (x))
9115 return false;
9116
9117 /* DLLIMPORT symbols are never valid. */
9118 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9119 && SYMBOL_REF_DLLIMPORT_P (x))
9120 return false;
9121 break;
9122
9123 case CONST_DOUBLE:
9124 if (GET_MODE (x) == TImode
9125 && x != CONST0_RTX (TImode)
9126 && !TARGET_64BIT)
9127 return false;
9128 break;
9129
9130 case CONST_VECTOR:
9131 if (x == CONST0_RTX (GET_MODE (x)))
9132 return true;
9133 return false;
9134
9135 default:
9136 break;
9137 }
9138
9139 /* Otherwise we handle everything else in the move patterns. */
9140 return true;
9141 }
9142
9143 /* Determine if it's legal to put X into the constant pool. This
9144 is not possible for the address of thread-local symbols, which
9145 is checked above. */
9146
9147 static bool
9148 ix86_cannot_force_const_mem (rtx x)
9149 {
9150 /* We can always put integral constants and vectors in memory. */
9151 switch (GET_CODE (x))
9152 {
9153 case CONST_INT:
9154 case CONST_DOUBLE:
9155 case CONST_VECTOR:
9156 return false;
9157
9158 default:
9159 break;
9160 }
9161 return !legitimate_constant_p (x);
9162 }
9163
9164 /* Determine if a given RTX is a valid constant address. */
9165
9166 bool
9167 constant_address_p (rtx x)
9168 {
9169 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9170 }
9171
9172 /* Return number of arguments to be saved on the stack with
9173 -msave-args. */
9174
9175 static int
9176 ix86_nsaved_args (void)
9177 {
9178 if (TARGET_SAVE_ARGS)
9179 return crtl->args.info.regno - cfun->returns_struct;
9180 else
9181 return 0;
9182 }
9183
9184 /* Nonzero if the constant value X is a legitimate general operand
9185 when generating PIC code. It is given that flag_pic is on and
9186 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9187 bool
9188 legitimate_pic_operand_p (rtx x)
9189 {
9190 rtx inner;
9191
9192 switch (GET_CODE (x))
9193 {
9194 case CONST:
9195 inner = XEXP (x, 0);
9196 if (GET_CODE (inner) == PLUS
9197 && CONST_INT_P (XEXP (inner, 1)))
9198 inner = XEXP (inner, 0);
9199
9200 /* Only some unspecs are valid as "constants". */
9201 if (GET_CODE (inner) == UNSPEC)
9202 switch (XINT (inner, 1))
9203 {
9204 case UNSPEC_GOT:
9205 case UNSPEC_GOTOFF:
9206 case UNSPEC_PLTOFF:
9207 return TARGET_64BIT;
9208 case UNSPEC_TPOFF:
9209 x = XVECEXP (inner, 0, 0);
9210 return (GET_CODE (x) == SYMBOL_REF
9211 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9212 case UNSPEC_MACHOPIC_OFFSET:
9213 return legitimate_pic_address_disp_p (x);
9214 default:
9215 return false;
9216 }
9217 /* FALLTHRU */
9218
9219 case SYMBOL_REF:
9220 case LABEL_REF:
9221 return legitimate_pic_address_disp_p (x);
9222
9223 default:
9224 return true;
9225 }
9226 }
9227
9228 /* Determine if a given CONST RTX is a valid memory displacement
9229 in PIC mode. */
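/* For illustration (the symbol is hypothetical), a typical 32bit
   displacement accepted below is

     (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF))

   optionally with a CONST_INT added inside the CONST, while in 64bit
   mode only the GOTPCREL, GOTOFF and PLTOFF unspecs are accepted.  */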
9230
9231 int
9232 legitimate_pic_address_disp_p (rtx disp)
9233 {
9234 bool saw_plus;
9235
9236 /* In 64bit mode we can allow direct addresses of symbols and labels
9237 when they are not dynamic symbols. */
9238 if (TARGET_64BIT)
9239 {
9240 rtx op0 = disp, op1;
9241
9242 switch (GET_CODE (disp))
9243 {
9244 case LABEL_REF:
9245 return true;
9246
9247 case CONST:
9248 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9249 break;
9250 op0 = XEXP (XEXP (disp, 0), 0);
9251 op1 = XEXP (XEXP (disp, 0), 1);
9252 if (!CONST_INT_P (op1)
9253 || INTVAL (op1) >= 16*1024*1024
9254 || INTVAL (op1) < -16*1024*1024)
9255 break;
9256 if (GET_CODE (op0) == LABEL_REF)
9257 return true;
9258 if (GET_CODE (op0) != SYMBOL_REF)
9259 break;
9260 /* FALLTHRU */
9261
9262 case SYMBOL_REF:
9263 /* TLS references should always be enclosed in UNSPEC. */
9264 if (SYMBOL_REF_TLS_MODEL (op0))
9265 return false;
9266 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9267 && ix86_cmodel != CM_LARGE_PIC)
9268 return true;
9269 break;
9270
9271 default:
9272 break;
9273 }
9274 }
9275 if (GET_CODE (disp) != CONST)
9276 return 0;
9277 disp = XEXP (disp, 0);
9278
9279 if (TARGET_64BIT)
9280 {
9281 /* It is unsafe to allow PLUS expressions here; this limits the allowed
9282 distance of GOT table references.  We should not need these anyway. */
9283 if (GET_CODE (disp) != UNSPEC
9284 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9285 && XINT (disp, 1) != UNSPEC_GOTOFF
9286 && XINT (disp, 1) != UNSPEC_PLTOFF))
9287 return 0;
9288
9289 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9290 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9291 return 0;
9292 return 1;
9293 }
9294
9295 saw_plus = false;
9296 if (GET_CODE (disp) == PLUS)
9297 {
9298 if (!CONST_INT_P (XEXP (disp, 1)))
9299 return 0;
9300 disp = XEXP (disp, 0);
9301 saw_plus = true;
9302 }
9303
9304 if (TARGET_MACHO && darwin_local_data_pic (disp))
9305 return 1;
9306
9307 if (GET_CODE (disp) != UNSPEC)
9308 return 0;
9309
9310 switch (XINT (disp, 1))
9311 {
9312 case UNSPEC_GOT:
9313 if (saw_plus)
9314 return false;
9315 /* We need to check for both symbols and labels because VxWorks loads
9316 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9317 details. */
9318 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9319 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9320 case UNSPEC_GOTOFF:
9321 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9322 While the ABI also specifies a 32bit relocation, we don't produce it
9323 in the small PIC model at all. */
9324 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9325 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9326 && !TARGET_64BIT)
9327 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9328 return false;
9329 case UNSPEC_GOTTPOFF:
9330 case UNSPEC_GOTNTPOFF:
9331 case UNSPEC_INDNTPOFF:
9332 if (saw_plus)
9333 return false;
9334 disp = XVECEXP (disp, 0, 0);
9335 return (GET_CODE (disp) == SYMBOL_REF
9336 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9337 case UNSPEC_NTPOFF:
9338 disp = XVECEXP (disp, 0, 0);
9339 return (GET_CODE (disp) == SYMBOL_REF
9340 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9341 case UNSPEC_DTPOFF:
9342 disp = XVECEXP (disp, 0, 0);
9343 return (GET_CODE (disp) == SYMBOL_REF
9344 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9345 }
9346
9347 return 0;
9348 }
9349
9350 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9351 memory address for an instruction. The MODE argument is the machine mode
9352 for the MEM expression that wants to use this address.
9353
9354 It only recognizes addresses in canonical form.  LEGITIMIZE_ADDRESS should
9355 convert common non-canonical forms to canonical form so that they will
9356 be recognized. */
9357
9358 int
9359 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9360 rtx addr, int strict)
9361 {
9362 struct ix86_address parts;
9363 rtx base, index, disp;
9364 HOST_WIDE_INT scale;
9365 const char *reason = NULL;
9366 rtx reason_rtx = NULL_RTX;
9367
9368 if (ix86_decompose_address (addr, &parts) <= 0)
9369 {
9370 reason = "decomposition failed";
9371 goto report_error;
9372 }
9373
9374 base = parts.base;
9375 index = parts.index;
9376 disp = parts.disp;
9377 scale = parts.scale;
9378
9379 /* Validate base register.
9380
9381 Don't allow SUBREG's that span more than a word here. It can lead to spill
9382 failures when the base is one word out of a two word structure, which is
9383 represented internally as a DImode int. */
9384
9385 if (base)
9386 {
9387 rtx reg;
9388 reason_rtx = base;
9389
9390 if (REG_P (base))
9391 reg = base;
9392 else if (GET_CODE (base) == SUBREG
9393 && REG_P (SUBREG_REG (base))
9394 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9395 <= UNITS_PER_WORD)
9396 reg = SUBREG_REG (base);
9397 else
9398 {
9399 reason = "base is not a register";
9400 goto report_error;
9401 }
9402
9403 if (GET_MODE (base) != Pmode)
9404 {
9405 reason = "base is not in Pmode";
9406 goto report_error;
9407 }
9408
9409 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9410 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9411 {
9412 reason = "base is not valid";
9413 goto report_error;
9414 }
9415 }
9416
9417 /* Validate index register.
9418
9419 Don't allow SUBREG's that span more than a word here -- same as above. */
9420
9421 if (index)
9422 {
9423 rtx reg;
9424 reason_rtx = index;
9425
9426 if (REG_P (index))
9427 reg = index;
9428 else if (GET_CODE (index) == SUBREG
9429 && REG_P (SUBREG_REG (index))
9430 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9431 <= UNITS_PER_WORD)
9432 reg = SUBREG_REG (index);
9433 else
9434 {
9435 reason = "index is not a register";
9436 goto report_error;
9437 }
9438
9439 if (GET_MODE (index) != Pmode)
9440 {
9441 reason = "index is not in Pmode";
9442 goto report_error;
9443 }
9444
9445 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9446 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9447 {
9448 reason = "index is not valid";
9449 goto report_error;
9450 }
9451 }
9452
9453 /* Validate scale factor. */
9454 if (scale != 1)
9455 {
9456 reason_rtx = GEN_INT (scale);
9457 if (!index)
9458 {
9459 reason = "scale without index";
9460 goto report_error;
9461 }
9462
9463 if (scale != 2 && scale != 4 && scale != 8)
9464 {
9465 reason = "scale is not a valid multiplier";
9466 goto report_error;
9467 }
9468 }
9469
9470 /* Validate displacement. */
9471 if (disp)
9472 {
9473 reason_rtx = disp;
9474
9475 if (GET_CODE (disp) == CONST
9476 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9477 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9478 switch (XINT (XEXP (disp, 0), 1))
9479 {
9480 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
9481 used.  While the ABI also specifies 32bit relocations, we don't
9482 produce them at all and use IP-relative addressing instead. */
9483 case UNSPEC_GOT:
9484 case UNSPEC_GOTOFF:
9485 gcc_assert (flag_pic);
9486 if (!TARGET_64BIT)
9487 goto is_legitimate_pic;
9488 reason = "64bit address unspec";
9489 goto report_error;
9490
9491 case UNSPEC_GOTPCREL:
9492 gcc_assert (flag_pic);
9493 goto is_legitimate_pic;
9494
9495 case UNSPEC_GOTTPOFF:
9496 case UNSPEC_GOTNTPOFF:
9497 case UNSPEC_INDNTPOFF:
9498 case UNSPEC_NTPOFF:
9499 case UNSPEC_DTPOFF:
9500 break;
9501
9502 default:
9503 reason = "invalid address unspec";
9504 goto report_error;
9505 }
9506
9507 else if (SYMBOLIC_CONST (disp)
9508 && (flag_pic
9509 || (TARGET_MACHO
9510 #if TARGET_MACHO
9511 && MACHOPIC_INDIRECT
9512 && !machopic_operand_p (disp)
9513 #endif
9514 )))
9515 {
9516
9517 is_legitimate_pic:
9518 if (TARGET_64BIT && (index || base))
9519 {
9520 /* foo@dtpoff(%rX) is ok. */
9521 if (GET_CODE (disp) != CONST
9522 || GET_CODE (XEXP (disp, 0)) != PLUS
9523 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9524 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9525 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9526 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9527 {
9528 reason = "non-constant pic memory reference";
9529 goto report_error;
9530 }
9531 }
9532 else if (! legitimate_pic_address_disp_p (disp))
9533 {
9534 reason = "displacement is an invalid pic construct";
9535 goto report_error;
9536 }
9537
9538 /* This code used to verify that a symbolic pic displacement
9539 includes the pic_offset_table_rtx register.
9540
9541 While this is a good idea, unfortunately these constructs may
9542 be created by the "adds using lea" optimization for incorrect
9543 code like:
9544
9545 int a;
9546 int foo(int i)
9547 {
9548 return *(&a+i);
9549 }
9550
9551 This code is nonsensical, but results in addressing the
9552 GOT table with a pic_offset_table_rtx base.  We can't
9553 easily refuse it, since it gets matched by the
9554 "addsi3" pattern, which later gets split to lea in the
9555 case where the output register differs from the input.  While this
9556 could be handled by a separate addsi pattern for this case
9557 that never results in lea, disabling this test seems to be the
9558 easier and correct fix for the crash. */
9559 }
9560 else if (GET_CODE (disp) != LABEL_REF
9561 && !CONST_INT_P (disp)
9562 && (GET_CODE (disp) != CONST
9563 || !legitimate_constant_p (disp))
9564 && (GET_CODE (disp) != SYMBOL_REF
9565 || !legitimate_constant_p (disp)))
9566 {
9567 reason = "displacement is not constant";
9568 goto report_error;
9569 }
9570 else if (TARGET_64BIT
9571 && !x86_64_immediate_operand (disp, VOIDmode))
9572 {
9573 reason = "displacement is out of range";
9574 goto report_error;
9575 }
9576 }
9577
9578 /* Everything looks valid. */
9579 return TRUE;
9580
9581 report_error:
9582 return FALSE;
9583 }
9584
9585 /* Return a unique alias set for the GOT. */
9586
9587 static alias_set_type
9588 ix86_GOT_alias_set (void)
9589 {
9590 static alias_set_type set = -1;
9591 if (set == -1)
9592 set = new_alias_set ();
9593 return set;
9594 }
9595
9596 /* Return a legitimate reference for ORIG (an address) using the
9597 register REG. If REG is 0, a new pseudo is generated.
9598
9599 There are two types of references that must be handled:
9600
9601 1. Global data references must load the address from the GOT, via
9602 the PIC reg. An insn is emitted to do this load, and the reg is
9603 returned.
9604
9605 2. Static data references, constant pool addresses, and code labels
9606 compute the address as an offset from the GOT, whose base is in
9607 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9608 differentiate them from global data objects. The returned
9609 address is the PIC reg + an unspec constant.
9610
9611 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9612 reg also appears in the address. */
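/* As a sketch of the two cases above (symbol names hypothetical), a
   global "bar" in 32bit PIC code becomes a load through the GOT:

     (mem (plus (reg pic) (const (unspec [(symbol_ref "bar")] UNSPEC_GOT))))

   while a local "baz" becomes an offset from the PIC base:

     (plus (reg pic) (const (unspec [(symbol_ref "baz")] UNSPEC_GOTOFF)))  */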
9613
9614 static rtx
9615 legitimize_pic_address (rtx orig, rtx reg)
9616 {
9617 rtx addr = orig;
9618 rtx new_rtx = orig;
9619 rtx base;
9620
9621 #if TARGET_MACHO
9622 if (TARGET_MACHO && !TARGET_64BIT)
9623 {
9624 if (reg == 0)
9625 reg = gen_reg_rtx (Pmode);
9626 /* Use the generic Mach-O PIC machinery. */
9627 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9628 }
9629 #endif
9630
9631 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9632 new_rtx = addr;
9633 else if (TARGET_64BIT
9634 && ix86_cmodel != CM_SMALL_PIC
9635 && gotoff_operand (addr, Pmode))
9636 {
9637 rtx tmpreg;
9638 /* This symbol may be referenced via a displacement from the PIC
9639 base address (@GOTOFF). */
9640
9641 if (reload_in_progress)
9642 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9643 if (GET_CODE (addr) == CONST)
9644 addr = XEXP (addr, 0);
9645 if (GET_CODE (addr) == PLUS)
9646 {
9647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9648 UNSPEC_GOTOFF);
9649 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9650 }
9651 else
9652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9654 if (!reg)
9655 tmpreg = gen_reg_rtx (Pmode);
9656 else
9657 tmpreg = reg;
9658 emit_move_insn (tmpreg, new_rtx);
9659
9660 if (reg != 0)
9661 {
9662 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9663 tmpreg, 1, OPTAB_DIRECT);
9664 new_rtx = reg;
9665 }
9666 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9667 }
9668 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9669 {
9670 /* This symbol may be referenced via a displacement from the PIC
9671 base address (@GOTOFF). */
9672
9673 if (reload_in_progress)
9674 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9675 if (GET_CODE (addr) == CONST)
9676 addr = XEXP (addr, 0);
9677 if (GET_CODE (addr) == PLUS)
9678 {
9679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9680 UNSPEC_GOTOFF);
9681 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9682 }
9683 else
9684 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9685 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9687
9688 if (reg != 0)
9689 {
9690 emit_move_insn (reg, new_rtx);
9691 new_rtx = reg;
9692 }
9693 }
9694 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9695 /* We can't use @GOTOFF for text labels on VxWorks;
9696 see gotoff_operand. */
9697 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9698 {
9699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9700 {
9701 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9702 return legitimize_dllimport_symbol (addr, true);
9703 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9705 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9706 {
9707 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9708 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9709 }
9710 }
9711
9712 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9713 {
9714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9716 new_rtx = gen_const_mem (Pmode, new_rtx);
9717 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9718
9719 if (reg == 0)
9720 reg = gen_reg_rtx (Pmode);
9721 /* Use gen_movsi directly, otherwise the address is loaded
9722 into a register for CSE.  We don't want to CSE these addresses;
9723 instead we CSE addresses from the GOT table, so skip this. */
9724 emit_insn (gen_movsi (reg, new_rtx));
9725 new_rtx = reg;
9726 }
9727 else
9728 {
9729 /* This symbol must be referenced via a load from the
9730 Global Offset Table (@GOT). */
9731
9732 if (reload_in_progress)
9733 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9735 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9736 if (TARGET_64BIT)
9737 new_rtx = force_reg (Pmode, new_rtx);
9738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9739 new_rtx = gen_const_mem (Pmode, new_rtx);
9740 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9741
9742 if (reg == 0)
9743 reg = gen_reg_rtx (Pmode);
9744 emit_move_insn (reg, new_rtx);
9745 new_rtx = reg;
9746 }
9747 }
9748 else
9749 {
9750 if (CONST_INT_P (addr)
9751 && !x86_64_immediate_operand (addr, VOIDmode))
9752 {
9753 if (reg)
9754 {
9755 emit_move_insn (reg, addr);
9756 new_rtx = reg;
9757 }
9758 else
9759 new_rtx = force_reg (Pmode, addr);
9760 }
9761 else if (GET_CODE (addr) == CONST)
9762 {
9763 addr = XEXP (addr, 0);
9764
9765 /* We must match stuff we generated before.  Assume the only
9766 unspecs that can get here are ours. Not that we could do
9767 anything with them anyway.... */
9768 if (GET_CODE (addr) == UNSPEC
9769 || (GET_CODE (addr) == PLUS
9770 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9771 return orig;
9772 gcc_assert (GET_CODE (addr) == PLUS);
9773 }
9774 if (GET_CODE (addr) == PLUS)
9775 {
9776 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9777
9778 /* Check first to see if this is a constant offset from a @GOTOFF
9779 symbol reference. */
9780 if (gotoff_operand (op0, Pmode)
9781 && CONST_INT_P (op1))
9782 {
9783 if (!TARGET_64BIT)
9784 {
9785 if (reload_in_progress)
9786 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9788 UNSPEC_GOTOFF);
9789 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9792
9793 if (reg != 0)
9794 {
9795 emit_move_insn (reg, new_rtx);
9796 new_rtx = reg;
9797 }
9798 }
9799 else
9800 {
9801 if (INTVAL (op1) < -16*1024*1024
9802 || INTVAL (op1) >= 16*1024*1024)
9803 {
9804 if (!x86_64_immediate_operand (op1, Pmode))
9805 op1 = force_reg (Pmode, op1);
9806 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9807 }
9808 }
9809 }
9810 else
9811 {
9812 base = legitimize_pic_address (XEXP (addr, 0), reg);
9813 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9814 base == reg ? NULL_RTX : reg);
9815
9816 if (CONST_INT_P (new_rtx))
9817 new_rtx = plus_constant (base, INTVAL (new_rtx));
9818 else
9819 {
9820 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9821 {
9822 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9823 new_rtx = XEXP (new_rtx, 1);
9824 }
9825 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9826 }
9827 }
9828 }
9829 }
9830 return new_rtx;
9831 }
9832
9833 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9834
9835 static rtx
9836 get_thread_pointer (int to_reg)
9837 {
9838 rtx tp, reg, insn;
9839
9840 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9841 if (!to_reg)
9842 return tp;
9843
9844 reg = gen_reg_rtx (Pmode);
9845 insn = gen_rtx_SET (VOIDmode, reg, tp);
9846 insn = emit_insn (insn);
9847
9848 return reg;
9849 }
9850
9851 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9852 false if we expect this to be used for a memory address and true if
9853 we expect to load the address into a register. */
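/* A minimal sketch of the local-exec case below (the variable name is
   hypothetical): with TARGET_64BIT or TARGET_ANY_GNU_TLS, "tls_var"
   becomes

     (plus <thread pointer>
           (const (unspec [(symbol_ref "tls_var")] UNSPEC_NTPOFF)))

   where <thread pointer> is the UNSPEC_TP expression or a register
   holding it; otherwise a UNSPEC_TPOFF offset is subtracted from the
   thread pointer instead.  */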
9854
9855 static rtx
9856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9857 {
9858 rtx dest, base, off, pic, tp;
9859 int type;
9860
9861 switch (model)
9862 {
9863 case TLS_MODEL_GLOBAL_DYNAMIC:
9864 dest = gen_reg_rtx (Pmode);
9865 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9866
9867 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9868 {
9869 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9870
9871 start_sequence ();
9872 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9873 insns = get_insns ();
9874 end_sequence ();
9875
9876 RTL_CONST_CALL_P (insns) = 1;
9877 emit_libcall_block (insns, dest, rax, x);
9878 }
9879 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9880 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9881 else
9882 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9883
9884 if (TARGET_GNU2_TLS)
9885 {
9886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9887
9888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9889 }
9890 break;
9891
9892 case TLS_MODEL_LOCAL_DYNAMIC:
9893 base = gen_reg_rtx (Pmode);
9894 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9895
9896 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9897 {
9898 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9899
9900 start_sequence ();
9901 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9902 insns = get_insns ();
9903 end_sequence ();
9904
9905 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9906 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9907 RTL_CONST_CALL_P (insns) = 1;
9908 emit_libcall_block (insns, base, rax, note);
9909 }
9910 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9911 emit_insn (gen_tls_local_dynamic_base_64 (base));
9912 else
9913 emit_insn (gen_tls_local_dynamic_base_32 (base));
9914
9915 if (TARGET_GNU2_TLS)
9916 {
9917 rtx x = ix86_tls_module_base ();
9918
9919 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9920 gen_rtx_MINUS (Pmode, x, tp));
9921 }
9922
9923 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9924 off = gen_rtx_CONST (Pmode, off);
9925
9926 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9927
9928 if (TARGET_GNU2_TLS)
9929 {
9930 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9931
9932 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9933 }
9934
9935 break;
9936
9937 case TLS_MODEL_INITIAL_EXEC:
9938 if (TARGET_64BIT)
9939 {
9940 pic = NULL;
9941 type = UNSPEC_GOTNTPOFF;
9942 }
9943 else if (flag_pic)
9944 {
9945 if (reload_in_progress)
9946 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9947 pic = pic_offset_table_rtx;
9948 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9949 }
9950 else if (!TARGET_ANY_GNU_TLS)
9951 {
9952 pic = gen_reg_rtx (Pmode);
9953 emit_insn (gen_set_got (pic));
9954 type = UNSPEC_GOTTPOFF;
9955 }
9956 else
9957 {
9958 pic = NULL;
9959 type = UNSPEC_INDNTPOFF;
9960 }
9961
9962 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9963 off = gen_rtx_CONST (Pmode, off);
9964 if (pic)
9965 off = gen_rtx_PLUS (Pmode, pic, off);
9966 off = gen_const_mem (Pmode, off);
9967 set_mem_alias_set (off, ix86_GOT_alias_set ());
9968
9969 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9970 {
9971 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9972 off = force_reg (Pmode, off);
9973 return gen_rtx_PLUS (Pmode, base, off);
9974 }
9975 else
9976 {
9977 base = get_thread_pointer (true);
9978 dest = gen_reg_rtx (Pmode);
9979 emit_insn (gen_subsi3 (dest, base, off));
9980 }
9981 break;
9982
9983 case TLS_MODEL_LOCAL_EXEC:
9984 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9985 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9986 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9987 off = gen_rtx_CONST (Pmode, off);
9988
9989 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9990 {
9991 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9992 return gen_rtx_PLUS (Pmode, base, off);
9993 }
9994 else
9995 {
9996 base = get_thread_pointer (true);
9997 dest = gen_reg_rtx (Pmode);
9998 emit_insn (gen_subsi3 (dest, base, off));
9999 }
10000 break;
10001
10002 default:
10003 gcc_unreachable ();
10004 }
10005
10006 return dest;
10007 }
10008
10009 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10010 to symbol DECL. */
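/* As a sketch (the decl name is hypothetical): for a dllimport'ed "foo"
   this builds an artificial VAR_DECL whose DECL_RTL is the constant
   memory reference (mem (symbol_ref "*__imp__foo")) -- or "*__imp_foo"
   when the fastcall prefix or an empty user label prefix applies -- so
   that uses of foo go through the import table.  */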
10011
10012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10013 htab_t dllimport_map;
10014
10015 static tree
10016 get_dllimport_decl (tree decl)
10017 {
10018 struct tree_map *h, in;
10019 void **loc;
10020 const char *name;
10021 const char *prefix;
10022 size_t namelen, prefixlen;
10023 char *imp_name;
10024 tree to;
10025 rtx rtl;
10026
10027 if (!dllimport_map)
10028 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10029
10030 in.hash = htab_hash_pointer (decl);
10031 in.base.from = decl;
10032 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10033 h = (struct tree_map *) *loc;
10034 if (h)
10035 return h->to;
10036
10037 *loc = h = GGC_NEW (struct tree_map);
10038 h->hash = in.hash;
10039 h->base.from = decl;
10040 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10041 DECL_ARTIFICIAL (to) = 1;
10042 DECL_IGNORED_P (to) = 1;
10043 DECL_EXTERNAL (to) = 1;
10044 TREE_READONLY (to) = 1;
10045
10046 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10047 name = targetm.strip_name_encoding (name);
10048 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10049 ? "*__imp_" : "*__imp__";
10050 namelen = strlen (name);
10051 prefixlen = strlen (prefix);
10052 imp_name = (char *) alloca (namelen + prefixlen + 1);
10053 memcpy (imp_name, prefix, prefixlen);
10054 memcpy (imp_name + prefixlen, name, namelen + 1);
10055
10056 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10057 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10058 SET_SYMBOL_REF_DECL (rtl, to);
10059 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10060
10061 rtl = gen_const_mem (Pmode, rtl);
10062 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10063
10064 SET_DECL_RTL (to, rtl);
10065 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10066
10067 return to;
10068 }
10069
10070 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10071 true if we require the result be a register. */
10072
10073 static rtx
10074 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10075 {
10076 tree imp_decl;
10077 rtx x;
10078
10079 gcc_assert (SYMBOL_REF_DECL (symbol));
10080 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10081
10082 x = DECL_RTL (imp_decl);
10083 if (want_reg)
10084 x = force_reg (Pmode, x);
10085 return x;
10086 }
10087
10088 /* Try machine-dependent ways of modifying an illegitimate address
10089 to be legitimate. If we find one, return the new, valid address.
10090 This macro is used in only one place: `memory_address' in explow.c.
10091
10092 OLDX is the address as it was before break_out_memory_refs was called.
10093 In some cases it is useful to look at this to decide what needs to be done.
10094
10095 MODE and WIN are passed so that this macro can use
10096 GO_IF_LEGITIMATE_ADDRESS.
10097
10098 It is always safe for this macro to do nothing. It exists to recognize
10099 opportunities to optimize the output.
10100
10101 For the 80386, we handle X+REG by loading X into a register R and
10102 using R+REG. R will go in a general reg and indexing will be used.
10103 However, if REG is a broken-out memory address or multiplication,
10104 nothing needs to be done because REG can certainly go in a general reg.
10105
10106 When -fpic is used, special handling is needed for symbolic references.
10107 See comments by legitimize_pic_address in i386.c for details. */
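/* For example (registers hypothetical), the canonicalizations below
   rewrite (plus (ashift (reg) (const_int 2)) (reg)) into
   (plus (mult (reg) (const_int 4)) (reg)), the scaled-index form that
   legitimate_address_p accepts.  */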
10108
10109 rtx
10110 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10111 {
10112 int changed = 0;
10113 unsigned log;
10114
10115 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10116 if (log)
10117 return legitimize_tls_address (x, (enum tls_model) log, false);
10118 if (GET_CODE (x) == CONST
10119 && GET_CODE (XEXP (x, 0)) == PLUS
10120 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10121 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10122 {
10123 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10124 (enum tls_model) log, false);
10125 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10126 }
10127
10128 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10129 {
10130 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10131 return legitimize_dllimport_symbol (x, true);
10132 if (GET_CODE (x) == CONST
10133 && GET_CODE (XEXP (x, 0)) == PLUS
10134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10135 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10136 {
10137 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10138 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10139 }
10140 }
10141
10142 if (flag_pic && SYMBOLIC_CONST (x))
10143 return legitimize_pic_address (x, 0);
10144
10145 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10146 if (GET_CODE (x) == ASHIFT
10147 && CONST_INT_P (XEXP (x, 1))
10148 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10149 {
10150 changed = 1;
10151 log = INTVAL (XEXP (x, 1));
10152 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10153 GEN_INT (1 << log));
10154 }
10155
10156 if (GET_CODE (x) == PLUS)
10157 {
10158 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10159
10160 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10161 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10162 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10163 {
10164 changed = 1;
10165 log = INTVAL (XEXP (XEXP (x, 0), 1));
10166 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10167 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10168 GEN_INT (1 << log));
10169 }
10170
10171 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10172 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10173 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10174 {
10175 changed = 1;
10176 log = INTVAL (XEXP (XEXP (x, 1), 1));
10177 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10178 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10179 GEN_INT (1 << log));
10180 }
10181
10182 /* Put multiply first if it isn't already. */
10183 if (GET_CODE (XEXP (x, 1)) == MULT)
10184 {
10185 rtx tmp = XEXP (x, 0);
10186 XEXP (x, 0) = XEXP (x, 1);
10187 XEXP (x, 1) = tmp;
10188 changed = 1;
10189 }
10190
10191 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10192 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10193 created by virtual register instantiation, register elimination, and
10194 similar optimizations. */
10195 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10196 {
10197 changed = 1;
10198 x = gen_rtx_PLUS (Pmode,
10199 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10200 XEXP (XEXP (x, 1), 0)),
10201 XEXP (XEXP (x, 1), 1));
10202 }
10203
10204 /* Canonicalize
10205 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10206 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10207 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10208 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10209 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10210 && CONSTANT_P (XEXP (x, 1)))
10211 {
10212 rtx constant;
10213 rtx other = NULL_RTX;
10214
10215 if (CONST_INT_P (XEXP (x, 1)))
10216 {
10217 constant = XEXP (x, 1);
10218 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10219 }
10220 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10221 {
10222 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10223 other = XEXP (x, 1);
10224 }
10225 else
10226 constant = 0;
10227
10228 if (constant)
10229 {
10230 changed = 1;
10231 x = gen_rtx_PLUS (Pmode,
10232 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10233 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10234 plus_constant (other, INTVAL (constant)));
10235 }
10236 }
10237
10238 if (changed && legitimate_address_p (mode, x, FALSE))
10239 return x;
10240
10241 if (GET_CODE (XEXP (x, 0)) == MULT)
10242 {
10243 changed = 1;
10244 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10245 }
10246
10247 if (GET_CODE (XEXP (x, 1)) == MULT)
10248 {
10249 changed = 1;
10250 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10251 }
10252
10253 if (changed
10254 && REG_P (XEXP (x, 1))
10255 && REG_P (XEXP (x, 0)))
10256 return x;
10257
10258 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10259 {
10260 changed = 1;
10261 x = legitimize_pic_address (x, 0);
10262 }
10263
10264 if (changed && legitimate_address_p (mode, x, FALSE))
10265 return x;
10266
10267 if (REG_P (XEXP (x, 0)))
10268 {
10269 rtx temp = gen_reg_rtx (Pmode);
10270 rtx val = force_operand (XEXP (x, 1), temp);
10271 if (val != temp)
10272 emit_move_insn (temp, val);
10273
10274 XEXP (x, 1) = temp;
10275 return x;
10276 }
10277
10278 else if (REG_P (XEXP (x, 1)))
10279 {
10280 rtx temp = gen_reg_rtx (Pmode);
10281 rtx val = force_operand (XEXP (x, 0), temp);
10282 if (val != temp)
10283 emit_move_insn (temp, val);
10284
10285 XEXP (x, 0) = temp;
10286 return x;
10287 }
10288 }
10289
10290 return x;
10291 }
10292
10293 /* Print an integer constant expression in assembler syntax. Addition
10294 and subtraction are the only arithmetic that may appear in these
10295 expressions. FILE is the stdio stream to write to, X is the rtx, and
10296 CODE is the operand print code from the output string. */
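/* For instance (the symbol is hypothetical), the PIC constant
   (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)) is printed as
   "foo@GOTOFF", and on non-Mach-O targets a MINUS is wrapped in the
   dialect-specific brackets emitted below.  */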
10297
10298 static void
10299 output_pic_addr_const (FILE *file, rtx x, int code)
10300 {
10301 char buf[256];
10302
10303 switch (GET_CODE (x))
10304 {
10305 case PC:
10306 gcc_assert (flag_pic);
10307 putc ('.', file);
10308 break;
10309
10310 case SYMBOL_REF:
10311 if (! TARGET_MACHO || TARGET_64BIT)
10312 output_addr_const (file, x);
10313 else
10314 {
10315 const char *name = XSTR (x, 0);
10316
10317 /* Mark the decl as referenced so that cgraph will
10318 output the function. */
10319 if (SYMBOL_REF_DECL (x))
10320 mark_decl_referenced (SYMBOL_REF_DECL (x));
10321
10322 #if TARGET_MACHO
10323 if (MACHOPIC_INDIRECT
10324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10325 name = machopic_indirection_name (x, /*stub_p=*/true);
10326 #endif
10327 assemble_name (file, name);
10328 }
10329 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10330 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10331 fputs ("@PLT", file);
10332 break;
10333
10334 case LABEL_REF:
10335 x = XEXP (x, 0);
10336 /* FALLTHRU */
10337 case CODE_LABEL:
10338 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10339 assemble_name (asm_out_file, buf);
10340 break;
10341
10342 case CONST_INT:
10343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10344 break;
10345
10346 case CONST:
10347 /* This used to output parentheses around the expression,
10348 but that does not work on the 386 (either ATT or BSD assembler). */
10349 output_pic_addr_const (file, XEXP (x, 0), code);
10350 break;
10351
10352 case CONST_DOUBLE:
10353 if (GET_MODE (x) == VOIDmode)
10354 {
10355 /* We can use %d if the number is <32 bits and positive. */
10356 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10357 fprintf (file, "0x%lx%08lx",
10358 (unsigned long) CONST_DOUBLE_HIGH (x),
10359 (unsigned long) CONST_DOUBLE_LOW (x));
10360 else
10361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10362 }
10363 else
10364 /* We can't handle floating point constants;
10365 PRINT_OPERAND must handle them. */
10366 output_operand_lossage ("floating constant misused");
10367 break;
10368
10369 case PLUS:
10370 /* Some assemblers need integer constants to appear first. */
10371 if (CONST_INT_P (XEXP (x, 0)))
10372 {
10373 output_pic_addr_const (file, XEXP (x, 0), code);
10374 putc ('+', file);
10375 output_pic_addr_const (file, XEXP (x, 1), code);
10376 }
10377 else
10378 {
10379 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10380 output_pic_addr_const (file, XEXP (x, 1), code);
10381 putc ('+', file);
10382 output_pic_addr_const (file, XEXP (x, 0), code);
10383 }
10384 break;
10385
10386 case MINUS:
10387 if (!TARGET_MACHO)
10388 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10389 output_pic_addr_const (file, XEXP (x, 0), code);
10390 putc ('-', file);
10391 output_pic_addr_const (file, XEXP (x, 1), code);
10392 if (!TARGET_MACHO)
10393 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10394 break;
10395
10396 case UNSPEC:
10397 gcc_assert (XVECLEN (x, 0) == 1);
10398 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10399 switch (XINT (x, 1))
10400 {
10401 case UNSPEC_GOT:
10402 fputs ("@GOT", file);
10403 break;
10404 case UNSPEC_GOTOFF:
10405 fputs ("@GOTOFF", file);
10406 break;
10407 case UNSPEC_PLTOFF:
10408 fputs ("@PLTOFF", file);
10409 break;
10410 case UNSPEC_GOTPCREL:
10411 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10412 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10413 break;
10414 case UNSPEC_GOTTPOFF:
10415 /* FIXME: This might be @TPOFF in Sun ld too. */
10416 fputs ("@GOTTPOFF", file);
10417 break;
10418 case UNSPEC_TPOFF:
10419 fputs ("@TPOFF", file);
10420 break;
10421 case UNSPEC_NTPOFF:
10422 if (TARGET_64BIT)
10423 fputs ("@TPOFF", file);
10424 else
10425 fputs ("@NTPOFF", file);
10426 break;
10427 case UNSPEC_DTPOFF:
10428 fputs ("@DTPOFF", file);
10429 break;
10430 case UNSPEC_GOTNTPOFF:
10431 if (TARGET_64BIT)
10432 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10433 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10434 else
10435 fputs ("@GOTNTPOFF", file);
10436 break;
10437 case UNSPEC_INDNTPOFF:
10438 fputs ("@INDNTPOFF", file);
10439 break;
10440 #if TARGET_MACHO
10441 case UNSPEC_MACHOPIC_OFFSET:
10442 putc ('-', file);
10443 machopic_output_function_base_name (file);
10444 break;
10445 #endif
10446 default:
10447 output_operand_lossage ("invalid UNSPEC as operand");
10448 break;
10449 }
10450 break;
10451
10452 default:
10453 output_operand_lossage ("invalid expression as operand");
10454 }
10455 }
10456
10457 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10458 We need to emit DTP-relative relocations. */
10459
10460 static void ATTRIBUTE_UNUSED
10461 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10462 {
10463 fputs (ASM_LONG, file);
10464 output_addr_const (file, x);
10465 fputs ("@DTPOFF", file);
10466 switch (size)
10467 {
10468 case 4:
10469 break;
10470 case 8:
10471 fputs (", 0", file);
10472 break;
10473 default:
10474 gcc_unreachable ();
10475 }
10476 }
10477
10478 /* Return true if X is a representation of the PIC register. This copes
10479 with calls from ix86_find_base_term, where the register might have
10480 been replaced by a cselib value. */
10481
10482 static bool
10483 ix86_pic_register_p (rtx x)
10484 {
10485 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10486 return (pic_offset_table_rtx
10487 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10488 else
10489 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10490 }
10491
10492 /* In the name of slightly smaller debug output, and to cater to
10493 general assembler lossage, recognize PIC+GOTOFF and turn it back
10494 into a direct symbol reference.
10495
10496 On Darwin, this is necessary to avoid a crash, because Darwin
10497 has a different PIC label for each routine but the DWARF debugging
10498 information is not associated with any particular routine, so it's
10499 necessary to remove references to the PIC label from RTL stored by
10500 the DWARF output code. */
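/* For illustration (the symbol is hypothetical), this turns

     (plus (reg pic) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   back into (symbol_ref "foo"), re-adding any register or constant
   addend that surrounded the unspec.  */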
10501
10502 static rtx
10503 ix86_delegitimize_address (rtx orig_x)
10504 {
10505 rtx x = orig_x;
10506 /* reg_addend is NULL or a multiple of some register. */
10507 rtx reg_addend = NULL_RTX;
10508 /* const_addend is NULL or a const_int. */
10509 rtx const_addend = NULL_RTX;
10510 /* This is the result, or NULL. */
10511 rtx result = NULL_RTX;
10512
10513 if (MEM_P (x))
10514 x = XEXP (x, 0);
10515
10516 if (TARGET_64BIT)
10517 {
10518 if (GET_CODE (x) != CONST
10519 || GET_CODE (XEXP (x, 0)) != UNSPEC
10520 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10521 || !MEM_P (orig_x))
10522 return orig_x;
10523 return XVECEXP (XEXP (x, 0), 0, 0);
10524 }
10525
10526 if (GET_CODE (x) != PLUS
10527 || GET_CODE (XEXP (x, 1)) != CONST)
10528 return orig_x;
10529
10530 if (ix86_pic_register_p (XEXP (x, 0)))
10531 /* %ebx + GOT/GOTOFF */
10532 ;
10533 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10534 {
10535 /* %ebx + %reg * scale + GOT/GOTOFF */
10536 reg_addend = XEXP (x, 0);
10537 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10538 reg_addend = XEXP (reg_addend, 1);
10539 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10540 reg_addend = XEXP (reg_addend, 0);
10541 else
10542 return orig_x;
10543 if (!REG_P (reg_addend)
10544 && GET_CODE (reg_addend) != MULT
10545 && GET_CODE (reg_addend) != ASHIFT)
10546 return orig_x;
10547 }
10548 else
10549 return orig_x;
10550
10551 x = XEXP (XEXP (x, 1), 0);
10552 if (GET_CODE (x) == PLUS
10553 && CONST_INT_P (XEXP (x, 1)))
10554 {
10555 const_addend = XEXP (x, 1);
10556 x = XEXP (x, 0);
10557 }
10558
10559 if (GET_CODE (x) == UNSPEC
10560 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10561 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10562 result = XVECEXP (x, 0, 0);
10563
10564 if (TARGET_MACHO && darwin_local_data_pic (x)
10565 && !MEM_P (orig_x))
10566 result = XVECEXP (x, 0, 0);
10567
10568 if (! result)
10569 return orig_x;
10570
10571 if (const_addend)
10572 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10573 if (reg_addend)
10574 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10575 return result;
10576 }
10577
10578 /* If X is a machine specific address (i.e. a symbol or label being
10579 referenced as a displacement from the GOT implemented using an
10580 UNSPEC), then return the base term. Otherwise return X. */
10581
10582 rtx
10583 ix86_find_base_term (rtx x)
10584 {
10585 rtx term;
10586
10587 if (TARGET_64BIT)
10588 {
10589 if (GET_CODE (x) != CONST)
10590 return x;
10591 term = XEXP (x, 0);
10592 if (GET_CODE (term) == PLUS
10593 && (CONST_INT_P (XEXP (term, 1))
10594 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10595 term = XEXP (term, 0);
10596 if (GET_CODE (term) != UNSPEC
10597 || XINT (term, 1) != UNSPEC_GOTPCREL)
10598 return x;
10599
10600 return XVECEXP (term, 0, 0);
10601 }
10602
10603 return ix86_delegitimize_address (x);
10604 }
10605
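/* Output to FILE the condition suffix (e.g. "e", "ne", "g", "nbe")
   corresponding to comparison CODE in mode MODE.  If REVERSE is
   nonzero, the suffix for the reversed condition is emitted; FP selects
   the alternate spellings used to work around fcmov/cmov assembler
   lossage where they differ.  */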
10606 static void
10607 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10608 int fp, FILE *file)
10609 {
10610 const char *suffix;
10611
10612 if (mode == CCFPmode || mode == CCFPUmode)
10613 {
10614 enum rtx_code second_code, bypass_code;
10615 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10616 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10617 code = ix86_fp_compare_code_to_integer (code);
10618 mode = CCmode;
10619 }
10620 if (reverse)
10621 code = reverse_condition (code);
10622
10623 switch (code)
10624 {
10625 case EQ:
10626 switch (mode)
10627 {
10628 case CCAmode:
10629 suffix = "a";
10630 break;
10631
10632 case CCCmode:
10633 suffix = "c";
10634 break;
10635
10636 case CCOmode:
10637 suffix = "o";
10638 break;
10639
10640 case CCSmode:
10641 suffix = "s";
10642 break;
10643
10644 default:
10645 suffix = "e";
10646 }
10647 break;
10648 case NE:
10649 switch (mode)
10650 {
10651 case CCAmode:
10652 suffix = "na";
10653 break;
10654
10655 case CCCmode:
10656 suffix = "nc";
10657 break;
10658
10659 case CCOmode:
10660 suffix = "no";
10661 break;
10662
10663 case CCSmode:
10664 suffix = "ns";
10665 break;
10666
10667 default:
10668 suffix = "ne";
10669 }
10670 break;
10671 case GT:
10672 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10673 suffix = "g";
10674 break;
10675 case GTU:
10676 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10677 Those same assemblers have the same but opposite lossage on cmov. */
10678 if (mode == CCmode)
10679 suffix = fp ? "nbe" : "a";
10680 else if (mode == CCCmode)
10681 suffix = "b";
10682 else
10683 gcc_unreachable ();
10684 break;
10685 case LT:
10686 switch (mode)
10687 {
10688 case CCNOmode:
10689 case CCGOCmode:
10690 suffix = "s";
10691 break;
10692
10693 case CCmode:
10694 case CCGCmode:
10695 suffix = "l";
10696 break;
10697
10698 default:
10699 gcc_unreachable ();
10700 }
10701 break;
10702 case LTU:
10703 gcc_assert (mode == CCmode || mode == CCCmode);
10704 suffix = "b";
10705 break;
10706 case GE:
10707 switch (mode)
10708 {
10709 case CCNOmode:
10710 case CCGOCmode:
10711 suffix = "ns";
10712 break;
10713
10714 case CCmode:
10715 case CCGCmode:
10716 suffix = "ge";
10717 break;
10718
10719 default:
10720 gcc_unreachable ();
10721 }
10722 break;
10723 case GEU:
10724 /* ??? As above. */
10725 gcc_assert (mode == CCmode || mode == CCCmode);
10726 suffix = fp ? "nb" : "ae";
10727 break;
10728 case LE:
10729 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10730 suffix = "le";
10731 break;
10732 case LEU:
10733 /* ??? As above. */
10734 if (mode == CCmode)
10735 suffix = "be";
10736 else if (mode == CCCmode)
10737 suffix = fp ? "nb" : "ae";
10738 else
10739 gcc_unreachable ();
10740 break;
10741 case UNORDERED:
10742 suffix = fp ? "u" : "p";
10743 break;
10744 case ORDERED:
10745 suffix = fp ? "nu" : "np";
10746 break;
10747 default:
10748 gcc_unreachable ();
10749 }
10750 fputs (suffix, file);
10751 }
10752
10753 /* Print the name of register X to FILE based on its machine mode and number.
10754 If CODE is 'w', pretend the mode is HImode.
10755 If CODE is 'b', pretend the mode is QImode.
10756 If CODE is 'k', pretend the mode is SImode.
10757 If CODE is 'q', pretend the mode is DImode.
10758 If CODE is 'x', pretend the mode is V4SFmode.
10759 If CODE is 't', pretend the mode is V8SFmode.
10760 If CODE is 'h', pretend the reg is the 'high' byte register.
10761 If CODE is 'y', print "st(0)" instead of "st" if the register is a stack operand.
10762 If CODE is 'd', duplicate the operand for an AVX instruction.
10763 */
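/* Illustrative examples (assuming the AT&T dialect, where a '%' prefix is
   printed): for the SImode register ax, code 'b' prints "%al", 'w' prints
   "%ax", 'k' prints "%eax", 'q' prints "%rax" on 64-bit targets, and 'h'
   prints "%ah"; with no code the register's natural mode decides, so an
   SImode register prints "%eax".  */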
10764
10765 void
10766 print_reg (rtx x, int code, FILE *file)
10767 {
10768 const char *reg;
10769 bool duplicated = code == 'd' && TARGET_AVX;
10770
10771 gcc_assert (x == pc_rtx
10772 || (REGNO (x) != ARG_POINTER_REGNUM
10773 && REGNO (x) != FRAME_POINTER_REGNUM
10774 && REGNO (x) != FLAGS_REG
10775 && REGNO (x) != FPSR_REG
10776 && REGNO (x) != FPCR_REG));
10777
10778 if (ASSEMBLER_DIALECT == ASM_ATT)
10779 putc ('%', file);
10780
10781 if (x == pc_rtx)
10782 {
10783 gcc_assert (TARGET_64BIT);
10784 fputs ("rip", file);
10785 return;
10786 }
10787
10788 if (code == 'w' || MMX_REG_P (x))
10789 code = 2;
10790 else if (code == 'b')
10791 code = 1;
10792 else if (code == 'k')
10793 code = 4;
10794 else if (code == 'q')
10795 code = 8;
10796 else if (code == 'y')
10797 code = 3;
10798 else if (code == 'h')
10799 code = 0;
10800 else if (code == 'x')
10801 code = 16;
10802 else if (code == 't')
10803 code = 32;
10804 else
10805 code = GET_MODE_SIZE (GET_MODE (x));
10806
10807 /* Irritatingly, the AMD extended registers use a different naming convention
10808 from the normal registers. */
10809 if (REX_INT_REG_P (x))
10810 {
10811 gcc_assert (TARGET_64BIT);
10812 switch (code)
10813 {
10814 case 0:
10815 error ("extended registers have no high halves");
10816 break;
10817 case 1:
10818 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10819 break;
10820 case 2:
10821 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10822 break;
10823 case 4:
10824 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10825 break;
10826 case 8:
10827 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10828 break;
10829 default:
10830 error ("unsupported operand size for extended register");
10831 break;
10832 }
10833 return;
10834 }
10835
10836 reg = NULL;
10837 switch (code)
10838 {
10839 case 3:
10840 if (STACK_TOP_P (x))
10841 {
10842 reg = "st(0)";
10843 break;
10844 }
10845 /* FALLTHRU */
10846 case 8:
10847 case 4:
10848 case 12:
10849 if (! ANY_FP_REG_P (x))
10850 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10851 /* FALLTHRU */
10852 case 16:
10853 case 2:
10854 normal:
10855 reg = hi_reg_name[REGNO (x)];
10856 break;
10857 case 1:
10858 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10859 goto normal;
10860 reg = qi_reg_name[REGNO (x)];
10861 break;
10862 case 0:
10863 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10864 goto normal;
10865 reg = qi_high_reg_name[REGNO (x)];
10866 break;
10867 case 32:
10868 if (SSE_REG_P (x))
10869 {
10870 gcc_assert (!duplicated);
10871 putc ('y', file);
10872 fputs (hi_reg_name[REGNO (x)] + 1, file);
10873 return;
10874 }
10875 break;
10876 default:
10877 gcc_unreachable ();
10878 }
10879
10880 fputs (reg, file);
10881 if (duplicated)
10882 {
10883 if (ASSEMBLER_DIALECT == ASM_ATT)
10884 fprintf (file, ", %%%s", reg);
10885 else
10886 fprintf (file, ", %s", reg);
10887 }
10888 }
10889
10890 /* Locate some local-dynamic symbol still in use by this function
10891 so that we can print its name in some tls_local_dynamic_base
10892 pattern. */
10893
10894 static int
10895 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10896 {
10897 rtx x = *px;
10898
10899 if (GET_CODE (x) == SYMBOL_REF
10900 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10901 {
10902 cfun->machine->some_ld_name = XSTR (x, 0);
10903 return 1;
10904 }
10905
10906 return 0;
10907 }
10908
10909 static const char *
10910 get_some_local_dynamic_name (void)
10911 {
10912 rtx insn;
10913
10914 if (cfun->machine->some_ld_name)
10915 return cfun->machine->some_ld_name;
10916
10917 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10918 if (INSN_P (insn)
10919 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10920 return cfun->machine->some_ld_name;
10921
10922 gcc_unreachable ();
10923 }
10924
10925 /* Meaning of CODE:
10926 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10927 C -- print opcode suffix for set/cmov insn.
10928 c -- like C, but print reversed condition
10929 F,f -- likewise, but for floating-point.
10930 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10931 otherwise nothing
10932 R -- print the prefix for register names.
10933 z -- print the opcode suffix for the size of the current operand.
10934 * -- print a star (in certain assembler syntax)
10935 A -- print an absolute memory reference.
10936 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10937 s -- print a shift double count, followed by the assembler's argument
10938 delimiter.
10939 b -- print the QImode name of the register for the indicated operand.
10940 %b0 would print %al if operands[0] is reg 0.
10941 w -- likewise, print the HImode name of the register.
10942 k -- likewise, print the SImode name of the register.
10943 q -- likewise, print the DImode name of the register.
10944 x -- likewise, print the V4SFmode name of the register.
10945 t -- likewise, print the V8SFmode name of the register.
10946 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10947 y -- print "st(0)" instead of "st" as a register.
10948 d -- print duplicated register operand for AVX instruction.
10949 D -- print condition for SSE cmp instruction.
10950 P -- if PIC, print an @PLT suffix.
10951 X -- don't print any sort of PIC '@' suffix for a symbol.
10952 & -- print some in-use local-dynamic symbol name.
10953 H -- print a memory address offset by 8; used for SSE high parts
10954 Y -- print condition for SSE5 com* instruction.
10955 + -- print a branch hint as 'cs' or 'ds' prefix
10956 ; -- print a semicolon (after prefixes due to bug in older gas).
10957 */
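/* A rough, made-up example of how these codes combine in an output
   template (illustrative only): with operands[0] = (reg:SI ax) and
   operands[1] = (const_int 42), the AT&T output of the template
   "mov%z0\t{%1, %0|%0, %1}" is "movl\t$42, %eax" -- %z0 supplies the "l"
   suffix from the operand size, %1 prints the immediate with a leading
   '$', and %0 prints the register name.  */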
10958
10959 void
10960 print_operand (FILE *file, rtx x, int code)
10961 {
10962 if (code)
10963 {
10964 switch (code)
10965 {
10966 case '*':
10967 if (ASSEMBLER_DIALECT == ASM_ATT)
10968 putc ('*', file);
10969 return;
10970
10971 case '&':
10972 assemble_name (file, get_some_local_dynamic_name ());
10973 return;
10974
10975 case 'A':
10976 switch (ASSEMBLER_DIALECT)
10977 {
10978 case ASM_ATT:
10979 putc ('*', file);
10980 break;
10981
10982 case ASM_INTEL:
10983 /* Intel syntax. For absolute addresses, registers should not
10984 be surrounded by brackets. */
10985 if (!REG_P (x))
10986 {
10987 putc ('[', file);
10988 PRINT_OPERAND (file, x, 0);
10989 putc (']', file);
10990 return;
10991 }
10992 break;
10993
10994 default:
10995 gcc_unreachable ();
10996 }
10997
10998 PRINT_OPERAND (file, x, 0);
10999 return;
11000
11002 case 'L':
11003 if (ASSEMBLER_DIALECT == ASM_ATT)
11004 putc ('l', file);
11005 return;
11006
11007 case 'W':
11008 if (ASSEMBLER_DIALECT == ASM_ATT)
11009 putc ('w', file);
11010 return;
11011
11012 case 'B':
11013 if (ASSEMBLER_DIALECT == ASM_ATT)
11014 putc ('b', file);
11015 return;
11016
11017 case 'Q':
11018 if (ASSEMBLER_DIALECT == ASM_ATT)
11019 putc ('l', file);
11020 return;
11021
11022 case 'S':
11023 if (ASSEMBLER_DIALECT == ASM_ATT)
11024 putc ('s', file);
11025 return;
11026
11027 case 'T':
11028 if (ASSEMBLER_DIALECT == ASM_ATT)
11029 putc ('t', file);
11030 return;
11031
11032 case 'z':
11033 /* 387 opcodes don't get size suffixes if the operands are
11034 registers. */
11035 if (STACK_REG_P (x))
11036 return;
11037
11038 /* Likewise if using Intel opcodes. */
11039 if (ASSEMBLER_DIALECT == ASM_INTEL)
11040 return;
11041
11042 /* Derive the opcode size suffix from the size of the operand. */
11043 switch (GET_MODE_SIZE (GET_MODE (x)))
11044 {
11045 case 1:
11046 putc ('b', file);
11047 return;
11048
11049 case 2:
11050 if (MEM_P (x))
11051 {
11052 #ifdef HAVE_GAS_FILDS_FISTS
11053 putc ('s', file);
11054 #endif
11055 return;
11056 }
11057 else
11058 putc ('w', file);
11059 return;
11060
11061 case 4:
11062 if (GET_MODE (x) == SFmode)
11063 {
11064 putc ('s', file);
11065 return;
11066 }
11067 else
11068 putc ('l', file);
11069 return;
11070
11071 case 12:
11072 case 16:
11073 putc ('t', file);
11074 return;
11075
11076 case 8:
11077 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11078 {
11079 if (MEM_P (x))
11080 {
11081 #ifdef GAS_MNEMONICS
11082 putc ('q', file);
11083 #else
11084 putc ('l', file);
11085 putc ('l', file);
11086 #endif
11087 }
11088 else
11089 putc ('q', file);
11090 }
11091 else
11092 putc ('l', file);
11093 return;
11094
11095 default:
11096 gcc_unreachable ();
11097 }
11098
11099 case 'd':
11100 case 'b':
11101 case 'w':
11102 case 'k':
11103 case 'q':
11104 case 'h':
11105 case 't':
11106 case 'y':
11107 case 'x':
11108 case 'X':
11109 case 'P':
11110 break;
11111
11112 case 's':
11113 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11114 {
11115 PRINT_OPERAND (file, x, 0);
11116 fputs (", ", file);
11117 }
11118 return;
11119
11120 case 'D':
11121 /* Little bit of braindamage here. The SSE compare instructions
11122 use completely different names for the comparisons than the
11123 fp conditional moves do. */
11124 if (TARGET_AVX)
11125 {
11126 switch (GET_CODE (x))
11127 {
11128 case EQ:
11129 fputs ("eq", file);
11130 break;
11131 case UNEQ:
11132 fputs ("eq_us", file);
11133 break;
11134 case LT:
11135 fputs ("lt", file);
11136 break;
11137 case UNLT:
11138 fputs ("nge", file);
11139 break;
11140 case LE:
11141 fputs ("le", file);
11142 break;
11143 case UNLE:
11144 fputs ("ngt", file);
11145 break;
11146 case UNORDERED:
11147 fputs ("unord", file);
11148 break;
11149 case NE:
11150 fputs ("neq", file);
11151 break;
11152 case LTGT:
11153 fputs ("neq_oq", file);
11154 break;
11155 case GE:
11156 fputs ("ge", file);
11157 break;
11158 case UNGE:
11159 fputs ("nlt", file);
11160 break;
11161 case GT:
11162 fputs ("gt", file);
11163 break;
11164 case UNGT:
11165 fputs ("nle", file);
11166 break;
11167 case ORDERED:
11168 fputs ("ord", file);
11169 break;
11170 default:
11171 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11172 return;
11173 }
11174 }
11175 else
11176 {
11177 switch (GET_CODE (x))
11178 {
11179 case EQ:
11180 case UNEQ:
11181 fputs ("eq", file);
11182 break;
11183 case LT:
11184 case UNLT:
11185 fputs ("lt", file);
11186 break;
11187 case LE:
11188 case UNLE:
11189 fputs ("le", file);
11190 break;
11191 case UNORDERED:
11192 fputs ("unord", file);
11193 break;
11194 case NE:
11195 case LTGT:
11196 fputs ("neq", file);
11197 break;
11198 case UNGE:
11199 case GE:
11200 fputs ("nlt", file);
11201 break;
11202 case UNGT:
11203 case GT:
11204 fputs ("nle", file);
11205 break;
11206 case ORDERED:
11207 fputs ("ord", file);
11208 break;
11209 default:
11210 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11211 return;
11212 }
11213 }
11214 return;
11215 case 'O':
11216 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11217 if (ASSEMBLER_DIALECT == ASM_ATT)
11218 {
11219 switch (GET_MODE (x))
11220 {
11221 case HImode: putc ('w', file); break;
11222 case SImode:
11223 case SFmode: putc ('l', file); break;
11224 case DImode:
11225 case DFmode: putc ('q', file); break;
11226 default: gcc_unreachable ();
11227 }
11228 putc ('.', file);
11229 }
11230 #endif
11231 return;
11232 case 'C':
11233 if (!COMPARISON_P (x))
11234 {
11235 output_operand_lossage ("operand is neither a constant nor a "
11236 "condition code, invalid operand code "
11237 "'C'");
11238 return;
11239 }
11240 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11241 return;
11242 case 'F':
11243 if (!COMPARISON_P (x))
11244 {
11245 output_operand_lossage ("operand is neither a constant nor a "
11246 "condition code, invalid operand code "
11247 "'F'");
11248 return;
11249 }
11250 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11251 if (ASSEMBLER_DIALECT == ASM_ATT)
11252 putc ('.', file);
11253 #endif
11254 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11255 return;
11256
11257 /* Like above, but with the condition reversed. */
11258 case 'c':
11259 /* Check to see if argument to %c is really a constant
11260 and not a condition code which needs to be reversed. */
11261 if (!COMPARISON_P (x))
11262 {
11263 output_operand_lossage ("operand is neither a constant nor a "
11264 "condition code, invalid operand "
11265 "code 'c'");
11266 return;
11267 }
11268 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11269 return;
11270 case 'f':
11271 if (!COMPARISON_P (x))
11272 {
11273 output_operand_lossage ("operand is neither a constant nor a "
11274 "condition code, invalid operand "
11275 "code 'f'");
11276 return;
11277 }
11278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11279 if (ASSEMBLER_DIALECT == ASM_ATT)
11280 putc ('.', file);
11281 #endif
11282 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11283 return;
11284
11285 case 'H':
11286 /* It doesn't actually matter what mode we use here, as we're
11287 only going to use this for printing. */
11288 x = adjust_address_nv (x, DImode, 8);
11289 break;
11290
11291 case '+':
11292 {
11293 rtx x;
11294
11295 if (!optimize
11296 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11297 return;
11298
11299 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11300 if (x)
11301 {
11302 int pred_val = INTVAL (XEXP (x, 0));
11303
11304 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11305 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11306 {
11307 int taken = pred_val > REG_BR_PROB_BASE / 2;
11308 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11309
11310 /* Emit hints only in cases where the default branch prediction
11311 heuristics would fail. */
11312 if (taken != cputaken)
11313 {
11314 /* We use 3e (DS) prefix for taken branches and
11315 2e (CS) prefix for not taken branches. */
11316 if (taken)
11317 fputs ("ds ; ", file);
11318 else
11319 fputs ("cs ; ", file);
11320 }
11321 }
11322 }
11323 return;
11324 }
11325
11326 case 'Y':
11327 switch (GET_CODE (x))
11328 {
11329 case NE:
11330 fputs ("neq", file);
11331 break;
11332 case EQ:
11333 fputs ("eq", file);
11334 break;
11335 case GE:
11336 case GEU:
11337 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11338 break;
11339 case GT:
11340 case GTU:
11341 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11342 break;
11343 case LE:
11344 case LEU:
11345 fputs ("le", file);
11346 break;
11347 case LT:
11348 case LTU:
11349 fputs ("lt", file);
11350 break;
11351 case UNORDERED:
11352 fputs ("unord", file);
11353 break;
11354 case ORDERED:
11355 fputs ("ord", file);
11356 break;
11357 case UNEQ:
11358 fputs ("ueq", file);
11359 break;
11360 case UNGE:
11361 fputs ("nlt", file);
11362 break;
11363 case UNGT:
11364 fputs ("nle", file);
11365 break;
11366 case UNLE:
11367 fputs ("ule", file);
11368 break;
11369 case UNLT:
11370 fputs ("ult", file);
11371 break;
11372 case LTGT:
11373 fputs ("une", file);
11374 break;
11375 default:
11376 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11377 return;
11378 }
11379 return;
11380
11381 case ';':
11382 #if TARGET_MACHO
11383 fputs (" ; ", file);
11384 #else
11385 fputc (' ', file);
11386 #endif
11387 return;
11388
11389 default:
11390 output_operand_lossage ("invalid operand code '%c'", code);
11391 }
11392 }
11393
11394 if (REG_P (x))
11395 print_reg (x, code, file);
11396
11397 else if (MEM_P (x))
11398 {
11399 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11400 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11401 && GET_MODE (x) != BLKmode)
11402 {
11403 const char * size;
11404 switch (GET_MODE_SIZE (GET_MODE (x)))
11405 {
11406 case 1: size = "BYTE"; break;
11407 case 2: size = "WORD"; break;
11408 case 4: size = "DWORD"; break;
11409 case 8: size = "QWORD"; break;
11410 case 12: size = "TBYTE"; break;
11411 case 16:
11412 if (GET_MODE (x) == XFmode)
11413 size = "TBYTE";
11414 else
11415 size = "XMMWORD";
11416 break;
11417 case 32: size = "YMMWORD"; break;
11418 default:
11419 gcc_unreachable ();
11420 }
11421
11422 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11423 if (code == 'b')
11424 size = "BYTE";
11425 else if (code == 'w')
11426 size = "WORD";
11427 else if (code == 'k')
11428 size = "DWORD";
11429
11430 fputs (size, file);
11431 fputs (" PTR ", file);
11432 }
11433
11434 x = XEXP (x, 0);
11435 /* Avoid (%rip) for call operands. */
11436 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11437 && !CONST_INT_P (x))
11438 output_addr_const (file, x);
11439 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11440 output_operand_lossage ("invalid constraints for operand");
11441 else
11442 output_address (x);
11443 }
11444
11445 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11446 {
11447 REAL_VALUE_TYPE r;
11448 long l;
11449
11450 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11451 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11452
11453 if (ASSEMBLER_DIALECT == ASM_ATT)
11454 putc ('$', file);
11455 fprintf (file, "0x%08lx", (long unsigned int) l);
11456 }
11457
11458 /* These float cases don't actually occur as immediate operands. */
11459 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11460 {
11461 char dstr[30];
11462
11463 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11464 fprintf (file, "%s", dstr);
11465 }
11466
11467 else if (GET_CODE (x) == CONST_DOUBLE
11468 && GET_MODE (x) == XFmode)
11469 {
11470 char dstr[30];
11471
11472 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11473 fprintf (file, "%s", dstr);
11474 }
11475
11476 else
11477 {
11478 /* We have patterns that allow zero sets of memory, for instance.
11479 In 64-bit mode, we should probably support all 8-byte vectors,
11480 since we can in fact encode that into an immediate. */
11481 if (GET_CODE (x) == CONST_VECTOR)
11482 {
11483 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11484 x = const0_rtx;
11485 }
11486
11487 if (code != 'P')
11488 {
11489 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11490 {
11491 if (ASSEMBLER_DIALECT == ASM_ATT)
11492 putc ('$', file);
11493 }
11494 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11495 || GET_CODE (x) == LABEL_REF)
11496 {
11497 if (ASSEMBLER_DIALECT == ASM_ATT)
11498 putc ('$', file);
11499 else
11500 fputs ("OFFSET FLAT:", file);
11501 }
11502 }
11503 if (CONST_INT_P (x))
11504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11505 else if (flag_pic)
11506 output_pic_addr_const (file, x, code);
11507 else
11508 output_addr_const (file, x);
11509 }
11510 }
11511
11512 /* Print a memory operand whose address is ADDR. */
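/* Illustrative only (the registers are hypothetical): for an address with
   base %eax, index %ebx, scale 4 and displacement 8, the AT&T form printed
   below is "8(%eax,%ebx,4)" while the Intel form comes out as
   "[eax+8+ebx*4]".  */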
11513
11514 void
11515 print_operand_address (FILE *file, rtx addr)
11516 {
11517 struct ix86_address parts;
11518 rtx base, index, disp;
11519 int scale;
11520 int ok = ix86_decompose_address (addr, &parts);
11521
11522 gcc_assert (ok);
11523
11524 base = parts.base;
11525 index = parts.index;
11526 disp = parts.disp;
11527 scale = parts.scale;
11528
11529 switch (parts.seg)
11530 {
11531 case SEG_DEFAULT:
11532 break;
11533 case SEG_FS:
11534 case SEG_GS:
11535 if (ASSEMBLER_DIALECT == ASM_ATT)
11536 putc ('%', file);
11537 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11538 break;
11539 default:
11540 gcc_unreachable ();
11541 }
11542
11543 /* Use the one-byte-shorter RIP relative addressing for 64-bit mode. */
11544 if (TARGET_64BIT && !base && !index)
11545 {
11546 rtx symbol = disp;
11547
11548 if (GET_CODE (disp) == CONST
11549 && GET_CODE (XEXP (disp, 0)) == PLUS
11550 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11551 symbol = XEXP (XEXP (disp, 0), 0);
11552
11553 if (GET_CODE (symbol) == LABEL_REF
11554 || (GET_CODE (symbol) == SYMBOL_REF
11555 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11556 base = pc_rtx;
11557 }
11558 if (!base && !index)
11559 {
11560 /* A displacement-only address requires special attention. */
11561
11562 if (CONST_INT_P (disp))
11563 {
11564 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11565 fputs ("ds:", file);
11566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11567 }
11568 else if (flag_pic)
11569 output_pic_addr_const (file, disp, 0);
11570 else
11571 output_addr_const (file, disp);
11572 }
11573 else
11574 {
11575 if (ASSEMBLER_DIALECT == ASM_ATT)
11576 {
11577 if (disp)
11578 {
11579 if (flag_pic)
11580 output_pic_addr_const (file, disp, 0);
11581 else if (GET_CODE (disp) == LABEL_REF)
11582 output_asm_label (disp);
11583 else
11584 output_addr_const (file, disp);
11585 }
11586
11587 putc ('(', file);
11588 if (base)
11589 print_reg (base, 0, file);
11590 if (index)
11591 {
11592 putc (',', file);
11593 print_reg (index, 0, file);
11594 if (scale != 1)
11595 fprintf (file, ",%d", scale);
11596 }
11597 putc (')', file);
11598 }
11599 else
11600 {
11601 rtx offset = NULL_RTX;
11602
11603 if (disp)
11604 {
11605 /* Pull out the offset of a symbol; print any symbol itself. */
11606 if (GET_CODE (disp) == CONST
11607 && GET_CODE (XEXP (disp, 0)) == PLUS
11608 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11609 {
11610 offset = XEXP (XEXP (disp, 0), 1);
11611 disp = gen_rtx_CONST (VOIDmode,
11612 XEXP (XEXP (disp, 0), 0));
11613 }
11614
11615 if (flag_pic)
11616 output_pic_addr_const (file, disp, 0);
11617 else if (GET_CODE (disp) == LABEL_REF)
11618 output_asm_label (disp);
11619 else if (CONST_INT_P (disp))
11620 offset = disp;
11621 else
11622 output_addr_const (file, disp);
11623 }
11624
11625 putc ('[', file);
11626 if (base)
11627 {
11628 print_reg (base, 0, file);
11629 if (offset)
11630 {
11631 if (INTVAL (offset) >= 0)
11632 putc ('+', file);
11633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11634 }
11635 }
11636 else if (offset)
11637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11638 else
11639 putc ('0', file);
11640
11641 if (index)
11642 {
11643 putc ('+', file);
11644 print_reg (index, 0, file);
11645 if (scale != 1)
11646 fprintf (file, "*%d", scale);
11647 }
11648 putc (']', file);
11649 }
11650 }
11651 }
11652
11653 bool
11654 output_addr_const_extra (FILE *file, rtx x)
11655 {
11656 rtx op;
11657
11658 if (GET_CODE (x) != UNSPEC)
11659 return false;
11660
11661 op = XVECEXP (x, 0, 0);
11662 switch (XINT (x, 1))
11663 {
11664 case UNSPEC_GOTTPOFF:
11665 output_addr_const (file, op);
11666 /* FIXME: This might be @TPOFF in Sun ld. */
11667 fputs ("@GOTTPOFF", file);
11668 break;
11669 case UNSPEC_TPOFF:
11670 output_addr_const (file, op);
11671 fputs ("@TPOFF", file);
11672 break;
11673 case UNSPEC_NTPOFF:
11674 output_addr_const (file, op);
11675 if (TARGET_64BIT)
11676 fputs ("@TPOFF", file);
11677 else
11678 fputs ("@NTPOFF", file);
11679 break;
11680 case UNSPEC_DTPOFF:
11681 output_addr_const (file, op);
11682 fputs ("@DTPOFF", file);
11683 break;
11684 case UNSPEC_GOTNTPOFF:
11685 output_addr_const (file, op);
11686 if (TARGET_64BIT)
11687 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11688 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11689 else
11690 fputs ("@GOTNTPOFF", file);
11691 break;
11692 case UNSPEC_INDNTPOFF:
11693 output_addr_const (file, op);
11694 fputs ("@INDNTPOFF", file);
11695 break;
11696 #if TARGET_MACHO
11697 case UNSPEC_MACHOPIC_OFFSET:
11698 output_addr_const (file, op);
11699 putc ('-', file);
11700 machopic_output_function_base_name (file);
11701 break;
11702 #endif
11703
11704 default:
11705 return false;
11706 }
11707
11708 return true;
11709 }
11710
11711 /* Split one or more DImode RTL references into pairs of SImode
11712 references. The RTL can be REG, offsettable MEM, integer constant, or
11713 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11714 split and "num" is its length. lo_half and hi_half are output arrays
11715 that parallel "operands". */
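/* A quick illustration (not from the original comment): a DImode MEM at
   address A splits into SImode MEMs at A and A+4, while a 64-bit constant
   such as 0x0000000100000002 splits into lo_half 0x00000002 and hi_half
   0x00000001, since on x86 the low half lives at the lower address.  */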
11716
11717 void
11718 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11719 {
11720 while (num--)
11721 {
11722 rtx op = operands[num];
11723
11724 /* simplify_subreg refuses to split volatile memory addresses,
11725 but we still have to handle them. */
11726 if (MEM_P (op))
11727 {
11728 lo_half[num] = adjust_address (op, SImode, 0);
11729 hi_half[num] = adjust_address (op, SImode, 4);
11730 }
11731 else
11732 {
11733 lo_half[num] = simplify_gen_subreg (SImode, op,
11734 GET_MODE (op) == VOIDmode
11735 ? DImode : GET_MODE (op), 0);
11736 hi_half[num] = simplify_gen_subreg (SImode, op,
11737 GET_MODE (op) == VOIDmode
11738 ? DImode : GET_MODE (op), 4);
11739 }
11740 }
11741 }
11742 /* Split one or more TImode RTL references into pairs of DImode
11743 references. The RTL can be REG, offsettable MEM, integer constant, or
11744 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
11745 split and "num" is its length. lo_half and hi_half are output arrays
11746 that parallel "operands". */
11747
11748 void
11749 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11750 {
11751 while (num--)
11752 {
11753 rtx op = operands[num];
11754
11755 /* simplify_subreg refuses to split volatile memory addresses, but we
11756 still have to handle them. */
11757 if (MEM_P (op))
11758 {
11759 lo_half[num] = adjust_address (op, DImode, 0);
11760 hi_half[num] = adjust_address (op, DImode, 8);
11761 }
11762 else
11763 {
11764 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11765 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11766 }
11767 }
11768 }
11769
11770 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11771 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11772 is the expression of the binary operation. The output may either be
11773 emitted here, or returned to the caller, like all output_* functions.
11774
11775 There is no guarantee that the operands are the same mode, as they
11776 might be within FLOAT or FLOAT_EXTEND expressions. */
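/* Illustrative example under assumed operands (AT&T syntax): for a DFmode
   addition where operands[0] and operands[1] are both %st and operands[2]
   is %st(1) and stays live, the string built below is "fadd\t%st(1), %st";
   had operands[2] been a memory operand instead, it would come out as
   "faddl" applied to that memory operand.  */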
11777
11778 #ifndef SYSV386_COMPAT
11779 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11780 wants to fix the assemblers because that causes incompatibility
11781 with gcc. No-one wants to fix gcc because that causes
11782 incompatibility with assemblers... You can use the option of
11783 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11784 #define SYSV386_COMPAT 1
11785 #endif
11786
11787 const char *
11788 output_387_binary_op (rtx insn, rtx *operands)
11789 {
11790 static char buf[40];
11791 const char *p;
11792 const char *ssep;
11793 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11794
11795 #ifdef ENABLE_CHECKING
11796 /* Even if we do not want to check the inputs, this documents the input
11797 constraints, which helps in understanding the following code. */
11798 if (STACK_REG_P (operands[0])
11799 && ((REG_P (operands[1])
11800 && REGNO (operands[0]) == REGNO (operands[1])
11801 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11802 || (REG_P (operands[2])
11803 && REGNO (operands[0]) == REGNO (operands[2])
11804 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11805 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11806 ; /* ok */
11807 else
11808 gcc_assert (is_sse);
11809 #endif
11810
11811 switch (GET_CODE (operands[3]))
11812 {
11813 case PLUS:
11814 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11815 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11816 p = "fiadd";
11817 else
11818 p = "fadd";
11819 ssep = "vadd";
11820 break;
11821
11822 case MINUS:
11823 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11824 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11825 p = "fisub";
11826 else
11827 p = "fsub";
11828 ssep = "vsub";
11829 break;
11830
11831 case MULT:
11832 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11833 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11834 p = "fimul";
11835 else
11836 p = "fmul";
11837 ssep = "vmul";
11838 break;
11839
11840 case DIV:
11841 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11842 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11843 p = "fidiv";
11844 else
11845 p = "fdiv";
11846 ssep = "vdiv";
11847 break;
11848
11849 default:
11850 gcc_unreachable ();
11851 }
11852
11853 if (is_sse)
11854 {
11855 if (TARGET_AVX)
11856 {
11857 strcpy (buf, ssep);
11858 if (GET_MODE (operands[0]) == SFmode)
11859 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11860 else
11861 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11862 }
11863 else
11864 {
11865 strcpy (buf, ssep + 1);
11866 if (GET_MODE (operands[0]) == SFmode)
11867 strcat (buf, "ss\t{%2, %0|%0, %2}");
11868 else
11869 strcat (buf, "sd\t{%2, %0|%0, %2}");
11870 }
11871 return buf;
11872 }
11873 strcpy (buf, p);
11874
11875 switch (GET_CODE (operands[3]))
11876 {
11877 case MULT:
11878 case PLUS:
11879 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11880 {
11881 rtx temp = operands[2];
11882 operands[2] = operands[1];
11883 operands[1] = temp;
11884 }
11885
11886 /* We know operands[0] == operands[1]. */
11887
11888 if (MEM_P (operands[2]))
11889 {
11890 p = "%z2\t%2";
11891 break;
11892 }
11893
11894 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11895 {
11896 if (STACK_TOP_P (operands[0]))
11897 /* How is it that we are storing to a dead operand[2]?
11898 Well, presumably operands[1] is dead too. We can't
11899 store the result to st(0) as st(0) gets popped on this
11900 instruction. Instead store to operands[2] (which I
11901 think has to be st(1)). st(1) will be popped later.
11902 gcc <= 2.8.1 didn't have this check and generated
11903 assembly code that the Unixware assembler rejected. */
11904 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11905 else
11906 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11907 break;
11908 }
11909
11910 if (STACK_TOP_P (operands[0]))
11911 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11912 else
11913 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11914 break;
11915
11916 case MINUS:
11917 case DIV:
11918 if (MEM_P (operands[1]))
11919 {
11920 p = "r%z1\t%1";
11921 break;
11922 }
11923
11924 if (MEM_P (operands[2]))
11925 {
11926 p = "%z2\t%2";
11927 break;
11928 }
11929
11930 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11931 {
11932 #if SYSV386_COMPAT
11933 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11934 derived assemblers, confusingly reverse the direction of
11935 the operation for fsub{r} and fdiv{r} when the
11936 destination register is not st(0). The Intel assembler
11937 doesn't have this brain damage. Read !SYSV386_COMPAT to
11938 figure out what the hardware really does. */
11939 if (STACK_TOP_P (operands[0]))
11940 p = "{p\t%0, %2|rp\t%2, %0}";
11941 else
11942 p = "{rp\t%2, %0|p\t%0, %2}";
11943 #else
11944 if (STACK_TOP_P (operands[0]))
11945 /* As above for fmul/fadd, we can't store to st(0). */
11946 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11947 else
11948 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11949 #endif
11950 break;
11951 }
11952
11953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11954 {
11955 #if SYSV386_COMPAT
11956 if (STACK_TOP_P (operands[0]))
11957 p = "{rp\t%0, %1|p\t%1, %0}";
11958 else
11959 p = "{p\t%1, %0|rp\t%0, %1}";
11960 #else
11961 if (STACK_TOP_P (operands[0]))
11962 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11963 else
11964 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11965 #endif
11966 break;
11967 }
11968
11969 if (STACK_TOP_P (operands[0]))
11970 {
11971 if (STACK_TOP_P (operands[1]))
11972 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11973 else
11974 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11975 break;
11976 }
11977 else if (STACK_TOP_P (operands[1]))
11978 {
11979 #if SYSV386_COMPAT
11980 p = "{\t%1, %0|r\t%0, %1}";
11981 #else
11982 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11983 #endif
11984 }
11985 else
11986 {
11987 #if SYSV386_COMPAT
11988 p = "{r\t%2, %0|\t%0, %2}";
11989 #else
11990 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11991 #endif
11992 }
11993 break;
11994
11995 default:
11996 gcc_unreachable ();
11997 }
11998
11999 strcat (buf, p);
12000 return buf;
12001 }
12002
12003 /* Return the mode needed for ENTITY in the optimize_mode_switching pass. */
12004
12005 int
12006 ix86_mode_needed (int entity, rtx insn)
12007 {
12008 enum attr_i387_cw mode;
12009
12010 /* The mode UNINITIALIZED is used to store the control word after a
12011 function call or ASM pattern. The mode ANY specifies that the function
12012 has no requirements on the control word and makes no changes to the
12013 bits we are interested in. */
12014
12015 if (CALL_P (insn)
12016 || (NONJUMP_INSN_P (insn)
12017 && (asm_noperands (PATTERN (insn)) >= 0
12018 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12019 return I387_CW_UNINITIALIZED;
12020
12021 if (recog_memoized (insn) < 0)
12022 return I387_CW_ANY;
12023
12024 mode = get_attr_i387_cw (insn);
12025
12026 switch (entity)
12027 {
12028 case I387_TRUNC:
12029 if (mode == I387_CW_TRUNC)
12030 return mode;
12031 break;
12032
12033 case I387_FLOOR:
12034 if (mode == I387_CW_FLOOR)
12035 return mode;
12036 break;
12037
12038 case I387_CEIL:
12039 if (mode == I387_CW_CEIL)
12040 return mode;
12041 break;
12042
12043 case I387_MASK_PM:
12044 if (mode == I387_CW_MASK_PM)
12045 return mode;
12046 break;
12047
12048 default:
12049 gcc_unreachable ();
12050 }
12051
12052 return I387_CW_ANY;
12053 }
12054
12055 /* Output code to initialize the control word copies used by the trunc?f?i
12056 and rounding patterns. CURRENT_MODE is set to the current control word,
12057 while NEW_MODE is set to the new control word. */
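/* For reference (standard x87 facts, not taken from this file): bits 11:10
   of the x87 control word select the rounding mode -- 00 nearest, 01 down,
   10 up, 11 toward zero -- which is why the masks 0x0400, 0x0800 and 0x0c00
   appear below, and bit 5 (0x0020) masks the precision exception used for
   nearbyint.  */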
12058
12059 void
12060 emit_i387_cw_initialization (int mode)
12061 {
12062 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12063 rtx new_mode;
12064
12065 enum ix86_stack_slot slot;
12066
12067 rtx reg = gen_reg_rtx (HImode);
12068
12069 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12070 emit_move_insn (reg, copy_rtx (stored_mode));
12071
12072 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12073 || optimize_function_for_size_p (cfun))
12074 {
12075 switch (mode)
12076 {
12077 case I387_CW_TRUNC:
12078 /* round toward zero (truncate) */
12079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12080 slot = SLOT_CW_TRUNC;
12081 break;
12082
12083 case I387_CW_FLOOR:
12084 /* round down toward -oo */
12085 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12087 slot = SLOT_CW_FLOOR;
12088 break;
12089
12090 case I387_CW_CEIL:
12091 /* round up toward +oo */
12092 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12093 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12094 slot = SLOT_CW_CEIL;
12095 break;
12096
12097 case I387_CW_MASK_PM:
12098 /* mask precision exception for nearbyint() */
12099 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12100 slot = SLOT_CW_MASK_PM;
12101 break;
12102
12103 default:
12104 gcc_unreachable ();
12105 }
12106 }
12107 else
12108 {
12109 switch (mode)
12110 {
12111 case I387_CW_TRUNC:
12112 /* round toward zero (truncate) */
12113 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12114 slot = SLOT_CW_TRUNC;
12115 break;
12116
12117 case I387_CW_FLOOR:
12118 /* round down toward -oo */
12119 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12120 slot = SLOT_CW_FLOOR;
12121 break;
12122
12123 case I387_CW_CEIL:
12124 /* round up toward +oo */
12125 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12126 slot = SLOT_CW_CEIL;
12127 break;
12128
12129 case I387_CW_MASK_PM:
12130 /* mask precision exception for nearbyint() */
12131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12132 slot = SLOT_CW_MASK_PM;
12133 break;
12134
12135 default:
12136 gcc_unreachable ();
12137 }
12138 }
12139
12140 gcc_assert (slot < MAX_386_STACK_LOCALS);
12141
12142 new_mode = assign_386_stack_local (HImode, slot);
12143 emit_move_insn (new_mode, reg);
12144 }
12145
12146 /* Output code for INSN to convert a float to a signed int. OPERANDS
12147 are the insn operands. The output may be [HSD]Imode and the input
12148 operand may be [SDX]Fmode. */
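/* Illustrative only: with the new rounding-mode control word in operand 3
   and the saved original control word in operand 2, the non-fisttp path
   below typically emits, for an SImode destination,
	fldcw %3
	fistpl %0
	fldcw %2
   preceded by "fld %y1" for DImode when the top of the stack does not die,
   since only a popping store exists for DImode.  */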
12149
12150 const char *
12151 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12152 {
12153 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12154 int dimode_p = GET_MODE (operands[0]) == DImode;
12155 int round_mode = get_attr_i387_cw (insn);
12156
12157 /* Jump through a hoop or two for DImode, since the hardware has no
12158 non-popping instruction. We used to do this a different way, but
12159 that was somewhat fragile and broke with post-reload splitters. */
12160 if ((dimode_p || fisttp) && !stack_top_dies)
12161 output_asm_insn ("fld\t%y1", operands);
12162
12163 gcc_assert (STACK_TOP_P (operands[1]));
12164 gcc_assert (MEM_P (operands[0]));
12165 gcc_assert (GET_MODE (operands[1]) != TFmode);
12166
12167 if (fisttp)
12168 output_asm_insn ("fisttp%z0\t%0", operands);
12169 else
12170 {
12171 if (round_mode != I387_CW_ANY)
12172 output_asm_insn ("fldcw\t%3", operands);
12173 if (stack_top_dies || dimode_p)
12174 output_asm_insn ("fistp%z0\t%0", operands);
12175 else
12176 output_asm_insn ("fist%z0\t%0", operands);
12177 if (round_mode != I387_CW_ANY)
12178 output_asm_insn ("fldcw\t%2", operands);
12179 }
12180
12181 return "";
12182 }
12183
12184 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12185 have the values zero or one, indicates the ffreep insn's operand
12186 from the OPERANDS array. */
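/* Encoding note (illustrative): ffreep %st(N) is the two-byte opcode
   0xdf 0xc0+N, so when the assembler lacks the mnemonic the fallback below
   emits it as a little-endian 16-bit word, e.g. ASM_SHORT "0xc0df" for
   %st(0).  */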
12187
12188 static const char *
12189 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12190 {
12191 if (TARGET_USE_FFREEP)
12192 #ifdef HAVE_AS_IX86_FFREEP
12193 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12194 #else
12195 {
12196 static char retval[32];
12197 int regno = REGNO (operands[opno]);
12198
12199 gcc_assert (FP_REGNO_P (regno));
12200
12201 regno -= FIRST_STACK_REG;
12202
12203 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12204 return retval;
12205 }
12206 #endif
12207
12208 return opno ? "fstp\t%y1" : "fstp\t%y0";
12209 }
12210
12211
12212 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12213 should be used. UNORDERED_P is true when fucom should be used. */
12214
12215 const char *
12216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12217 {
12218 int stack_top_dies;
12219 rtx cmp_op0, cmp_op1;
12220 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12221
12222 if (eflags_p)
12223 {
12224 cmp_op0 = operands[0];
12225 cmp_op1 = operands[1];
12226 }
12227 else
12228 {
12229 cmp_op0 = operands[1];
12230 cmp_op1 = operands[2];
12231 }
12232
12233 if (is_sse)
12234 {
12235 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12236 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12237 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12238 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12239
12240 if (GET_MODE (operands[0]) == SFmode)
12241 if (unordered_p)
12242 return &ucomiss[TARGET_AVX ? 0 : 1];
12243 else
12244 return &comiss[TARGET_AVX ? 0 : 1];
12245 else
12246 if (unordered_p)
12247 return &ucomisd[TARGET_AVX ? 0 : 1];
12248 else
12249 return &comisd[TARGET_AVX ? 0 : 1];
12250 }
12251
12252 gcc_assert (STACK_TOP_P (cmp_op0));
12253
12254 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12255
12256 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12257 {
12258 if (stack_top_dies)
12259 {
12260 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12261 return output_387_ffreep (operands, 1);
12262 }
12263 else
12264 return "ftst\n\tfnstsw\t%0";
12265 }
12266
12267 if (STACK_REG_P (cmp_op1)
12268 && stack_top_dies
12269 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12270 && REGNO (cmp_op1) != FIRST_STACK_REG)
12271 {
12272 /* If the top of the 387 stack dies, and the other operand
12273 is also a stack register that dies, then this must be a
12274 `fcompp' float compare. */
12275
12276 if (eflags_p)
12277 {
12278 /* There is no double popping fcomi variant. Fortunately,
12279 eflags is immune from the fstp's cc clobbering. */
12280 if (unordered_p)
12281 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12282 else
12283 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12284 return output_387_ffreep (operands, 0);
12285 }
12286 else
12287 {
12288 if (unordered_p)
12289 return "fucompp\n\tfnstsw\t%0";
12290 else
12291 return "fcompp\n\tfnstsw\t%0";
12292 }
12293 }
12294 else
12295 {
12296 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12297
12298 static const char * const alt[16] =
12299 {
12300 "fcom%z2\t%y2\n\tfnstsw\t%0",
12301 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12302 "fucom%z2\t%y2\n\tfnstsw\t%0",
12303 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12304
12305 "ficom%z2\t%y2\n\tfnstsw\t%0",
12306 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12307 NULL,
12308 NULL,
12309
12310 "fcomi\t{%y1, %0|%0, %y1}",
12311 "fcomip\t{%y1, %0|%0, %y1}",
12312 "fucomi\t{%y1, %0|%0, %y1}",
12313 "fucomip\t{%y1, %0|%0, %y1}",
12314
12315 NULL,
12316 NULL,
12317 NULL,
12318 NULL
12319 };
12320
12321 int mask;
12322 const char *ret;
12323
12324 mask = eflags_p << 3;
12325 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12326 mask |= unordered_p << 1;
12327 mask |= stack_top_dies;
12328
12329 gcc_assert (mask < 16);
12330 ret = alt[mask];
12331 gcc_assert (ret);
12332
12333 return ret;
12334 }
12335 }
12336
12337 void
12338 ix86_output_addr_vec_elt (FILE *file, int value)
12339 {
12340 const char *directive = ASM_LONG;
12341
12342 #ifdef ASM_QUAD
12343 if (TARGET_64BIT)
12344 directive = ASM_QUAD;
12345 #else
12346 gcc_assert (!TARGET_64BIT);
12347 #endif
12348
12349 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12350 }
12351
12352 void
12353 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12354 {
12355 const char *directive = ASM_LONG;
12356
12357 #ifdef ASM_QUAD
12358 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12359 directive = ASM_QUAD;
12360 #else
12361 gcc_assert (!TARGET_64BIT);
12362 #endif
12363 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12364 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12365 fprintf (file, "%s%s%d-%s%d\n",
12366 directive, LPREFIX, value, LPREFIX, rel);
12367 else if (HAVE_AS_GOTOFF_IN_DATA)
12368 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12369 #if TARGET_MACHO
12370 else if (TARGET_MACHO)
12371 {
12372 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12373 machopic_output_function_base_name (file);
12374 fprintf(file, "\n");
12375 }
12376 #endif
12377 else
12378 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12379 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12380 }
12381
12382 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12383 for the target. */
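/* Illustrative note: after reload this normally produces an
   "xorl %eax, %eax" style clear, wrapped in a PARALLEL with a FLAGS_REG
   clobber so it matches the movsi_xor/movdi_xor_rex64 patterns; targets
   that prefer TARGET_USE_MOV0 may instead get a plain "movl $0, %eax".  */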
12384
12385 void
12386 ix86_expand_clear (rtx dest)
12387 {
12388 rtx tmp;
12389
12390 /* We play register width games, which are only valid after reload. */
12391 gcc_assert (reload_completed);
12392
12393 /* Avoid HImode and its attendant prefix byte. */
12394 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12395 dest = gen_rtx_REG (SImode, REGNO (dest));
12396 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12397
12398 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12399 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12400 {
12401 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12402 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12403 }
12404
12405 emit_insn (tmp);
12406 }
12407
12408 /* X is an unchanging MEM. If it is a constant pool reference, return
12409 the constant pool rtx, else NULL. */
12410
12411 rtx
12412 maybe_get_pool_constant (rtx x)
12413 {
12414 x = ix86_delegitimize_address (XEXP (x, 0));
12415
12416 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12417 return get_pool_constant (x);
12418
12419 return NULL_RTX;
12420 }
12421
12422 void
12423 ix86_expand_move (enum machine_mode mode, rtx operands[])
12424 {
12425 rtx op0, op1;
12426 enum tls_model model;
12427
12428 op0 = operands[0];
12429 op1 = operands[1];
12430
12431 if (GET_CODE (op1) == SYMBOL_REF)
12432 {
12433 model = SYMBOL_REF_TLS_MODEL (op1);
12434 if (model)
12435 {
12436 op1 = legitimize_tls_address (op1, model, true);
12437 op1 = force_operand (op1, op0);
12438 if (op1 == op0)
12439 return;
12440 }
12441 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12442 && SYMBOL_REF_DLLIMPORT_P (op1))
12443 op1 = legitimize_dllimport_symbol (op1, false);
12444 }
12445 else if (GET_CODE (op1) == CONST
12446 && GET_CODE (XEXP (op1, 0)) == PLUS
12447 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12448 {
12449 rtx addend = XEXP (XEXP (op1, 0), 1);
12450 rtx symbol = XEXP (XEXP (op1, 0), 0);
12451 rtx tmp = NULL;
12452
12453 model = SYMBOL_REF_TLS_MODEL (symbol);
12454 if (model)
12455 tmp = legitimize_tls_address (symbol, model, true);
12456 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12457 && SYMBOL_REF_DLLIMPORT_P (symbol))
12458 tmp = legitimize_dllimport_symbol (symbol, true);
12459
12460 if (tmp)
12461 {
12462 tmp = force_operand (tmp, NULL);
12463 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12464 op0, 1, OPTAB_DIRECT);
12465 if (tmp == op0)
12466 return;
12467 }
12468 }
12469
12470 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12471 {
12472 if (TARGET_MACHO && !TARGET_64BIT)
12473 {
12474 #if TARGET_MACHO
12475 if (MACHOPIC_PURE)
12476 {
12477 rtx temp = ((reload_in_progress
12478 || ((op0 && REG_P (op0))
12479 && mode == Pmode))
12480 ? op0 : gen_reg_rtx (Pmode));
12481 op1 = machopic_indirect_data_reference (op1, temp);
12482 op1 = machopic_legitimize_pic_address (op1, mode,
12483 temp == op1 ? 0 : temp);
12484 }
12485 else if (MACHOPIC_INDIRECT)
12486 op1 = machopic_indirect_data_reference (op1, 0);
12487 if (op0 == op1)
12488 return;
12489 #endif
12490 }
12491 else
12492 {
12493 if (MEM_P (op0))
12494 op1 = force_reg (Pmode, op1);
12495 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12496 {
12497 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12498 op1 = legitimize_pic_address (op1, reg);
12499 if (op0 == op1)
12500 return;
12501 }
12502 }
12503 }
12504 else
12505 {
12506 if (MEM_P (op0)
12507 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12508 || !push_operand (op0, mode))
12509 && MEM_P (op1))
12510 op1 = force_reg (mode, op1);
12511
12512 if (push_operand (op0, mode)
12513 && ! general_no_elim_operand (op1, mode))
12514 op1 = copy_to_mode_reg (mode, op1);
12515
12516 /* Force large constants in 64-bit compilation into a register
12517 to get them CSEd. */
12518 if (can_create_pseudo_p ()
12519 && (mode == DImode) && TARGET_64BIT
12520 && immediate_operand (op1, mode)
12521 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12522 && !register_operand (op0, mode)
12523 && optimize)
12524 op1 = copy_to_mode_reg (mode, op1);
12525
12526 if (can_create_pseudo_p ()
12527 && FLOAT_MODE_P (mode)
12528 && GET_CODE (op1) == CONST_DOUBLE)
12529 {
12530 /* If we are loading a floating point constant to a register,
12531 force the value to memory now, since we'll get better code
12532 out of the back end. */
12533
12534 op1 = validize_mem (force_const_mem (mode, op1));
12535 if (!register_operand (op0, mode))
12536 {
12537 rtx temp = gen_reg_rtx (mode);
12538 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12539 emit_move_insn (op0, temp);
12540 return;
12541 }
12542 }
12543 }
12544
12545 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12546 }
12547
12548 void
12549 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12550 {
12551 rtx op0 = operands[0], op1 = operands[1];
12552 unsigned int align = GET_MODE_ALIGNMENT (mode);
12553
12554 /* Force constants other than zero into memory. We do not know how
12555 the instructions used to build constants modify the upper 64 bits
12556 of the register; once we have that information we may be able
12557 to handle some of them more efficiently. */
12558 if (can_create_pseudo_p ()
12559 && register_operand (op0, mode)
12560 && (CONSTANT_P (op1)
12561 || (GET_CODE (op1) == SUBREG
12562 && CONSTANT_P (SUBREG_REG (op1))))
12563 && standard_sse_constant_p (op1) <= 0)
12564 op1 = validize_mem (force_const_mem (mode, op1));
12565
12566 /* We need to check memory alignment for SSE mode since attributes
12567 can make operands unaligned. */
12568 if (can_create_pseudo_p ()
12569 && SSE_REG_MODE_P (mode)
12570 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12571 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12572 {
12573 rtx tmp[2];
12574
12575 /* ix86_expand_vector_move_misalign() does not like constants ... */
12576 if (CONSTANT_P (op1)
12577 || (GET_CODE (op1) == SUBREG
12578 && CONSTANT_P (SUBREG_REG (op1))))
12579 op1 = validize_mem (force_const_mem (mode, op1));
12580
12581 /* ... nor both arguments in memory. */
12582 if (!register_operand (op0, mode)
12583 && !register_operand (op1, mode))
12584 op1 = force_reg (mode, op1);
12585
12586 tmp[0] = op0; tmp[1] = op1;
12587 ix86_expand_vector_move_misalign (mode, tmp);
12588 return;
12589 }
12590
12591 /* If neither operand is a register, force operand 1 into a register. */
12592 if (can_create_pseudo_p ()
12593 && !register_operand (op0, mode)
12594 && !register_operand (op1, mode))
12595 {
12596 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12597 return;
12598 }
12599
12600 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12601 }
12602
12603 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12604 straight to ix86_expand_vector_move. */
12605 /* Code generation for scalar reg-reg moves of single and double precision data:
12606 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12607 movaps reg, reg
12608 else
12609 movss reg, reg
12610 if (x86_sse_partial_reg_dependency == true)
12611 movapd reg, reg
12612 else
12613 movsd reg, reg
12614
12615 Code generation for scalar loads of double precision data:
12616 if (x86_sse_split_regs == true)
12617 movlpd mem, reg (gas syntax)
12618 else
12619 movsd mem, reg
12620
12621 Code generation for unaligned packed loads of single precision data
12622 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12623 if (x86_sse_unaligned_move_optimal)
12624 movups mem, reg
12625
12626 if (x86_sse_partial_reg_dependency == true)
12627 {
12628 xorps reg, reg
12629 movlps mem, reg
12630 movhps mem+8, reg
12631 }
12632 else
12633 {
12634 movlps mem, reg
12635 movhps mem+8, reg
12636 }
12637
12638 Code generation for unaligned packed loads of double precision data
12639 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12640 if (x86_sse_unaligned_move_optimal)
12641 movupd mem, reg
12642
12643 if (x86_sse_split_regs == true)
12644 {
12645 movlpd mem, reg
12646 movhpd mem+8, reg
12647 }
12648 else
12649 {
12650 movsd mem, reg
12651 movhpd mem+8, reg
12652 }
12653 */
12654
12655 void
12656 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12657 {
12658 rtx op0, op1, m;
12659
12660 op0 = operands[0];
12661 op1 = operands[1];
12662
12663 if (TARGET_AVX)
12664 {
12665 switch (GET_MODE_CLASS (mode))
12666 {
12667 case MODE_VECTOR_INT:
12668 case MODE_INT:
12669 switch (GET_MODE_SIZE (mode))
12670 {
12671 case 16:
12672 op0 = gen_lowpart (V16QImode, op0);
12673 op1 = gen_lowpart (V16QImode, op1);
12674 emit_insn (gen_avx_movdqu (op0, op1));
12675 break;
12676 case 32:
12677 op0 = gen_lowpart (V32QImode, op0);
12678 op1 = gen_lowpart (V32QImode, op1);
12679 emit_insn (gen_avx_movdqu256 (op0, op1));
12680 break;
12681 default:
12682 gcc_unreachable ();
12683 }
12684 break;
12685 case MODE_VECTOR_FLOAT:
12686 op0 = gen_lowpart (mode, op0);
12687 op1 = gen_lowpart (mode, op1);
12688
12689 switch (mode)
12690 {
12691 case V4SFmode:
12692 emit_insn (gen_avx_movups (op0, op1));
12693 break;
12694 case V8SFmode:
12695 emit_insn (gen_avx_movups256 (op0, op1));
12696 break;
12697 case V2DFmode:
12698 emit_insn (gen_avx_movupd (op0, op1));
12699 break;
12700 case V4DFmode:
12701 emit_insn (gen_avx_movupd256 (op0, op1));
12702 break;
12703 default:
12704 gcc_unreachable ();
12705 }
12706 break;
12707
12708 default:
12709 gcc_unreachable ();
12710 }
12711
12712 return;
12713 }
12714
12715 if (MEM_P (op1))
12716 {
12717 /* If we're optimizing for size, movups is the smallest. */
12718 if (optimize_insn_for_size_p ())
12719 {
12720 op0 = gen_lowpart (V4SFmode, op0);
12721 op1 = gen_lowpart (V4SFmode, op1);
12722 emit_insn (gen_sse_movups (op0, op1));
12723 return;
12724 }
12725
12726 /* ??? If we have typed data, then it would appear that using
12727 movdqu is the only way to get unaligned data loaded with
12728 integer type. */
12729 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12730 {
12731 op0 = gen_lowpart (V16QImode, op0);
12732 op1 = gen_lowpart (V16QImode, op1);
12733 emit_insn (gen_sse2_movdqu (op0, op1));
12734 return;
12735 }
12736
12737 if (TARGET_SSE2 && mode == V2DFmode)
12738 {
12739 rtx zero;
12740
12741 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12742 {
12743 op0 = gen_lowpart (V2DFmode, op0);
12744 op1 = gen_lowpart (V2DFmode, op1);
12745 emit_insn (gen_sse2_movupd (op0, op1));
12746 return;
12747 }
12748
12749 /* When SSE registers are split into halves, we can avoid
12750 writing to the top half twice. */
12751 if (TARGET_SSE_SPLIT_REGS)
12752 {
12753 emit_clobber (op0);
12754 zero = op0;
12755 }
12756 else
12757 {
12758 /* ??? Not sure about the best option for the Intel chips.
12759 The following would seem to satisfy; the register is
12760 entirely cleared, breaking the dependency chain. We
12761 then store to the upper half, with a dependency depth
12762 of one. A rumor has it that Intel recommends two movsd
12763 followed by an unpacklpd, but this is unconfirmed. And
12764 given that the dependency depth of the unpacklpd would
12765 still be one, I'm not sure why this would be better. */
12766 zero = CONST0_RTX (V2DFmode);
12767 }
12768
12769 m = adjust_address (op1, DFmode, 0);
12770 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12771 m = adjust_address (op1, DFmode, 8);
12772 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12773 }
12774 else
12775 {
12776 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12777 {
12778 op0 = gen_lowpart (V4SFmode, op0);
12779 op1 = gen_lowpart (V4SFmode, op1);
12780 emit_insn (gen_sse_movups (op0, op1));
12781 return;
12782 }
12783
12784 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12785 emit_move_insn (op0, CONST0_RTX (mode));
12786 else
12787 emit_clobber (op0);
12788
12789 if (mode != V4SFmode)
12790 op0 = gen_lowpart (V4SFmode, op0);
12791 m = adjust_address (op1, V2SFmode, 0);
12792 emit_insn (gen_sse_loadlps (op0, op0, m));
12793 m = adjust_address (op1, V2SFmode, 8);
12794 emit_insn (gen_sse_loadhps (op0, op0, m));
12795 }
12796 }
12797 else if (MEM_P (op0))
12798 {
12799 /* If we're optimizing for size, movups is the smallest. */
12800 if (optimize_insn_for_size_p ())
12801 {
12802 op0 = gen_lowpart (V4SFmode, op0);
12803 op1 = gen_lowpart (V4SFmode, op1);
12804 emit_insn (gen_sse_movups (op0, op1));
12805 return;
12806 }
12807
12808 /* ??? Similar to above, only less clear because of "typeless
12809 stores". */
12810 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12811 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12812 {
12813 op0 = gen_lowpart (V16QImode, op0);
12814 op1 = gen_lowpart (V16QImode, op1);
12815 emit_insn (gen_sse2_movdqu (op0, op1));
12816 return;
12817 }
12818
12819 if (TARGET_SSE2 && mode == V2DFmode)
12820 {
12821 m = adjust_address (op0, DFmode, 0);
12822 emit_insn (gen_sse2_storelpd (m, op1));
12823 m = adjust_address (op0, DFmode, 8);
12824 emit_insn (gen_sse2_storehpd (m, op1));
12825 }
12826 else
12827 {
12828 if (mode != V4SFmode)
12829 op1 = gen_lowpart (V4SFmode, op1);
12830 m = adjust_address (op0, V2SFmode, 0);
12831 emit_insn (gen_sse_storelps (m, op1));
12832 m = adjust_address (op0, V2SFmode, 8);
12833 emit_insn (gen_sse_storehps (m, op1));
12834 }
12835 }
12836 else
12837 gcc_unreachable ();
12838 }
12839
12840 /* Expand a push in MODE. This is some mode for which we do not support
12841 proper push instructions, at least from the registers that we expect
12842 the value to live in. */
12843
12844 void
12845 ix86_expand_push (enum machine_mode mode, rtx x)
12846 {
12847 rtx tmp;
12848
12849 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12850 GEN_INT (-GET_MODE_SIZE (mode)),
12851 stack_pointer_rtx, 1, OPTAB_DIRECT);
12852 if (tmp != stack_pointer_rtx)
12853 emit_move_insn (stack_pointer_rtx, tmp);
12854
12855 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12856
12857 /* When we push an operand onto the stack, it has to be aligned at least
12858 at the function argument boundary. However, since we don't have
12859 the argument type, we can't determine the actual argument
12860 boundary. */
12861 emit_move_insn (tmp, x);
12862 }
12863
12864 /* Helper function of ix86_fixup_binary_operands to canonicalize
12865 operand order. Returns true if the operands should be swapped. */
12866
12867 static bool
12868 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12869 rtx operands[])
12870 {
12871 rtx dst = operands[0];
12872 rtx src1 = operands[1];
12873 rtx src2 = operands[2];
12874
12875 /* If the operation is not commutative, we can't do anything. */
12876 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12877 return false;
12878
12879 /* Highest priority is that src1 should match dst. */
12880 if (rtx_equal_p (dst, src1))
12881 return false;
12882 if (rtx_equal_p (dst, src2))
12883 return true;
12884
12885 /* Next highest priority is that immediate constants come second. */
12886 if (immediate_operand (src2, mode))
12887 return false;
12888 if (immediate_operand (src1, mode))
12889 return true;
12890
12891 /* Lowest priority is that memory references should come second. */
12892 if (MEM_P (src2))
12893 return false;
12894 if (MEM_P (src1))
12895 return true;
12896
12897 return false;
12898 }
12899
12900
12901 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12902 destination to use for the operation. If different from the true
12903 destination in operands[0], a copy operation will be required. */
12904
12905 rtx
12906 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12907 rtx operands[])
12908 {
12909 rtx dst = operands[0];
12910 rtx src1 = operands[1];
12911 rtx src2 = operands[2];
12912
12913 /* Canonicalize operand order. */
12914 if (ix86_swap_binary_operands_p (code, mode, operands))
12915 {
12916 rtx temp;
12917
12918 /* It is invalid to swap operands of different modes. */
12919 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12920
12921 temp = src1;
12922 src1 = src2;
12923 src2 = temp;
12924 }
12925
12926 /* Both source operands cannot be in memory. */
12927 if (MEM_P (src1) && MEM_P (src2))
12928 {
12929 /* Optimization: Only read from memory once. */
12930 if (rtx_equal_p (src1, src2))
12931 {
12932 src2 = force_reg (mode, src2);
12933 src1 = src2;
12934 }
12935 else
12936 src2 = force_reg (mode, src2);
12937 }
12938
12939 /* If the destination is memory, and we do not have matching source
12940 operands, do things in registers. */
12941 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12942 dst = gen_reg_rtx (mode);
12943
12944 /* Source 1 cannot be a constant. */
12945 if (CONSTANT_P (src1))
12946 src1 = force_reg (mode, src1);
12947
12948 /* Source 1 cannot be a non-matching memory. */
12949 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12950 src1 = force_reg (mode, src1);
12951
12952 operands[1] = src1;
12953 operands[2] = src2;
12954 return dst;
12955 }
12956
12957 /* Similarly, but assume that the destination has already been
12958 set up properly. */
12959
12960 void
12961 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12962 enum machine_mode mode, rtx operands[])
12963 {
12964 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12965 gcc_assert (dst == operands[0]);
12966 }
12967
12968 /* Attempt to expand a binary operator. Make the expansion closer to the
12969 actual machine than just general_operand, which would allow 3 separate
12970 memory references (one output, two input) in a single insn. */
12971
12972 void
12973 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12974 rtx operands[])
12975 {
12976 rtx src1, src2, dst, op, clob;
12977
12978 dst = ix86_fixup_binary_operands (code, mode, operands);
12979 src1 = operands[1];
12980 src2 = operands[2];
12981
12982 /* Emit the instruction. */
12983
12984 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12985 if (reload_in_progress)
12986 {
12987 /* Reload doesn't know about the flags register, and doesn't know that
12988 it doesn't want to clobber it. We can only do this with PLUS. */
12989 gcc_assert (code == PLUS);
12990 emit_insn (op);
12991 }
12992 else
12993 {
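/* Outside of reload, also attach an explicit clobber of the flags register,
since the insn patterns this SET will match include a clobber of EFLAGS. */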
12994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12996 }
12997
12998 /* Fix up the destination if needed. */
12999 if (dst != operands[0])
13000 emit_move_insn (operands[0], dst);
13001 }
13002
13003 /* Return TRUE or FALSE depending on whether the binary operator meets the
13004 appropriate constraints. */
13005
13006 int
13007 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13008 rtx operands[3])
13009 {
13010 rtx dst = operands[0];
13011 rtx src1 = operands[1];
13012 rtx src2 = operands[2];
13013
13014 /* Both source operands cannot be in memory. */
13015 if (MEM_P (src1) && MEM_P (src2))
13016 return 0;
13017
13018 /* Canonicalize operand order for commutative operators. */
13019 if (ix86_swap_binary_operands_p (code, mode, operands))
13020 {
13021 rtx temp = src1;
13022 src1 = src2;
13023 src2 = temp;
13024 }
13025
13026 /* If the destination is memory, we must have a matching source operand. */
13027 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13028 return 0;
13029
13030 /* Source 1 cannot be a constant. */
13031 if (CONSTANT_P (src1))
13032 return 0;
13033
13034 /* Source 1 cannot be a non-matching memory. */
13035 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13036 return 0;
13037
13038 return 1;
13039 }
13040
13041 /* Attempt to expand a unary operator. Make the expansion closer to the
13042 actual machine than just general_operand, which would allow 2 separate
13043 memory references (one output, one input) in a single insn. */
13044
13045 void
13046 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13047 rtx operands[])
13048 {
13049 int matching_memory;
13050 rtx src, dst, op, clob;
13051
13052 dst = operands[0];
13053 src = operands[1];
13054
13055 /* If the destination is memory, and we do not have matching source
13056 operands, do things in registers. */
13057 matching_memory = 0;
13058 if (MEM_P (dst))
13059 {
13060 if (rtx_equal_p (dst, src))
13061 matching_memory = 1;
13062 else
13063 dst = gen_reg_rtx (mode);
13064 }
13065
13066 /* When source operand is memory, destination must match. */
13067 if (MEM_P (src) && !matching_memory)
13068 src = force_reg (mode, src);
13069
13070 /* Emit the instruction. */
13071
13072 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13073 if (reload_in_progress || code == NOT)
13074 {
13075 /* Reload doesn't know about the flags register, and doesn't know that
13076 it doesn't want to clobber it. */
13077 gcc_assert (code == NOT);
13078 emit_insn (op);
13079 }
13080 else
13081 {
13082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13083 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13084 }
13085
13086 /* Fix up the destination if needed. */
13087 if (dst != operands[0])
13088 emit_move_insn (operands[0], dst);
13089 }
13090
13091 /* Return TRUE or FALSE depending on whether the unary operator meets the
13092 appropriate constraints. */
13093
13094 int
13095 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13096 enum machine_mode mode ATTRIBUTE_UNUSED,
13097 rtx operands[2] ATTRIBUTE_UNUSED)
13098 {
13099 /* If one of the operands is memory, source and destination must match. */
13100 if ((MEM_P (operands[0])
13101 || MEM_P (operands[1]))
13102 && ! rtx_equal_p (operands[0], operands[1]))
13103 return FALSE;
13104 return TRUE;
13105 }
13106
13107 /* Post-reload splitter for converting an SF or DFmode value in an
13108 SSE register into an unsigned SImode. */
13109
13110 void
13111 ix86_split_convert_uns_si_sse (rtx operands[])
13112 {
13113 enum machine_mode vecmode;
13114 rtx value, large, zero_or_two31, input, two31, x;
13115
13116 large = operands[1];
13117 zero_or_two31 = operands[2];
13118 input = operands[3];
13119 two31 = operands[4];
13120 vecmode = GET_MODE (large);
13121 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13122
13123 /* Load up the value into the low element. We must ensure that the other
13124 elements are valid floats -- zero is the easiest such value. */
13125 if (MEM_P (input))
13126 {
13127 if (vecmode == V4SFmode)
13128 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13129 else
13130 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13131 }
13132 else
13133 {
13134 input = gen_rtx_REG (vecmode, REGNO (input));
13135 emit_move_insn (value, CONST0_RTX (vecmode));
13136 if (vecmode == V4SFmode)
13137 emit_insn (gen_sse_movss (value, value, input));
13138 else
13139 emit_insn (gen_sse2_movsd (value, value, input));
13140 }
13141
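/* From here on: build a mask of the elements that are >= 2^31, subtract 2^31
from exactly those elements, do the signed truncating conversion, then shift
the mask into the sign-bit position and XOR it into the result, which adds
the 2^31 back. */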
13142 emit_move_insn (large, two31);
13143 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13144
13145 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13146 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13147
13148 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13149 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13150
13151 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13152 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13153
13154 large = gen_rtx_REG (V4SImode, REGNO (large));
13155 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13156
13157 x = gen_rtx_REG (V4SImode, REGNO (value));
13158 if (vecmode == V4SFmode)
13159 emit_insn (gen_sse2_cvttps2dq (x, value));
13160 else
13161 emit_insn (gen_sse2_cvttpd2dq (x, value));
13162 value = x;
13163
13164 emit_insn (gen_xorv4si3 (value, value, large));
13165 }
13166
13167 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13168 Expects the 64-bit DImode to be supplied in a pair of integral
13169 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13170 -mfpmath=sse, !optimize_size only. */
13171
13172 void
13173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13174 {
13175 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13176 rtx int_xmm, fp_xmm;
13177 rtx biases, exponents;
13178 rtx x;
13179
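/* First get the 64-bit input into the low half of an XMM register, choosing
among the three methods below based on the target. */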
13180 int_xmm = gen_reg_rtx (V4SImode);
13181 if (TARGET_INTER_UNIT_MOVES)
13182 emit_insn (gen_movdi_to_sse (int_xmm, input));
13183 else if (TARGET_SSE_SPLIT_REGS)
13184 {
13185 emit_clobber (int_xmm);
13186 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13187 }
13188 else
13189 {
13190 x = gen_reg_rtx (V2DImode);
13191 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13192 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13193 }
13194
13195 x = gen_rtx_CONST_VECTOR (V4SImode,
13196 gen_rtvec (4, GEN_INT (0x43300000UL),
13197 GEN_INT (0x45300000UL),
13198 const0_rtx, const0_rtx));
13199 exponents = validize_mem (force_const_mem (V4SImode, x));
13200
13201 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13202 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13203
13204 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13205 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13206 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13207 (0x1.0p84 + double(fp_value_hi_xmm)).
13208 Note these exponents differ by 32. */
13209
13210 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13211
13212 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13213 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13214 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13215 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13216 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13217 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13218 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13219 biases = validize_mem (force_const_mem (V2DFmode, biases));
13220 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13221
13222 /* Add the upper and lower DFmode values together. */
13223 if (TARGET_SSE3)
13224 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13225 else
13226 {
13227 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13228 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13229 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13230 }
13231
13232 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13233 }
13234
13235 /* Not used, but eases macroization of patterns. */
13236 void
13237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13238 rtx input ATTRIBUTE_UNUSED)
13239 {
13240 gcc_unreachable ();
13241 }
13242
13243 /* Convert an unsigned SImode value into a DFmode. Only currently used
13244 for SSE, but applicable anywhere. */
13245
13246 void
13247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13248 {
13249 REAL_VALUE_TYPE TWO31r;
13250 rtx x, fp;
13251
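/* Bias the value by -2^31 so that it fits in a signed SImode, convert it with
the signed SImode->DFmode conversion, then add 2^31.0 back to the result. */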
13252 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13253 NULL, 1, OPTAB_DIRECT);
13254
13255 fp = gen_reg_rtx (DFmode);
13256 emit_insn (gen_floatsidf2 (fp, x));
13257
13258 real_ldexp (&TWO31r, &dconst1, 31);
13259 x = const_double_from_real_value (TWO31r, DFmode);
13260
13261 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13262 if (x != target)
13263 emit_move_insn (target, x);
13264 }
13265
13266 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13267 32-bit mode; otherwise we have a direct convert instruction. */
13268
13269 void
13270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13271 {
13272 REAL_VALUE_TYPE TWO32r;
13273 rtx fp_lo, fp_hi, x;
13274
13275 fp_lo = gen_reg_rtx (DFmode);
13276 fp_hi = gen_reg_rtx (DFmode);
13277
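/* Convert the high word as a signed SImode value, scale it by 2^32, convert
the low word as an unsigned SImode value, and add the two partial results. */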
13278 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13279
13280 real_ldexp (&TWO32r, &dconst1, 32);
13281 x = const_double_from_real_value (TWO32r, DFmode);
13282 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13283
13284 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13285
13286 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13287 0, OPTAB_DIRECT);
13288 if (x != target)
13289 emit_move_insn (target, x);
13290 }
13291
13292 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13293 For x86_32, -mfpmath=sse, !optimize_size only. */
13294 void
13295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13296 {
13297 REAL_VALUE_TYPE ONE16r;
13298 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13299
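/* Split the input into two 16-bit halves; each half is nonnegative and
exactly representable in SFmode, so the signed SImode->SFmode conversion is
safe for both. Recombine as fp_hi * 2^16 + fp_lo. */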
13300 real_ldexp (&ONE16r, &dconst1, 16);
13301 x = const_double_from_real_value (ONE16r, SFmode);
13302 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13303 NULL, 0, OPTAB_DIRECT);
13304 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13305 NULL, 0, OPTAB_DIRECT);
13306 fp_hi = gen_reg_rtx (SFmode);
13307 fp_lo = gen_reg_rtx (SFmode);
13308 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13309 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13310 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13311 0, OPTAB_DIRECT);
13312 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13313 0, OPTAB_DIRECT);
13314 if (!rtx_equal_p (target, fp_hi))
13315 emit_move_insn (target, fp_hi);
13316 }
13317
13318 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
13319 then replicate the value for all elements of the vector
13320 register. */
13321
13322 rtx
13323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13324 {
13325 rtvec v;
13326 switch (mode)
13327 {
13328 case SImode:
13329 gcc_assert (vect);
13330 v = gen_rtvec (4, value, value, value, value);
13331 return gen_rtx_CONST_VECTOR (V4SImode, v);
13332
13333 case DImode:
13334 gcc_assert (vect);
13335 v = gen_rtvec (2, value, value);
13336 return gen_rtx_CONST_VECTOR (V2DImode, v);
13337
13338 case SFmode:
13339 if (vect)
13340 v = gen_rtvec (4, value, value, value, value);
13341 else
13342 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13343 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13344 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13345
13346 case DFmode:
13347 if (vect)
13348 v = gen_rtvec (2, value, value);
13349 else
13350 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13351 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13352
13353 default:
13354 gcc_unreachable ();
13355 }
13356 }
13357
13358 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13359 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13360 for an SSE register. If VECT is true, then replicate the mask for
13361 all elements of the vector register. If INVERT is true, then create
13362 a mask excluding the sign bit. */
13363
13364 rtx
13365 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13366 {
13367 enum machine_mode vec_mode, imode;
13368 HOST_WIDE_INT hi, lo;
13369 int shift = 63;
13370 rtx v;
13371 rtx mask;
13372
13373 /* Find the sign bit, sign extended to 2*HWI. */
13374 switch (mode)
13375 {
13376 case SImode:
13377 case SFmode:
13378 imode = SImode;
13379 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13380 lo = 0x80000000, hi = lo < 0;
13381 break;
13382
13383 case DImode:
13384 case DFmode:
13385 imode = DImode;
13386 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13387 if (HOST_BITS_PER_WIDE_INT >= 64)
13388 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13389 else
13390 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13391 break;
13392
13393 case TImode:
13394 case TFmode:
13395 vec_mode = VOIDmode;
13396 if (HOST_BITS_PER_WIDE_INT >= 64)
13397 {
13398 imode = TImode;
13399 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13400 }
13401 else
13402 {
13403 rtvec vec;
13404
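/* Without a 64-bit HOST_WIDE_INT we cannot build the 128-bit constant
directly, so build it as a V2DImode constant vector and view it in MODE. */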
13405 imode = DImode;
13406 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13407
13408 if (invert)
13409 {
13410 lo = ~lo, hi = ~hi;
13411 v = constm1_rtx;
13412 }
13413 else
13414 v = const0_rtx;
13415
13416 mask = immed_double_const (lo, hi, imode);
13417
13418 vec = gen_rtvec (2, v, mask);
13419 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13420 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13421
13422 return v;
13423 }
13424 break;
13425
13426 default:
13427 gcc_unreachable ();
13428 }
13429
13430 if (invert)
13431 lo = ~lo, hi = ~hi;
13432
13433 /* Force this value into the low part of a fp vector constant. */
13434 mask = immed_double_const (lo, hi, imode);
13435 mask = gen_lowpart (mode, mask);
13436
13437 if (vec_mode == VOIDmode)
13438 return force_reg (mode, mask);
13439
13440 v = ix86_build_const_vector (mode, vect, mask);
13441 return force_reg (vec_mode, v);
13442 }
13443
13444 /* Generate code for floating point ABS or NEG. */
13445
13446 void
13447 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13448 rtx operands[])
13449 {
13450 rtx mask, set, use, clob, dst, src;
13451 bool use_sse = false;
13452 bool vector_mode = VECTOR_MODE_P (mode);
13453 enum machine_mode elt_mode = mode;
13454
13455 if (vector_mode)
13456 {
13457 elt_mode = GET_MODE_INNER (mode);
13458 use_sse = true;
13459 }
13460 else if (mode == TFmode)
13461 use_sse = true;
13462 else if (TARGET_SSE_MATH)
13463 use_sse = SSE_FLOAT_MODE_P (mode);
13464
13465 /* NEG and ABS performed with SSE use bitwise mask operations.
13466 Create the appropriate mask now. */
13467 if (use_sse)
13468 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13469 else
13470 mask = NULL_RTX;
13471
13472 dst = operands[0];
13473 src = operands[1];
13474
13475 if (vector_mode)
13476 {
13477 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13478 set = gen_rtx_SET (VOIDmode, dst, set);
13479 emit_insn (set);
13480 }
13481 else
13482 {
13483 set = gen_rtx_fmt_e (code, mode, src);
13484 set = gen_rtx_SET (VOIDmode, dst, set);
13485 if (mask)
13486 {
13487 use = gen_rtx_USE (VOIDmode, mask);
13488 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13489 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13490 gen_rtvec (3, set, use, clob)));
13491 }
13492 else
13493 emit_insn (set);
13494 }
13495 }
13496
13497 /* Expand a copysign operation. Special case operand 0 being a constant. */
13498
13499 void
13500 ix86_expand_copysign (rtx operands[])
13501 {
13502 enum machine_mode mode;
13503 rtx dest, op0, op1, mask, nmask;
13504
13505 dest = operands[0];
13506 op0 = operands[1];
13507 op1 = operands[2];
13508
13509 mode = GET_MODE (dest);
13510
13511 if (GET_CODE (op0) == CONST_DOUBLE)
13512 {
13513 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13514
13515 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13516 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13517
13518 if (mode == SFmode || mode == DFmode)
13519 {
13520 enum machine_mode vmode;
13521
13522 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13523
13524 if (op0 == CONST0_RTX (mode))
13525 op0 = CONST0_RTX (vmode);
13526 else
13527 {
13528 rtvec v;
13529
13530 if (mode == SFmode)
13531 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13532 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13533 else
13534 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13535
13536 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13537 }
13538 }
13539 else if (op0 != CONST0_RTX (mode))
13540 op0 = force_reg (mode, op0);
13541
13542 mask = ix86_build_signbit_mask (mode, 0, 0);
13543
13544 if (mode == SFmode)
13545 copysign_insn = gen_copysignsf3_const;
13546 else if (mode == DFmode)
13547 copysign_insn = gen_copysigndf3_const;
13548 else
13549 copysign_insn = gen_copysigntf3_const;
13550
13551 emit_insn (copysign_insn (dest, op0, op1, mask));
13552 }
13553 else
13554 {
13555 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13556
13557 nmask = ix86_build_signbit_mask (mode, 0, 1);
13558 mask = ix86_build_signbit_mask (mode, 0, 0);
13559
13560 if (mode == SFmode)
13561 copysign_insn = gen_copysignsf3_var;
13562 else if (mode == DFmode)
13563 copysign_insn = gen_copysigndf3_var;
13564 else
13565 copysign_insn = gen_copysigntf3_var;
13566
13567 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13568 }
13569 }
13570
13571 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13572 be a constant, and so has already been expanded into a vector constant. */
13573
13574 void
13575 ix86_split_copysign_const (rtx operands[])
13576 {
13577 enum machine_mode mode, vmode;
13578 rtx dest, op0, op1, mask, x;
13579
13580 dest = operands[0];
13581 op0 = operands[1];
13582 op1 = operands[2];
13583 mask = operands[3];
13584
13585 mode = GET_MODE (dest);
13586 vmode = GET_MODE (mask);
13587
13588 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13589 x = gen_rtx_AND (vmode, dest, mask);
13590 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13591
13592 if (op0 != CONST0_RTX (vmode))
13593 {
13594 x = gen_rtx_IOR (vmode, dest, op0);
13595 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13596 }
13597 }
13598
13599 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13600 so we have to do two masks. */
13601
13602 void
13603 ix86_split_copysign_var (rtx operands[])
13604 {
13605 enum machine_mode mode, vmode;
13606 rtx dest, scratch, op0, op1, mask, nmask, x;
13607
13608 dest = operands[0];
13609 scratch = operands[1];
13610 op0 = operands[2];
13611 op1 = operands[3];
13612 nmask = operands[4];
13613 mask = operands[5];
13614
13615 mode = GET_MODE (dest);
13616 vmode = GET_MODE (mask);
13617
13618 if (rtx_equal_p (op0, op1))
13619 {
13620 /* Shouldn't happen often (it's useless, obviously), but when it does
13621 we'd generate incorrect code if we continue below. */
13622 emit_move_insn (dest, op0);
13623 return;
13624 }
13625
13626 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13627 {
13628 gcc_assert (REGNO (op1) == REGNO (scratch));
13629
13630 x = gen_rtx_AND (vmode, scratch, mask);
13631 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13632
13633 dest = mask;
13634 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13635 x = gen_rtx_NOT (vmode, dest);
13636 x = gen_rtx_AND (vmode, x, op0);
13637 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13638 }
13639 else
13640 {
13641 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13642 {
13643 x = gen_rtx_AND (vmode, scratch, mask);
13644 }
13645 else /* alternative 2,4 */
13646 {
13647 gcc_assert (REGNO (mask) == REGNO (scratch));
13648 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13649 x = gen_rtx_AND (vmode, scratch, op1);
13650 }
13651 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13652
13653 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13654 {
13655 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13656 x = gen_rtx_AND (vmode, dest, nmask);
13657 }
13658 else /* alternative 3,4 */
13659 {
13660 gcc_assert (REGNO (nmask) == REGNO (dest));
13661 dest = nmask;
13662 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13663 x = gen_rtx_AND (vmode, dest, op0);
13664 }
13665 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13666 }
13667
13668 x = gen_rtx_IOR (vmode, dest, scratch);
13669 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13670 }
13671
13672 /* Return TRUE or FALSE depending on whether the first SET in INSN
13673 has source and destination with matching CC modes, and whether the
13674 CC mode is at least as constrained as REQ_MODE. */
13675
13676 int
13677 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13678 {
13679 rtx set;
13680 enum machine_mode set_mode;
13681
13682 set = PATTERN (insn);
13683 if (GET_CODE (set) == PARALLEL)
13684 set = XVECEXP (set, 0, 0);
13685 gcc_assert (GET_CODE (set) == SET);
13686 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13687
13688 set_mode = GET_MODE (SET_DEST (set));
13689 switch (set_mode)
13690 {
13691 case CCNOmode:
13692 if (req_mode != CCNOmode
13693 && (req_mode != CCmode
13694 || XEXP (SET_SRC (set), 1) != const0_rtx))
13695 return 0;
13696 break;
13697 case CCmode:
13698 if (req_mode == CCGCmode)
13699 return 0;
13700 /* FALLTHRU */
13701 case CCGCmode:
13702 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13703 return 0;
13704 /* FALLTHRU */
13705 case CCGOCmode:
13706 if (req_mode == CCZmode)
13707 return 0;
13708 /* FALLTHRU */
13709 case CCAmode:
13710 case CCCmode:
13711 case CCOmode:
13712 case CCSmode:
13713 case CCZmode:
13714 break;
13715
13716 default:
13717 gcc_unreachable ();
13718 }
13719
13720 return (GET_MODE (SET_SRC (set)) == set_mode);
13721 }
13722
13723 /* Generate insn patterns to do an integer compare of OPERANDS. */
13724
13725 static rtx
13726 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13727 {
13728 enum machine_mode cmpmode;
13729 rtx tmp, flags;
13730
13731 cmpmode = SELECT_CC_MODE (code, op0, op1);
13732 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13733
13734 /* This is very simple, but making the interface the same as in the
13735 FP case makes the rest of the code easier. */
13736 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13737 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13738
13739 /* Return the test that should be put into the flags user, i.e.
13740 the bcc, scc, or cmov instruction. */
13741 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13742 }
13743
13744 /* Figure out whether to use ordered or unordered fp comparisons.
13745 Return the appropriate mode to use. */
13746
13747 enum machine_mode
13748 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13749 {
13750 /* ??? In order to make all comparisons reversible, we do all comparisons
13751 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13752 between trapping and nontrapping comparisons in all forms, we can make
13753 inequality comparisons trapping again, since that results in better code
13754 when using FCOM based compares. */
13755 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13756 }
13757
13758 enum machine_mode
13759 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13760 {
13761 enum machine_mode mode = GET_MODE (op0);
13762
13763 if (SCALAR_FLOAT_MODE_P (mode))
13764 {
13765 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13766 return ix86_fp_compare_mode (code);
13767 }
13768
13769 switch (code)
13770 {
13771 /* Only zero flag is needed. */
13772 case EQ: /* ZF=0 */
13773 case NE: /* ZF!=0 */
13774 return CCZmode;
13775 /* Codes needing carry flag. */
13776 case GEU: /* CF=0 */
13777 case LTU: /* CF=1 */
13778 /* Detect overflow checks. They need just the carry flag. */
13779 if (GET_CODE (op0) == PLUS
13780 && rtx_equal_p (op1, XEXP (op0, 0)))
13781 return CCCmode;
13782 else
13783 return CCmode;
13784 case GTU: /* CF=0 & ZF=0 */
13785 case LEU: /* CF=1 | ZF=1 */
13786 /* Detect overflow checks. They need just the carry flag. */
13787 if (GET_CODE (op0) == MINUS
13788 && rtx_equal_p (op1, XEXP (op0, 0)))
13789 return CCCmode;
13790 else
13791 return CCmode;
13792 /* Codes possibly doable only with sign flag when
13793 comparing against zero. */
13794 case GE: /* SF=OF or SF=0 */
13795 case LT: /* SF<>OF or SF=1 */
13796 if (op1 == const0_rtx)
13797 return CCGOCmode;
13798 else
13799 /* For other cases Carry flag is not required. */
13800 return CCGCmode;
13801 /* Codes doable only with the sign flag when comparing
13802 against zero, but for which we lack a jump instruction,
13803 so we need to use relational tests against overflow,
13804 which thus needs to be zero. */
13805 case GT: /* ZF=0 & SF=OF */
13806 case LE: /* ZF=1 | SF<>OF */
13807 if (op1 == const0_rtx)
13808 return CCNOmode;
13809 else
13810 return CCGCmode;
13811 /* The strcmp pattern does (use flags), and combine may ask us for the
13812 proper mode. */
13813 case USE:
13814 return CCmode;
13815 default:
13816 gcc_unreachable ();
13817 }
13818 }
13819
13820 /* Return the fixed registers used for condition codes. */
13821
13822 static bool
13823 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13824 {
13825 *p1 = FLAGS_REG;
13826 *p2 = FPSR_REG;
13827 return true;
13828 }
13829
13830 /* If two condition code modes are compatible, return a condition code
13831 mode which is compatible with both. Otherwise, return
13832 VOIDmode. */
13833
13834 static enum machine_mode
13835 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13836 {
13837 if (m1 == m2)
13838 return m1;
13839
13840 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13841 return VOIDmode;
13842
13843 if ((m1 == CCGCmode && m2 == CCGOCmode)
13844 || (m1 == CCGOCmode && m2 == CCGCmode))
13845 return CCGCmode;
13846
13847 switch (m1)
13848 {
13849 default:
13850 gcc_unreachable ();
13851
13852 case CCmode:
13853 case CCGCmode:
13854 case CCGOCmode:
13855 case CCNOmode:
13856 case CCAmode:
13857 case CCCmode:
13858 case CCOmode:
13859 case CCSmode:
13860 case CCZmode:
13861 switch (m2)
13862 {
13863 default:
13864 return VOIDmode;
13865
13866 case CCmode:
13867 case CCGCmode:
13868 case CCGOCmode:
13869 case CCNOmode:
13870 case CCAmode:
13871 case CCCmode:
13872 case CCOmode:
13873 case CCSmode:
13874 case CCZmode:
13875 return CCmode;
13876 }
13877
13878 case CCFPmode:
13879 case CCFPUmode:
13880 /* These are only compatible with themselves, which we already
13881 checked above. */
13882 return VOIDmode;
13883 }
13884 }
13885
13886 /* Split comparison code CODE into comparisons we can do using branch
13887 instructions. BYPASS_CODE is the comparison code for the branch that will
13888 branch around FIRST_CODE and SECOND_CODE. If one of the branches
13889 is not required, its code is set to UNKNOWN.
13890 We never require more than two branches. */
13891
13892 void
13893 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13894 enum rtx_code *first_code,
13895 enum rtx_code *second_code)
13896 {
13897 *first_code = code;
13898 *bypass_code = UNKNOWN;
13899 *second_code = UNKNOWN;
13900
13901 /* The fcomi comparison sets flags as follows:
13902
13903 cmp ZF PF CF
13904 > 0 0 0
13905 < 0 0 1
13906 = 1 0 0
13907 un 1 1 1 */
13908
13909 switch (code)
13910 {
13911 case GT: /* GTU - CF=0 & ZF=0 */
13912 case GE: /* GEU - CF=0 */
13913 case ORDERED: /* PF=0 */
13914 case UNORDERED: /* PF=1 */
13915 case UNEQ: /* EQ - ZF=1 */
13916 case UNLT: /* LTU - CF=1 */
13917 case UNLE: /* LEU - CF=1 | ZF=1 */
13918 case LTGT: /* EQ - ZF=0 */
13919 break;
13920 case LT: /* LTU - CF=1 - fails on unordered */
13921 *first_code = UNLT;
13922 *bypass_code = UNORDERED;
13923 break;
13924 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13925 *first_code = UNLE;
13926 *bypass_code = UNORDERED;
13927 break;
13928 case EQ: /* EQ - ZF=1 - fails on unordered */
13929 *first_code = UNEQ;
13930 *bypass_code = UNORDERED;
13931 break;
13932 case NE: /* NE - ZF=0 - fails on unordered */
13933 *first_code = LTGT;
13934 *second_code = UNORDERED;
13935 break;
13936 case UNGE: /* GEU - CF=0 - fails on unordered */
13937 *first_code = GE;
13938 *second_code = UNORDERED;
13939 break;
13940 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13941 *first_code = GT;
13942 *second_code = UNORDERED;
13943 break;
13944 default:
13945 gcc_unreachable ();
13946 }
13947 if (!TARGET_IEEE_FP)
13948 {
13949 *second_code = UNKNOWN;
13950 *bypass_code = UNKNOWN;
13951 }
13952 }
13953
13954 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
13955 All of the following functions use the number of instructions as a cost metric.
13956 In the future this should be tweaked to compute bytes for optimize_size and
13957 to take into account the performance of various instructions on various CPUs. */
13958 static int
13959 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13960 {
13961 if (!TARGET_IEEE_FP)
13962 return 4;
13963 /* The cost of code output by ix86_expand_fp_compare. */
13964 switch (code)
13965 {
13966 case UNLE:
13967 case UNLT:
13968 case LTGT:
13969 case GT:
13970 case GE:
13971 case UNORDERED:
13972 case ORDERED:
13973 case UNEQ:
13974 return 4;
13975 break;
13976 case LT:
13977 case NE:
13978 case EQ:
13979 case UNGE:
13980 return 5;
13981 break;
13982 case LE:
13983 case UNGT:
13984 return 6;
13985 break;
13986 default:
13987 gcc_unreachable ();
13988 }
13989 }
13990
13991 /* Return cost of comparison done using fcomi operation.
13992 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13993 static int
13994 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13995 {
13996 enum rtx_code bypass_code, first_code, second_code;
13997 /* Return arbitrarily high cost when instruction is not supported - this
13998 prevents gcc from using it. */
13999 if (!TARGET_CMOVE)
14000 return 1024;
14001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14002 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14003 }
14004
14005 /* Return cost of comparison done using sahf operation.
14006 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14007 static int
14008 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14009 {
14010 enum rtx_code bypass_code, first_code, second_code;
14011 /* Return arbitrarily high cost when instruction is not preferred - this
14012 keeps gcc from using it. */
14013 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14014 return 1024;
14015 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14016 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14017 }
14018
14019 /* Compute cost of the comparison done using any method.
14020 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14021 static int
14022 ix86_fp_comparison_cost (enum rtx_code code)
14023 {
14024 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14025 int min;
14026
14027 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14028 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14029
14030 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14031 if (min > sahf_cost)
14032 min = sahf_cost;
14033 if (min > fcomi_cost)
14034 min = fcomi_cost;
14035 return min;
14036 }
14037
14038 /* Return true if we should use an FCOMI instruction for this
14039 fp comparison. */
14040
14041 int
14042 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14043 {
14044 enum rtx_code swapped_code = swap_condition (code);
14045
14046 return ((ix86_fp_comparison_cost (code)
14047 == ix86_fp_comparison_fcomi_cost (code))
14048 || (ix86_fp_comparison_cost (swapped_code)
14049 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14050 }
14051
14052 /* Swap, force into registers, or otherwise massage the two operands
14053 to a fp comparison. The operands are updated in place; the new
14054 comparison code is returned. */
14055
14056 static enum rtx_code
14057 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14058 {
14059 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14060 rtx op0 = *pop0, op1 = *pop1;
14061 enum machine_mode op_mode = GET_MODE (op0);
14062 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14063
14064 /* All of the unordered compare instructions only work on registers.
14065 The same is true of the fcomi compare instructions. The XFmode
14066 compare instructions require registers except when comparing
14067 against zero or when converting operand 1 from fixed point to
14068 floating point. */
14069
14070 if (!is_sse
14071 && (fpcmp_mode == CCFPUmode
14072 || (op_mode == XFmode
14073 && ! (standard_80387_constant_p (op0) == 1
14074 || standard_80387_constant_p (op1) == 1)
14075 && GET_CODE (op1) != FLOAT)
14076 || ix86_use_fcomi_compare (code)))
14077 {
14078 op0 = force_reg (op_mode, op0);
14079 op1 = force_reg (op_mode, op1);
14080 }
14081 else
14082 {
14083 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14084 things around if they appear profitable, otherwise force op0
14085 into a register. */
14086
14087 if (standard_80387_constant_p (op0) == 0
14088 || (MEM_P (op0)
14089 && ! (standard_80387_constant_p (op1) == 0
14090 || MEM_P (op1))))
14091 {
14092 rtx tmp;
14093 tmp = op0, op0 = op1, op1 = tmp;
14094 code = swap_condition (code);
14095 }
14096
14097 if (!REG_P (op0))
14098 op0 = force_reg (op_mode, op0);
14099
14100 if (CONSTANT_P (op1))
14101 {
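/* standard_80387_constant_p returns 0 when OP1 is not a constant the 387 can
load directly (force it into memory below), and 1 when OP1 is 0.0, which only
needs to be forced into a register when fcomi (TARGET_CMOVE) may be used. */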
14102 int tmp = standard_80387_constant_p (op1);
14103 if (tmp == 0)
14104 op1 = validize_mem (force_const_mem (op_mode, op1));
14105 else if (tmp == 1)
14106 {
14107 if (TARGET_CMOVE)
14108 op1 = force_reg (op_mode, op1);
14109 }
14110 else
14111 op1 = force_reg (op_mode, op1);
14112 }
14113 }
14114
14115 /* Try to rearrange the comparison to make it cheaper. */
14116 if (ix86_fp_comparison_cost (code)
14117 > ix86_fp_comparison_cost (swap_condition (code))
14118 && (REG_P (op1) || can_create_pseudo_p ()))
14119 {
14120 rtx tmp;
14121 tmp = op0, op0 = op1, op1 = tmp;
14122 code = swap_condition (code);
14123 if (!REG_P (op0))
14124 op0 = force_reg (op_mode, op0);
14125 }
14126
14127 *pop0 = op0;
14128 *pop1 = op1;
14129 return code;
14130 }
14131
14132 /* Convert comparison codes we use to represent FP comparison to integer
14133 code that will result in proper branch. Return UNKNOWN if no such code
14134 is available. */
14135
14136 enum rtx_code
14137 ix86_fp_compare_code_to_integer (enum rtx_code code)
14138 {
14139 switch (code)
14140 {
14141 case GT:
14142 return GTU;
14143 case GE:
14144 return GEU;
14145 case ORDERED:
14146 case UNORDERED:
14147 return code;
14148 break;
14149 case UNEQ:
14150 return EQ;
14151 break;
14152 case UNLT:
14153 return LTU;
14154 break;
14155 case UNLE:
14156 return LEU;
14157 break;
14158 case LTGT:
14159 return NE;
14160 break;
14161 default:
14162 return UNKNOWN;
14163 }
14164 }
14165
14166 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14167
14168 static rtx
14169 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14170 rtx *second_test, rtx *bypass_test)
14171 {
14172 enum machine_mode fpcmp_mode, intcmp_mode;
14173 rtx tmp, tmp2;
14174 int cost = ix86_fp_comparison_cost (code);
14175 enum rtx_code bypass_code, first_code, second_code;
14176
14177 fpcmp_mode = ix86_fp_compare_mode (code);
14178 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14179
14180 if (second_test)
14181 *second_test = NULL_RTX;
14182 if (bypass_test)
14183 *bypass_test = NULL_RTX;
14184
14185 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14186
14187 /* Do fcomi/sahf based test when profitable. */
14188 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14189 && (bypass_code == UNKNOWN || bypass_test)
14190 && (second_code == UNKNOWN || second_test))
14191 {
14192 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14193 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14194 tmp);
14195 if (TARGET_CMOVE)
14196 emit_insn (tmp);
14197 else
14198 {
14199 gcc_assert (TARGET_SAHF);
14200
14201 if (!scratch)
14202 scratch = gen_reg_rtx (HImode);
14203 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14204
14205 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14206 }
14207
14208 /* The FP codes work out to act like unsigned. */
14209 intcmp_mode = fpcmp_mode;
14210 code = first_code;
14211 if (bypass_code != UNKNOWN)
14212 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14213 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14214 const0_rtx);
14215 if (second_code != UNKNOWN)
14216 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14217 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14218 const0_rtx);
14219 }
14220 else
14221 {
14222 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14223 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14224 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14225 if (!scratch)
14226 scratch = gen_reg_rtx (HImode);
14227 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14228
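/* The fnstsw result leaves the FPU condition bits in the high byte of
SCRATCH: C0 is 0x01, C2 is 0x04 and C3 is 0x40, so the 0x45 masks below test
C3, C2 and C0 together. */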
14229 /* In the unordered case, we have to check C2 for NaN's, which
14230 doesn't happen to work out to anything nice combination-wise.
14231 So do some bit twiddling on the value we've got in AH to come
14232 up with an appropriate set of condition codes. */
14233
14234 intcmp_mode = CCNOmode;
14235 switch (code)
14236 {
14237 case GT:
14238 case UNGT:
14239 if (code == GT || !TARGET_IEEE_FP)
14240 {
14241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14242 code = EQ;
14243 }
14244 else
14245 {
14246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14249 intcmp_mode = CCmode;
14250 code = GEU;
14251 }
14252 break;
14253 case LT:
14254 case UNLT:
14255 if (code == LT && TARGET_IEEE_FP)
14256 {
14257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14258 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14259 intcmp_mode = CCmode;
14260 code = EQ;
14261 }
14262 else
14263 {
14264 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14265 code = NE;
14266 }
14267 break;
14268 case GE:
14269 case UNGE:
14270 if (code == GE || !TARGET_IEEE_FP)
14271 {
14272 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14273 code = EQ;
14274 }
14275 else
14276 {
14277 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14278 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14279 GEN_INT (0x01)));
14280 code = NE;
14281 }
14282 break;
14283 case LE:
14284 case UNLE:
14285 if (code == LE && TARGET_IEEE_FP)
14286 {
14287 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14288 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14289 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14290 intcmp_mode = CCmode;
14291 code = LTU;
14292 }
14293 else
14294 {
14295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14296 code = NE;
14297 }
14298 break;
14299 case EQ:
14300 case UNEQ:
14301 if (code == EQ && TARGET_IEEE_FP)
14302 {
14303 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14304 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14305 intcmp_mode = CCmode;
14306 code = EQ;
14307 }
14308 else
14309 {
14310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14311 code = NE;
14312 break;
14313 }
14314 break;
14315 case NE:
14316 case LTGT:
14317 if (code == NE && TARGET_IEEE_FP)
14318 {
14319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14321 GEN_INT (0x40)));
14322 code = NE;
14323 }
14324 else
14325 {
14326 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14327 code = EQ;
14328 }
14329 break;
14330
14331 case UNORDERED:
14332 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14333 code = NE;
14334 break;
14335 case ORDERED:
14336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14337 code = EQ;
14338 break;
14339
14340 default:
14341 gcc_unreachable ();
14342 }
14343 }
14344
14345 /* Return the test that should be put into the flags user, i.e.
14346 the bcc, scc, or cmov instruction. */
14347 return gen_rtx_fmt_ee (code, VOIDmode,
14348 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14349 const0_rtx);
14350 }
14351
14352 rtx
14353 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14354 {
14355 rtx op0, op1, ret;
14356 op0 = ix86_compare_op0;
14357 op1 = ix86_compare_op1;
14358
14359 if (second_test)
14360 *second_test = NULL_RTX;
14361 if (bypass_test)
14362 *bypass_test = NULL_RTX;
14363
14364 if (ix86_compare_emitted)
14365 {
14366 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14367 ix86_compare_emitted = NULL_RTX;
14368 }
14369 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14370 {
14371 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14372 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14373 second_test, bypass_test);
14374 }
14375 else
14376 ret = ix86_expand_int_compare (code, op0, op1);
14377
14378 return ret;
14379 }
14380
14381 /* Return true if the CODE will result in a nontrivial jump sequence. */
14382 bool
14383 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14384 {
14385 enum rtx_code bypass_code, first_code, second_code;
14386 if (!TARGET_CMOVE)
14387 return true;
14388 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14389 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14390 }
14391
14392 void
14393 ix86_expand_branch (enum rtx_code code, rtx label)
14394 {
14395 rtx tmp;
14396
14397 /* If we have emitted a compare insn, go straight to simple.
14398 ix86_expand_compare won't emit anything if ix86_compare_emitted
14399 is non-NULL. */
14400 if (ix86_compare_emitted)
14401 goto simple;
14402
14403 switch (GET_MODE (ix86_compare_op0))
14404 {
14405 case QImode:
14406 case HImode:
14407 case SImode:
14408 simple:
14409 tmp = ix86_expand_compare (code, NULL, NULL);
14410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14411 gen_rtx_LABEL_REF (VOIDmode, label),
14412 pc_rtx);
14413 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14414 return;
14415
14416 case SFmode:
14417 case DFmode:
14418 case XFmode:
14419 {
14420 rtvec vec;
14421 int use_fcomi;
14422 enum rtx_code bypass_code, first_code, second_code;
14423
14424 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14425 &ix86_compare_op1);
14426
14427 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14428
14429 /* Check whether we will use the natural sequence with one jump. If
14430 so, we can expand the jump early. Otherwise delay expansion by
14431 creating a compound insn so as not to confuse optimizers. */
14432 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14433 {
14434 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14435 gen_rtx_LABEL_REF (VOIDmode, label),
14436 pc_rtx, NULL_RTX, NULL_RTX);
14437 }
14438 else
14439 {
14440 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14441 ix86_compare_op0, ix86_compare_op1);
14442 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14443 gen_rtx_LABEL_REF (VOIDmode, label),
14444 pc_rtx);
14445 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14446
14447 use_fcomi = ix86_use_fcomi_compare (code);
14448 vec = rtvec_alloc (3 + !use_fcomi);
14449 RTVEC_ELT (vec, 0) = tmp;
14450 RTVEC_ELT (vec, 1)
14451 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14452 RTVEC_ELT (vec, 2)
14453 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14454 if (! use_fcomi)
14455 RTVEC_ELT (vec, 3)
14456 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14457
14458 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14459 }
14460 return;
14461 }
14462
14463 case DImode:
14464 if (TARGET_64BIT)
14465 goto simple;
14466 case TImode:
14467 /* Expand DImode branch into multiple compare+branch. */
14468 {
14469 rtx lo[2], hi[2], label2;
14470 enum rtx_code code1, code2, code3;
14471 enum machine_mode submode;
14472
14473 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14474 {
14475 tmp = ix86_compare_op0;
14476 ix86_compare_op0 = ix86_compare_op1;
14477 ix86_compare_op1 = tmp;
14478 code = swap_condition (code);
14479 }
14480 if (GET_MODE (ix86_compare_op0) == DImode)
14481 {
14482 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14483 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14484 submode = SImode;
14485 }
14486 else
14487 {
14488 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14489 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14490 submode = DImode;
14491 }
14492
14493 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14494 avoid two branches. This costs one extra insn, so disable when
14495 optimizing for size. */
14496
14497 if ((code == EQ || code == NE)
14498 && (!optimize_insn_for_size_p ()
14499 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14500 {
14501 rtx xor0, xor1;
14502
14503 xor1 = hi[0];
14504 if (hi[1] != const0_rtx)
14505 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14506 NULL_RTX, 0, OPTAB_WIDEN);
14507
14508 xor0 = lo[0];
14509 if (lo[1] != const0_rtx)
14510 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14511 NULL_RTX, 0, OPTAB_WIDEN);
14512
14513 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14514 NULL_RTX, 0, OPTAB_WIDEN);
14515
14516 ix86_compare_op0 = tmp;
14517 ix86_compare_op1 = const0_rtx;
14518 ix86_expand_branch (code, label);
14519 return;
14520 }
14521
14522 /* Otherwise, if we are doing less-than or greater-or-equal, op1 is a
14523 constant, and the low word is zero, then we can just
14524 examine the high word. Similarly for a low word of -1 and
14525 less-or-equal or greater-than. */
14526
14527 if (CONST_INT_P (hi[1]))
14528 switch (code)
14529 {
14530 case LT: case LTU: case GE: case GEU:
14531 if (lo[1] == const0_rtx)
14532 {
14533 ix86_compare_op0 = hi[0];
14534 ix86_compare_op1 = hi[1];
14535 ix86_expand_branch (code, label);
14536 return;
14537 }
14538 break;
14539 case LE: case LEU: case GT: case GTU:
14540 if (lo[1] == constm1_rtx)
14541 {
14542 ix86_compare_op0 = hi[0];
14543 ix86_compare_op1 = hi[1];
14544 ix86_expand_branch (code, label);
14545 return;
14546 }
14547 break;
14548 default:
14549 break;
14550 }
14551
14552 /* Otherwise, we need two or three jumps. */
14553
14554 label2 = gen_label_rtx ();
14555
14556 code1 = code;
14557 code2 = swap_condition (code);
14558 code3 = unsigned_condition (code);
14559
14560 switch (code)
14561 {
14562 case LT: case GT: case LTU: case GTU:
14563 break;
14564
14565 case LE: code1 = LT; code2 = GT; break;
14566 case GE: code1 = GT; code2 = LT; break;
14567 case LEU: code1 = LTU; code2 = GTU; break;
14568 case GEU: code1 = GTU; code2 = LTU; break;
14569
14570 case EQ: code1 = UNKNOWN; code2 = NE; break;
14571 case NE: code2 = UNKNOWN; break;
14572
14573 default:
14574 gcc_unreachable ();
14575 }
14576
14577 /*
14578 * a < b =>
14579 * if (hi(a) < hi(b)) goto true;
14580 * if (hi(a) > hi(b)) goto false;
14581 * if (lo(a) < lo(b)) goto true;
14582 * false:
14583 */
14584
14585 ix86_compare_op0 = hi[0];
14586 ix86_compare_op1 = hi[1];
14587
14588 if (code1 != UNKNOWN)
14589 ix86_expand_branch (code1, label);
14590 if (code2 != UNKNOWN)
14591 ix86_expand_branch (code2, label2);
14592
14593 ix86_compare_op0 = lo[0];
14594 ix86_compare_op1 = lo[1];
14595 ix86_expand_branch (code3, label);
14596
14597 if (code2 != UNKNOWN)
14598 emit_label (label2);
14599 return;
14600 }
14601
14602 default:
14603 gcc_unreachable ();
14604 }
14605 }
14606
14607 /* Split branch based on floating point condition. */
14608 void
14609 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14610 rtx target1, rtx target2, rtx tmp, rtx pushed)
14611 {
14612 rtx second, bypass;
14613 rtx label = NULL_RTX;
14614 rtx condition;
14615 int bypass_probability = -1, second_probability = -1, probability = -1;
14616 rtx i;
14617
14618 if (target2 != pc_rtx)
14619 {
14620 rtx tmp = target2;
14621 code = reverse_condition_maybe_unordered (code);
14622 target2 = target1;
14623 target1 = tmp;
14624 }
14625
14626 condition = ix86_expand_fp_compare (code, op1, op2,
14627 tmp, &second, &bypass);
14628
14629 /* Remove pushed operand from stack. */
14630 if (pushed)
14631 ix86_free_from_memory (GET_MODE (pushed));
14632
14633 if (split_branch_probability >= 0)
14634 {
14635 /* Distribute the probabilities across the jumps.
14636 Assume that the BYPASS and SECOND tests are always
14637 tests for UNORDERED. */
14638 probability = split_branch_probability;
14639
14640 /* A value of 1 is low enough that there is no need for the probability
14641 to be updated. Later we may run some experiments and see
14642 if unordered values are more frequent in practice. */
14643 if (bypass)
14644 bypass_probability = 1;
14645 if (second)
14646 second_probability = 1;
14647 }
14648 if (bypass != NULL_RTX)
14649 {
14650 label = gen_label_rtx ();
14651 i = emit_jump_insn (gen_rtx_SET
14652 (VOIDmode, pc_rtx,
14653 gen_rtx_IF_THEN_ELSE (VOIDmode,
14654 bypass,
14655 gen_rtx_LABEL_REF (VOIDmode,
14656 label),
14657 pc_rtx)));
14658 if (bypass_probability >= 0)
14659 REG_NOTES (i)
14660 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14661 GEN_INT (bypass_probability),
14662 REG_NOTES (i));
14663 }
14664 i = emit_jump_insn (gen_rtx_SET
14665 (VOIDmode, pc_rtx,
14666 gen_rtx_IF_THEN_ELSE (VOIDmode,
14667 condition, target1, target2)));
14668 if (probability >= 0)
14669 REG_NOTES (i)
14670 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14671 GEN_INT (probability),
14672 REG_NOTES (i));
14673 if (second != NULL_RTX)
14674 {
14675 i = emit_jump_insn (gen_rtx_SET
14676 (VOIDmode, pc_rtx,
14677 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14678 target2)));
14679 if (second_probability >= 0)
14680 REG_NOTES (i)
14681 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14682 GEN_INT (second_probability),
14683 REG_NOTES (i));
14684 }
14685 if (label != NULL_RTX)
14686 emit_label (label);
14687 }
14688
14689 int
14690 ix86_expand_setcc (enum rtx_code code, rtx dest)
14691 {
14692 rtx ret, tmp, tmpreg, equiv;
14693 rtx second_test, bypass_test;
14694
14695 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14696 return 0; /* FAIL */
14697
14698 gcc_assert (GET_MODE (dest) == QImode);
14699
14700 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14701 PUT_MODE (ret, QImode);
14702
14703 tmp = dest;
14704 tmpreg = dest;
14705
14706 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
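  /* If the comparison needed an auxiliary test (IEEE unordered
     handling), compute it into a second QImode register and merge the
     two result bytes: a reversed bypass test is ANDed in, a second
     test is ORed in.  */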
14707 if (bypass_test || second_test)
14708 {
14709 rtx test = second_test;
14710 int bypass = 0;
14711 rtx tmp2 = gen_reg_rtx (QImode);
14712 if (bypass_test)
14713 {
14714 gcc_assert (!second_test);
14715 test = bypass_test;
14716 bypass = 1;
14717 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14718 }
14719 PUT_MODE (test, QImode);
14720 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14721
14722 if (bypass)
14723 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14724 else
14725 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14726 }
14727
14728 /* Attach a REG_EQUAL note describing the comparison result. */
14729 if (ix86_compare_op0 && ix86_compare_op1)
14730 {
14731 equiv = simplify_gen_relational (code, QImode,
14732 GET_MODE (ix86_compare_op0),
14733 ix86_compare_op0, ix86_compare_op1);
14734 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14735 }
14736
14737 return 1; /* DONE */
14738 }
14739
/* Expand a comparison setting or clearing the carry flag.  Return true
   when successful and set *POP to the comparison operation.  */
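/* For example, "a == 0" is rewritten below as "(unsigned) a < 1", which
   sets the carry flag exactly when a is zero.  */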
14742 static bool
14743 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14744 {
14745 enum machine_mode mode =
14746 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14747
  /* Do not handle double-word compares, which go through a special path.  */
14749 if (mode == (TARGET_64BIT ? TImode : DImode))
14750 return false;
14751
14752 if (SCALAR_FLOAT_MODE_P (mode))
14753 {
14754 rtx second_test = NULL, bypass_test = NULL;
14755 rtx compare_op, compare_seq;
14756
14757 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14758
      /* Shortcut: the following common codes never translate
	 into carry-flag compares.  */
14761 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14762 || code == ORDERED || code == UNORDERED)
14763 return false;
14764
      /* These comparisons require the zero flag; swap the operands so they won't.  */
14766 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14767 && !TARGET_IEEE_FP)
14768 {
14769 rtx tmp = op0;
14770 op0 = op1;
14771 op1 = tmp;
14772 code = swap_condition (code);
14773 }
14774
      /* Try to expand the comparison and verify that we end up with a
	 carry flag based comparison.  This fails to be true only when
	 we decide to expand the comparison using arithmetic, which is
	 not a common scenario.  */
14779 start_sequence ();
14780 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14781 &second_test, &bypass_test);
14782 compare_seq = get_insns ();
14783 end_sequence ();
14784
14785 if (second_test || bypass_test)
14786 return false;
14787
14788 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14789 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14790 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14791 else
14792 code = GET_CODE (compare_op);
14793
14794 if (code != LTU && code != GEU)
14795 return false;
14796
14797 emit_insn (compare_seq);
14798 *pop = compare_op;
14799 return true;
14800 }
14801
14802 if (!INTEGRAL_MODE_P (mode))
14803 return false;
14804
14805 switch (code)
14806 {
14807 case LTU:
14808 case GEU:
14809 break;
14810
14811 /* Convert a==0 into (unsigned)a<1. */
14812 case EQ:
14813 case NE:
14814 if (op1 != const0_rtx)
14815 return false;
14816 op1 = const1_rtx;
14817 code = (code == EQ ? LTU : GEU);
14818 break;
14819
      /* Convert a>b into b<a or a>=b+1.  */
14821 case GTU:
14822 case LEU:
14823 if (CONST_INT_P (op1))
14824 {
14825 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
	  /* Bail out on overflow.  We could still swap the operands, but
	     that would force the constant to be loaded into a register.  */
14828 if (op1 == const0_rtx
14829 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14830 return false;
14831 code = (code == GTU ? GEU : LTU);
14832 }
14833 else
14834 {
14835 rtx tmp = op1;
14836 op1 = op0;
14837 op0 = tmp;
14838 code = (code == GTU ? LTU : GEU);
14839 }
14840 break;
14841
14842 /* Convert a>=0 into (unsigned)a<0x80000000. */
14843 case LT:
14844 case GE:
14845 if (mode == DImode || op1 != const0_rtx)
14846 return false;
14847 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14848 code = (code == LT ? GEU : LTU);
14849 break;
14850 case LE:
14851 case GT:
14852 if (mode == DImode || op1 != constm1_rtx)
14853 return false;
14854 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14855 code = (code == LE ? GEU : LTU);
14856 break;
14857
14858 default:
14859 return false;
14860 }
  /* Swapping operands may cause the constant to appear as the first operand.  */
14862 if (!nonimmediate_operand (op0, VOIDmode))
14863 {
14864 if (!can_create_pseudo_p ())
14865 return false;
14866 op0 = force_reg (mode, op0);
14867 }
14868 ix86_compare_op0 = op0;
14869 ix86_compare_op1 = op1;
14870 *pop = ix86_expand_compare (code, NULL, NULL);
14871 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14872 return true;
14873 }
14874
14875 int
14876 ix86_expand_int_movcc (rtx operands[])
14877 {
14878 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14879 rtx compare_seq, compare_op;
14880 rtx second_test, bypass_test;
14881 enum machine_mode mode = GET_MODE (operands[0]);
  bool sign_bit_compare_p = false;
14883
14884 start_sequence ();
14885 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14886 compare_seq = get_insns ();
14887 end_sequence ();
14888
14889 compare_code = GET_CODE (compare_op);
14890
14891 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14892 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14893 sign_bit_compare_p = true;
14894
14895 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14896 HImode insns, we'd be swallowed in word prefix ops. */
14897
14898 if ((mode != HImode || TARGET_FAST_PREFIX)
14899 && (mode != (TARGET_64BIT ? TImode : DImode))
14900 && CONST_INT_P (operands[2])
14901 && CONST_INT_P (operands[3]))
14902 {
14903 rtx out = operands[0];
14904 HOST_WIDE_INT ct = INTVAL (operands[2]);
14905 HOST_WIDE_INT cf = INTVAL (operands[3]);
14906 HOST_WIDE_INT diff;
14907
14908 diff = ct - cf;
      /* Sign bit compares are better done using shifts than using sbb.  */
14911 if (sign_bit_compare_p
14912 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14913 ix86_compare_op1, &compare_op))
14914 {
14915 /* Detect overlap between destination and compare sources. */
14916 rtx tmp = out;
14917
14918 if (!sign_bit_compare_p)
14919 {
14920 bool fpcmp = false;
14921
14922 compare_code = GET_CODE (compare_op);
14923
14924 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14925 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14926 {
14927 fpcmp = true;
14928 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14929 }
14930
	      /* To simplify the rest of the code, restrict to the GEU case.  */
14932 if (compare_code == LTU)
14933 {
14934 HOST_WIDE_INT tmp = ct;
14935 ct = cf;
14936 cf = tmp;
14937 compare_code = reverse_condition (compare_code);
14938 code = reverse_condition (code);
14939 }
14940 else
14941 {
14942 if (fpcmp)
14943 PUT_CODE (compare_op,
14944 reverse_condition_maybe_unordered
14945 (GET_CODE (compare_op)));
14946 else
14947 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14948 }
14949 diff = ct - cf;
14950
14951 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14952 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14953 tmp = gen_reg_rtx (mode);
14954
14955 if (mode == DImode)
14956 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14957 else
14958 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14959 }
14960 else
14961 {
14962 if (code == GT || code == GE)
14963 code = reverse_condition (code);
14964 else
14965 {
14966 HOST_WIDE_INT tmp = ct;
14967 ct = cf;
14968 cf = tmp;
14969 diff = ct - cf;
14970 }
14971 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14972 ix86_compare_op1, VOIDmode, 0, -1);
14973 }
14974
14975 if (diff == 1)
14976 {
14977 /*
14978 * cmpl op0,op1
14979 * sbbl dest,dest
14980 * [addl dest, ct]
14981 *
14982 * Size 5 - 8.
14983 */
14984 if (ct)
14985 tmp = expand_simple_binop (mode, PLUS,
14986 tmp, GEN_INT (ct),
14987 copy_rtx (tmp), 1, OPTAB_DIRECT);
14988 }
14989 else if (cf == -1)
14990 {
14991 /*
14992 * cmpl op0,op1
14993 * sbbl dest,dest
14994 * orl $ct, dest
14995 *
14996 * Size 8.
14997 */
14998 tmp = expand_simple_binop (mode, IOR,
14999 tmp, GEN_INT (ct),
15000 copy_rtx (tmp), 1, OPTAB_DIRECT);
15001 }
15002 else if (diff == -1 && ct)
15003 {
15004 /*
15005 * cmpl op0,op1
15006 * sbbl dest,dest
15007 * notl dest
15008 * [addl dest, cf]
15009 *
15010 * Size 8 - 11.
15011 */
15012 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15013 if (cf)
15014 tmp = expand_simple_binop (mode, PLUS,
15015 copy_rtx (tmp), GEN_INT (cf),
15016 copy_rtx (tmp), 1, OPTAB_DIRECT);
15017 }
15018 else
15019 {
15020 /*
15021 * cmpl op0,op1
15022 * sbbl dest,dest
15023 * [notl dest]
15024 * andl cf - ct, dest
15025 * [addl dest, ct]
15026 *
15027 * Size 8 - 11.
15028 */
15029
15030 if (cf == 0)
15031 {
15032 cf = ct;
15033 ct = 0;
15034 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15035 }
15036
15037 tmp = expand_simple_binop (mode, AND,
15038 copy_rtx (tmp),
15039 gen_int_mode (cf - ct, mode),
15040 copy_rtx (tmp), 1, OPTAB_DIRECT);
15041 if (ct)
15042 tmp = expand_simple_binop (mode, PLUS,
15043 copy_rtx (tmp), GEN_INT (ct),
15044 copy_rtx (tmp), 1, OPTAB_DIRECT);
15045 }
15046
15047 if (!rtx_equal_p (tmp, out))
15048 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15049
15050 return 1; /* DONE */
15051 }
15052
15053 if (diff < 0)
15054 {
15055 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15056
15057 HOST_WIDE_INT tmp;
15058 tmp = ct, ct = cf, cf = tmp;
15059 diff = -diff;
15060
15061 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15062 {
15063 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15064
	    /* We may be reversing an unordered compare to a normal compare,
	       which is not valid in general (we may convert a non-trapping
	       condition to a trapping one); however, on i386 we currently
	       emit all comparisons unordered.  */
15069 compare_code = reverse_condition_maybe_unordered (compare_code);
15070 code = reverse_condition_maybe_unordered (code);
15071 }
15072 else
15073 {
15074 compare_code = reverse_condition (compare_code);
15075 code = reverse_condition (code);
15076 }
15077 }
15078
15079 compare_code = UNKNOWN;
15080 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15081 && CONST_INT_P (ix86_compare_op1))
15082 {
15083 if (ix86_compare_op1 == const0_rtx
15084 && (code == LT || code == GE))
15085 compare_code = code;
15086 else if (ix86_compare_op1 == constm1_rtx)
15087 {
15088 if (code == LE)
15089 compare_code = LT;
15090 else if (code == GT)
15091 compare_code = GE;
15092 }
15093 }
15094
15095 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15096 if (compare_code != UNKNOWN
15097 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15098 && (cf == -1 || ct == -1))
15099 {
15100 /* If lea code below could be used, only optimize
15101 if it results in a 2 insn sequence. */
15102
15103 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15104 || diff == 3 || diff == 5 || diff == 9)
15105 || (compare_code == LT && ct == -1)
15106 || (compare_code == GE && cf == -1))
15107 {
15108 /*
15109 * notl op1 (if necessary)
15110 * sarl $31, op1
15111 * orl cf, op1
15112 */
15113 if (ct != -1)
15114 {
15115 cf = ct;
15116 ct = -1;
15117 code = reverse_condition (code);
15118 }
15119
15120 out = emit_store_flag (out, code, ix86_compare_op0,
15121 ix86_compare_op1, VOIDmode, 0, -1);
15122
15123 out = expand_simple_binop (mode, IOR,
15124 out, GEN_INT (cf),
15125 out, 1, OPTAB_DIRECT);
15126 if (out != operands[0])
15127 emit_move_insn (operands[0], out);
15128
15129 return 1; /* DONE */
15130 }
15131 }
15132
15133
15134 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15135 || diff == 3 || diff == 5 || diff == 9)
15136 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15137 && (mode != DImode
15138 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15139 {
15140 /*
15141 * xorl dest,dest
15142 * cmpl op1,op2
15143 * setcc dest
15144 * lea cf(dest*(ct-cf)),dest
15145 *
15146 * Size 14.
15147 *
15148 * This also catches the degenerate setcc-only case.
15149 */
15150
15151 rtx tmp;
15152 int nops;
15153
15154 out = emit_store_flag (out, code, ix86_compare_op0,
15155 ix86_compare_op1, VOIDmode, 0, 1);
15156
15157 nops = 0;
	  /* On x86_64 the lea instruction operates on Pmode, so we need
	     the arithmetic done in the proper mode to match.  */
15160 if (diff == 1)
15161 tmp = copy_rtx (out);
15162 else
15163 {
15164 rtx out1;
15165 out1 = copy_rtx (out);
15166 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15167 nops++;
15168 if (diff & 1)
15169 {
15170 tmp = gen_rtx_PLUS (mode, tmp, out1);
15171 nops++;
15172 }
15173 }
15174 if (cf != 0)
15175 {
15176 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15177 nops++;
15178 }
15179 if (!rtx_equal_p (tmp, out))
15180 {
15181 if (nops == 1)
15182 out = force_operand (tmp, copy_rtx (out));
15183 else
15184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15185 }
15186 if (!rtx_equal_p (out, operands[0]))
15187 emit_move_insn (operands[0], copy_rtx (out));
15188
15189 return 1; /* DONE */
15190 }
15191
15192 /*
15193 * General case: Jumpful:
15194 * xorl dest,dest cmpl op1, op2
15195 * cmpl op1, op2 movl ct, dest
15196 * setcc dest jcc 1f
15197 * decl dest movl cf, dest
15198 * andl (cf-ct),dest 1:
15199 * addl ct,dest
15200 *
15201 * Size 20. Size 14.
15202 *
15203 * This is reasonably steep, but branch mispredict costs are
15204 * high on modern cpus, so consider failing only if optimizing
15205 * for space.
15206 */
15207
15208 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15209 && BRANCH_COST (optimize_insn_for_speed_p (),
15210 false) >= 2)
15211 {
15212 if (cf == 0)
15213 {
15214 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15215
15216 cf = ct;
15217 ct = 0;
15218
15219 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15220 {
15221 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15222
	      /* We may be reversing an unordered compare to a normal compare,
		 which is not valid in general (we may convert a non-trapping
		 condition to a trapping one); however, on i386 we currently
		 emit all comparisons unordered.  */
15227 code = reverse_condition_maybe_unordered (code);
15228 }
15229 else
15230 {
15231 code = reverse_condition (code);
15232 if (compare_code != UNKNOWN)
15233 compare_code = reverse_condition (compare_code);
15234 }
15235 }
15236
15237 if (compare_code != UNKNOWN)
15238 {
15239 /* notl op1 (if needed)
15240 sarl $31, op1
15241 andl (cf-ct), op1
15242 addl ct, op1
15243
15244 For x < 0 (resp. x <= -1) there will be no notl,
15245 so if possible swap the constants to get rid of the
15246 complement.
15247 True/false will be -1/0 while code below (store flag
15248 followed by decrement) is 0/-1, so the constants need
15249 to be exchanged once more. */
15250
15251 if (compare_code == GE || !cf)
15252 {
15253 code = reverse_condition (code);
15254 compare_code = LT;
15255 }
15256 else
15257 {
15258 HOST_WIDE_INT tmp = cf;
15259 cf = ct;
15260 ct = tmp;
15261 }
15262
15263 out = emit_store_flag (out, code, ix86_compare_op0,
15264 ix86_compare_op1, VOIDmode, 0, -1);
15265 }
15266 else
15267 {
15268 out = emit_store_flag (out, code, ix86_compare_op0,
15269 ix86_compare_op1, VOIDmode, 0, 1);
15270
15271 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15272 copy_rtx (out), 1, OPTAB_DIRECT);
15273 }
15274
15275 out = expand_simple_binop (mode, AND, copy_rtx (out),
15276 gen_int_mode (cf - ct, mode),
15277 copy_rtx (out), 1, OPTAB_DIRECT);
15278 if (ct)
15279 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15280 copy_rtx (out), 1, OPTAB_DIRECT);
15281 if (!rtx_equal_p (out, operands[0]))
15282 emit_move_insn (operands[0], copy_rtx (out));
15283
15284 return 1; /* DONE */
15285 }
15286 }
15287
15288 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15289 {
15290 /* Try a few things more with specific constants and a variable. */
15291
15292 optab op;
15293 rtx var, orig_out, out, tmp;
15294
15295 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15296 return 0; /* FAIL */
15297
      /* If one of the two operands is an interesting constant, load a 0/-1
	 constant via a recursive call below and then mask in the variable
	 with a logical operation.  */
15300
15301 if (CONST_INT_P (operands[2]))
15302 {
15303 var = operands[3];
15304 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15305 operands[3] = constm1_rtx, op = and_optab;
15306 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15307 operands[3] = const0_rtx, op = ior_optab;
15308 else
15309 return 0; /* FAIL */
15310 }
15311 else if (CONST_INT_P (operands[3]))
15312 {
15313 var = operands[2];
15314 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15315 operands[2] = constm1_rtx, op = and_optab;
	  else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15317 operands[2] = const0_rtx, op = ior_optab;
15318 else
15319 return 0; /* FAIL */
15320 }
15321 else
15322 return 0; /* FAIL */
15323
15324 orig_out = operands[0];
15325 tmp = gen_reg_rtx (mode);
15326 operands[0] = tmp;
15327
15328 /* Recurse to get the constant loaded. */
15329 if (ix86_expand_int_movcc (operands) == 0)
15330 return 0; /* FAIL */
15331
15332 /* Mask in the interesting variable. */
15333 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15334 OPTAB_WIDEN);
15335 if (!rtx_equal_p (out, orig_out))
15336 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15337
15338 return 1; /* DONE */
15339 }
15340
15341 /*
15342 * For comparison with above,
15343 *
15344 * movl cf,dest
15345 * movl ct,tmp
15346 * cmpl op1,op2
15347 * cmovcc tmp,dest
15348 *
15349 * Size 15.
15350 */
15351
15352 if (! nonimmediate_operand (operands[2], mode))
15353 operands[2] = force_reg (mode, operands[2]);
15354 if (! nonimmediate_operand (operands[3], mode))
15355 operands[3] = force_reg (mode, operands[3]);
15356
15357 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15358 {
15359 rtx tmp = gen_reg_rtx (mode);
15360 emit_move_insn (tmp, operands[3]);
15361 operands[3] = tmp;
15362 }
15363 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15364 {
15365 rtx tmp = gen_reg_rtx (mode);
15366 emit_move_insn (tmp, operands[2]);
15367 operands[2] = tmp;
15368 }
15369
15370 if (! register_operand (operands[2], VOIDmode)
15371 && (mode == QImode
15372 || ! register_operand (operands[3], VOIDmode)))
15373 operands[2] = force_reg (mode, operands[2]);
15374
15375 if (mode == QImode
15376 && ! register_operand (operands[3], VOIDmode))
15377 operands[3] = force_reg (mode, operands[3]);
15378
15379 emit_insn (compare_seq);
15380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15381 gen_rtx_IF_THEN_ELSE (mode,
15382 compare_op, operands[2],
15383 operands[3])));
15384 if (bypass_test)
15385 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15386 gen_rtx_IF_THEN_ELSE (mode,
15387 bypass_test,
15388 copy_rtx (operands[3]),
15389 copy_rtx (operands[0]))));
15390 if (second_test)
15391 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15392 gen_rtx_IF_THEN_ELSE (mode,
15393 second_test,
15394 copy_rtx (operands[2]),
15395 copy_rtx (operands[0]))));
15396
15397 return 1; /* DONE */
15398 }
15399
15400 /* Swap, force into registers, or otherwise massage the two operands
15401 to an sse comparison with a mask result. Thus we differ a bit from
15402 ix86_prepare_fp_compare_args which expects to produce a flags result.
15403
15404 The DEST operand exists to help determine whether to commute commutative
15405 operators. The POP0/POP1 operands are updated in place. The new
15406 comparison code is returned, or UNKNOWN if not implementable. */
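/* For example, SSE compares provide no direct GT predicate, so "a > b"
   is rewritten here as "b < a" by swapping the operands.  */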
15407
15408 static enum rtx_code
15409 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15410 rtx *pop0, rtx *pop1)
15411 {
15412 rtx tmp;
15413
15414 switch (code)
15415 {
15416 case LTGT:
15417 case UNEQ:
15418 /* We have no LTGT as an operator. We could implement it with
15419 NE & ORDERED, but this requires an extra temporary. It's
15420 not clear that it's worth it. */
15421 return UNKNOWN;
15422
15423 case LT:
15424 case LE:
15425 case UNGT:
15426 case UNGE:
15427 /* These are supported directly. */
15428 break;
15429
15430 case EQ:
15431 case NE:
15432 case UNORDERED:
15433 case ORDERED:
15434 /* For commutative operators, try to canonicalize the destination
15435 operand to be first in the comparison - this helps reload to
15436 avoid extra moves. */
15437 if (!dest || !rtx_equal_p (dest, *pop1))
15438 break;
15439 /* FALLTHRU */
15440
15441 case GE:
15442 case GT:
15443 case UNLE:
15444 case UNLT:
15445 /* These are not supported directly. Swap the comparison operands
15446 to transform into something that is supported. */
15447 tmp = *pop0;
15448 *pop0 = *pop1;
15449 *pop1 = tmp;
15450 code = swap_condition (code);
15451 break;
15452
15453 default:
15454 gcc_unreachable ();
15455 }
15456
15457 return code;
15458 }
15459
15460 /* Detect conditional moves that exactly match min/max operational
15461 semantics. Note that this is IEEE safe, as long as we don't
15462 interchange the operands.
15463
15464 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15465 and TRUE if the operation is successful and instructions are emitted. */
15466
15467 static bool
15468 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15469 rtx cmp_op1, rtx if_true, rtx if_false)
15470 {
15471 enum machine_mode mode;
15472 bool is_min;
15473 rtx tmp;
15474
15475 if (code == LT)
15476 ;
15477 else if (code == UNGE)
15478 {
15479 tmp = if_true;
15480 if_true = if_false;
15481 if_false = tmp;
15482 }
15483 else
15484 return false;
15485
15486 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15487 is_min = true;
15488 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15489 is_min = false;
15490 else
15491 return false;
15492
15493 mode = GET_MODE (dest);
15494
15495 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15496 but MODE may be a vector mode and thus not appropriate. */
15497 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15498 {
15499 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15500 rtvec v;
15501
15502 if_true = force_reg (mode, if_true);
15503 v = gen_rtvec (2, if_true, if_false);
15504 tmp = gen_rtx_UNSPEC (mode, v, u);
15505 }
15506 else
15507 {
15508 code = is_min ? SMIN : SMAX;
15509 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15510 }
15511
15512 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15513 return true;
15514 }
15515
15516 /* Expand an sse vector comparison. Return the register with the result. */
15517
15518 static rtx
15519 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15520 rtx op_true, rtx op_false)
15521 {
15522 enum machine_mode mode = GET_MODE (dest);
15523 rtx x;
15524
15525 cmp_op0 = force_reg (mode, cmp_op0);
15526 if (!nonimmediate_operand (cmp_op1, mode))
15527 cmp_op1 = force_reg (mode, cmp_op1);
15528
15529 if (optimize
15530 || reg_overlap_mentioned_p (dest, op_true)
15531 || reg_overlap_mentioned_p (dest, op_false))
15532 dest = gen_reg_rtx (mode);
15533
15534 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15535 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15536
15537 return dest;
15538 }
15539
15540 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15541 operations. This is used for both scalar and vector conditional moves. */
15542
15543 static void
15544 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15545 {
15546 enum machine_mode mode = GET_MODE (dest);
15547 rtx t2, t3, x;
15548
15549 if (op_false == CONST0_RTX (mode))
15550 {
15551 op_true = force_reg (mode, op_true);
15552 x = gen_rtx_AND (mode, cmp, op_true);
15553 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15554 }
15555 else if (op_true == CONST0_RTX (mode))
15556 {
15557 op_false = force_reg (mode, op_false);
15558 x = gen_rtx_NOT (mode, cmp);
15559 x = gen_rtx_AND (mode, x, op_false);
15560 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15561 }
15562 else if (TARGET_SSE5)
15563 {
15564 rtx pcmov = gen_rtx_SET (mode, dest,
15565 gen_rtx_IF_THEN_ELSE (mode, cmp,
15566 op_true,
15567 op_false));
15568 emit_insn (pcmov);
15569 }
15570 else
15571 {
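      /* General case: dest = (cmp & op_true) | (~cmp & op_false).  */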
15572 op_true = force_reg (mode, op_true);
15573 op_false = force_reg (mode, op_false);
15574
15575 t2 = gen_reg_rtx (mode);
15576 if (optimize)
15577 t3 = gen_reg_rtx (mode);
15578 else
15579 t3 = dest;
15580
15581 x = gen_rtx_AND (mode, op_true, cmp);
15582 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15583
15584 x = gen_rtx_NOT (mode, cmp);
15585 x = gen_rtx_AND (mode, x, op_false);
15586 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15587
15588 x = gen_rtx_IOR (mode, t3, t2);
15589 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15590 }
15591 }
15592
15593 /* Expand a floating-point conditional move. Return true if successful. */
15594
15595 int
15596 ix86_expand_fp_movcc (rtx operands[])
15597 {
15598 enum machine_mode mode = GET_MODE (operands[0]);
15599 enum rtx_code code = GET_CODE (operands[1]);
15600 rtx tmp, compare_op, second_test, bypass_test;
15601
15602 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15603 {
15604 enum machine_mode cmode;
15605
15606 /* Since we've no cmove for sse registers, don't force bad register
15607 allocation just to gain access to it. Deny movcc when the
15608 comparison mode doesn't match the move mode. */
15609 cmode = GET_MODE (ix86_compare_op0);
15610 if (cmode == VOIDmode)
15611 cmode = GET_MODE (ix86_compare_op1);
15612 if (cmode != mode)
15613 return 0;
15614
15615 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15616 &ix86_compare_op0,
15617 &ix86_compare_op1);
15618 if (code == UNKNOWN)
15619 return 0;
15620
15621 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15622 ix86_compare_op1, operands[2],
15623 operands[3]))
15624 return 1;
15625
15626 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15627 ix86_compare_op1, operands[2], operands[3]);
15628 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15629 return 1;
15630 }
15631
15632 /* The floating point conditional move instructions don't directly
15633 support conditions resulting from a signed integer comparison. */
15634
15635 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15636
15640 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15641 {
15642 gcc_assert (!second_test && !bypass_test);
15643 tmp = gen_reg_rtx (QImode);
15644 ix86_expand_setcc (code, tmp);
15645 code = NE;
15646 ix86_compare_op0 = tmp;
15647 ix86_compare_op1 = const0_rtx;
15648 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15649 }
15650 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15651 {
15652 tmp = gen_reg_rtx (mode);
15653 emit_move_insn (tmp, operands[3]);
15654 operands[3] = tmp;
15655 }
15656 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15657 {
15658 tmp = gen_reg_rtx (mode);
15659 emit_move_insn (tmp, operands[2]);
15660 operands[2] = tmp;
15661 }
15662
15663 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15664 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15665 operands[2], operands[3])));
15666 if (bypass_test)
15667 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15668 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15669 operands[3], operands[0])));
15670 if (second_test)
15671 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15672 gen_rtx_IF_THEN_ELSE (mode, second_test,
15673 operands[2], operands[0])));
15674
15675 return 1;
15676 }
15677
15678 /* Expand a floating-point vector conditional move; a vcond operation
15679 rather than a movcc operation. */
15680
15681 bool
15682 ix86_expand_fp_vcond (rtx operands[])
15683 {
15684 enum rtx_code code = GET_CODE (operands[3]);
15685 rtx cmp;
15686
15687 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15688 &operands[4], &operands[5]);
15689 if (code == UNKNOWN)
15690 return false;
15691
15692 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15693 operands[5], operands[1], operands[2]))
15694 return true;
15695
15696 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15697 operands[1], operands[2]);
15698 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15699 return true;
15700 }
15701
15702 /* Expand a signed/unsigned integral vector conditional move. */
15703
15704 bool
15705 ix86_expand_int_vcond (rtx operands[])
15706 {
15707 enum machine_mode mode = GET_MODE (operands[0]);
15708 enum rtx_code code = GET_CODE (operands[3]);
15709 bool negate = false;
15710 rtx x, cop0, cop1;
15711
15712 cop0 = operands[4];
15713 cop1 = operands[5];
15714
15715 /* SSE5 supports all of the comparisons on all vector int types. */
15716 if (!TARGET_SSE5)
15717 {
15718 /* Canonicalize the comparison to EQ, GT, GTU. */
15719 switch (code)
15720 {
15721 case EQ:
15722 case GT:
15723 case GTU:
15724 break;
15725
15726 case NE:
15727 case LE:
15728 case LEU:
15729 code = reverse_condition (code);
15730 negate = true;
15731 break;
15732
15733 case GE:
15734 case GEU:
15735 code = reverse_condition (code);
15736 negate = true;
15737 /* FALLTHRU */
15738
15739 case LT:
15740 case LTU:
15741 code = swap_condition (code);
15742 x = cop0, cop0 = cop1, cop1 = x;
15743 break;
15744
15745 default:
15746 gcc_unreachable ();
15747 }
15748
15749 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15750 if (mode == V2DImode)
15751 {
15752 switch (code)
15753 {
15754 case EQ:
15755 /* SSE4.1 supports EQ. */
15756 if (!TARGET_SSE4_1)
15757 return false;
15758 break;
15759
15760 case GT:
15761 case GTU:
15762 /* SSE4.2 supports GT/GTU. */
15763 if (!TARGET_SSE4_2)
15764 return false;
15765 break;
15766
15767 default:
15768 gcc_unreachable ();
15769 }
15770 }
15771
      /* Unsigned parallel compare is not supported by the hardware.
	 Play some tricks to turn this into a signed comparison, or a
	 comparison against zero.  */
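      /* For example, for V4SImode "a GTU b" is computed as
	 "(a - 0x80000000) GT (b - 0x80000000)" with a signed compare,
	 i.e. both operands are biased by the sign-bit mask first.  */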
15775 if (code == GTU)
15776 {
15777 cop0 = force_reg (mode, cop0);
15778
15779 switch (mode)
15780 {
15781 case V4SImode:
15782 case V2DImode:
15783 {
15784 rtx t1, t2, mask;
15785 rtx (*gen_sub3) (rtx, rtx, rtx);
15786
15787 /* Subtract (-(INT MAX) - 1) from both operands to make
15788 them signed. */
15789 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15790 true, false);
15791 gen_sub3 = (mode == V4SImode
15792 ? gen_subv4si3 : gen_subv2di3);
15793 t1 = gen_reg_rtx (mode);
15794 emit_insn (gen_sub3 (t1, cop0, mask));
15795
15796 t2 = gen_reg_rtx (mode);
15797 emit_insn (gen_sub3 (t2, cop1, mask));
15798
15799 cop0 = t1;
15800 cop1 = t2;
15801 code = GT;
15802 }
15803 break;
15804
15805 case V16QImode:
15806 case V8HImode:
15807 /* Perform a parallel unsigned saturating subtraction. */
15808 x = gen_reg_rtx (mode);
15809 emit_insn (gen_rtx_SET (VOIDmode, x,
15810 gen_rtx_US_MINUS (mode, cop0, cop1)));
15811
15812 cop0 = x;
15813 cop1 = CONST0_RTX (mode);
15814 code = EQ;
15815 negate = !negate;
15816 break;
15817
15818 default:
15819 gcc_unreachable ();
15820 }
15821 }
15822 }
15823
15824 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15825 operands[1+negate], operands[2-negate]);
15826
15827 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15828 operands[2-negate]);
15829 return true;
15830 }
15831
15832 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15833 true if we should do zero extension, else sign extension. HIGH_P is
15834 true if we want the N/2 high elements, else the low elements. */
15835
15836 void
15837 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15838 {
15839 enum machine_mode imode = GET_MODE (operands[1]);
15840 rtx (*unpack)(rtx, rtx, rtx);
15841 rtx se, dest;
15842
15843 switch (imode)
15844 {
15845 case V16QImode:
15846 if (high_p)
15847 unpack = gen_vec_interleave_highv16qi;
15848 else
15849 unpack = gen_vec_interleave_lowv16qi;
15850 break;
15851 case V8HImode:
15852 if (high_p)
15853 unpack = gen_vec_interleave_highv8hi;
15854 else
15855 unpack = gen_vec_interleave_lowv8hi;
15856 break;
15857 case V4SImode:
15858 if (high_p)
15859 unpack = gen_vec_interleave_highv4si;
15860 else
15861 unpack = gen_vec_interleave_lowv4si;
15862 break;
15863 default:
15864 gcc_unreachable ();
15865 }
15866
15867 dest = gen_lowpart (imode, operands[0]);
15868
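  /* For sign extension, build the extension words by computing
     "0 > OP[1]" elementwise; the compare yields an all-ones mask
     exactly for the negative elements, which is the required fill.  */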
15869 if (unsigned_p)
15870 se = force_reg (imode, CONST0_RTX (imode));
15871 else
15872 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15873 operands[1], pc_rtx, pc_rtx);
15874
15875 emit_insn (unpack (dest, operands[1], se));
15876 }
15877
15878 /* This function performs the same task as ix86_expand_sse_unpack,
15879 but with SSE4.1 instructions. */
15880
15881 void
15882 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15883 {
15884 enum machine_mode imode = GET_MODE (operands[1]);
15885 rtx (*unpack)(rtx, rtx);
15886 rtx src, dest;
15887
15888 switch (imode)
15889 {
15890 case V16QImode:
15891 if (unsigned_p)
15892 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15893 else
15894 unpack = gen_sse4_1_extendv8qiv8hi2;
15895 break;
15896 case V8HImode:
15897 if (unsigned_p)
15898 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15899 else
15900 unpack = gen_sse4_1_extendv4hiv4si2;
15901 break;
15902 case V4SImode:
15903 if (unsigned_p)
15904 unpack = gen_sse4_1_zero_extendv2siv2di2;
15905 else
15906 unpack = gen_sse4_1_extendv2siv2di2;
15907 break;
15908 default:
15909 gcc_unreachable ();
15910 }
15911
15912 dest = operands[0];
15913 if (high_p)
15914 {
15915 /* Shift higher 8 bytes to lower 8 bytes. */
15916 src = gen_reg_rtx (imode);
15917 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15918 gen_lowpart (TImode, operands[1]),
15919 GEN_INT (64)));
15920 }
15921 else
15922 src = operands[1];
15923
15924 emit_insn (unpack (dest, src));
15925 }
15926
15927 /* This function performs the same task as ix86_expand_sse_unpack,
15928 but with sse5 instructions. */
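/* The PPERM control vectors built below make each widened destination
   element consist of the selected source bytes followed by either zero
   bytes (zero extension) or sign-replicated bytes (sign extension).  */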
15929
15930 void
15931 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15932 {
15933 enum machine_mode imode = GET_MODE (operands[1]);
15934 int pperm_bytes[16];
15935 int i;
15936 int h = (high_p) ? 8 : 0;
15937 int h2;
15938 int sign_extend;
15939 rtvec v = rtvec_alloc (16);
15940 rtvec vs;
15941 rtx x, p;
15942 rtx op0 = operands[0], op1 = operands[1];
15943
15944 switch (imode)
15945 {
15946 case V16QImode:
15947 vs = rtvec_alloc (8);
15948 h2 = (high_p) ? 8 : 0;
15949 for (i = 0; i < 8; i++)
15950 {
15951 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15952 pperm_bytes[2*i+1] = ((unsigned_p)
15953 ? PPERM_ZERO
15954 : PPERM_SIGN | PPERM_SRC2 | i | h);
15955 }
15956
15957 for (i = 0; i < 16; i++)
15958 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15959
15960 for (i = 0; i < 8; i++)
15961 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15962
15963 p = gen_rtx_PARALLEL (VOIDmode, vs);
15964 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15965 if (unsigned_p)
15966 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15967 else
15968 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15969 break;
15970
15971 case V8HImode:
15972 vs = rtvec_alloc (4);
15973 h2 = (high_p) ? 4 : 0;
15974 for (i = 0; i < 4; i++)
15975 {
15976 sign_extend = ((unsigned_p)
15977 ? PPERM_ZERO
15978 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15979 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15980 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15981 pperm_bytes[4*i+2] = sign_extend;
15982 pperm_bytes[4*i+3] = sign_extend;
15983 }
15984
15985 for (i = 0; i < 16; i++)
15986 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15987
15988 for (i = 0; i < 4; i++)
15989 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15990
15991 p = gen_rtx_PARALLEL (VOIDmode, vs);
15992 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15993 if (unsigned_p)
15994 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15995 else
15996 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15997 break;
15998
15999 case V4SImode:
16000 vs = rtvec_alloc (2);
16001 h2 = (high_p) ? 2 : 0;
16002 for (i = 0; i < 2; i++)
16003 {
16004 sign_extend = ((unsigned_p)
16005 ? PPERM_ZERO
16006 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16007 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16008 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16009 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16010 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16011 pperm_bytes[8*i+4] = sign_extend;
16012 pperm_bytes[8*i+5] = sign_extend;
16013 pperm_bytes[8*i+6] = sign_extend;
16014 pperm_bytes[8*i+7] = sign_extend;
16015 }
16016
16017 for (i = 0; i < 16; i++)
16018 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16019
16020 for (i = 0; i < 2; i++)
16021 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16022
16023 p = gen_rtx_PARALLEL (VOIDmode, vs);
16024 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16025 if (unsigned_p)
16026 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16027 else
16028 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16029 break;
16030
16031 default:
16032 gcc_unreachable ();
16033 }
16034
16035 return;
16036 }
16037
/* Pack OPERANDS[1] and OPERANDS[2] into the next narrower integer vector
   type, truncating each element; OPERANDS[1] supplies the low half of the
   result and OPERANDS[2] the high half.  */
16040 void
16041 ix86_expand_sse5_pack (rtx operands[3])
16042 {
16043 enum machine_mode imode = GET_MODE (operands[0]);
16044 int pperm_bytes[16];
16045 int i;
16046 rtvec v = rtvec_alloc (16);
16047 rtx x;
16048 rtx op0 = operands[0];
16049 rtx op1 = operands[1];
16050 rtx op2 = operands[2];
16051
16052 switch (imode)
16053 {
16054 case V16QImode:
16055 for (i = 0; i < 8; i++)
16056 {
16057 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16058 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16059 }
16060
16061 for (i = 0; i < 16; i++)
16062 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16063
16064 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16065 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16066 break;
16067
16068 case V8HImode:
16069 for (i = 0; i < 4; i++)
16070 {
16071 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16072 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16073 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16074 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16075 }
16076
16077 for (i = 0; i < 16; i++)
16078 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16079
16080 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16081 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16082 break;
16083
16084 case V4SImode:
16085 for (i = 0; i < 2; i++)
16086 {
16087 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16088 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16089 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16090 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16091 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16092 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16093 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16094 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16095 }
16096
16097 for (i = 0; i < 16; i++)
16098 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16099
16100 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16101 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16102 break;
16103
16104 default:
16105 gcc_unreachable ();
16106 }
16107
16108 return;
16109 }
16110
/* Expand conditional increment or decrement using adc/sbb instructions.
   The default case using setcc followed by the conditional move can be
   done by generic code.  */
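/* For example, with an unsigned "a < b" condition and operand 3 equal
   to const1_rtx, the emitted sequence is roughly "cmp b, a ; adc $0,
   dest", i.e. operand 2 plus the carry from the compare.  (Illustrative
   sketch only; the exact insn depends on the mode and on whether an
   increment or a decrement is requested.)  */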
16114 int
16115 ix86_expand_int_addcc (rtx operands[])
16116 {
16117 enum rtx_code code = GET_CODE (operands[1]);
16118 rtx compare_op;
16119 rtx val = const0_rtx;
16120 bool fpcmp = false;
16121 enum machine_mode mode = GET_MODE (operands[0]);
16122
16123 if (operands[3] != const1_rtx
16124 && operands[3] != constm1_rtx)
16125 return 0;
16126 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16127 ix86_compare_op1, &compare_op))
16128 return 0;
16129 code = GET_CODE (compare_op);
16130
16131 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16132 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16133 {
16134 fpcmp = true;
16135 code = ix86_fp_compare_code_to_integer (code);
16136 }
16137
16138 if (code != LTU)
16139 {
16140 val = constm1_rtx;
16141 if (fpcmp)
16142 PUT_CODE (compare_op,
16143 reverse_condition_maybe_unordered
16144 (GET_CODE (compare_op)));
16145 else
16146 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16147 }
16148 PUT_MODE (compare_op, mode);
16149
16150 /* Construct either adc or sbb insn. */
16151 if ((code == LTU) == (operands[3] == constm1_rtx))
16152 {
16153 switch (GET_MODE (operands[0]))
16154 {
16155 case QImode:
16156 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16157 break;
16158 case HImode:
16159 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16160 break;
16161 case SImode:
16162 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16163 break;
16164 case DImode:
16165 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16166 break;
16167 default:
16168 gcc_unreachable ();
16169 }
16170 }
16171 else
16172 {
16173 switch (GET_MODE (operands[0]))
16174 {
16175 case QImode:
16176 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16177 break;
16178 case HImode:
16179 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16180 break;
16181 case SImode:
16182 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16183 break;
16184 case DImode:
16185 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16186 break;
16187 default:
16188 gcc_unreachable ();
16189 }
16190 }
16191 return 1; /* DONE */
16192 }
16193
16194
/* Split OPERAND into word-sized parts, stored in PARTS.  Similar to
   split_di, but works for floating point values and non-offsettable
   memories.  For pushes, it returns just stack offsets; the values
   will be saved in the right order.  Up to four parts are generated.  */
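/* For example, on a 32-bit target a DFmode operand splits into two
   SImode parts, an XFmode operand into three and a TFmode operand into
   four, while on a 64-bit target XFmode and TFmode split into two
   parts.  */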
16199
16200 static int
16201 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16202 {
16203 int size;
16204
16205 if (!TARGET_64BIT)
16206 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16207 else
16208 size = (GET_MODE_SIZE (mode) + 4) / 8;
16209
16210 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16211 gcc_assert (size >= 2 && size <= 4);
16212
16213 /* Optimize constant pool reference to immediates. This is used by fp
16214 moves, that force all constants to memory to allow combining. */
16215 if (MEM_P (operand) && MEM_READONLY_P (operand))
16216 {
16217 rtx tmp = maybe_get_pool_constant (operand);
16218 if (tmp)
16219 operand = tmp;
16220 }
16221
16222 if (MEM_P (operand) && !offsettable_memref_p (operand))
16223 {
      /* The only non-offsettable memories we handle are pushes.  */
16225 int ok = push_operand (operand, VOIDmode);
16226
16227 gcc_assert (ok);
16228
16229 operand = copy_rtx (operand);
16230 PUT_MODE (operand, Pmode);
16231 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16232 return size;
16233 }
16234
16235 if (GET_CODE (operand) == CONST_VECTOR)
16236 {
16237 enum machine_mode imode = int_mode_for_mode (mode);
16238 /* Caution: if we looked through a constant pool memory above,
16239 the operand may actually have a different mode now. That's
16240 ok, since we want to pun this all the way back to an integer. */
16241 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16242 gcc_assert (operand != NULL);
16243 mode = imode;
16244 }
16245
16246 if (!TARGET_64BIT)
16247 {
16248 if (mode == DImode)
16249 split_di (&operand, 1, &parts[0], &parts[1]);
16250 else
16251 {
16252 int i;
16253
16254 if (REG_P (operand))
16255 {
16256 gcc_assert (reload_completed);
16257 for (i = 0; i < size; i++)
16258 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16259 }
16260 else if (offsettable_memref_p (operand))
16261 {
16262 operand = adjust_address (operand, SImode, 0);
16263 parts[0] = operand;
16264 for (i = 1; i < size; i++)
16265 parts[i] = adjust_address (operand, SImode, 4 * i);
16266 }
16267 else if (GET_CODE (operand) == CONST_DOUBLE)
16268 {
16269 REAL_VALUE_TYPE r;
16270 long l[4];
16271
16272 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16273 switch (mode)
16274 {
16275 case TFmode:
16276 real_to_target (l, &r, mode);
16277 parts[3] = gen_int_mode (l[3], SImode);
16278 parts[2] = gen_int_mode (l[2], SImode);
16279 break;
16280 case XFmode:
16281 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16282 parts[2] = gen_int_mode (l[2], SImode);
16283 break;
16284 case DFmode:
16285 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16286 break;
16287 default:
16288 gcc_unreachable ();
16289 }
16290 parts[1] = gen_int_mode (l[1], SImode);
16291 parts[0] = gen_int_mode (l[0], SImode);
16292 }
16293 else
16294 gcc_unreachable ();
16295 }
16296 }
16297 else
16298 {
16299 if (mode == TImode)
16300 split_ti (&operand, 1, &parts[0], &parts[1]);
16301 if (mode == XFmode || mode == TFmode)
16302 {
16303 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16304 if (REG_P (operand))
16305 {
16306 gcc_assert (reload_completed);
16307 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16308 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16309 }
16310 else if (offsettable_memref_p (operand))
16311 {
16312 operand = adjust_address (operand, DImode, 0);
16313 parts[0] = operand;
16314 parts[1] = adjust_address (operand, upper_mode, 8);
16315 }
16316 else if (GET_CODE (operand) == CONST_DOUBLE)
16317 {
16318 REAL_VALUE_TYPE r;
16319 long l[4];
16320
16321 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16322 real_to_target (l, &r, mode);
16323
16324 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16325 if (HOST_BITS_PER_WIDE_INT >= 64)
16326 parts[0]
16327 = gen_int_mode
16328 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16329 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16330 DImode);
16331 else
16332 parts[0] = immed_double_const (l[0], l[1], DImode);
16333
16334 if (upper_mode == SImode)
16335 parts[1] = gen_int_mode (l[2], SImode);
16336 else if (HOST_BITS_PER_WIDE_INT >= 64)
16337 parts[1]
16338 = gen_int_mode
16339 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16340 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16341 DImode);
16342 else
16343 parts[1] = immed_double_const (l[2], l[3], DImode);
16344 }
16345 else
16346 gcc_unreachable ();
16347 }
16348 }
16349
16350 return size;
16351 }
16352
/* Emit insns to perform a move or push of DI, DF, XF, and TF values.
   The value is split into word-sized parts; operands 2-5 receive the
   output parts in the correct order and operands 6-9 the corresponding
   input parts.  */
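/* For example, a DImode move on a 32-bit target becomes two SImode
   moves, ordered so that a destination register that overlaps the
   source address is written last.  */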
16357
16358 void
16359 ix86_split_long_move (rtx operands[])
16360 {
16361 rtx part[2][4];
16362 int nparts, i, j;
16363 int push = 0;
16364 int collisions = 0;
16365 enum machine_mode mode = GET_MODE (operands[0]);
16366 bool collisionparts[4];
16367
  /* The DFmode expanders may ask us to move a double.
     For a 64-bit target this is a single move.  By hiding the fact
     here we simplify the i386.md splitters.  */
16371 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16372 {
16373 /* Optimize constant pool reference to immediates. This is used by
16374 fp moves, that force all constants to memory to allow combining. */
16375
16376 if (MEM_P (operands[1])
16377 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16378 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16379 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16380 if (push_operand (operands[0], VOIDmode))
16381 {
16382 operands[0] = copy_rtx (operands[0]);
16383 PUT_MODE (operands[0], Pmode);
16384 }
16385 else
16386 operands[0] = gen_lowpart (DImode, operands[0]);
16387 operands[1] = gen_lowpart (DImode, operands[1]);
16388 emit_move_insn (operands[0], operands[1]);
16389 return;
16390 }
16391
16392 /* The only non-offsettable memory we handle is push. */
16393 if (push_operand (operands[0], VOIDmode))
16394 push = 1;
16395 else
16396 gcc_assert (!MEM_P (operands[0])
16397 || offsettable_memref_p (operands[0]));
16398
16399 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16400 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16401
  /* When emitting a push, take care with source operands on the stack.  */
16403 if (push && MEM_P (operands[1])
16404 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16405 {
16406 rtx src_base = XEXP (part[1][nparts - 1], 0);
16407
16408 /* Compensate for the stack decrement by 4. */
16409 if (!TARGET_64BIT && nparts == 3
16410 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16411 src_base = plus_constant (src_base, 4);
16412
      /* src_base refers to the stack pointer and is
	 automatically decreased by the emitted pushes.  */
16415 for (i = 0; i < nparts; i++)
16416 part[1][i] = change_address (part[1][i],
16417 GET_MODE (part[1][i]), src_base);
16418 }
16419
  /* We need to do the copies in the right order in case an address register
     of the source overlaps the destination.  */
16422 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16423 {
16424 rtx tmp;
16425
16426 for (i = 0; i < nparts; i++)
16427 {
16428 collisionparts[i]
16429 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16430 if (collisionparts[i])
16431 collisions++;
16432 }
16433
16434 /* Collision in the middle part can be handled by reordering. */
16435 if (collisions == 1 && nparts == 3 && collisionparts [1])
16436 {
16437 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16438 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16439 }
16440 else if (collisions == 1
16441 && nparts == 4
16442 && (collisionparts [1] || collisionparts [2]))
16443 {
16444 if (collisionparts [1])
16445 {
16446 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16447 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16448 }
16449 else
16450 {
16451 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16452 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16453 }
16454 }
16455
16456 /* If there are more collisions, we can't handle it by reordering.
16457 Do an lea to the last part and use only one colliding move. */
16458 else if (collisions > 1)
16459 {
16460 rtx base;
16461
16462 collisions = 1;
16463
16464 base = part[0][nparts - 1];
16465
16466 /* Handle the case when the last part isn't valid for lea.
16467 Happens in 64-bit mode storing the 12-byte XFmode. */
16468 if (GET_MODE (base) != Pmode)
16469 base = gen_rtx_REG (Pmode, REGNO (base));
16470
16471 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16472 part[1][0] = replace_equiv_address (part[1][0], base);
16473 for (i = 1; i < nparts; i++)
16474 {
16475 tmp = plus_constant (base, UNITS_PER_WORD * i);
16476 part[1][i] = replace_equiv_address (part[1][i], tmp);
16477 }
16478 }
16479 }
16480
16481 if (push)
16482 {
16483 if (!TARGET_64BIT)
16484 {
16485 if (nparts == 3)
16486 {
16487 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16488 emit_insn (gen_addsi3 (stack_pointer_rtx,
16489 stack_pointer_rtx, GEN_INT (-4)));
16490 emit_move_insn (part[0][2], part[1][2]);
16491 }
16492 else if (nparts == 4)
16493 {
16494 emit_move_insn (part[0][3], part[1][3]);
16495 emit_move_insn (part[0][2], part[1][2]);
16496 }
16497 }
16498 else
16499 {
	  /* In 64-bit mode we don't have a 32-bit push available.  If this
	     is a register, that is OK - we will just use the larger
	     counterpart.  We also retype memory - these come from an
	     attempt to avoid a REX prefix when moving the second half of
	     a TFmode value.  */
16504 if (GET_MODE (part[1][1]) == SImode)
16505 {
16506 switch (GET_CODE (part[1][1]))
16507 {
16508 case MEM:
16509 part[1][1] = adjust_address (part[1][1], DImode, 0);
16510 break;
16511
16512 case REG:
16513 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16514 break;
16515
16516 default:
16517 gcc_unreachable ();
16518 }
16519
16520 if (GET_MODE (part[1][0]) == SImode)
16521 part[1][0] = part[1][1];
16522 }
16523 }
16524 emit_move_insn (part[0][1], part[1][1]);
16525 emit_move_insn (part[0][0], part[1][0]);
16526 return;
16527 }
16528
  /* Choose the correct order so that we do not overwrite the source before it is copied.  */
16530 if ((REG_P (part[0][0])
16531 && REG_P (part[1][1])
16532 && (REGNO (part[0][0]) == REGNO (part[1][1])
16533 || (nparts == 3
16534 && REGNO (part[0][0]) == REGNO (part[1][2]))
16535 || (nparts == 4
16536 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16537 || (collisions > 0
16538 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16539 {
16540 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16541 {
16542 operands[2 + i] = part[0][j];
16543 operands[6 + i] = part[1][j];
16544 }
16545 }
16546 else
16547 {
16548 for (i = 0; i < nparts; i++)
16549 {
16550 operands[2 + i] = part[0][i];
16551 operands[6 + i] = part[1][i];
16552 }
16553 }
16554
16555 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16556 if (optimize_insn_for_size_p ())
16557 {
16558 for (j = 0; j < nparts - 1; j++)
16559 if (CONST_INT_P (operands[6 + j])
16560 && operands[6 + j] != const0_rtx
16561 && REG_P (operands[2 + j]))
16562 for (i = j; i < nparts - 1; i++)
16563 if (CONST_INT_P (operands[7 + i])
16564 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16565 operands[7 + i] = operands[2 + j];
16566 }
16567
16568 for (i = 0; i < nparts; i++)
16569 emit_move_insn (operands[2 + i], operands[6 + i]);
16570
16571 return;
16572 }
16573
16574 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16575 left shift by a constant, either using a single shift or
16576 a sequence of add instructions. */
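/* For example, a left shift by 1 becomes a single add of the operand
   to itself, and a small constant shift may become a short chain of
   adds when COUNT times the add cost does not exceed the cost of a
   constant shift.  */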
16577
16578 static void
16579 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16580 {
16581 if (count == 1)
16582 {
16583 emit_insn ((mode == DImode
16584 ? gen_addsi3
16585 : gen_adddi3) (operand, operand, operand));
16586 }
16587 else if (!optimize_insn_for_size_p ()
16588 && count * ix86_cost->add <= ix86_cost->shift_const)
16589 {
16590 int i;
16591 for (i=0; i<count; i++)
16592 {
16593 emit_insn ((mode == DImode
16594 ? gen_addsi3
16595 : gen_adddi3) (operand, operand, operand));
16596 }
16597 }
16598 else
16599 emit_insn ((mode == DImode
16600 ? gen_ashlsi3
16601 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16602 }
16603
16604 void
16605 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16606 {
16607 rtx low[2], high[2];
16608 int count;
16609 const int single_width = mode == DImode ? 32 : 64;
16610
16611 if (CONST_INT_P (operands[2]))
16612 {
16613 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16614 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16615
16616 if (count >= single_width)
16617 {
16618 emit_move_insn (high[0], low[1]);
16619 emit_move_insn (low[0], const0_rtx);
16620
16621 if (count > single_width)
16622 ix86_expand_ashl_const (high[0], count - single_width, mode);
16623 }
16624 else
16625 {
16626 if (!rtx_equal_p (operands[0], operands[1]))
16627 emit_move_insn (operands[0], operands[1]);
16628 emit_insn ((mode == DImode
16629 ? gen_x86_shld
16630 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16631 ix86_expand_ashl_const (low[0], count, mode);
16632 }
16633 return;
16634 }
16635
16636 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16637
16638 if (operands[1] == const1_rtx)
16639 {
/* Assuming we've chosen QImode-capable registers, 1 << N
   can be done with two 32/64-bit shifts, no branches, no cmoves.  */
16642 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16643 {
16644 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16645
16646 ix86_expand_clear (low[0]);
16647 ix86_expand_clear (high[0]);
16648 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16649
16650 d = gen_lowpart (QImode, low[0]);
16651 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16652 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16653 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16654
16655 d = gen_lowpart (QImode, high[0]);
16656 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16657 s = gen_rtx_NE (QImode, flags, const0_rtx);
16658 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16659 }
16660
16661 /* Otherwise, we can get the same results by manually performing
16662 a bit extract operation on bit 5/6, and then performing the two
16663 shifts. The two methods of getting 0/1 into low/high are exactly
16664 the same size. Avoiding the shift in the bit extract case helps
16665 pentium4 a bit; no one else seems to care much either way. */
16666 else
16667 {
16668 rtx x;
16669
16670 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16671 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16672 else
16673 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16674 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16675
16676 emit_insn ((mode == DImode
16677 ? gen_lshrsi3
16678 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16679 emit_insn ((mode == DImode
16680 ? gen_andsi3
16681 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16682 emit_move_insn (low[0], high[0]);
16683 emit_insn ((mode == DImode
16684 ? gen_xorsi3
16685 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16686 }
16687
16688 emit_insn ((mode == DImode
16689 ? gen_ashlsi3
16690 : gen_ashldi3) (low[0], low[0], operands[2]));
16691 emit_insn ((mode == DImode
16692 ? gen_ashlsi3
16693 : gen_ashldi3) (high[0], high[0], operands[2]));
16694 return;
16695 }
16696
16697 if (operands[1] == constm1_rtx)
16698 {
16699 /* For -1 << N, we can avoid the shld instruction, because we
16700 know that we're shifting 0...31/63 ones into a -1. */
16701 emit_move_insn (low[0], constm1_rtx);
16702 if (optimize_insn_for_size_p ())
16703 emit_move_insn (high[0], low[0]);
16704 else
16705 emit_move_insn (high[0], constm1_rtx);
16706 }
16707 else
16708 {
16709 if (!rtx_equal_p (operands[0], operands[1]))
16710 emit_move_insn (operands[0], operands[1]);
16711
16712 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16713 emit_insn ((mode == DImode
16714 ? gen_x86_shld
16715 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16716 }
16717
16718 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16719
16720 if (TARGET_CMOVE && scratch)
16721 {
16722 ix86_expand_clear (scratch);
16723 emit_insn ((mode == DImode
16724 ? gen_x86_shift_adj_1
16725 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16726 scratch));
16727 }
16728 else
16729 emit_insn ((mode == DImode
16730 ? gen_x86_shift_adj_2
16731 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16732 }
16733
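/* Split a double-word arithmetic right shift; operands are as for
   ix86_split_ashl above.  For a variable count the emitted sequence is
   roughly

       shrd  %cl, high, low    ; low = (low >> cnt) | (high << (width - cnt))
       sar   %cl, high
       ; if cnt >= width: low = high, high = copies of the sign bit
  */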
16734 void
16735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16736 {
16737 rtx low[2], high[2];
16738 int count;
16739 const int single_width = mode == DImode ? 32 : 64;
16740
16741 if (CONST_INT_P (operands[2]))
16742 {
16743 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16744 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16745
16746 if (count == single_width * 2 - 1)
16747 {
16748 emit_move_insn (high[0], high[1]);
16749 emit_insn ((mode == DImode
16750 ? gen_ashrsi3
16751 : gen_ashrdi3) (high[0], high[0],
16752 GEN_INT (single_width - 1)));
16753 emit_move_insn (low[0], high[0]);
16754
16755 }
16756 else if (count >= single_width)
16757 {
16758 emit_move_insn (low[0], high[1]);
16759 emit_move_insn (high[0], low[0]);
16760 emit_insn ((mode == DImode
16761 ? gen_ashrsi3
16762 : gen_ashrdi3) (high[0], high[0],
16763 GEN_INT (single_width - 1)));
16764 if (count > single_width)
16765 emit_insn ((mode == DImode
16766 ? gen_ashrsi3
16767 : gen_ashrdi3) (low[0], low[0],
16768 GEN_INT (count - single_width)));
16769 }
16770 else
16771 {
16772 if (!rtx_equal_p (operands[0], operands[1]))
16773 emit_move_insn (operands[0], operands[1]);
16774 emit_insn ((mode == DImode
16775 ? gen_x86_shrd
16776 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16777 emit_insn ((mode == DImode
16778 ? gen_ashrsi3
16779 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16780 }
16781 }
16782 else
16783 {
16784 if (!rtx_equal_p (operands[0], operands[1]))
16785 emit_move_insn (operands[0], operands[1]);
16786
16787 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16788
16789 emit_insn ((mode == DImode
16790 ? gen_x86_shrd
16791 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16792 emit_insn ((mode == DImode
16793 ? gen_ashrsi3
16794 : gen_ashrdi3) (high[0], high[0], operands[2]));
16795
16796 if (TARGET_CMOVE && scratch)
16797 {
16798 emit_move_insn (scratch, high[0]);
16799 emit_insn ((mode == DImode
16800 ? gen_ashrsi3
16801 : gen_ashrdi3) (scratch, scratch,
16802 GEN_INT (single_width - 1)));
16803 emit_insn ((mode == DImode
16804 ? gen_x86_shift_adj_1
16805 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16806 scratch));
16807 }
16808 else
16809 emit_insn ((mode == DImode
16810 ? gen_x86_shift_adj_3
16811 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
16812 }
16813 }
16814
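/* Split a double-word logical right shift; operands are as for
   ix86_split_ashl above.  The variable-count sequence mirrors
   ix86_split_ashr, except that the upper word is shifted with SHR and the
   cnt >= width adjustment moves HIGH into LOW and clears HIGH.  */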
16815 void
16816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16817 {
16818 rtx low[2], high[2];
16819 int count;
16820 const int single_width = mode == DImode ? 32 : 64;
16821
16822 if (CONST_INT_P (operands[2]))
16823 {
16824 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16825 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16826
16827 if (count >= single_width)
16828 {
16829 emit_move_insn (low[0], high[1]);
16830 ix86_expand_clear (high[0]);
16831
16832 if (count > single_width)
16833 emit_insn ((mode == DImode
16834 ? gen_lshrsi3
16835 : gen_lshrdi3) (low[0], low[0],
16836 GEN_INT (count - single_width)));
16837 }
16838 else
16839 {
16840 if (!rtx_equal_p (operands[0], operands[1]))
16841 emit_move_insn (operands[0], operands[1]);
16842 emit_insn ((mode == DImode
16843 ? gen_x86_shrd
16844 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16845 emit_insn ((mode == DImode
16846 ? gen_lshrsi3
16847 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
16848 }
16849 }
16850 else
16851 {
16852 if (!rtx_equal_p (operands[0], operands[1]))
16853 emit_move_insn (operands[0], operands[1]);
16854
16855 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16856
16857 emit_insn ((mode == DImode
16858 ? gen_x86_shrd
16859 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16860 emit_insn ((mode == DImode
16861 ? gen_lshrsi3
16862 : gen_lshrdi3) (high[0], high[0], operands[2]));
16863
16864 /* Heh. By reversing the arguments, we can reuse this pattern. */
16865 if (TARGET_CMOVE && scratch)
16866 {
16867 ix86_expand_clear (scratch);
16868 emit_insn ((mode == DImode
16869 ? gen_x86_shift_adj_1
16870 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16871 scratch));
16872 }
16873 else
16874 emit_insn ((mode == DImode
16875 ? gen_x86_shift_adj_2
16876 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16877 }
16878 }
16879
/* Predict the just-emitted jump instruction to be taken with probability PROB.  */
16881 static void
16882 predict_jump (int prob)
16883 {
16884 rtx insn = get_last_insn ();
16885 gcc_assert (JUMP_P (insn));
16886 REG_NOTES (insn)
16887 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16888 GEN_INT (prob),
16889 REG_NOTES (insn));
16890 }
16891
/* Helper function for the string operations below.  Test whether the bits of
   VARIABLE selected by the mask VALUE are all zero; if so, jump to the
   returned label.  */
16894 static rtx
16895 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16896 {
16897 rtx label = gen_label_rtx ();
16898 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16899 if (GET_MODE (variable) == DImode)
16900 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16901 else
16902 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16903 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16904 1, label);
16905 if (epilogue)
16906 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16907 else
16908 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16909 return label;
16910 }
16911
/* Decrease COUNTREG by VALUE.  */
16913 static void
16914 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16915 {
16916 if (GET_MODE (countreg) == DImode)
16917 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16918 else
16919 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16920 }
16921
/* Zero-extend the possibly-SImode EXP into a Pmode register.  */
16923 rtx
16924 ix86_zero_extend_to_Pmode (rtx exp)
16925 {
16926 rtx r;
16927 if (GET_MODE (exp) == VOIDmode)
16928 return force_reg (Pmode, exp);
16929 if (GET_MODE (exp) == Pmode)
16930 return copy_to_mode_reg (Pmode, exp);
16931 r = gen_reg_rtx (Pmode);
16932 emit_insn (gen_zero_extendsidi2 (r, exp));
16933 return r;
16934 }
16935
16936 /* Divide COUNTREG by SCALE. */
16937 static rtx
16938 scale_counter (rtx countreg, int scale)
16939 {
16940 rtx sc;
16941 rtx piece_size_mask;
16942
16943 if (scale == 1)
16944 return countreg;
16945 if (CONST_INT_P (countreg))
16946 return GEN_INT (INTVAL (countreg) / scale);
16947 gcc_assert (REG_P (countreg));
16948
16949 piece_size_mask = GEN_INT (scale - 1);
16950 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16951 GEN_INT (exact_log2 (scale)),
16952 NULL, 1, OPTAB_DIRECT);
16953 return sc;
16954 }
16955
16956 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16957 DImode for constant loop counts. */
16958
16959 static enum machine_mode
16960 counter_mode (rtx count_exp)
16961 {
16962 if (GET_MODE (count_exp) != VOIDmode)
16963 return GET_MODE (count_exp);
16964 if (GET_CODE (count_exp) != CONST_INT)
16965 return Pmode;
16966 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16967 return DImode;
16968 return SImode;
16969 }
16970
/* When SRCPTR is non-NULL, output a simple loop that moves memory from
   SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
   size is COUNT bytes.  When SRCPTR is NULL, output the equivalent loop
   that sets memory to VALUE (expected to be in MODE).

   The size is rounded down to a whole number of chunks moved at once.
   SRCMEM and DESTMEM provide the MEM rtx to feed proper aliasing info.  */
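/* Schematically, ignoring unrolling and mode details, the emitted code is

     size = count & -piece_size;
     if (piece_size == 1 && size == 0) goto out;
     iter = 0;
   top:
     dest[iter] = src ? src[iter] : value;    (repeated UNROLL times)
     iter += piece_size;
     if (iter < size) goto top;
     destptr += iter;  if (srcptr) srcptr += iter;
   out: ;
  */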
16978
16979
16980 static void
16981 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16982 rtx destptr, rtx srcptr, rtx value,
16983 rtx count, enum machine_mode mode, int unroll,
16984 int expected_size)
16985 {
16986 rtx out_label, top_label, iter, tmp;
16987 enum machine_mode iter_mode = counter_mode (count);
16988 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16989 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16990 rtx size;
16991 rtx x_addr;
16992 rtx y_addr;
16993 int i;
16994
16995 top_label = gen_label_rtx ();
16996 out_label = gen_label_rtx ();
16997 iter = gen_reg_rtx (iter_mode);
16998
16999 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
17000 NULL, 1, OPTAB_DIRECT);
17001 /* Those two should combine. */
17002 if (piece_size == const1_rtx)
17003 {
17004 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17005 true, out_label);
17006 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17007 }
17008 emit_move_insn (iter, const0_rtx);
17009
17010 emit_label (top_label);
17011
17012 tmp = convert_modes (Pmode, iter_mode, iter, true);
17013 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17014 destmem = change_address (destmem, mode, x_addr);
17015
17016 if (srcmem)
17017 {
17018 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17019 srcmem = change_address (srcmem, mode, y_addr);
17020
/* When unrolling for chips that reorder memory reads and writes,
   we can save registers by using a single temporary.
   Also, using 4 temporaries is overkill in 32-bit mode.  */
17024 if (!TARGET_64BIT && 0)
17025 {
17026 for (i = 0; i < unroll; i++)
17027 {
17028 if (i)
17029 {
17030 destmem =
17031 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17032 srcmem =
17033 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17034 }
17035 emit_move_insn (destmem, srcmem);
17036 }
17037 }
17038 else
17039 {
17040 rtx tmpreg[4];
17041 gcc_assert (unroll <= 4);
17042 for (i = 0; i < unroll; i++)
17043 {
17044 tmpreg[i] = gen_reg_rtx (mode);
17045 if (i)
17046 {
17047 srcmem =
17048 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17049 }
17050 emit_move_insn (tmpreg[i], srcmem);
17051 }
17052 for (i = 0; i < unroll; i++)
17053 {
17054 if (i)
17055 {
17056 destmem =
17057 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17058 }
17059 emit_move_insn (destmem, tmpreg[i]);
17060 }
17061 }
17062 }
17063 else
17064 for (i = 0; i < unroll; i++)
17065 {
17066 if (i)
17067 destmem =
17068 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17069 emit_move_insn (destmem, value);
17070 }
17071
17072 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17073 true, OPTAB_LIB_WIDEN);
17074 if (tmp != iter)
17075 emit_move_insn (iter, tmp);
17076
17077 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17078 true, top_label);
17079 if (expected_size != -1)
17080 {
17081 expected_size /= GET_MODE_SIZE (mode) * unroll;
17082 if (expected_size == 0)
17083 predict_jump (0);
17084 else if (expected_size > REG_BR_PROB_BASE)
17085 predict_jump (REG_BR_PROB_BASE - 1);
17086 else
17087 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17088 }
17089 else
17090 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17091 iter = ix86_zero_extend_to_Pmode (iter);
17092 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17093 true, OPTAB_LIB_WIDEN);
17094 if (tmp != destptr)
17095 emit_move_insn (destptr, tmp);
17096 if (srcptr)
17097 {
17098 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17099 true, OPTAB_LIB_WIDEN);
17100 if (tmp != srcptr)
17101 emit_move_insn (srcptr, tmp);
17102 }
17103 emit_label (out_label);
17104 }
17105
/* Output a "rep; mov" instruction.
   Arguments have the same meaning as for the previous function.  */
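/* For SImode, for instance, the emitted insn corresponds roughly to

       movl  count/4, %ecx
       rep movsl

   with DESTEXP and SRCEXP describing the final values of the destination
   and source pointers for the RTL pattern.  */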
17108 static void
17109 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17110 rtx destptr, rtx srcptr,
17111 rtx count,
17112 enum machine_mode mode)
17113 {
17114 rtx destexp;
17115 rtx srcexp;
17116 rtx countreg;
17117
/* If the size is known and a multiple of 4, it is better to use SImode rep movs.  */
17119 if (mode == QImode && CONST_INT_P (count)
17120 && !(INTVAL (count) & 3))
17121 mode = SImode;
17122
17123 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17124 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17125 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17126 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17127 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17128 if (mode != QImode)
17129 {
17130 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17131 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17132 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17133 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17134 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17135 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17136 }
17137 else
17138 {
17139 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17140 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17141 }
17142 if (CONST_INT_P (count))
17143 {
17144 count = GEN_INT (INTVAL (count)
17145 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17146 destmem = shallow_copy_rtx (destmem);
17147 srcmem = shallow_copy_rtx (srcmem);
17148 set_mem_size (destmem, count);
17149 set_mem_size (srcmem, count);
17150 }
17151 else
17152 {
17153 if (MEM_SIZE (destmem))
17154 set_mem_size (destmem, NULL_RTX);
17155 if (MEM_SIZE (srcmem))
17156 set_mem_size (srcmem, NULL_RTX);
17157 }
17158 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17159 destexp, srcexp));
17160 }
17161
/* Output a "rep; stos" instruction.
   Arguments have the same meaning as for the previous function.  */
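/* For SImode this corresponds roughly to

       movl  value, %eax
       movl  count/4, %ecx
       rep stosl

   with DESTEXP describing the final value of the destination pointer.  */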
17164 static void
17165 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17166 rtx count, enum machine_mode mode,
17167 rtx orig_value)
17168 {
17169 rtx destexp;
17170 rtx countreg;
17171
17172 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17173 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17174 value = force_reg (mode, gen_lowpart (mode, value));
17175 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17176 if (mode != QImode)
17177 {
17178 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17179 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17180 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17181 }
17182 else
17183 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17184 if (orig_value == const0_rtx && CONST_INT_P (count))
17185 {
17186 count = GEN_INT (INTVAL (count)
17187 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17188 destmem = shallow_copy_rtx (destmem);
17189 set_mem_size (destmem, count);
17190 }
17191 else if (MEM_SIZE (destmem))
17192 set_mem_size (destmem, NULL_RTX);
17193 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17194 }
17195
17196 static void
17197 emit_strmov (rtx destmem, rtx srcmem,
17198 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17199 {
17200 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17201 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17202 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17203 }
17204
17205 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
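/* For a known count the residue is copied by a fixed sequence of moves; e.g.
   a residue of 13 bytes becomes an 8-byte, a 4-byte and a 1-byte move (the
   8-byte piece being two SImode moves on 32-bit targets).  For a variable
   count either a small QImode loop (when max_size > 8) or a chain of bit
   tests on the count is emitted.  */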
17206 static void
17207 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17208 rtx destptr, rtx srcptr, rtx count, int max_size)
17209 {
17210 rtx src, dest;
17211 if (CONST_INT_P (count))
17212 {
17213 HOST_WIDE_INT countval = INTVAL (count);
17214 int offset = 0;
17215
17216 if ((countval & 0x10) && max_size > 16)
17217 {
17218 if (TARGET_64BIT)
17219 {
17220 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17221 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17222 }
17223 else
17224 gcc_unreachable ();
17225 offset += 16;
17226 }
17227 if ((countval & 0x08) && max_size > 8)
17228 {
17229 if (TARGET_64BIT)
17230 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17231 else
17232 {
17233 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17234 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17235 }
17236 offset += 8;
17237 }
17238 if ((countval & 0x04) && max_size > 4)
17239 {
17240 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17241 offset += 4;
17242 }
17243 if ((countval & 0x02) && max_size > 2)
17244 {
17245 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17246 offset += 2;
17247 }
17248 if ((countval & 0x01) && max_size > 1)
17249 {
17250 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17251 offset += 1;
17252 }
17253 return;
17254 }
17255 if (max_size > 8)
17256 {
17257 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17258 count, 1, OPTAB_DIRECT);
17259 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17260 count, QImode, 1, 4);
17261 return;
17262 }
17263
/* When single-instruction stringops are available, we can cheaply advance
   the dest and src pointers.  Otherwise we save code size by maintaining an
   offset (zero is readily available from the preceding rep operation) and
   using x86 addressing modes.  */
17268 if (TARGET_SINGLE_STRINGOP)
17269 {
17270 if (max_size > 4)
17271 {
17272 rtx label = ix86_expand_aligntest (count, 4, true);
17273 src = change_address (srcmem, SImode, srcptr);
17274 dest = change_address (destmem, SImode, destptr);
17275 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17276 emit_label (label);
17277 LABEL_NUSES (label) = 1;
17278 }
17279 if (max_size > 2)
17280 {
17281 rtx label = ix86_expand_aligntest (count, 2, true);
17282 src = change_address (srcmem, HImode, srcptr);
17283 dest = change_address (destmem, HImode, destptr);
17284 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17285 emit_label (label);
17286 LABEL_NUSES (label) = 1;
17287 }
17288 if (max_size > 1)
17289 {
17290 rtx label = ix86_expand_aligntest (count, 1, true);
17291 src = change_address (srcmem, QImode, srcptr);
17292 dest = change_address (destmem, QImode, destptr);
17293 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17294 emit_label (label);
17295 LABEL_NUSES (label) = 1;
17296 }
17297 }
17298 else
17299 {
17300 rtx offset = force_reg (Pmode, const0_rtx);
17301 rtx tmp;
17302
17303 if (max_size > 4)
17304 {
17305 rtx label = ix86_expand_aligntest (count, 4, true);
17306 src = change_address (srcmem, SImode, srcptr);
17307 dest = change_address (destmem, SImode, destptr);
17308 emit_move_insn (dest, src);
17309 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17310 true, OPTAB_LIB_WIDEN);
17311 if (tmp != offset)
17312 emit_move_insn (offset, tmp);
17313 emit_label (label);
17314 LABEL_NUSES (label) = 1;
17315 }
17316 if (max_size > 2)
17317 {
17318 rtx label = ix86_expand_aligntest (count, 2, true);
17319 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17320 src = change_address (srcmem, HImode, tmp);
17321 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17322 dest = change_address (destmem, HImode, tmp);
17323 emit_move_insn (dest, src);
17324 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17325 true, OPTAB_LIB_WIDEN);
17326 if (tmp != offset)
17327 emit_move_insn (offset, tmp);
17328 emit_label (label);
17329 LABEL_NUSES (label) = 1;
17330 }
17331 if (max_size > 1)
17332 {
17333 rtx label = ix86_expand_aligntest (count, 1, true);
17334 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17335 src = change_address (srcmem, QImode, tmp);
17336 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17337 dest = change_address (destmem, QImode, tmp);
17338 emit_move_insn (dest, src);
17339 emit_label (label);
17340 LABEL_NUSES (label) = 1;
17341 }
17342 }
17343 }
17344
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17346 static void
17347 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17348 rtx count, int max_size)
17349 {
17350 count =
17351 expand_simple_binop (counter_mode (count), AND, count,
17352 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17353 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17354 gen_lowpart (QImode, value), count, QImode,
17355 1, max_size / 2);
17356 }
17357
/* Output code to set at most count & (max_size - 1) bytes starting at DEST.  */
17359 static void
17360 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17361 {
17362 rtx dest;
17363
17364 if (CONST_INT_P (count))
17365 {
17366 HOST_WIDE_INT countval = INTVAL (count);
17367 int offset = 0;
17368
17369 if ((countval & 0x10) && max_size > 16)
17370 {
17371 if (TARGET_64BIT)
17372 {
17373 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17374 emit_insn (gen_strset (destptr, dest, value));
17375 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17376 emit_insn (gen_strset (destptr, dest, value));
17377 }
17378 else
17379 gcc_unreachable ();
17380 offset += 16;
17381 }
17382 if ((countval & 0x08) && max_size > 8)
17383 {
17384 if (TARGET_64BIT)
17385 {
17386 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17387 emit_insn (gen_strset (destptr, dest, value));
17388 }
17389 else
17390 {
17391 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17392 emit_insn (gen_strset (destptr, dest, value));
17393 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17394 emit_insn (gen_strset (destptr, dest, value));
17395 }
17396 offset += 8;
17397 }
17398 if ((countval & 0x04) && max_size > 4)
17399 {
17400 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17401 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17402 offset += 4;
17403 }
17404 if ((countval & 0x02) && max_size > 2)
17405 {
17406 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17407 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17408 offset += 2;
17409 }
17410 if ((countval & 0x01) && max_size > 1)
17411 {
17412 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17413 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17414 offset += 1;
17415 }
17416 return;
17417 }
17418 if (max_size > 32)
17419 {
17420 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17421 return;
17422 }
17423 if (max_size > 16)
17424 {
17425 rtx label = ix86_expand_aligntest (count, 16, true);
17426 if (TARGET_64BIT)
17427 {
17428 dest = change_address (destmem, DImode, destptr);
17429 emit_insn (gen_strset (destptr, dest, value));
17430 emit_insn (gen_strset (destptr, dest, value));
17431 }
17432 else
17433 {
17434 dest = change_address (destmem, SImode, destptr);
17435 emit_insn (gen_strset (destptr, dest, value));
17436 emit_insn (gen_strset (destptr, dest, value));
17437 emit_insn (gen_strset (destptr, dest, value));
17438 emit_insn (gen_strset (destptr, dest, value));
17439 }
17440 emit_label (label);
17441 LABEL_NUSES (label) = 1;
17442 }
17443 if (max_size > 8)
17444 {
17445 rtx label = ix86_expand_aligntest (count, 8, true);
17446 if (TARGET_64BIT)
17447 {
17448 dest = change_address (destmem, DImode, destptr);
17449 emit_insn (gen_strset (destptr, dest, value));
17450 }
17451 else
17452 {
17453 dest = change_address (destmem, SImode, destptr);
17454 emit_insn (gen_strset (destptr, dest, value));
17455 emit_insn (gen_strset (destptr, dest, value));
17456 }
17457 emit_label (label);
17458 LABEL_NUSES (label) = 1;
17459 }
17460 if (max_size > 4)
17461 {
17462 rtx label = ix86_expand_aligntest (count, 4, true);
17463 dest = change_address (destmem, SImode, destptr);
17464 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17465 emit_label (label);
17466 LABEL_NUSES (label) = 1;
17467 }
17468 if (max_size > 2)
17469 {
17470 rtx label = ix86_expand_aligntest (count, 2, true);
17471 dest = change_address (destmem, HImode, destptr);
17472 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17473 emit_label (label);
17474 LABEL_NUSES (label) = 1;
17475 }
17476 if (max_size > 1)
17477 {
17478 rtx label = ix86_expand_aligntest (count, 1, true);
17479 dest = change_address (destmem, QImode, destptr);
17480 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17481 emit_label (label);
17482 LABEL_NUSES (label) = 1;
17483 }
17484 }
17485
/* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17488 static void
17489 expand_movmem_prologue (rtx destmem, rtx srcmem,
17490 rtx destptr, rtx srcptr, rtx count,
17491 int align, int desired_alignment)
17492 {
17493 if (align <= 1 && desired_alignment > 1)
17494 {
17495 rtx label = ix86_expand_aligntest (destptr, 1, false);
17496 srcmem = change_address (srcmem, QImode, srcptr);
17497 destmem = change_address (destmem, QImode, destptr);
17498 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17499 ix86_adjust_counter (count, 1);
17500 emit_label (label);
17501 LABEL_NUSES (label) = 1;
17502 }
17503 if (align <= 2 && desired_alignment > 2)
17504 {
17505 rtx label = ix86_expand_aligntest (destptr, 2, false);
17506 srcmem = change_address (srcmem, HImode, srcptr);
17507 destmem = change_address (destmem, HImode, destptr);
17508 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17509 ix86_adjust_counter (count, 2);
17510 emit_label (label);
17511 LABEL_NUSES (label) = 1;
17512 }
17513 if (align <= 4 && desired_alignment > 4)
17514 {
17515 rtx label = ix86_expand_aligntest (destptr, 4, false);
17516 srcmem = change_address (srcmem, SImode, srcptr);
17517 destmem = change_address (destmem, SImode, destptr);
17518 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17519 ix86_adjust_counter (count, 4);
17520 emit_label (label);
17521 LABEL_NUSES (label) = 1;
17522 }
17523 gcc_assert (desired_alignment <= 8);
17524 }
17525
/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
17528 static rtx
17529 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17530 int desired_align, int align_bytes)
17531 {
17532 rtx src = *srcp;
17533 rtx src_size, dst_size;
17534 int off = 0;
17535 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17536 if (src_align_bytes >= 0)
17537 src_align_bytes = desired_align - src_align_bytes;
17538 src_size = MEM_SIZE (src);
17539 dst_size = MEM_SIZE (dst);
17540 if (align_bytes & 1)
17541 {
17542 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17543 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17544 off = 1;
17545 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17546 }
17547 if (align_bytes & 2)
17548 {
17549 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17550 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17551 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17552 set_mem_align (dst, 2 * BITS_PER_UNIT);
17553 if (src_align_bytes >= 0
17554 && (src_align_bytes & 1) == (align_bytes & 1)
17555 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17556 set_mem_align (src, 2 * BITS_PER_UNIT);
17557 off = 2;
17558 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17559 }
17560 if (align_bytes & 4)
17561 {
17562 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17563 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17564 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17565 set_mem_align (dst, 4 * BITS_PER_UNIT);
17566 if (src_align_bytes >= 0)
17567 {
17568 unsigned int src_align = 0;
17569 if ((src_align_bytes & 3) == (align_bytes & 3))
17570 src_align = 4;
17571 else if ((src_align_bytes & 1) == (align_bytes & 1))
17572 src_align = 2;
17573 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17574 set_mem_align (src, src_align * BITS_PER_UNIT);
17575 }
17576 off = 4;
17577 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17578 }
17579 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17580 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17581 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17582 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17583 if (src_align_bytes >= 0)
17584 {
17585 unsigned int src_align = 0;
17586 if ((src_align_bytes & 7) == (align_bytes & 7))
17587 src_align = 8;
17588 else if ((src_align_bytes & 3) == (align_bytes & 3))
17589 src_align = 4;
17590 else if ((src_align_bytes & 1) == (align_bytes & 1))
17591 src_align = 2;
17592 if (src_align > (unsigned int) desired_align)
17593 src_align = desired_align;
17594 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17595 set_mem_align (src, src_align * BITS_PER_UNIT);
17596 }
17597 if (dst_size)
17598 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17599 if (src_size)
set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17601 *srcp = src;
17602 return dst;
17603 }
17604
/* Store enough into DEST to align DEST, known to be aligned by ALIGN,
   to DESIRED_ALIGNMENT.  */
17607 static void
17608 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17609 int align, int desired_alignment)
17610 {
17611 if (align <= 1 && desired_alignment > 1)
17612 {
17613 rtx label = ix86_expand_aligntest (destptr, 1, false);
17614 destmem = change_address (destmem, QImode, destptr);
17615 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17616 ix86_adjust_counter (count, 1);
17617 emit_label (label);
17618 LABEL_NUSES (label) = 1;
17619 }
17620 if (align <= 2 && desired_alignment > 2)
17621 {
17622 rtx label = ix86_expand_aligntest (destptr, 2, false);
17623 destmem = change_address (destmem, HImode, destptr);
17624 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17625 ix86_adjust_counter (count, 2);
17626 emit_label (label);
17627 LABEL_NUSES (label) = 1;
17628 }
17629 if (align <= 4 && desired_alignment > 4)
17630 {
17631 rtx label = ix86_expand_aligntest (destptr, 4, false);
17632 destmem = change_address (destmem, SImode, destptr);
17633 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17634 ix86_adjust_counter (count, 4);
17635 emit_label (label);
17636 LABEL_NUSES (label) = 1;
17637 }
17638 gcc_assert (desired_alignment <= 8);
17639 }
17640
/* Store enough into DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be stored.  */
17643 static rtx
17644 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17645 int desired_align, int align_bytes)
17646 {
17647 int off = 0;
17648 rtx dst_size = MEM_SIZE (dst);
17649 if (align_bytes & 1)
17650 {
17651 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17652 off = 1;
17653 emit_insn (gen_strset (destreg, dst,
17654 gen_lowpart (QImode, value)));
17655 }
17656 if (align_bytes & 2)
17657 {
17658 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17659 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17660 set_mem_align (dst, 2 * BITS_PER_UNIT);
17661 off = 2;
17662 emit_insn (gen_strset (destreg, dst,
17663 gen_lowpart (HImode, value)));
17664 }
17665 if (align_bytes & 4)
17666 {
17667 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17668 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17669 set_mem_align (dst, 4 * BITS_PER_UNIT);
17670 off = 4;
17671 emit_insn (gen_strset (destreg, dst,
17672 gen_lowpart (SImode, value)));
17673 }
17674 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17675 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17676 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17677 if (dst_size)
17678 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17679 return dst;
17680 }
17681
17682 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
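/* For example, assuming no explicit stringop strategy was requested on the
   command line: with -Os and an unknown or non-multiple-of-4 count this
   returns rep_prefix_1_byte when the rep-prefix registers are free and
   loop_1_byte otherwise; when optimizing for speed, expected blocks smaller
   than 4 bytes always get loop_1_byte.  */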
17683 static enum stringop_alg
17684 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17685 int *dynamic_check)
17686 {
17687 const struct stringop_algs * algs;
17688 bool optimize_for_speed;
17689 /* Algorithms using the rep prefix want at least edi and ecx;
17690 additionally, memset wants eax and memcpy wants esi. Don't
17691 consider such algorithms if the user has appropriated those
17692 registers for their own purposes. */
17693 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17694 || (memset
17695 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17696
17697 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17698 || (alg != rep_prefix_1_byte \
17699 && alg != rep_prefix_4_byte \
17700 && alg != rep_prefix_8_byte))
17701 const struct processor_costs *cost;
17702
17703 /* Even if the string operation call is cold, we still might spend a lot
17704 of time processing large blocks. */
17705 if (optimize_function_for_size_p (cfun)
17706 || (optimize_insn_for_size_p ()
17707 && expected_size != -1 && expected_size < 256))
17708 optimize_for_speed = false;
17709 else
17710 optimize_for_speed = true;
17711
17712 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17713
17714 *dynamic_check = -1;
17715 if (memset)
17716 algs = &cost->memset[TARGET_64BIT != 0];
17717 else
17718 algs = &cost->memcpy[TARGET_64BIT != 0];
17719 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17720 return stringop_alg;
17721 /* rep; movq or rep; movl is the smallest variant. */
17722 else if (!optimize_for_speed)
17723 {
17724 if (!count || (count & 3))
17725 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17726 else
17727 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17728 }
/* Very tiny blocks are best handled via the loop; REP is expensive to set up.  */
17731 else if (expected_size != -1 && expected_size < 4)
17732 return loop_1_byte;
17733 else if (expected_size != -1)
17734 {
17735 unsigned int i;
17736 enum stringop_alg alg = libcall;
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17738 {
17739 /* We get here if the algorithms that were not libcall-based
17740 were rep-prefix based and we are unable to use rep prefixes
17741 based on global register usage. Break out of the loop and
17742 use the heuristic below. */
17743 if (algs->size[i].max == 0)
17744 break;
17745 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17746 {
17747 enum stringop_alg candidate = algs->size[i].alg;
17748
17749 if (candidate != libcall && ALG_USABLE_P (candidate))
17750 alg = candidate;
17751 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17752 last non-libcall inline algorithm. */
17753 if (TARGET_INLINE_ALL_STRINGOPS)
17754 {
17755 /* When the current size is best to be copied by a libcall,
17756 but we are still forced to inline, run the heuristic below
17757 that will pick code for medium sized blocks. */
17758 if (alg != libcall)
17759 return alg;
17760 break;
17761 }
17762 else if (ALG_USABLE_P (candidate))
17763 return candidate;
17764 }
17765 }
17766 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17767 }
/* When asked to inline the call anyway, try to pick a meaningful choice.
   We look for the maximal size of a block that is faster to copy by hand,
   and take blocks of at most that size, guessing that the average size will
   be roughly half of the block.
17772
17773 If this turns out to be bad, we might simply specify the preferred
17774 choice in ix86_costs. */
17775 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17776 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17777 {
17778 int max = -1;
17779 enum stringop_alg alg;
17780 int i;
17781 bool any_alg_usable_p = true;
17782
for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17784 {
17785 enum stringop_alg candidate = algs->size[i].alg;
17786 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17787
17788 if (candidate != libcall && candidate
17789 && ALG_USABLE_P (candidate))
17790 max = algs->size[i].max;
17791 }
17792 /* If there aren't any usable algorithms, then recursing on
17793 smaller sizes isn't going to find anything. Just return the
17794 simple byte-at-a-time copy loop. */
17795 if (!any_alg_usable_p)
17796 {
17797 /* Pick something reasonable. */
17798 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17799 *dynamic_check = 128;
17800 return loop_1_byte;
17801 }
17802 if (max == -1)
17803 max = 4096;
17804 alg = decide_alg (count, max / 2, memset, dynamic_check);
17805 gcc_assert (*dynamic_check == -1);
17806 gcc_assert (alg != libcall);
17807 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17808 *dynamic_check = max;
17809 return alg;
17810 }
17811 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17812 #undef ALG_USABLE_P
17813 }
17814
17815 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17816 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17817 static int
17818 decide_alignment (int align,
17819 enum stringop_alg alg,
17820 int expected_size)
17821 {
17822 int desired_align = 0;
17823 switch (alg)
17824 {
17825 case no_stringop:
17826 gcc_unreachable ();
17827 case loop:
17828 case unrolled_loop:
17829 desired_align = GET_MODE_SIZE (Pmode);
17830 break;
17831 case rep_prefix_8_byte:
17832 desired_align = 8;
17833 break;
17834 case rep_prefix_4_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
17837 if (TARGET_PENTIUMPRO)
17838 desired_align = 8;
17839 else
17840 desired_align = 4;
17841 break;
17842 case rep_prefix_1_byte:
/* PentiumPro has special logic triggering for 8-byte-aligned blocks,
   copying a whole cache line at once.  */
17845 if (TARGET_PENTIUMPRO)
17846 desired_align = 8;
17847 else
17848 desired_align = 1;
17849 break;
17850 case loop_1_byte:
17851 desired_align = 1;
17852 break;
17853 case libcall:
17854 return 0;
17855 }
17856
17857 if (optimize_size)
17858 desired_align = 1;
17859 if (desired_align < align)
17860 desired_align = align;
17861 if (expected_size != -1 && expected_size < 4)
17862 desired_align = align;
17863 return desired_align;
17864 }
17865
17866 /* Return the smallest power of 2 greater than VAL. */
17867 static int
17868 smallest_pow2_greater_than (int val)
17869 {
17870 int ret = 1;
17871 while (ret <= val)
17872 ret <<= 1;
17873 return ret;
17874 }
17875
17876 /* Expand string move (memcpy) operation. Use i386 string operations when
17877 profitable. expand_setmem contains similar code. The code depends upon
17878 architecture, block size and alignment, but always has the same
17879 overall structure:
17880
1) Prologue guard: a conditional that jumps to the epilogue for small
blocks that can be handled by the epilogue alone.  This is faster, but
also needed for correctness, since the prologue assumes the block is
larger than the desired alignment.
17885
17886 Optional dynamic check for size and libcall for large
17887 blocks is emitted here too, with -minline-stringops-dynamically.
17888
2) Prologue: copy the first few bytes in order to get the destination
aligned to DESIRED_ALIGN.  It is emitted only when ALIGN is less than
DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
We emit either a jump tree (on power-of-two-sized blocks) or a byte loop.
17893
17894 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17895 with specified algorithm.
17896
4) Epilogue: code copying the tail of the block that is too small to be
handled by the main body (or up to the size guarded by the prologue guard).  */
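/* Schematically (sizes and conditions simplified):

     if (count < epilogue_size_needed) goto epilogue;               - step 1
     copy 1/2/4 byte pieces until dest is aligned to desired_align;  - step 2
     main loop or rep-prefixed copy of size_needed chunks;           - step 3
   epilogue:
     copy the remaining count % epilogue_size_needed bytes;          - step 4
  */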
17899
17900 int
17901 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17902 rtx expected_align_exp, rtx expected_size_exp)
17903 {
17904 rtx destreg;
17905 rtx srcreg;
17906 rtx label = NULL;
17907 rtx tmp;
17908 rtx jump_around_label = NULL;
17909 HOST_WIDE_INT align = 1;
17910 unsigned HOST_WIDE_INT count = 0;
17911 HOST_WIDE_INT expected_size = -1;
17912 int size_needed = 0, epilogue_size_needed;
17913 int desired_align = 0, align_bytes = 0;
17914 enum stringop_alg alg;
17915 int dynamic_check;
17916 bool need_zero_guard = false;
17917
17918 if (CONST_INT_P (align_exp))
17919 align = INTVAL (align_exp);
/* i386 can do misaligned access at a reasonably increased cost.  */
17921 if (CONST_INT_P (expected_align_exp)
17922 && INTVAL (expected_align_exp) > align)
17923 align = INTVAL (expected_align_exp);
17924 /* ALIGN is the minimum of destination and source alignment, but we care here
17925 just about destination alignment. */
17926 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17927 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17928
17929 if (CONST_INT_P (count_exp))
17930 count = expected_size = INTVAL (count_exp);
17931 if (CONST_INT_P (expected_size_exp) && count == 0)
17932 expected_size = INTVAL (expected_size_exp);
17933
17934 /* Make sure we don't need to care about overflow later on. */
17935 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17936 return 0;
17937
17938 /* Step 0: Decide on preferred algorithm, desired alignment and
17939 size of chunks to be copied by main loop. */
17940
17941 alg = decide_alg (count, expected_size, false, &dynamic_check);
17942 desired_align = decide_alignment (align, alg, expected_size);
17943
17944 if (!TARGET_ALIGN_STRINGOPS)
17945 align = desired_align;
17946
17947 if (alg == libcall)
17948 return 0;
17949 gcc_assert (alg != no_stringop);
17950 if (!count)
17951 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17952 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17953 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17954 switch (alg)
17955 {
17956 case libcall:
17957 case no_stringop:
17958 gcc_unreachable ();
17959 case loop:
17960 need_zero_guard = true;
17961 size_needed = GET_MODE_SIZE (Pmode);
17962 break;
17963 case unrolled_loop:
17964 need_zero_guard = true;
17965 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17966 break;
17967 case rep_prefix_8_byte:
17968 size_needed = 8;
17969 break;
17970 case rep_prefix_4_byte:
17971 size_needed = 4;
17972 break;
17973 case rep_prefix_1_byte:
17974 size_needed = 1;
17975 break;
17976 case loop_1_byte:
17977 need_zero_guard = true;
17978 size_needed = 1;
17979 break;
17980 }
17981
17982 epilogue_size_needed = size_needed;
17983
17984 /* Step 1: Prologue guard. */
17985
17986 /* Alignment code needs count to be in register. */
17987 if (CONST_INT_P (count_exp) && desired_align > align)
17988 {
17989 if (INTVAL (count_exp) > desired_align
17990 && INTVAL (count_exp) > size_needed)
17991 {
17992 align_bytes
17993 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17994 if (align_bytes <= 0)
17995 align_bytes = 0;
17996 else
17997 align_bytes = desired_align - align_bytes;
17998 }
17999 if (align_bytes == 0)
18000 count_exp = force_reg (counter_mode (count_exp), count_exp);
18001 }
18002 gcc_assert (desired_align >= 1 && align >= 1);
18003
18004 /* Ensure that alignment prologue won't copy past end of block. */
18005 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18006 {
18007 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
/* The epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
   Make sure it is a power of 2.  */
18010 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18011
18012 if (count)
18013 {
18014 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18015 {
18016 /* If main algorithm works on QImode, no epilogue is needed.
18017 For small sizes just don't align anything. */
18018 if (size_needed == 1)
18019 desired_align = align;
18020 else
18021 goto epilogue;
18022 }
18023 }
18024 else
18025 {
18026 label = gen_label_rtx ();
18027 emit_cmp_and_jump_insns (count_exp,
18028 GEN_INT (epilogue_size_needed),
18029 LTU, 0, counter_mode (count_exp), 1, label);
18030 if (expected_size == -1 || expected_size < epilogue_size_needed)
18031 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18032 else
18033 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18034 }
18035 }
18036
/* Emit code to decide at runtime whether a library call or inline code
   should be used.  */
18039 if (dynamic_check != -1)
18040 {
18041 if (CONST_INT_P (count_exp))
18042 {
18043 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18044 {
18045 emit_block_move_via_libcall (dst, src, count_exp, false);
18046 count_exp = const0_rtx;
18047 goto epilogue;
18048 }
18049 }
18050 else
18051 {
18052 rtx hot_label = gen_label_rtx ();
18053 jump_around_label = gen_label_rtx ();
18054 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18055 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18056 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18057 emit_block_move_via_libcall (dst, src, count_exp, false);
18058 emit_jump (jump_around_label);
18059 emit_label (hot_label);
18060 }
18061 }
18062
18063 /* Step 2: Alignment prologue. */
18064
18065 if (desired_align > align)
18066 {
18067 if (align_bytes == 0)
18068 {
/* Except for the first move in the epilogue, we no longer know
   the constant offset in aliasing info.  It doesn't seem worth
   the pain to maintain it for the first move, so throw away
   the info early.  */
18073 src = change_address (src, BLKmode, srcreg);
18074 dst = change_address (dst, BLKmode, destreg);
18075 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18076 desired_align);
18077 }
18078 else
18079 {
18080 /* If we know how many bytes need to be stored before dst is
18081 sufficiently aligned, maintain aliasing info accurately. */
18082 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18083 desired_align, align_bytes);
18084 count_exp = plus_constant (count_exp, -align_bytes);
18085 count -= align_bytes;
18086 }
18087 if (need_zero_guard
18088 && (count < (unsigned HOST_WIDE_INT) size_needed
18089 || (align_bytes == 0
18090 && count < ((unsigned HOST_WIDE_INT) size_needed
18091 + desired_align - align))))
18092 {
18093 /* It is possible that we copied enough so the main loop will not
18094 execute. */
18095 gcc_assert (size_needed > 1);
18096 if (label == NULL_RTX)
18097 label = gen_label_rtx ();
18098 emit_cmp_and_jump_insns (count_exp,
18099 GEN_INT (size_needed),
18100 LTU, 0, counter_mode (count_exp), 1, label);
18101 if (expected_size == -1
18102 || expected_size < (desired_align - align) / 2 + size_needed)
18103 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18104 else
18105 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18106 }
18107 }
18108 if (label && size_needed == 1)
18109 {
18110 emit_label (label);
18111 LABEL_NUSES (label) = 1;
18112 label = NULL;
18113 epilogue_size_needed = 1;
18114 }
18115 else if (label == NULL_RTX)
18116 epilogue_size_needed = size_needed;
18117
18118 /* Step 3: Main loop. */
18119
18120 switch (alg)
18121 {
18122 case libcall:
18123 case no_stringop:
18124 gcc_unreachable ();
18125 case loop_1_byte:
18126 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18127 count_exp, QImode, 1, expected_size);
18128 break;
18129 case loop:
18130 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18131 count_exp, Pmode, 1, expected_size);
18132 break;
18133 case unrolled_loop:
/* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
   registers for 4 temporaries anyway.  */
18136 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18137 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18138 expected_size);
18139 break;
18140 case rep_prefix_8_byte:
18141 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18142 DImode);
18143 break;
18144 case rep_prefix_4_byte:
18145 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18146 SImode);
18147 break;
18148 case rep_prefix_1_byte:
18149 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18150 QImode);
18151 break;
18152 }
/* Properly adjust the offsets of the src and dest memory for aliasing.  */
18154 if (CONST_INT_P (count_exp))
18155 {
18156 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18157 (count / size_needed) * size_needed);
18158 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18159 (count / size_needed) * size_needed);
18160 }
18161 else
18162 {
18163 src = change_address (src, BLKmode, srcreg);
18164 dst = change_address (dst, BLKmode, destreg);
18165 }
18166
18167 /* Step 4: Epilogue to copy the remaining bytes. */
18168 epilogue:
18169 if (label)
18170 {
/* When the main loop is done, COUNT_EXP might hold the original count,
   while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
   The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
   bytes.  Compensate if needed.  */
18175
18176 if (size_needed < epilogue_size_needed)
18177 {
18178 tmp =
18179 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18180 GEN_INT (size_needed - 1), count_exp, 1,
18181 OPTAB_DIRECT);
18182 if (tmp != count_exp)
18183 emit_move_insn (count_exp, tmp);
18184 }
18185 emit_label (label);
18186 LABEL_NUSES (label) = 1;
18187 }
18188
18189 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18190 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18191 epilogue_size_needed);
18192 if (jump_around_label)
18193 emit_label (jump_around_label);
18194 return 1;
18195 }
18196
/* Helper function for memset.  For the QImode value 0xXY produce
   0xXYXYXYXY of the width specified by MODE.  This is essentially
   a * 0x10101010, but we can do slightly better than
   synth_mult by unwinding the sequence by hand on CPUs with
   slow multiply.  */
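/* E.g. promoting the QImode value 0xAB to SImode yields 0xABABABAB, either
   via a multiply by 0x01010101 or, when multiplies are slow, via the shift
   and OR sequence

       reg |= reg << 8;  reg |= reg << 16;  (and reg |= reg << 32 for DImode)

   possibly using an insv of the low byte into bits 8..15 when partial
   register stalls are not a concern.  */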
18202 static rtx
18203 promote_duplicated_reg (enum machine_mode mode, rtx val)
18204 {
18205 enum machine_mode valmode = GET_MODE (val);
18206 rtx tmp;
18207 int nops = mode == DImode ? 3 : 2;
18208
18209 gcc_assert (mode == SImode || mode == DImode);
18210 if (val == const0_rtx)
18211 return copy_to_mode_reg (mode, const0_rtx);
18212 if (CONST_INT_P (val))
18213 {
18214 HOST_WIDE_INT v = INTVAL (val) & 255;
18215
18216 v |= v << 8;
18217 v |= v << 16;
18218 if (mode == DImode)
18219 v |= (v << 16) << 16;
18220 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18221 }
18222
18223 if (valmode == VOIDmode)
18224 valmode = QImode;
18225 if (valmode != QImode)
18226 val = gen_lowpart (QImode, val);
18227 if (mode == QImode)
18228 return val;
18229 if (!TARGET_PARTIAL_REG_STALL)
18230 nops--;
18231 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18232 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18233 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18234 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18235 {
18236 rtx reg = convert_modes (mode, QImode, val, true);
18237 tmp = promote_duplicated_reg (mode, const1_rtx);
18238 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18239 OPTAB_DIRECT);
18240 }
18241 else
18242 {
18243 rtx reg = convert_modes (mode, QImode, val, true);
18244
      if (!TARGET_PARTIAL_REG_STALL)
	{
	  if (mode == SImode)
	    emit_insn (gen_movsi_insv_1 (reg, reg));
	  else
	    emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
	}
      else
	{
	  tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
				     NULL, 1, OPTAB_DIRECT);
	  reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
	}
18257 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18258 NULL, 1, OPTAB_DIRECT);
18259 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18260 if (mode == SImode)
18261 return reg;
18262 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18263 NULL, 1, OPTAB_DIRECT);
18264 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18265 return reg;
18266 }
18267 }
18268
/* Duplicate value VAL, using promote_duplicated_reg, into the maximal size
   that will be needed by the main loop copying SIZE_NEEDED chunks and by the
   prologue getting alignment from ALIGN to DESIRED_ALIGN.  */
18272 static rtx
18273 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18274 {
18275 rtx promoted_val;
18276
18277 if (TARGET_64BIT
18278 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18279 promoted_val = promote_duplicated_reg (DImode, val);
18280 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18281 promoted_val = promote_duplicated_reg (SImode, val);
18282 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18283 promoted_val = promote_duplicated_reg (HImode, val);
18284 else
18285 promoted_val = val;
18286
18287 return promoted_val;
18288 }
18289
/* Expand a string set operation (memset).  Use i386 string operations when
   profitable.  See the ix86_expand_movmem comment for an explanation of the
   individual steps performed.  */
18293 int
18294 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18295 rtx expected_align_exp, rtx expected_size_exp)
18296 {
18297 rtx destreg;
18298 rtx label = NULL;
18299 rtx tmp;
18300 rtx jump_around_label = NULL;
18301 HOST_WIDE_INT align = 1;
18302 unsigned HOST_WIDE_INT count = 0;
18303 HOST_WIDE_INT expected_size = -1;
18304 int size_needed = 0, epilogue_size_needed;
18305 int desired_align = 0, align_bytes = 0;
18306 enum stringop_alg alg;
18307 rtx promoted_val = NULL;
18308 bool force_loopy_epilogue = false;
18309 int dynamic_check;
18310 bool need_zero_guard = false;
18311
18312 if (CONST_INT_P (align_exp))
18313 align = INTVAL (align_exp);
  /* i386 can do misaligned access at a reasonably increased cost.  */
18315 if (CONST_INT_P (expected_align_exp)
18316 && INTVAL (expected_align_exp) > align)
18317 align = INTVAL (expected_align_exp);
18318 if (CONST_INT_P (count_exp))
18319 count = expected_size = INTVAL (count_exp);
18320 if (CONST_INT_P (expected_size_exp) && count == 0)
18321 expected_size = INTVAL (expected_size_exp);
18322
18323 /* Make sure we don't need to care about overflow later on. */
18324 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18325 return 0;
18326
18327 /* Step 0: Decide on preferred algorithm, desired alignment and
18328 size of chunks to be copied by main loop. */
18329
18330 alg = decide_alg (count, expected_size, true, &dynamic_check);
18331 desired_align = decide_alignment (align, alg, expected_size);
18332
18333 if (!TARGET_ALIGN_STRINGOPS)
18334 align = desired_align;
18335
18336 if (alg == libcall)
18337 return 0;
18338 gcc_assert (alg != no_stringop);
18339 if (!count)
18340 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18341 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18342 switch (alg)
18343 {
18344 case libcall:
18345 case no_stringop:
18346 gcc_unreachable ();
18347 case loop:
18348 need_zero_guard = true;
18349 size_needed = GET_MODE_SIZE (Pmode);
18350 break;
18351 case unrolled_loop:
18352 need_zero_guard = true;
18353 size_needed = GET_MODE_SIZE (Pmode) * 4;
18354 break;
18355 case rep_prefix_8_byte:
18356 size_needed = 8;
18357 break;
18358 case rep_prefix_4_byte:
18359 size_needed = 4;
18360 break;
18361 case rep_prefix_1_byte:
18362 size_needed = 1;
18363 break;
18364 case loop_1_byte:
18365 need_zero_guard = true;
18366 size_needed = 1;
18367 break;
18368 }
18369 epilogue_size_needed = size_needed;
18370
18371 /* Step 1: Prologue guard. */
18372
18373 /* Alignment code needs count to be in register. */
18374 if (CONST_INT_P (count_exp) && desired_align > align)
18375 {
18376 if (INTVAL (count_exp) > desired_align
18377 && INTVAL (count_exp) > size_needed)
18378 {
18379 align_bytes
18380 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18381 if (align_bytes <= 0)
18382 align_bytes = 0;
18383 else
18384 align_bytes = desired_align - align_bytes;
18385 }
18386 if (align_bytes == 0)
18387 {
18388 enum machine_mode mode = SImode;
18389 if (TARGET_64BIT && (count & ~0xffffffff))
18390 mode = DImode;
18391 count_exp = force_reg (mode, count_exp);
18392 }
18393 }
  /* Do the cheap promotion to allow better CSE across the
     main loop and epilogue (i.e. one load of the big constant in
     front of all code).  */
18397 if (CONST_INT_P (val_exp))
18398 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18399 desired_align, align);
18400 /* Ensure that alignment prologue won't copy past end of block. */
18401 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18402 {
18403 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
      /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
         Make sure it is a power of 2.  */
18406 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
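
      /* For example (illustrative only), with SIZE_NEEDED == 8 and no larger
         alignment prologue requirement, the value computed here is 8 and the
         epilogue will handle the final COUNT_EXP & 7 bytes.  */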
18407
      /* To improve performance of small blocks, we jump around the VAL
         promoting code.  This means that if the promoted VAL is not constant,
         we might not use it in the epilogue and have to use the byte
         loop variant.  */
18412 if (epilogue_size_needed > 2 && !promoted_val)
18413 force_loopy_epilogue = true;
18414 if (count)
18415 {
18416 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18417 {
18418 /* If main algorithm works on QImode, no epilogue is needed.
18419 For small sizes just don't align anything. */
18420 if (size_needed == 1)
18421 desired_align = align;
18422 else
18423 goto epilogue;
18424 }
18425 }
18426 else
18427 {
18428 label = gen_label_rtx ();
18429 emit_cmp_and_jump_insns (count_exp,
18430 GEN_INT (epilogue_size_needed),
18431 LTU, 0, counter_mode (count_exp), 1, label);
18432 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18433 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18434 else
18435 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18436 }
18437 }
18438 if (dynamic_check != -1)
18439 {
18440 rtx hot_label = gen_label_rtx ();
18441 jump_around_label = gen_label_rtx ();
18442 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18443 LEU, 0, counter_mode (count_exp), 1, hot_label);
18444 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18445 set_storage_via_libcall (dst, count_exp, val_exp, false);
18446 emit_jump (jump_around_label);
18447 emit_label (hot_label);
18448 }
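
  /* Illustrative note: the comparison just emitted sends counts of at most
     DYNAMIC_CHECK - 1 to the "hot" inline expansion that follows, while
     larger counts are handled by the memset library call and jump around
     the rest of the expansion.  */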
18449
18450 /* Step 2: Alignment prologue. */
18451
18452 /* Do the expensive promotion once we branched off the small blocks. */
18453 if (!promoted_val)
18454 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18455 desired_align, align);
18456 gcc_assert (desired_align >= 1 && align >= 1);
18457
18458 if (desired_align > align)
18459 {
18460 if (align_bytes == 0)
18461 {
          /* Except for the first move in the epilogue, we no longer know
             the constant offset in aliasing info.  It doesn't seem worth
             the pain to maintain it for the first move, so throw away
             the info early.  */
18466 dst = change_address (dst, BLKmode, destreg);
18467 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18468 desired_align);
18469 }
18470 else
18471 {
18472 /* If we know how many bytes need to be stored before dst is
18473 sufficiently aligned, maintain aliasing info accurately. */
18474 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18475 desired_align, align_bytes);
18476 count_exp = plus_constant (count_exp, -align_bytes);
18477 count -= align_bytes;
18478 }
18479 if (need_zero_guard
18480 && (count < (unsigned HOST_WIDE_INT) size_needed
18481 || (align_bytes == 0
18482 && count < ((unsigned HOST_WIDE_INT) size_needed
18483 + desired_align - align))))
18484 {
18485 /* It is possible that we copied enough so the main loop will not
18486 execute. */
18487 gcc_assert (size_needed > 1);
18488 if (label == NULL_RTX)
18489 label = gen_label_rtx ();
18490 emit_cmp_and_jump_insns (count_exp,
18491 GEN_INT (size_needed),
18492 LTU, 0, counter_mode (count_exp), 1, label);
18493 if (expected_size == -1
18494 || expected_size < (desired_align - align) / 2 + size_needed)
18495 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18496 else
18497 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18498 }
18499 }
18500 if (label && size_needed == 1)
18501 {
18502 emit_label (label);
18503 LABEL_NUSES (label) = 1;
18504 label = NULL;
18505 promoted_val = val_exp;
18506 epilogue_size_needed = 1;
18507 }
18508 else if (label == NULL_RTX)
18509 epilogue_size_needed = size_needed;
18510
18511 /* Step 3: Main loop. */
18512
18513 switch (alg)
18514 {
18515 case libcall:
18516 case no_stringop:
18517 gcc_unreachable ();
18518 case loop_1_byte:
18519 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18520 count_exp, QImode, 1, expected_size);
18521 break;
18522 case loop:
18523 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18524 count_exp, Pmode, 1, expected_size);
18525 break;
18526 case unrolled_loop:
18527 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18528 count_exp, Pmode, 4, expected_size);
18529 break;
18530 case rep_prefix_8_byte:
18531 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18532 DImode, val_exp);
18533 break;
18534 case rep_prefix_4_byte:
18535 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18536 SImode, val_exp);
18537 break;
18538 case rep_prefix_1_byte:
18539 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18540 QImode, val_exp);
18541 break;
18542 }
  /* Properly adjust the offset of the destination memory for aliasing.  */
18544 if (CONST_INT_P (count_exp))
18545 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18546 (count / size_needed) * size_needed);
18547 else
18548 dst = change_address (dst, BLKmode, destreg);
18549
18550 /* Step 4: Epilogue to copy the remaining bytes. */
18551
18552 if (label)
18553 {
      /* When the main loop is done, COUNT_EXP might hold the original count,
         while we want to store only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
         Epilogue code will actually store COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
         bytes.  Compensate if needed.  */
18558
18559 if (size_needed < epilogue_size_needed)
18560 {
18561 tmp =
18562 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18563 GEN_INT (size_needed - 1), count_exp, 1,
18564 OPTAB_DIRECT);
18565 if (tmp != count_exp)
18566 emit_move_insn (count_exp, tmp);
18567 }
18568 emit_label (label);
18569 LABEL_NUSES (label) = 1;
18570 }
18571 epilogue:
18572 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18573 {
18574 if (force_loopy_epilogue)
18575 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18576 epilogue_size_needed);
18577 else
18578 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18579 epilogue_size_needed);
18580 }
18581 if (jump_around_label)
18582 emit_label (jump_around_label);
18583 return 1;
18584 }
18585
18586 /* Expand the appropriate insns for doing strlen if not just doing
18587 repnz; scasb
18588
18589 out = result, initialized with the start address
18590 align_rtx = alignment of the address.
   scratch = scratch register, initialized with the start address when
18592 not aligned, otherwise undefined
18593
18594 This is just the body. It needs the initializations mentioned above and
18595 some address computing at the end. These things are done in i386.md. */
18596
18597 static void
18598 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18599 {
18600 int align;
18601 rtx tmp;
18602 rtx align_2_label = NULL_RTX;
18603 rtx align_3_label = NULL_RTX;
18604 rtx align_4_label = gen_label_rtx ();
18605 rtx end_0_label = gen_label_rtx ();
18606 rtx mem;
18607 rtx tmpreg = gen_reg_rtx (SImode);
18608 rtx scratch = gen_reg_rtx (SImode);
18609 rtx cmp;
18610
18611 align = 0;
18612 if (CONST_INT_P (align_rtx))
18613 align = INTVAL (align_rtx);
18614
18615 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18616
18617 /* Is there a known alignment and is it less than 4? */
18618 if (align < 4)
18619 {
18620 rtx scratch1 = gen_reg_rtx (Pmode);
18621 emit_move_insn (scratch1, out);
18622 /* Is there a known alignment and is it not 2? */
18623 if (align != 2)
18624 {
18625 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18626 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18627
18628 /* Leave just the 3 lower bits. */
18629 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18630 NULL_RTX, 0, OPTAB_WIDEN);
18631
18632 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18633 Pmode, 1, align_4_label);
18634 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18635 Pmode, 1, align_2_label);
18636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18637 Pmode, 1, align_3_label);
18638 }
18639 else
18640 {
          /* Since the alignment is 2, we have to check 2 or 0 bytes;
             check whether it is aligned to a 4 byte boundary.  */
18643
18644 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18645 NULL_RTX, 0, OPTAB_WIDEN);
18646
18647 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18648 Pmode, 1, align_4_label);
18649 }
18650
18651 mem = change_address (src, QImode, out);
18652
18653 /* Now compare the bytes. */
18654
      /* Compare the first n unaligned bytes on a byte-by-byte basis.  */
18656 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18657 QImode, 1, end_0_label);
18658
18659 /* Increment the address. */
18660 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18661
18662 /* Not needed with an alignment of 2 */
18663 if (align != 2)
18664 {
18665 emit_label (align_2_label);
18666
18667 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18668 end_0_label);
18669
18670 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18671
18672 emit_label (align_3_label);
18673 }
18674
18675 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18676 end_0_label);
18677
18678 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18679 }
18680
  /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
     align this loop; doing so only makes the program larger and does not
     speed it up.  */
18684 emit_label (align_4_label);
18685
18686 mem = change_address (src, SImode, out);
18687 emit_move_insn (scratch, mem);
18688 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18689
  /* This formula yields a nonzero result iff one of the bytes is zero.
     This saves three branches inside the loop and many cycles.  */
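
  /* Illustrative note: for the loaded word x the insns below compute
         (x - 0x01010101) & ~x & 0x80808080.
     Roughly, the subtraction sets bit 7 of a byte that was zero, while the
     ~x term masks out bytes whose top bit was already set, so the result is
     nonzero exactly when some byte of x is zero.  */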
18692
18693 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18694 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18695 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18696 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18697 gen_int_mode (0x80808080, SImode)));
18698 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18699 align_4_label);
18700
18701 if (TARGET_CMOVE)
18702 {
18703 rtx reg = gen_reg_rtx (SImode);
18704 rtx reg2 = gen_reg_rtx (Pmode);
18705 emit_move_insn (reg, tmpreg);
18706 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18707
18708 /* If zero is not in the first two bytes, move two bytes forward. */
18709 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18710 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18711 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18712 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18713 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18714 reg,
18715 tmpreg)));
18716 /* Emit lea manually to avoid clobbering of flags. */
18717 emit_insn (gen_rtx_SET (SImode, reg2,
18718 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18719
18720 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18721 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18722 emit_insn (gen_rtx_SET (VOIDmode, out,
18723 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18724 reg2,
18725 out)));
18726
18727 }
18728 else
18729 {
18730 rtx end_2_label = gen_label_rtx ();
18731 /* Is zero in the first two bytes? */
18732
18733 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18735 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18736 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18737 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18738 pc_rtx);
18739 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18740 JUMP_LABEL (tmp) = end_2_label;
18741
18742 /* Not in the first two. Move two bytes forward. */
18743 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18744 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18745
18746 emit_label (end_2_label);
18747
18748 }
18749
18750 /* Avoid branch in fixing the byte. */
18751 tmpreg = gen_lowpart (QImode, tmpreg);
18752 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18753 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18754 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18755
18756 emit_label (end_0_label);
18757 }
18758
18759 /* Expand strlen. */
18760
18761 int
18762 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18763 {
18764 rtx addr, scratch1, scratch2, scratch3, scratch4;
18765
  /* The generic case of the strlen expander is long.  Avoid expanding it
     unless TARGET_INLINE_ALL_STRINGOPS.  */
18768
18769 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18770 && !TARGET_INLINE_ALL_STRINGOPS
18771 && !optimize_insn_for_size_p ()
18772 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18773 return 0;
18774
18775 addr = force_reg (Pmode, XEXP (src, 0));
18776 scratch1 = gen_reg_rtx (Pmode);
18777
18778 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18779 && !optimize_insn_for_size_p ())
18780 {
      /* Well it seems that some optimizer does not combine a call like
         foo(strlen(bar), strlen(bar));
         when the move and the subtraction are done here.  It does calculate
         the length just once when these instructions are done inside of
         output_strlen_unroll().  But since &bar[strlen(bar)] is often used
         and this uses one fewer register for the lifetime of
         output_strlen_unroll(), this is better.  */
18788
18789 emit_move_insn (out, addr);
18790
18791 ix86_expand_strlensi_unroll_1 (out, src, align);
18792
18793 /* strlensi_unroll_1 returns the address of the zero at the end of
18794 the string, like memchr(), so compute the length by subtracting
18795 the start address. */
18796 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18797 }
18798 else
18799 {
18800 rtx unspec;
18801
18802 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18803 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18804 return false;
18805
18806 scratch2 = gen_reg_rtx (Pmode);
18807 scratch3 = gen_reg_rtx (Pmode);
18808 scratch4 = force_reg (Pmode, constm1_rtx);
18809
18810 emit_move_insn (scratch3, addr);
18811 eoschar = force_reg (QImode, eoschar);
18812
18813 src = replace_equiv_address_nv (src, scratch3);
18814
18815 /* If .md starts supporting :P, this can be done in .md. */
18816 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18817 scratch4), UNSPEC_SCAS);
18818 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18819 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18820 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
18821 }
18822 return 1;
18823 }
18824
/* For a given symbol (function) construct code to compute the address of
   its PLT entry in the large x86-64 PIC model.  */
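
/* Roughly speaking (illustrative only), the emitted code is
       tmp = <pic register> + symbol@PLTOFF
   i.e. the PLT entry address is formed by adding a GOT-relative offset to
   the PIC base register.  */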
18827 rtx
18828 construct_plt_address (rtx symbol)
18829 {
18830 rtx tmp = gen_reg_rtx (Pmode);
18831 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18832
18833 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18834 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18835
18836 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18837 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
18838 return tmp;
18839 }
18840
18841 void
18842 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18843 rtx callarg2,
18844 rtx pop, int sibcall)
18845 {
18846 rtx use = NULL, call;
18847
18848 if (pop == const0_rtx)
18849 pop = NULL;
18850 gcc_assert (!TARGET_64BIT || !pop);
18851
18852 if (TARGET_MACHO && !TARGET_64BIT)
18853 {
18854 #if TARGET_MACHO
18855 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18856 fnaddr = machopic_indirect_call_target (fnaddr);
18857 #endif
18858 }
18859 else
18860 {
18861 /* Static functions and indirect calls don't need the pic register. */
18862 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18863 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18864 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18865 use_reg (&use, pic_offset_table_rtx);
18866 }
18867
18868 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18869 {
18870 rtx al = gen_rtx_REG (QImode, AX_REG);
18871 emit_move_insn (al, callarg2);
18872 use_reg (&use, al);
18873 }
18874
18875 if (ix86_cmodel == CM_LARGE_PIC
18876 && GET_CODE (fnaddr) == MEM
18877 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18878 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18879 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18880 else if (sibcall
18881 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
18882 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
18883 {
18884 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18885 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18886 }
18887
18888 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18889 if (retval)
18890 call = gen_rtx_SET (VOIDmode, retval, call);
18891 if (pop)
18892 {
18893 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18894 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18895 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18896 }
18897 if (TARGET_64BIT
18898 && ix86_cfun_abi () == MS_ABI
18899 && (!callarg2 || INTVAL (callarg2) != -2))
18900 {
18901 /* We need to represent that SI and DI registers are clobbered
18902 by SYSV calls. */
18903 static int clobbered_registers[] = {
18904 XMM6_REG, XMM7_REG, XMM8_REG,
18905 XMM9_REG, XMM10_REG, XMM11_REG,
18906 XMM12_REG, XMM13_REG, XMM14_REG,
18907 XMM15_REG, SI_REG, DI_REG
18908 };
18909 unsigned int i;
18910 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18911 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18912 UNSPEC_MS_TO_SYSV_CALL);
18913
18914 vec[0] = call;
18915 vec[1] = unspec;
18916 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18917 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18918 ? TImode : DImode,
18919 gen_rtx_REG
18920 (SSE_REGNO_P (clobbered_registers[i])
18921 ? TImode : DImode,
18922 clobbered_registers[i]));
18923
18924 call = gen_rtx_PARALLEL (VOIDmode,
18925 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18926 + 2, vec));
18927 }
18928
18929 call = emit_call_insn (call);
18930 if (use)
18931 CALL_INSN_FUNCTION_USAGE (call) = use;
18932 }
18933
18934
18935 /* Clear stack slot assignments remembered from previous functions.
18936 This is called from INIT_EXPANDERS once before RTL is emitted for each
18937 function. */
18938
18939 static struct machine_function *
18940 ix86_init_machine_status (void)
18941 {
18942 struct machine_function *f;
18943
18944 f = GGC_CNEW (struct machine_function);
18945 f->use_fast_prologue_epilogue_nregs = -1;
18946 f->tls_descriptor_call_expanded_p = 0;
18947 f->call_abi = DEFAULT_ABI;
18948
18949 return f;
18950 }
18951
18952 /* Return a MEM corresponding to a stack slot with mode MODE.
18953 Allocate a new slot if necessary.
18954
18955 The RTL for a function can have several slots available: N is
18956 which slot to use. */
18957
18958 rtx
18959 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18960 {
18961 struct stack_local_entry *s;
18962
18963 gcc_assert (n < MAX_386_STACK_LOCALS);
18964
18965 /* Virtual slot is valid only before vregs are instantiated. */
18966 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
18967
18968 for (s = ix86_stack_locals; s; s = s->next)
18969 if (s->mode == mode && s->n == n)
18970 return copy_rtx (s->rtl);
18971
18972 s = (struct stack_local_entry *)
18973 ggc_alloc (sizeof (struct stack_local_entry));
18974 s->n = n;
18975 s->mode = mode;
18976 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18977
18978 s->next = ix86_stack_locals;
18979 ix86_stack_locals = s;
18980 return s->rtl;
18981 }
18982
18983 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18984
18985 static GTY(()) rtx ix86_tls_symbol;
18986 rtx
18987 ix86_tls_get_addr (void)
18988 {
18989
18990 if (!ix86_tls_symbol)
18991 {
18992 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18993 (TARGET_ANY_GNU_TLS
18994 && !TARGET_64BIT)
18995 ? "___tls_get_addr"
18996 : "__tls_get_addr");
18997 }
18998
18999 return ix86_tls_symbol;
19000 }
19001
19002 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19003
19004 static GTY(()) rtx ix86_tls_module_base_symbol;
19005 rtx
19006 ix86_tls_module_base (void)
19007 {
19008
19009 if (!ix86_tls_module_base_symbol)
19010 {
19011 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19012 "_TLS_MODULE_BASE_");
19013 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19014 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19015 }
19016
19017 return ix86_tls_module_base_symbol;
19018 }
19019
19020 /* Calculate the length of the memory address in the instruction
19021 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19022
19023 int
19024 memory_address_length (rtx addr)
19025 {
19026 struct ix86_address parts;
19027 rtx base, index, disp;
19028 int len;
19029 int ok;
19030
19031 if (GET_CODE (addr) == PRE_DEC
19032 || GET_CODE (addr) == POST_INC
19033 || GET_CODE (addr) == PRE_MODIFY
19034 || GET_CODE (addr) == POST_MODIFY)
19035 return 0;
19036
19037 ok = ix86_decompose_address (addr, &parts);
19038 gcc_assert (ok);
19039
19040 if (parts.base && GET_CODE (parts.base) == SUBREG)
19041 parts.base = SUBREG_REG (parts.base);
19042 if (parts.index && GET_CODE (parts.index) == SUBREG)
19043 parts.index = SUBREG_REG (parts.index);
19044
19045 base = parts.base;
19046 index = parts.index;
19047 disp = parts.disp;
19048 len = 0;
19049
19050 /* Rule of thumb:
19051 - esp as the base always wants an index,
19052 - ebp as the base always wants a displacement. */
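
  /* A few illustrative encodings (counting only the bytes beyond the
     mandatory modrm byte, as this function does):
         (%eax)          -> 0
         (%esp)          -> 1  (needs a SIB byte)
         (%ebp)          -> 1  (encoded with a zero disp8)
         4(%eax)         -> 1  (disp8)
         4(%eax,%ebx,2)  -> 2  (SIB + disp8)
         foo             -> 4  (disp32)  */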
19053
19054 /* Register Indirect. */
19055 if (base && !index && !disp)
19056 {
19057 /* esp (for its index) and ebp (for its displacement) need
19058 the two-byte modrm form. */
19059 if (addr == stack_pointer_rtx
19060 || addr == arg_pointer_rtx
19061 || addr == frame_pointer_rtx
19062 || addr == hard_frame_pointer_rtx)
19063 len = 1;
19064 }
19065
19066 /* Direct Addressing. */
19067 else if (disp && !base && !index)
19068 len = 4;
19069
19070 else
19071 {
19072 /* Find the length of the displacement constant. */
19073 if (disp)
19074 {
19075 if (base && satisfies_constraint_K (disp))
19076 len = 1;
19077 else
19078 len = 4;
19079 }
19080 /* ebp always wants a displacement. */
19081 else if (base == hard_frame_pointer_rtx)
19082 len = 1;
19083
19084 /* An index requires the two-byte modrm form.... */
19085 if (index
19086 /* ...like esp, which always wants an index. */
19087 || base == stack_pointer_rtx
19088 || base == arg_pointer_rtx
19089 || base == frame_pointer_rtx)
19090 len += 1;
19091 }
19092
19093 return len;
19094 }
19095
/* Compute the default value for the "length_immediate" attribute.  When
   SHORTFORM is set, expect that the insn has an 8-bit immediate
   alternative.  */
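
/* For example (illustrative only), "add $4, %eax" can use the sign-extended
   8-bit immediate form (1 byte of immediate), whereas "add $300, %eax" needs
   the full 32-bit immediate (4 bytes); constraint K distinguishes the two
   cases below.  */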
19098 int
19099 ix86_attr_length_immediate_default (rtx insn, int shortform)
19100 {
19101 int len = 0;
19102 int i;
19103 extract_insn_cached (insn);
19104 for (i = recog_data.n_operands - 1; i >= 0; --i)
19105 if (CONSTANT_P (recog_data.operand[i]))
19106 {
19107 gcc_assert (!len);
19108 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19109 len = 1;
19110 else
19111 {
19112 switch (get_attr_mode (insn))
19113 {
              case MODE_QI:
                len += 1;
                break;
              case MODE_HI:
                len += 2;
                break;
              case MODE_SI:
                len += 4;
                break;
              /* Immediates for DImode instructions are encoded as
                 32bit sign extended values.  */
              case MODE_DI:
                len += 4;
                break;
19127 default:
19128 fatal_insn ("unknown insn mode", insn);
19129 }
19130 }
19131 }
19132 return len;
19133 }

/* Compute the default value for the "length_address" attribute.  */
19135 int
19136 ix86_attr_length_address_default (rtx insn)
19137 {
19138 int i;
19139
19140 if (get_attr_type (insn) == TYPE_LEA)
19141 {
19142 rtx set = PATTERN (insn);
19143
19144 if (GET_CODE (set) == PARALLEL)
19145 set = XVECEXP (set, 0, 0);
19146
19147 gcc_assert (GET_CODE (set) == SET);
19148
19149 return memory_address_length (SET_SRC (set));
19150 }
19151
19152 extract_insn_cached (insn);
  for (i = recog_data.n_operands - 1; i >= 0; --i)
    if (MEM_P (recog_data.operand[i]))
      return memory_address_length (XEXP (recog_data.operand[i], 0));
19159 return 0;
19160 }
19161
/* Compute the default value for the "length_vex" attribute.  It includes
   the 2 or 3 byte VEX prefix and 1 opcode byte.  */
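
/* For instance (illustrative only): in 64-bit mode an instruction that needs
   the REX.W, REX.X or REX.B information must use the 3-byte VEX prefix
   (3 + 1 below), while a 0f-map instruction needing none of those bits can
   use the 2-byte form (2 + 1).  In 32-bit mode the 2-byte form always
   suffices.  */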
19164
19165 int
19166 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19167 int has_vex_w)
19168 {
19169 int i;
19170
  /* Only the 0f opcode can use the 2 byte VEX prefix, and the VEX W bit
     requires the 3 byte VEX prefix.  */
19173 if (!has_0f_opcode || has_vex_w)
19174 return 3 + 1;
19175
19176 /* We can always use 2 byte VEX prefix in 32bit. */
19177 if (!TARGET_64BIT)
19178 return 2 + 1;
19179
19180 extract_insn_cached (insn);
19181
19182 for (i = recog_data.n_operands - 1; i >= 0; --i)
19183 if (REG_P (recog_data.operand[i]))
19184 {
19185 /* REX.W bit uses 3 byte VEX prefix. */
19186 if (GET_MODE (recog_data.operand[i]) == DImode)
19187 return 3 + 1;
19188 }
19189 else
19190 {
19191 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19192 if (MEM_P (recog_data.operand[i])
19193 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19194 return 3 + 1;
19195 }
19196
19197 return 2 + 1;
19198 }
19199
19200 /* Return the maximum number of instructions a cpu can issue. */
19201
19202 static int
19203 ix86_issue_rate (void)
19204 {
19205 switch (ix86_tune)
19206 {
19207 case PROCESSOR_PENTIUM:
19208 case PROCESSOR_K6:
19209 return 2;
19210
19211 case PROCESSOR_PENTIUMPRO:
19212 case PROCESSOR_PENTIUM4:
19213 case PROCESSOR_ATHLON:
19214 case PROCESSOR_K8:
19215 case PROCESSOR_AMDFAM10:
19216 case PROCESSOR_NOCONA:
19217 case PROCESSOR_GENERIC32:
19218 case PROCESSOR_GENERIC64:
19219 return 3;
19220
19221 case PROCESSOR_CORE2:
19222 return 4;
19223
19224 default:
19225 return 1;
19226 }
19227 }
19228
/* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
   by DEP_INSN and nothing else set by DEP_INSN.  */
19231
19232 static int
19233 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19234 {
19235 rtx set, set2;
19236
19237 /* Simplify the test for uninteresting insns. */
19238 if (insn_type != TYPE_SETCC
19239 && insn_type != TYPE_ICMOV
19240 && insn_type != TYPE_FCMOV
19241 && insn_type != TYPE_IBR)
19242 return 0;
19243
19244 if ((set = single_set (dep_insn)) != 0)
19245 {
19246 set = SET_DEST (set);
19247 set2 = NULL_RTX;
19248 }
19249 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19250 && XVECLEN (PATTERN (dep_insn), 0) == 2
19251 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19252 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19253 {
19254 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
      set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19256 }
19257 else
19258 return 0;
19259
19260 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19261 return 0;
19262
19263 /* This test is true if the dependent insn reads the flags but
19264 not any other potentially set register. */
19265 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19266 return 0;
19267
19268 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19269 return 0;
19270
19271 return 1;
19272 }
19273
19274 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19275 address with operands set by DEP_INSN. */
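
/* For example (illustrative only), on a Pentium the sequence
       addl $4, %ebx
       movl (%ebx), %eax
   suffers an address-generation interlock: the load's address depends on a
   register written by the immediately preceding instruction.  */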
19276
19277 static int
19278 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19279 {
19280 rtx addr;
19281
19282 if (insn_type == TYPE_LEA
19283 && TARGET_PENTIUM)
19284 {
19285 addr = PATTERN (insn);
19286
19287 if (GET_CODE (addr) == PARALLEL)
19288 addr = XVECEXP (addr, 0, 0);
19289
19290 gcc_assert (GET_CODE (addr) == SET);
19291
19292 addr = SET_SRC (addr);
19293 }
19294 else
19295 {
19296 int i;
19297 extract_insn_cached (insn);
19298 for (i = recog_data.n_operands - 1; i >= 0; --i)
19299 if (MEM_P (recog_data.operand[i]))
19300 {
19301 addr = XEXP (recog_data.operand[i], 0);
19302 goto found;
19303 }
19304 return 0;
19305 found:;
19306 }
19307
19308 return modified_in_p (addr, dep_insn);
19309 }
19310
19311 static int
19312 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19313 {
19314 enum attr_type insn_type, dep_insn_type;
19315 enum attr_memory memory;
19316 rtx set, set2;
19317 int dep_insn_code_number;
19318
19319 /* Anti and output dependencies have zero cost on all CPUs. */
19320 if (REG_NOTE_KIND (link) != 0)
19321 return 0;
19322
19323 dep_insn_code_number = recog_memoized (dep_insn);
19324
19325 /* If we can't recognize the insns, we can't really do anything. */
19326 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19327 return cost;
19328
19329 insn_type = get_attr_type (insn);
19330 dep_insn_type = get_attr_type (dep_insn);
19331
19332 switch (ix86_tune)
19333 {
19334 case PROCESSOR_PENTIUM:
19335 /* Address Generation Interlock adds a cycle of latency. */
19336 if (ix86_agi_dependent (insn, dep_insn, insn_type))
19337 cost += 1;
19338
19339 /* ??? Compares pair with jump/setcc. */
19340 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19341 cost = 0;
19342
19343 /* Floating point stores require value to be ready one cycle earlier. */
19344 if (insn_type == TYPE_FMOV
19345 && get_attr_memory (insn) == MEMORY_STORE
19346 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19347 cost += 1;
19348 break;
19349
19350 case PROCESSOR_PENTIUMPRO:
19351 memory = get_attr_memory (insn);
19352
19353 /* INT->FP conversion is expensive. */
19354 if (get_attr_fp_int_src (dep_insn))
19355 cost += 5;
19356
19357 /* There is one cycle extra latency between an FP op and a store. */
19358 if (insn_type == TYPE_FMOV
19359 && (set = single_set (dep_insn)) != NULL_RTX
19360 && (set2 = single_set (insn)) != NULL_RTX
19361 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19362 && MEM_P (SET_DEST (set2)))
19363 cost += 1;
19364
      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction, in case
         the previous instruction is not needed to compute the address.  */
19368 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19369 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19370 {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
19373 if (dep_insn_type == TYPE_IMOV
19374 || dep_insn_type == TYPE_FMOV)
19375 cost = 1;
19376 else if (cost > 1)
19377 cost--;
19378 }
19379 break;
19380
19381 case PROCESSOR_K6:
19382 memory = get_attr_memory (insn);
19383
19384 /* The esp dependency is resolved before the instruction is really
19385 finished. */
19386 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19387 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19388 return 1;
19389
19390 /* INT->FP conversion is expensive. */
19391 if (get_attr_fp_int_src (dep_insn))
19392 cost += 5;
19393
      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction, in case
         the previous instruction is not needed to compute the address.  */
19397 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19398 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19399 {
          /* Claim moves to take one cycle, as the core can issue one load
             at a time and the next load can start a cycle later.  */
19402 if (dep_insn_type == TYPE_IMOV
19403 || dep_insn_type == TYPE_FMOV)
19404 cost = 1;
19405 else if (cost > 2)
19406 cost -= 2;
19407 else
19408 cost = 1;
19409 }
19410 break;
19411
19412 case PROCESSOR_ATHLON:
19413 case PROCESSOR_K8:
19414 case PROCESSOR_AMDFAM10:
19415 case PROCESSOR_GENERIC32:
19416 case PROCESSOR_GENERIC64:
19417 memory = get_attr_memory (insn);
19418
      /* Show the ability of the reorder buffer to hide the latency of a load
         by executing it in parallel with the previous instruction, in case
         the previous instruction is not needed to compute the address.  */
19422 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19423 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19424 {
19425 enum attr_unit unit = get_attr_unit (insn);
19426 int loadcost = 3;
19427
          /* Because of the difference between the length of the integer and
             floating point unit pipeline preparation stages, the memory
             operands for floating point are cheaper.

             ??? For Athlon the difference is most probably 2.  */
19433 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19434 loadcost = 3;
19435 else
19436 loadcost = TARGET_ATHLON ? 2 : 0;
19437
19438 if (cost >= loadcost)
19439 cost -= loadcost;
19440 else
19441 cost = 0;
19442 }
19443
19444 default:
19445 break;
19446 }
19447
19448 return cost;
19449 }
19450
/* How many alternative schedules to try.  This should be as wide as the
   scheduling freedom in the DFA, but no wider.  Making this value too
   large results in extra work for the scheduler.  */
19454
19455 static int
19456 ia32_multipass_dfa_lookahead (void)
19457 {
19458 switch (ix86_tune)
19459 {
19460 case PROCESSOR_PENTIUM:
19461 return 2;
19462
19463 case PROCESSOR_PENTIUMPRO:
19464 case PROCESSOR_K6:
19465 return 1;
19466
19467 default:
19468 return 0;
19469 }
19470 }
19471
19472
19473 /* Compute the alignment given to a constant that is being placed in memory.
19474 EXP is the constant and ALIGN is the alignment that the object would
19475 ordinarily have.
19476 The value of this function is used instead of that alignment to align
19477 the object. */
19478
19479 int
19480 ix86_constant_alignment (tree exp, int align)
19481 {
19482 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19483 || TREE_CODE (exp) == INTEGER_CST)
19484 {
19485 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19486 return 64;
19487 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19488 return 128;
19489 }
19490 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19491 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19492 return BITS_PER_WORD;
19493
19494 return align;
19495 }
19496
19497 /* Compute the alignment for a static variable.
19498 TYPE is the data type, and ALIGN is the alignment that
19499 the object would ordinarily have. The value of this function is used
19500 instead of that alignment to align the object. */
19501
19502 int
19503 ix86_data_alignment (tree type, int align)
19504 {
19505 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19506
19507 if (AGGREGATE_TYPE_P (type)
19508 && TYPE_SIZE (type)
19509 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19510 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19511 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19512 && align < max_align)
19513 align = max_align;
19514
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16 byte boundary.  */
19517 if (TARGET_64BIT)
19518 {
19519 if (AGGREGATE_TYPE_P (type)
19520 && TYPE_SIZE (type)
19521 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19522 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19523 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19524 return 128;
19525 }
19526
19527 if (TREE_CODE (type) == ARRAY_TYPE)
19528 {
19529 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19530 return 64;
19531 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19532 return 128;
19533 }
19534 else if (TREE_CODE (type) == COMPLEX_TYPE)
    {
      if (TYPE_MODE (type) == DCmode && align < 64)
19538 return 64;
19539 if ((TYPE_MODE (type) == XCmode
19540 || TYPE_MODE (type) == TCmode) && align < 128)
19541 return 128;
19542 }
19543 else if ((TREE_CODE (type) == RECORD_TYPE
19544 || TREE_CODE (type) == UNION_TYPE
19545 || TREE_CODE (type) == QUAL_UNION_TYPE)
19546 && TYPE_FIELDS (type))
19547 {
19548 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19549 return 64;
19550 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19551 return 128;
19552 }
19553 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19554 || TREE_CODE (type) == INTEGER_TYPE)
19555 {
19556 if (TYPE_MODE (type) == DFmode && align < 64)
19557 return 64;
19558 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19559 return 128;
19560 }
19561
19562 return align;
19563 }
19564
19565 /* Compute the alignment for a local variable or a stack slot. EXP is
19566 the data type or decl itself, MODE is the widest mode available and
19567 ALIGN is the alignment that the object would ordinarily have. The
19568 value of this macro is used instead of that alignment to align the
19569 object. */
19570
19571 unsigned int
19572 ix86_local_alignment (tree exp, enum machine_mode mode,
19573 unsigned int align)
19574 {
19575 tree type, decl;
19576
19577 if (exp && DECL_P (exp))
19578 {
19579 type = TREE_TYPE (exp);
19580 decl = exp;
19581 }
19582 else
19583 {
19584 type = exp;
19585 decl = NULL;
19586 }
19587
19588 /* Don't do dynamic stack realignment for long long objects with
19589 -mpreferred-stack-boundary=2. */
19590 if (!TARGET_64BIT
19591 && align == 64
19592 && ix86_preferred_stack_boundary < 64
19593 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19594 && (!type || !TYPE_USER_ALIGN (type))
19595 && (!decl || !DECL_USER_ALIGN (decl)))
19596 align = 32;
19597
  /* If TYPE is NULL, we are allocating a stack slot for a caller-save
     register in MODE.  We will return the largest alignment of XF
     and DF.  */
19601 if (!type)
19602 {
19603 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19604 align = GET_MODE_ALIGNMENT (DFmode);
19605 return align;
19606 }
19607
  /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
     to a 16 byte boundary.  */
19610 if (TARGET_64BIT)
19611 {
19612 if (AGGREGATE_TYPE_P (type)
19613 && TYPE_SIZE (type)
19614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19617 return 128;
19618 }
19619 if (TREE_CODE (type) == ARRAY_TYPE)
19620 {
19621 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19622 return 64;
19623 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19624 return 128;
19625 }
19626 else if (TREE_CODE (type) == COMPLEX_TYPE)
19627 {
19628 if (TYPE_MODE (type) == DCmode && align < 64)
19629 return 64;
19630 if ((TYPE_MODE (type) == XCmode
19631 || TYPE_MODE (type) == TCmode) && align < 128)
19632 return 128;
19633 }
19634 else if ((TREE_CODE (type) == RECORD_TYPE
19635 || TREE_CODE (type) == UNION_TYPE
19636 || TREE_CODE (type) == QUAL_UNION_TYPE)
19637 && TYPE_FIELDS (type))
19638 {
19639 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19640 return 64;
19641 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19642 return 128;
19643 }
19644 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19645 || TREE_CODE (type) == INTEGER_TYPE)
    {
      if (TYPE_MODE (type) == DFmode && align < 64)
19649 return 64;
19650 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19651 return 128;
19652 }
19653 return align;
19654 }
19655
19656 /* Compute the minimum required alignment for dynamic stack realignment
19657 purposes for a local variable, parameter or a stack slot. EXP is
19658 the data type or decl itself, MODE is its mode and ALIGN is the
19659 alignment that the object would ordinarily have. */
19660
19661 unsigned int
19662 ix86_minimum_alignment (tree exp, enum machine_mode mode,
19663 unsigned int align)
19664 {
19665 tree type, decl;
19666
19667 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
19668 return align;
19669
19670 if (exp && DECL_P (exp))
19671 {
19672 type = TREE_TYPE (exp);
19673 decl = exp;
19674 }
19675 else
19676 {
19677 type = exp;
19678 decl = NULL;
19679 }
19680
19681 /* Don't do dynamic stack realignment for long long objects with
19682 -mpreferred-stack-boundary=2. */
19683 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
19684 && (!type || !TYPE_USER_ALIGN (type))
19685 && (!decl || !DECL_USER_ALIGN (decl)))
19686 return 32;
19687
19688 return align;
19689 }
19690
19691 /* Emit RTL insns to initialize the variable parts of a trampoline.
19692 FNADDR is an RTX for the address of the function's pure code.
19693 CXT is an RTX for the static chain value for the function. */
19694 void
19695 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19696 {
19697 if (!TARGET_64BIT)
19698 {
19699 /* Compute offset from the end of the jmp to the target function. */
19700 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19701 plus_constant (tramp, 10),
19702 NULL_RTX, 1, OPTAB_DIRECT);
19703 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19704 gen_int_mode (0xb9, QImode));
19705 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19706 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19707 gen_int_mode (0xe9, QImode));
19708 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19709 }
19710 else
19711 {
19712 int offset = 0;
      /* Try to load the address using the shorter movl instead of movabs.
         We may want to support movq for kernel mode, but the kernel does not
         use trampolines at the moment.  */
19716 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19717 {
19718 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19719 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19720 gen_int_mode (0xbb41, HImode));
19721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19722 gen_lowpart (SImode, fnaddr));
19723 offset += 6;
19724 }
19725 else
19726 {
19727 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19728 gen_int_mode (0xbb49, HImode));
19729 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19730 fnaddr);
19731 offset += 10;
19732 }
19733 /* Load static chain using movabs to r10. */
19734 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19735 gen_int_mode (0xba49, HImode));
19736 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19737 cxt);
19738 offset += 10;
      /* Jump to r11.  */
19740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19741 gen_int_mode (0xff49, HImode));
19742 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19743 gen_int_mode (0xe3, QImode));
19744 offset += 3;
19745 gcc_assert (offset <= TRAMPOLINE_SIZE);
19746 }
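
  /* Illustrative layout of the 64-bit trampoline emitted above (when the
     short movl form is usable):
         41 bb <fnaddr32>   movl   $fnaddr, %r11d
         49 ba <cxt64>      movabs $cxt, %r10
         49 ff e3           rex.WB jmp *%r11
     and of the 32-bit trampoline:
         b9 <cxt32>         movl   $cxt, %ecx
         e9 <rel32>         jmp    fnaddr  */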
19747
19748 #ifdef ENABLE_EXECUTE_STACK
19749 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19750 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19751 #endif
19752 }
19753
19754 /* Codes for all the SSE/MMX builtins. */
19755 enum ix86_builtins
19756 {
19757 IX86_BUILTIN_ADDPS,
19758 IX86_BUILTIN_ADDSS,
19759 IX86_BUILTIN_DIVPS,
19760 IX86_BUILTIN_DIVSS,
19761 IX86_BUILTIN_MULPS,
19762 IX86_BUILTIN_MULSS,
19763 IX86_BUILTIN_SUBPS,
19764 IX86_BUILTIN_SUBSS,
19765
19766 IX86_BUILTIN_CMPEQPS,
19767 IX86_BUILTIN_CMPLTPS,
19768 IX86_BUILTIN_CMPLEPS,
19769 IX86_BUILTIN_CMPGTPS,
19770 IX86_BUILTIN_CMPGEPS,
19771 IX86_BUILTIN_CMPNEQPS,
19772 IX86_BUILTIN_CMPNLTPS,
19773 IX86_BUILTIN_CMPNLEPS,
19774 IX86_BUILTIN_CMPNGTPS,
19775 IX86_BUILTIN_CMPNGEPS,
19776 IX86_BUILTIN_CMPORDPS,
19777 IX86_BUILTIN_CMPUNORDPS,
19778 IX86_BUILTIN_CMPEQSS,
19779 IX86_BUILTIN_CMPLTSS,
19780 IX86_BUILTIN_CMPLESS,
19781 IX86_BUILTIN_CMPNEQSS,
19782 IX86_BUILTIN_CMPNLTSS,
19783 IX86_BUILTIN_CMPNLESS,
19784 IX86_BUILTIN_CMPNGTSS,
19785 IX86_BUILTIN_CMPNGESS,
19786 IX86_BUILTIN_CMPORDSS,
19787 IX86_BUILTIN_CMPUNORDSS,
19788
19789 IX86_BUILTIN_COMIEQSS,
19790 IX86_BUILTIN_COMILTSS,
19791 IX86_BUILTIN_COMILESS,
19792 IX86_BUILTIN_COMIGTSS,
19793 IX86_BUILTIN_COMIGESS,
19794 IX86_BUILTIN_COMINEQSS,
19795 IX86_BUILTIN_UCOMIEQSS,
19796 IX86_BUILTIN_UCOMILTSS,
19797 IX86_BUILTIN_UCOMILESS,
19798 IX86_BUILTIN_UCOMIGTSS,
19799 IX86_BUILTIN_UCOMIGESS,
19800 IX86_BUILTIN_UCOMINEQSS,
19801
19802 IX86_BUILTIN_CVTPI2PS,
19803 IX86_BUILTIN_CVTPS2PI,
19804 IX86_BUILTIN_CVTSI2SS,
19805 IX86_BUILTIN_CVTSI642SS,
19806 IX86_BUILTIN_CVTSS2SI,
19807 IX86_BUILTIN_CVTSS2SI64,
19808 IX86_BUILTIN_CVTTPS2PI,
19809 IX86_BUILTIN_CVTTSS2SI,
19810 IX86_BUILTIN_CVTTSS2SI64,
19811
19812 IX86_BUILTIN_MAXPS,
19813 IX86_BUILTIN_MAXSS,
19814 IX86_BUILTIN_MINPS,
19815 IX86_BUILTIN_MINSS,
19816
19817 IX86_BUILTIN_LOADUPS,
19818 IX86_BUILTIN_STOREUPS,
19819 IX86_BUILTIN_MOVSS,
19820
19821 IX86_BUILTIN_MOVHLPS,
19822 IX86_BUILTIN_MOVLHPS,
19823 IX86_BUILTIN_LOADHPS,
19824 IX86_BUILTIN_LOADLPS,
19825 IX86_BUILTIN_STOREHPS,
19826 IX86_BUILTIN_STORELPS,
19827
19828 IX86_BUILTIN_MASKMOVQ,
19829 IX86_BUILTIN_MOVMSKPS,
19830 IX86_BUILTIN_PMOVMSKB,
19831
19832 IX86_BUILTIN_MOVNTPS,
19833 IX86_BUILTIN_MOVNTQ,
19834
19835 IX86_BUILTIN_LOADDQU,
19836 IX86_BUILTIN_STOREDQU,
19837
19838 IX86_BUILTIN_PACKSSWB,
19839 IX86_BUILTIN_PACKSSDW,
19840 IX86_BUILTIN_PACKUSWB,
19841
19842 IX86_BUILTIN_PADDB,
19843 IX86_BUILTIN_PADDW,
19844 IX86_BUILTIN_PADDD,
19845 IX86_BUILTIN_PADDQ,
19846 IX86_BUILTIN_PADDSB,
19847 IX86_BUILTIN_PADDSW,
19848 IX86_BUILTIN_PADDUSB,
19849 IX86_BUILTIN_PADDUSW,
19850 IX86_BUILTIN_PSUBB,
19851 IX86_BUILTIN_PSUBW,
19852 IX86_BUILTIN_PSUBD,
19853 IX86_BUILTIN_PSUBQ,
19854 IX86_BUILTIN_PSUBSB,
19855 IX86_BUILTIN_PSUBSW,
19856 IX86_BUILTIN_PSUBUSB,
19857 IX86_BUILTIN_PSUBUSW,
19858
19859 IX86_BUILTIN_PAND,
19860 IX86_BUILTIN_PANDN,
19861 IX86_BUILTIN_POR,
19862 IX86_BUILTIN_PXOR,
19863
19864 IX86_BUILTIN_PAVGB,
19865 IX86_BUILTIN_PAVGW,
19866
19867 IX86_BUILTIN_PCMPEQB,
19868 IX86_BUILTIN_PCMPEQW,
19869 IX86_BUILTIN_PCMPEQD,
19870 IX86_BUILTIN_PCMPGTB,
19871 IX86_BUILTIN_PCMPGTW,
19872 IX86_BUILTIN_PCMPGTD,
19873
19874 IX86_BUILTIN_PMADDWD,
19875
19876 IX86_BUILTIN_PMAXSW,
19877 IX86_BUILTIN_PMAXUB,
19878 IX86_BUILTIN_PMINSW,
19879 IX86_BUILTIN_PMINUB,
19880
19881 IX86_BUILTIN_PMULHUW,
19882 IX86_BUILTIN_PMULHW,
19883 IX86_BUILTIN_PMULLW,
19884
19885 IX86_BUILTIN_PSADBW,
19886 IX86_BUILTIN_PSHUFW,
19887
19888 IX86_BUILTIN_PSLLW,
19889 IX86_BUILTIN_PSLLD,
19890 IX86_BUILTIN_PSLLQ,
19891 IX86_BUILTIN_PSRAW,
19892 IX86_BUILTIN_PSRAD,
19893 IX86_BUILTIN_PSRLW,
19894 IX86_BUILTIN_PSRLD,
19895 IX86_BUILTIN_PSRLQ,
19896 IX86_BUILTIN_PSLLWI,
19897 IX86_BUILTIN_PSLLDI,
19898 IX86_BUILTIN_PSLLQI,
19899 IX86_BUILTIN_PSRAWI,
19900 IX86_BUILTIN_PSRADI,
19901 IX86_BUILTIN_PSRLWI,
19902 IX86_BUILTIN_PSRLDI,
19903 IX86_BUILTIN_PSRLQI,
19904
19905 IX86_BUILTIN_PUNPCKHBW,
19906 IX86_BUILTIN_PUNPCKHWD,
19907 IX86_BUILTIN_PUNPCKHDQ,
19908 IX86_BUILTIN_PUNPCKLBW,
19909 IX86_BUILTIN_PUNPCKLWD,
19910 IX86_BUILTIN_PUNPCKLDQ,
19911
19912 IX86_BUILTIN_SHUFPS,
19913
19914 IX86_BUILTIN_RCPPS,
19915 IX86_BUILTIN_RCPSS,
19916 IX86_BUILTIN_RSQRTPS,
19917 IX86_BUILTIN_RSQRTPS_NR,
19918 IX86_BUILTIN_RSQRTSS,
19919 IX86_BUILTIN_RSQRTF,
19920 IX86_BUILTIN_SQRTPS,
19921 IX86_BUILTIN_SQRTPS_NR,
19922 IX86_BUILTIN_SQRTSS,
19923
19924 IX86_BUILTIN_UNPCKHPS,
19925 IX86_BUILTIN_UNPCKLPS,
19926
19927 IX86_BUILTIN_ANDPS,
19928 IX86_BUILTIN_ANDNPS,
19929 IX86_BUILTIN_ORPS,
19930 IX86_BUILTIN_XORPS,
19931
19932 IX86_BUILTIN_EMMS,
19933 IX86_BUILTIN_LDMXCSR,
19934 IX86_BUILTIN_STMXCSR,
19935 IX86_BUILTIN_SFENCE,
19936
19937 /* 3DNow! Original */
19938 IX86_BUILTIN_FEMMS,
19939 IX86_BUILTIN_PAVGUSB,
19940 IX86_BUILTIN_PF2ID,
19941 IX86_BUILTIN_PFACC,
19942 IX86_BUILTIN_PFADD,
19943 IX86_BUILTIN_PFCMPEQ,
19944 IX86_BUILTIN_PFCMPGE,
19945 IX86_BUILTIN_PFCMPGT,
19946 IX86_BUILTIN_PFMAX,
19947 IX86_BUILTIN_PFMIN,
19948 IX86_BUILTIN_PFMUL,
19949 IX86_BUILTIN_PFRCP,
19950 IX86_BUILTIN_PFRCPIT1,
19951 IX86_BUILTIN_PFRCPIT2,
19952 IX86_BUILTIN_PFRSQIT1,
19953 IX86_BUILTIN_PFRSQRT,
19954 IX86_BUILTIN_PFSUB,
19955 IX86_BUILTIN_PFSUBR,
19956 IX86_BUILTIN_PI2FD,
19957 IX86_BUILTIN_PMULHRW,
19958
19959 /* 3DNow! Athlon Extensions */
19960 IX86_BUILTIN_PF2IW,
19961 IX86_BUILTIN_PFNACC,
19962 IX86_BUILTIN_PFPNACC,
19963 IX86_BUILTIN_PI2FW,
19964 IX86_BUILTIN_PSWAPDSI,
19965 IX86_BUILTIN_PSWAPDSF,
19966
19967 /* SSE2 */
19968 IX86_BUILTIN_ADDPD,
19969 IX86_BUILTIN_ADDSD,
19970 IX86_BUILTIN_DIVPD,
19971 IX86_BUILTIN_DIVSD,
19972 IX86_BUILTIN_MULPD,
19973 IX86_BUILTIN_MULSD,
19974 IX86_BUILTIN_SUBPD,
19975 IX86_BUILTIN_SUBSD,
19976
19977 IX86_BUILTIN_CMPEQPD,
19978 IX86_BUILTIN_CMPLTPD,
19979 IX86_BUILTIN_CMPLEPD,
19980 IX86_BUILTIN_CMPGTPD,
19981 IX86_BUILTIN_CMPGEPD,
19982 IX86_BUILTIN_CMPNEQPD,
19983 IX86_BUILTIN_CMPNLTPD,
19984 IX86_BUILTIN_CMPNLEPD,
19985 IX86_BUILTIN_CMPNGTPD,
19986 IX86_BUILTIN_CMPNGEPD,
19987 IX86_BUILTIN_CMPORDPD,
19988 IX86_BUILTIN_CMPUNORDPD,
19989 IX86_BUILTIN_CMPEQSD,
19990 IX86_BUILTIN_CMPLTSD,
19991 IX86_BUILTIN_CMPLESD,
19992 IX86_BUILTIN_CMPNEQSD,
19993 IX86_BUILTIN_CMPNLTSD,
19994 IX86_BUILTIN_CMPNLESD,
19995 IX86_BUILTIN_CMPORDSD,
19996 IX86_BUILTIN_CMPUNORDSD,
19997
19998 IX86_BUILTIN_COMIEQSD,
19999 IX86_BUILTIN_COMILTSD,
20000 IX86_BUILTIN_COMILESD,
20001 IX86_BUILTIN_COMIGTSD,
20002 IX86_BUILTIN_COMIGESD,
20003 IX86_BUILTIN_COMINEQSD,
20004 IX86_BUILTIN_UCOMIEQSD,
20005 IX86_BUILTIN_UCOMILTSD,
20006 IX86_BUILTIN_UCOMILESD,
20007 IX86_BUILTIN_UCOMIGTSD,
20008 IX86_BUILTIN_UCOMIGESD,
20009 IX86_BUILTIN_UCOMINEQSD,
20010
20011 IX86_BUILTIN_MAXPD,
20012 IX86_BUILTIN_MAXSD,
20013 IX86_BUILTIN_MINPD,
20014 IX86_BUILTIN_MINSD,
20015
20016 IX86_BUILTIN_ANDPD,
20017 IX86_BUILTIN_ANDNPD,
20018 IX86_BUILTIN_ORPD,
20019 IX86_BUILTIN_XORPD,
20020
20021 IX86_BUILTIN_SQRTPD,
20022 IX86_BUILTIN_SQRTSD,
20023
20024 IX86_BUILTIN_UNPCKHPD,
20025 IX86_BUILTIN_UNPCKLPD,
20026
20027 IX86_BUILTIN_SHUFPD,
20028
20029 IX86_BUILTIN_LOADUPD,
20030 IX86_BUILTIN_STOREUPD,
20031 IX86_BUILTIN_MOVSD,
20032
20033 IX86_BUILTIN_LOADHPD,
20034 IX86_BUILTIN_LOADLPD,
20035
20036 IX86_BUILTIN_CVTDQ2PD,
20037 IX86_BUILTIN_CVTDQ2PS,
20038
20039 IX86_BUILTIN_CVTPD2DQ,
20040 IX86_BUILTIN_CVTPD2PI,
20041 IX86_BUILTIN_CVTPD2PS,
20042 IX86_BUILTIN_CVTTPD2DQ,
20043 IX86_BUILTIN_CVTTPD2PI,
20044
20045 IX86_BUILTIN_CVTPI2PD,
20046 IX86_BUILTIN_CVTSI2SD,
20047 IX86_BUILTIN_CVTSI642SD,
20048
20049 IX86_BUILTIN_CVTSD2SI,
20050 IX86_BUILTIN_CVTSD2SI64,
20051 IX86_BUILTIN_CVTSD2SS,
20052 IX86_BUILTIN_CVTSS2SD,
20053 IX86_BUILTIN_CVTTSD2SI,
20054 IX86_BUILTIN_CVTTSD2SI64,
20055
20056 IX86_BUILTIN_CVTPS2DQ,
20057 IX86_BUILTIN_CVTPS2PD,
20058 IX86_BUILTIN_CVTTPS2DQ,
20059
20060 IX86_BUILTIN_MOVNTI,
20061 IX86_BUILTIN_MOVNTPD,
20062 IX86_BUILTIN_MOVNTDQ,
20063
20064 IX86_BUILTIN_MOVQ128,
20065
20066 /* SSE2 MMX */
20067 IX86_BUILTIN_MASKMOVDQU,
20068 IX86_BUILTIN_MOVMSKPD,
20069 IX86_BUILTIN_PMOVMSKB128,
20070
20071 IX86_BUILTIN_PACKSSWB128,
20072 IX86_BUILTIN_PACKSSDW128,
20073 IX86_BUILTIN_PACKUSWB128,
20074
20075 IX86_BUILTIN_PADDB128,
20076 IX86_BUILTIN_PADDW128,
20077 IX86_BUILTIN_PADDD128,
20078 IX86_BUILTIN_PADDQ128,
20079 IX86_BUILTIN_PADDSB128,
20080 IX86_BUILTIN_PADDSW128,
20081 IX86_BUILTIN_PADDUSB128,
20082 IX86_BUILTIN_PADDUSW128,
20083 IX86_BUILTIN_PSUBB128,
20084 IX86_BUILTIN_PSUBW128,
20085 IX86_BUILTIN_PSUBD128,
20086 IX86_BUILTIN_PSUBQ128,
20087 IX86_BUILTIN_PSUBSB128,
20088 IX86_BUILTIN_PSUBSW128,
20089 IX86_BUILTIN_PSUBUSB128,
20090 IX86_BUILTIN_PSUBUSW128,
20091
20092 IX86_BUILTIN_PAND128,
20093 IX86_BUILTIN_PANDN128,
20094 IX86_BUILTIN_POR128,
20095 IX86_BUILTIN_PXOR128,
20096
20097 IX86_BUILTIN_PAVGB128,
20098 IX86_BUILTIN_PAVGW128,
20099
20100 IX86_BUILTIN_PCMPEQB128,
20101 IX86_BUILTIN_PCMPEQW128,
20102 IX86_BUILTIN_PCMPEQD128,
20103 IX86_BUILTIN_PCMPGTB128,
20104 IX86_BUILTIN_PCMPGTW128,
20105 IX86_BUILTIN_PCMPGTD128,
20106
20107 IX86_BUILTIN_PMADDWD128,
20108
20109 IX86_BUILTIN_PMAXSW128,
20110 IX86_BUILTIN_PMAXUB128,
20111 IX86_BUILTIN_PMINSW128,
20112 IX86_BUILTIN_PMINUB128,
20113
20114 IX86_BUILTIN_PMULUDQ,
20115 IX86_BUILTIN_PMULUDQ128,
20116 IX86_BUILTIN_PMULHUW128,
20117 IX86_BUILTIN_PMULHW128,
20118 IX86_BUILTIN_PMULLW128,
20119
20120 IX86_BUILTIN_PSADBW128,
20121 IX86_BUILTIN_PSHUFHW,
20122 IX86_BUILTIN_PSHUFLW,
20123 IX86_BUILTIN_PSHUFD,
20124
20125 IX86_BUILTIN_PSLLDQI128,
20126 IX86_BUILTIN_PSLLWI128,
20127 IX86_BUILTIN_PSLLDI128,
20128 IX86_BUILTIN_PSLLQI128,
20129 IX86_BUILTIN_PSRAWI128,
20130 IX86_BUILTIN_PSRADI128,
20131 IX86_BUILTIN_PSRLDQI128,
20132 IX86_BUILTIN_PSRLWI128,
20133 IX86_BUILTIN_PSRLDI128,
20134 IX86_BUILTIN_PSRLQI128,
20135
20136 IX86_BUILTIN_PSLLDQ128,
20137 IX86_BUILTIN_PSLLW128,
20138 IX86_BUILTIN_PSLLD128,
20139 IX86_BUILTIN_PSLLQ128,
20140 IX86_BUILTIN_PSRAW128,
20141 IX86_BUILTIN_PSRAD128,
20142 IX86_BUILTIN_PSRLW128,
20143 IX86_BUILTIN_PSRLD128,
20144 IX86_BUILTIN_PSRLQ128,
20145
20146 IX86_BUILTIN_PUNPCKHBW128,
20147 IX86_BUILTIN_PUNPCKHWD128,
20148 IX86_BUILTIN_PUNPCKHDQ128,
20149 IX86_BUILTIN_PUNPCKHQDQ128,
20150 IX86_BUILTIN_PUNPCKLBW128,
20151 IX86_BUILTIN_PUNPCKLWD128,
20152 IX86_BUILTIN_PUNPCKLDQ128,
20153 IX86_BUILTIN_PUNPCKLQDQ128,
20154
20155 IX86_BUILTIN_CLFLUSH,
20156 IX86_BUILTIN_MFENCE,
20157 IX86_BUILTIN_LFENCE,
20158
20159 /* SSE3. */
20160 IX86_BUILTIN_ADDSUBPS,
20161 IX86_BUILTIN_HADDPS,
20162 IX86_BUILTIN_HSUBPS,
20163 IX86_BUILTIN_MOVSHDUP,
20164 IX86_BUILTIN_MOVSLDUP,
20165 IX86_BUILTIN_ADDSUBPD,
20166 IX86_BUILTIN_HADDPD,
20167 IX86_BUILTIN_HSUBPD,
20168 IX86_BUILTIN_LDDQU,
20169
20170 IX86_BUILTIN_MONITOR,
20171 IX86_BUILTIN_MWAIT,
20172
20173 /* SSSE3. */
20174 IX86_BUILTIN_PHADDW,
20175 IX86_BUILTIN_PHADDD,
20176 IX86_BUILTIN_PHADDSW,
20177 IX86_BUILTIN_PHSUBW,
20178 IX86_BUILTIN_PHSUBD,
20179 IX86_BUILTIN_PHSUBSW,
20180 IX86_BUILTIN_PMADDUBSW,
20181 IX86_BUILTIN_PMULHRSW,
20182 IX86_BUILTIN_PSHUFB,
20183 IX86_BUILTIN_PSIGNB,
20184 IX86_BUILTIN_PSIGNW,
20185 IX86_BUILTIN_PSIGND,
20186 IX86_BUILTIN_PALIGNR,
20187 IX86_BUILTIN_PABSB,
20188 IX86_BUILTIN_PABSW,
20189 IX86_BUILTIN_PABSD,
20190
20191 IX86_BUILTIN_PHADDW128,
20192 IX86_BUILTIN_PHADDD128,
20193 IX86_BUILTIN_PHADDSW128,
20194 IX86_BUILTIN_PHSUBW128,
20195 IX86_BUILTIN_PHSUBD128,
20196 IX86_BUILTIN_PHSUBSW128,
20197 IX86_BUILTIN_PMADDUBSW128,
20198 IX86_BUILTIN_PMULHRSW128,
20199 IX86_BUILTIN_PSHUFB128,
20200 IX86_BUILTIN_PSIGNB128,
20201 IX86_BUILTIN_PSIGNW128,
20202 IX86_BUILTIN_PSIGND128,
20203 IX86_BUILTIN_PALIGNR128,
20204 IX86_BUILTIN_PABSB128,
20205 IX86_BUILTIN_PABSW128,
20206 IX86_BUILTIN_PABSD128,
20207
20208 /* AMDFAM10 - SSE4A New Instructions. */
20209 IX86_BUILTIN_MOVNTSD,
20210 IX86_BUILTIN_MOVNTSS,
20211 IX86_BUILTIN_EXTRQI,
20212 IX86_BUILTIN_EXTRQ,
20213 IX86_BUILTIN_INSERTQI,
20214 IX86_BUILTIN_INSERTQ,
20215
20216 /* SSE4.1. */
20217 IX86_BUILTIN_BLENDPD,
20218 IX86_BUILTIN_BLENDPS,
20219 IX86_BUILTIN_BLENDVPD,
20220 IX86_BUILTIN_BLENDVPS,
20221 IX86_BUILTIN_PBLENDVB128,
20222 IX86_BUILTIN_PBLENDW128,
20223
20224 IX86_BUILTIN_DPPD,
20225 IX86_BUILTIN_DPPS,
20226
20227 IX86_BUILTIN_INSERTPS128,
20228
20229 IX86_BUILTIN_MOVNTDQA,
20230 IX86_BUILTIN_MPSADBW128,
20231 IX86_BUILTIN_PACKUSDW128,
20232 IX86_BUILTIN_PCMPEQQ,
20233 IX86_BUILTIN_PHMINPOSUW128,
20234
20235 IX86_BUILTIN_PMAXSB128,
20236 IX86_BUILTIN_PMAXSD128,
20237 IX86_BUILTIN_PMAXUD128,
20238 IX86_BUILTIN_PMAXUW128,
20239
20240 IX86_BUILTIN_PMINSB128,
20241 IX86_BUILTIN_PMINSD128,
20242 IX86_BUILTIN_PMINUD128,
20243 IX86_BUILTIN_PMINUW128,
20244
20245 IX86_BUILTIN_PMOVSXBW128,
20246 IX86_BUILTIN_PMOVSXBD128,
20247 IX86_BUILTIN_PMOVSXBQ128,
20248 IX86_BUILTIN_PMOVSXWD128,
20249 IX86_BUILTIN_PMOVSXWQ128,
20250 IX86_BUILTIN_PMOVSXDQ128,
20251
20252 IX86_BUILTIN_PMOVZXBW128,
20253 IX86_BUILTIN_PMOVZXBD128,
20254 IX86_BUILTIN_PMOVZXBQ128,
20255 IX86_BUILTIN_PMOVZXWD128,
20256 IX86_BUILTIN_PMOVZXWQ128,
20257 IX86_BUILTIN_PMOVZXDQ128,
20258
20259 IX86_BUILTIN_PMULDQ128,
20260 IX86_BUILTIN_PMULLD128,
20261
20262 IX86_BUILTIN_ROUNDPD,
20263 IX86_BUILTIN_ROUNDPS,
20264 IX86_BUILTIN_ROUNDSD,
20265 IX86_BUILTIN_ROUNDSS,
20266
20267 IX86_BUILTIN_PTESTZ,
20268 IX86_BUILTIN_PTESTC,
20269 IX86_BUILTIN_PTESTNZC,
20270
20271 IX86_BUILTIN_VEC_INIT_V2SI,
20272 IX86_BUILTIN_VEC_INIT_V4HI,
20273 IX86_BUILTIN_VEC_INIT_V8QI,
20274 IX86_BUILTIN_VEC_EXT_V2DF,
20275 IX86_BUILTIN_VEC_EXT_V2DI,
20276 IX86_BUILTIN_VEC_EXT_V4SF,
20277 IX86_BUILTIN_VEC_EXT_V4SI,
20278 IX86_BUILTIN_VEC_EXT_V8HI,
20279 IX86_BUILTIN_VEC_EXT_V2SI,
20280 IX86_BUILTIN_VEC_EXT_V4HI,
20281 IX86_BUILTIN_VEC_EXT_V16QI,
20282 IX86_BUILTIN_VEC_SET_V2DI,
20283 IX86_BUILTIN_VEC_SET_V4SF,
20284 IX86_BUILTIN_VEC_SET_V4SI,
20285 IX86_BUILTIN_VEC_SET_V8HI,
20286 IX86_BUILTIN_VEC_SET_V4HI,
20287 IX86_BUILTIN_VEC_SET_V16QI,
20288
20289 IX86_BUILTIN_VEC_PACK_SFIX,
20290
20291 /* SSE4.2. */
20292 IX86_BUILTIN_CRC32QI,
20293 IX86_BUILTIN_CRC32HI,
20294 IX86_BUILTIN_CRC32SI,
20295 IX86_BUILTIN_CRC32DI,
20296
20297 IX86_BUILTIN_PCMPESTRI128,
20298 IX86_BUILTIN_PCMPESTRM128,
20299 IX86_BUILTIN_PCMPESTRA128,
20300 IX86_BUILTIN_PCMPESTRC128,
20301 IX86_BUILTIN_PCMPESTRO128,
20302 IX86_BUILTIN_PCMPESTRS128,
20303 IX86_BUILTIN_PCMPESTRZ128,
20304 IX86_BUILTIN_PCMPISTRI128,
20305 IX86_BUILTIN_PCMPISTRM128,
20306 IX86_BUILTIN_PCMPISTRA128,
20307 IX86_BUILTIN_PCMPISTRC128,
20308 IX86_BUILTIN_PCMPISTRO128,
20309 IX86_BUILTIN_PCMPISTRS128,
20310 IX86_BUILTIN_PCMPISTRZ128,
20311
20312 IX86_BUILTIN_PCMPGTQ,
20313
20314 /* AES instructions */
20315 IX86_BUILTIN_AESENC128,
20316 IX86_BUILTIN_AESENCLAST128,
20317 IX86_BUILTIN_AESDEC128,
20318 IX86_BUILTIN_AESDECLAST128,
20319 IX86_BUILTIN_AESIMC128,
20320 IX86_BUILTIN_AESKEYGENASSIST128,
20321
20322 /* PCLMUL instruction */
20323 IX86_BUILTIN_PCLMULQDQ128,
20324
20325 /* AVX */
20326 IX86_BUILTIN_ADDPD256,
20327 IX86_BUILTIN_ADDPS256,
20328 IX86_BUILTIN_ADDSUBPD256,
20329 IX86_BUILTIN_ADDSUBPS256,
20330 IX86_BUILTIN_ANDPD256,
20331 IX86_BUILTIN_ANDPS256,
20332 IX86_BUILTIN_ANDNPD256,
20333 IX86_BUILTIN_ANDNPS256,
20334 IX86_BUILTIN_BLENDPD256,
20335 IX86_BUILTIN_BLENDPS256,
20336 IX86_BUILTIN_BLENDVPD256,
20337 IX86_BUILTIN_BLENDVPS256,
20338 IX86_BUILTIN_DIVPD256,
20339 IX86_BUILTIN_DIVPS256,
20340 IX86_BUILTIN_DPPS256,
20341 IX86_BUILTIN_HADDPD256,
20342 IX86_BUILTIN_HADDPS256,
20343 IX86_BUILTIN_HSUBPD256,
20344 IX86_BUILTIN_HSUBPS256,
20345 IX86_BUILTIN_MAXPD256,
20346 IX86_BUILTIN_MAXPS256,
20347 IX86_BUILTIN_MINPD256,
20348 IX86_BUILTIN_MINPS256,
20349 IX86_BUILTIN_MULPD256,
20350 IX86_BUILTIN_MULPS256,
20351 IX86_BUILTIN_ORPD256,
20352 IX86_BUILTIN_ORPS256,
20353 IX86_BUILTIN_SHUFPD256,
20354 IX86_BUILTIN_SHUFPS256,
20355 IX86_BUILTIN_SUBPD256,
20356 IX86_BUILTIN_SUBPS256,
20357 IX86_BUILTIN_XORPD256,
20358 IX86_BUILTIN_XORPS256,
20359 IX86_BUILTIN_CMPSD,
20360 IX86_BUILTIN_CMPSS,
20361 IX86_BUILTIN_CMPPD,
20362 IX86_BUILTIN_CMPPS,
20363 IX86_BUILTIN_CMPPD256,
20364 IX86_BUILTIN_CMPPS256,
20365 IX86_BUILTIN_CVTDQ2PD256,
20366 IX86_BUILTIN_CVTDQ2PS256,
20367 IX86_BUILTIN_CVTPD2PS256,
20368 IX86_BUILTIN_CVTPS2DQ256,
20369 IX86_BUILTIN_CVTPS2PD256,
20370 IX86_BUILTIN_CVTTPD2DQ256,
20371 IX86_BUILTIN_CVTPD2DQ256,
20372 IX86_BUILTIN_CVTTPS2DQ256,
20373 IX86_BUILTIN_EXTRACTF128PD256,
20374 IX86_BUILTIN_EXTRACTF128PS256,
20375 IX86_BUILTIN_EXTRACTF128SI256,
20376 IX86_BUILTIN_VZEROALL,
20377 IX86_BUILTIN_VZEROUPPER,
20378 IX86_BUILTIN_VZEROUPPER_REX64,
20379 IX86_BUILTIN_VPERMILVARPD,
20380 IX86_BUILTIN_VPERMILVARPS,
20381 IX86_BUILTIN_VPERMILVARPD256,
20382 IX86_BUILTIN_VPERMILVARPS256,
20383 IX86_BUILTIN_VPERMILPD,
20384 IX86_BUILTIN_VPERMILPS,
20385 IX86_BUILTIN_VPERMILPD256,
20386 IX86_BUILTIN_VPERMILPS256,
20387 IX86_BUILTIN_VPERM2F128PD256,
20388 IX86_BUILTIN_VPERM2F128PS256,
20389 IX86_BUILTIN_VPERM2F128SI256,
20390 IX86_BUILTIN_VBROADCASTSS,
20391 IX86_BUILTIN_VBROADCASTSD256,
20392 IX86_BUILTIN_VBROADCASTSS256,
20393 IX86_BUILTIN_VBROADCASTPD256,
20394 IX86_BUILTIN_VBROADCASTPS256,
20395 IX86_BUILTIN_VINSERTF128PD256,
20396 IX86_BUILTIN_VINSERTF128PS256,
20397 IX86_BUILTIN_VINSERTF128SI256,
20398 IX86_BUILTIN_LOADUPD256,
20399 IX86_BUILTIN_LOADUPS256,
20400 IX86_BUILTIN_STOREUPD256,
20401 IX86_BUILTIN_STOREUPS256,
20402 IX86_BUILTIN_LDDQU256,
20403 IX86_BUILTIN_MOVNTDQ256,
20404 IX86_BUILTIN_MOVNTPD256,
20405 IX86_BUILTIN_MOVNTPS256,
20406 IX86_BUILTIN_LOADDQU256,
20407 IX86_BUILTIN_STOREDQU256,
20408 IX86_BUILTIN_MASKLOADPD,
20409 IX86_BUILTIN_MASKLOADPS,
20410 IX86_BUILTIN_MASKSTOREPD,
20411 IX86_BUILTIN_MASKSTOREPS,
20412 IX86_BUILTIN_MASKLOADPD256,
20413 IX86_BUILTIN_MASKLOADPS256,
20414 IX86_BUILTIN_MASKSTOREPD256,
20415 IX86_BUILTIN_MASKSTOREPS256,
20416 IX86_BUILTIN_MOVSHDUP256,
20417 IX86_BUILTIN_MOVSLDUP256,
20418 IX86_BUILTIN_MOVDDUP256,
20419
20420 IX86_BUILTIN_SQRTPD256,
20421 IX86_BUILTIN_SQRTPS256,
20422 IX86_BUILTIN_SQRTPS_NR256,
20423 IX86_BUILTIN_RSQRTPS256,
20424 IX86_BUILTIN_RSQRTPS_NR256,
20425
20426 IX86_BUILTIN_RCPPS256,
20427
20428 IX86_BUILTIN_ROUNDPD256,
20429 IX86_BUILTIN_ROUNDPS256,
20430
20431 IX86_BUILTIN_UNPCKHPD256,
20432 IX86_BUILTIN_UNPCKLPD256,
20433 IX86_BUILTIN_UNPCKHPS256,
20434 IX86_BUILTIN_UNPCKLPS256,
20435
20436 IX86_BUILTIN_SI256_SI,
20437 IX86_BUILTIN_PS256_PS,
20438 IX86_BUILTIN_PD256_PD,
20439 IX86_BUILTIN_SI_SI256,
20440 IX86_BUILTIN_PS_PS256,
20441 IX86_BUILTIN_PD_PD256,
20442
20443 IX86_BUILTIN_VTESTZPD,
20444 IX86_BUILTIN_VTESTCPD,
20445 IX86_BUILTIN_VTESTNZCPD,
20446 IX86_BUILTIN_VTESTZPS,
20447 IX86_BUILTIN_VTESTCPS,
20448 IX86_BUILTIN_VTESTNZCPS,
20449 IX86_BUILTIN_VTESTZPD256,
20450 IX86_BUILTIN_VTESTCPD256,
20451 IX86_BUILTIN_VTESTNZCPD256,
20452 IX86_BUILTIN_VTESTZPS256,
20453 IX86_BUILTIN_VTESTCPS256,
20454 IX86_BUILTIN_VTESTNZCPS256,
20455 IX86_BUILTIN_PTESTZ256,
20456 IX86_BUILTIN_PTESTC256,
20457 IX86_BUILTIN_PTESTNZC256,
20458
20459 IX86_BUILTIN_MOVMSKPD256,
20460 IX86_BUILTIN_MOVMSKPS256,
20461
20462 /* TFmode support builtins. */
20463 IX86_BUILTIN_INFQ,
20464 IX86_BUILTIN_FABSQ,
20465 IX86_BUILTIN_COPYSIGNQ,
20466
20467 /* SSE5 instructions */
20468 IX86_BUILTIN_FMADDSS,
20469 IX86_BUILTIN_FMADDSD,
20470 IX86_BUILTIN_FMADDPS,
20471 IX86_BUILTIN_FMADDPD,
20472 IX86_BUILTIN_FMSUBSS,
20473 IX86_BUILTIN_FMSUBSD,
20474 IX86_BUILTIN_FMSUBPS,
20475 IX86_BUILTIN_FMSUBPD,
20476 IX86_BUILTIN_FNMADDSS,
20477 IX86_BUILTIN_FNMADDSD,
20478 IX86_BUILTIN_FNMADDPS,
20479 IX86_BUILTIN_FNMADDPD,
20480 IX86_BUILTIN_FNMSUBSS,
20481 IX86_BUILTIN_FNMSUBSD,
20482 IX86_BUILTIN_FNMSUBPS,
20483 IX86_BUILTIN_FNMSUBPD,
20484 IX86_BUILTIN_PCMOV,
20485 IX86_BUILTIN_PCMOV_V2DI,
20486 IX86_BUILTIN_PCMOV_V4SI,
20487 IX86_BUILTIN_PCMOV_V8HI,
20488 IX86_BUILTIN_PCMOV_V16QI,
20489 IX86_BUILTIN_PCMOV_V4SF,
20490 IX86_BUILTIN_PCMOV_V2DF,
20491 IX86_BUILTIN_PPERM,
20492 IX86_BUILTIN_PERMPS,
20493 IX86_BUILTIN_PERMPD,
20494 IX86_BUILTIN_PMACSSWW,
20495 IX86_BUILTIN_PMACSWW,
20496 IX86_BUILTIN_PMACSSWD,
20497 IX86_BUILTIN_PMACSWD,
20498 IX86_BUILTIN_PMACSSDD,
20499 IX86_BUILTIN_PMACSDD,
20500 IX86_BUILTIN_PMACSSDQL,
20501 IX86_BUILTIN_PMACSSDQH,
20502 IX86_BUILTIN_PMACSDQL,
20503 IX86_BUILTIN_PMACSDQH,
20504 IX86_BUILTIN_PMADCSSWD,
20505 IX86_BUILTIN_PMADCSWD,
20506 IX86_BUILTIN_PHADDBW,
20507 IX86_BUILTIN_PHADDBD,
20508 IX86_BUILTIN_PHADDBQ,
20509 IX86_BUILTIN_PHADDWD,
20510 IX86_BUILTIN_PHADDWQ,
20511 IX86_BUILTIN_PHADDDQ,
20512 IX86_BUILTIN_PHADDUBW,
20513 IX86_BUILTIN_PHADDUBD,
20514 IX86_BUILTIN_PHADDUBQ,
20515 IX86_BUILTIN_PHADDUWD,
20516 IX86_BUILTIN_PHADDUWQ,
20517 IX86_BUILTIN_PHADDUDQ,
20518 IX86_BUILTIN_PHSUBBW,
20519 IX86_BUILTIN_PHSUBWD,
20520 IX86_BUILTIN_PHSUBDQ,
20521 IX86_BUILTIN_PROTB,
20522 IX86_BUILTIN_PROTW,
20523 IX86_BUILTIN_PROTD,
20524 IX86_BUILTIN_PROTQ,
20525 IX86_BUILTIN_PROTB_IMM,
20526 IX86_BUILTIN_PROTW_IMM,
20527 IX86_BUILTIN_PROTD_IMM,
20528 IX86_BUILTIN_PROTQ_IMM,
20529 IX86_BUILTIN_PSHLB,
20530 IX86_BUILTIN_PSHLW,
20531 IX86_BUILTIN_PSHLD,
20532 IX86_BUILTIN_PSHLQ,
20533 IX86_BUILTIN_PSHAB,
20534 IX86_BUILTIN_PSHAW,
20535 IX86_BUILTIN_PSHAD,
20536 IX86_BUILTIN_PSHAQ,
20537 IX86_BUILTIN_FRCZSS,
20538 IX86_BUILTIN_FRCZSD,
20539 IX86_BUILTIN_FRCZPS,
20540 IX86_BUILTIN_FRCZPD,
20541 IX86_BUILTIN_CVTPH2PS,
20542 IX86_BUILTIN_CVTPS2PH,
20543
20544 IX86_BUILTIN_COMEQSS,
20545 IX86_BUILTIN_COMNESS,
20546 IX86_BUILTIN_COMLTSS,
20547 IX86_BUILTIN_COMLESS,
20548 IX86_BUILTIN_COMGTSS,
20549 IX86_BUILTIN_COMGESS,
20550 IX86_BUILTIN_COMUEQSS,
20551 IX86_BUILTIN_COMUNESS,
20552 IX86_BUILTIN_COMULTSS,
20553 IX86_BUILTIN_COMULESS,
20554 IX86_BUILTIN_COMUGTSS,
20555 IX86_BUILTIN_COMUGESS,
20556 IX86_BUILTIN_COMORDSS,
20557 IX86_BUILTIN_COMUNORDSS,
20558 IX86_BUILTIN_COMFALSESS,
20559 IX86_BUILTIN_COMTRUESS,
20560
20561 IX86_BUILTIN_COMEQSD,
20562 IX86_BUILTIN_COMNESD,
20563 IX86_BUILTIN_COMLTSD,
20564 IX86_BUILTIN_COMLESD,
20565 IX86_BUILTIN_COMGTSD,
20566 IX86_BUILTIN_COMGESD,
20567 IX86_BUILTIN_COMUEQSD,
20568 IX86_BUILTIN_COMUNESD,
20569 IX86_BUILTIN_COMULTSD,
20570 IX86_BUILTIN_COMULESD,
20571 IX86_BUILTIN_COMUGTSD,
20572 IX86_BUILTIN_COMUGESD,
20573 IX86_BUILTIN_COMORDSD,
20574 IX86_BUILTIN_COMUNORDSD,
20575 IX86_BUILTIN_COMFALSESD,
20576 IX86_BUILTIN_COMTRUESD,
20577
20578 IX86_BUILTIN_COMEQPS,
20579 IX86_BUILTIN_COMNEPS,
20580 IX86_BUILTIN_COMLTPS,
20581 IX86_BUILTIN_COMLEPS,
20582 IX86_BUILTIN_COMGTPS,
20583 IX86_BUILTIN_COMGEPS,
20584 IX86_BUILTIN_COMUEQPS,
20585 IX86_BUILTIN_COMUNEPS,
20586 IX86_BUILTIN_COMULTPS,
20587 IX86_BUILTIN_COMULEPS,
20588 IX86_BUILTIN_COMUGTPS,
20589 IX86_BUILTIN_COMUGEPS,
20590 IX86_BUILTIN_COMORDPS,
20591 IX86_BUILTIN_COMUNORDPS,
20592 IX86_BUILTIN_COMFALSEPS,
20593 IX86_BUILTIN_COMTRUEPS,
20594
20595 IX86_BUILTIN_COMEQPD,
20596 IX86_BUILTIN_COMNEPD,
20597 IX86_BUILTIN_COMLTPD,
20598 IX86_BUILTIN_COMLEPD,
20599 IX86_BUILTIN_COMGTPD,
20600 IX86_BUILTIN_COMGEPD,
20601 IX86_BUILTIN_COMUEQPD,
20602 IX86_BUILTIN_COMUNEPD,
20603 IX86_BUILTIN_COMULTPD,
20604 IX86_BUILTIN_COMULEPD,
20605 IX86_BUILTIN_COMUGTPD,
20606 IX86_BUILTIN_COMUGEPD,
20607 IX86_BUILTIN_COMORDPD,
20608 IX86_BUILTIN_COMUNORDPD,
20609 IX86_BUILTIN_COMFALSEPD,
20610 IX86_BUILTIN_COMTRUEPD,
20611
20612 IX86_BUILTIN_PCOMEQUB,
20613 IX86_BUILTIN_PCOMNEUB,
20614 IX86_BUILTIN_PCOMLTUB,
20615 IX86_BUILTIN_PCOMLEUB,
20616 IX86_BUILTIN_PCOMGTUB,
20617 IX86_BUILTIN_PCOMGEUB,
20618 IX86_BUILTIN_PCOMFALSEUB,
20619 IX86_BUILTIN_PCOMTRUEUB,
20620 IX86_BUILTIN_PCOMEQUW,
20621 IX86_BUILTIN_PCOMNEUW,
20622 IX86_BUILTIN_PCOMLTUW,
20623 IX86_BUILTIN_PCOMLEUW,
20624 IX86_BUILTIN_PCOMGTUW,
20625 IX86_BUILTIN_PCOMGEUW,
20626 IX86_BUILTIN_PCOMFALSEUW,
20627 IX86_BUILTIN_PCOMTRUEUW,
20628 IX86_BUILTIN_PCOMEQUD,
20629 IX86_BUILTIN_PCOMNEUD,
20630 IX86_BUILTIN_PCOMLTUD,
20631 IX86_BUILTIN_PCOMLEUD,
20632 IX86_BUILTIN_PCOMGTUD,
20633 IX86_BUILTIN_PCOMGEUD,
20634 IX86_BUILTIN_PCOMFALSEUD,
20635 IX86_BUILTIN_PCOMTRUEUD,
20636 IX86_BUILTIN_PCOMEQUQ,
20637 IX86_BUILTIN_PCOMNEUQ,
20638 IX86_BUILTIN_PCOMLTUQ,
20639 IX86_BUILTIN_PCOMLEUQ,
20640 IX86_BUILTIN_PCOMGTUQ,
20641 IX86_BUILTIN_PCOMGEUQ,
20642 IX86_BUILTIN_PCOMFALSEUQ,
20643 IX86_BUILTIN_PCOMTRUEUQ,
20644
20645 IX86_BUILTIN_PCOMEQB,
20646 IX86_BUILTIN_PCOMNEB,
20647 IX86_BUILTIN_PCOMLTB,
20648 IX86_BUILTIN_PCOMLEB,
20649 IX86_BUILTIN_PCOMGTB,
20650 IX86_BUILTIN_PCOMGEB,
20651 IX86_BUILTIN_PCOMFALSEB,
20652 IX86_BUILTIN_PCOMTRUEB,
20653 IX86_BUILTIN_PCOMEQW,
20654 IX86_BUILTIN_PCOMNEW,
20655 IX86_BUILTIN_PCOMLTW,
20656 IX86_BUILTIN_PCOMLEW,
20657 IX86_BUILTIN_PCOMGTW,
20658 IX86_BUILTIN_PCOMGEW,
20659 IX86_BUILTIN_PCOMFALSEW,
20660 IX86_BUILTIN_PCOMTRUEW,
20661 IX86_BUILTIN_PCOMEQD,
20662 IX86_BUILTIN_PCOMNED,
20663 IX86_BUILTIN_PCOMLTD,
20664 IX86_BUILTIN_PCOMLED,
20665 IX86_BUILTIN_PCOMGTD,
20666 IX86_BUILTIN_PCOMGED,
20667 IX86_BUILTIN_PCOMFALSED,
20668 IX86_BUILTIN_PCOMTRUED,
20669 IX86_BUILTIN_PCOMEQQ,
20670 IX86_BUILTIN_PCOMNEQ,
20671 IX86_BUILTIN_PCOMLTQ,
20672 IX86_BUILTIN_PCOMLEQ,
20673 IX86_BUILTIN_PCOMGTQ,
20674 IX86_BUILTIN_PCOMGEQ,
20675 IX86_BUILTIN_PCOMFALSEQ,
20676 IX86_BUILTIN_PCOMTRUEQ,
20677
20678 IX86_BUILTIN_MAX
20679 };
20680
20681 /* Table for the ix86 builtin decls. */
20682 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20683
/* Table of all the builtin functions that are possible with different ISAs,
   but whose declarations are deferred until a function is declared that
   uses the corresponding ISA.  */
20687 struct builtin_isa GTY(())
20688 {
20689 tree type; /* builtin type to use in the declaration */
20690 const char *name; /* function name */
20691 int isa; /* isa_flags this builtin is defined for */
20692 bool const_p; /* true if the declaration is constant */
20693 };
20694
20695 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20696
20697
/* Add an ix86 target builtin function with CODE, NAME and TYPE.  Save MASK,
 * the set of isa_flags the builtin requires, in the ix86_builtins_isa array.
 * Store the function decl in the ix86_builtins array.  Return the function
 * decl, or NULL_TREE if the builtin was not added.
 *
 * If the front end has a special hook for builtin functions, delay adding
 * builtin functions that are not in the current ISA until the ISA is changed
 * via function-specific optimization.  Doing so can save about 300K for the
 * default compiler.  When the builtin is expanded, check at that point
 * whether it is valid.
 *
 * If the front end does not have a special hook, record all builtins, even
 * those whose instruction set is not in the current ISA, in case the user
 * selects a different ISA with function-specific options; this avoids scope
 * errors when a builtin is added in the middle of a function scope. */
20713
20714 static inline tree
20715 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20716 {
20717 tree decl = NULL_TREE;
20718
20719 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20720 {
20721 ix86_builtins_isa[(int) code].isa = mask;
20722
20723 if ((mask & ix86_isa_flags) != 0
20724 || (lang_hooks.builtin_function
20725 == lang_hooks.builtin_function_ext_scope))
20727 {
20728 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20729 NULL_TREE);
20730 ix86_builtins[(int) code] = decl;
20731 ix86_builtins_isa[(int) code].type = NULL_TREE;
20732 }
20733 else
20734 {
20735 ix86_builtins[(int) code] = NULL_TREE;
20736 ix86_builtins_isa[(int) code].const_p = false;
20737 ix86_builtins_isa[(int) code].type = type;
20738 ix86_builtins_isa[(int) code].name = name;
20739 }
20740 }
20741
20742 return decl;
20743 }
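
/* Illustrative use of def_builtin (a sketch, not a call taken verbatim from
   this file; most builtins below are registered from the bdesc_* tables, and
   the type node name here is hypothetical):

     tree v4sf_ftype_v4sf_v4sf = ...;      (hypothetical FUNCTION_TYPE node)
     def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
                  v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   If SSE is not enabled at this point and the front end supports
   extension-scope builtins, nothing is declared yet; the name, type and ISA
   mask are only recorded in ix86_builtins_isa for later use by
   ix86_add_new_builtins.  */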
20744
20745 /* Like def_builtin, but also marks the function decl "const". */
20746
20747 static inline tree
20748 def_builtin_const (int mask, const char *name, tree type,
20749 enum ix86_builtins code)
20750 {
20751 tree decl = def_builtin (mask, name, type, code);
20752 if (decl)
20753 TREE_READONLY (decl) = 1;
20754 else
20755 ix86_builtins_isa[(int) code].const_p = true;
20756
20757 return decl;
20758 }
20759
/* Add any new builtin functions for a given ISA that have not yet been
   declared.  Deferring these declarations saves a bit of space compared to
   adding every declaration to the tree up front, whether or not it is
   used.  */
20763
20764 static void
20765 ix86_add_new_builtins (int isa)
20766 {
20767 int i;
20768 tree decl;
20769
20770 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20771 {
20772 if ((ix86_builtins_isa[i].isa & isa) != 0
20773 && ix86_builtins_isa[i].type != NULL_TREE)
20774 {
20775 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20776 ix86_builtins_isa[i].type,
20777 i, BUILT_IN_MD, NULL,
20778 NULL_TREE);
20779
20780 ix86_builtins[i] = decl;
20781 ix86_builtins_isa[i].type = NULL_TREE;
20782 if (ix86_builtins_isa[i].const_p)
20783 TREE_READONLY (decl) = 1;
20784 }
20785 }
20786 }
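
/* A sketch of the deferral flow (assumed from the comments in this file
   rather than a verbatim call chain): def_builtin records a builtin whose
   ISA is not yet enabled, the user later enables that ISA for one function,
   and ix86_add_new_builtins is called with the newly enabled isa_flags bits
   to declare the pending builtins at extension scope.  Hedged user-level
   illustration (crc_step is a made-up user function):

     __attribute__ ((target ("sse4.2")))
     unsigned int
     crc_step (unsigned int crc, unsigned char byte)
     {
       return __builtin_ia32_crc32qi (crc, byte);
     }

   Here IX86_BUILTIN_CRC32QI may have been deferred at startup and is only
   materialized once the "sse4.2" target attribute turns the ISA on.  */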
20787
20788 /* Bits for builtin_description.flag. */
20789
/* Set when we do not support the comparison natively and the comparison
   operands should be swapped (swap_comparison) in order to support it.  */
20792 #define BUILTIN_DESC_SWAP_OPERANDS 1
20793
20794 struct builtin_description
20795 {
20796 const unsigned int mask;
20797 const enum insn_code icode;
20798 const char *const name;
20799 const enum ix86_builtins code;
20800 const enum rtx_code comparison;
20801 const int flag;
20802 };
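
/* How to read an entry, using the first row of bdesc_comi below as an
   example (the field meanings follow from the struct above and from how the
   tables are used; treat this as an informal guide):

     { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
       IX86_BUILTIN_COMIEQSS, UNEQ, 0 }

     mask        - ISA the builtin requires (plain SSE here),
     icode       - insn pattern used when expanding the builtin,
     name        - user-visible __builtin_ia32_* name,
     code        - index into ix86_builtins / ix86_builtins_isa,
     comparison  - rtx comparison code for the expander (UNEQ here),
     flag        - table-specific data: 0 or BUILTIN_DESC_SWAP_OPERANDS in
                   bdesc_comi, a CC mode in the pcmpestr/pcmpistr tables, or
                   an ix86_builtin_type in the argument tables.  */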
20803
20804 static const struct builtin_description bdesc_comi[] =
20805 {
20806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20817 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20830 };
20831
20832 static const struct builtin_description bdesc_pcmpestr[] =
20833 {
20834 /* SSE4.2 */
20835 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20836 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20837 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20838 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20839 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20840 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20841 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20842 };
20843
20844 static const struct builtin_description bdesc_pcmpistr[] =
20845 {
20846 /* SSE4.2 */
20847 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20848 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20849 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20850 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20851 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20852 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20853 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20854 };
20855
20856 /* Special builtin types */
20857 enum ix86_special_builtin_type
20858 {
20859 SPECIAL_FTYPE_UNKNOWN,
20860 VOID_FTYPE_VOID,
20861 V32QI_FTYPE_PCCHAR,
20862 V16QI_FTYPE_PCCHAR,
20863 V8SF_FTYPE_PCV4SF,
20864 V8SF_FTYPE_PCFLOAT,
20865 V4DF_FTYPE_PCV2DF,
20866 V4DF_FTYPE_PCDOUBLE,
20867 V4SF_FTYPE_PCFLOAT,
20868 V2DF_FTYPE_PCDOUBLE,
20869 V8SF_FTYPE_PCV8SF_V8SF,
20870 V4DF_FTYPE_PCV4DF_V4DF,
20871 V4SF_FTYPE_V4SF_PCV2SF,
20872 V4SF_FTYPE_PCV4SF_V4SF,
20873 V2DF_FTYPE_V2DF_PCDOUBLE,
20874 V2DF_FTYPE_PCV2DF_V2DF,
20875 V2DI_FTYPE_PV2DI,
20876 VOID_FTYPE_PV2SF_V4SF,
20877 VOID_FTYPE_PV4DI_V4DI,
20878 VOID_FTYPE_PV2DI_V2DI,
20879 VOID_FTYPE_PCHAR_V32QI,
20880 VOID_FTYPE_PCHAR_V16QI,
20881 VOID_FTYPE_PFLOAT_V8SF,
20882 VOID_FTYPE_PFLOAT_V4SF,
20883 VOID_FTYPE_PDOUBLE_V4DF,
20884 VOID_FTYPE_PDOUBLE_V2DF,
20885 VOID_FTYPE_PDI_DI,
20886 VOID_FTYPE_PINT_INT,
20887 VOID_FTYPE_PV8SF_V8SF_V8SF,
20888 VOID_FTYPE_PV4DF_V4DF_V4DF,
20889 VOID_FTYPE_PV4SF_V4SF_V4SF,
20890 VOID_FTYPE_PV2DF_V2DF_V2DF
20891 };
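
/* The names above encode a signature: the part before _FTYPE_ is the return
   type and the parts after it are the argument types, with P meaning
   "pointer to" and PC "pointer to const".  An informal reading, matching how
   the names are used in bdesc_special_args below:

     V4SF_FTYPE_PCFLOAT      ~  v4sf f (const float *)      (loadups)
     VOID_FTYPE_PFLOAT_V4SF  ~  void f (float *, v4sf)      (storeups)  */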
20892
20893 /* Builtin types */
20894 enum ix86_builtin_type
20895 {
20896 FTYPE_UNKNOWN,
20897 FLOAT128_FTYPE_FLOAT128,
20898 FLOAT_FTYPE_FLOAT,
20899 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20900 INT_FTYPE_V8SF_V8SF_PTEST,
20901 INT_FTYPE_V4DI_V4DI_PTEST,
20902 INT_FTYPE_V4DF_V4DF_PTEST,
20903 INT_FTYPE_V4SF_V4SF_PTEST,
20904 INT_FTYPE_V2DI_V2DI_PTEST,
20905 INT_FTYPE_V2DF_V2DF_PTEST,
20906 INT64_FTYPE_V4SF,
20907 INT64_FTYPE_V2DF,
20908 INT_FTYPE_V16QI,
20909 INT_FTYPE_V8QI,
20910 INT_FTYPE_V8SF,
20911 INT_FTYPE_V4DF,
20912 INT_FTYPE_V4SF,
20913 INT_FTYPE_V2DF,
20914 V16QI_FTYPE_V16QI,
20915 V8SI_FTYPE_V8SF,
20916 V8SI_FTYPE_V4SI,
20917 V8HI_FTYPE_V8HI,
20918 V8HI_FTYPE_V16QI,
20919 V8QI_FTYPE_V8QI,
20920 V8SF_FTYPE_V8SF,
20921 V8SF_FTYPE_V8SI,
20922 V8SF_FTYPE_V4SF,
20923 V4SI_FTYPE_V4SI,
20924 V4SI_FTYPE_V16QI,
20925 V4SI_FTYPE_V8SI,
20926 V4SI_FTYPE_V8HI,
20927 V4SI_FTYPE_V4DF,
20928 V4SI_FTYPE_V4SF,
20929 V4SI_FTYPE_V2DF,
20930 V4HI_FTYPE_V4HI,
20931 V4DF_FTYPE_V4DF,
20932 V4DF_FTYPE_V4SI,
20933 V4DF_FTYPE_V4SF,
20934 V4DF_FTYPE_V2DF,
20935 V4SF_FTYPE_V4DF,
20936 V4SF_FTYPE_V4SF,
20937 V4SF_FTYPE_V4SF_VEC_MERGE,
20938 V4SF_FTYPE_V8SF,
20939 V4SF_FTYPE_V4SI,
20940 V4SF_FTYPE_V2DF,
20941 V2DI_FTYPE_V2DI,
20942 V2DI_FTYPE_V16QI,
20943 V2DI_FTYPE_V8HI,
20944 V2DI_FTYPE_V4SI,
20945 V2DF_FTYPE_V2DF,
20946 V2DF_FTYPE_V2DF_VEC_MERGE,
20947 V2DF_FTYPE_V4SI,
20948 V2DF_FTYPE_V4DF,
20949 V2DF_FTYPE_V4SF,
20950 V2DF_FTYPE_V2SI,
20951 V2SI_FTYPE_V2SI,
20952 V2SI_FTYPE_V4SF,
20953 V2SI_FTYPE_V2SF,
20954 V2SI_FTYPE_V2DF,
20955 V2SF_FTYPE_V2SF,
20956 V2SF_FTYPE_V2SI,
20957 V16QI_FTYPE_V16QI_V16QI,
20958 V16QI_FTYPE_V8HI_V8HI,
20959 V8QI_FTYPE_V8QI_V8QI,
20960 V8QI_FTYPE_V4HI_V4HI,
20961 V8HI_FTYPE_V8HI_V8HI,
20962 V8HI_FTYPE_V8HI_V8HI_COUNT,
20963 V8HI_FTYPE_V16QI_V16QI,
20964 V8HI_FTYPE_V4SI_V4SI,
20965 V8HI_FTYPE_V8HI_SI_COUNT,
20966 V8SF_FTYPE_V8SF_V8SF,
20967 V8SF_FTYPE_V8SF_V8SI,
20968 V4SI_FTYPE_V4SI_V4SI,
20969 V4SI_FTYPE_V4SI_V4SI_COUNT,
20970 V4SI_FTYPE_V8HI_V8HI,
20971 V4SI_FTYPE_V4SF_V4SF,
20972 V4SI_FTYPE_V2DF_V2DF,
20973 V4SI_FTYPE_V4SI_SI_COUNT,
20974 V4HI_FTYPE_V4HI_V4HI,
20975 V4HI_FTYPE_V4HI_V4HI_COUNT,
20976 V4HI_FTYPE_V8QI_V8QI,
20977 V4HI_FTYPE_V2SI_V2SI,
20978 V4HI_FTYPE_V4HI_SI_COUNT,
20979 V4DF_FTYPE_V4DF_V4DF,
20980 V4DF_FTYPE_V4DF_V4DI,
20981 V4SF_FTYPE_V4SF_V4SF,
20982 V4SF_FTYPE_V4SF_V4SF_SWAP,
20983 V4SF_FTYPE_V4SF_V4SI,
20984 V4SF_FTYPE_V4SF_V2SI,
20985 V4SF_FTYPE_V4SF_V2DF,
20986 V4SF_FTYPE_V4SF_DI,
20987 V4SF_FTYPE_V4SF_SI,
20988 V2DI_FTYPE_V2DI_V2DI,
20989 V2DI_FTYPE_V2DI_V2DI_COUNT,
20990 V2DI_FTYPE_V16QI_V16QI,
20991 V2DI_FTYPE_V4SI_V4SI,
20992 V2DI_FTYPE_V2DI_V16QI,
20993 V2DI_FTYPE_V2DF_V2DF,
20994 V2DI_FTYPE_V2DI_SI_COUNT,
20995 V2SI_FTYPE_V2SI_V2SI,
20996 V2SI_FTYPE_V2SI_V2SI_COUNT,
20997 V2SI_FTYPE_V4HI_V4HI,
20998 V2SI_FTYPE_V2SF_V2SF,
20999 V2SI_FTYPE_V2SI_SI_COUNT,
21000 V2DF_FTYPE_V2DF_V2DF,
21001 V2DF_FTYPE_V2DF_V2DF_SWAP,
21002 V2DF_FTYPE_V2DF_V4SF,
21003 V2DF_FTYPE_V2DF_V2DI,
21004 V2DF_FTYPE_V2DF_DI,
21005 V2DF_FTYPE_V2DF_SI,
21006 V2SF_FTYPE_V2SF_V2SF,
21007 V1DI_FTYPE_V1DI_V1DI,
21008 V1DI_FTYPE_V1DI_V1DI_COUNT,
21009 V1DI_FTYPE_V8QI_V8QI,
21010 V1DI_FTYPE_V2SI_V2SI,
21011 V1DI_FTYPE_V1DI_SI_COUNT,
21012 UINT64_FTYPE_UINT64_UINT64,
21013 UINT_FTYPE_UINT_UINT,
21014 UINT_FTYPE_UINT_USHORT,
21015 UINT_FTYPE_UINT_UCHAR,
21016 V8HI_FTYPE_V8HI_INT,
21017 V4SI_FTYPE_V4SI_INT,
21018 V4HI_FTYPE_V4HI_INT,
21019 V8SF_FTYPE_V8SF_INT,
21020 V4SI_FTYPE_V8SI_INT,
21021 V4SF_FTYPE_V8SF_INT,
21022 V2DF_FTYPE_V4DF_INT,
21023 V4DF_FTYPE_V4DF_INT,
21024 V4SF_FTYPE_V4SF_INT,
21025 V2DI_FTYPE_V2DI_INT,
21026 V2DI2TI_FTYPE_V2DI_INT,
21027 V2DF_FTYPE_V2DF_INT,
21028 V16QI_FTYPE_V16QI_V16QI_V16QI,
21029 V8SF_FTYPE_V8SF_V8SF_V8SF,
21030 V4DF_FTYPE_V4DF_V4DF_V4DF,
21031 V4SF_FTYPE_V4SF_V4SF_V4SF,
21032 V2DF_FTYPE_V2DF_V2DF_V2DF,
21033 V16QI_FTYPE_V16QI_V16QI_INT,
21034 V8SI_FTYPE_V8SI_V8SI_INT,
21035 V8SI_FTYPE_V8SI_V4SI_INT,
21036 V8HI_FTYPE_V8HI_V8HI_INT,
21037 V8SF_FTYPE_V8SF_V8SF_INT,
21038 V8SF_FTYPE_V8SF_V4SF_INT,
21039 V4SI_FTYPE_V4SI_V4SI_INT,
21040 V4DF_FTYPE_V4DF_V4DF_INT,
21041 V4DF_FTYPE_V4DF_V2DF_INT,
21042 V4SF_FTYPE_V4SF_V4SF_INT,
21043 V2DI_FTYPE_V2DI_V2DI_INT,
21044 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21045 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21046 V2DF_FTYPE_V2DF_V2DF_INT,
21047 V2DI_FTYPE_V2DI_UINT_UINT,
21048 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21049 };
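
/* A few suffixes above describe how the expander treats the operands rather
   than naming a distinct type (inferred from the entries that use them, so
   treat as an informal note):

     _COUNT      - last operand is a shift count, e.g.
                   V4HI_FTYPE_V4HI_SI_COUNT for __builtin_ia32_psllwi;
     _SWAP       - operands are swapped before expansion, e.g.
                   __builtin_ia32_cmpgtps is emitted as an LT comparison
                   using V4SF_FTYPE_V4SF_V4SF_SWAP;
     _VEC_MERGE  - the scalar result is merged back into the source vector,
                   e.g. __builtin_ia32_sqrtss with V4SF_FTYPE_V4SF_VEC_MERGE.  */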
21050
/* Special builtins with a variable number of arguments.  */
21052 static const struct builtin_description bdesc_special_args[] =
21053 {
21054 /* MMX */
21055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21056
21057 /* 3DNow! */
21058 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21059
21060 /* SSE */
21061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21064
21065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21069
21070 /* SSE or 3DNow!A */
21071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21072 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21073
21074 /* SSE2 */
21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21084
21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21087
21088 /* SSE3 */
21089 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21090
21091 /* SSE4.1 */
21092 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21093
21094 /* SSE4A */
21095 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21096 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21097
21098 /* AVX */
21099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21101 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21102
21103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21108
21109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21116
21117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21120
21121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21129 };
21130
/* Builtins with a variable number of arguments.  */
21132 static const struct builtin_description bdesc_args[] =
21133 {
21134 /* MMX */
21135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21141
21142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21150
21151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21153
21154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21158
21159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21165
21166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21172
21173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21176
21177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21178
21179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21185
21186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21192
21193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21197
21198 /* 3DNow! */
21199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21203
21204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21208 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21217 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21218 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21219
21220 /* 3DNow!A */
21221 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21222 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21223 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21224 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21225 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21226 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21227
21228 /* SSE */
21229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21231 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21233 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21237 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21240 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21241
21242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21243
21244 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21245 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21246 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21252
21253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21275
21276 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21277 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21280
21281 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21283 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21284 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21285
21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21291
21292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
  { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21295
21296 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21297
21298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21301
  /* SSE MMX or 3DNow!A */
21303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21305 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21306
21307 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21308 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21309 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21311
21312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21314
21315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21316
21317 /* SSE2 */
21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21319
21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21325
21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21331
21332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21333
21334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21336 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21337 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21338
21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21342
21343 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21346 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21351
21352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21372
21373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21374 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21377
21378 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21380 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21381 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21382
21383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21386
21387 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21388
21389 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21390 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21391 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21392 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21393 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21394 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21395 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21396 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21397
21398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21406
21407 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21409
21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21412 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21414
21415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21417
21418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21424
21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21429
21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21438
21439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21442
21443 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21445
21446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21448
21449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21450
21451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21452 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21455
21456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21463
21464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21465 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21466 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21467 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21468 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21469 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21470 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21471
21472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21473 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21474 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21475 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21476
21477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21480
21481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21482
21483 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21484 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21485
21486 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21487
21488 /* SSE2 MMX */
21489 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21490 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21491
21492 /* SSE3 */
21493 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21494 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21495
21496 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21497 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21499 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21501 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21502
21503 /* SSSE3 */
21504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21510
21511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21535
21536 /* SSSE3. */
21537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21539
21540 /* SSE4.1 */
21541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21551
21552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21565
21566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21578
21579 /* SSE4.1 and SSE5 */
21580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21584
21585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21588
21589 /* SSE4.2 */
21590 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21591 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21592 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21593 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21594 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21595
21596 /* SSE4A */
21597 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21598 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21599 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21600 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21601
21602 /* AES */
21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21605
21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21610
21611 /* PCLMUL */
21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21613
21614 /* AVX */
21615 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21616 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21619 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21620 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21623 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21629 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21630 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21631 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21632 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21633 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21634 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21635 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21636 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21637 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21638 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21639 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21640 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21641
21642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21646
21647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21681
21682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21685
21686 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21688 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21690 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21691
21692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21693
21694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21696
21697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21701
21702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21708
21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21724
21725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21727 };
21728
21729 /* SSE5 */
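/* Classification of the operand modes used by the SSE5 multi-operand
   builtins in bdesc_multi_arg below; each table entry carries one of
   these values in its flag field.  */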
21730 enum multi_arg_type {
21731 MULTI_ARG_UNKNOWN,
21732 MULTI_ARG_3_SF,
21733 MULTI_ARG_3_DF,
21734 MULTI_ARG_3_DI,
21735 MULTI_ARG_3_SI,
21736 MULTI_ARG_3_SI_DI,
21737 MULTI_ARG_3_HI,
21738 MULTI_ARG_3_HI_SI,
21739 MULTI_ARG_3_QI,
21740 MULTI_ARG_3_PERMPS,
21741 MULTI_ARG_3_PERMPD,
21742 MULTI_ARG_2_SF,
21743 MULTI_ARG_2_DF,
21744 MULTI_ARG_2_DI,
21745 MULTI_ARG_2_SI,
21746 MULTI_ARG_2_HI,
21747 MULTI_ARG_2_QI,
21748 MULTI_ARG_2_DI_IMM,
21749 MULTI_ARG_2_SI_IMM,
21750 MULTI_ARG_2_HI_IMM,
21751 MULTI_ARG_2_QI_IMM,
21752 MULTI_ARG_2_SF_CMP,
21753 MULTI_ARG_2_DF_CMP,
21754 MULTI_ARG_2_DI_CMP,
21755 MULTI_ARG_2_SI_CMP,
21756 MULTI_ARG_2_HI_CMP,
21757 MULTI_ARG_2_QI_CMP,
21758 MULTI_ARG_2_DI_TF,
21759 MULTI_ARG_2_SI_TF,
21760 MULTI_ARG_2_HI_TF,
21761 MULTI_ARG_2_QI_TF,
21762 MULTI_ARG_2_SF_TF,
21763 MULTI_ARG_2_DF_TF,
21764 MULTI_ARG_1_SF,
21765 MULTI_ARG_1_DF,
21766 MULTI_ARG_1_DI,
21767 MULTI_ARG_1_SI,
21768 MULTI_ARG_1_HI,
21769 MULTI_ARG_1_QI,
21770 MULTI_ARG_1_SI_DI,
21771 MULTI_ARG_1_HI_DI,
21772 MULTI_ARG_1_HI_SI,
21773 MULTI_ARG_1_QI_DI,
21774 MULTI_ARG_1_QI_SI,
21775 MULTI_ARG_1_QI_HI,
21776 MULTI_ARG_1_PH2PS,
21777 MULTI_ARG_1_PS2PH
21778 };
21779
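/* Table of the SSE5 multi-operand builtins; the flag field of each
   entry holds its MULTI_ARG_* operand classification.  */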
21780 static const struct builtin_description bdesc_multi_arg[] =
21781 {
21782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi", IX86_BUILTIN_PCMOV_V16QI, 0, (int)MULTI_ARG_3_QI },
21803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21857
21858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21874
21875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21891
21892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21908
21909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21925
21926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21933
21934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21941
21942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21949
21950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21957
21958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21965
21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21973
21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21981
21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21989
21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21998
21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub", IX86_BUILTIN_PCOMFALSEUB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw", IX86_BUILTIN_PCOMFALSEUW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud", IX86_BUILTIN_PCOMFALSEUD, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq", IX86_BUILTIN_PCOMFALSEUQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22007
22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22016 };
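
/* Illustrative sketch (not itself one of the table entries above): each
   entry ties a builtin name to the insn pattern used to expand it, the ISA
   mask that must be enabled, an optional comparison code, and an
   argument-type tag.  Taking the MULTI_ARG_3_SF entry for
   "__builtin_ia32_fnmaddps" as an example, a user-level call is expected
   to look roughly like

     typedef float __v4sf __attribute__ ((__vector_size__ (16)));

     __v4sf
     use_fnmaddps (__v4sf a, __v4sf b, __v4sf c)
     {
       return __builtin_ia32_fnmaddps (a, b, c);
     }

   and is expanded through CODE_FOR_sse5i_fnmaddv4sf4.  The function name
   and typedef here are illustrative only.  */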
22017
22018 /* Set up all the MMX/SSE builtins, even builtins for instructions that are
22019 not in the current target ISA, so that the user can compile particular
22020 modules with target-specific options that differ from the command-line
22021 options.  */
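/* For instance (illustrative only, not tied to a particular table entry),
   a unit compiled with plain -msse2 may still contain one function that
   enables a larger ISA locally and calls one of those builtins, so the
   builtin has to be created up front:

     typedef long long __v2di __attribute__ ((__vector_size__ (16)));

     __attribute__ ((target ("sse4.2")))
     __v2di
     use_pcmpgtq (__v2di a, __v2di b)
     {
       return __builtin_ia32_pcmpgtq (a, b);
     }

   Whether such a call is actually permitted is still checked later, when
   the builtin is expanded for the current function.  */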
22022 static void
22023 ix86_init_mmx_sse_builtins (void)
22024 {
22025 const struct builtin_description * d;
22026 size_t i;
22027
22028 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22029 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22030 tree V1DI_type_node
22031 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22032 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22033 tree V2DI_type_node
22034 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22035 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22036 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22037 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22038 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22039 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22040 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22041
22042 tree pchar_type_node = build_pointer_type (char_type_node);
22043 tree pcchar_type_node
22044 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22045 tree pfloat_type_node = build_pointer_type (float_type_node);
22046 tree pcfloat_type_node
22047 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22048 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22049 tree pcv2sf_type_node
22050 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22051 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22052 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22053
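/* Naming note: the <ret>_ftype_<arg>... temporaries below spell out the
   return type followed by each argument type, so, for example,
   v4si_ftype_v4sf_v4sf built a few lines down is the type node of

     __v4si f (__v4sf, __v4sf);

   (a reading of the convention only).  */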
22054 /* Comparisons. */
22055 tree int_ftype_v4sf_v4sf
22056 = build_function_type_list (integer_type_node,
22057 V4SF_type_node, V4SF_type_node, NULL_TREE);
22058 tree v4si_ftype_v4sf_v4sf
22059 = build_function_type_list (V4SI_type_node,
22060 V4SF_type_node, V4SF_type_node, NULL_TREE);
22061 /* MMX/SSE/integer conversions. */
22062 tree int_ftype_v4sf
22063 = build_function_type_list (integer_type_node,
22064 V4SF_type_node, NULL_TREE);
22065 tree int64_ftype_v4sf
22066 = build_function_type_list (long_long_integer_type_node,
22067 V4SF_type_node, NULL_TREE);
22068 tree int_ftype_v8qi
22069 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22070 tree v4sf_ftype_v4sf_int
22071 = build_function_type_list (V4SF_type_node,
22072 V4SF_type_node, integer_type_node, NULL_TREE);
22073 tree v4sf_ftype_v4sf_int64
22074 = build_function_type_list (V4SF_type_node,
22075 V4SF_type_node, long_long_integer_type_node,
22076 NULL_TREE);
22077 tree v4sf_ftype_v4sf_v2si
22078 = build_function_type_list (V4SF_type_node,
22079 V4SF_type_node, V2SI_type_node, NULL_TREE);
22080
22081 /* Miscellaneous. */
22082 tree v8qi_ftype_v4hi_v4hi
22083 = build_function_type_list (V8QI_type_node,
22084 V4HI_type_node, V4HI_type_node, NULL_TREE);
22085 tree v4hi_ftype_v2si_v2si
22086 = build_function_type_list (V4HI_type_node,
22087 V2SI_type_node, V2SI_type_node, NULL_TREE);
22088 tree v4sf_ftype_v4sf_v4sf_int
22089 = build_function_type_list (V4SF_type_node,
22090 V4SF_type_node, V4SF_type_node,
22091 integer_type_node, NULL_TREE);
22092 tree v2si_ftype_v4hi_v4hi
22093 = build_function_type_list (V2SI_type_node,
22094 V4HI_type_node, V4HI_type_node, NULL_TREE);
22095 tree v4hi_ftype_v4hi_int
22096 = build_function_type_list (V4HI_type_node,
22097 V4HI_type_node, integer_type_node, NULL_TREE);
22098 tree v2si_ftype_v2si_int
22099 = build_function_type_list (V2SI_type_node,
22100 V2SI_type_node, integer_type_node, NULL_TREE);
22101 tree v1di_ftype_v1di_int
22102 = build_function_type_list (V1DI_type_node,
22103 V1DI_type_node, integer_type_node, NULL_TREE);
22104
22105 tree void_ftype_void
22106 = build_function_type (void_type_node, void_list_node);
22107 tree void_ftype_unsigned
22108 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22109 tree void_ftype_unsigned_unsigned
22110 = build_function_type_list (void_type_node, unsigned_type_node,
22111 unsigned_type_node, NULL_TREE);
22112 tree void_ftype_pcvoid_unsigned_unsigned
22113 = build_function_type_list (void_type_node, const_ptr_type_node,
22114 unsigned_type_node, unsigned_type_node,
22115 NULL_TREE);
22116 tree unsigned_ftype_void
22117 = build_function_type (unsigned_type_node, void_list_node);
22118 tree v2si_ftype_v4sf
22119 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22120 /* Loads/stores. */
22121 tree void_ftype_v8qi_v8qi_pchar
22122 = build_function_type_list (void_type_node,
22123 V8QI_type_node, V8QI_type_node,
22124 pchar_type_node, NULL_TREE);
22125 tree v4sf_ftype_pcfloat
22126 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22127 tree v4sf_ftype_v4sf_pcv2sf
22128 = build_function_type_list (V4SF_type_node,
22129 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22130 tree void_ftype_pv2sf_v4sf
22131 = build_function_type_list (void_type_node,
22132 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22133 tree void_ftype_pfloat_v4sf
22134 = build_function_type_list (void_type_node,
22135 pfloat_type_node, V4SF_type_node, NULL_TREE);
22136 tree void_ftype_pdi_di
22137 = build_function_type_list (void_type_node,
22138 pdi_type_node, long_long_unsigned_type_node,
22139 NULL_TREE);
22140 tree void_ftype_pv2di_v2di
22141 = build_function_type_list (void_type_node,
22142 pv2di_type_node, V2DI_type_node, NULL_TREE);
22143 /* Normal vector unops. */
22144 tree v4sf_ftype_v4sf
22145 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22146 tree v16qi_ftype_v16qi
22147 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22148 tree v8hi_ftype_v8hi
22149 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22150 tree v4si_ftype_v4si
22151 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22152 tree v8qi_ftype_v8qi
22153 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22154 tree v4hi_ftype_v4hi
22155 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22156
22157 /* Normal vector binops. */
22158 tree v4sf_ftype_v4sf_v4sf
22159 = build_function_type_list (V4SF_type_node,
22160 V4SF_type_node, V4SF_type_node, NULL_TREE);
22161 tree v8qi_ftype_v8qi_v8qi
22162 = build_function_type_list (V8QI_type_node,
22163 V8QI_type_node, V8QI_type_node, NULL_TREE);
22164 tree v4hi_ftype_v4hi_v4hi
22165 = build_function_type_list (V4HI_type_node,
22166 V4HI_type_node, V4HI_type_node, NULL_TREE);
22167 tree v2si_ftype_v2si_v2si
22168 = build_function_type_list (V2SI_type_node,
22169 V2SI_type_node, V2SI_type_node, NULL_TREE);
22170 tree v1di_ftype_v1di_v1di
22171 = build_function_type_list (V1DI_type_node,
22172 V1DI_type_node, V1DI_type_node, NULL_TREE);
22173 tree v1di_ftype_v1di_v1di_int
22174 = build_function_type_list (V1DI_type_node,
22175 V1DI_type_node, V1DI_type_node,
22176 integer_type_node, NULL_TREE);
22177 tree v2si_ftype_v2sf
22178 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22179 tree v2sf_ftype_v2si
22180 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22181 tree v2si_ftype_v2si
22182 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22183 tree v2sf_ftype_v2sf
22184 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22185 tree v2sf_ftype_v2sf_v2sf
22186 = build_function_type_list (V2SF_type_node,
22187 V2SF_type_node, V2SF_type_node, NULL_TREE);
22188 tree v2si_ftype_v2sf_v2sf
22189 = build_function_type_list (V2SI_type_node,
22190 V2SF_type_node, V2SF_type_node, NULL_TREE);
22191 tree pint_type_node = build_pointer_type (integer_type_node);
22192 tree pdouble_type_node = build_pointer_type (double_type_node);
22193 tree pcdouble_type_node = build_pointer_type (
22194 build_type_variant (double_type_node, 1, 0));
22195 tree int_ftype_v2df_v2df
22196 = build_function_type_list (integer_type_node,
22197 V2DF_type_node, V2DF_type_node, NULL_TREE);
22198
22199 tree void_ftype_pcvoid
22200 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22201 tree v4sf_ftype_v4si
22202 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22203 tree v4si_ftype_v4sf
22204 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22205 tree v2df_ftype_v4si
22206 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22207 tree v4si_ftype_v2df
22208 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22209 tree v4si_ftype_v2df_v2df
22210 = build_function_type_list (V4SI_type_node,
22211 V2DF_type_node, V2DF_type_node, NULL_TREE);
22212 tree v2si_ftype_v2df
22213 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22214 tree v4sf_ftype_v2df
22215 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22216 tree v2df_ftype_v2si
22217 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22218 tree v2df_ftype_v4sf
22219 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22220 tree int_ftype_v2df
22221 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22222 tree int64_ftype_v2df
22223 = build_function_type_list (long_long_integer_type_node,
22224 V2DF_type_node, NULL_TREE);
22225 tree v2df_ftype_v2df_int
22226 = build_function_type_list (V2DF_type_node,
22227 V2DF_type_node, integer_type_node, NULL_TREE);
22228 tree v2df_ftype_v2df_int64
22229 = build_function_type_list (V2DF_type_node,
22230 V2DF_type_node, long_long_integer_type_node,
22231 NULL_TREE);
22232 tree v4sf_ftype_v4sf_v2df
22233 = build_function_type_list (V4SF_type_node,
22234 V4SF_type_node, V2DF_type_node, NULL_TREE);
22235 tree v2df_ftype_v2df_v4sf
22236 = build_function_type_list (V2DF_type_node,
22237 V2DF_type_node, V4SF_type_node, NULL_TREE);
22238 tree v2df_ftype_v2df_v2df_int
22239 = build_function_type_list (V2DF_type_node,
22240 V2DF_type_node, V2DF_type_node,
22241 integer_type_node,
22242 NULL_TREE);
22243 tree v2df_ftype_v2df_pcdouble
22244 = build_function_type_list (V2DF_type_node,
22245 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22246 tree void_ftype_pdouble_v2df
22247 = build_function_type_list (void_type_node,
22248 pdouble_type_node, V2DF_type_node, NULL_TREE);
22249 tree void_ftype_pint_int
22250 = build_function_type_list (void_type_node,
22251 pint_type_node, integer_type_node, NULL_TREE);
22252 tree void_ftype_v16qi_v16qi_pchar
22253 = build_function_type_list (void_type_node,
22254 V16QI_type_node, V16QI_type_node,
22255 pchar_type_node, NULL_TREE);
22256 tree v2df_ftype_pcdouble
22257 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22258 tree v2df_ftype_v2df_v2df
22259 = build_function_type_list (V2DF_type_node,
22260 V2DF_type_node, V2DF_type_node, NULL_TREE);
22261 tree v16qi_ftype_v16qi_v16qi
22262 = build_function_type_list (V16QI_type_node,
22263 V16QI_type_node, V16QI_type_node, NULL_TREE);
22264 tree v8hi_ftype_v8hi_v8hi
22265 = build_function_type_list (V8HI_type_node,
22266 V8HI_type_node, V8HI_type_node, NULL_TREE);
22267 tree v4si_ftype_v4si_v4si
22268 = build_function_type_list (V4SI_type_node,
22269 V4SI_type_node, V4SI_type_node, NULL_TREE);
22270 tree v2di_ftype_v2di_v2di
22271 = build_function_type_list (V2DI_type_node,
22272 V2DI_type_node, V2DI_type_node, NULL_TREE);
22273 tree v2di_ftype_v2df_v2df
22274 = build_function_type_list (V2DI_type_node,
22275 V2DF_type_node, V2DF_type_node, NULL_TREE);
22276 tree v2df_ftype_v2df
22277 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22278 tree v2di_ftype_v2di_int
22279 = build_function_type_list (V2DI_type_node,
22280 V2DI_type_node, integer_type_node, NULL_TREE);
22281 tree v2di_ftype_v2di_v2di_int
22282 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22283 V2DI_type_node, integer_type_node, NULL_TREE);
22284 tree v4si_ftype_v4si_int
22285 = build_function_type_list (V4SI_type_node,
22286 V4SI_type_node, integer_type_node, NULL_TREE);
22287 tree v8hi_ftype_v8hi_int
22288 = build_function_type_list (V8HI_type_node,
22289 V8HI_type_node, integer_type_node, NULL_TREE);
22290 tree v4si_ftype_v8hi_v8hi
22291 = build_function_type_list (V4SI_type_node,
22292 V8HI_type_node, V8HI_type_node, NULL_TREE);
22293 tree v1di_ftype_v8qi_v8qi
22294 = build_function_type_list (V1DI_type_node,
22295 V8QI_type_node, V8QI_type_node, NULL_TREE);
22296 tree v1di_ftype_v2si_v2si
22297 = build_function_type_list (V1DI_type_node,
22298 V2SI_type_node, V2SI_type_node, NULL_TREE);
22299 tree v2di_ftype_v16qi_v16qi
22300 = build_function_type_list (V2DI_type_node,
22301 V16QI_type_node, V16QI_type_node, NULL_TREE);
22302 tree v2di_ftype_v4si_v4si
22303 = build_function_type_list (V2DI_type_node,
22304 V4SI_type_node, V4SI_type_node, NULL_TREE);
22305 tree int_ftype_v16qi
22306 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22307 tree v16qi_ftype_pcchar
22308 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22309 tree void_ftype_pchar_v16qi
22310 = build_function_type_list (void_type_node,
22311 pchar_type_node, V16QI_type_node, NULL_TREE);
22312
22313 tree v2di_ftype_v2di_unsigned_unsigned
22314 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22315 unsigned_type_node, unsigned_type_node,
22316 NULL_TREE);
22317 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22318 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22319 unsigned_type_node, unsigned_type_node,
22320 NULL_TREE);
22321 tree v2di_ftype_v2di_v16qi
22322 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22323 NULL_TREE);
22324 tree v2df_ftype_v2df_v2df_v2df
22325 = build_function_type_list (V2DF_type_node,
22326 V2DF_type_node, V2DF_type_node,
22327 V2DF_type_node, NULL_TREE);
22328 tree v4sf_ftype_v4sf_v4sf_v4sf
22329 = build_function_type_list (V4SF_type_node,
22330 V4SF_type_node, V4SF_type_node,
22331 V4SF_type_node, NULL_TREE);
22332 tree v8hi_ftype_v16qi
22333 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22334 NULL_TREE);
22335 tree v4si_ftype_v16qi
22336 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22337 NULL_TREE);
22338 tree v2di_ftype_v16qi
22339 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22340 NULL_TREE);
22341 tree v4si_ftype_v8hi
22342 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22343 NULL_TREE);
22344 tree v2di_ftype_v8hi
22345 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22346 NULL_TREE);
22347 tree v2di_ftype_v4si
22348 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22349 NULL_TREE);
22350 tree v2di_ftype_pv2di
22351 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22352 NULL_TREE);
22353 tree v16qi_ftype_v16qi_v16qi_int
22354 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22355 V16QI_type_node, integer_type_node,
22356 NULL_TREE);
22357 tree v16qi_ftype_v16qi_v16qi_v16qi
22358 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22359 V16QI_type_node, V16QI_type_node,
22360 NULL_TREE);
22361 tree v8hi_ftype_v8hi_v8hi_int
22362 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22363 V8HI_type_node, integer_type_node,
22364 NULL_TREE);
22365 tree v4si_ftype_v4si_v4si_int
22366 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22367 V4SI_type_node, integer_type_node,
22368 NULL_TREE);
22369 tree int_ftype_v2di_v2di
22370 = build_function_type_list (integer_type_node,
22371 V2DI_type_node, V2DI_type_node,
22372 NULL_TREE);
22373 tree int_ftype_v16qi_int_v16qi_int_int
22374 = build_function_type_list (integer_type_node,
22375 V16QI_type_node,
22376 integer_type_node,
22377 V16QI_type_node,
22378 integer_type_node,
22379 integer_type_node,
22380 NULL_TREE);
22381 tree v16qi_ftype_v16qi_int_v16qi_int_int
22382 = build_function_type_list (V16QI_type_node,
22383 V16QI_type_node,
22384 integer_type_node,
22385 V16QI_type_node,
22386 integer_type_node,
22387 integer_type_node,
22388 NULL_TREE);
22389 tree int_ftype_v16qi_v16qi_int
22390 = build_function_type_list (integer_type_node,
22391 V16QI_type_node,
22392 V16QI_type_node,
22393 integer_type_node,
22394 NULL_TREE);
22395
22396 /* SSE5 instructions.  */
22397 tree v2di_ftype_v2di_v2di_v2di
22398 = build_function_type_list (V2DI_type_node,
22399 V2DI_type_node,
22400 V2DI_type_node,
22401 V2DI_type_node,
22402 NULL_TREE);
22403
22404 tree v4si_ftype_v4si_v4si_v4si
22405 = build_function_type_list (V4SI_type_node,
22406 V4SI_type_node,
22407 V4SI_type_node,
22408 V4SI_type_node,
22409 NULL_TREE);
22410
22411 tree v4si_ftype_v4si_v4si_v2di
22412 = build_function_type_list (V4SI_type_node,
22413 V4SI_type_node,
22414 V4SI_type_node,
22415 V2DI_type_node,
22416 NULL_TREE);
22417
22418 tree v8hi_ftype_v8hi_v8hi_v8hi
22419 = build_function_type_list (V8HI_type_node,
22420 V8HI_type_node,
22421 V8HI_type_node,
22422 V8HI_type_node,
22423 NULL_TREE);
22424
22425 tree v8hi_ftype_v8hi_v8hi_v4si
22426 = build_function_type_list (V8HI_type_node,
22427 V8HI_type_node,
22428 V8HI_type_node,
22429 V4SI_type_node,
22430 NULL_TREE);
22431
22432 tree v2df_ftype_v2df_v2df_v16qi
22433 = build_function_type_list (V2DF_type_node,
22434 V2DF_type_node,
22435 V2DF_type_node,
22436 V16QI_type_node,
22437 NULL_TREE);
22438
22439 tree v4sf_ftype_v4sf_v4sf_v16qi
22440 = build_function_type_list (V4SF_type_node,
22441 V4SF_type_node,
22442 V4SF_type_node,
22443 V16QI_type_node,
22444 NULL_TREE);
22445
22446 tree v2di_ftype_v2di_si
22447 = build_function_type_list (V2DI_type_node,
22448 V2DI_type_node,
22449 integer_type_node,
22450 NULL_TREE);
22451
22452 tree v4si_ftype_v4si_si
22453 = build_function_type_list (V4SI_type_node,
22454 V4SI_type_node,
22455 integer_type_node,
22456 NULL_TREE);
22457
22458 tree v8hi_ftype_v8hi_si
22459 = build_function_type_list (V8HI_type_node,
22460 V8HI_type_node,
22461 integer_type_node,
22462 NULL_TREE);
22463
22464 tree v16qi_ftype_v16qi_si
22465 = build_function_type_list (V16QI_type_node,
22466 V16QI_type_node,
22467 integer_type_node,
22468 NULL_TREE);
22469 tree v4sf_ftype_v4hi
22470 = build_function_type_list (V4SF_type_node,
22471 V4HI_type_node,
22472 NULL_TREE);
22473
22474 tree v4hi_ftype_v4sf
22475 = build_function_type_list (V4HI_type_node,
22476 V4SF_type_node,
22477 NULL_TREE);
22478
22479 tree v2di_ftype_v2di
22480 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22481
22482 tree v16qi_ftype_v8hi_v8hi
22483 = build_function_type_list (V16QI_type_node,
22484 V8HI_type_node, V8HI_type_node,
22485 NULL_TREE);
22486 tree v8hi_ftype_v4si_v4si
22487 = build_function_type_list (V8HI_type_node,
22488 V4SI_type_node, V4SI_type_node,
22489 NULL_TREE);
22490 tree v8hi_ftype_v16qi_v16qi
22491 = build_function_type_list (V8HI_type_node,
22492 V16QI_type_node, V16QI_type_node,
22493 NULL_TREE);
22494 tree v4hi_ftype_v8qi_v8qi
22495 = build_function_type_list (V4HI_type_node,
22496 V8QI_type_node, V8QI_type_node,
22497 NULL_TREE);
22498 tree unsigned_ftype_unsigned_uchar
22499 = build_function_type_list (unsigned_type_node,
22500 unsigned_type_node,
22501 unsigned_char_type_node,
22502 NULL_TREE);
22503 tree unsigned_ftype_unsigned_ushort
22504 = build_function_type_list (unsigned_type_node,
22505 unsigned_type_node,
22506 short_unsigned_type_node,
22507 NULL_TREE);
22508 tree unsigned_ftype_unsigned_unsigned
22509 = build_function_type_list (unsigned_type_node,
22510 unsigned_type_node,
22511 unsigned_type_node,
22512 NULL_TREE);
22513 tree uint64_ftype_uint64_uint64
22514 = build_function_type_list (long_long_unsigned_type_node,
22515 long_long_unsigned_type_node,
22516 long_long_unsigned_type_node,
22517 NULL_TREE);
22518 tree float_ftype_float
22519 = build_function_type_list (float_type_node,
22520 float_type_node,
22521 NULL_TREE);
22522
22523 /* AVX builtins.  */
22524 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22525 V32QImode);
22526 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22527 V8SImode);
22528 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22529 V8SFmode);
22530 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22531 V4DImode);
22532 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22533 V4DFmode);
22534 tree v8sf_ftype_v8sf
22535 = build_function_type_list (V8SF_type_node,
22536 V8SF_type_node,
22537 NULL_TREE);
22538 tree v8si_ftype_v8sf
22539 = build_function_type_list (V8SI_type_node,
22540 V8SF_type_node,
22541 NULL_TREE);
22542 tree v8sf_ftype_v8si
22543 = build_function_type_list (V8SF_type_node,
22544 V8SI_type_node,
22545 NULL_TREE);
22546 tree v4si_ftype_v4df
22547 = build_function_type_list (V4SI_type_node,
22548 V4DF_type_node,
22549 NULL_TREE);
22550 tree v4df_ftype_v4df
22551 = build_function_type_list (V4DF_type_node,
22552 V4DF_type_node,
22553 NULL_TREE);
22554 tree v4df_ftype_v4si
22555 = build_function_type_list (V4DF_type_node,
22556 V4SI_type_node,
22557 NULL_TREE);
22558 tree v4df_ftype_v4sf
22559 = build_function_type_list (V4DF_type_node,
22560 V4SF_type_node,
22561 NULL_TREE);
22562 tree v4sf_ftype_v4df
22563 = build_function_type_list (V4SF_type_node,
22564 V4DF_type_node,
22565 NULL_TREE);
22566 tree v8sf_ftype_v8sf_v8sf
22567 = build_function_type_list (V8SF_type_node,
22568 V8SF_type_node, V8SF_type_node,
22569 NULL_TREE);
22570 tree v4df_ftype_v4df_v4df
22571 = build_function_type_list (V4DF_type_node,
22572 V4DF_type_node, V4DF_type_node,
22573 NULL_TREE);
22574 tree v8sf_ftype_v8sf_int
22575 = build_function_type_list (V8SF_type_node,
22576 V8SF_type_node, integer_type_node,
22577 NULL_TREE);
22578 tree v4si_ftype_v8si_int
22579 = build_function_type_list (V4SI_type_node,
22580 V8SI_type_node, integer_type_node,
22581 NULL_TREE);
22582 tree v4df_ftype_v4df_int
22583 = build_function_type_list (V4DF_type_node,
22584 V4DF_type_node, integer_type_node,
22585 NULL_TREE);
22586 tree v4sf_ftype_v8sf_int
22587 = build_function_type_list (V4SF_type_node,
22588 V8SF_type_node, integer_type_node,
22589 NULL_TREE);
22590 tree v2df_ftype_v4df_int
22591 = build_function_type_list (V2DF_type_node,
22592 V4DF_type_node, integer_type_node,
22593 NULL_TREE);
22594 tree v8sf_ftype_v8sf_v8sf_int
22595 = build_function_type_list (V8SF_type_node,
22596 V8SF_type_node, V8SF_type_node,
22597 integer_type_node,
22598 NULL_TREE);
22599 tree v8sf_ftype_v8sf_v8sf_v8sf
22600 = build_function_type_list (V8SF_type_node,
22601 V8SF_type_node, V8SF_type_node,
22602 V8SF_type_node,
22603 NULL_TREE);
22604 tree v4df_ftype_v4df_v4df_v4df
22605 = build_function_type_list (V4DF_type_node,
22606 V4DF_type_node, V4DF_type_node,
22607 V4DF_type_node,
22608 NULL_TREE);
22609 tree v8si_ftype_v8si_v8si_int
22610 = build_function_type_list (V8SI_type_node,
22611 V8SI_type_node, V8SI_type_node,
22612 integer_type_node,
22613 NULL_TREE);
22614 tree v4df_ftype_v4df_v4df_int
22615 = build_function_type_list (V4DF_type_node,
22616 V4DF_type_node, V4DF_type_node,
22617 integer_type_node,
22618 NULL_TREE);
22619 tree v8sf_ftype_pcfloat
22620 = build_function_type_list (V8SF_type_node,
22621 pcfloat_type_node,
22622 NULL_TREE);
22623 tree v4df_ftype_pcdouble
22624 = build_function_type_list (V4DF_type_node,
22625 pcdouble_type_node,
22626 NULL_TREE);
22627 tree pcv4sf_type_node
22628 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22629 tree pcv2df_type_node
22630 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22631 tree v8sf_ftype_pcv4sf
22632 = build_function_type_list (V8SF_type_node,
22633 pcv4sf_type_node,
22634 NULL_TREE);
22635 tree v4df_ftype_pcv2df
22636 = build_function_type_list (V4DF_type_node,
22637 pcv2df_type_node,
22638 NULL_TREE);
22639 tree v32qi_ftype_pcchar
22640 = build_function_type_list (V32QI_type_node,
22641 pcchar_type_node,
22642 NULL_TREE);
22643 tree void_ftype_pchar_v32qi
22644 = build_function_type_list (void_type_node,
22645 pchar_type_node, V32QI_type_node,
22646 NULL_TREE);
22647 tree v8si_ftype_v8si_v4si_int
22648 = build_function_type_list (V8SI_type_node,
22649 V8SI_type_node, V4SI_type_node,
22650 integer_type_node,
22651 NULL_TREE);
22652 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22653 tree void_ftype_pv4di_v4di
22654 = build_function_type_list (void_type_node,
22655 pv4di_type_node, V4DI_type_node,
22656 NULL_TREE);
22657 tree v8sf_ftype_v8sf_v4sf_int
22658 = build_function_type_list (V8SF_type_node,
22659 V8SF_type_node, V4SF_type_node,
22660 integer_type_node,
22661 NULL_TREE);
22662 tree v4df_ftype_v4df_v2df_int
22663 = build_function_type_list (V4DF_type_node,
22664 V4DF_type_node, V2DF_type_node,
22665 integer_type_node,
22666 NULL_TREE);
22667 tree void_ftype_pfloat_v8sf
22668 = build_function_type_list (void_type_node,
22669 pfloat_type_node, V8SF_type_node,
22670 NULL_TREE);
22671 tree void_ftype_pdouble_v4df
22672 = build_function_type_list (void_type_node,
22673 pdouble_type_node, V4DF_type_node,
22674 NULL_TREE);
22675 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22676 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22677 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22678 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22679 tree pcv8sf_type_node
22680 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22681 tree pcv4df_type_node
22682 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22683 tree v8sf_ftype_pcv8sf_v8sf
22684 = build_function_type_list (V8SF_type_node,
22685 pcv8sf_type_node, V8SF_type_node,
22686 NULL_TREE);
22687 tree v4df_ftype_pcv4df_v4df
22688 = build_function_type_list (V4DF_type_node,
22689 pcv4df_type_node, V4DF_type_node,
22690 NULL_TREE);
22691 tree v4sf_ftype_pcv4sf_v4sf
22692 = build_function_type_list (V4SF_type_node,
22693 pcv4sf_type_node, V4SF_type_node,
22694 NULL_TREE);
22695 tree v2df_ftype_pcv2df_v2df
22696 = build_function_type_list (V2DF_type_node,
22697 pcv2df_type_node, V2DF_type_node,
22698 NULL_TREE);
22699 tree void_ftype_pv8sf_v8sf_v8sf
22700 = build_function_type_list (void_type_node,
22701 pv8sf_type_node, V8SF_type_node,
22702 V8SF_type_node,
22703 NULL_TREE);
22704 tree void_ftype_pv4df_v4df_v4df
22705 = build_function_type_list (void_type_node,
22706 pv4df_type_node, V4DF_type_node,
22707 V4DF_type_node,
22708 NULL_TREE);
22709 tree void_ftype_pv4sf_v4sf_v4sf
22710 = build_function_type_list (void_type_node,
22711 pv4sf_type_node, V4SF_type_node,
22712 V4SF_type_node,
22713 NULL_TREE);
22714 tree void_ftype_pv2df_v2df_v2df
22715 = build_function_type_list (void_type_node,
22716 pv2df_type_node, V2DF_type_node,
22717 V2DF_type_node,
22718 NULL_TREE);
22719 tree v4df_ftype_v2df
22720 = build_function_type_list (V4DF_type_node,
22721 V2DF_type_node,
22722 NULL_TREE);
22723 tree v8sf_ftype_v4sf
22724 = build_function_type_list (V8SF_type_node,
22725 V4SF_type_node,
22726 NULL_TREE);
22727 tree v8si_ftype_v4si
22728 = build_function_type_list (V8SI_type_node,
22729 V4SI_type_node,
22730 NULL_TREE);
22731 tree v2df_ftype_v4df
22732 = build_function_type_list (V2DF_type_node,
22733 V4DF_type_node,
22734 NULL_TREE);
22735 tree v4sf_ftype_v8sf
22736 = build_function_type_list (V4SF_type_node,
22737 V8SF_type_node,
22738 NULL_TREE);
22739 tree v4si_ftype_v8si
22740 = build_function_type_list (V4SI_type_node,
22741 V8SI_type_node,
22742 NULL_TREE);
22743 tree int_ftype_v4df
22744 = build_function_type_list (integer_type_node,
22745 V4DF_type_node,
22746 NULL_TREE);
22747 tree int_ftype_v8sf
22748 = build_function_type_list (integer_type_node,
22749 V8SF_type_node,
22750 NULL_TREE);
22751 tree int_ftype_v8sf_v8sf
22752 = build_function_type_list (integer_type_node,
22753 V8SF_type_node, V8SF_type_node,
22754 NULL_TREE);
22755 tree int_ftype_v4di_v4di
22756 = build_function_type_list (integer_type_node,
22757 V4DI_type_node, V4DI_type_node,
22758 NULL_TREE);
22759 tree int_ftype_v4df_v4df
22760 = build_function_type_list (integer_type_node,
22761 V4DF_type_node, V4DF_type_node,
22762 NULL_TREE);
22763 tree v8sf_ftype_v8sf_v8si
22764 = build_function_type_list (V8SF_type_node,
22765 V8SF_type_node, V8SI_type_node,
22766 NULL_TREE);
22767 tree v4df_ftype_v4df_v4di
22768 = build_function_type_list (V4DF_type_node,
22769 V4DF_type_node, V4DI_type_node,
22770 NULL_TREE);
22771 tree v4sf_ftype_v4sf_v4si
22772 = build_function_type_list (V4SF_type_node,
22773 V4SF_type_node, V4SI_type_node, NULL_TREE);
22774 tree v2df_ftype_v2df_v2di
22775 = build_function_type_list (V2DF_type_node,
22776 V2DF_type_node, V2DI_type_node, NULL_TREE);
22777
22778 tree ftype;
22779
22780 /* Add all special builtins with variable number of operands. */
22781 for (i = 0, d = bdesc_special_args;
22782 i < ARRAY_SIZE (bdesc_special_args);
22783 i++, d++)
22784 {
22785 tree type;
22786
22787 if (d->name == 0)
22788 continue;
22789
22790 switch ((enum ix86_special_builtin_type) d->flag)
22791 {
22792 case VOID_FTYPE_VOID:
22793 type = void_ftype_void;
22794 break;
22795 case V32QI_FTYPE_PCCHAR:
22796 type = v32qi_ftype_pcchar;
22797 break;
22798 case V16QI_FTYPE_PCCHAR:
22799 type = v16qi_ftype_pcchar;
22800 break;
22801 case V8SF_FTYPE_PCV4SF:
22802 type = v8sf_ftype_pcv4sf;
22803 break;
22804 case V8SF_FTYPE_PCFLOAT:
22805 type = v8sf_ftype_pcfloat;
22806 break;
22807 case V4DF_FTYPE_PCV2DF:
22808 type = v4df_ftype_pcv2df;
22809 break;
22810 case V4DF_FTYPE_PCDOUBLE:
22811 type = v4df_ftype_pcdouble;
22812 break;
22813 case V4SF_FTYPE_PCFLOAT:
22814 type = v4sf_ftype_pcfloat;
22815 break;
22816 case V2DI_FTYPE_PV2DI:
22817 type = v2di_ftype_pv2di;
22818 break;
22819 case V2DF_FTYPE_PCDOUBLE:
22820 type = v2df_ftype_pcdouble;
22821 break;
22822 case V8SF_FTYPE_PCV8SF_V8SF:
22823 type = v8sf_ftype_pcv8sf_v8sf;
22824 break;
22825 case V4DF_FTYPE_PCV4DF_V4DF:
22826 type = v4df_ftype_pcv4df_v4df;
22827 break;
22828 case V4SF_FTYPE_V4SF_PCV2SF:
22829 type = v4sf_ftype_v4sf_pcv2sf;
22830 break;
22831 case V4SF_FTYPE_PCV4SF_V4SF:
22832 type = v4sf_ftype_pcv4sf_v4sf;
22833 break;
22834 case V2DF_FTYPE_V2DF_PCDOUBLE:
22835 type = v2df_ftype_v2df_pcdouble;
22836 break;
22837 case V2DF_FTYPE_PCV2DF_V2DF:
22838 type = v2df_ftype_pcv2df_v2df;
22839 break;
22840 case VOID_FTYPE_PV2SF_V4SF:
22841 type = void_ftype_pv2sf_v4sf;
22842 break;
22843 case VOID_FTYPE_PV4DI_V4DI:
22844 type = void_ftype_pv4di_v4di;
22845 break;
22846 case VOID_FTYPE_PV2DI_V2DI:
22847 type = void_ftype_pv2di_v2di;
22848 break;
22849 case VOID_FTYPE_PCHAR_V32QI:
22850 type = void_ftype_pchar_v32qi;
22851 break;
22852 case VOID_FTYPE_PCHAR_V16QI:
22853 type = void_ftype_pchar_v16qi;
22854 break;
22855 case VOID_FTYPE_PFLOAT_V8SF:
22856 type = void_ftype_pfloat_v8sf;
22857 break;
22858 case VOID_FTYPE_PFLOAT_V4SF:
22859 type = void_ftype_pfloat_v4sf;
22860 break;
22861 case VOID_FTYPE_PDOUBLE_V4DF:
22862 type = void_ftype_pdouble_v4df;
22863 break;
22864 case VOID_FTYPE_PDOUBLE_V2DF:
22865 type = void_ftype_pdouble_v2df;
22866 break;
22867 case VOID_FTYPE_PDI_DI:
22868 type = void_ftype_pdi_di;
22869 break;
22870 case VOID_FTYPE_PINT_INT:
22871 type = void_ftype_pint_int;
22872 break;
22873 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22874 type = void_ftype_pv8sf_v8sf_v8sf;
22875 break;
22876 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22877 type = void_ftype_pv4df_v4df_v4df;
22878 break;
22879 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22880 type = void_ftype_pv4sf_v4sf_v4sf;
22881 break;
22882 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22883 type = void_ftype_pv2df_v2df_v2df;
22884 break;
22885 default:
22886 gcc_unreachable ();
22887 }
22888
22889 def_builtin (d->mask, d->name, type, d->code);
22890 }
22891
22892   /* Add all builtins with a variable number of operands.  */
22893 for (i = 0, d = bdesc_args;
22894 i < ARRAY_SIZE (bdesc_args);
22895 i++, d++)
22896 {
22897 tree type;
22898
22899 if (d->name == 0)
22900 continue;
22901
22902 switch ((enum ix86_builtin_type) d->flag)
22903 {
22904 case FLOAT_FTYPE_FLOAT:
22905 type = float_ftype_float;
22906 break;
22907 case INT_FTYPE_V8SF_V8SF_PTEST:
22908 type = int_ftype_v8sf_v8sf;
22909 break;
22910 case INT_FTYPE_V4DI_V4DI_PTEST:
22911 type = int_ftype_v4di_v4di;
22912 break;
22913 case INT_FTYPE_V4DF_V4DF_PTEST:
22914 type = int_ftype_v4df_v4df;
22915 break;
22916 case INT_FTYPE_V4SF_V4SF_PTEST:
22917 type = int_ftype_v4sf_v4sf;
22918 break;
22919 case INT_FTYPE_V2DI_V2DI_PTEST:
22920 type = int_ftype_v2di_v2di;
22921 break;
22922 case INT_FTYPE_V2DF_V2DF_PTEST:
22923 type = int_ftype_v2df_v2df;
22924 break;
22925 case INT64_FTYPE_V4SF:
22926 type = int64_ftype_v4sf;
22927 break;
22928 case INT64_FTYPE_V2DF:
22929 type = int64_ftype_v2df;
22930 break;
22931 case INT_FTYPE_V16QI:
22932 type = int_ftype_v16qi;
22933 break;
22934 case INT_FTYPE_V8QI:
22935 type = int_ftype_v8qi;
22936 break;
22937 case INT_FTYPE_V8SF:
22938 type = int_ftype_v8sf;
22939 break;
22940 case INT_FTYPE_V4DF:
22941 type = int_ftype_v4df;
22942 break;
22943 case INT_FTYPE_V4SF:
22944 type = int_ftype_v4sf;
22945 break;
22946 case INT_FTYPE_V2DF:
22947 type = int_ftype_v2df;
22948 break;
22949 case V16QI_FTYPE_V16QI:
22950 type = v16qi_ftype_v16qi;
22951 break;
22952 case V8SI_FTYPE_V8SF:
22953 type = v8si_ftype_v8sf;
22954 break;
22955 case V8SI_FTYPE_V4SI:
22956 type = v8si_ftype_v4si;
22957 break;
22958 case V8HI_FTYPE_V8HI:
22959 type = v8hi_ftype_v8hi;
22960 break;
22961 case V8HI_FTYPE_V16QI:
22962 type = v8hi_ftype_v16qi;
22963 break;
22964 case V8QI_FTYPE_V8QI:
22965 type = v8qi_ftype_v8qi;
22966 break;
22967 case V8SF_FTYPE_V8SF:
22968 type = v8sf_ftype_v8sf;
22969 break;
22970 case V8SF_FTYPE_V8SI:
22971 type = v8sf_ftype_v8si;
22972 break;
22973 case V8SF_FTYPE_V4SF:
22974 type = v8sf_ftype_v4sf;
22975 break;
22976 case V4SI_FTYPE_V4DF:
22977 type = v4si_ftype_v4df;
22978 break;
22979 case V4SI_FTYPE_V4SI:
22980 type = v4si_ftype_v4si;
22981 break;
22982 case V4SI_FTYPE_V16QI:
22983 type = v4si_ftype_v16qi;
22984 break;
22985 case V4SI_FTYPE_V8SI:
22986 type = v4si_ftype_v8si;
22987 break;
22988 case V4SI_FTYPE_V8HI:
22989 type = v4si_ftype_v8hi;
22990 break;
22991 case V4SI_FTYPE_V4SF:
22992 type = v4si_ftype_v4sf;
22993 break;
22994 case V4SI_FTYPE_V2DF:
22995 type = v4si_ftype_v2df;
22996 break;
22997 case V4HI_FTYPE_V4HI:
22998 type = v4hi_ftype_v4hi;
22999 break;
23000 case V4DF_FTYPE_V4DF:
23001 type = v4df_ftype_v4df;
23002 break;
23003 case V4DF_FTYPE_V4SI:
23004 type = v4df_ftype_v4si;
23005 break;
23006 case V4DF_FTYPE_V4SF:
23007 type = v4df_ftype_v4sf;
23008 break;
23009 case V4DF_FTYPE_V2DF:
23010 type = v4df_ftype_v2df;
23011 break;
23012 case V4SF_FTYPE_V4SF:
23013 case V4SF_FTYPE_V4SF_VEC_MERGE:
23014 type = v4sf_ftype_v4sf;
23015 break;
23016 case V4SF_FTYPE_V8SF:
23017 type = v4sf_ftype_v8sf;
23018 break;
23019 case V4SF_FTYPE_V4SI:
23020 type = v4sf_ftype_v4si;
23021 break;
23022 case V4SF_FTYPE_V4DF:
23023 type = v4sf_ftype_v4df;
23024 break;
23025 case V4SF_FTYPE_V2DF:
23026 type = v4sf_ftype_v2df;
23027 break;
23028 case V2DI_FTYPE_V2DI:
23029 type = v2di_ftype_v2di;
23030 break;
23031 case V2DI_FTYPE_V16QI:
23032 type = v2di_ftype_v16qi;
23033 break;
23034 case V2DI_FTYPE_V8HI:
23035 type = v2di_ftype_v8hi;
23036 break;
23037 case V2DI_FTYPE_V4SI:
23038 type = v2di_ftype_v4si;
23039 break;
23040 case V2SI_FTYPE_V2SI:
23041 type = v2si_ftype_v2si;
23042 break;
23043 case V2SI_FTYPE_V4SF:
23044 type = v2si_ftype_v4sf;
23045 break;
23046 case V2SI_FTYPE_V2DF:
23047 type = v2si_ftype_v2df;
23048 break;
23049 case V2SI_FTYPE_V2SF:
23050 type = v2si_ftype_v2sf;
23051 break;
23052 case V2DF_FTYPE_V4DF:
23053 type = v2df_ftype_v4df;
23054 break;
23055 case V2DF_FTYPE_V4SF:
23056 type = v2df_ftype_v4sf;
23057 break;
23058 case V2DF_FTYPE_V2DF:
23059 case V2DF_FTYPE_V2DF_VEC_MERGE:
23060 type = v2df_ftype_v2df;
23061 break;
23062 case V2DF_FTYPE_V2SI:
23063 type = v2df_ftype_v2si;
23064 break;
23065 case V2DF_FTYPE_V4SI:
23066 type = v2df_ftype_v4si;
23067 break;
23068 case V2SF_FTYPE_V2SF:
23069 type = v2sf_ftype_v2sf;
23070 break;
23071 case V2SF_FTYPE_V2SI:
23072 type = v2sf_ftype_v2si;
23073 break;
23074 case V16QI_FTYPE_V16QI_V16QI:
23075 type = v16qi_ftype_v16qi_v16qi;
23076 break;
23077 case V16QI_FTYPE_V8HI_V8HI:
23078 type = v16qi_ftype_v8hi_v8hi;
23079 break;
23080 case V8QI_FTYPE_V8QI_V8QI:
23081 type = v8qi_ftype_v8qi_v8qi;
23082 break;
23083 case V8QI_FTYPE_V4HI_V4HI:
23084 type = v8qi_ftype_v4hi_v4hi;
23085 break;
23086 case V8HI_FTYPE_V8HI_V8HI:
23087 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23088 type = v8hi_ftype_v8hi_v8hi;
23089 break;
23090 case V8HI_FTYPE_V16QI_V16QI:
23091 type = v8hi_ftype_v16qi_v16qi;
23092 break;
23093 case V8HI_FTYPE_V4SI_V4SI:
23094 type = v8hi_ftype_v4si_v4si;
23095 break;
23096 case V8HI_FTYPE_V8HI_SI_COUNT:
23097 type = v8hi_ftype_v8hi_int;
23098 break;
23099 case V8SF_FTYPE_V8SF_V8SF:
23100 type = v8sf_ftype_v8sf_v8sf;
23101 break;
23102 case V8SF_FTYPE_V8SF_V8SI:
23103 type = v8sf_ftype_v8sf_v8si;
23104 break;
23105 case V4SI_FTYPE_V4SI_V4SI:
23106 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23107 type = v4si_ftype_v4si_v4si;
23108 break;
23109 case V4SI_FTYPE_V8HI_V8HI:
23110 type = v4si_ftype_v8hi_v8hi;
23111 break;
23112 case V4SI_FTYPE_V4SF_V4SF:
23113 type = v4si_ftype_v4sf_v4sf;
23114 break;
23115 case V4SI_FTYPE_V2DF_V2DF:
23116 type = v4si_ftype_v2df_v2df;
23117 break;
23118 case V4SI_FTYPE_V4SI_SI_COUNT:
23119 type = v4si_ftype_v4si_int;
23120 break;
23121 case V4HI_FTYPE_V4HI_V4HI:
23122 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23123 type = v4hi_ftype_v4hi_v4hi;
23124 break;
23125 case V4HI_FTYPE_V8QI_V8QI:
23126 type = v4hi_ftype_v8qi_v8qi;
23127 break;
23128 case V4HI_FTYPE_V2SI_V2SI:
23129 type = v4hi_ftype_v2si_v2si;
23130 break;
23131 case V4HI_FTYPE_V4HI_SI_COUNT:
23132 type = v4hi_ftype_v4hi_int;
23133 break;
23134 case V4DF_FTYPE_V4DF_V4DF:
23135 type = v4df_ftype_v4df_v4df;
23136 break;
23137 case V4DF_FTYPE_V4DF_V4DI:
23138 type = v4df_ftype_v4df_v4di;
23139 break;
23140 case V4SF_FTYPE_V4SF_V4SF:
23141 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23142 type = v4sf_ftype_v4sf_v4sf;
23143 break;
23144 case V4SF_FTYPE_V4SF_V4SI:
23145 type = v4sf_ftype_v4sf_v4si;
23146 break;
23147 case V4SF_FTYPE_V4SF_V2SI:
23148 type = v4sf_ftype_v4sf_v2si;
23149 break;
23150 case V4SF_FTYPE_V4SF_V2DF:
23151 type = v4sf_ftype_v4sf_v2df;
23152 break;
23153 case V4SF_FTYPE_V4SF_DI:
23154 type = v4sf_ftype_v4sf_int64;
23155 break;
23156 case V4SF_FTYPE_V4SF_SI:
23157 type = v4sf_ftype_v4sf_int;
23158 break;
23159 case V2DI_FTYPE_V2DI_V2DI:
23160 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23161 type = v2di_ftype_v2di_v2di;
23162 break;
23163 case V2DI_FTYPE_V16QI_V16QI:
23164 type = v2di_ftype_v16qi_v16qi;
23165 break;
23166 case V2DI_FTYPE_V4SI_V4SI:
23167 type = v2di_ftype_v4si_v4si;
23168 break;
23169 case V2DI_FTYPE_V2DI_V16QI:
23170 type = v2di_ftype_v2di_v16qi;
23171 break;
23172 case V2DI_FTYPE_V2DF_V2DF:
23173 type = v2di_ftype_v2df_v2df;
23174 break;
23175 case V2DI_FTYPE_V2DI_SI_COUNT:
23176 type = v2di_ftype_v2di_int;
23177 break;
23178 case V2SI_FTYPE_V2SI_V2SI:
23179 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23180 type = v2si_ftype_v2si_v2si;
23181 break;
23182 case V2SI_FTYPE_V4HI_V4HI:
23183 type = v2si_ftype_v4hi_v4hi;
23184 break;
23185 case V2SI_FTYPE_V2SF_V2SF:
23186 type = v2si_ftype_v2sf_v2sf;
23187 break;
23188 case V2SI_FTYPE_V2SI_SI_COUNT:
23189 type = v2si_ftype_v2si_int;
23190 break;
23191 case V2DF_FTYPE_V2DF_V2DF:
23192 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23193 type = v2df_ftype_v2df_v2df;
23194 break;
23195 case V2DF_FTYPE_V2DF_V4SF:
23196 type = v2df_ftype_v2df_v4sf;
23197 break;
23198 case V2DF_FTYPE_V2DF_V2DI:
23199 type = v2df_ftype_v2df_v2di;
23200 break;
23201 case V2DF_FTYPE_V2DF_DI:
23202 type = v2df_ftype_v2df_int64;
23203 break;
23204 case V2DF_FTYPE_V2DF_SI:
23205 type = v2df_ftype_v2df_int;
23206 break;
23207 case V2SF_FTYPE_V2SF_V2SF:
23208 type = v2sf_ftype_v2sf_v2sf;
23209 break;
23210 case V1DI_FTYPE_V1DI_V1DI:
23211 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23212 type = v1di_ftype_v1di_v1di;
23213 break;
23214 case V1DI_FTYPE_V8QI_V8QI:
23215 type = v1di_ftype_v8qi_v8qi;
23216 break;
23217 case V1DI_FTYPE_V2SI_V2SI:
23218 type = v1di_ftype_v2si_v2si;
23219 break;
23220 case V1DI_FTYPE_V1DI_SI_COUNT:
23221 type = v1di_ftype_v1di_int;
23222 break;
23223 case UINT64_FTYPE_UINT64_UINT64:
23224 type = uint64_ftype_uint64_uint64;
23225 break;
23226 case UINT_FTYPE_UINT_UINT:
23227 type = unsigned_ftype_unsigned_unsigned;
23228 break;
23229 case UINT_FTYPE_UINT_USHORT:
23230 type = unsigned_ftype_unsigned_ushort;
23231 break;
23232 case UINT_FTYPE_UINT_UCHAR:
23233 type = unsigned_ftype_unsigned_uchar;
23234 break;
23235 case V8HI_FTYPE_V8HI_INT:
23236 type = v8hi_ftype_v8hi_int;
23237 break;
23238 case V8SF_FTYPE_V8SF_INT:
23239 type = v8sf_ftype_v8sf_int;
23240 break;
23241 case V4SI_FTYPE_V4SI_INT:
23242 type = v4si_ftype_v4si_int;
23243 break;
23244 case V4SI_FTYPE_V8SI_INT:
23245 type = v4si_ftype_v8si_int;
23246 break;
23247 case V4HI_FTYPE_V4HI_INT:
23248 type = v4hi_ftype_v4hi_int;
23249 break;
23250 case V4DF_FTYPE_V4DF_INT:
23251 type = v4df_ftype_v4df_int;
23252 break;
23253 case V4SF_FTYPE_V4SF_INT:
23254 type = v4sf_ftype_v4sf_int;
23255 break;
23256 case V4SF_FTYPE_V8SF_INT:
23257 type = v4sf_ftype_v8sf_int;
23258 break;
23259 case V2DI_FTYPE_V2DI_INT:
23260 case V2DI2TI_FTYPE_V2DI_INT:
23261 type = v2di_ftype_v2di_int;
23262 break;
23263 case V2DF_FTYPE_V2DF_INT:
23264 type = v2df_ftype_v2df_int;
23265 break;
23266 case V2DF_FTYPE_V4DF_INT:
23267 type = v2df_ftype_v4df_int;
23268 break;
23269 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23270 type = v16qi_ftype_v16qi_v16qi_v16qi;
23271 break;
23272 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23273 type = v8sf_ftype_v8sf_v8sf_v8sf;
23274 break;
23275 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23276 type = v4df_ftype_v4df_v4df_v4df;
23277 break;
23278 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23279 type = v4sf_ftype_v4sf_v4sf_v4sf;
23280 break;
23281 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23282 type = v2df_ftype_v2df_v2df_v2df;
23283 break;
23284 case V16QI_FTYPE_V16QI_V16QI_INT:
23285 type = v16qi_ftype_v16qi_v16qi_int;
23286 break;
23287 case V8SI_FTYPE_V8SI_V8SI_INT:
23288 type = v8si_ftype_v8si_v8si_int;
23289 break;
23290 case V8SI_FTYPE_V8SI_V4SI_INT:
23291 type = v8si_ftype_v8si_v4si_int;
23292 break;
23293 case V8HI_FTYPE_V8HI_V8HI_INT:
23294 type = v8hi_ftype_v8hi_v8hi_int;
23295 break;
23296 case V8SF_FTYPE_V8SF_V8SF_INT:
23297 type = v8sf_ftype_v8sf_v8sf_int;
23298 break;
23299 case V8SF_FTYPE_V8SF_V4SF_INT:
23300 type = v8sf_ftype_v8sf_v4sf_int;
23301 break;
23302 case V4SI_FTYPE_V4SI_V4SI_INT:
23303 type = v4si_ftype_v4si_v4si_int;
23304 break;
23305 case V4DF_FTYPE_V4DF_V4DF_INT:
23306 type = v4df_ftype_v4df_v4df_int;
23307 break;
23308 case V4DF_FTYPE_V4DF_V2DF_INT:
23309 type = v4df_ftype_v4df_v2df_int;
23310 break;
23311 case V4SF_FTYPE_V4SF_V4SF_INT:
23312 type = v4sf_ftype_v4sf_v4sf_int;
23313 break;
23314 case V2DI_FTYPE_V2DI_V2DI_INT:
23315 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23316 type = v2di_ftype_v2di_v2di_int;
23317 break;
23318 case V2DF_FTYPE_V2DF_V2DF_INT:
23319 type = v2df_ftype_v2df_v2df_int;
23320 break;
23321 case V2DI_FTYPE_V2DI_UINT_UINT:
23322 type = v2di_ftype_v2di_unsigned_unsigned;
23323 break;
23324 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23325 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23326 break;
23327 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23328 type = v1di_ftype_v1di_v1di_int;
23329 break;
23330 default:
23331 gcc_unreachable ();
23332 }
23333
23334 def_builtin_const (d->mask, d->name, type, d->code);
23335 }
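  /* Illustrative note, not part of the compiler proper: each bdesc_args
     entry pairs an ISA mask, an insn code, a builtin name, the builtin's
     enum value, an optional comparison code and one of the *_FTYPE_* codes
     dispatched on above; the SSE2 packed double add, for example, is
     registered by a row roughly of the form

	 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd",
	   IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }

     so adding a new builtin usually means adding a table row and, when the
     signature is new, a case here and in ix86_expand_args_builtin.  */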
23336
23337 /* pcmpestr[im] insns. */
23338 for (i = 0, d = bdesc_pcmpestr;
23339 i < ARRAY_SIZE (bdesc_pcmpestr);
23340 i++, d++)
23341 {
23342 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23343 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23344 else
23345 ftype = int_ftype_v16qi_int_v16qi_int_int;
23346 def_builtin_const (d->mask, d->name, ftype, d->code);
23347 }
23348
23349 /* pcmpistr[im] insns. */
23350 for (i = 0, d = bdesc_pcmpistr;
23351 i < ARRAY_SIZE (bdesc_pcmpistr);
23352 i++, d++)
23353 {
23354 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23355 ftype = v16qi_ftype_v16qi_v16qi_int;
23356 else
23357 ftype = int_ftype_v16qi_v16qi_int;
23358 def_builtin_const (d->mask, d->name, ftype, d->code);
23359 }
23360
23361 /* comi/ucomi insns. */
23362 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23363 if (d->mask == OPTION_MASK_ISA_SSE2)
23364 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23365 else
23366 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23367
23368 /* SSE */
23369 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23370 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23371
23372 /* SSE or 3DNow!A */
23373 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23374
23375 /* SSE2 */
23376 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23377
23378 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23379 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23380
23381 /* SSE3. */
23382 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23383 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23384
23385 /* AES */
23386 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23387 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23388 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23389 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23390 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23391 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
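  /* Illustrative note, not part of the compiler proper: the AES builtins
     above back the <wmmintrin.h> intrinsics, so a user compiling with
     -maes might write

	 #include <wmmintrin.h>
	 __m128i one_round (__m128i state, __m128i round_key)
	 {
	   return _mm_aesenc_si128 (state, round_key);
	 }

     where _mm_aesenc_si128 is a thin wrapper around
     __builtin_ia32_aesenc128 and ultimately expands to a single aesenc
     instruction.  */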
23392
23393 /* PCLMUL */
23394 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23395
23396 /* AVX */
23397 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23398 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23399
23400 /* Access to the vec_init patterns. */
23401 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23402 integer_type_node, NULL_TREE);
23403 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23404
23405 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23406 short_integer_type_node,
23407 short_integer_type_node,
23408 short_integer_type_node, NULL_TREE);
23409 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23410
23411 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23412 char_type_node, char_type_node,
23413 char_type_node, char_type_node,
23414 char_type_node, char_type_node,
23415 char_type_node, NULL_TREE);
23416 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23417
23418 /* Access to the vec_extract patterns. */
23419 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23420 integer_type_node, NULL_TREE);
23421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23422
23423 ftype = build_function_type_list (long_long_integer_type_node,
23424 V2DI_type_node, integer_type_node,
23425 NULL_TREE);
23426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23427
23428 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23429 integer_type_node, NULL_TREE);
23430 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23431
23432 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23433 integer_type_node, NULL_TREE);
23434 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23435
23436 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23437 integer_type_node, NULL_TREE);
23438 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23439
23440 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23441 integer_type_node, NULL_TREE);
23442 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23443
23444 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23445 integer_type_node, NULL_TREE);
23446 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23447
23448 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23449 integer_type_node, NULL_TREE);
23450 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23451
23452 /* Access to the vec_set patterns. */
23453 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23454 intDI_type_node,
23455 integer_type_node, NULL_TREE);
23456 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23457
23458 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23459 float_type_node,
23460 integer_type_node, NULL_TREE);
23461 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23462
23463 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23464 intSI_type_node,
23465 integer_type_node, NULL_TREE);
23466 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23467
23468 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23469 intHI_type_node,
23470 integer_type_node, NULL_TREE);
23471 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23472
23473 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23474 intHI_type_node,
23475 integer_type_node, NULL_TREE);
23476 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23477
23478 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23479 intQI_type_node,
23480 integer_type_node, NULL_TREE);
23481 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
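  /* Illustrative note, not part of the compiler proper: the vec_init,
     vec_ext and vec_set builtins registered above are normally reached
     through the intrinsic headers rather than called directly; for
     instance, GCC's emmintrin.h implements _mm_extract_epi16 essentially
     as

	 (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) __A, __N)

     and _mm_insert_epi16 as the corresponding __builtin_ia32_vec_set_v8hi
     call, both of which map onto the vec_extract/vec_set patterns.  */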
23482
23483   /* Add SSE5 multi-arg instructions.  */
23484 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23485 {
23486 tree mtype = NULL_TREE;
23487
23488 if (d->name == 0)
23489 continue;
23490
23491 switch ((enum multi_arg_type)d->flag)
23492 {
23493 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23494 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23495 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23496 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23497 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23498 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23499 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23500 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23501 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23502 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23503 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23504 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23505 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23506 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23507 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23508 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23509 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23510 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23511 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23512 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23513 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23514 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23515 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23516 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23517 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23518 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23519 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23520 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23521 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23522 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23523 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23524 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23525 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23526 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23527 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23528 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23529 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23530 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23531 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23532 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23533 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23534 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23535 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23536 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23537 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23538 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23539 case MULTI_ARG_UNKNOWN:
23540 default:
23541 gcc_unreachable ();
23542 }
23543
23544 if (mtype)
23545 def_builtin_const (d->mask, d->name, mtype, d->code);
23546 }
23547 }
23548
23549 /* Internal method for ix86_init_builtins. */
23550
23551 static void
23552 ix86_init_builtins_va_builtins_abi (void)
23553 {
23554 tree ms_va_ref, sysv_va_ref;
23555 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23556 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23557 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23558 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23559
23560 if (!TARGET_64BIT)
23561 return;
23562 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23563 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23564 ms_va_ref = build_reference_type (ms_va_list_type_node);
23565 sysv_va_ref =
23566 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23567
23568 fnvoid_va_end_ms =
23569 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23570 fnvoid_va_start_ms =
23571 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23572 fnvoid_va_end_sysv =
23573 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23574 fnvoid_va_start_sysv =
23575 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23576 NULL_TREE);
23577 fnvoid_va_copy_ms =
23578 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23579 NULL_TREE);
23580 fnvoid_va_copy_sysv =
23581 build_function_type_list (void_type_node, sysv_va_ref,
23582 sysv_va_ref, NULL_TREE);
23583
23584 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23585 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23586 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23587 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23588 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23589 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23590 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23591 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23592 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23593 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23594 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23595 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23596 }
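/* Illustrative note, not part of the compiler proper: the builtins above
   let 64-bit code handle varargs across calling conventions, e.g. a
   function declared with __attribute__ ((ms_abi)) can be written roughly
   as

       int __attribute__ ((ms_abi))
       sum (int n, ...)
       {
	 __builtin_ms_va_list ap;
	 int i, s = 0;

	 __builtin_ms_va_start (ap, n);
	 for (i = 0; i < n; i++)
	   s += __builtin_va_arg (ap, int);
	 __builtin_ms_va_end (ap);
	 return s;
       }

   using the Microsoft register-save layout even when the default ABI on
   the target is the SysV one.  */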
23597
23598 static void
23599 ix86_init_builtins (void)
23600 {
23601 tree float128_type_node = make_node (REAL_TYPE);
23602 tree ftype, decl;
23603
23604 /* The __float80 type. */
23605 if (TYPE_MODE (long_double_type_node) == XFmode)
23606 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23607 "__float80");
23608 else
23609 {
23610 /* The __float80 type. */
23611 tree float80_type_node = make_node (REAL_TYPE);
23612
23613 TYPE_PRECISION (float80_type_node) = 80;
23614 layout_type (float80_type_node);
23615 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23616 "__float80");
23617 }
23618
23619 /* The __float128 type. */
23620 TYPE_PRECISION (float128_type_node) = 128;
23621 layout_type (float128_type_node);
23622 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23623 "__float128");
23624
23625 /* TFmode support builtins. */
23626 ftype = build_function_type (float128_type_node, void_list_node);
23627 decl = add_builtin_function ("__builtin_infq", ftype,
23628 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23629 NULL, NULL_TREE);
23630 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23631
23632   /* We will expand them to normal calls if SSE2 isn't available, since
23633      they are used by libgcc.  */
23634 ftype = build_function_type_list (float128_type_node,
23635 float128_type_node,
23636 NULL_TREE);
23637 decl = add_builtin_function ("__builtin_fabsq", ftype,
23638 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23639 "__fabstf2", NULL_TREE);
23640 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23641 TREE_READONLY (decl) = 1;
23642
23643 ftype = build_function_type_list (float128_type_node,
23644 float128_type_node,
23645 float128_type_node,
23646 NULL_TREE);
23647 decl = add_builtin_function ("__builtin_copysignq", ftype,
23648 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23649 "__copysigntf3", NULL_TREE);
23650 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23651 TREE_READONLY (decl) = 1;
23652
23653 ix86_init_mmx_sse_builtins ();
23654 if (TARGET_64BIT)
23655 ix86_init_builtins_va_builtins_abi ();
23656 }
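/* Illustrative note, not part of the compiler proper: the TFmode builtins
   registered above can be used directly from C, e.g.

       __float128 inf = __builtin_infq ();
       __float128 mag = __builtin_fabsq (x);
       __float128 res = __builtin_copysignq (mag, y);

   and, as noted in the code above, fabsq/copysignq fall back to the
   libgcc routines __fabstf2 and __copysigntf3 when SSE2 is not
   available.  */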
23657
23658 /* Errors in the source file can cause expand_expr to return const0_rtx
23659 where we expect a vector. To avoid crashing, use one of the vector
23660 clear instructions. */
23661 static rtx
23662 safe_vector_operand (rtx x, enum machine_mode mode)
23663 {
23664 if (x == const0_rtx)
23665 x = CONST0_RTX (mode);
23666 return x;
23667 }
23668
23669 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23670
23671 static rtx
23672 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23673 {
23674 rtx pat;
23675 tree arg0 = CALL_EXPR_ARG (exp, 0);
23676 tree arg1 = CALL_EXPR_ARG (exp, 1);
23677 rtx op0 = expand_normal (arg0);
23678 rtx op1 = expand_normal (arg1);
23679 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23680 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23681 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23682
23683 if (VECTOR_MODE_P (mode0))
23684 op0 = safe_vector_operand (op0, mode0);
23685 if (VECTOR_MODE_P (mode1))
23686 op1 = safe_vector_operand (op1, mode1);
23687
23688 if (optimize || !target
23689 || GET_MODE (target) != tmode
23690 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23691 target = gen_reg_rtx (tmode);
23692
23693 if (GET_MODE (op1) == SImode && mode1 == TImode)
23694 {
23695 rtx x = gen_reg_rtx (V4SImode);
23696 emit_insn (gen_sse2_loadd (x, op1));
23697 op1 = gen_lowpart (TImode, x);
23698 }
23699
23700 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23701 op0 = copy_to_mode_reg (mode0, op0);
23702 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23703 op1 = copy_to_mode_reg (mode1, op1);
23704
23705 pat = GEN_FCN (icode) (target, op0, op1);
23706 if (! pat)
23707 return 0;
23708
23709 emit_insn (pat);
23710
23711 return target;
23712 }
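/* Illustrative note, not part of the compiler proper: for a plain binary
   descriptor such as the one for __builtin_ia32_paddw128 (insn code
   CODE_FOR_addv8hi3), the routine above expands both call arguments,
   copies them into V8HImode registers when the insn predicates reject
   them, and emits roughly

       (set (reg:V8HI target) (plus:V8HI (reg:V8HI op0) (reg:V8HI op1)))

   handing the target register back to the caller; the SImode/TImode fixup
   near the top only widens an int argument for the few insns whose second
   input operand is TImode.  */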
23713
23714 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23715
23716 static rtx
23717 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23718 enum multi_arg_type m_type,
23719 enum insn_code sub_code)
23720 {
23721 rtx pat;
23722 int i;
23723 int nargs;
23724 bool comparison_p = false;
23725 bool tf_p = false;
23726 bool last_arg_constant = false;
23727 int num_memory = 0;
23728 struct {
23729 rtx op;
23730 enum machine_mode mode;
23731 } args[4];
23732
23733 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23734
23735 switch (m_type)
23736 {
23737 case MULTI_ARG_3_SF:
23738 case MULTI_ARG_3_DF:
23739 case MULTI_ARG_3_DI:
23740 case MULTI_ARG_3_SI:
23741 case MULTI_ARG_3_SI_DI:
23742 case MULTI_ARG_3_HI:
23743 case MULTI_ARG_3_HI_SI:
23744 case MULTI_ARG_3_QI:
23745 case MULTI_ARG_3_PERMPS:
23746 case MULTI_ARG_3_PERMPD:
23747 nargs = 3;
23748 break;
23749
23750 case MULTI_ARG_2_SF:
23751 case MULTI_ARG_2_DF:
23752 case MULTI_ARG_2_DI:
23753 case MULTI_ARG_2_SI:
23754 case MULTI_ARG_2_HI:
23755 case MULTI_ARG_2_QI:
23756 nargs = 2;
23757 break;
23758
23759 case MULTI_ARG_2_DI_IMM:
23760 case MULTI_ARG_2_SI_IMM:
23761 case MULTI_ARG_2_HI_IMM:
23762 case MULTI_ARG_2_QI_IMM:
23763 nargs = 2;
23764 last_arg_constant = true;
23765 break;
23766
23767 case MULTI_ARG_1_SF:
23768 case MULTI_ARG_1_DF:
23769 case MULTI_ARG_1_DI:
23770 case MULTI_ARG_1_SI:
23771 case MULTI_ARG_1_HI:
23772 case MULTI_ARG_1_QI:
23773 case MULTI_ARG_1_SI_DI:
23774 case MULTI_ARG_1_HI_DI:
23775 case MULTI_ARG_1_HI_SI:
23776 case MULTI_ARG_1_QI_DI:
23777 case MULTI_ARG_1_QI_SI:
23778 case MULTI_ARG_1_QI_HI:
23779 case MULTI_ARG_1_PH2PS:
23780 case MULTI_ARG_1_PS2PH:
23781 nargs = 1;
23782 break;
23783
23784 case MULTI_ARG_2_SF_CMP:
23785 case MULTI_ARG_2_DF_CMP:
23786 case MULTI_ARG_2_DI_CMP:
23787 case MULTI_ARG_2_SI_CMP:
23788 case MULTI_ARG_2_HI_CMP:
23789 case MULTI_ARG_2_QI_CMP:
23790 nargs = 2;
23791 comparison_p = true;
23792 break;
23793
23794 case MULTI_ARG_2_SF_TF:
23795 case MULTI_ARG_2_DF_TF:
23796 case MULTI_ARG_2_DI_TF:
23797 case MULTI_ARG_2_SI_TF:
23798 case MULTI_ARG_2_HI_TF:
23799 case MULTI_ARG_2_QI_TF:
23800 nargs = 2;
23801 tf_p = true;
23802 break;
23803
23804 case MULTI_ARG_UNKNOWN:
23805 default:
23806 gcc_unreachable ();
23807 }
23808
23809 if (optimize || !target
23810 || GET_MODE (target) != tmode
23811 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23812 target = gen_reg_rtx (tmode);
23813
23814 gcc_assert (nargs <= 4);
23815
23816 for (i = 0; i < nargs; i++)
23817 {
23818 tree arg = CALL_EXPR_ARG (exp, i);
23819 rtx op = expand_normal (arg);
23820 int adjust = (comparison_p) ? 1 : 0;
23821 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23822
23823 if (last_arg_constant && i == nargs-1)
23824 {
23825 if (GET_CODE (op) != CONST_INT)
23826 {
23827 error ("last argument must be an immediate");
23828 return gen_reg_rtx (tmode);
23829 }
23830 }
23831 else
23832 {
23833 if (VECTOR_MODE_P (mode))
23834 op = safe_vector_operand (op, mode);
23835
23836 /* If we aren't optimizing, only allow one memory operand to be
23837 generated. */
23838 if (memory_operand (op, mode))
23839 num_memory++;
23840
23841 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23842
23843 if (optimize
23844 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23845 || num_memory > 1)
23846 op = force_reg (mode, op);
23847 }
23848
23849 args[i].op = op;
23850 args[i].mode = mode;
23851 }
23852
23853 switch (nargs)
23854 {
23855 case 1:
23856 pat = GEN_FCN (icode) (target, args[0].op);
23857 break;
23858
23859 case 2:
23860 if (tf_p)
23861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23862 GEN_INT ((int)sub_code));
23863 else if (! comparison_p)
23864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23865 else
23866 {
23867 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23868 args[0].op,
23869 args[1].op);
23870
23871 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23872 }
23873 break;
23874
23875 case 3:
23876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23877 break;
23878
23879 default:
23880 gcc_unreachable ();
23881 }
23882
23883 if (! pat)
23884 return 0;
23885
23886 emit_insn (pat);
23887 return target;
23888 }
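/* Illustrative note, not part of the compiler proper: a MULTI_ARG_3_SF
   descriptor such as the one for the SSE5 fused multiply-add builtin
   __builtin_ia32_fmaddps simply becomes a four-operand insn (destination
   plus three sources) through the "case 3" branch above, whereas the
   *_CMP forms build an explicit comparison rtx with gen_rtx_fmt_ee and
   pass it as the insn's condition operand, and the *_TF forms pass
   sub_code as an extra immediate instead.  */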
23889
23890 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23891 insns with vec_merge. */
23892
23893 static rtx
23894 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23895 rtx target)
23896 {
23897 rtx pat;
23898 tree arg0 = CALL_EXPR_ARG (exp, 0);
23899 rtx op1, op0 = expand_normal (arg0);
23900 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23901 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23902
23903 if (optimize || !target
23904 || GET_MODE (target) != tmode
23905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23906 target = gen_reg_rtx (tmode);
23907
23908 if (VECTOR_MODE_P (mode0))
23909 op0 = safe_vector_operand (op0, mode0);
23910
23911 if ((optimize && !register_operand (op0, mode0))
23912 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23913 op0 = copy_to_mode_reg (mode0, op0);
23914
23915 op1 = op0;
23916 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23917 op1 = copy_to_mode_reg (mode0, op1);
23918
23919 pat = GEN_FCN (icode) (target, op0, op1);
23920 if (! pat)
23921 return 0;
23922 emit_insn (pat);
23923 return target;
23924 }
23925
23926 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23927
23928 static rtx
23929 ix86_expand_sse_compare (const struct builtin_description *d,
23930 tree exp, rtx target, bool swap)
23931 {
23932 rtx pat;
23933 tree arg0 = CALL_EXPR_ARG (exp, 0);
23934 tree arg1 = CALL_EXPR_ARG (exp, 1);
23935 rtx op0 = expand_normal (arg0);
23936 rtx op1 = expand_normal (arg1);
23937 rtx op2;
23938 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23939 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23940 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23941 enum rtx_code comparison = d->comparison;
23942
23943 if (VECTOR_MODE_P (mode0))
23944 op0 = safe_vector_operand (op0, mode0);
23945 if (VECTOR_MODE_P (mode1))
23946 op1 = safe_vector_operand (op1, mode1);
23947
23948 /* Swap operands if we have a comparison that isn't available in
23949 hardware. */
23950 if (swap)
23951 {
23952 rtx tmp = gen_reg_rtx (mode1);
23953 emit_move_insn (tmp, op1);
23954 op1 = op0;
23955 op0 = tmp;
23956 }
23957
23958 if (optimize || !target
23959 || GET_MODE (target) != tmode
23960 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23961 target = gen_reg_rtx (tmode);
23962
23963 if ((optimize && !register_operand (op0, mode0))
23964 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23965 op0 = copy_to_mode_reg (mode0, op0);
23966 if ((optimize && !register_operand (op1, mode1))
23967 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23968 op1 = copy_to_mode_reg (mode1, op1);
23969
23970 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23971 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23972 if (! pat)
23973 return 0;
23974 emit_insn (pat);
23975 return target;
23976 }
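/* Illustrative note, not part of the compiler proper: the swap path above
   exists because cmpps/cmppd can only encode the "less" style predicates
   directly, so a descriptor like the one for __builtin_ia32_cmpgtps is
   registered with comparison code LT and the V4SF_FTYPE_V4SF_V4SF_SWAP
   type; exchanging the operands then turns "a > b" into "b < a" before
   the compare pattern is generated.  */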
23977
23978 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23979
23980 static rtx
23981 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23982 rtx target)
23983 {
23984 rtx pat;
23985 tree arg0 = CALL_EXPR_ARG (exp, 0);
23986 tree arg1 = CALL_EXPR_ARG (exp, 1);
23987 rtx op0 = expand_normal (arg0);
23988 rtx op1 = expand_normal (arg1);
23989 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23990 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23991 enum rtx_code comparison = d->comparison;
23992
23993 if (VECTOR_MODE_P (mode0))
23994 op0 = safe_vector_operand (op0, mode0);
23995 if (VECTOR_MODE_P (mode1))
23996 op1 = safe_vector_operand (op1, mode1);
23997
23998 /* Swap operands if we have a comparison that isn't available in
23999 hardware. */
24000 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
24001 {
24002 rtx tmp = op1;
24003 op1 = op0;
24004 op0 = tmp;
24005 }
24006
24007 target = gen_reg_rtx (SImode);
24008 emit_move_insn (target, const0_rtx);
24009 target = gen_rtx_SUBREG (QImode, target, 0);
24010
24011 if ((optimize && !register_operand (op0, mode0))
24012 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24013 op0 = copy_to_mode_reg (mode0, op0);
24014 if ((optimize && !register_operand (op1, mode1))
24015 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24016 op1 = copy_to_mode_reg (mode1, op1);
24017
24018 pat = GEN_FCN (d->icode) (op0, op1);
24019 if (! pat)
24020 return 0;
24021 emit_insn (pat);
24022 emit_insn (gen_rtx_SET (VOIDmode,
24023 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24024 gen_rtx_fmt_ee (comparison, QImode,
24025 SET_DEST (pat),
24026 const0_rtx)));
24027
24028 return SUBREG_REG (target);
24029 }
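/* Illustrative note, not part of the compiler proper: a comi builtin such
   as __builtin_ia32_comilt expands to the scalar compare insn followed by
   the SET emitted above, which stores the descriptor's comparison applied
   to the flags register into the low byte of a zeroed SImode pseudo; in
   effect a comiss/comisd plus a setcc.  */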
24030
24031 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24032
24033 static rtx
24034 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24035 rtx target)
24036 {
24037 rtx pat;
24038 tree arg0 = CALL_EXPR_ARG (exp, 0);
24039 tree arg1 = CALL_EXPR_ARG (exp, 1);
24040 rtx op0 = expand_normal (arg0);
24041 rtx op1 = expand_normal (arg1);
24042 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24043 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24044 enum rtx_code comparison = d->comparison;
24045
24046 if (VECTOR_MODE_P (mode0))
24047 op0 = safe_vector_operand (op0, mode0);
24048 if (VECTOR_MODE_P (mode1))
24049 op1 = safe_vector_operand (op1, mode1);
24050
24051 target = gen_reg_rtx (SImode);
24052 emit_move_insn (target, const0_rtx);
24053 target = gen_rtx_SUBREG (QImode, target, 0);
24054
24055 if ((optimize && !register_operand (op0, mode0))
24056 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24057 op0 = copy_to_mode_reg (mode0, op0);
24058 if ((optimize && !register_operand (op1, mode1))
24059 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24060 op1 = copy_to_mode_reg (mode1, op1);
24061
24062 pat = GEN_FCN (d->icode) (op0, op1);
24063 if (! pat)
24064 return 0;
24065 emit_insn (pat);
24066 emit_insn (gen_rtx_SET (VOIDmode,
24067 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24068 gen_rtx_fmt_ee (comparison, QImode,
24069 SET_DEST (pat),
24070 const0_rtx)));
24071
24072 return SUBREG_REG (target);
24073 }
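/* Illustrative note, not part of the compiler proper: the *_PTEST
   descriptors are routed here, so __builtin_ia32_ptestz128 (the builtin
   behind _mm_testz_si128) becomes a ptest insn followed by a read of ZF,
   while the ...c128 and ...nzc128 variants read CF and "both CF and ZF
   clear" respectively; which flag is extracted is determined by the
   comparison code stored in the descriptor.  */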
24074
24075 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24076
24077 static rtx
24078 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24079 tree exp, rtx target)
24080 {
24081 rtx pat;
24082 tree arg0 = CALL_EXPR_ARG (exp, 0);
24083 tree arg1 = CALL_EXPR_ARG (exp, 1);
24084 tree arg2 = CALL_EXPR_ARG (exp, 2);
24085 tree arg3 = CALL_EXPR_ARG (exp, 3);
24086 tree arg4 = CALL_EXPR_ARG (exp, 4);
24087 rtx scratch0, scratch1;
24088 rtx op0 = expand_normal (arg0);
24089 rtx op1 = expand_normal (arg1);
24090 rtx op2 = expand_normal (arg2);
24091 rtx op3 = expand_normal (arg3);
24092 rtx op4 = expand_normal (arg4);
24093 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24094
24095 tmode0 = insn_data[d->icode].operand[0].mode;
24096 tmode1 = insn_data[d->icode].operand[1].mode;
24097 modev2 = insn_data[d->icode].operand[2].mode;
24098 modei3 = insn_data[d->icode].operand[3].mode;
24099 modev4 = insn_data[d->icode].operand[4].mode;
24100 modei5 = insn_data[d->icode].operand[5].mode;
24101 modeimm = insn_data[d->icode].operand[6].mode;
24102
24103 if (VECTOR_MODE_P (modev2))
24104 op0 = safe_vector_operand (op0, modev2);
24105 if (VECTOR_MODE_P (modev4))
24106 op2 = safe_vector_operand (op2, modev4);
24107
24108 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24109 op0 = copy_to_mode_reg (modev2, op0);
24110 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24111 op1 = copy_to_mode_reg (modei3, op1);
24112 if ((optimize && !register_operand (op2, modev4))
24113 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24114 op2 = copy_to_mode_reg (modev4, op2);
24115 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24116 op3 = copy_to_mode_reg (modei5, op3);
24117
24118 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24119 {
24120       error ("the fifth argument must be an 8-bit immediate");
24121 return const0_rtx;
24122 }
24123
24124 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24125 {
24126 if (optimize || !target
24127 || GET_MODE (target) != tmode0
24128 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24129 target = gen_reg_rtx (tmode0);
24130
24131 scratch1 = gen_reg_rtx (tmode1);
24132
24133 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24134 }
24135 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24136 {
24137 if (optimize || !target
24138 || GET_MODE (target) != tmode1
24139 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24140 target = gen_reg_rtx (tmode1);
24141
24142 scratch0 = gen_reg_rtx (tmode0);
24143
24144 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24145 }
24146 else
24147 {
24148 gcc_assert (d->flag);
24149
24150 scratch0 = gen_reg_rtx (tmode0);
24151 scratch1 = gen_reg_rtx (tmode1);
24152
24153 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24154 }
24155
24156 if (! pat)
24157 return 0;
24158
24159 emit_insn (pat);
24160
24161 if (d->flag)
24162 {
24163 target = gen_reg_rtx (SImode);
24164 emit_move_insn (target, const0_rtx);
24165 target = gen_rtx_SUBREG (QImode, target, 0);
24166
24167 emit_insn
24168 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24169 gen_rtx_fmt_ee (EQ, QImode,
24170 gen_rtx_REG ((enum machine_mode) d->flag,
24171 FLAGS_REG),
24172 const0_rtx)));
24173 return SUBREG_REG (target);
24174 }
24175 else
24176 return target;
24177 }
24178
24179
24180 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24181
24182 static rtx
24183 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24184 tree exp, rtx target)
24185 {
24186 rtx pat;
24187 tree arg0 = CALL_EXPR_ARG (exp, 0);
24188 tree arg1 = CALL_EXPR_ARG (exp, 1);
24189 tree arg2 = CALL_EXPR_ARG (exp, 2);
24190 rtx scratch0, scratch1;
24191 rtx op0 = expand_normal (arg0);
24192 rtx op1 = expand_normal (arg1);
24193 rtx op2 = expand_normal (arg2);
24194 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24195
24196 tmode0 = insn_data[d->icode].operand[0].mode;
24197 tmode1 = insn_data[d->icode].operand[1].mode;
24198 modev2 = insn_data[d->icode].operand[2].mode;
24199 modev3 = insn_data[d->icode].operand[3].mode;
24200 modeimm = insn_data[d->icode].operand[4].mode;
24201
24202 if (VECTOR_MODE_P (modev2))
24203 op0 = safe_vector_operand (op0, modev2);
24204 if (VECTOR_MODE_P (modev3))
24205 op1 = safe_vector_operand (op1, modev3);
24206
24207 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24208 op0 = copy_to_mode_reg (modev2, op0);
24209 if ((optimize && !register_operand (op1, modev3))
24210 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24211 op1 = copy_to_mode_reg (modev3, op1);
24212
24213 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24214 {
24215       error ("the third argument must be an 8-bit immediate");
24216 return const0_rtx;
24217 }
24218
24219 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24220 {
24221 if (optimize || !target
24222 || GET_MODE (target) != tmode0
24223 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24224 target = gen_reg_rtx (tmode0);
24225
24226 scratch1 = gen_reg_rtx (tmode1);
24227
24228 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24229 }
24230 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24231 {
24232 if (optimize || !target
24233 || GET_MODE (target) != tmode1
24234 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24235 target = gen_reg_rtx (tmode1);
24236
24237 scratch0 = gen_reg_rtx (tmode0);
24238
24239 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24240 }
24241 else
24242 {
24243 gcc_assert (d->flag);
24244
24245 scratch0 = gen_reg_rtx (tmode0);
24246 scratch1 = gen_reg_rtx (tmode1);
24247
24248 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24249 }
24250
24251 if (! pat)
24252 return 0;
24253
24254 emit_insn (pat);
24255
24256 if (d->flag)
24257 {
24258 target = gen_reg_rtx (SImode);
24259 emit_move_insn (target, const0_rtx);
24260 target = gen_rtx_SUBREG (QImode, target, 0);
24261
24262 emit_insn
24263 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24264 gen_rtx_fmt_ee (EQ, QImode,
24265 gen_rtx_REG ((enum machine_mode) d->flag,
24266 FLAGS_REG),
24267 const0_rtx)));
24268 return SUBREG_REG (target);
24269 }
24270 else
24271 return target;
24272 }
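/* Illustrative note, not part of the compiler proper: from user code these
   string-compare expanders are reached through the SSE4.2 intrinsics,
   e.g.

       #include <nmmintrin.h>
       int first_equal_index (__m128i a, __m128i b)
       {
	 return _mm_cmpistri (a, b, _SIDD_CMP_EQUAL_EACH);
       }

   uses __builtin_ia32_pcmpistri128 and takes the IX86_BUILTIN_PCMPISTRI128
   branch above, while the flag-returning variants (pcmpistria128 and
   friends) take the final branch and extract the requested flag from
   FLAGS_REG.  */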
24273
24274 /* Subroutine of ix86_expand_builtin to take care of insns with
24275    a variable number of operands.  */
24276
24277 static rtx
24278 ix86_expand_args_builtin (const struct builtin_description *d,
24279 tree exp, rtx target)
24280 {
24281 rtx pat, real_target;
24282 unsigned int i, nargs;
24283 unsigned int nargs_constant = 0;
24284 int num_memory = 0;
24285 struct
24286 {
24287 rtx op;
24288 enum machine_mode mode;
24289 } args[4];
24290 bool last_arg_count = false;
24291 enum insn_code icode = d->icode;
24292 const struct insn_data *insn_p = &insn_data[icode];
24293 enum machine_mode tmode = insn_p->operand[0].mode;
24294 enum machine_mode rmode = VOIDmode;
24295 bool swap = false;
24296 enum rtx_code comparison = d->comparison;
24297
24298 switch ((enum ix86_builtin_type) d->flag)
24299 {
24300 case INT_FTYPE_V8SF_V8SF_PTEST:
24301 case INT_FTYPE_V4DI_V4DI_PTEST:
24302 case INT_FTYPE_V4DF_V4DF_PTEST:
24303 case INT_FTYPE_V4SF_V4SF_PTEST:
24304 case INT_FTYPE_V2DI_V2DI_PTEST:
24305 case INT_FTYPE_V2DF_V2DF_PTEST:
24306 return ix86_expand_sse_ptest (d, exp, target);
24307 case FLOAT128_FTYPE_FLOAT128:
24308 case FLOAT_FTYPE_FLOAT:
24309 case INT64_FTYPE_V4SF:
24310 case INT64_FTYPE_V2DF:
24311 case INT_FTYPE_V16QI:
24312 case INT_FTYPE_V8QI:
24313 case INT_FTYPE_V8SF:
24314 case INT_FTYPE_V4DF:
24315 case INT_FTYPE_V4SF:
24316 case INT_FTYPE_V2DF:
24317 case V16QI_FTYPE_V16QI:
24318 case V8SI_FTYPE_V8SF:
24319 case V8SI_FTYPE_V4SI:
24320 case V8HI_FTYPE_V8HI:
24321 case V8HI_FTYPE_V16QI:
24322 case V8QI_FTYPE_V8QI:
24323 case V8SF_FTYPE_V8SF:
24324 case V8SF_FTYPE_V8SI:
24325 case V8SF_FTYPE_V4SF:
24326 case V4SI_FTYPE_V4SI:
24327 case V4SI_FTYPE_V16QI:
24328 case V4SI_FTYPE_V4SF:
24329 case V4SI_FTYPE_V8SI:
24330 case V4SI_FTYPE_V8HI:
24331 case V4SI_FTYPE_V4DF:
24332 case V4SI_FTYPE_V2DF:
24333 case V4HI_FTYPE_V4HI:
24334 case V4DF_FTYPE_V4DF:
24335 case V4DF_FTYPE_V4SI:
24336 case V4DF_FTYPE_V4SF:
24337 case V4DF_FTYPE_V2DF:
24338 case V4SF_FTYPE_V4SF:
24339 case V4SF_FTYPE_V4SI:
24340 case V4SF_FTYPE_V8SF:
24341 case V4SF_FTYPE_V4DF:
24342 case V4SF_FTYPE_V2DF:
24343 case V2DI_FTYPE_V2DI:
24344 case V2DI_FTYPE_V16QI:
24345 case V2DI_FTYPE_V8HI:
24346 case V2DI_FTYPE_V4SI:
24347 case V2DF_FTYPE_V2DF:
24348 case V2DF_FTYPE_V4SI:
24349 case V2DF_FTYPE_V4DF:
24350 case V2DF_FTYPE_V4SF:
24351 case V2DF_FTYPE_V2SI:
24352 case V2SI_FTYPE_V2SI:
24353 case V2SI_FTYPE_V4SF:
24354 case V2SI_FTYPE_V2SF:
24355 case V2SI_FTYPE_V2DF:
24356 case V2SF_FTYPE_V2SF:
24357 case V2SF_FTYPE_V2SI:
24358 nargs = 1;
24359 break;
24360 case V4SF_FTYPE_V4SF_VEC_MERGE:
24361 case V2DF_FTYPE_V2DF_VEC_MERGE:
24362 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24363 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24364 case V16QI_FTYPE_V16QI_V16QI:
24365 case V16QI_FTYPE_V8HI_V8HI:
24366 case V8QI_FTYPE_V8QI_V8QI:
24367 case V8QI_FTYPE_V4HI_V4HI:
24368 case V8HI_FTYPE_V8HI_V8HI:
24369 case V8HI_FTYPE_V16QI_V16QI:
24370 case V8HI_FTYPE_V4SI_V4SI:
24371 case V8SF_FTYPE_V8SF_V8SF:
24372 case V8SF_FTYPE_V8SF_V8SI:
24373 case V4SI_FTYPE_V4SI_V4SI:
24374 case V4SI_FTYPE_V8HI_V8HI:
24375 case V4SI_FTYPE_V4SF_V4SF:
24376 case V4SI_FTYPE_V2DF_V2DF:
24377 case V4HI_FTYPE_V4HI_V4HI:
24378 case V4HI_FTYPE_V8QI_V8QI:
24379 case V4HI_FTYPE_V2SI_V2SI:
24380 case V4DF_FTYPE_V4DF_V4DF:
24381 case V4DF_FTYPE_V4DF_V4DI:
24382 case V4SF_FTYPE_V4SF_V4SF:
24383 case V4SF_FTYPE_V4SF_V4SI:
24384 case V4SF_FTYPE_V4SF_V2SI:
24385 case V4SF_FTYPE_V4SF_V2DF:
24386 case V4SF_FTYPE_V4SF_DI:
24387 case V4SF_FTYPE_V4SF_SI:
24388 case V2DI_FTYPE_V2DI_V2DI:
24389 case V2DI_FTYPE_V16QI_V16QI:
24390 case V2DI_FTYPE_V4SI_V4SI:
24391 case V2DI_FTYPE_V2DI_V16QI:
24392 case V2DI_FTYPE_V2DF_V2DF:
24393 case V2SI_FTYPE_V2SI_V2SI:
24394 case V2SI_FTYPE_V4HI_V4HI:
24395 case V2SI_FTYPE_V2SF_V2SF:
24396 case V2DF_FTYPE_V2DF_V2DF:
24397 case V2DF_FTYPE_V2DF_V4SF:
24398 case V2DF_FTYPE_V2DF_V2DI:
24399 case V2DF_FTYPE_V2DF_DI:
24400 case V2DF_FTYPE_V2DF_SI:
24401 case V2SF_FTYPE_V2SF_V2SF:
24402 case V1DI_FTYPE_V1DI_V1DI:
24403 case V1DI_FTYPE_V8QI_V8QI:
24404 case V1DI_FTYPE_V2SI_V2SI:
24405 if (comparison == UNKNOWN)
24406 return ix86_expand_binop_builtin (icode, exp, target);
24407 nargs = 2;
24408 break;
24409 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24410 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24411 gcc_assert (comparison != UNKNOWN);
24412 nargs = 2;
24413 swap = true;
24414 break;
24415 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24416 case V8HI_FTYPE_V8HI_SI_COUNT:
24417 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24418 case V4SI_FTYPE_V4SI_SI_COUNT:
24419 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24420 case V4HI_FTYPE_V4HI_SI_COUNT:
24421 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24422 case V2DI_FTYPE_V2DI_SI_COUNT:
24423 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24424 case V2SI_FTYPE_V2SI_SI_COUNT:
24425 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24426 case V1DI_FTYPE_V1DI_SI_COUNT:
24427 nargs = 2;
24428 last_arg_count = true;
24429 break;
24430 case UINT64_FTYPE_UINT64_UINT64:
24431 case UINT_FTYPE_UINT_UINT:
24432 case UINT_FTYPE_UINT_USHORT:
24433 case UINT_FTYPE_UINT_UCHAR:
24434 nargs = 2;
24435 break;
24436 case V2DI2TI_FTYPE_V2DI_INT:
24437 nargs = 2;
24438 rmode = V2DImode;
24439 nargs_constant = 1;
24440 break;
24441 case V8HI_FTYPE_V8HI_INT:
24442 case V8SF_FTYPE_V8SF_INT:
24443 case V4SI_FTYPE_V4SI_INT:
24444 case V4SI_FTYPE_V8SI_INT:
24445 case V4HI_FTYPE_V4HI_INT:
24446 case V4DF_FTYPE_V4DF_INT:
24447 case V4SF_FTYPE_V4SF_INT:
24448 case V4SF_FTYPE_V8SF_INT:
24449 case V2DI_FTYPE_V2DI_INT:
24450 case V2DF_FTYPE_V2DF_INT:
24451 case V2DF_FTYPE_V4DF_INT:
24452 nargs = 2;
24453 nargs_constant = 1;
24454 break;
24455 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24456 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24457 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24458 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24459 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24460 nargs = 3;
24461 break;
24462 case V16QI_FTYPE_V16QI_V16QI_INT:
24463 case V8HI_FTYPE_V8HI_V8HI_INT:
24464 case V8SI_FTYPE_V8SI_V8SI_INT:
24465 case V8SI_FTYPE_V8SI_V4SI_INT:
24466 case V8SF_FTYPE_V8SF_V8SF_INT:
24467 case V8SF_FTYPE_V8SF_V4SF_INT:
24468 case V4SI_FTYPE_V4SI_V4SI_INT:
24469 case V4DF_FTYPE_V4DF_V4DF_INT:
24470 case V4DF_FTYPE_V4DF_V2DF_INT:
24471 case V4SF_FTYPE_V4SF_V4SF_INT:
24472 case V2DI_FTYPE_V2DI_V2DI_INT:
24473 case V2DF_FTYPE_V2DF_V2DF_INT:
24474 nargs = 3;
24475 nargs_constant = 1;
24476 break;
24477 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24478 nargs = 3;
24479 rmode = V2DImode;
24480 nargs_constant = 1;
24481 break;
24482 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24483 nargs = 3;
24484 rmode = DImode;
24485 nargs_constant = 1;
24486 break;
24487 case V2DI_FTYPE_V2DI_UINT_UINT:
24488 nargs = 3;
24489 nargs_constant = 2;
24490 break;
24491 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24492 nargs = 4;
24493 nargs_constant = 2;
24494 break;
24495 default:
24496 gcc_unreachable ();
24497 }
24498
24499 gcc_assert (nargs <= ARRAY_SIZE (args));
24500
24501 if (comparison != UNKNOWN)
24502 {
24503 gcc_assert (nargs == 2);
24504 return ix86_expand_sse_compare (d, exp, target, swap);
24505 }
24506
24507 if (rmode == VOIDmode || rmode == tmode)
24508 {
24509 if (optimize
24510 || target == 0
24511 || GET_MODE (target) != tmode
24512 || ! (*insn_p->operand[0].predicate) (target, tmode))
24513 target = gen_reg_rtx (tmode);
24514 real_target = target;
24515 }
24516 else
24517 {
24518 target = gen_reg_rtx (rmode);
24519 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24520 }
24521
24522 for (i = 0; i < nargs; i++)
24523 {
24524 tree arg = CALL_EXPR_ARG (exp, i);
24525 rtx op = expand_normal (arg);
24526 enum machine_mode mode = insn_p->operand[i + 1].mode;
24527 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24528
24529 if (last_arg_count && (i + 1) == nargs)
24530 {
24531 	      /* SIMD shift insns take either an 8-bit immediate or a register
24532 		 as the count, but the builtin functions take an int; if the
24533 		 count doesn't match the predicate, we put it in a register.  */
24534 if (!match)
24535 {
24536 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24537 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24538 op = copy_to_reg (op);
24539 }
24540 }
24541 else if ((nargs - i) <= nargs_constant)
24542 {
24543 if (!match)
24544 switch (icode)
24545 {
24546 case CODE_FOR_sse4_1_roundpd:
24547 case CODE_FOR_sse4_1_roundps:
24548 case CODE_FOR_sse4_1_roundsd:
24549 case CODE_FOR_sse4_1_roundss:
24550 case CODE_FOR_sse4_1_blendps:
24551 case CODE_FOR_avx_blendpd256:
24552 case CODE_FOR_avx_vpermilv4df:
24553 case CODE_FOR_avx_roundpd256:
24554 case CODE_FOR_avx_roundps256:
24555 error ("the last argument must be a 4-bit immediate");
24556 return const0_rtx;
24557
24558 case CODE_FOR_sse4_1_blendpd:
24559 case CODE_FOR_avx_vpermilv2df:
24560 error ("the last argument must be a 2-bit immediate");
24561 return const0_rtx;
24562
24563 case CODE_FOR_avx_vextractf128v4df:
24564 case CODE_FOR_avx_vextractf128v8sf:
24565 case CODE_FOR_avx_vextractf128v8si:
24566 case CODE_FOR_avx_vinsertf128v4df:
24567 case CODE_FOR_avx_vinsertf128v8sf:
24568 case CODE_FOR_avx_vinsertf128v8si:
24569 error ("the last argument must be a 1-bit immediate");
24570 return const0_rtx;
24571
24572 case CODE_FOR_avx_cmpsdv2df3:
24573 case CODE_FOR_avx_cmpssv4sf3:
24574 case CODE_FOR_avx_cmppdv2df3:
24575 case CODE_FOR_avx_cmppsv4sf3:
24576 case CODE_FOR_avx_cmppdv4df3:
24577 case CODE_FOR_avx_cmppsv8sf3:
24578 error ("the last argument must be a 5-bit immediate");
24579 return const0_rtx;
24580
24581 default:
24582 switch (nargs_constant)
24583 {
24584 case 2:
24585 if ((nargs - i) == nargs_constant)
24586 {
24587 error ("the next to last argument must be an 8-bit immediate");
24588 break;
24589 }
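/* FALLTHRU: otherwise this is the last constant argument.  */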
24590 case 1:
24591 error ("the last argument must be an 8-bit immediate");
24592 break;
24593 default:
24594 gcc_unreachable ();
24595 }
24596 return const0_rtx;
24597 }
24598 }
24599 else
24600 {
24601 if (VECTOR_MODE_P (mode))
24602 op = safe_vector_operand (op, mode);
24603
24604 /* If we aren't optimizing, only allow one memory operand to
24605 be generated. */
24606 if (memory_operand (op, mode))
24607 num_memory++;
24608
24609 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24610 {
24611 if (optimize || !match || num_memory > 1)
24612 op = copy_to_mode_reg (mode, op);
24613 }
24614 else
24615 {
24616 op = copy_to_reg (op);
24617 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24618 }
24619 }
24620
24621 args[i].op = op;
24622 args[i].mode = mode;
24623 }
24624
24625 switch (nargs)
24626 {
24627 case 1:
24628 pat = GEN_FCN (icode) (real_target, args[0].op);
24629 break;
24630 case 2:
24631 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24632 break;
24633 case 3:
24634 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24635 args[2].op);
24636 break;
24637 case 4:
24638 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24639 args[2].op, args[3].op);
24640 break;
24641 default:
24642 gcc_unreachable ();
24643 }
24644
24645 if (! pat)
24646 return 0;
24647
24648 emit_insn (pat);
24649 return target;
24650 }
24651
24652 /* Subroutine of ix86_expand_builtin to take care of special insns
24653 with variable number of operands. */
24654
24655 static rtx
24656 ix86_expand_special_args_builtin (const struct builtin_description *d,
24657 tree exp, rtx target)
24658 {
24659 tree arg;
24660 rtx pat, op;
24661 unsigned int i, nargs, arg_adjust, memory;
24662 struct
24663 {
24664 rtx op;
24665 enum machine_mode mode;
24666 } args[2];
24667 enum insn_code icode = d->icode;
24668 bool last_arg_constant = false;
24669 const struct insn_data *insn_p = &insn_data[icode];
24670 enum machine_mode tmode = insn_p->operand[0].mode;
24671 enum { load, store } klass;
24672
24673 switch ((enum ix86_special_builtin_type) d->flag)
24674 {
24675 case VOID_FTYPE_VOID:
24676 emit_insn (GEN_FCN (icode) (target));
24677 return 0;
24678 case V2DI_FTYPE_PV2DI:
24679 case V32QI_FTYPE_PCCHAR:
24680 case V16QI_FTYPE_PCCHAR:
24681 case V8SF_FTYPE_PCV4SF:
24682 case V8SF_FTYPE_PCFLOAT:
24683 case V4SF_FTYPE_PCFLOAT:
24684 case V4DF_FTYPE_PCV2DF:
24685 case V4DF_FTYPE_PCDOUBLE:
24686 case V2DF_FTYPE_PCDOUBLE:
24687 nargs = 1;
24688 klass = load;
24689 memory = 0;
24690 break;
24691 case VOID_FTYPE_PV2SF_V4SF:
24692 case VOID_FTYPE_PV4DI_V4DI:
24693 case VOID_FTYPE_PV2DI_V2DI:
24694 case VOID_FTYPE_PCHAR_V32QI:
24695 case VOID_FTYPE_PCHAR_V16QI:
24696 case VOID_FTYPE_PFLOAT_V8SF:
24697 case VOID_FTYPE_PFLOAT_V4SF:
24698 case VOID_FTYPE_PDOUBLE_V4DF:
24699 case VOID_FTYPE_PDOUBLE_V2DF:
24700 case VOID_FTYPE_PDI_DI:
24701 case VOID_FTYPE_PINT_INT:
24702 nargs = 1;
24703 klass = store;
24704 /* Reserve memory operand for target. */
24705 memory = ARRAY_SIZE (args);
24706 break;
24707 case V4SF_FTYPE_V4SF_PCV2SF:
24708 case V2DF_FTYPE_V2DF_PCDOUBLE:
24709 nargs = 2;
24710 klass = load;
24711 memory = 1;
24712 break;
24713 case V8SF_FTYPE_PCV8SF_V8SF:
24714 case V4DF_FTYPE_PCV4DF_V4DF:
24715 case V4SF_FTYPE_PCV4SF_V4SF:
24716 case V2DF_FTYPE_PCV2DF_V2DF:
24717 nargs = 2;
24718 klass = load;
24719 memory = 0;
24720 break;
24721 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24722 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24723 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24724 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24725 nargs = 2;
24726 klass = store;
24727 /* Reserve memory operand for target. */
24728 memory = ARRAY_SIZE (args);
24729 break;
24730 default:
24731 gcc_unreachable ();
24732 }
24733
24734 gcc_assert (nargs <= ARRAY_SIZE (args));
24735
24736 if (klass == store)
24737 {
24738 arg = CALL_EXPR_ARG (exp, 0);
24739 op = expand_normal (arg);
24740 gcc_assert (target == 0);
24741 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24742 arg_adjust = 1;
24743 }
24744 else
24745 {
24746 arg_adjust = 0;
24747 if (optimize
24748 || target == 0
24749 || GET_MODE (target) != tmode
24750 || ! (*insn_p->operand[0].predicate) (target, tmode))
24751 target = gen_reg_rtx (tmode);
24752 }
24753
24754 for (i = 0; i < nargs; i++)
24755 {
24756 enum machine_mode mode = insn_p->operand[i + 1].mode;
24757 bool match;
24758
24759 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24760 op = expand_normal (arg);
24761 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24762
24763 if (last_arg_constant && (i + 1) == nargs)
24764 {
24765 if (!match)
24766 switch (icode)
24767 {
24768 default:
24769 error ("the last argument must be an 8-bit immediate");
24770 return const0_rtx;
24771 }
24772 }
24773 else
24774 {
24775 if (i == memory)
24776 {
24777 /* This must be the memory operand. */
24778 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24779 gcc_assert (GET_MODE (op) == mode
24780 || GET_MODE (op) == VOIDmode);
24781 }
24782 else
24783 {
24784 /* This must be a register.  */
24785 if (VECTOR_MODE_P (mode))
24786 op = safe_vector_operand (op, mode);
24787
24788 gcc_assert (GET_MODE (op) == mode
24789 || GET_MODE (op) == VOIDmode);
24790 op = copy_to_mode_reg (mode, op);
24791 }
24792 }
24793
24794 args[i].op = op;
24795 args[i].mode = mode;
24796 }
24797
24798 switch (nargs)
24799 {
24800 case 1:
24801 pat = GEN_FCN (icode) (target, args[0].op);
24802 break;
24803 case 2:
24804 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24805 break;
24806 default:
24807 gcc_unreachable ();
24808 }
24809
24810 if (! pat)
24811 return 0;
24812 emit_insn (pat);
24813 return klass == store ? 0 : target;
24814 }
24815
24816 /* Return the integer constant in ARG. Constrain it to be in the range
24817 of the subparts of VEC_TYPE; issue an error if not. */
24818
24819 static int
24820 get_element_number (tree vec_type, tree arg)
24821 {
24822 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24823
24824 if (!host_integerp (arg, 1)
24825 || (elt = tree_low_cst (arg, 1), elt > max))
24826 {
24827 error ("selector must be an integer constant in the range 0..%wi", max);
24828 return 0;
24829 }
24830
24831 return elt;
24832 }
24833
24834 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24835 ix86_expand_vector_init. We DO have language-level syntax for this, in
24836 the form of (type){ init-list }. Except that since we can't place emms
24837 instructions from inside the compiler, we can't allow the use of MMX
24838 registers unless the user explicitly asks for it. So we do *not* define
24839 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24840 we have builtins invoked by mmintrin.h that give us license to emit
24841 these sorts of instructions. */
24842
24843 static rtx
24844 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24845 {
24846 enum machine_mode tmode = TYPE_MODE (type);
24847 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24848 int i, n_elt = GET_MODE_NUNITS (tmode);
24849 rtvec v = rtvec_alloc (n_elt);
24850
24851 gcc_assert (VECTOR_MODE_P (tmode));
24852 gcc_assert (call_expr_nargs (exp) == n_elt);
24853
24854 for (i = 0; i < n_elt; ++i)
24855 {
24856 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24857 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24858 }
24859
24860 if (!target || !register_operand (target, tmode))
24861 target = gen_reg_rtx (tmode);
24862
24863 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24864 return target;
24865 }
24866
24867 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24868 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24869 had a language-level syntax for referencing vector elements. */
24870
24871 static rtx
24872 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24873 {
24874 enum machine_mode tmode, mode0;
24875 tree arg0, arg1;
24876 int elt;
24877 rtx op0;
24878
24879 arg0 = CALL_EXPR_ARG (exp, 0);
24880 arg1 = CALL_EXPR_ARG (exp, 1);
24881
24882 op0 = expand_normal (arg0);
24883 elt = get_element_number (TREE_TYPE (arg0), arg1);
24884
24885 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24886 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24887 gcc_assert (VECTOR_MODE_P (mode0));
24888
24889 op0 = force_reg (mode0, op0);
24890
24891 if (optimize || !target || !register_operand (target, tmode))
24892 target = gen_reg_rtx (tmode);
24893
24894 ix86_expand_vector_extract (true, target, op0, elt);
24895
24896 return target;
24897 }
24898
24899 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24900 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24901 a language-level syntax for referencing vector elements. */
24902
24903 static rtx
24904 ix86_expand_vec_set_builtin (tree exp)
24905 {
24906 enum machine_mode tmode, mode1;
24907 tree arg0, arg1, arg2;
24908 int elt;
24909 rtx op0, op1, target;
24910
24911 arg0 = CALL_EXPR_ARG (exp, 0);
24912 arg1 = CALL_EXPR_ARG (exp, 1);
24913 arg2 = CALL_EXPR_ARG (exp, 2);
24914
24915 tmode = TYPE_MODE (TREE_TYPE (arg0));
24916 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24917 gcc_assert (VECTOR_MODE_P (tmode));
24918
24919 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24920 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24921 elt = get_element_number (TREE_TYPE (arg0), arg2);
24922
24923 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24924 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24925
24926 op0 = force_reg (tmode, op0);
24927 op1 = force_reg (mode1, op1);
24928
24929 /* OP0 is the source of these builtin functions and shouldn't be
24930 modified.  Create a copy, use it, and return it as the target.  */
24931 target = gen_reg_rtx (tmode);
24932 emit_move_insn (target, op0);
24933 ix86_expand_vector_set (true, target, op1, elt);
24934
24935 return target;
24936 }
24937
24938 /* Expand an expression EXP that calls a built-in function,
24939 with result going to TARGET if that's convenient
24940 (and in mode MODE if that's convenient).
24941 SUBTARGET may be used as the target for computing one of EXP's operands.
24942 IGNORE is nonzero if the value is to be ignored. */
24943
24944 static rtx
24945 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24946 enum machine_mode mode ATTRIBUTE_UNUSED,
24947 int ignore ATTRIBUTE_UNUSED)
24948 {
24949 const struct builtin_description *d;
24950 size_t i;
24951 enum insn_code icode;
24952 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24953 tree arg0, arg1, arg2;
24954 rtx op0, op1, op2, pat;
24955 enum machine_mode mode0, mode1, mode2;
24956 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24957
24958 /* Determine whether the builtin function is available under the current ISA.
24959 Originally the builtin was not created if it wasn't applicable to the
24960 current ISA based on the command-line switches.  With function-specific
24961 options, we need to check, in the context of the function making the call,
24962 whether it is supported.  */
24963 if (ix86_builtins_isa[fcode].isa
24964 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24965 {
24966 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24967 NULL, NULL, false);
24968
24969 if (!opts)
24970 error ("%qE needs unknown isa option", fndecl);
24971 else
24972 {
24973 gcc_assert (opts != NULL);
24974 error ("%qE needs isa option %s", fndecl, opts);
24975 free (opts);
24976 }
24977 return const0_rtx;
24978 }
24979
24980 switch (fcode)
24981 {
24982 case IX86_BUILTIN_MASKMOVQ:
24983 case IX86_BUILTIN_MASKMOVDQU:
24984 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24985 ? CODE_FOR_mmx_maskmovq
24986 : CODE_FOR_sse2_maskmovdqu);
24987 /* Note the arg order is different from the operand order. */
24988 arg1 = CALL_EXPR_ARG (exp, 0);
24989 arg2 = CALL_EXPR_ARG (exp, 1);
24990 arg0 = CALL_EXPR_ARG (exp, 2);
24991 op0 = expand_normal (arg0);
24992 op1 = expand_normal (arg1);
24993 op2 = expand_normal (arg2);
24994 mode0 = insn_data[icode].operand[0].mode;
24995 mode1 = insn_data[icode].operand[1].mode;
24996 mode2 = insn_data[icode].operand[2].mode;
24997
24998 op0 = force_reg (Pmode, op0);
24999 op0 = gen_rtx_MEM (mode1, op0);
25000
25001 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25002 op0 = copy_to_mode_reg (mode0, op0);
25003 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25004 op1 = copy_to_mode_reg (mode1, op1);
25005 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25006 op2 = copy_to_mode_reg (mode2, op2);
25007 pat = GEN_FCN (icode) (op0, op1, op2);
25008 if (! pat)
25009 return 0;
25010 emit_insn (pat);
25011 return 0;
25012
25013 case IX86_BUILTIN_LDMXCSR:
25014 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25015 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25016 emit_move_insn (target, op0);
25017 emit_insn (gen_sse_ldmxcsr (target));
25018 return 0;
25019
25020 case IX86_BUILTIN_STMXCSR:
25021 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25022 emit_insn (gen_sse_stmxcsr (target));
25023 return copy_to_mode_reg (SImode, target);
25024
25025 case IX86_BUILTIN_CLFLUSH:
25026 arg0 = CALL_EXPR_ARG (exp, 0);
25027 op0 = expand_normal (arg0);
25028 icode = CODE_FOR_sse2_clflush;
25029 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25030 op0 = copy_to_mode_reg (Pmode, op0);
25031
25032 emit_insn (gen_sse2_clflush (op0));
25033 return 0;
25034
25035 case IX86_BUILTIN_MONITOR:
25036 arg0 = CALL_EXPR_ARG (exp, 0);
25037 arg1 = CALL_EXPR_ARG (exp, 1);
25038 arg2 = CALL_EXPR_ARG (exp, 2);
25039 op0 = expand_normal (arg0);
25040 op1 = expand_normal (arg1);
25041 op2 = expand_normal (arg2);
25042 if (!REG_P (op0))
25043 op0 = copy_to_mode_reg (Pmode, op0);
25044 if (!REG_P (op1))
25045 op1 = copy_to_mode_reg (SImode, op1);
25046 if (!REG_P (op2))
25047 op2 = copy_to_mode_reg (SImode, op2);
25048 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25049 return 0;
25050
25051 case IX86_BUILTIN_MWAIT:
25052 arg0 = CALL_EXPR_ARG (exp, 0);
25053 arg1 = CALL_EXPR_ARG (exp, 1);
25054 op0 = expand_normal (arg0);
25055 op1 = expand_normal (arg1);
25056 if (!REG_P (op0))
25057 op0 = copy_to_mode_reg (SImode, op0);
25058 if (!REG_P (op1))
25059 op1 = copy_to_mode_reg (SImode, op1);
25060 emit_insn (gen_sse3_mwait (op0, op1));
25061 return 0;
25062
25063 case IX86_BUILTIN_VEC_INIT_V2SI:
25064 case IX86_BUILTIN_VEC_INIT_V4HI:
25065 case IX86_BUILTIN_VEC_INIT_V8QI:
25066 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25067
25068 case IX86_BUILTIN_VEC_EXT_V2DF:
25069 case IX86_BUILTIN_VEC_EXT_V2DI:
25070 case IX86_BUILTIN_VEC_EXT_V4SF:
25071 case IX86_BUILTIN_VEC_EXT_V4SI:
25072 case IX86_BUILTIN_VEC_EXT_V8HI:
25073 case IX86_BUILTIN_VEC_EXT_V2SI:
25074 case IX86_BUILTIN_VEC_EXT_V4HI:
25075 case IX86_BUILTIN_VEC_EXT_V16QI:
25076 return ix86_expand_vec_ext_builtin (exp, target);
25077
25078 case IX86_BUILTIN_VEC_SET_V2DI:
25079 case IX86_BUILTIN_VEC_SET_V4SF:
25080 case IX86_BUILTIN_VEC_SET_V4SI:
25081 case IX86_BUILTIN_VEC_SET_V8HI:
25082 case IX86_BUILTIN_VEC_SET_V4HI:
25083 case IX86_BUILTIN_VEC_SET_V16QI:
25084 return ix86_expand_vec_set_builtin (exp);
25085
25086 case IX86_BUILTIN_INFQ:
25087 {
25088 REAL_VALUE_TYPE inf;
25089 rtx tmp;
25090
25091 real_inf (&inf);
25092 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25093
25094 tmp = validize_mem (force_const_mem (mode, tmp));
25095
25096 if (target == 0)
25097 target = gen_reg_rtx (mode);
25098
25099 emit_move_insn (target, tmp);
25100 return target;
25101 }
25102
25103 default:
25104 break;
25105 }
25106
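/* Look the builtin up in the generic descriptor tables and dispatch
   to the corresponding expander.  */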
25107 for (i = 0, d = bdesc_special_args;
25108 i < ARRAY_SIZE (bdesc_special_args);
25109 i++, d++)
25110 if (d->code == fcode)
25111 return ix86_expand_special_args_builtin (d, exp, target);
25112
25113 for (i = 0, d = bdesc_args;
25114 i < ARRAY_SIZE (bdesc_args);
25115 i++, d++)
25116 if (d->code == fcode)
25117 switch (fcode)
25118 {
25119 case IX86_BUILTIN_FABSQ:
25120 case IX86_BUILTIN_COPYSIGNQ:
25121 if (!TARGET_SSE2)
25122 /* Emit a normal call if SSE2 isn't available. */
25123 return expand_call (exp, target, ignore);
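/* FALLTHRU: with SSE2 these are expanded like any other builtin.  */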
25124 default:
25125 return ix86_expand_args_builtin (d, exp, target);
25126 }
25127
25128 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25129 if (d->code == fcode)
25130 return ix86_expand_sse_comi (d, exp, target);
25131
25132 for (i = 0, d = bdesc_pcmpestr;
25133 i < ARRAY_SIZE (bdesc_pcmpestr);
25134 i++, d++)
25135 if (d->code == fcode)
25136 return ix86_expand_sse_pcmpestr (d, exp, target);
25137
25138 for (i = 0, d = bdesc_pcmpistr;
25139 i < ARRAY_SIZE (bdesc_pcmpistr);
25140 i++, d++)
25141 if (d->code == fcode)
25142 return ix86_expand_sse_pcmpistr (d, exp, target);
25143
25144 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25145 if (d->code == fcode)
25146 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25147 (enum multi_arg_type)d->flag,
25148 d->comparison);
25149
25150 gcc_unreachable ();
25151 }
25152
25153 /* Returns a function decl for a vectorized version of the builtin function
25154 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25155 if it is not available. */
25156
25157 static tree
25158 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25159 tree type_in)
25160 {
25161 enum machine_mode in_mode, out_mode;
25162 int in_n, out_n;
25163
25164 if (TREE_CODE (type_out) != VECTOR_TYPE
25165 || TREE_CODE (type_in) != VECTOR_TYPE)
25166 return NULL_TREE;
25167
25168 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25169 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25170 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25171 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25172
25173 switch (fn)
25174 {
25175 case BUILT_IN_SQRT:
25176 if (out_mode == DFmode && out_n == 2
25177 && in_mode == DFmode && in_n == 2)
25178 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25179 break;
25180
25181 case BUILT_IN_SQRTF:
25182 if (out_mode == SFmode && out_n == 4
25183 && in_mode == SFmode && in_n == 4)
25184 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25185 break;
25186
25187 case BUILT_IN_LRINT:
25188 if (out_mode == SImode && out_n == 4
25189 && in_mode == DFmode && in_n == 2)
25190 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25191 break;
25192
25193 case BUILT_IN_LRINTF:
25194 if (out_mode == SImode && out_n == 4
25195 && in_mode == SFmode && in_n == 4)
25196 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25197 break;
25198
25199 default:
25200 ;
25201 }
25202
25203 /* Dispatch to a handler for a vectorization library. */
25204 if (ix86_veclib_handler)
25205 return (*ix86_veclib_handler)(fn, type_out, type_in);
25206
25207 return NULL_TREE;
25208 }
25209
25210 /* Handler for an SVML-style interface to
25211 a library with vectorized intrinsics. */
25212
25213 static tree
25214 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25215 {
25216 char name[20];
25217 tree fntype, new_fndecl, args;
25218 unsigned arity;
25219 const char *bname;
25220 enum machine_mode el_mode, in_mode;
25221 int n, in_n;
25222
25223 /* The SVML is suitable for unsafe math only. */
25224 if (!flag_unsafe_math_optimizations)
25225 return NULL_TREE;
25226
25227 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25228 n = TYPE_VECTOR_SUBPARTS (type_out);
25229 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25230 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25231 if (el_mode != in_mode
25232 || n != in_n)
25233 return NULL_TREE;
25234
25235 switch (fn)
25236 {
25237 case BUILT_IN_EXP:
25238 case BUILT_IN_LOG:
25239 case BUILT_IN_LOG10:
25240 case BUILT_IN_POW:
25241 case BUILT_IN_TANH:
25242 case BUILT_IN_TAN:
25243 case BUILT_IN_ATAN:
25244 case BUILT_IN_ATAN2:
25245 case BUILT_IN_ATANH:
25246 case BUILT_IN_CBRT:
25247 case BUILT_IN_SINH:
25248 case BUILT_IN_SIN:
25249 case BUILT_IN_ASINH:
25250 case BUILT_IN_ASIN:
25251 case BUILT_IN_COSH:
25252 case BUILT_IN_COS:
25253 case BUILT_IN_ACOSH:
25254 case BUILT_IN_ACOS:
25255 if (el_mode != DFmode || n != 2)
25256 return NULL_TREE;
25257 break;
25258
25259 case BUILT_IN_EXPF:
25260 case BUILT_IN_LOGF:
25261 case BUILT_IN_LOG10F:
25262 case BUILT_IN_POWF:
25263 case BUILT_IN_TANHF:
25264 case BUILT_IN_TANF:
25265 case BUILT_IN_ATANF:
25266 case BUILT_IN_ATAN2F:
25267 case BUILT_IN_ATANHF:
25268 case BUILT_IN_CBRTF:
25269 case BUILT_IN_SINHF:
25270 case BUILT_IN_SINF:
25271 case BUILT_IN_ASINHF:
25272 case BUILT_IN_ASINF:
25273 case BUILT_IN_COSHF:
25274 case BUILT_IN_COSF:
25275 case BUILT_IN_ACOSHF:
25276 case BUILT_IN_ACOSF:
25277 if (el_mode != SFmode || n != 4)
25278 return NULL_TREE;
25279 break;
25280
25281 default:
25282 return NULL_TREE;
25283 }
25284
25285 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25286
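/* Build the SVML function name: "vmls<Func>4" for the 4-wide
   single-precision variant and "vmld<Func>2" for the 2-wide
   double-precision variant, where <Func> is the builtin's name
   without the "__builtin_" prefix; log is spelled "Ln".  */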
25287 if (fn == BUILT_IN_LOGF)
25288 strcpy (name, "vmlsLn4");
25289 else if (fn == BUILT_IN_LOG)
25290 strcpy (name, "vmldLn2");
25291 else if (n == 4)
25292 {
25293 sprintf (name, "vmls%s", bname+10);
25294 name[strlen (name)-1] = '4';
25295 }
25296 else
25297 sprintf (name, "vmld%s2", bname+10);
25298
25299 /* Convert to uppercase. */
25300 name[4] &= ~0x20;
25301
25302 arity = 0;
25303 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25304 args = TREE_CHAIN (args))
25305 arity++;
25306
25307 if (arity == 1)
25308 fntype = build_function_type_list (type_out, type_in, NULL);
25309 else
25310 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25311
25312 /* Build a function declaration for the vectorized function. */
25313 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25314 TREE_PUBLIC (new_fndecl) = 1;
25315 DECL_EXTERNAL (new_fndecl) = 1;
25316 DECL_IS_NOVOPS (new_fndecl) = 1;
25317 TREE_READONLY (new_fndecl) = 1;
25318
25319 return new_fndecl;
25320 }
25321
25322 /* Handler for an ACML-style interface to
25323 a library with vectorized intrinsics. */
25324
25325 static tree
25326 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25327 {
25328 char name[20] = "__vr.._";
25329 tree fntype, new_fndecl, args;
25330 unsigned arity;
25331 const char *bname;
25332 enum machine_mode el_mode, in_mode;
25333 int n, in_n;
25334
25335 /* The ACML is 64-bit only and suitable for unsafe math only, as
25336 it does not correctly support parts of IEEE arithmetic with the
25337 required precision, such as denormals.  */
25338 if (!TARGET_64BIT
25339 || !flag_unsafe_math_optimizations)
25340 return NULL_TREE;
25341
25342 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25343 n = TYPE_VECTOR_SUBPARTS (type_out);
25344 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25345 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25346 if (el_mode != in_mode
25347 || n != in_n)
25348 return NULL_TREE;
25349
25350 switch (fn)
25351 {
25352 case BUILT_IN_SIN:
25353 case BUILT_IN_COS:
25354 case BUILT_IN_EXP:
25355 case BUILT_IN_LOG:
25356 case BUILT_IN_LOG2:
25357 case BUILT_IN_LOG10:
25358 name[4] = 'd';
25359 name[5] = '2';
25360 if (el_mode != DFmode
25361 || n != 2)
25362 return NULL_TREE;
25363 break;
25364
25365 case BUILT_IN_SINF:
25366 case BUILT_IN_COSF:
25367 case BUILT_IN_EXPF:
25368 case BUILT_IN_POWF:
25369 case BUILT_IN_LOGF:
25370 case BUILT_IN_LOG2F:
25371 case BUILT_IN_LOG10F:
25372 name[4] = 's';
25373 name[5] = '4';
25374 if (el_mode != SFmode
25375 || n != 4)
25376 return NULL_TREE;
25377 break;
25378
25379 default:
25380 return NULL_TREE;
25381 }
25382
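/* Build the ACML function name: "__vrd2_<func>" for double or
   "__vrs4_<func>" for float, where <func> is the builtin's name
   without the "__builtin_" prefix.  */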
25383 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25384 sprintf (name + 7, "%s", bname+10);
25385
25386 arity = 0;
25387 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25388 args = TREE_CHAIN (args))
25389 arity++;
25390
25391 if (arity == 1)
25392 fntype = build_function_type_list (type_out, type_in, NULL);
25393 else
25394 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25395
25396 /* Build a function declaration for the vectorized function. */
25397 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25398 TREE_PUBLIC (new_fndecl) = 1;
25399 DECL_EXTERNAL (new_fndecl) = 1;
25400 DECL_IS_NOVOPS (new_fndecl) = 1;
25401 TREE_READONLY (new_fndecl) = 1;
25402
25403 return new_fndecl;
25404 }
25405
25406
25407 /* Returns a decl of a function that implements conversion of an integer vector
25408 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25409 side of the conversion.
25410 Return NULL_TREE if it is not available. */
25411
25412 static tree
25413 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25414 {
25415 if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE
25416 /* There are only conversions from/to signed integers. */
25417 || TYPE_UNSIGNED (TREE_TYPE (type)))
25418 return NULL_TREE;
25419
25420 switch (code)
25421 {
25422 case FLOAT_EXPR:
25423 switch (TYPE_MODE (type))
25424 {
25425 case V4SImode:
25426 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25427 default:
25428 return NULL_TREE;
25429 }
25430
25431 case FIX_TRUNC_EXPR:
25432 switch (TYPE_MODE (type))
25433 {
25434 case V4SImode:
25435 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25436 default:
25437 return NULL_TREE;
25438 }
25439 default:
25440 return NULL_TREE;
25441
25442 }
25443 }
25444
25445 /* Return the decl of a target-specific builtin that implements
25446 the reciprocal of the function FN, or NULL_TREE if it is not available.  */
25447
25448 static tree
25449 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25450 bool sqrt ATTRIBUTE_UNUSED)
25451 {
25452 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25453 && flag_finite_math_only && !flag_trapping_math
25454 && flag_unsafe_math_optimizations))
25455 return NULL_TREE;
25456
25457 if (md_fn)
25458 /* Machine dependent builtins. */
25459 switch (fn)
25460 {
25461 /* Vectorized version of sqrt to rsqrt conversion. */
25462 case IX86_BUILTIN_SQRTPS_NR:
25463 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25464
25465 default:
25466 return NULL_TREE;
25467 }
25468 else
25469 /* Normal builtins. */
25470 switch (fn)
25471 {
25472 /* Sqrt to rsqrt conversion. */
25473 case BUILT_IN_SQRTF:
25474 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25475
25476 default:
25477 return NULL_TREE;
25478 }
25479 }
25480
25481 /* Store OPERAND to the memory after reload is completed. This means
25482 that we can't easily use assign_stack_local. */
25483 rtx
25484 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25485 {
25486 rtx result;
25487
25488 gcc_assert (reload_completed);
25489 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25490 {
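/* The red zone below the stack pointer is available, so store the
   operand there without adjusting the stack pointer.  */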
25491 result = gen_rtx_MEM (mode,
25492 gen_rtx_PLUS (Pmode,
25493 stack_pointer_rtx,
25494 GEN_INT (-RED_ZONE_SIZE)));
25495 emit_move_insn (result, operand);
25496 }
25497 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25498 {
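/* No red zone is available; push the operand onto the stack with a
   pre-decrement DImode store.  */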
25499 switch (mode)
25500 {
25501 case HImode:
25502 case SImode:
25503 operand = gen_lowpart (DImode, operand);
25504 /* FALLTHRU */
25505 case DImode:
25506 emit_insn (
25507 gen_rtx_SET (VOIDmode,
25508 gen_rtx_MEM (DImode,
25509 gen_rtx_PRE_DEC (DImode,
25510 stack_pointer_rtx)),
25511 operand));
25512 break;
25513 default:
25514 gcc_unreachable ();
25515 }
25516 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25517 }
25518 else
25519 {
25520 switch (mode)
25521 {
25522 case DImode:
25523 {
25524 rtx operands[2];
25525 split_di (&operand, 1, operands, operands + 1);
25526 emit_insn (
25527 gen_rtx_SET (VOIDmode,
25528 gen_rtx_MEM (SImode,
25529 gen_rtx_PRE_DEC (Pmode,
25530 stack_pointer_rtx)),
25531 operands[1]));
25532 emit_insn (
25533 gen_rtx_SET (VOIDmode,
25534 gen_rtx_MEM (SImode,
25535 gen_rtx_PRE_DEC (Pmode,
25536 stack_pointer_rtx)),
25537 operands[0]));
25538 }
25539 break;
25540 case HImode:
25541 /* Store HImodes as SImodes. */
25542 operand = gen_lowpart (SImode, operand);
25543 /* FALLTHRU */
25544 case SImode:
25545 emit_insn (
25546 gen_rtx_SET (VOIDmode,
25547 gen_rtx_MEM (GET_MODE (operand),
25548 gen_rtx_PRE_DEC (SImode,
25549 stack_pointer_rtx)),
25550 operand));
25551 break;
25552 default:
25553 gcc_unreachable ();
25554 }
25555 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25556 }
25557 return result;
25558 }
25559
25560 /* Free operand from the memory. */
25561 void
25562 ix86_free_from_memory (enum machine_mode mode)
25563 {
25564 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25565 {
25566 int size;
25567
25568 if (mode == DImode || TARGET_64BIT)
25569 size = 8;
25570 else
25571 size = 4;
25572 /* Use LEA to deallocate stack space.  In peephole2 it will be converted
25573 to a pop or add instruction if registers are available.  */
25574 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25575 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25576 GEN_INT (size))));
25577 }
25578 }
25579
25580 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25581 QImode must go into class Q_REGS.
25582 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25583 movdf to do mem-to-mem moves through integer regs. */
25584 enum reg_class
25585 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25586 {
25587 enum machine_mode mode = GET_MODE (x);
25588
25589 /* We're only allowed to return a subclass of REGCLASS.  Many of the
25590 following checks fail for NO_REGS, so eliminate that early.  */
25591 if (regclass == NO_REGS)
25592 return NO_REGS;
25593
25594 /* All classes can load zeros. */
25595 if (x == CONST0_RTX (mode))
25596 return regclass;
25597
25598 /* Force constants into memory if we are loading a (nonzero) constant into
25599 an MMX or SSE register. This is because there are no MMX/SSE instructions
25600 to load from a constant. */
25601 if (CONSTANT_P (x)
25602 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25603 return NO_REGS;
25604
25605 /* Prefer SSE regs only, if we can use them for math. */
25606 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25607 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25608
25609 /* Floating-point constants need more complex checks. */
25610 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25611 {
25612 /* General regs can load everything. */
25613 if (reg_class_subset_p (regclass, GENERAL_REGS))
25614 return regclass;
25615
25616 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25617 zero above. We only want to wind up preferring 80387 registers if
25618 we plan on doing computation with them. */
25619 if (TARGET_80387
25620 && standard_80387_constant_p (x))
25621 {
25622 /* Limit class to non-sse. */
25623 if (regclass == FLOAT_SSE_REGS)
25624 return FLOAT_REGS;
25625 if (regclass == FP_TOP_SSE_REGS)
25626 return FP_TOP_REG;
25627 if (regclass == FP_SECOND_SSE_REGS)
25628 return FP_SECOND_REG;
25629 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25630 return regclass;
25631 }
25632
25633 return NO_REGS;
25634 }
25635
25636 /* Generally when we see PLUS here, it's the function invariant
25637 (plus soft-fp const_int), which can only be computed into general
25638 regs.  */
25639 if (GET_CODE (x) == PLUS)
25640 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25641
25642 /* QImode constants are easy to load, but non-constant QImode data
25643 must go into Q_REGS. */
25644 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25645 {
25646 if (reg_class_subset_p (regclass, Q_REGS))
25647 return regclass;
25648 if (reg_class_subset_p (Q_REGS, regclass))
25649 return Q_REGS;
25650 return NO_REGS;
25651 }
25652
25653 return regclass;
25654 }
25655
25656 /* Discourage putting floating-point values in SSE registers unless
25657 SSE math is being used, and likewise for the 387 registers. */
25658 enum reg_class
25659 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25660 {
25661 enum machine_mode mode = GET_MODE (x);
25662
25663 /* Restrict the output reload class to the register bank that we are doing
25664 math on.  If we would like not to return a subset of REGCLASS, reject this
25665 alternative: if reload cannot do this, it will still use its choice.  */
25667 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25668 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25669
25670 if (X87_FLOAT_MODE_P (mode))
25671 {
25672 if (regclass == FP_TOP_SSE_REGS)
25673 return FP_TOP_REG;
25674 else if (regclass == FP_SECOND_SSE_REGS)
25675 return FP_SECOND_REG;
25676 else
25677 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25678 }
25679
25680 return regclass;
25681 }
25682
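/* Secondary reload hook: return the register class required as an
   intermediate for moving X of mode MODE into (IN_P) or out of a
   register of class RCLASS, or NO_REGS if none is needed.  */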
25683 static enum reg_class
25684 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25685 enum machine_mode mode,
25686 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25687 {
25688 /* QImode spills from non-QI registers require
25689 intermediate register on 32bit targets. */
25690 if (!in_p && mode == QImode && !TARGET_64BIT
25691 && (rclass == GENERAL_REGS
25692 || rclass == LEGACY_REGS
25693 || rclass == INDEX_REGS))
25694 {
25695 int regno;
25696
25697 if (REG_P (x))
25698 regno = REGNO (x);
25699 else
25700 regno = -1;
25701
25702 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25703 regno = true_regnum (x);
25704
25705 /* Return Q_REGS if the operand is in memory. */
25706 if (regno == -1)
25707 return Q_REGS;
25708 }
25709
25710 return NO_REGS;
25711 }
25712
25713 /* If we are copying between general and FP registers, we need a memory
25714 location. The same is true for SSE and MMX registers.
25715
25716 To optimize register_move_cost performance, allow inline variant.
25717
25718 The macro can't work reliably when one of the CLASSES is a class containing
25719 registers from multiple units (SSE, MMX, integer).  We avoid this by never
25720 combining those units in a single alternative in the machine description.
25721 Ensure that this constraint holds to avoid unexpected surprises.
25722
25723 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25724 enforce these sanity checks. */
25725
25726 static inline int
25727 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25728 enum machine_mode mode, int strict)
25729 {
25730 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25731 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25732 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25733 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25734 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25735 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25736 {
25737 gcc_assert (!strict);
25738 return true;
25739 }
25740
25741 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25742 return true;
25743
25744 /* ??? This is a lie. We do have moves between mmx/general, and for
25745 mmx/sse2. But by saying we need secondary memory we discourage the
25746 register allocator from using the mmx registers unless needed. */
25747 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25748 return true;
25749
25750 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25751 {
25752 /* SSE1 doesn't have any direct moves from other classes. */
25753 if (!TARGET_SSE2)
25754 return true;
25755
25756 /* If the target says that inter-unit moves are more expensive
25757 than moving through memory, then don't generate them. */
25758 if (!TARGET_INTER_UNIT_MOVES)
25759 return true;
25760
25761 /* Between SSE and general, we have moves no larger than word size. */
25762 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25763 return true;
25764 }
25765
25766 return false;
25767 }
25768
25769 int
25770 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25771 enum machine_mode mode, int strict)
25772 {
25773 return inline_secondary_memory_needed (class1, class2, mode, strict);
25774 }
25775
25776 /* Return true if the registers in REGCLASS cannot represent the change from
25777 mode FROM to mode TO.  */
25778
25779 bool
25780 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25781 enum reg_class regclass)
25782 {
25783 if (from == to)
25784 return false;
25785
25786 /* x87 registers can't do subreg at all, as all values are reformatted
25787 to extended precision. */
25788 if (MAYBE_FLOAT_CLASS_P (regclass))
25789 return true;
25790
25791 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25792 {
25793 /* Vector registers do not support QI or HImode loads. If we don't
25794 disallow a change to these modes, reload will assume it's ok to
25795 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25796 the vec_dupv4hi pattern. */
25797 if (GET_MODE_SIZE (from) < 4)
25798 return true;
25799
25800 /* Vector registers do not support subreg with nonzero offsets, which
25801 are otherwise valid for integer registers. Since we can't see
25802 whether we have a nonzero offset from here, prohibit all
25803 nonparadoxical subregs changing size. */
25804 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25805 return true;
25806 }
25807
25808 return false;
25809 }
25810
25811 /* Return the cost of moving data of mode M between a
25812 register and memory. A value of 2 is the default; this cost is
25813 relative to those in `REGISTER_MOVE_COST'.
25814
25815 This function is used extensively by register_move_cost that is used to
25816 build tables at startup. Make it inline in this case.
25817 When IN is 2, return maximum of in and out move cost.
25818
25819 If moving between registers and memory is more expensive than
25820 between two registers, you should define this macro to express the
25821 relative cost.
25822
25823 Also model the increased cost of moving QImode registers in
25824 non-Q_REGS classes.
25825 */
25826 static inline int
25827 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25828 int in)
25829 {
25830 int cost;
25831 if (FLOAT_CLASS_P (regclass))
25832 {
25833 int index;
25834 switch (mode)
25835 {
25836 case SFmode:
25837 index = 0;
25838 break;
25839 case DFmode:
25840 index = 1;
25841 break;
25842 case XFmode:
25843 index = 2;
25844 break;
25845 default:
25846 return 100;
25847 }
25848 if (in == 2)
25849 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25850 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25851 }
25852 if (SSE_CLASS_P (regclass))
25853 {
25854 int index;
25855 switch (GET_MODE_SIZE (mode))
25856 {
25857 case 4:
25858 index = 0;
25859 break;
25860 case 8:
25861 index = 1;
25862 break;
25863 case 16:
25864 index = 2;
25865 break;
25866 default:
25867 return 100;
25868 }
25869 if (in == 2)
25870 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25871 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25872 }
25873 if (MMX_CLASS_P (regclass))
25874 {
25875 int index;
25876 switch (GET_MODE_SIZE (mode))
25877 {
25878 case 4:
25879 index = 0;
25880 break;
25881 case 8:
25882 index = 1;
25883 break;
25884 default:
25885 return 100;
25886 }
25887 if (in == 2)
25888 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25889 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25890 }
25891 switch (GET_MODE_SIZE (mode))
25892 {
25893 case 1:
25894 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25895 {
25896 if (!in)
25897 return ix86_cost->int_store[0];
25898 if (TARGET_PARTIAL_REG_DEPENDENCY
25899 && optimize_function_for_speed_p (cfun))
25900 cost = ix86_cost->movzbl_load;
25901 else
25902 cost = ix86_cost->int_load[0];
25903 if (in == 2)
25904 return MAX (cost, ix86_cost->int_store[0]);
25905 return cost;
25906 }
25907 else
25908 {
25909 if (in == 2)
25910 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25911 if (in)
25912 return ix86_cost->movzbl_load;
25913 else
25914 return ix86_cost->int_store[0] + 4;
25915 }
25916 break;
25917 case 2:
25918 if (in == 2)
25919 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25920 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25921 default:
25922 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25923 if (mode == TFmode)
25924 mode = XFmode;
25925 if (in == 2)
25926 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25927 else if (in)
25928 cost = ix86_cost->int_load[2];
25929 else
25930 cost = ix86_cost->int_store[2];
25931 return (cost * (((int) GET_MODE_SIZE (mode)
25932 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25933 }
25934 }
25935
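/* Return the cost of moving data of mode MODE between a register of
   class REGCLASS and memory; a wrapper around inline_memory_move_cost
   above.  */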
25936 int
25937 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25938 {
25939 return inline_memory_move_cost (mode, regclass, in);
25940 }
25941
25942
25943 /* Return the cost of moving data from a register in class CLASS1 to
25944 one in class CLASS2.
25945
25946 It is not required that the cost always equal 2 when FROM is the same as TO;
25947 on some machines it is expensive to move between registers if they are not
25948 general registers. */
25949
25950 int
25951 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25952 enum reg_class class2)
25953 {
25954 /* In case we require secondary memory, compute cost of the store followed
25955 by load. In order to avoid bad register allocation choices, we need
25956 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25957
25958 if (inline_secondary_memory_needed (class1, class2, mode, 0))
25959 {
25960 int cost = 1;
25961
25962 cost += inline_memory_move_cost (mode, class1, 2);
25963 cost += inline_memory_move_cost (mode, class2, 2);
25964
25965 /* In case of copying from general_purpose_register we may emit multiple
25966 stores followed by single load causing memory size mismatch stall.
25967 Count this as arbitrarily high cost of 20. */
25968 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25969 cost += 20;
25970
25971 /* In the case of FP/MMX moves, the registers actually overlap, and we
25972 have to switch modes in order to treat them differently. */
25973 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25974 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25975 cost += 20;
25976
25977 return cost;
25978 }
25979
25980 /* Moves between SSE/MMX and integer unit are expensive. */
25981 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25982 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25983
25984 /* ??? By keeping the returned value relatively high, we limit the number
25985 of moves between integer and MMX/SSE registers for all targets.
25986 Additionally, a high value prevents a problem with x86_modes_tieable_p(),
25987 where integer modes in MMX/SSE registers are not tieable
25988 because of missing QImode and HImode moves to, from, or between
25989 MMX/SSE registers.  */
25990 return MAX (8, ix86_cost->mmxsse_to_integer);
25991
25992 if (MAYBE_FLOAT_CLASS_P (class1))
25993 return ix86_cost->fp_move;
25994 if (MAYBE_SSE_CLASS_P (class1))
25995 return ix86_cost->sse_move;
25996 if (MAYBE_MMX_CLASS_P (class1))
25997 return ix86_cost->mmx_move;
25998 return 2;
25999 }
26000
26001 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26002
26003 bool
26004 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26005 {
26006 /* Flags, and only flags, can hold CCmode values.  */
26007 if (CC_REGNO_P (regno))
26008 return GET_MODE_CLASS (mode) == MODE_CC;
26009 if (GET_MODE_CLASS (mode) == MODE_CC
26010 || GET_MODE_CLASS (mode) == MODE_RANDOM
26011 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26012 return 0;
26013 if (FP_REGNO_P (regno))
26014 return VALID_FP_MODE_P (mode);
26015 if (SSE_REGNO_P (regno))
26016 {
26017 /* We implement the move patterns for all vector modes into and
26018 out of SSE registers, even when no operation instructions
26019 are available. OImode move is available only when AVX is
26020 enabled. */
26021 return ((TARGET_AVX && mode == OImode)
26022 || VALID_AVX256_REG_MODE (mode)
26023 || VALID_SSE_REG_MODE (mode)
26024 || VALID_SSE2_REG_MODE (mode)
26025 || VALID_MMX_REG_MODE (mode)
26026 || VALID_MMX_REG_MODE_3DNOW (mode));
26027 }
26028 if (MMX_REGNO_P (regno))
26029 {
26030 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26031 so if the register is available at all, then we can move data of
26032 the given mode into or out of it. */
26033 return (VALID_MMX_REG_MODE (mode)
26034 || VALID_MMX_REG_MODE_3DNOW (mode));
26035 }
26036
26037 if (mode == QImode)
26038 {
26039 /* Take care with QImode values - they can be in non-QI regs,
26040 but then they cause partial register stalls.  */
26041 if (regno <= BX_REG || TARGET_64BIT)
26042 return 1;
26043 if (!TARGET_PARTIAL_REG_STALL)
26044 return 1;
26045 return reload_in_progress || reload_completed;
26046 }
26047 /* We handle both integer and floats in the general purpose registers. */
26048 else if (VALID_INT_MODE_P (mode))
26049 return 1;
26050 else if (VALID_FP_MODE_P (mode))
26051 return 1;
26052 else if (VALID_DFP_MODE_P (mode))
26053 return 1;
26054 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26055 on to use that value in smaller contexts, this can easily force a
26056 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26057 supporting DImode, allow it. */
26058 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26059 return 1;
26060
26061 return 0;
26062 }
26063
26064 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26065 tieable integer mode. */
26066
26067 static bool
26068 ix86_tieable_integer_mode_p (enum machine_mode mode)
26069 {
26070 switch (mode)
26071 {
26072 case HImode:
26073 case SImode:
26074 return true;
26075
26076 case QImode:
26077 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26078
26079 case DImode:
26080 return TARGET_64BIT;
26081
26082 default:
26083 return false;
26084 }
26085 }
26086
26087 /* Return true if MODE1 is accessible in a register that can hold MODE2
26088 without copying. That is, all register classes that can hold MODE2
26089 can also hold MODE1. */
26090
26091 bool
26092 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26093 {
26094 if (mode1 == mode2)
26095 return true;
26096
26097 if (ix86_tieable_integer_mode_p (mode1)
26098 && ix86_tieable_integer_mode_p (mode2))
26099 return true;
26100
26101 /* MODE2 being XFmode implies fp stack or general regs, which means we
26102 can tie any smaller floating point modes to it. Note that we do not
26103 tie this with TFmode. */
26104 if (mode2 == XFmode)
26105 return mode1 == SFmode || mode1 == DFmode;
26106
26107 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26108 that we can tie it with SFmode. */
26109 if (mode2 == DFmode)
26110 return mode1 == SFmode;
26111
26112 /* If MODE2 is only appropriate for an SSE register, then tie with
26113 any other mode acceptable to SSE registers. */
26114 if (GET_MODE_SIZE (mode2) == 16
26115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26116 return (GET_MODE_SIZE (mode1) == 16
26117 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26118
26119 /* If MODE2 is appropriate for an MMX register, then tie
26120 with any other mode acceptable to MMX registers. */
26121 if (GET_MODE_SIZE (mode2) == 8
26122 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26123 return (GET_MODE_SIZE (mode1) == 8
26124 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26125
26126 return false;
26127 }
26128
26129 /* Compute a (partial) cost for rtx X. Return true if the complete
26130 cost has been computed, and false if subexpressions should be
26131 scanned. In either case, *TOTAL contains the cost result. */
26132
26133 static bool
26134 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26135 {
26136 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26137 enum machine_mode mode = GET_MODE (x);
26138 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26139
26140 switch (code)
26141 {
26142 case CONST_INT:
26143 case CONST:
26144 case LABEL_REF:
26145 case SYMBOL_REF:
26146 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26147 *total = 3;
26148 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26149 *total = 2;
26150 else if (flag_pic && SYMBOLIC_CONST (x)
26151 && (!TARGET_64BIT
26152 || (GET_CODE (x) != LABEL_REF
26153 && (GET_CODE (x) != SYMBOL_REF
26154 || !SYMBOL_REF_LOCAL_P (x)))))
26155 *total = 1;
26156 else
26157 *total = 0;
26158 return true;
26159
26160 case CONST_DOUBLE:
26161 if (mode == VOIDmode)
26162 *total = 0;
26163 else
26164 switch (standard_80387_constant_p (x))
26165 {
26166 case 1: /* 0.0 */
26167 *total = 1;
26168 break;
26169 default: /* Other constants */
26170 *total = 2;
26171 break;
26172 case 0:
26173 case -1:
26174 /* Start with (MEM (SYMBOL_REF)), since that's where
26175 it'll probably end up. Add a penalty for size. */
26176 *total = (COSTS_N_INSNS (1)
26177 + (flag_pic != 0 && !TARGET_64BIT)
26178 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26179 break;
26180 }
26181 return true;
26182
26183 case ZERO_EXTEND:
26184 /* Zero extension is often completely free on x86_64, so make
26185 it as cheap as possible.  */
26186 if (TARGET_64BIT && mode == DImode
26187 && GET_MODE (XEXP (x, 0)) == SImode)
26188 *total = 1;
26189 else if (TARGET_ZERO_EXTEND_WITH_AND)
26190 *total = cost->add;
26191 else
26192 *total = cost->movzx;
26193 return false;
26194
26195 case SIGN_EXTEND:
26196 *total = cost->movsx;
26197 return false;
26198
26199 case ASHIFT:
26200 if (CONST_INT_P (XEXP (x, 1))
26201 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26202 {
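/* Small constant shifts can be done with an add (shift by 1) or an
   lea (shift by 2 or 3) when that is no more expensive.  */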
26203 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26204 if (value == 1)
26205 {
26206 *total = cost->add;
26207 return false;
26208 }
26209 if ((value == 2 || value == 3)
26210 && cost->lea <= cost->shift_const)
26211 {
26212 *total = cost->lea;
26213 return false;
26214 }
26215 }
26216 /* FALLTHRU */
26217
26218 case ROTATE:
26219 case ASHIFTRT:
26220 case LSHIFTRT:
26221 case ROTATERT:
26222 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26223 {
26224 if (CONST_INT_P (XEXP (x, 1)))
26225 {
26226 if (INTVAL (XEXP (x, 1)) > 32)
26227 *total = cost->shift_const + COSTS_N_INSNS (2);
26228 else
26229 *total = cost->shift_const * 2;
26230 }
26231 else
26232 {
26233 if (GET_CODE (XEXP (x, 1)) == AND)
26234 *total = cost->shift_var * 2;
26235 else
26236 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26237 }
26238 }
26239 else
26240 {
26241 if (CONST_INT_P (XEXP (x, 1)))
26242 *total = cost->shift_const;
26243 else
26244 *total = cost->shift_var;
26245 }
26246 return false;
26247
26248 case MULT:
26249 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26250 {
26251 /* ??? SSE scalar cost should be used here. */
26252 *total = cost->fmul;
26253 return false;
26254 }
26255 else if (X87_FLOAT_MODE_P (mode))
26256 {
26257 *total = cost->fmul;
26258 return false;
26259 }
26260 else if (FLOAT_MODE_P (mode))
26261 {
26262 /* ??? SSE vector cost should be used here. */
26263 *total = cost->fmul;
26264 return false;
26265 }
26266 else
26267 {
26268 rtx op0 = XEXP (x, 0);
26269 rtx op1 = XEXP (x, 1);
26270 int nbits;
26271 if (CONST_INT_P (XEXP (x, 1)))
26272 {
26273 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26274 for (nbits = 0; value != 0; value &= value - 1)
26275 nbits++;
26276 }
26277 else
26278 /* This is arbitrary. */
26279 nbits = 7;
26280
26281 /* Compute costs correctly for widening multiplication. */
26282 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26283 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26284 == GET_MODE_SIZE (mode))
26285 {
26286 int is_mulwiden = 0;
26287 enum machine_mode inner_mode = GET_MODE (op0);
26288
26289 if (GET_CODE (op0) == GET_CODE (op1))
26290 is_mulwiden = 1, op1 = XEXP (op1, 0);
26291 else if (CONST_INT_P (op1))
26292 {
26293 if (GET_CODE (op0) == SIGN_EXTEND)
26294 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26295 == INTVAL (op1);
26296 else
26297 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26298 }
26299
26300 if (is_mulwiden)
26301 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26302 }
26303
26304 *total = (cost->mult_init[MODE_INDEX (mode)]
26305 + nbits * cost->mult_bit
26306 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26307
26308 return true;
26309 }
26310
26311 case DIV:
26312 case UDIV:
26313 case MOD:
26314 case UMOD:
26315 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26316 /* ??? SSE cost should be used here. */
26317 *total = cost->fdiv;
26318 else if (X87_FLOAT_MODE_P (mode))
26319 *total = cost->fdiv;
26320 else if (FLOAT_MODE_P (mode))
26321 /* ??? SSE vector cost should be used here. */
26322 *total = cost->fdiv;
26323 else
26324 *total = cost->divide[MODE_INDEX (mode)];
26325 return false;
26326
26327 case PLUS:
26328 if (GET_MODE_CLASS (mode) == MODE_INT
26329 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26330 {
26331 if (GET_CODE (XEXP (x, 0)) == PLUS
26332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26333 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26334 && CONSTANT_P (XEXP (x, 1)))
26335 {
26336 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26337 if (val == 2 || val == 4 || val == 8)
26338 {
26339 *total = cost->lea;
26340 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26341 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26342 outer_code, speed);
26343 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26344 return true;
26345 }
26346 }
26347 else if (GET_CODE (XEXP (x, 0)) == MULT
26348 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26349 {
26350 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26351 if (val == 2 || val == 4 || val == 8)
26352 {
26353 *total = cost->lea;
26354 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26355 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26356 return true;
26357 }
26358 }
26359 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26360 {
26361 *total = cost->lea;
26362 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26363 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26364 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26365 return true;
26366 }
26367 }
26368 /* FALLTHRU */
26369
26370 case MINUS:
26371 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26372 {
26373 /* ??? SSE cost should be used here. */
26374 *total = cost->fadd;
26375 return false;
26376 }
26377 else if (X87_FLOAT_MODE_P (mode))
26378 {
26379 *total = cost->fadd;
26380 return false;
26381 }
26382 else if (FLOAT_MODE_P (mode))
26383 {
26384 /* ??? SSE vector cost should be used here. */
26385 *total = cost->fadd;
26386 return false;
26387 }
26388 /* FALLTHRU */
26389
26390 case AND:
26391 case IOR:
26392 case XOR:
26393 if (!TARGET_64BIT && mode == DImode)
26394 {
26395 *total = (cost->add * 2
26396 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26397 << (GET_MODE (XEXP (x, 0)) != DImode))
26398 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26399 << (GET_MODE (XEXP (x, 1)) != DImode)));
26400 return true;
26401 }
26402 /* FALLTHRU */
26403
26404 case NEG:
26405 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26406 {
26407 /* ??? SSE cost should be used here. */
26408 *total = cost->fchs;
26409 return false;
26410 }
26411 else if (X87_FLOAT_MODE_P (mode))
26412 {
26413 *total = cost->fchs;
26414 return false;
26415 }
26416 else if (FLOAT_MODE_P (mode))
26417 {
26418 /* ??? SSE vector cost should be used here. */
26419 *total = cost->fchs;
26420 return false;
26421 }
26422 /* FALLTHRU */
26423
26424 case NOT:
26425 if (!TARGET_64BIT && mode == DImode)
26426 *total = cost->add * 2;
26427 else
26428 *total = cost->add;
26429 return false;
26430
26431 case COMPARE:
26432 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26433 && XEXP (XEXP (x, 0), 1) == const1_rtx
26434 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26435 && XEXP (x, 1) == const0_rtx)
26436 {
26437 /* This kind of construct is implemented using test[bwl].
26438 Treat it as if we had an AND. */
26439 *total = (cost->add
26440 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26441 + rtx_cost (const1_rtx, outer_code, speed));
26442 return true;
26443 }
26444 return false;
26445
26446 case FLOAT_EXTEND:
26447 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26448 *total = 0;
26449 return false;
26450
26451 case ABS:
26452 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26453 /* ??? SSE cost should be used here. */
26454 *total = cost->fabs;
26455 else if (X87_FLOAT_MODE_P (mode))
26456 *total = cost->fabs;
26457 else if (FLOAT_MODE_P (mode))
26458 /* ??? SSE vector cost should be used here. */
26459 *total = cost->fabs;
26460 return false;
26461
26462 case SQRT:
26463 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26464 /* ??? SSE cost should be used here. */
26465 *total = cost->fsqrt;
26466 else if (X87_FLOAT_MODE_P (mode))
26467 *total = cost->fsqrt;
26468 else if (FLOAT_MODE_P (mode))
26469 /* ??? SSE vector cost should be used here. */
26470 *total = cost->fsqrt;
26471 return false;
26472
26473 case UNSPEC:
26474 if (XINT (x, 1) == UNSPEC_TP)
26475 *total = 0;
26476 return false;
26477
26478 default:
26479 return false;
26480 }
26481 }
26482
26483 #if TARGET_MACHO
26484
26485 static int current_machopic_label_num;
26486
26487 /* Given a symbol name and its associated stub, write out the
26488 definition of the stub. */
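
/* For illustration (not a literal dump), with !MACHOPIC_PURE the code below
   emits roughly the following for a symbol SYMB with stub STUB:

	STUB:
		.indirect_symbol SYMB
		jmp	*L<N>$lz
	BINDER:
		pushl	$L<N>$lz
		jmp	dyld_stub_binding_helper
	L<N>$lz:
		.indirect_symbol SYMB
		.long	BINDER

   where BINDER is derived from STUB, N comes from the label counter, and
   the lazy pointer goes into its own section.  */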
26489
26490 void
26491 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26492 {
26493 unsigned int length;
26494 char *binder_name, *symbol_name, lazy_ptr_name[32];
26495 int label = ++current_machopic_label_num;
26496
26497 /* For 64-bit we shouldn't get here. */
26498 gcc_assert (!TARGET_64BIT);
26499
26500 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26501 symb = (*targetm.strip_name_encoding) (symb);
26502
26503 length = strlen (stub);
26504 binder_name = XALLOCAVEC (char, length + 32);
26505 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26506
26507 length = strlen (symb);
26508 symbol_name = XALLOCAVEC (char, length + 32);
26509 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26510
26511 sprintf (lazy_ptr_name, "L%d$lz", label);
26512
26513 if (MACHOPIC_PURE)
26514 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26515 else
26516 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26517
26518 fprintf (file, "%s:\n", stub);
26519 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26520
26521 if (MACHOPIC_PURE)
26522 {
26523 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26524 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26525 fprintf (file, "\tjmp\t*%%edx\n");
26526 }
26527 else
26528 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26529
26530 fprintf (file, "%s:\n", binder_name);
26531
26532 if (MACHOPIC_PURE)
26533 {
26534 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26535 fprintf (file, "\tpushl\t%%eax\n");
26536 }
26537 else
26538 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26539
26540 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26541
26542 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26543 fprintf (file, "%s:\n", lazy_ptr_name);
26544 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26545 fprintf (file, "\t.long %s\n", binder_name);
26546 }
26547
26548 void
26549 darwin_x86_file_end (void)
26550 {
26551 darwin_file_end ();
26552 ix86_file_end ();
26553 }
26554 #endif /* TARGET_MACHO */
26555
26556	/* Order the registers for the register allocator.  */
26557
26558 void
26559 x86_order_regs_for_local_alloc (void)
26560 {
26561 int pos = 0;
26562 int i;
26563
26564 /* First allocate the local general purpose registers. */
26565 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26566 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26567 reg_alloc_order [pos++] = i;
26568
26569 /* Global general purpose registers. */
26570 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26571 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26572 reg_alloc_order [pos++] = i;
26573
26574 /* x87 registers come first in case we are doing FP math
26575 using them. */
26576 if (!TARGET_SSE_MATH)
26577 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26578 reg_alloc_order [pos++] = i;
26579
26580 /* SSE registers. */
26581 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26582 reg_alloc_order [pos++] = i;
26583 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26584 reg_alloc_order [pos++] = i;
26585
26586 /* x87 registers. */
26587 if (TARGET_SSE_MATH)
26588 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26589 reg_alloc_order [pos++] = i;
26590
26591 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26592 reg_alloc_order [pos++] = i;
26593
26594	   /* Initialize the rest of the array, as we do not allocate some
26595	      registers at all.  */
26596 while (pos < FIRST_PSEUDO_REGISTER)
26597 reg_alloc_order [pos++] = 0;
26598 }
26599
26600	/* Handle a "ms_abi" or "sysv_abi" attribute; arguments as in
26601	   struct attribute_spec.handler.  */
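
/* For example (illustrative only):

     int __attribute__ ((ms_abi)) f (int);            accepted on 64-bit targets
     int __attribute__ ((ms_abi, sysv_abi)) g (int);  error: not compatible
     int __attribute__ ((sysv_abi)) v;                warning: only applies to functions  */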
26602 static tree
26603 ix86_handle_abi_attribute (tree *node, tree name,
26604 tree args ATTRIBUTE_UNUSED,
26605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26606 {
26607 if (TREE_CODE (*node) != FUNCTION_TYPE
26608 && TREE_CODE (*node) != METHOD_TYPE
26609 && TREE_CODE (*node) != FIELD_DECL
26610 && TREE_CODE (*node) != TYPE_DECL)
26611 {
26612 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26613 IDENTIFIER_POINTER (name));
26614 *no_add_attrs = true;
26615 return NULL_TREE;
26616 }
26617 if (!TARGET_64BIT)
26618 {
26619 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26620 IDENTIFIER_POINTER (name));
26621 *no_add_attrs = true;
26622 return NULL_TREE;
26623 }
26624
26625	  /* The ms_abi and sysv_abi attributes are mutually exclusive.  */
26626 if (is_attribute_p ("ms_abi", name))
26627 {
26628 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26629 {
26630 error ("ms_abi and sysv_abi attributes are not compatible");
26631 }
26632
26633 return NULL_TREE;
26634 }
26635 else if (is_attribute_p ("sysv_abi", name))
26636 {
26637 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26638 {
26639 error ("ms_abi and sysv_abi attributes are not compatible");
26640 }
26641
26642 return NULL_TREE;
26643 }
26644
26645 return NULL_TREE;
26646 }
26647
26648 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26649 struct attribute_spec.handler. */
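
/* For example (illustrative only):

     struct __attribute__ ((ms_struct)) s { int i : 3; char c; };   accepted
     int __attribute__ ((ms_struct)) x;                             warning: attribute ignored
     struct __attribute__ ((ms_struct, gcc_struct)) t { int i; };   warning: incompatible attribute ignored  */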
26650 static tree
26651 ix86_handle_struct_attribute (tree *node, tree name,
26652 tree args ATTRIBUTE_UNUSED,
26653 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26654 {
26655 tree *type = NULL;
26656 if (DECL_P (*node))
26657 {
26658 if (TREE_CODE (*node) == TYPE_DECL)
26659 type = &TREE_TYPE (*node);
26660 }
26661 else
26662 type = node;
26663
26664 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26665 || TREE_CODE (*type) == UNION_TYPE)))
26666 {
26667 warning (OPT_Wattributes, "%qs attribute ignored",
26668 IDENTIFIER_POINTER (name));
26669 *no_add_attrs = true;
26670 }
26671
26672 else if ((is_attribute_p ("ms_struct", name)
26673 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26674 || ((is_attribute_p ("gcc_struct", name)
26675 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26676 {
26677 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26678 IDENTIFIER_POINTER (name));
26679 *no_add_attrs = true;
26680 }
26681
26682 return NULL_TREE;
26683 }
26684
26685 static bool
26686 ix86_ms_bitfield_layout_p (const_tree record_type)
26687 {
26688	  return ((TARGET_MS_BITFIELD_LAYOUT
26689		   && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26690		  || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)));
26691 }
26692
26693	/* Return an expression indicating where the `this' parameter is
26694	   located on entry to FUNCTION.  */
26695
26696 static rtx
26697 x86_this_parameter (tree function)
26698 {
26699 tree type = TREE_TYPE (function);
26700 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26701 int nregs;
26702
26703 if (TARGET_64BIT)
26704 {
26705 const int *parm_regs;
26706
26707 if (ix86_function_type_abi (type) == MS_ABI)
26708 parm_regs = x86_64_ms_abi_int_parameter_registers;
26709 else
26710 parm_regs = x86_64_int_parameter_registers;
26711 return gen_rtx_REG (DImode, parm_regs[aggr]);
26712 }
26713
26714 nregs = ix86_function_regparm (type, function);
26715
26716 if (nregs > 0 && !stdarg_p (type))
26717 {
26718 int regno;
26719
26720 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26721 regno = aggr ? DX_REG : CX_REG;
26722 else
26723 {
26724 regno = AX_REG;
26725 if (aggr)
26726 {
26727 regno = DX_REG;
26728 if (nregs == 1)
26729 return gen_rtx_MEM (SImode,
26730 plus_constant (stack_pointer_rtx, 4));
26731 }
26732 }
26733 return gen_rtx_REG (SImode, regno);
26734 }
26735
26736 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26737 }
26738
26739 /* Determine whether x86_output_mi_thunk can succeed. */
26740
26741 static bool
26742 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26743 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26744 HOST_WIDE_INT vcall_offset, const_tree function)
26745 {
26746 /* 64-bit can handle anything. */
26747 if (TARGET_64BIT)
26748 return true;
26749
26750 /* For 32-bit, everything's fine if we have one free register. */
26751 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26752 return true;
26753
26754 /* Need a free register for vcall_offset. */
26755 if (vcall_offset)
26756 return false;
26757
26758 /* Need a free register for GOT references. */
26759 if (flag_pic && !(*targetm.binds_local_p) (function))
26760 return false;
26761
26762 /* Otherwise ok. */
26763 return true;
26764 }
26765
26766 /* Output the assembler code for a thunk function. THUNK_DECL is the
26767 declaration for the thunk function itself, FUNCTION is the decl for
26768 the target function. DELTA is an immediate constant offset to be
26769 added to THIS. If VCALL_OFFSET is nonzero, the word at
26770 *(*this + vcall_offset) should be added to THIS. */
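
/* For the simple 32-bit non-PIC case with THIS passed on the stack, the code
   below boils down to roughly (an illustrative sketch):

	movl	4(%esp), %eax
	addl	$DELTA, %eax
	movl	(%eax), %ecx
	addl	VCALL_OFFSET(%ecx), %eax
	movl	%eax, 4(%esp)
	jmp	FUNCTION

   The moves through %eax and the vtable load disappear when VCALL_OFFSET
   is zero.  */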
26771
26772 static void
26773 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26774 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26775 HOST_WIDE_INT vcall_offset, tree function)
26776 {
26777 rtx xops[3];
26778 rtx this_param = x86_this_parameter (function);
26779 rtx this_reg, tmp;
26780
26781 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26782 pull it in now and let DELTA benefit. */
26783 if (REG_P (this_param))
26784 this_reg = this_param;
26785 else if (vcall_offset)
26786 {
26787 /* Put the this parameter into %eax. */
26788 xops[0] = this_param;
26789 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26790 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26791 }
26792 else
26793 this_reg = NULL_RTX;
26794
26795 /* Adjust the this parameter by a fixed constant. */
26796 if (delta)
26797 {
26798 xops[0] = GEN_INT (delta);
26799 xops[1] = this_reg ? this_reg : this_param;
26800 if (TARGET_64BIT)
26801 {
26802 if (!x86_64_general_operand (xops[0], DImode))
26803 {
26804 tmp = gen_rtx_REG (DImode, R10_REG);
26805 xops[1] = tmp;
26806 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26807 xops[0] = tmp;
26808 xops[1] = this_param;
26809 }
26810 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26811 }
26812 else
26813 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26814 }
26815
26816 /* Adjust the this parameter by a value stored in the vtable. */
26817 if (vcall_offset)
26818 {
26819 if (TARGET_64BIT)
26820 tmp = gen_rtx_REG (DImode, R10_REG);
26821 else
26822 {
26823 int tmp_regno = CX_REG;
26824 if (lookup_attribute ("fastcall",
26825 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26826 tmp_regno = AX_REG;
26827 tmp = gen_rtx_REG (SImode, tmp_regno);
26828 }
26829
26830 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26831 xops[1] = tmp;
26832 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26833
26834 /* Adjust the this parameter. */
26835 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26836 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26837 {
26838 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26839 xops[0] = GEN_INT (vcall_offset);
26840 xops[1] = tmp2;
26841 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26842 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26843 }
26844 xops[1] = this_reg;
26845 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26846 }
26847
26848 /* If necessary, drop THIS back to its stack slot. */
26849 if (this_reg && this_reg != this_param)
26850 {
26851 xops[0] = this_reg;
26852 xops[1] = this_param;
26853 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26854 }
26855
26856 xops[0] = XEXP (DECL_RTL (function), 0);
26857 if (TARGET_64BIT)
26858 {
26859 if (!flag_pic || (*targetm.binds_local_p) (function))
26860 output_asm_insn ("jmp\t%P0", xops);
26861 /* All thunks should be in the same object as their target,
26862 and thus binds_local_p should be true. */
26863 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26864 gcc_unreachable ();
26865 else
26866 {
26867 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26868 tmp = gen_rtx_CONST (Pmode, tmp);
26869 tmp = gen_rtx_MEM (QImode, tmp);
26870 xops[0] = tmp;
26871 output_asm_insn ("jmp\t%A0", xops);
26872 }
26873 }
26874 else
26875 {
26876 if (!flag_pic || (*targetm.binds_local_p) (function))
26877 output_asm_insn ("jmp\t%P0", xops);
26878 else
26879 #if TARGET_MACHO
26880 if (TARGET_MACHO)
26881 {
26882 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26883 tmp = (gen_rtx_SYMBOL_REF
26884 (Pmode,
26885 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26886 tmp = gen_rtx_MEM (QImode, tmp);
26887 xops[0] = tmp;
26888 output_asm_insn ("jmp\t%0", xops);
26889 }
26890 else
26891 #endif /* TARGET_MACHO */
26892 {
26893 tmp = gen_rtx_REG (SImode, CX_REG);
26894 output_set_got (tmp, NULL_RTX);
26895
26896 xops[1] = tmp;
26897 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26898 output_asm_insn ("jmp\t{*}%1", xops);
26899 }
26900 }
26901 }
26902
26903 static void
26904 x86_file_start (void)
26905 {
26906 default_file_start ();
26907 #if TARGET_MACHO
26908 darwin_file_start ();
26909 #endif
26910 if (X86_FILE_START_VERSION_DIRECTIVE)
26911 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26912 if (X86_FILE_START_FLTUSED)
26913 fputs ("\t.global\t__fltused\n", asm_out_file);
26914 if (ix86_asm_dialect == ASM_INTEL)
26915 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26916 }
26917
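/* Cap the alignment computed for a structure field.  For example
   (illustrative), on 32-bit x86 without -malign-double a "double" member is
   limited to 32-bit alignment below, so in

     struct { char c; double d; };

   the member d is placed at offset 4 rather than 8.  */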
26918 int
26919 x86_field_alignment (tree field, int computed)
26920 {
26921 enum machine_mode mode;
26922 tree type = TREE_TYPE (field);
26923
26924 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26925 return computed;
26926 mode = TYPE_MODE (strip_array_types (type));
26927 if (mode == DFmode || mode == DCmode
26928 || GET_MODE_CLASS (mode) == MODE_INT
26929 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26930 return MIN (32, computed);
26931 return computed;
26932 }
26933
26934 /* Output assembler code to FILE to increment profiler label # LABELNO
26935 for profiling a function entry. */
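/* For example (illustrative), the 32-bit PIC case below emits roughly

	leal	<LPREFIX>P<LABELNO>@GOTOFF(%ebx), %<PROFILE_COUNT_REGISTER>
	call	*<MCOUNT_NAME>@GOT(%ebx)

   with the leal dropped when NO_PROFILE_COUNTERS is defined.  */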
26936 void
26937 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26938 {
26939 if (TARGET_64BIT)
26940 {
26941 #ifndef NO_PROFILE_COUNTERS
26942 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
26943 #endif
26944
26945 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26946 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26947 else
26948 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26949 }
26950 else if (flag_pic)
26951 {
26952 #ifndef NO_PROFILE_COUNTERS
26953 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26954 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26955 #endif
26956 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26957 }
26958 else
26959 {
26960 #ifndef NO_PROFILE_COUNTERS
26961 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26962 PROFILE_COUNT_REGISTER);
26963 #endif
26964 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26965 }
26966 }
26967
26968	/* We don't have exact information about the insn sizes, but we may safely
26969	   assume that we know about all 1-byte insns and the sizes of memory
26970	   addresses.  This is enough to eliminate unnecessary padding in 99% of
26971	   cases.  */
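
/* In the estimate below (a rough illustration of the heuristic, not exact
   sizes): alignment markers and jump tables count as 0 bytes, direct calls
   as 5, known one-byte insns as 1, and other insns as 1 plus their address
   size (raised to at least 4 when a symbolic operand is present), or 2 when
   no address size is known.  */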
26972
26973 static int
26974 min_insn_size (rtx insn)
26975 {
26976 int l = 0;
26977
26978 if (!INSN_P (insn) || !active_insn_p (insn))
26979 return 0;
26980
26981	  /* Discard the alignments we've emitted and jump tables.  */
26982 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26983 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26984 return 0;
26985 if (JUMP_P (insn)
26986 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26987 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26988 return 0;
26989
26990	  /* Important case: calls are always 5 bytes.
26991	     It is common to have many calls in a row.  */
26992 if (CALL_P (insn)
26993 && symbolic_reference_mentioned_p (PATTERN (insn))
26994 && !SIBLING_CALL_P (insn))
26995 return 5;
26996 if (get_attr_length (insn) <= 1)
26997 return 1;
26998
26999	  /* For normal instructions we may rely on the address sizes and on the
27000	     presence of a symbolic reference to require 4 bytes of encoding.
27001	     This is not the case for jumps, where references are PC-relative.  */
27002 if (!JUMP_P (insn))
27003 {
27004 l = get_attr_length_address (insn);
27005 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27006 l = 4;
27007 }
27008 if (l)
27009 return 1+l;
27010 else
27011 return 2;
27012 }
27013
27014	/* The AMD K8 core mispredicts jumps when there are more than 3 jumps in
27015	   a 16-byte window.  */
27016
27017 static void
27018 ix86_avoid_jump_misspredicts (void)
27019 {
27020 rtx insn, start = get_insns ();
27021 int nbytes = 0, njumps = 0;
27022 int isjump = 0;
27023
27024	  /* Look for all minimal intervals of instructions containing 4 jumps.
27025	     The intervals are bounded by START and INSN.  NBYTES is the total
27026	     size of the instructions in the interval including INSN and not
27027	     including START.  When NBYTES is smaller than 16 bytes, it is possible
27028	     that the ends of START and INSN land in the same 16-byte window.
27029	
27030	     The smallest offset in the window at which INSN can start is the case
27031	     where START ends at offset 0.  The offset of INSN is then
27032	     NBYTES - sizeof (INSN).  We add a p2align to the 16-byte window with
27033	     maxskip 17 - NBYTES + sizeof (INSN).  */
27034 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27035 {
27036
27037 nbytes += min_insn_size (insn);
27038 if (dump_file)
27039 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27040 INSN_UID (insn), min_insn_size (insn));
27041 if ((JUMP_P (insn)
27042 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27043 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27044 || CALL_P (insn))
27045 njumps++;
27046 else
27047 continue;
27048
27049 while (njumps > 3)
27050 {
27051 start = NEXT_INSN (start);
27052 if ((JUMP_P (start)
27053 && GET_CODE (PATTERN (start)) != ADDR_VEC
27054 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27055 || CALL_P (start))
27056 njumps--, isjump = 1;
27057 else
27058 isjump = 0;
27059 nbytes -= min_insn_size (start);
27060 }
27061 gcc_assert (njumps >= 0);
27062 if (dump_file)
27063 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27064 INSN_UID (start), INSN_UID (insn), nbytes);
27065
27066 if (njumps == 3 && isjump && nbytes < 16)
27067 {
27068 int padsize = 15 - nbytes + min_insn_size (insn);
27069
27070 if (dump_file)
27071 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27072 INSN_UID (insn), padsize);
27073 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27074 }
27075 }
27076 }
27077
27078	/* AMD Athlon works faster
27079	   when a RET is neither the destination of a conditional jump nor directly
27080	   preceded by another jump instruction.  We avoid the penalty by inserting a
27081	   NOP just before the RET instructions in such cases.  */
27082 static void
27083 ix86_pad_returns (void)
27084 {
27085 edge e;
27086 edge_iterator ei;
27087
27088 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27089 {
27090 basic_block bb = e->src;
27091 rtx ret = BB_END (bb);
27092 rtx prev;
27093 bool replace = false;
27094
27095 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27096 || optimize_bb_for_size_p (bb))
27097 continue;
27098 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27099 if (active_insn_p (prev) || LABEL_P (prev))
27100 break;
27101 if (prev && LABEL_P (prev))
27102 {
27103 edge e;
27104 edge_iterator ei;
27105
27106 FOR_EACH_EDGE (e, ei, bb->preds)
27107 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27108 && !(e->flags & EDGE_FALLTHRU))
27109 replace = true;
27110 }
27111 if (!replace)
27112 {
27113 prev = prev_active_insn (ret);
27114 if (prev
27115 && ((JUMP_P (prev) && any_condjump_p (prev))
27116 || CALL_P (prev)))
27117 replace = true;
27118	  /* Empty functions get a branch misprediction even when the jump
27119	     destination is not visible to us.  */
27120 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27121 replace = true;
27122 }
27123 if (replace)
27124 {
27125 emit_insn_before (gen_return_internal_long (), ret);
27126 delete_insn (ret);
27127 }
27128 }
27129 }
27130
27131	/* Implement machine-specific optimizations.  We implement padding of returns
27132	   for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window.  */
27133 static void
27134 ix86_reorg (void)
27135 {
27136 if (TARGET_PAD_RETURNS && optimize
27137 && optimize_function_for_speed_p (cfun))
27138 ix86_pad_returns ();
27139 if (TARGET_FOUR_JUMP_LIMIT && optimize
27140 && optimize_function_for_speed_p (cfun))
27141 ix86_avoid_jump_misspredicts ();
27142 }
27143
27144	/* Return nonzero when a QImode register that must be represented via a REX
27145	   prefix is used.  */
27146 bool
27147 x86_extended_QIreg_mentioned_p (rtx insn)
27148 {
27149 int i;
27150 extract_insn_cached (insn);
27151 for (i = 0; i < recog_data.n_operands; i++)
27152 if (REG_P (recog_data.operand[i])
27153 && REGNO (recog_data.operand[i]) > BX_REG)
27154 return true;
27155 return false;
27156 }
27157
27158	/* Return nonzero when P points to a register encoded via a REX prefix.
27159	   Called via for_each_rtx.  */
27160 static int
27161 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27162 {
27163 unsigned int regno;
27164 if (!REG_P (*p))
27165 return 0;
27166 regno = REGNO (*p);
27167 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27168 }
27169
27170	/* Return true when INSN mentions a register that must be encoded using a
27171	   REX prefix.  */
27172 bool
27173 x86_extended_reg_mentioned_p (rtx insn)
27174 {
27175 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27176 extended_reg_mentioned_1, NULL);
27177 }
27178
27179 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27180 optabs would emit if we didn't have TFmode patterns. */
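
/* Roughly equivalent C for the DImode-to-double case (an illustrative sketch,
   not the exact expansion): when the sign bit is set, halve the value while
   folding the low bit back in so rounding is preserved, convert signed, and
   double the result.

     double u64_to_fp (unsigned long long x)
     {
       if ((long long) x >= 0)
	 return (double) (long long) x;
       unsigned long long half = (x >> 1) | (x & 1);
       double f = (double) (long long) half;
       return f + f;
     }
*/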
27181
27182 void
27183 x86_emit_floatuns (rtx operands[2])
27184 {
27185 rtx neglab, donelab, i0, i1, f0, in, out;
27186 enum machine_mode mode, inmode;
27187
27188 inmode = GET_MODE (operands[1]);
27189 gcc_assert (inmode == SImode || inmode == DImode);
27190
27191 out = operands[0];
27192 in = force_reg (inmode, operands[1]);
27193 mode = GET_MODE (out);
27194 neglab = gen_label_rtx ();
27195 donelab = gen_label_rtx ();
27196 f0 = gen_reg_rtx (mode);
27197
27198 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27199
27200 expand_float (out, in, 0);
27201
27202 emit_jump_insn (gen_jump (donelab));
27203 emit_barrier ();
27204
27205 emit_label (neglab);
27206
27207 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27208 1, OPTAB_DIRECT);
27209 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27210 1, OPTAB_DIRECT);
27211 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27212
27213 expand_float (f0, i0, 0);
27214
27215 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27216
27217 emit_label (donelab);
27218 }
27219
27220 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27221 with all elements equal to VAR. Return true if successful. */
27222
27223 static bool
27224 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27225 rtx target, rtx val)
27226 {
27227 enum machine_mode hmode, smode, wsmode, wvmode;
27228 rtx x;
27229
27230 switch (mode)
27231 {
27232 case V2SImode:
27233 case V2SFmode:
27234 if (!mmx_ok)
27235 return false;
27236 /* FALLTHRU */
27237
27238 case V2DFmode:
27239 case V2DImode:
27240 case V4SFmode:
27241 case V4SImode:
27242 val = force_reg (GET_MODE_INNER (mode), val);
27243 x = gen_rtx_VEC_DUPLICATE (mode, val);
27244 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27245 return true;
27246
27247 case V4HImode:
27248 if (!mmx_ok)
27249 return false;
27250 if (TARGET_SSE || TARGET_3DNOW_A)
27251 {
27252 val = gen_lowpart (SImode, val);
27253 x = gen_rtx_TRUNCATE (HImode, val);
27254 x = gen_rtx_VEC_DUPLICATE (mode, x);
27255 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27256 return true;
27257 }
27258 else
27259 {
27260 smode = HImode;
27261 wsmode = SImode;
27262 wvmode = V2SImode;
27263 goto widen;
27264 }
27265
27266 case V8QImode:
27267 if (!mmx_ok)
27268 return false;
27269 smode = QImode;
27270 wsmode = HImode;
27271 wvmode = V4HImode;
27272 goto widen;
27273 case V8HImode:
27274 if (TARGET_SSE2)
27275 {
27276 rtx tmp1, tmp2;
27277 /* Extend HImode to SImode using a paradoxical SUBREG. */
27278 tmp1 = gen_reg_rtx (SImode);
27279 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27280 /* Insert the SImode value as low element of V4SImode vector. */
27281 tmp2 = gen_reg_rtx (V4SImode);
27282 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27283 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27284 CONST0_RTX (V4SImode),
27285 const1_rtx);
27286 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27287 /* Cast the V4SImode vector back to a V8HImode vector. */
27288 tmp1 = gen_reg_rtx (V8HImode);
27289 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27290 /* Duplicate the low short through the whole low SImode word. */
27291 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27292 /* Cast the V8HImode vector back to a V4SImode vector. */
27293 tmp2 = gen_reg_rtx (V4SImode);
27294 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27295 /* Replicate the low element of the V4SImode vector. */
27296 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27297	  /* Cast the V4SImode vector back to V8HImode, and store in target.  */
27298 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27299 return true;
27300 }
27301 smode = HImode;
27302 wsmode = SImode;
27303 wvmode = V4SImode;
27304 goto widen;
27305 case V16QImode:
27306 if (TARGET_SSE2)
27307 {
27308 rtx tmp1, tmp2;
27309 /* Extend QImode to SImode using a paradoxical SUBREG. */
27310 tmp1 = gen_reg_rtx (SImode);
27311 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27312 /* Insert the SImode value as low element of V4SImode vector. */
27313 tmp2 = gen_reg_rtx (V4SImode);
27314 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27315 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27316 CONST0_RTX (V4SImode),
27317 const1_rtx);
27318 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27319 /* Cast the V4SImode vector back to a V16QImode vector. */
27320 tmp1 = gen_reg_rtx (V16QImode);
27321 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27322 /* Duplicate the low byte through the whole low SImode word. */
27323 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27324 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27325 /* Cast the V16QImode vector back to a V4SImode vector. */
27326 tmp2 = gen_reg_rtx (V4SImode);
27327 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27328 /* Replicate the low element of the V4SImode vector. */
27329 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27330	  /* Cast the V4SImode vector back to V16QImode, and store in target.  */
27331 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27332 return true;
27333 }
27334 smode = QImode;
27335 wsmode = HImode;
27336 wvmode = V8HImode;
27337 goto widen;
27338 widen:
27339 /* Replicate the value once into the next wider mode and recurse. */
27340 val = convert_modes (wsmode, smode, val, true);
27341 x = expand_simple_binop (wsmode, ASHIFT, val,
27342 GEN_INT (GET_MODE_BITSIZE (smode)),
27343 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27344 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27345
27346 x = gen_reg_rtx (wvmode);
27347 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27348 gcc_unreachable ();
27349 emit_move_insn (target, gen_lowpart (mode, x));
27350 return true;
27351
27352 case V4DFmode:
27353 hmode = V2DFmode;
27354 goto half;
27355 case V4DImode:
27356 hmode = V2DImode;
27357 goto half;
27358 case V8SFmode:
27359 hmode = V4SFmode;
27360 goto half;
27361 case V8SImode:
27362 hmode = V4SImode;
27363 goto half;
27364 case V16HImode:
27365 hmode = V8HImode;
27366 goto half;
27367 case V32QImode:
27368 hmode = V16QImode;
27369 goto half;
27370 half:
27371 {
27372 rtx tmp = gen_reg_rtx (hmode);
27373 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27374 emit_insn (gen_rtx_SET (VOIDmode, target,
27375 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27376 }
27377 return true;
27378
27379 default:
27380 return false;
27381 }
27382 }
27383
27384 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27385 whose ONE_VAR element is VAR, and other elements are zero. Return true
27386 if successful. */
27387
27388 static bool
27389 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27390 rtx target, rtx var, int one_var)
27391 {
27392 enum machine_mode vsimode;
27393 rtx new_target;
27394 rtx x, tmp;
27395 bool use_vector_set = false;
27396
27397 switch (mode)
27398 {
27399 case V2DImode:
27400 /* For SSE4.1, we normally use vector set. But if the second
27401 element is zero and inter-unit moves are OK, we use movq
27402 instead. */
27403 use_vector_set = (TARGET_64BIT
27404 && TARGET_SSE4_1
27405 && !(TARGET_INTER_UNIT_MOVES
27406 && one_var == 0));
27407 break;
27408 case V16QImode:
27409 case V4SImode:
27410 case V4SFmode:
27411 use_vector_set = TARGET_SSE4_1;
27412 break;
27413 case V8HImode:
27414 use_vector_set = TARGET_SSE2;
27415 break;
27416 case V4HImode:
27417 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27418 break;
27419 case V32QImode:
27420 case V16HImode:
27421 case V8SImode:
27422 case V8SFmode:
27423 case V4DFmode:
27424 use_vector_set = TARGET_AVX;
27425 break;
27426 case V4DImode:
27427 /* Use ix86_expand_vector_set in 64bit mode only. */
27428 use_vector_set = TARGET_AVX && TARGET_64BIT;
27429 break;
27430 default:
27431 break;
27432 }
27433
27434 if (use_vector_set)
27435 {
27436 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27437 var = force_reg (GET_MODE_INNER (mode), var);
27438 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27439 return true;
27440 }
27441
27442 switch (mode)
27443 {
27444 case V2SFmode:
27445 case V2SImode:
27446 if (!mmx_ok)
27447 return false;
27448 /* FALLTHRU */
27449
27450 case V2DFmode:
27451 case V2DImode:
27452 if (one_var != 0)
27453 return false;
27454 var = force_reg (GET_MODE_INNER (mode), var);
27455 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27456 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27457 return true;
27458
27459 case V4SFmode:
27460 case V4SImode:
27461 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27462 new_target = gen_reg_rtx (mode);
27463 else
27464 new_target = target;
27465 var = force_reg (GET_MODE_INNER (mode), var);
27466 x = gen_rtx_VEC_DUPLICATE (mode, var);
27467 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27468 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27469 if (one_var != 0)
27470 {
27471 /* We need to shuffle the value to the correct position, so
27472 create a new pseudo to store the intermediate result. */
27473
27474 /* With SSE2, we can use the integer shuffle insns. */
27475 if (mode != V4SFmode && TARGET_SSE2)
27476 {
27477 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27478 GEN_INT (1),
27479 GEN_INT (one_var == 1 ? 0 : 1),
27480 GEN_INT (one_var == 2 ? 0 : 1),
27481 GEN_INT (one_var == 3 ? 0 : 1)));
27482 if (target != new_target)
27483 emit_move_insn (target, new_target);
27484 return true;
27485 }
27486
27487 /* Otherwise convert the intermediate result to V4SFmode and
27488 use the SSE1 shuffle instructions. */
27489 if (mode != V4SFmode)
27490 {
27491 tmp = gen_reg_rtx (V4SFmode);
27492 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27493 }
27494 else
27495 tmp = new_target;
27496
27497 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27498 GEN_INT (1),
27499 GEN_INT (one_var == 1 ? 0 : 1),
27500 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27501 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27502
27503 if (mode != V4SFmode)
27504 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27505 else if (tmp != target)
27506 emit_move_insn (target, tmp);
27507 }
27508 else if (target != new_target)
27509 emit_move_insn (target, new_target);
27510 return true;
27511
27512 case V8HImode:
27513 case V16QImode:
27514 vsimode = V4SImode;
27515 goto widen;
27516 case V4HImode:
27517 case V8QImode:
27518 if (!mmx_ok)
27519 return false;
27520 vsimode = V2SImode;
27521 goto widen;
27522 widen:
27523 if (one_var != 0)
27524 return false;
27525
27526 /* Zero extend the variable element to SImode and recurse. */
27527 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27528
27529 x = gen_reg_rtx (vsimode);
27530 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27531 var, one_var))
27532 gcc_unreachable ();
27533
27534 emit_move_insn (target, gen_lowpart (mode, x));
27535 return true;
27536
27537 default:
27538 return false;
27539 }
27540 }
27541
27542 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27543 consisting of the values in VALS. It is known that all elements
27544 except ONE_VAR are constants. Return true if successful. */
27545
27546 static bool
27547 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27548 rtx target, rtx vals, int one_var)
27549 {
27550 rtx var = XVECEXP (vals, 0, one_var);
27551 enum machine_mode wmode;
27552 rtx const_vec, x;
27553
27554 const_vec = copy_rtx (vals);
27555 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27556 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27557
27558 switch (mode)
27559 {
27560 case V2DFmode:
27561 case V2DImode:
27562 case V2SFmode:
27563 case V2SImode:
27564 /* For the two element vectors, it's just as easy to use
27565 the general case. */
27566 return false;
27567
27568 case V4DImode:
27569 /* Use ix86_expand_vector_set in 64bit mode only. */
27570 if (!TARGET_64BIT)
27571 return false;
27572 case V4DFmode:
27573 case V8SFmode:
27574 case V8SImode:
27575 case V16HImode:
27576 case V32QImode:
27577 case V4SFmode:
27578 case V4SImode:
27579 case V8HImode:
27580 case V4HImode:
27581 break;
27582
27583 case V16QImode:
27584 if (TARGET_SSE4_1)
27585 break;
27586 wmode = V8HImode;
27587 goto widen;
27588 case V8QImode:
27589 wmode = V4HImode;
27590 goto widen;
27591 widen:
27592 /* There's no way to set one QImode entry easily. Combine
27593 the variable value with its adjacent constant value, and
27594 promote to an HImode set. */
27595 x = XVECEXP (vals, 0, one_var ^ 1);
27596 if (one_var & 1)
27597 {
27598 var = convert_modes (HImode, QImode, var, true);
27599 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27600 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27601 x = GEN_INT (INTVAL (x) & 0xff);
27602 }
27603 else
27604 {
27605 var = convert_modes (HImode, QImode, var, true);
27606 x = gen_int_mode (INTVAL (x) << 8, HImode);
27607 }
27608 if (x != const0_rtx)
27609 var = expand_simple_binop (HImode, IOR, var, x, var,
27610 1, OPTAB_LIB_WIDEN);
27611
27612 x = gen_reg_rtx (wmode);
27613 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27614 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27615
27616 emit_move_insn (target, gen_lowpart (mode, x));
27617 return true;
27618
27619 default:
27620 return false;
27621 }
27622
27623 emit_move_insn (target, const_vec);
27624 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27625 return true;
27626 }
27627
27628 /* A subroutine of ix86_expand_vector_init_general. Use vector
27629 concatenate to handle the most general case: all values variable,
27630 and none identical. */
27631
27632 static void
27633 ix86_expand_vector_init_concat (enum machine_mode mode,
27634 rtx target, rtx *ops, int n)
27635 {
27636 enum machine_mode cmode, hmode = VOIDmode;
27637 rtx first[8], second[4];
27638 rtvec v;
27639 int i, j;
27640
27641 switch (n)
27642 {
27643 case 2:
27644 switch (mode)
27645 {
27646 case V8SImode:
27647 cmode = V4SImode;
27648 break;
27649 case V8SFmode:
27650 cmode = V4SFmode;
27651 break;
27652 case V4DImode:
27653 cmode = V2DImode;
27654 break;
27655 case V4DFmode:
27656 cmode = V2DFmode;
27657 break;
27658 case V4SImode:
27659 cmode = V2SImode;
27660 break;
27661 case V4SFmode:
27662 cmode = V2SFmode;
27663 break;
27664 case V2DImode:
27665 cmode = DImode;
27666 break;
27667 case V2SImode:
27668 cmode = SImode;
27669 break;
27670 case V2DFmode:
27671 cmode = DFmode;
27672 break;
27673 case V2SFmode:
27674 cmode = SFmode;
27675 break;
27676 default:
27677 gcc_unreachable ();
27678 }
27679
27680 if (!register_operand (ops[1], cmode))
27681 ops[1] = force_reg (cmode, ops[1]);
27682 if (!register_operand (ops[0], cmode))
27683 ops[0] = force_reg (cmode, ops[0]);
27684 emit_insn (gen_rtx_SET (VOIDmode, target,
27685 gen_rtx_VEC_CONCAT (mode, ops[0],
27686 ops[1])));
27687 break;
27688
27689 case 4:
27690 switch (mode)
27691 {
27692 case V4DImode:
27693 cmode = V2DImode;
27694 break;
27695 case V4DFmode:
27696 cmode = V2DFmode;
27697 break;
27698 case V4SImode:
27699 cmode = V2SImode;
27700 break;
27701 case V4SFmode:
27702 cmode = V2SFmode;
27703 break;
27704 default:
27705 gcc_unreachable ();
27706 }
27707 goto half;
27708
27709 case 8:
27710 switch (mode)
27711 {
27712 case V8SImode:
27713 cmode = V2SImode;
27714 hmode = V4SImode;
27715 break;
27716 case V8SFmode:
27717 cmode = V2SFmode;
27718 hmode = V4SFmode;
27719 break;
27720 default:
27721 gcc_unreachable ();
27722 }
27723 goto half;
27724
27725 half:
27726 /* FIXME: We process inputs backward to help RA. PR 36222. */
27727 i = n - 1;
27728 j = (n >> 1) - 1;
27729 for (; i > 0; i -= 2, j--)
27730 {
27731 first[j] = gen_reg_rtx (cmode);
27732 v = gen_rtvec (2, ops[i - 1], ops[i]);
27733 ix86_expand_vector_init (false, first[j],
27734 gen_rtx_PARALLEL (cmode, v));
27735 }
27736
27737 n >>= 1;
27738 if (n > 2)
27739 {
27740 gcc_assert (hmode != VOIDmode);
27741 for (i = j = 0; i < n; i += 2, j++)
27742 {
27743 second[j] = gen_reg_rtx (hmode);
27744 ix86_expand_vector_init_concat (hmode, second [j],
27745 &first [i], 2);
27746 }
27747 n >>= 1;
27748 ix86_expand_vector_init_concat (mode, target, second, n);
27749 }
27750 else
27751 ix86_expand_vector_init_concat (mode, target, first, n);
27752 break;
27753
27754 default:
27755 gcc_unreachable ();
27756 }
27757 }
27758
27759 /* A subroutine of ix86_expand_vector_init_general. Use vector
27760 interleave to handle the most general case: all values variable,
27761 and none identical. */
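
/* For V8HImode (illustrative), the four element pairs are first placed in the
   low 32 bits of four vectors, those are interleaved pairwise as V4SImode
   values and finally as a single V2DImode pair, which leaves all eight
   elements in order.  V16QImode goes through one extra level (V8HImode)
   first.  */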
27762
27763 static void
27764 ix86_expand_vector_init_interleave (enum machine_mode mode,
27765 rtx target, rtx *ops, int n)
27766 {
27767 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27768 int i, j;
27769 rtx op0, op1;
27770 rtx (*gen_load_even) (rtx, rtx, rtx);
27771 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27772 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27773
27774 switch (mode)
27775 {
27776 case V8HImode:
27777 gen_load_even = gen_vec_setv8hi;
27778 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27779 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27780 inner_mode = HImode;
27781 first_imode = V4SImode;
27782 second_imode = V2DImode;
27783 third_imode = VOIDmode;
27784 break;
27785 case V16QImode:
27786 gen_load_even = gen_vec_setv16qi;
27787 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27788 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27789 inner_mode = QImode;
27790 first_imode = V8HImode;
27791 second_imode = V4SImode;
27792 third_imode = V2DImode;
27793 break;
27794 default:
27795 gcc_unreachable ();
27796 }
27797
27798 for (i = 0; i < n; i++)
27799 {
27800	      /* Extend the odd element to SImode using a paradoxical SUBREG.  */
27801 op0 = gen_reg_rtx (SImode);
27802 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27803
27804 /* Insert the SImode value as low element of V4SImode vector. */
27805 op1 = gen_reg_rtx (V4SImode);
27806 op0 = gen_rtx_VEC_MERGE (V4SImode,
27807 gen_rtx_VEC_DUPLICATE (V4SImode,
27808 op0),
27809 CONST0_RTX (V4SImode),
27810 const1_rtx);
27811 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27812
27813	      /* Cast the V4SImode vector back to a vector in the original mode.  */
27814 op0 = gen_reg_rtx (mode);
27815 emit_move_insn (op0, gen_lowpart (mode, op1));
27816
27817	      /* Load the even element into the second position.  */
27818 emit_insn ((*gen_load_even) (op0,
27819 force_reg (inner_mode,
27820 ops [i + i + 1]),
27821 const1_rtx));
27822
27823 /* Cast vector to FIRST_IMODE vector. */
27824 ops[i] = gen_reg_rtx (first_imode);
27825 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27826 }
27827
27828 /* Interleave low FIRST_IMODE vectors. */
27829 for (i = j = 0; i < n; i += 2, j++)
27830 {
27831 op0 = gen_reg_rtx (first_imode);
27832 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27833
27834 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27835 ops[j] = gen_reg_rtx (second_imode);
27836 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27837 }
27838
27839 /* Interleave low SECOND_IMODE vectors. */
27840 switch (second_imode)
27841 {
27842 case V4SImode:
27843 for (i = j = 0; i < n / 2; i += 2, j++)
27844 {
27845 op0 = gen_reg_rtx (second_imode);
27846 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27847 ops[i + 1]));
27848
27849 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27850 vector. */
27851 ops[j] = gen_reg_rtx (third_imode);
27852 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27853 }
27854 second_imode = V2DImode;
27855 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27856 /* FALLTHRU */
27857
27858 case V2DImode:
27859 op0 = gen_reg_rtx (second_imode);
27860 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27861 ops[1]));
27862
27863	      /* Cast the SECOND_IMODE vector back to a vector in the original
27864		 mode.  */
27865 emit_insn (gen_rtx_SET (VOIDmode, target,
27866 gen_lowpart (mode, op0)));
27867 break;
27868
27869 default:
27870 gcc_unreachable ();
27871 }
27872 }
27873
27874 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27875 all values variable, and none identical. */
27876
27877 static void
27878 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27879 rtx target, rtx vals)
27880 {
27881 rtx ops[32], op0, op1;
27882 enum machine_mode half_mode = VOIDmode;
27883 int n, i;
27884
27885 switch (mode)
27886 {
27887 case V2SFmode:
27888 case V2SImode:
27889 if (!mmx_ok && !TARGET_SSE)
27890 break;
27891 /* FALLTHRU */
27892
27893 case V8SFmode:
27894 case V8SImode:
27895 case V4DFmode:
27896 case V4DImode:
27897 case V4SFmode:
27898 case V4SImode:
27899 case V2DFmode:
27900 case V2DImode:
27901 n = GET_MODE_NUNITS (mode);
27902 for (i = 0; i < n; i++)
27903 ops[i] = XVECEXP (vals, 0, i);
27904 ix86_expand_vector_init_concat (mode, target, ops, n);
27905 return;
27906
27907 case V32QImode:
27908 half_mode = V16QImode;
27909 goto half;
27910
27911 case V16HImode:
27912 half_mode = V8HImode;
27913 goto half;
27914
27915 half:
27916 n = GET_MODE_NUNITS (mode);
27917 for (i = 0; i < n; i++)
27918 ops[i] = XVECEXP (vals, 0, i);
27919 op0 = gen_reg_rtx (half_mode);
27920 op1 = gen_reg_rtx (half_mode);
27921 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27922 n >> 2);
27923 ix86_expand_vector_init_interleave (half_mode, op1,
27924 &ops [n >> 1], n >> 2);
27925 emit_insn (gen_rtx_SET (VOIDmode, target,
27926 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27927 return;
27928
27929 case V16QImode:
27930 if (!TARGET_SSE4_1)
27931 break;
27932 /* FALLTHRU */
27933
27934 case V8HImode:
27935 if (!TARGET_SSE2)
27936 break;
27937
27938 /* Don't use ix86_expand_vector_init_interleave if we can't
27939 move from GPR to SSE register directly. */
27940 if (!TARGET_INTER_UNIT_MOVES)
27941 break;
27942
27943 n = GET_MODE_NUNITS (mode);
27944 for (i = 0; i < n; i++)
27945 ops[i] = XVECEXP (vals, 0, i);
27946 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27947 return;
27948
27949 case V4HImode:
27950 case V8QImode:
27951 break;
27952
27953 default:
27954 gcc_unreachable ();
27955 }
27956
27957 {
27958 int i, j, n_elts, n_words, n_elt_per_word;
27959 enum machine_mode inner_mode;
27960 rtx words[4], shift;
27961
27962 inner_mode = GET_MODE_INNER (mode);
27963 n_elts = GET_MODE_NUNITS (mode);
27964 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27965 n_elt_per_word = n_elts / n_words;
27966 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27967
27968 for (i = 0; i < n_words; ++i)
27969 {
27970 rtx word = NULL_RTX;
27971
27972 for (j = 0; j < n_elt_per_word; ++j)
27973 {
27974 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27975 elt = convert_modes (word_mode, inner_mode, elt, true);
27976
27977 if (j == 0)
27978 word = elt;
27979 else
27980 {
27981 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27982 word, 1, OPTAB_LIB_WIDEN);
27983 word = expand_simple_binop (word_mode, IOR, word, elt,
27984 word, 1, OPTAB_LIB_WIDEN);
27985 }
27986 }
27987
27988 words[i] = word;
27989 }
27990
27991 if (n_words == 1)
27992 emit_move_insn (target, gen_lowpart (mode, words[0]));
27993 else if (n_words == 2)
27994 {
27995 rtx tmp = gen_reg_rtx (mode);
27996 emit_clobber (tmp);
27997 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27998 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27999 emit_move_insn (target, tmp);
28000 }
28001 else if (n_words == 4)
28002 {
28003 rtx tmp = gen_reg_rtx (V4SImode);
28004 gcc_assert (word_mode == SImode);
28005 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28006 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28007 emit_move_insn (target, gen_lowpart (mode, tmp));
28008 }
28009 else
28010 gcc_unreachable ();
28011 }
28012 }
28013
28014 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28015 instructions unless MMX_OK is true. */
28016
28017 void
28018 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28019 {
28020 enum machine_mode mode = GET_MODE (target);
28021 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28022 int n_elts = GET_MODE_NUNITS (mode);
28023 int n_var = 0, one_var = -1;
28024 bool all_same = true, all_const_zero = true;
28025 int i;
28026 rtx x;
28027
28028 for (i = 0; i < n_elts; ++i)
28029 {
28030 x = XVECEXP (vals, 0, i);
28031 if (!(CONST_INT_P (x)
28032 || GET_CODE (x) == CONST_DOUBLE
28033 || GET_CODE (x) == CONST_FIXED))
28034 n_var++, one_var = i;
28035 else if (x != CONST0_RTX (inner_mode))
28036 all_const_zero = false;
28037 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28038 all_same = false;
28039 }
28040
28041 /* Constants are best loaded from the constant pool. */
28042 if (n_var == 0)
28043 {
28044 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28045 return;
28046 }
28047
28048 /* If all values are identical, broadcast the value. */
28049 if (all_same
28050 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28051 XVECEXP (vals, 0, 0)))
28052 return;
28053
28054 /* Values where only one field is non-constant are best loaded from
28055 the pool and overwritten via move later. */
28056 if (n_var == 1)
28057 {
28058 if (all_const_zero
28059 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28060 XVECEXP (vals, 0, one_var),
28061 one_var))
28062 return;
28063
28064 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28065 return;
28066 }
28067
28068 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28069 }
28070
28071 void
28072 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28073 {
28074 enum machine_mode mode = GET_MODE (target);
28075 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28076 enum machine_mode half_mode;
28077 bool use_vec_merge = false;
28078 rtx tmp;
28079 static rtx (*gen_extract[6][2]) (rtx, rtx)
28080 = {
28081 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28082 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28083 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28084 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28085 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28086 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28087 };
28088 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28089 = {
28090 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28091 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28092 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28093 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28094 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28095 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28096 };
28097 int i, j, n;
28098
28099 switch (mode)
28100 {
28101 case V2SFmode:
28102 case V2SImode:
28103 if (mmx_ok)
28104 {
28105 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28106 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28107 if (elt == 0)
28108 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28109 else
28110 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28112 return;
28113 }
28114 break;
28115
28116 case V2DImode:
28117 use_vec_merge = TARGET_SSE4_1;
28118 if (use_vec_merge)
28119 break;
28120
28121 case V2DFmode:
28122 {
28123 rtx op0, op1;
28124
28125 /* For the two element vectors, we implement a VEC_CONCAT with
28126 the extraction of the other element. */
28127
28128 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28129 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28130
28131 if (elt == 0)
28132 op0 = val, op1 = tmp;
28133 else
28134 op0 = tmp, op1 = val;
28135
28136 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28137 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28138 }
28139 return;
28140
28141 case V4SFmode:
28142 use_vec_merge = TARGET_SSE4_1;
28143 if (use_vec_merge)
28144 break;
28145
28146 switch (elt)
28147 {
28148 case 0:
28149 use_vec_merge = true;
28150 break;
28151
28152 case 1:
28153 /* tmp = target = A B C D */
28154 tmp = copy_to_reg (target);
28155 /* target = A A B B */
28156 emit_insn (gen_sse_unpcklps (target, target, target));
28157 /* target = X A B B */
28158 ix86_expand_vector_set (false, target, val, 0);
28159 /* target = A X C D */
28160 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28161 GEN_INT (1), GEN_INT (0),
28162 GEN_INT (2+4), GEN_INT (3+4)));
28163 return;
28164
28165 case 2:
28166 /* tmp = target = A B C D */
28167 tmp = copy_to_reg (target);
28168 /* tmp = X B C D */
28169 ix86_expand_vector_set (false, tmp, val, 0);
28170 /* target = A B X D */
28171 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28172 GEN_INT (0), GEN_INT (1),
28173 GEN_INT (0+4), GEN_INT (3+4)));
28174 return;
28175
28176 case 3:
28177 /* tmp = target = A B C D */
28178 tmp = copy_to_reg (target);
28179 /* tmp = X B C D */
28180 ix86_expand_vector_set (false, tmp, val, 0);
28181	  /* target = A B C X */
28182 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28183 GEN_INT (0), GEN_INT (1),
28184 GEN_INT (2+4), GEN_INT (0+4)));
28185 return;
28186
28187 default:
28188 gcc_unreachable ();
28189 }
28190 break;
28191
28192 case V4SImode:
28193 use_vec_merge = TARGET_SSE4_1;
28194 if (use_vec_merge)
28195 break;
28196
28197 /* Element 0 handled by vec_merge below. */
28198 if (elt == 0)
28199 {
28200 use_vec_merge = true;
28201 break;
28202 }
28203
28204 if (TARGET_SSE2)
28205 {
28206 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28207 store into element 0, then shuffle them back. */
28208
28209 rtx order[4];
28210
28211 order[0] = GEN_INT (elt);
28212 order[1] = const1_rtx;
28213 order[2] = const2_rtx;
28214 order[3] = GEN_INT (3);
28215 order[elt] = const0_rtx;
28216
28217 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28218 order[1], order[2], order[3]));
28219
28220 ix86_expand_vector_set (false, target, val, 0);
28221
28222 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28223 order[1], order[2], order[3]));
28224 }
28225 else
28226 {
28227 /* For SSE1, we have to reuse the V4SF code. */
28228 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28229 gen_lowpart (SFmode, val), elt);
28230 }
28231 return;
28232
28233 case V8HImode:
28234 use_vec_merge = TARGET_SSE2;
28235 break;
28236 case V4HImode:
28237 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28238 break;
28239
28240 case V16QImode:
28241 use_vec_merge = TARGET_SSE4_1;
28242 break;
28243
28244 case V8QImode:
28245 break;
28246
28247 case V32QImode:
28248 half_mode = V16QImode;
28249 j = 0;
28250 n = 16;
28251 goto half;
28252
28253 case V16HImode:
28254 half_mode = V8HImode;
28255 j = 1;
28256 n = 8;
28257 goto half;
28258
28259 case V8SImode:
28260 half_mode = V4SImode;
28261 j = 2;
28262 n = 4;
28263 goto half;
28264
28265 case V4DImode:
28266 half_mode = V2DImode;
28267 j = 3;
28268 n = 2;
28269 goto half;
28270
28271 case V8SFmode:
28272 half_mode = V4SFmode;
28273 j = 4;
28274 n = 4;
28275 goto half;
28276
28277 case V4DFmode:
28278 half_mode = V2DFmode;
28279 j = 5;
28280 n = 2;
28281 goto half;
28282
28283 half:
28284 /* Compute offset. */
28285 i = elt / n;
28286 elt %= n;
28287
28288 gcc_assert (i <= 1);
28289
28290 /* Extract the half. */
28291 tmp = gen_reg_rtx (half_mode);
28292 emit_insn ((*gen_extract[j][i]) (tmp, target));
28293
28294 /* Put val in tmp at elt. */
28295 ix86_expand_vector_set (false, tmp, val, elt);
28296
28297 /* Put it back. */
28298 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28299 return;
28300
28301 default:
28302 break;
28303 }
28304
28305 if (use_vec_merge)
28306 {
28307 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28308 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28309 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28310 }
28311 else
28312 {
28313 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28314
28315 emit_move_insn (mem, target);
28316
28317 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28318 emit_move_insn (tmp, val);
28319
28320 emit_move_insn (target, mem);
28321 }
28322 }
28323
28324 void
28325 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28326 {
28327 enum machine_mode mode = GET_MODE (vec);
28328 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28329 bool use_vec_extr = false;
28330 rtx tmp;
28331
28332 switch (mode)
28333 {
28334 case V2SImode:
28335 case V2SFmode:
28336 if (!mmx_ok)
28337 break;
28338 /* FALLTHRU */
28339
28340 case V2DFmode:
28341 case V2DImode:
28342 use_vec_extr = true;
28343 break;
28344
28345 case V4SFmode:
28346 use_vec_extr = TARGET_SSE4_1;
28347 if (use_vec_extr)
28348 break;
28349
28350 switch (elt)
28351 {
28352 case 0:
28353 tmp = vec;
28354 break;
28355
28356 case 1:
28357 case 3:
28358 tmp = gen_reg_rtx (mode);
28359 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28360 GEN_INT (elt), GEN_INT (elt),
28361 GEN_INT (elt+4), GEN_INT (elt+4)));
28362 break;
28363
28364 case 2:
28365 tmp = gen_reg_rtx (mode);
28366 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28367 break;
28368
28369 default:
28370 gcc_unreachable ();
28371 }
28372 vec = tmp;
28373 use_vec_extr = true;
28374 elt = 0;
28375 break;
28376
28377 case V4SImode:
28378 use_vec_extr = TARGET_SSE4_1;
28379 if (use_vec_extr)
28380 break;
28381
28382 if (TARGET_SSE2)
28383 {
28384 switch (elt)
28385 {
28386 case 0:
28387 tmp = vec;
28388 break;
28389
28390 case 1:
28391 case 3:
28392 tmp = gen_reg_rtx (mode);
28393 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28394 GEN_INT (elt), GEN_INT (elt),
28395 GEN_INT (elt), GEN_INT (elt)));
28396 break;
28397
28398 case 2:
28399 tmp = gen_reg_rtx (mode);
28400 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28401 break;
28402
28403 default:
28404 gcc_unreachable ();
28405 }
28406 vec = tmp;
28407 use_vec_extr = true;
28408 elt = 0;
28409 }
28410 else
28411 {
28412 /* For SSE1, we have to reuse the V4SF code. */
28413 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28414 gen_lowpart (V4SFmode, vec), elt);
28415 return;
28416 }
28417 break;
28418
28419 case V8HImode:
28420 use_vec_extr = TARGET_SSE2;
28421 break;
28422 case V4HImode:
28423 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28424 break;
28425
28426 case V16QImode:
28427 use_vec_extr = TARGET_SSE4_1;
28428 break;
28429
28430 case V8QImode:
28431 /* ??? Could extract the appropriate HImode element and shift. */
28432 default:
28433 break;
28434 }
28435
28436 if (use_vec_extr)
28437 {
28438 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28439 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28440
28441 /* Let the rtl optimizers know about the zero extension performed. */
28442 if (inner_mode == QImode || inner_mode == HImode)
28443 {
28444 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28445 target = gen_lowpart (SImode, target);
28446 }
28447
28448 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28449 }
28450 else
28451 {
28452 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28453
28454 emit_move_insn (mem, vec);
28455
28456 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28457 emit_move_insn (target, tmp);
28458 }
28459 }
28460
28461 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28462 pattern to reduce; DEST is the destination; IN is the input vector. */
28463
28464 void
28465 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28466 {
28467 rtx tmp1, tmp2, tmp3;
28468
28469 tmp1 = gen_reg_rtx (V4SFmode);
28470 tmp2 = gen_reg_rtx (V4SFmode);
28471 tmp3 = gen_reg_rtx (V4SFmode);
28472
28473 emit_insn (gen_sse_movhlps (tmp1, in, in));
28474 emit_insn (fn (tmp2, tmp1, in));
28475
28476 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28477 GEN_INT (1), GEN_INT (1),
28478 GEN_INT (1+4), GEN_INT (1+4)));
28479 emit_insn (fn (dest, tmp2, tmp3));
28480 }
28481
28482 /* Target hook for scalar_mode_supported_p. */
28483 static bool
28484 ix86_scalar_mode_supported_p (enum machine_mode mode)
28485 {
28486 if (DECIMAL_FLOAT_MODE_P (mode))
28487 return true;
28488 else if (mode == TFmode)
28489 return true;
28490 else
28491 return default_scalar_mode_supported_p (mode);
28492 }
28493
28494 /* Implements target hook vector_mode_supported_p. */
28495 static bool
28496 ix86_vector_mode_supported_p (enum machine_mode mode)
28497 {
28498 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28499 return true;
28500 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28501 return true;
28502 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28503 return true;
28504 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28505 return true;
28506 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28507 return true;
28508 return false;
28509 }
28510
28511 /* Target hook for c_mode_for_suffix. */
28512 static enum machine_mode
28513 ix86_c_mode_for_suffix (char suffix)
28514 {
28515 if (suffix == 'q')
28516 return TFmode;
28517 if (suffix == 'w')
28518 return XFmode;
28519
28520 return VOIDmode;
28521 }
28522
28523 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28524
28525 We do this in the new i386 backend to maintain source compatibility
28526 with the old cc0-based compiler. */
28527
28528 static tree
28529 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28530 tree inputs ATTRIBUTE_UNUSED,
28531 tree clobbers)
28532 {
28533 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28534 clobbers);
28535 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28536 clobbers);
28537 return clobbers;
28538 }
28539
28540 /* Implements the targetm.encode_section_info target hook.  This
28541 hook is not used by NetWare. */
28542
28543 static void ATTRIBUTE_UNUSED
28544 ix86_encode_section_info (tree decl, rtx rtl, int first)
28545 {
28546 default_encode_section_info (decl, rtl, first);
28547
28548 if (TREE_CODE (decl) == VAR_DECL
28549 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28550 && ix86_in_large_data_p (decl))
28551 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28552 }
28553
28554 /* Worker function for REVERSE_CONDITION. */
28555
28556 enum rtx_code
28557 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28558 {
28559 return (mode != CCFPmode && mode != CCFPUmode
28560 ? reverse_condition (code)
28561 : reverse_condition_maybe_unordered (code));
28562 }
28563
28564 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28565 to OPERANDS[0]. */
28566
28567 const char *
28568 output_387_reg_move (rtx insn, rtx *operands)
28569 {
28570 if (REG_P (operands[0]))
28571 {
28572 if (REG_P (operands[1])
28573 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28574 {
28575 if (REGNO (operands[0]) == FIRST_STACK_REG)
28576 return output_387_ffreep (operands, 0);
28577 return "fstp\t%y0";
28578 }
28579 if (STACK_TOP_P (operands[0]))
28580 return "fld%z1\t%y1";
28581 return "fst\t%y0";
28582 }
28583 else if (MEM_P (operands[0]))
28584 {
28585 gcc_assert (REG_P (operands[1]));
28586 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28587 return "fstp%z0\t%y0";
28588 else
28589 {
28590 /* There is no non-popping store to memory for XFmode.
28591 So if we need one, follow the store with a load. */
28592 if (GET_MODE (operands[0]) == XFmode)
28593 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28594 else
28595 return "fst%z0\t%y0";
28596 }
28597 }
28598 else
28599 gcc_unreachable ();
28600 }
28601
28602 /* Output code to perform a conditional jump to LABEL if the C2 flag in
28603 the FP status register is set. */
28604
28605 void
28606 ix86_emit_fp_unordered_jump (rtx label)
28607 {
28608 rtx reg = gen_reg_rtx (HImode);
28609 rtx temp;
28610
28611 emit_insn (gen_x86_fnstsw_1 (reg));
28612
28613 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28614 {
28615 emit_insn (gen_x86_sahf_1 (reg));
28616
28617 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28618 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28619 }
28620 else
28621 {
28622 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28623
28624 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28625 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28626 }
28627
28628 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28629 gen_rtx_LABEL_REF (VOIDmode, label),
28630 pc_rtx);
28631 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28632
28633 emit_jump_insn (temp);
28634 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28635 }
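/* A note on the two code paths above (a summary of the usual fnstsw/sahf
   idiom; bit assignments follow the IA-32 architecture manuals): fnstsw
   stores the FPU status word, whose high byte carries C0 in bit 0, C2 in
   bit 2 and C3 in bit 6.  The TARGET_SAHF path copies that byte into
   EFLAGS (sahf maps C0 to CF, C2 to PF and C3 to ZF), so the UNORDERED
   test on the flags register becomes a jump on PF; the fallback path
   instead tests 0x04 in the high status byte directly, i.e. the C2 flag
   named in the comment above.  */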
28636
28637 /* Output code to perform a log1p XFmode calculation. */
28638
28639 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28640 {
28641 rtx label1 = gen_label_rtx ();
28642 rtx label2 = gen_label_rtx ();
28643
28644 rtx tmp = gen_reg_rtx (XFmode);
28645 rtx tmp2 = gen_reg_rtx (XFmode);
28646
28647 emit_insn (gen_absxf2 (tmp, op1));
28648 emit_insn (gen_cmpxf (tmp,
28649 CONST_DOUBLE_FROM_REAL_VALUE (
28650 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28651 XFmode)));
28652 emit_jump_insn (gen_bge (label1));
28653
28654 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28655 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28656 emit_jump (label2);
28657
28658 emit_label (label1);
28659 emit_move_insn (tmp, CONST1_RTX (XFmode));
28660 emit_insn (gen_addxf3 (tmp, op1, tmp));
28661 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28662 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28663
28664 emit_label (label2);
28665 }
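/* A note on the threshold used above: fyl2xp1 computes y * log2 (x + 1)
   but is only specified for |x| < 1 - sqrt(2)/2 ~= 0.29289321881..., which
   is exactly the constant compared against (range limit as documented for
   the x87 fyl2xp1 instruction).  Both branches load ln (2) (fldln2) as y,
   and since

     ln (2) * log2 (1 + x) == ln (1 + x),

   the small-|x| path uses fyl2xp1 directly while the fallback path
   computes fyl2x on (1 + x) instead.  */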
28666
28667 /* Output code to perform a Newton-Raphson approximation of a single-precision
28668 floating-point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28669
28670 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28671 {
28672 rtx x0, x1, e0, e1, two;
28673
28674 x0 = gen_reg_rtx (mode);
28675 e0 = gen_reg_rtx (mode);
28676 e1 = gen_reg_rtx (mode);
28677 x1 = gen_reg_rtx (mode);
28678
28679 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28680
28681 if (VECTOR_MODE_P (mode))
28682 two = ix86_build_const_vector (SFmode, true, two);
28683
28684 two = force_reg (mode, two);
28685
28686 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28687
28688 /* x0 = rcp(b) estimate */
28689 emit_insn (gen_rtx_SET (VOIDmode, x0,
28690 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28691 UNSPEC_RCP)));
28692 /* e0 = x0 * b */
28693 emit_insn (gen_rtx_SET (VOIDmode, e0,
28694 gen_rtx_MULT (mode, x0, b)));
28695 /* e1 = 2. - e0 */
28696 emit_insn (gen_rtx_SET (VOIDmode, e1,
28697 gen_rtx_MINUS (mode, two, e0)));
28698 /* x1 = x0 * e1 */
28699 emit_insn (gen_rtx_SET (VOIDmode, x1,
28700 gen_rtx_MULT (mode, x0, e1)));
28701 /* res = a * x1 */
28702 emit_insn (gen_rtx_SET (VOIDmode, res,
28703 gen_rtx_MULT (mode, a, x1)));
28704 }
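/* A minimal scalar sketch of the refinement emitted above (illustrative
   only; 1.0f / b stands in for the RCPSS estimate, which in reality has
   roughly 12 bits of precision, and plain float arithmetic stands in for
   the SSE operations):

     static float
     swdiv_sketch (float a, float b)
     {
       float x0 = 1.0f / b;       // x0 = rcp (b) estimate
       float e0 = x0 * b;         // e0 = x0 * b
       float e1 = 2.0f - e0;      // e1 = 2 - e0
       float x1 = x0 * e1;        // x1 = x0 * e1, one Newton-Raphson step
       return a * x1;             // res = a * x1
     }

   One Newton-Raphson step roughly doubles the number of correct bits in
   the reciprocal estimate, which is why a single refinement of the
   rcpps/rcpss result is close to full single precision.  */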
28705
28706 /* Output code to perform a Newton-Raphson approximation of a
28707 single-precision floating-point [reciprocal] square root. */
28708
28709 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28710 bool recip)
28711 {
28712 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28713 REAL_VALUE_TYPE r;
28714
28715 x0 = gen_reg_rtx (mode);
28716 e0 = gen_reg_rtx (mode);
28717 e1 = gen_reg_rtx (mode);
28718 e2 = gen_reg_rtx (mode);
28719 e3 = gen_reg_rtx (mode);
28720
28721 real_from_integer (&r, VOIDmode, -3, -1, 0);
28722 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28723
28724 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28725 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28726
28727 if (VECTOR_MODE_P (mode))
28728 {
28729 mthree = ix86_build_const_vector (SFmode, true, mthree);
28730 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28731 }
28732
28733 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28734 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28735
28736 /* x0 = rsqrt(a) estimate */
28737 emit_insn (gen_rtx_SET (VOIDmode, x0,
28738 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28739 UNSPEC_RSQRT)));
28740
28741 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28742 if (!recip)
28743 {
28744 rtx zero, mask;
28745
28746 zero = gen_reg_rtx (mode);
28747 mask = gen_reg_rtx (mode);
28748
28749 zero = force_reg (mode, CONST0_RTX (mode));
28750 emit_insn (gen_rtx_SET (VOIDmode, mask,
28751 gen_rtx_NE (mode, zero, a)));
28752
28753 emit_insn (gen_rtx_SET (VOIDmode, x0,
28754 gen_rtx_AND (mode, x0, mask)));
28755 }
28756
28757 /* e0 = x0 * a */
28758 emit_insn (gen_rtx_SET (VOIDmode, e0,
28759 gen_rtx_MULT (mode, x0, a)));
28760 /* e1 = e0 * x0 */
28761 emit_insn (gen_rtx_SET (VOIDmode, e1,
28762 gen_rtx_MULT (mode, e0, x0)));
28763
28764 /* e2 = e1 - 3. */
28765 mthree = force_reg (mode, mthree);
28766 emit_insn (gen_rtx_SET (VOIDmode, e2,
28767 gen_rtx_PLUS (mode, e1, mthree)));
28768
28769 mhalf = force_reg (mode, mhalf);
28770 if (recip)
28771 /* e3 = -.5 * x0 */
28772 emit_insn (gen_rtx_SET (VOIDmode, e3,
28773 gen_rtx_MULT (mode, x0, mhalf)));
28774 else
28775 /* e3 = -.5 * e0 */
28776 emit_insn (gen_rtx_SET (VOIDmode, e3,
28777 gen_rtx_MULT (mode, e0, mhalf)));
28778 /* ret = e2 * e3 */
28779 emit_insn (gen_rtx_SET (VOIDmode, res,
28780 gen_rtx_MULT (mode, e2, e3)));
28781 }
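/* A minimal scalar sketch of the algebra emitted above (illustrative only;
   1.0f / sqrtf (a) stands in for the RSQRTSS estimate and needs <math.h>,
   and the zero masking is omitted):

     static float
     swsqrt_sketch (float a, int recip)
     {
       float x0 = 1.0f / sqrtf (a);          // x0 = rsqrt (a) estimate
       float e0 = x0 * a;                    // e0 = x0 * a
       float e1 = e0 * x0;                   // e1 = a * x0 * x0
       float e2 = e1 - 3.0f;                 // e2 = e1 - 3
       float e3 = (recip ? x0 : e0) * -0.5f; // e3 = -0.5 * x0  or  -0.5 * e0
       return e2 * e3;
     }

   This is the standard Newton-Raphson step for 1/sqrt, x1 = 0.5 * x0 *
   (3 - a * x0 * x0), with the sign folded into the -3 and -0.5 constants.
   The (a == 0.0) masking in the real sequence zeroes the infinite rsqrt
   estimate so that the multiply by a does not produce a NaN for
   sqrt (0.0).  */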
28782
28783 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28784
28785 static void ATTRIBUTE_UNUSED
28786 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28787 tree decl)
28788 {
28789 /* With Binutils 2.15, the "@unwind" marker must be specified on
28790 every occurrence of the ".eh_frame" section, not just the first
28791 one. */
28792 if (TARGET_64BIT
28793 && strcmp (name, ".eh_frame") == 0)
28794 {
28795 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28796 flags & SECTION_WRITE ? "aw" : "a");
28797 return;
28798 }
28799 default_elf_asm_named_section (name, flags, decl);
28800 }
28801
28802 /* Return the mangling of TYPE if it is an extended fundamental type. */
28803
28804 static const char *
28805 ix86_mangle_type (const_tree type)
28806 {
28807 type = TYPE_MAIN_VARIANT (type);
28808
28809 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28810 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28811 return NULL;
28812
28813 switch (TYPE_MODE (type))
28814 {
28815 case TFmode:
28816 /* __float128 is "g". */
28817 return "g";
28818 case XFmode:
28819 /* "long double" or __float80 is "e". */
28820 return "e";
28821 default:
28822 return NULL;
28823 }
28824 }
28825
28826 /* For 32-bit code we can save PIC register setup by using the
28827 __stack_chk_fail_local hidden function instead of calling
28828 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
28829 register, so it is better to call __stack_chk_fail directly. */
28830
28831 static tree
28832 ix86_stack_protect_fail (void)
28833 {
28834 return TARGET_64BIT
28835 ? default_external_stack_protect_fail ()
28836 : default_hidden_stack_protect_fail ();
28837 }
28838
28839 /* Select a format to encode pointers in exception handling data. CODE
28840 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28841 true if the symbol may be affected by dynamic relocations.
28842
28843 ??? All x86 object file formats are capable of representing this.
28844 After all, the relocation needed is the same as for the call insn.
28845 Whether or not a particular assembler allows us to enter such, I
28846 guess we'll have to see. */
28847 int
28848 asm_preferred_eh_data_format (int code, int global)
28849 {
28850 if (flag_pic)
28851 {
28852 int type = DW_EH_PE_sdata8;
28853 if (!TARGET_64BIT
28854 || ix86_cmodel == CM_SMALL_PIC
28855 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28856 type = DW_EH_PE_sdata4;
28857 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28858 }
28859 if (ix86_cmodel == CM_SMALL
28860 || (ix86_cmodel == CM_MEDIUM && code))
28861 return DW_EH_PE_udata4;
28862 return DW_EH_PE_absptr;
28863 }
28864
28865 /* Expand copysign from SIGN to the positive value ABS_VALUE, storing the
28866 result in RESULT. If MASK is non-null, it must be a mask that masks out
28867 the sign bit. */
28868 static void
28869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28870 {
28871 enum machine_mode mode = GET_MODE (sign);
28872 rtx sgn = gen_reg_rtx (mode);
28873 if (mask == NULL_RTX)
28874 {
28875 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28876 if (!VECTOR_MODE_P (mode))
28877 {
28878 /* We need to generate a scalar mode mask in this case. */
28879 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28880 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28881 mask = gen_reg_rtx (mode);
28882 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28883 }
28884 }
28885 else
28886 mask = gen_rtx_NOT (mode, mask);
28887 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28888 gen_rtx_AND (mode, mask, sign)));
28889 emit_insn (gen_rtx_SET (VOIDmode, result,
28890 gen_rtx_IOR (mode, abs_value, sgn)));
28891 }
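/* Bit-level sketch of what the mask dance above computes (a hypothetical
   scalar illustration only; the real code works on SSE registers and, for
   vector modes, on every element at once):

     static float
     copysign_to_positive_sketch (float abs_value, float sign)
     {
       union { float f; unsigned int u; } a, s;
       a.f = abs_value;            // assumed to be non-negative already
       s.f = sign;
       a.u |= s.u & 0x80000000u;   // sgn = sign & signbit;  res = abs | sgn
       return a.f;
     }

   When the caller passes in the fabs mask (all bits except the sign bit),
   the code uses its complement instead of rebuilding the sign-bit mask.  */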
28892
28893 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28894 mask for masking out the sign-bit is stored in *SMASK, if that is
28895 non-null. */
28896 static rtx
28897 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28898 {
28899 enum machine_mode mode = GET_MODE (op0);
28900 rtx xa, mask;
28901
28902 xa = gen_reg_rtx (mode);
28903 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28904 if (!VECTOR_MODE_P (mode))
28905 {
28906 /* We need to generate a scalar mode mask in this case. */
28907 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28908 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28909 mask = gen_reg_rtx (mode);
28910 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28911 }
28912 emit_insn (gen_rtx_SET (VOIDmode, xa,
28913 gen_rtx_AND (mode, op0, mask)));
28914
28915 if (smask)
28916 *smask = mask;
28917
28918 return xa;
28919 }
28920
28921 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28922 swapping the operands if SWAP_OPERANDS is true. The expanded
28923 code is a forward jump to a newly created label in case the
28924 comparison is true. The generated label rtx is returned. */
28925 static rtx
28926 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28927 bool swap_operands)
28928 {
28929 rtx label, tmp;
28930
28931 if (swap_operands)
28932 {
28933 tmp = op0;
28934 op0 = op1;
28935 op1 = tmp;
28936 }
28937
28938 label = gen_label_rtx ();
28939 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28940 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28941 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28942 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28943 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28944 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28945 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28946 JUMP_LABEL (tmp) = label;
28947
28948 return label;
28949 }
28950
28951 /* Expand a mask-generating SSE comparison instruction comparing OP0 with OP1
28952 using comparison code CODE. Operands are swapped for the comparison if
28953 SWAP_OPERANDS is true. Returns an rtx for the generated mask. */
28954 static rtx
28955 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28956 bool swap_operands)
28957 {
28958 enum machine_mode mode = GET_MODE (op0);
28959 rtx mask = gen_reg_rtx (mode);
28960
28961 if (swap_operands)
28962 {
28963 rtx tmp = op0;
28964 op0 = op1;
28965 op1 = tmp;
28966 }
28967
28968 if (mode == DFmode)
28969 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28970 gen_rtx_fmt_ee (code, mode, op0, op1)));
28971 else
28972 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28973 gen_rtx_fmt_ee (code, mode, op0, op1)));
28974
28975 return mask;
28976 }
28977
28978 /* Generate and return an rtx of mode MODE for 2**n, where n is the number
28979 of bits in the mantissa of MODE, which must be one of DFmode or SFmode. */
28980 static rtx
28981 ix86_gen_TWO52 (enum machine_mode mode)
28982 {
28983 REAL_VALUE_TYPE TWO52r;
28984 rtx TWO52;
28985
28986 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28987 TWO52 = const_double_from_real_value (TWO52r, mode);
28988 TWO52 = force_reg (mode, TWO52);
28989
28990 return TWO52;
28991 }
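/* The constant above enables the classic add-and-subtract rounding trick
   used by the expanders below: for |x| < 2**52 (DFmode), the sum x + 2**52
   lies in [2**52, 2**53), where representable values are spaced exactly 1
   apart, so the addition itself rounds x to an integer; subtracting 2**52
   again recovers that integer exactly.  A hedged scalar illustration
   (DFmode, round-to-nearest assumed):

     double
     nearbyint_sketch (double x)            // assumes 0 <= x < 2**52
     {
       double t = x + 4503599627370496.0;   // 2**52; rounds x to an integer
       return t - 4503599627370496.0;       // exact subtraction
     }

   Values with |x| >= 2**52 are already integral, which is why every
   expander first branches away when !isless (fabs (x), TWO52).  */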
28992
28993 /* Expand SSE sequence for computing lround from OP1 storing
28994 into OP0. */
28995 void
28996 ix86_expand_lround (rtx op0, rtx op1)
28997 {
28998 /* C code for the stuff we're doing below:
28999 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
29000 return (long)tmp;
29001 */
29002 enum machine_mode mode = GET_MODE (op1);
29003 const struct real_format *fmt;
29004 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29005 rtx adj;
29006
29007 /* load nextafter (0.5, 0.0) */
29008 fmt = REAL_MODE_FORMAT (mode);
29009 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29010 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29011
29012 /* adj = copysign (0.5, op1) */
29013 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29014 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29015
29016 /* adj = op1 + adj */
29017 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29018
29019 /* op0 = (imode)adj */
29020 expand_fix (op0, adj, 0);
29021 }
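/* Why nextafter (0.5, 0.0) rather than 0.5: for an argument just below one
   half, adding exactly 0.5 can round up across the halfway point (in DFmode,
   0.49999999999999994 + 0.5 rounds to 1.0 under round-to-nearest-even), so
   lround would return 1 for a value that should round to 0.  Adding the
   largest value strictly below 0.5 keeps the sum below 1.0 for every such
   argument.  */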
29022
29023 /* Expand SSE2 sequence for computing lfloor or lceil (depending on DO_FLOOR)
29024 from OP1 storing into OP0. */
29025 void
29026 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29027 {
29028 /* C code for the stuff we're doing below (for do_floor):
29029 xi = (long)op1;
29030 xi -= (double)xi > op1 ? 1 : 0;
29031 return xi;
29032 */
29033 enum machine_mode fmode = GET_MODE (op1);
29034 enum machine_mode imode = GET_MODE (op0);
29035 rtx ireg, freg, label, tmp;
29036
29037 /* reg = (long)op1 */
29038 ireg = gen_reg_rtx (imode);
29039 expand_fix (ireg, op1, 0);
29040
29041 /* freg = (double)reg */
29042 freg = gen_reg_rtx (fmode);
29043 expand_float (freg, ireg, 0);
29044
29045 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29046 label = ix86_expand_sse_compare_and_jump (UNLE,
29047 freg, op1, !do_floor);
29048 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29049 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29050 emit_move_insn (ireg, tmp);
29051
29052 emit_label (label);
29053 LABEL_NUSES (label) = 1;
29054
29055 emit_move_insn (op0, ireg);
29056 }
29057
29058 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29059 result in OPERAND0. */
29060 void
29061 ix86_expand_rint (rtx operand0, rtx operand1)
29062 {
29063 /* C code for the stuff we're doing below:
29064 xa = fabs (operand1);
29065 if (!isless (xa, 2**52))
29066 return operand1;
29067 xa = xa + 2**52 - 2**52;
29068 return copysign (xa, operand1);
29069 */
29070 enum machine_mode mode = GET_MODE (operand0);
29071 rtx res, xa, label, TWO52, mask;
29072
29073 res = gen_reg_rtx (mode);
29074 emit_move_insn (res, operand1);
29075
29076 /* xa = abs (operand1) */
29077 xa = ix86_expand_sse_fabs (res, &mask);
29078
29079 /* if (!isless (xa, TWO52)) goto label; */
29080 TWO52 = ix86_gen_TWO52 (mode);
29081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29082
29083 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29084 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29085
29086 ix86_sse_copysign_to_positive (res, xa, res, mask);
29087
29088 emit_label (label);
29089 LABEL_NUSES (label) = 1;
29090
29091 emit_move_insn (operand0, res);
29092 }
29093
29094 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29095 into OPERAND0, without relying on DImode truncation (for DFmode on 32-bit targets). */
29096 void
29097 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29098 {
29099 /* C code for the stuff we expand below.
29100 double xa = fabs (x), x2;
29101 if (!isless (xa, TWO52))
29102 return x;
29103 xa = xa + TWO52 - TWO52;
29104 x2 = copysign (xa, x);
29105 Compensate. Floor:
29106 if (x2 > x)
29107 x2 -= 1;
29108 Compensate. Ceil:
29109 if (x2 < x)
29110 x2 -= -1;
29111 return x2;
29112 */
29113 enum machine_mode mode = GET_MODE (operand0);
29114 rtx xa, TWO52, tmp, label, one, res, mask;
29115
29116 TWO52 = ix86_gen_TWO52 (mode);
29117
29118 /* Temporary for holding the result, initialized to the input
29119 operand to ease control flow. */
29120 res = gen_reg_rtx (mode);
29121 emit_move_insn (res, operand1);
29122
29123 /* xa = abs (operand1) */
29124 xa = ix86_expand_sse_fabs (res, &mask);
29125
29126 /* if (!isless (xa, TWO52)) goto label; */
29127 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29128
29129 /* xa = xa + TWO52 - TWO52; */
29130 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29131 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29132
29133 /* xa = copysign (xa, operand1) */
29134 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29135
29136 /* generate 1.0 or -1.0 */
29137 one = force_reg (mode,
29138 const_double_from_real_value (do_floor
29139 ? dconst1 : dconstm1, mode));
29140
29141 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29142 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29143 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29144 gen_rtx_AND (mode, one, tmp)));
29145 /* We always need to subtract here to preserve signed zero. */
29146 tmp = expand_simple_binop (mode, MINUS,
29147 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29148 emit_move_insn (res, tmp);
29149
29150 emit_label (label);
29151 LABEL_NUSES (label) = 1;
29152
29153 emit_move_insn (operand0, res);
29154 }
29155
29156 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29157 into OPERAND0. */
29158 void
29159 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29160 {
29161 /* C code for the stuff we expand below.
29162 double xa = fabs (x), x2;
29163 if (!isless (xa, TWO52))
29164 return x;
29165 x2 = (double)(long)x;
29166 Compensate. Floor:
29167 if (x2 > x)
29168 x2 -= 1;
29169 Compensate. Ceil:
29170 if (x2 < x)
29171 x2 += 1;
29172 if (HONOR_SIGNED_ZEROS (mode))
29173 return copysign (x2, x);
29174 return x2;
29175 */
29176 enum machine_mode mode = GET_MODE (operand0);
29177 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29178
29179 TWO52 = ix86_gen_TWO52 (mode);
29180
29181 /* Temporary for holding the result, initialized to the input
29182 operand to ease control flow. */
29183 res = gen_reg_rtx (mode);
29184 emit_move_insn (res, operand1);
29185
29186 /* xa = abs (operand1) */
29187 xa = ix86_expand_sse_fabs (res, &mask);
29188
29189 /* if (!isless (xa, TWO52)) goto label; */
29190 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29191
29192 /* xa = (double)(long)x */
29193 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29194 expand_fix (xi, res, 0);
29195 expand_float (xa, xi, 0);
29196
29197 /* generate 1.0 */
29198 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29199
29200 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29201 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29202 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29203 gen_rtx_AND (mode, one, tmp)));
29204 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29205 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29206 emit_move_insn (res, tmp);
29207
29208 if (HONOR_SIGNED_ZEROS (mode))
29209 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29210
29211 emit_label (label);
29212 LABEL_NUSES (label) = 1;
29213
29214 emit_move_insn (operand0, res);
29215 }
29216
29217 /* Expand SSE sequence for computing round from OPERAND1 storing
29218 into OPERAND0. This sequence works without relying on DImode truncation
29219 via cvttsd2siq, which is only available on 64-bit targets. */
29220 void
29221 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29222 {
29223 /* C code for the stuff we expand below.
29224 double xa = fabs (x), xa2, x2;
29225 if (!isless (xa, TWO52))
29226 return x;
29227 Using the absolute value and copying back sign makes
29228 -0.0 -> -0.0 correct.
29229 xa2 = xa + TWO52 - TWO52;
29230 Compensate.
29231 dxa = xa2 - xa;
29232 if (dxa <= -0.5)
29233 xa2 += 1;
29234 else if (dxa > 0.5)
29235 xa2 -= 1;
29236 x2 = copysign (xa2, x);
29237 return x2;
29238 */
29239 enum machine_mode mode = GET_MODE (operand0);
29240 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29241
29242 TWO52 = ix86_gen_TWO52 (mode);
29243
29244 /* Temporary for holding the result, initialized to the input
29245 operand to ease control flow. */
29246 res = gen_reg_rtx (mode);
29247 emit_move_insn (res, operand1);
29248
29249 /* xa = abs (operand1) */
29250 xa = ix86_expand_sse_fabs (res, &mask);
29251
29252 /* if (!isless (xa, TWO52)) goto label; */
29253 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29254
29255 /* xa2 = xa + TWO52 - TWO52; */
29256 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29257 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29258
29259 /* dxa = xa2 - xa; */
29260 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29261
29262 /* generate 0.5, 1.0 and -0.5 */
29263 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29264 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29265 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29266 0, OPTAB_DIRECT);
29267
29268 /* Compensate. */
29269 tmp = gen_reg_rtx (mode);
29270 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29271 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29272 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29273 gen_rtx_AND (mode, one, tmp)));
29274 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29275 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29276 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29277 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29278 gen_rtx_AND (mode, one, tmp)));
29279 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29280
29281 /* res = copysign (xa2, operand1) */
29282 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29283
29284 emit_label (label);
29285 LABEL_NUSES (label) = 1;
29286
29287 emit_move_insn (operand0, res);
29288 }
29289
29290 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29291 into OPERAND0. */
29292 void
29293 ix86_expand_trunc (rtx operand0, rtx operand1)
29294 {
29295 /* C code for SSE variant we expand below.
29296 double xa = fabs (x), x2;
29297 if (!isless (xa, TWO52))
29298 return x;
29299 x2 = (double)(long)x;
29300 if (HONOR_SIGNED_ZEROS (mode))
29301 return copysign (x2, x);
29302 return x2;
29303 */
29304 enum machine_mode mode = GET_MODE (operand0);
29305 rtx xa, xi, TWO52, label, res, mask;
29306
29307 TWO52 = ix86_gen_TWO52 (mode);
29308
29309 /* Temporary for holding the result, initialized to the input
29310 operand to ease control flow. */
29311 res = gen_reg_rtx (mode);
29312 emit_move_insn (res, operand1);
29313
29314 /* xa = abs (operand1) */
29315 xa = ix86_expand_sse_fabs (res, &mask);
29316
29317 /* if (!isless (xa, TWO52)) goto label; */
29318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29319
29320 /* x = (double)(long)x */
29321 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29322 expand_fix (xi, res, 0);
29323 expand_float (res, xi, 0);
29324
29325 if (HONOR_SIGNED_ZEROS (mode))
29326 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29327
29328 emit_label (label);
29329 LABEL_NUSES (label) = 1;
29330
29331 emit_move_insn (operand0, res);
29332 }
29333
29334 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
29335 OPERAND0, without relying on DImode truncation (for DFmode on 32-bit targets). */
29336 void
29337 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29338 {
29339 enum machine_mode mode = GET_MODE (operand0);
29340 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29341
29342 /* C code for SSE variant we expand below.
29343 double xa = fabs (x), x2;
29344 if (!isless (xa, TWO52))
29345 return x;
29346 xa2 = xa + TWO52 - TWO52;
29347 Compensate:
29348 if (xa2 > xa)
29349 xa2 -= 1.0;
29350 x2 = copysign (xa2, x);
29351 return x2;
29352 */
29353
29354 TWO52 = ix86_gen_TWO52 (mode);
29355
29356 /* Temporary for holding the result, initialized to the input
29357 operand to ease control flow. */
29358 res = gen_reg_rtx (mode);
29359 emit_move_insn (res, operand1);
29360
29361 /* xa = abs (operand1) */
29362 xa = ix86_expand_sse_fabs (res, &smask);
29363
29364 /* if (!isless (xa, TWO52)) goto label; */
29365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29366
29367 /* res = xa + TWO52 - TWO52; */
29368 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29369 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29370 emit_move_insn (res, tmp);
29371
29372 /* generate 1.0 */
29373 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29374
29375 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29376 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29377 emit_insn (gen_rtx_SET (VOIDmode, mask,
29378 gen_rtx_AND (mode, mask, one)));
29379 tmp = expand_simple_binop (mode, MINUS,
29380 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29381 emit_move_insn (res, tmp);
29382
29383 /* res = copysign (res, operand1) */
29384 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29385
29386 emit_label (label);
29387 LABEL_NUSES (label) = 1;
29388
29389 emit_move_insn (operand0, res);
29390 }
29391
29392 /* Expand SSE sequence for computing round from OPERAND1 storing
29393 into OPERAND0. */
29394 void
29395 ix86_expand_round (rtx operand0, rtx operand1)
29396 {
29397 /* C code for the stuff we're doing below:
29398 double xa = fabs (x);
29399 if (!isless (xa, TWO52))
29400 return x;
29401 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29402 return copysign (xa, x);
29403 */
29404 enum machine_mode mode = GET_MODE (operand0);
29405 rtx res, TWO52, xa, label, xi, half, mask;
29406 const struct real_format *fmt;
29407 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29408
29409 /* Temporary for holding the result, initialized to the input
29410 operand to ease control flow. */
29411 res = gen_reg_rtx (mode);
29412 emit_move_insn (res, operand1);
29413
29414 TWO52 = ix86_gen_TWO52 (mode);
29415 xa = ix86_expand_sse_fabs (res, &mask);
29416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29417
29418 /* load nextafter (0.5, 0.0) */
29419 fmt = REAL_MODE_FORMAT (mode);
29420 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29421 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29422
29423 /* xa = xa + 0.5 */
29424 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29425 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29426
29427 /* xa = (double)(int64_t)xa */
29428 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29429 expand_fix (xi, xa, 0);
29430 expand_float (xa, xi, 0);
29431
29432 /* res = copysign (xa, operand1) */
29433 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29434
29435 emit_label (label);
29436 LABEL_NUSES (label) = 1;
29437
29438 emit_move_insn (operand0, res);
29439 }
29440
29441
29442 /* Check whether an SSE5 instruction is valid.
29443 OPERANDS is the array of operands.
29444 NUM is the number of operands.
29445 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29446 NUM_MEMORY is the maximum number of memory operands to accept.
29447 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29448
29449 bool
29450 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29451 bool uses_oc0, int num_memory, bool commutative)
29452 {
29453 int mem_mask;
29454 int mem_count;
29455 int i;
29456
29457 /* Count the number of memory arguments */
29458 mem_mask = 0;
29459 mem_count = 0;
29460 for (i = 0; i < num; i++)
29461 {
29462 enum machine_mode mode = GET_MODE (operands[i]);
29463 if (register_operand (operands[i], mode))
29464 ;
29465
29466 else if (memory_operand (operands[i], mode))
29467 {
29468 mem_mask |= (1 << i);
29469 mem_count++;
29470 }
29471
29472 else
29473 {
29474 rtx pattern = PATTERN (insn);
29475
29476 /* allow 0 for pcmov */
29477 if (GET_CODE (pattern) != SET
29478 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29479 || i < 2
29480 || operands[i] != CONST0_RTX (mode))
29481 return false;
29482 }
29483 }
29484
29485 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29486 a memory operation. */
29487 if (num_memory < 0)
29488 {
29489 num_memory = -num_memory;
29490 if ((mem_mask & (1 << (num-1))) != 0)
29491 {
29492 mem_mask &= ~(1 << (num-1));
29493 mem_count--;
29494 }
29495 }
29496
29497 /* If there were no memory operations, allow the insn */
29498 if (mem_mask == 0)
29499 return true;
29500
29501 /* Do not allow the destination register to be a memory operand. */
29502 else if (mem_mask & (1 << 0))
29503 return false;
29504
29505 /* If there are too many memory operations, disallow the instruction. While
29506 the hardware only allows one memory reference, before register allocation
29507 we sometimes allow two memory operations for certain insns so that code
29508 like the following can be optimized:
29509
29510 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29511
29512 or similar cases that are vectorized into using the fmaddss
29513 instruction. */
29514 else if (mem_count > num_memory)
29515 return false;
29516
29517 /* Don't allow more than one memory operation if not optimizing. */
29518 else if (mem_count > 1 && !optimize)
29519 return false;
29520
29521 else if (num == 4 && mem_count == 1)
29522 {
29523 /* formats (destination is the first argument), example fmaddss:
29524 xmm1, xmm1, xmm2, xmm3/mem
29525 xmm1, xmm1, xmm2/mem, xmm3
29526 xmm1, xmm2, xmm3/mem, xmm1
29527 xmm1, xmm2/mem, xmm3, xmm1 */
29528 if (uses_oc0)
29529 return ((mem_mask == (1 << 1))
29530 || (mem_mask == (1 << 2))
29531 || (mem_mask == (1 << 3)));
29532
29533 /* format, example pmacsdd:
29534 xmm1, xmm2, xmm3/mem, xmm1 */
29535 if (commutative)
29536 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29537 else
29538 return (mem_mask == (1 << 2));
29539 }
29540
29541 else if (num == 4 && num_memory == 2)
29542 {
29543 /* If there are two memory operations, we can load one of the memory ops
29544 into the destination register. This is for optimizing the
29545 multiply/add ops, where the combiner has rewritten both the multiply
29546 and the add insns to have a memory operand. We have to be careful
29547 that the destination doesn't overlap with the inputs. */
29548 rtx op0 = operands[0];
29549
29550 if (reg_mentioned_p (op0, operands[1])
29551 || reg_mentioned_p (op0, operands[2])
29552 || reg_mentioned_p (op0, operands[3]))
29553 return false;
29554
29555 /* formats (destination is the first argument), example fmaddss:
29556 xmm1, xmm1, xmm2, xmm3/mem
29557 xmm1, xmm1, xmm2/mem, xmm3
29558 xmm1, xmm2, xmm3/mem, xmm1
29559 xmm1, xmm2/mem, xmm3, xmm1
29560
29561 For the oc0 case, we will load either operands[1] or operands[3] into
29562 operands[0], so any combination of 2 memory operands is ok. */
29563 if (uses_oc0)
29564 return true;
29565
29566 /* format, example pmacsdd:
29567 xmm1, xmm2, xmm3/mem, xmm1
29568
29569 For the integer multiply/add instructions, be more restrictive and
29570 require operands[2] and operands[3] to be the memory operands. */
29571 if (commutative)
29572 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29573 else
29574 return (mem_mask == ((1 << 2) | (1 << 3)));
29575 }
29576
29577 else if (num == 3 && num_memory == 1)
29578 {
29579 /* formats, example protb:
29580 xmm1, xmm2, xmm3/mem
29581 xmm1, xmm2/mem, xmm3 */
29582 if (uses_oc0)
29583 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29584
29585 /* format, example comeq:
29586 xmm1, xmm2, xmm3/mem */
29587 else
29588 return (mem_mask == (1 << 2));
29589 }
29590
29591 else
29592 gcc_unreachable ();
29593
29594 return false;
29595 }
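/* Example of the mem_mask/mem_count encoding used above (a hypothetical
   operand layout, for illustration only): for a 4-operand fmaddss of the
   form "xmm1, xmm1, xmm2, [mem]", only operand 3 is a memory operand, so
   the scan computes mem_mask == (1 << 3) and mem_count == 1, which the
   uses_oc0 case of the num == 4 branch accepts.  */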
29596
29597
29598 /* Fix up an SSE5 instruction that has 2 memory input references into a form
29599 the hardware will allow, by using the destination register to hold one of
29600 the memory operands. Presently this is used by the multiply/add routines
29601 to allow 2 memory references. */
29602
29603 void
29604 ix86_expand_sse5_multiple_memory (rtx operands[],
29605 int num,
29606 enum machine_mode mode)
29607 {
29608 rtx op0 = operands[0];
29609 if (num != 4
29610 || memory_operand (op0, mode)
29611 || reg_mentioned_p (op0, operands[1])
29612 || reg_mentioned_p (op0, operands[2])
29613 || reg_mentioned_p (op0, operands[3]))
29614 gcc_unreachable ();
29615
29616 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29617 the destination register. */
29618 if (memory_operand (operands[1], mode))
29619 {
29620 emit_move_insn (op0, operands[1]);
29621 operands[1] = op0;
29622 }
29623 else if (memory_operand (operands[3], mode))
29624 {
29625 emit_move_insn (op0, operands[3]);
29626 operands[3] = op0;
29627 }
29628 else
29629 gcc_unreachable ();
29630
29631 return;
29632 }
29633
29634
29635 /* Table of valid machine attributes. */
29636 static const struct attribute_spec ix86_attribute_table[] =
29637 {
29638 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29639 /* Stdcall attribute says callee is responsible for popping arguments
29640 if they are not variable. */
29641 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29642 /* Fastcall attribute says callee is responsible for popping arguments
29643 if they are not variable. */
29644 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29645 /* Cdecl attribute says the callee is a normal C declaration */
29646 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29647 /* Regparm attribute specifies how many integer arguments are to be
29648 passed in registers. */
29649 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29650 /* Sseregparm attribute says we are using x86_64 calling conventions
29651 for FP arguments. */
29652 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29653 /* force_align_arg_pointer says this function realigns the stack at entry. */
29654 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29655 false, true, true, ix86_handle_cconv_attribute },
29656 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29657 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29658 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29659 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29660 #endif
29661 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29662 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29663 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29664 SUBTARGET_ATTRIBUTE_TABLE,
29665 #endif
29666 /* ms_abi and sysv_abi calling convention function attributes. */
29667 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29668 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29669 /* End element. */
29670 { NULL, 0, 0, false, false, false, NULL }
29671 };
29672
29673 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29674 static int
29675 x86_builtin_vectorization_cost (bool runtime_test)
29676 {
29677 /* If the branch of the runtime test is taken, i.e. the vectorized
29678 version is skipped, this incurs a misprediction cost (because the
29679 vectorized version is expected to be the fall-through). So we subtract
29680 the latency of a mispredicted branch from the costs that are incurred
29681 when the vectorized version is executed.
29682
29683 TODO: The values in individual target tables have to be tuned or new
29684 fields may be needed. E.g. on K8, the default branch path is the
29685 not-taken path. If the taken path is predicted correctly, the minimum
29686 penalty of going down the taken path is 1 cycle. If the taken path is
29687 not predicted correctly, then the minimum penalty is 10 cycles. */
29688
29689 if (runtime_test)
29690 {
29691 return (-(ix86_cost->cond_taken_branch_cost));
29692 }
29693 else
29694 return 0;
29695 }
29696
29697 /* Return the calling-ABI-specific va_list type node, i.e. the va_list
29698 type that matches FNDECL's calling ABI. */
29699
29700 tree
29701 ix86_fn_abi_va_list (tree fndecl)
29702 {
29703 int abi;
29704
29705 if (!TARGET_64BIT)
29706 return va_list_type_node;
29707 gcc_assert (fndecl != NULL_TREE);
29708 abi = ix86_function_abi ((const_tree) fndecl);
29709
29710 if (abi == MS_ABI)
29711 return ms_va_list_type_node;
29712 else
29713 return sysv_va_list_type_node;
29714 }
29715
29716 /* Returns the canonical va_list type specified by TYPE. If there
29717 is no valid TYPE provided, it returns NULL_TREE. */
29718
29719 tree
29720 ix86_canonical_va_list_type (tree type)
29721 {
29722 tree wtype, htype;
29723
29724 /* Resolve references and pointers to va_list type. */
29725 if (INDIRECT_REF_P (type))
29726 type = TREE_TYPE (type);
29727 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29728 type = TREE_TYPE (type);
29729
29730 if (TARGET_64BIT)
29731 {
29732 wtype = va_list_type_node;
29733 gcc_assert (wtype != NULL_TREE);
29734 htype = type;
29735 if (TREE_CODE (wtype) == ARRAY_TYPE)
29736 {
29737 /* If va_list is an array type, the argument may have decayed
29738 to a pointer type, e.g. by being passed to another function.
29739 In that case, unwrap both types so that we can compare the
29740 underlying records. */
29741 if (TREE_CODE (htype) == ARRAY_TYPE
29742 || POINTER_TYPE_P (htype))
29743 {
29744 wtype = TREE_TYPE (wtype);
29745 htype = TREE_TYPE (htype);
29746 }
29747 }
29748 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29749 return va_list_type_node;
29750 wtype = sysv_va_list_type_node;
29751 gcc_assert (wtype != NULL_TREE);
29752 htype = type;
29753 if (TREE_CODE (wtype) == ARRAY_TYPE)
29754 {
29755 /* If va_list is an array type, the argument may have decayed
29756 to a pointer type, e.g. by being passed to another function.
29757 In that case, unwrap both types so that we can compare the
29758 underlying records. */
29759 if (TREE_CODE (htype) == ARRAY_TYPE
29760 || POINTER_TYPE_P (htype))
29761 {
29762 wtype = TREE_TYPE (wtype);
29763 htype = TREE_TYPE (htype);
29764 }
29765 }
29766 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29767 return sysv_va_list_type_node;
29768 wtype = ms_va_list_type_node;
29769 gcc_assert (wtype != NULL_TREE);
29770 htype = type;
29771 if (TREE_CODE (wtype) == ARRAY_TYPE)
29772 {
29773 /* If va_list is an array type, the argument may have decayed
29774 to a pointer type, e.g. by being passed to another function.
29775 In that case, unwrap both types so that we can compare the
29776 underlying records. */
29777 if (TREE_CODE (htype) == ARRAY_TYPE
29778 || POINTER_TYPE_P (htype))
29779 {
29780 wtype = TREE_TYPE (wtype);
29781 htype = TREE_TYPE (htype);
29782 }
29783 }
29784 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29785 return ms_va_list_type_node;
29786 return NULL_TREE;
29787 }
29788 return std_canonical_va_list_type (type);
29789 }
29790
29791 /* Iterate through the target-specific builtin types for va_list.
29792 IDX denotes the iterator, *PTREE is set to the result type of
29793 the va_list builtin, and *PNAME to its internal name.
29794 Returns zero if there is no element for this index, otherwise
29795 IDX should be increased upon the next call.
29796 Note, do not iterate a base builtin's name like __builtin_va_list.
29797 Used from c_common_nodes_and_builtins. */
29798
29799 int
29800 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29801 {
29802 if (!TARGET_64BIT)
29803 return 0;
29804 switch (idx) {
29805 case 0:
29806 *ptree = ms_va_list_type_node;
29807 *pname = "__builtin_ms_va_list";
29808 break;
29809 case 1:
29810 *ptree = sysv_va_list_type_node;
29811 *pname = "__builtin_sysv_va_list";
29812 break;
29813 default:
29814 return 0;
29815 }
29816 return 1;
29817 }
29818
29819 /* Initialize the GCC target structure. */
29820 #undef TARGET_RETURN_IN_MEMORY
29821 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29822
29823 #undef TARGET_ATTRIBUTE_TABLE
29824 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29825 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29826 # undef TARGET_MERGE_DECL_ATTRIBUTES
29827 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29828 #endif
29829
29830 #undef TARGET_COMP_TYPE_ATTRIBUTES
29831 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29832
29833 #undef TARGET_INIT_BUILTINS
29834 #define TARGET_INIT_BUILTINS ix86_init_builtins
29835 #undef TARGET_EXPAND_BUILTIN
29836 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29837
29838 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29839 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29840 ix86_builtin_vectorized_function
29841
29842 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29843 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29844
29845 #undef TARGET_BUILTIN_RECIPROCAL
29846 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29847
29848 #undef TARGET_ASM_FUNCTION_EPILOGUE
29849 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29850
29851 #undef TARGET_ENCODE_SECTION_INFO
29852 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29853 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29854 #else
29855 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29856 #endif
29857
29858 #undef TARGET_ASM_OPEN_PAREN
29859 #define TARGET_ASM_OPEN_PAREN ""
29860 #undef TARGET_ASM_CLOSE_PAREN
29861 #define TARGET_ASM_CLOSE_PAREN ""
29862
29863 #undef TARGET_ASM_ALIGNED_HI_OP
29864 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29865 #undef TARGET_ASM_ALIGNED_SI_OP
29866 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29867 #ifdef ASM_QUAD
29868 #undef TARGET_ASM_ALIGNED_DI_OP
29869 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29870 #endif
29871
29872 #undef TARGET_ASM_UNALIGNED_HI_OP
29873 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29874 #undef TARGET_ASM_UNALIGNED_SI_OP
29875 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29876 #undef TARGET_ASM_UNALIGNED_DI_OP
29877 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29878
29879 #undef TARGET_SCHED_ADJUST_COST
29880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29881 #undef TARGET_SCHED_ISSUE_RATE
29882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29885 ia32_multipass_dfa_lookahead
29886
29887 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29888 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29889
29890 #ifdef HAVE_AS_TLS
29891 #undef TARGET_HAVE_TLS
29892 #define TARGET_HAVE_TLS true
29893 #endif
29894 #undef TARGET_CANNOT_FORCE_CONST_MEM
29895 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29896 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29897 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29898
29899 #undef TARGET_DELEGITIMIZE_ADDRESS
29900 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29901
29902 #undef TARGET_MS_BITFIELD_LAYOUT_P
29903 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29904
29905 #if TARGET_MACHO
29906 #undef TARGET_BINDS_LOCAL_P
29907 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29908 #endif
29909 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29910 #undef TARGET_BINDS_LOCAL_P
29911 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29912 #endif
29913
29914 #undef TARGET_ASM_OUTPUT_MI_THUNK
29915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29918
29919 #undef TARGET_ASM_FILE_START
29920 #define TARGET_ASM_FILE_START x86_file_start
29921
29922 #undef TARGET_DEFAULT_TARGET_FLAGS
29923 #define TARGET_DEFAULT_TARGET_FLAGS \
29924 (TARGET_DEFAULT \
29925 | TARGET_SUBTARGET_DEFAULT \
29926 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29927
29928 #undef TARGET_HANDLE_OPTION
29929 #define TARGET_HANDLE_OPTION ix86_handle_option
29930
29931 #undef TARGET_RTX_COSTS
29932 #define TARGET_RTX_COSTS ix86_rtx_costs
29933 #undef TARGET_ADDRESS_COST
29934 #define TARGET_ADDRESS_COST ix86_address_cost
29935
29936 #undef TARGET_FIXED_CONDITION_CODE_REGS
29937 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29938 #undef TARGET_CC_MODES_COMPATIBLE
29939 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29940
29941 #undef TARGET_MACHINE_DEPENDENT_REORG
29942 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29943
29944 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29945 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29946
29947 #undef TARGET_BUILD_BUILTIN_VA_LIST
29948 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29949
29950 #undef TARGET_FN_ABI_VA_LIST
29951 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29952
29953 #undef TARGET_CANONICAL_VA_LIST_TYPE
29954 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29955
29956 #undef TARGET_EXPAND_BUILTIN_VA_START
29957 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29958
29959 #undef TARGET_MD_ASM_CLOBBERS
29960 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29961
29962 #undef TARGET_PROMOTE_PROTOTYPES
29963 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29964 #undef TARGET_STRUCT_VALUE_RTX
29965 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29966 #undef TARGET_SETUP_INCOMING_VARARGS
29967 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29968 #undef TARGET_MUST_PASS_IN_STACK
29969 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29970 #undef TARGET_PASS_BY_REFERENCE
29971 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29972 #undef TARGET_INTERNAL_ARG_POINTER
29973 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29974 #undef TARGET_UPDATE_STACK_BOUNDARY
29975 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29976 #undef TARGET_GET_DRAP_RTX
29977 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29978 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29979 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29980 #undef TARGET_STRICT_ARGUMENT_NAMING
29981 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29982
29983 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29984 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29985
29986 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29987 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29988
29989 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29990 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29991
29992 #undef TARGET_C_MODE_FOR_SUFFIX
29993 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29994
29995 #ifdef HAVE_AS_TLS
29996 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29997 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29998 #endif
29999
30000 #ifdef SUBTARGET_INSERT_ATTRIBUTES
30001 #undef TARGET_INSERT_ATTRIBUTES
30002 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30003 #endif
30004
30005 #undef TARGET_MANGLE_TYPE
30006 #define TARGET_MANGLE_TYPE ix86_mangle_type
30007
30008 #undef TARGET_STACK_PROTECT_FAIL
30009 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30010
30011 #undef TARGET_FUNCTION_VALUE
30012 #define TARGET_FUNCTION_VALUE ix86_function_value
30013
30014 #undef TARGET_SECONDARY_RELOAD
30015 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30016
30017 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30018 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30019
30020 #undef TARGET_SET_CURRENT_FUNCTION
30021 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30022
30023 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30024 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30025
30026 #undef TARGET_OPTION_SAVE
30027 #define TARGET_OPTION_SAVE ix86_function_specific_save
30028
30029 #undef TARGET_OPTION_RESTORE
30030 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30031
30032 #undef TARGET_OPTION_PRINT
30033 #define TARGET_OPTION_PRINT ix86_function_specific_print
30034
30035 #undef TARGET_OPTION_CAN_INLINE_P
30036 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30037
30038 #undef TARGET_EXPAND_TO_RTL_HOOK
30039 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30040
30041 struct gcc_target targetm = TARGET_INITIALIZER;
30042
30043 #include "gt-i386.h"