Implement -fstrict-calling-conventions
Stock GCC is overly willing to violate the ABI when calling local (internal-linkage) functions: on i386 it passes their arguments in registers rather than on the stack. This hampers debugging with anything other than a debugger that fully consumes DWARF call-site information, and is generally not something we desire.
Implement a flag which disables this behaviour; the flag is enabled by default. It is defined as a global flag, even though it currently only takes effect on i386, so that it can more easily be extended to other targets later — which, given the odds, is likely to become necessary.
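To make the behaviour concrete, here is a minimal, hypothetical C example of the kind of call the flag affects. It is only a sketch: the file name, the exact register choice, and whether the optimization fires depend on the GCC version and options; the flag name -fstrict-calling-conventions is the one introduced by this patch.

```c
/* local_call.c — minimal sketch of the local-call optimization this flag
   disables.  Build for i386, e.g. "gcc -m32 -O2 -S local_call.c", and compare
   the call sequence with and without -fstrict-calling-conventions (the flag
   added by this patch).  Exact behaviour varies by GCC version. */

#include <stdio.h>

/* A local (static) function whose address is never taken.  Stock GCC may
   change its calling convention behind our back: on i386 the three arguments
   can be passed in registers (regparm-style) instead of on the stack, which
   is what the standard cdecl ABI would use. */
static int
sum3 (int a, int b, int c)
{
  return a + b + c;
}

int
main (void)
{
  /* With -fstrict-calling-conventions enabled (the default under this patch),
     this call pushes a, b and c on the stack as the ABI specifies, so a
     debugger that walks the stack without DWARF call-site information can
     still recover the argument values. */
  printf ("%d\n", sum3 (1, 2, 3));
  return 0;
}
```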
--- old/gcc/config/i386/i386.c
+++ new/gcc/config/i386/i386.c
1 1 /* Subroutines used for code generation on IA-32.
2 2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 3 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
4 4 Free Software Foundation, Inc.
5 5
6 6 This file is part of GCC.
7 7
8 8 GCC is free software; you can redistribute it and/or modify
9 9 it under the terms of the GNU General Public License as published by
10 10 the Free Software Foundation; either version 3, or (at your option)
11 11 any later version.
12 12
13 13 GCC is distributed in the hope that it will be useful,
14 14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 16 GNU General Public License for more details.
17 17
18 18 You should have received a copy of the GNU General Public License
19 19 along with GCC; see the file COPYING3. If not see
20 20 <http://www.gnu.org/licenses/>. */
21 21
22 22 #include "config.h"
23 23 #include "system.h"
24 24 #include "coretypes.h"
25 25 #include "tm.h"
26 26 #include "rtl.h"
27 27 #include "tree.h"
28 28 #include "tm_p.h"
29 29 #include "regs.h"
30 30 #include "hard-reg-set.h"
31 31 #include "real.h"
32 32 #include "insn-config.h"
33 33 #include "conditions.h"
34 34 #include "output.h"
35 35 #include "insn-codes.h"
36 36 #include "insn-attr.h"
37 37 #include "flags.h"
38 38 #include "c-common.h"
39 39 #include "except.h"
40 40 #include "function.h"
41 41 #include "recog.h"
42 42 #include "expr.h"
43 43 #include "optabs.h"
44 44 #include "toplev.h"
45 45 #include "basic-block.h"
46 46 #include "ggc.h"
47 47 #include "target.h"
48 48 #include "target-def.h"
49 49 #include "langhooks.h"
50 50 #include "cgraph.h"
51 51 #include "gimple.h"
52 52 #include "dwarf2.h"
53 53 #include "df.h"
54 54 #include "tm-constrs.h"
55 55 #include "params.h"
56 56 #include "cselib.h"
57 57
58 58 static int x86_builtin_vectorization_cost (bool);
59 59 static rtx legitimize_dllimport_symbol (rtx, bool);
60 60
61 61 #ifndef CHECK_STACK_LIMIT
62 62 #define CHECK_STACK_LIMIT (-1)
63 63 #endif
64 64
65 65 /* Return index of given mode in mult and division cost tables. */
66 66 #define MODE_INDEX(mode) \
67 67 ((mode) == QImode ? 0 \
68 68 : (mode) == HImode ? 1 \
69 69 : (mode) == SImode ? 2 \
70 70 : (mode) == DImode ? 3 \
71 71 : 4)
72 72
73 73 /* Processor costs (relative to an add) */
74 74 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
75 75 #define COSTS_N_BYTES(N) ((N) * 2)
76 76
77 77 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
78 78
79 79 const
80 80 struct processor_costs ix86_size_cost = {/* costs for tuning for size */
81 81 COSTS_N_BYTES (2), /* cost of an add instruction */
82 82 COSTS_N_BYTES (3), /* cost of a lea instruction */
83 83 COSTS_N_BYTES (2), /* variable shift costs */
84 84 COSTS_N_BYTES (3), /* constant shift costs */
85 85 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
86 86 COSTS_N_BYTES (3), /* HI */
87 87 COSTS_N_BYTES (3), /* SI */
88 88 COSTS_N_BYTES (3), /* DI */
89 89 COSTS_N_BYTES (5)}, /* other */
90 90 0, /* cost of multiply per each bit set */
91 91 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
92 92 COSTS_N_BYTES (3), /* HI */
93 93 COSTS_N_BYTES (3), /* SI */
94 94 COSTS_N_BYTES (3), /* DI */
95 95 COSTS_N_BYTES (5)}, /* other */
96 96 COSTS_N_BYTES (3), /* cost of movsx */
97 97 COSTS_N_BYTES (3), /* cost of movzx */
98 98 0, /* "large" insn */
99 99 2, /* MOVE_RATIO */
100 100 2, /* cost for loading QImode using movzbl */
101 101 {2, 2, 2}, /* cost of loading integer registers
102 102 in QImode, HImode and SImode.
103 103 Relative to reg-reg move (2). */
104 104 {2, 2, 2}, /* cost of storing integer registers */
105 105 2, /* cost of reg,reg fld/fst */
106 106 {2, 2, 2}, /* cost of loading fp registers
107 107 in SFmode, DFmode and XFmode */
108 108 {2, 2, 2}, /* cost of storing fp registers
109 109 in SFmode, DFmode and XFmode */
110 110 3, /* cost of moving MMX register */
111 111 {3, 3}, /* cost of loading MMX registers
112 112 in SImode and DImode */
113 113 {3, 3}, /* cost of storing MMX registers
114 114 in SImode and DImode */
115 115 3, /* cost of moving SSE register */
116 116 {3, 3, 3}, /* cost of loading SSE registers
117 117 in SImode, DImode and TImode */
118 118 {3, 3, 3}, /* cost of storing SSE registers
119 119 in SImode, DImode and TImode */
120 120 3, /* MMX or SSE register to integer */
121 121 0, /* size of l1 cache */
122 122 0, /* size of l2 cache */
123 123 0, /* size of prefetch block */
124 124 0, /* number of parallel prefetches */
125 125 2, /* Branch cost */
126 126 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
127 127 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
128 128 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
129 129 COSTS_N_BYTES (2), /* cost of FABS instruction. */
130 130 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
131 131 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
132 132 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
133 133 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
134 134 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
135 135 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
136 136 1, /* scalar_stmt_cost. */
137 137 1, /* scalar load_cost. */
138 138 1, /* scalar_store_cost. */
139 139 1, /* vec_stmt_cost. */
140 140 1, /* vec_to_scalar_cost. */
141 141 1, /* scalar_to_vec_cost. */
142 142 1, /* vec_align_load_cost. */
143 143 1, /* vec_unalign_load_cost. */
144 144 1, /* vec_store_cost. */
145 145 1, /* cond_taken_branch_cost. */
146 146 1, /* cond_not_taken_branch_cost. */
147 147 };
148 148
149 149 /* Processor costs (relative to an add) */
150 150 static const
151 151 struct processor_costs i386_cost = { /* 386 specific costs */
152 152 COSTS_N_INSNS (1), /* cost of an add instruction */
153 153 COSTS_N_INSNS (1), /* cost of a lea instruction */
154 154 COSTS_N_INSNS (3), /* variable shift costs */
155 155 COSTS_N_INSNS (2), /* constant shift costs */
156 156 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
157 157 COSTS_N_INSNS (6), /* HI */
158 158 COSTS_N_INSNS (6), /* SI */
159 159 COSTS_N_INSNS (6), /* DI */
160 160 COSTS_N_INSNS (6)}, /* other */
161 161 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
162 162 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
163 163 COSTS_N_INSNS (23), /* HI */
164 164 COSTS_N_INSNS (23), /* SI */
165 165 COSTS_N_INSNS (23), /* DI */
166 166 COSTS_N_INSNS (23)}, /* other */
167 167 COSTS_N_INSNS (3), /* cost of movsx */
168 168 COSTS_N_INSNS (2), /* cost of movzx */
169 169 15, /* "large" insn */
170 170 3, /* MOVE_RATIO */
171 171 4, /* cost for loading QImode using movzbl */
172 172 {2, 4, 2}, /* cost of loading integer registers
173 173 in QImode, HImode and SImode.
174 174 Relative to reg-reg move (2). */
175 175 {2, 4, 2}, /* cost of storing integer registers */
176 176 2, /* cost of reg,reg fld/fst */
177 177 {8, 8, 8}, /* cost of loading fp registers
178 178 in SFmode, DFmode and XFmode */
179 179 {8, 8, 8}, /* cost of storing fp registers
180 180 in SFmode, DFmode and XFmode */
181 181 2, /* cost of moving MMX register */
182 182 {4, 8}, /* cost of loading MMX registers
183 183 in SImode and DImode */
184 184 {4, 8}, /* cost of storing MMX registers
185 185 in SImode and DImode */
186 186 2, /* cost of moving SSE register */
187 187 {4, 8, 16}, /* cost of loading SSE registers
188 188 in SImode, DImode and TImode */
189 189 {4, 8, 16}, /* cost of storing SSE registers
190 190 in SImode, DImode and TImode */
191 191 3, /* MMX or SSE register to integer */
192 192 0, /* size of l1 cache */
193 193 0, /* size of l2 cache */
194 194 0, /* size of prefetch block */
195 195 0, /* number of parallel prefetches */
196 196 1, /* Branch cost */
197 197 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
198 198 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
199 199 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
200 200 COSTS_N_INSNS (22), /* cost of FABS instruction. */
201 201 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
202 202 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
203 203 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
204 204 DUMMY_STRINGOP_ALGS},
205 205 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
206 206 DUMMY_STRINGOP_ALGS},
207 207 1, /* scalar_stmt_cost. */
208 208 1, /* scalar load_cost. */
209 209 1, /* scalar_store_cost. */
210 210 1, /* vec_stmt_cost. */
211 211 1, /* vec_to_scalar_cost. */
212 212 1, /* scalar_to_vec_cost. */
213 213 1, /* vec_align_load_cost. */
214 214 2, /* vec_unalign_load_cost. */
215 215 1, /* vec_store_cost. */
216 216 3, /* cond_taken_branch_cost. */
217 217 1, /* cond_not_taken_branch_cost. */
218 218 };
219 219
220 220 static const
221 221 struct processor_costs i486_cost = { /* 486 specific costs */
222 222 COSTS_N_INSNS (1), /* cost of an add instruction */
223 223 COSTS_N_INSNS (1), /* cost of a lea instruction */
224 224 COSTS_N_INSNS (3), /* variable shift costs */
225 225 COSTS_N_INSNS (2), /* constant shift costs */
226 226 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
227 227 COSTS_N_INSNS (12), /* HI */
228 228 COSTS_N_INSNS (12), /* SI */
229 229 COSTS_N_INSNS (12), /* DI */
230 230 COSTS_N_INSNS (12)}, /* other */
231 231 1, /* cost of multiply per each bit set */
232 232 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
233 233 COSTS_N_INSNS (40), /* HI */
234 234 COSTS_N_INSNS (40), /* SI */
235 235 COSTS_N_INSNS (40), /* DI */
236 236 COSTS_N_INSNS (40)}, /* other */
237 237 COSTS_N_INSNS (3), /* cost of movsx */
238 238 COSTS_N_INSNS (2), /* cost of movzx */
239 239 15, /* "large" insn */
240 240 3, /* MOVE_RATIO */
241 241 4, /* cost for loading QImode using movzbl */
242 242 {2, 4, 2}, /* cost of loading integer registers
243 243 in QImode, HImode and SImode.
244 244 Relative to reg-reg move (2). */
245 245 {2, 4, 2}, /* cost of storing integer registers */
246 246 2, /* cost of reg,reg fld/fst */
247 247 {8, 8, 8}, /* cost of loading fp registers
248 248 in SFmode, DFmode and XFmode */
249 249 {8, 8, 8}, /* cost of storing fp registers
250 250 in SFmode, DFmode and XFmode */
251 251 2, /* cost of moving MMX register */
252 252 {4, 8}, /* cost of loading MMX registers
253 253 in SImode and DImode */
254 254 {4, 8}, /* cost of storing MMX registers
255 255 in SImode and DImode */
256 256 2, /* cost of moving SSE register */
257 257 {4, 8, 16}, /* cost of loading SSE registers
258 258 in SImode, DImode and TImode */
259 259 {4, 8, 16}, /* cost of storing SSE registers
260 260 in SImode, DImode and TImode */
261 261 3, /* MMX or SSE register to integer */
262 262 4, /* size of l1 cache. 486 has 8kB cache
263 263 shared for code and data, so 4kB is
264 264 not really precise. */
265 265 4, /* size of l2 cache */
266 266 0, /* size of prefetch block */
267 267 0, /* number of parallel prefetches */
268 268 1, /* Branch cost */
269 269 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
270 270 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
271 271 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
272 272 COSTS_N_INSNS (3), /* cost of FABS instruction. */
273 273 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
274 274 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
275 275 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
276 276 DUMMY_STRINGOP_ALGS},
277 277 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
278 278 DUMMY_STRINGOP_ALGS},
279 279 1, /* scalar_stmt_cost. */
280 280 1, /* scalar load_cost. */
281 281 1, /* scalar_store_cost. */
282 282 1, /* vec_stmt_cost. */
283 283 1, /* vec_to_scalar_cost. */
284 284 1, /* scalar_to_vec_cost. */
285 285 1, /* vec_align_load_cost. */
286 286 2, /* vec_unalign_load_cost. */
287 287 1, /* vec_store_cost. */
288 288 3, /* cond_taken_branch_cost. */
289 289 1, /* cond_not_taken_branch_cost. */
290 290 };
291 291
292 292 static const
293 293 struct processor_costs pentium_cost = {
294 294 COSTS_N_INSNS (1), /* cost of an add instruction */
295 295 COSTS_N_INSNS (1), /* cost of a lea instruction */
296 296 COSTS_N_INSNS (4), /* variable shift costs */
297 297 COSTS_N_INSNS (1), /* constant shift costs */
298 298 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
299 299 COSTS_N_INSNS (11), /* HI */
300 300 COSTS_N_INSNS (11), /* SI */
301 301 COSTS_N_INSNS (11), /* DI */
302 302 COSTS_N_INSNS (11)}, /* other */
303 303 0, /* cost of multiply per each bit set */
304 304 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
305 305 COSTS_N_INSNS (25), /* HI */
306 306 COSTS_N_INSNS (25), /* SI */
307 307 COSTS_N_INSNS (25), /* DI */
308 308 COSTS_N_INSNS (25)}, /* other */
309 309 COSTS_N_INSNS (3), /* cost of movsx */
310 310 COSTS_N_INSNS (2), /* cost of movzx */
311 311 8, /* "large" insn */
312 312 6, /* MOVE_RATIO */
313 313 6, /* cost for loading QImode using movzbl */
314 314 {2, 4, 2}, /* cost of loading integer registers
315 315 in QImode, HImode and SImode.
316 316 Relative to reg-reg move (2). */
317 317 {2, 4, 2}, /* cost of storing integer registers */
318 318 2, /* cost of reg,reg fld/fst */
319 319 {2, 2, 6}, /* cost of loading fp registers
320 320 in SFmode, DFmode and XFmode */
321 321 {4, 4, 6}, /* cost of storing fp registers
322 322 in SFmode, DFmode and XFmode */
323 323 8, /* cost of moving MMX register */
324 324 {8, 8}, /* cost of loading MMX registers
325 325 in SImode and DImode */
326 326 {8, 8}, /* cost of storing MMX registers
327 327 in SImode and DImode */
328 328 2, /* cost of moving SSE register */
329 329 {4, 8, 16}, /* cost of loading SSE registers
330 330 in SImode, DImode and TImode */
331 331 {4, 8, 16}, /* cost of storing SSE registers
332 332 in SImode, DImode and TImode */
333 333 3, /* MMX or SSE register to integer */
334 334 8, /* size of l1 cache. */
335 335 8, /* size of l2 cache */
336 336 0, /* size of prefetch block */
337 337 0, /* number of parallel prefetches */
338 338 2, /* Branch cost */
339 339 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
340 340 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
341 341 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
342 342 COSTS_N_INSNS (1), /* cost of FABS instruction. */
343 343 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
344 344 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
345 345 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
346 346 DUMMY_STRINGOP_ALGS},
347 347 {{libcall, {{-1, rep_prefix_4_byte}}},
348 348 DUMMY_STRINGOP_ALGS},
349 349 1, /* scalar_stmt_cost. */
350 350 1, /* scalar load_cost. */
351 351 1, /* scalar_store_cost. */
352 352 1, /* vec_stmt_cost. */
353 353 1, /* vec_to_scalar_cost. */
354 354 1, /* scalar_to_vec_cost. */
355 355 1, /* vec_align_load_cost. */
356 356 2, /* vec_unalign_load_cost. */
357 357 1, /* vec_store_cost. */
358 358 3, /* cond_taken_branch_cost. */
359 359 1, /* cond_not_taken_branch_cost. */
360 360 };
361 361
362 362 static const
363 363 struct processor_costs pentiumpro_cost = {
364 364 COSTS_N_INSNS (1), /* cost of an add instruction */
365 365 COSTS_N_INSNS (1), /* cost of a lea instruction */
366 366 COSTS_N_INSNS (1), /* variable shift costs */
367 367 COSTS_N_INSNS (1), /* constant shift costs */
368 368 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
369 369 COSTS_N_INSNS (4), /* HI */
370 370 COSTS_N_INSNS (4), /* SI */
371 371 COSTS_N_INSNS (4), /* DI */
372 372 COSTS_N_INSNS (4)}, /* other */
373 373 0, /* cost of multiply per each bit set */
374 374 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
375 375 COSTS_N_INSNS (17), /* HI */
376 376 COSTS_N_INSNS (17), /* SI */
377 377 COSTS_N_INSNS (17), /* DI */
378 378 COSTS_N_INSNS (17)}, /* other */
379 379 COSTS_N_INSNS (1), /* cost of movsx */
380 380 COSTS_N_INSNS (1), /* cost of movzx */
381 381 8, /* "large" insn */
382 382 6, /* MOVE_RATIO */
383 383 2, /* cost for loading QImode using movzbl */
384 384 {4, 4, 4}, /* cost of loading integer registers
385 385 in QImode, HImode and SImode.
386 386 Relative to reg-reg move (2). */
387 387 {2, 2, 2}, /* cost of storing integer registers */
388 388 2, /* cost of reg,reg fld/fst */
389 389 {2, 2, 6}, /* cost of loading fp registers
390 390 in SFmode, DFmode and XFmode */
391 391 {4, 4, 6}, /* cost of storing fp registers
392 392 in SFmode, DFmode and XFmode */
393 393 2, /* cost of moving MMX register */
394 394 {2, 2}, /* cost of loading MMX registers
395 395 in SImode and DImode */
396 396 {2, 2}, /* cost of storing MMX registers
397 397 in SImode and DImode */
398 398 2, /* cost of moving SSE register */
399 399 {2, 2, 8}, /* cost of loading SSE registers
400 400 in SImode, DImode and TImode */
401 401 {2, 2, 8}, /* cost of storing SSE registers
402 402 in SImode, DImode and TImode */
403 403 3, /* MMX or SSE register to integer */
404 404 8, /* size of l1 cache. */
405 405 256, /* size of l2 cache */
406 406 32, /* size of prefetch block */
407 407 6, /* number of parallel prefetches */
408 408 2, /* Branch cost */
409 409 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
410 410 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
411 411 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
412 412 COSTS_N_INSNS (2), /* cost of FABS instruction. */
413 413 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
414 414 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
415 415 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
416 416 the alignment). For small blocks inline loop is still a noticeable win, for bigger
417 417 blocks either rep movsl or rep movsb is way to go. Rep movsb has apparently
418 418 more expensive startup time in CPU, but after 4K the difference is down in the noise.
419 419 */
420 420 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
421 421 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
422 422 DUMMY_STRINGOP_ALGS},
423 423 {{rep_prefix_4_byte, {{1024, unrolled_loop},
424 424 {8192, rep_prefix_4_byte}, {-1, libcall}}},
425 425 DUMMY_STRINGOP_ALGS},
426 426 1, /* scalar_stmt_cost. */
427 427 1, /* scalar load_cost. */
428 428 1, /* scalar_store_cost. */
429 429 1, /* vec_stmt_cost. */
430 430 1, /* vec_to_scalar_cost. */
431 431 1, /* scalar_to_vec_cost. */
432 432 1, /* vec_align_load_cost. */
433 433 2, /* vec_unalign_load_cost. */
434 434 1, /* vec_store_cost. */
435 435 3, /* cond_taken_branch_cost. */
436 436 1, /* cond_not_taken_branch_cost. */
437 437 };
438 438
439 439 static const
440 440 struct processor_costs geode_cost = {
441 441 COSTS_N_INSNS (1), /* cost of an add instruction */
442 442 COSTS_N_INSNS (1), /* cost of a lea instruction */
443 443 COSTS_N_INSNS (2), /* variable shift costs */
444 444 COSTS_N_INSNS (1), /* constant shift costs */
445 445 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
446 446 COSTS_N_INSNS (4), /* HI */
447 447 COSTS_N_INSNS (7), /* SI */
448 448 COSTS_N_INSNS (7), /* DI */
449 449 COSTS_N_INSNS (7)}, /* other */
450 450 0, /* cost of multiply per each bit set */
451 451 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
452 452 COSTS_N_INSNS (23), /* HI */
453 453 COSTS_N_INSNS (39), /* SI */
454 454 COSTS_N_INSNS (39), /* DI */
455 455 COSTS_N_INSNS (39)}, /* other */
456 456 COSTS_N_INSNS (1), /* cost of movsx */
457 457 COSTS_N_INSNS (1), /* cost of movzx */
458 458 8, /* "large" insn */
459 459 4, /* MOVE_RATIO */
460 460 1, /* cost for loading QImode using movzbl */
461 461 {1, 1, 1}, /* cost of loading integer registers
462 462 in QImode, HImode and SImode.
463 463 Relative to reg-reg move (2). */
464 464 {1, 1, 1}, /* cost of storing integer registers */
465 465 1, /* cost of reg,reg fld/fst */
466 466 {1, 1, 1}, /* cost of loading fp registers
467 467 in SFmode, DFmode and XFmode */
468 468 {4, 6, 6}, /* cost of storing fp registers
469 469 in SFmode, DFmode and XFmode */
470 470
471 471 1, /* cost of moving MMX register */
472 472 {1, 1}, /* cost of loading MMX registers
473 473 in SImode and DImode */
474 474 {1, 1}, /* cost of storing MMX registers
475 475 in SImode and DImode */
476 476 1, /* cost of moving SSE register */
477 477 {1, 1, 1}, /* cost of loading SSE registers
478 478 in SImode, DImode and TImode */
479 479 {1, 1, 1}, /* cost of storing SSE registers
480 480 in SImode, DImode and TImode */
481 481 1, /* MMX or SSE register to integer */
482 482 64, /* size of l1 cache. */
483 483 128, /* size of l2 cache. */
484 484 32, /* size of prefetch block */
485 485 1, /* number of parallel prefetches */
486 486 1, /* Branch cost */
487 487 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
488 488 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
489 489 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
490 490 COSTS_N_INSNS (1), /* cost of FABS instruction. */
491 491 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
492 492 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
493 493 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
494 494 DUMMY_STRINGOP_ALGS},
495 495 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
496 496 DUMMY_STRINGOP_ALGS},
497 497 1, /* scalar_stmt_cost. */
498 498 1, /* scalar load_cost. */
499 499 1, /* scalar_store_cost. */
500 500 1, /* vec_stmt_cost. */
501 501 1, /* vec_to_scalar_cost. */
502 502 1, /* scalar_to_vec_cost. */
503 503 1, /* vec_align_load_cost. */
504 504 2, /* vec_unalign_load_cost. */
505 505 1, /* vec_store_cost. */
506 506 3, /* cond_taken_branch_cost. */
507 507 1, /* cond_not_taken_branch_cost. */
508 508 };
509 509
510 510 static const
511 511 struct processor_costs k6_cost = {
512 512 COSTS_N_INSNS (1), /* cost of an add instruction */
513 513 COSTS_N_INSNS (2), /* cost of a lea instruction */
514 514 COSTS_N_INSNS (1), /* variable shift costs */
515 515 COSTS_N_INSNS (1), /* constant shift costs */
516 516 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
517 517 COSTS_N_INSNS (3), /* HI */
518 518 COSTS_N_INSNS (3), /* SI */
519 519 COSTS_N_INSNS (3), /* DI */
520 520 COSTS_N_INSNS (3)}, /* other */
521 521 0, /* cost of multiply per each bit set */
522 522 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
523 523 COSTS_N_INSNS (18), /* HI */
524 524 COSTS_N_INSNS (18), /* SI */
525 525 COSTS_N_INSNS (18), /* DI */
526 526 COSTS_N_INSNS (18)}, /* other */
527 527 COSTS_N_INSNS (2), /* cost of movsx */
528 528 COSTS_N_INSNS (2), /* cost of movzx */
529 529 8, /* "large" insn */
530 530 4, /* MOVE_RATIO */
531 531 3, /* cost for loading QImode using movzbl */
532 532 {4, 5, 4}, /* cost of loading integer registers
533 533 in QImode, HImode and SImode.
534 534 Relative to reg-reg move (2). */
535 535 {2, 3, 2}, /* cost of storing integer registers */
536 536 4, /* cost of reg,reg fld/fst */
537 537 {6, 6, 6}, /* cost of loading fp registers
538 538 in SFmode, DFmode and XFmode */
539 539 {4, 4, 4}, /* cost of storing fp registers
540 540 in SFmode, DFmode and XFmode */
541 541 2, /* cost of moving MMX register */
542 542 {2, 2}, /* cost of loading MMX registers
543 543 in SImode and DImode */
544 544 {2, 2}, /* cost of storing MMX registers
545 545 in SImode and DImode */
546 546 2, /* cost of moving SSE register */
547 547 {2, 2, 8}, /* cost of loading SSE registers
548 548 in SImode, DImode and TImode */
549 549 {2, 2, 8}, /* cost of storing SSE registers
550 550 in SImode, DImode and TImode */
551 551 6, /* MMX or SSE register to integer */
552 552 32, /* size of l1 cache. */
553 553 32, /* size of l2 cache. Some models
554 554 have integrated l2 cache, but
555 555 optimizing for k6 is not important
556 556 enough to worry about that. */
557 557 32, /* size of prefetch block */
558 558 1, /* number of parallel prefetches */
559 559 1, /* Branch cost */
560 560 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
561 561 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
562 562 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
563 563 COSTS_N_INSNS (2), /* cost of FABS instruction. */
564 564 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
565 565 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
566 566 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
567 567 DUMMY_STRINGOP_ALGS},
568 568 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
569 569 DUMMY_STRINGOP_ALGS},
570 570 1, /* scalar_stmt_cost. */
571 571 1, /* scalar load_cost. */
572 572 1, /* scalar_store_cost. */
573 573 1, /* vec_stmt_cost. */
574 574 1, /* vec_to_scalar_cost. */
575 575 1, /* scalar_to_vec_cost. */
576 576 1, /* vec_align_load_cost. */
577 577 2, /* vec_unalign_load_cost. */
578 578 1, /* vec_store_cost. */
579 579 3, /* cond_taken_branch_cost. */
580 580 1, /* cond_not_taken_branch_cost. */
581 581 };
582 582
583 583 static const
584 584 struct processor_costs athlon_cost = {
585 585 COSTS_N_INSNS (1), /* cost of an add instruction */
586 586 COSTS_N_INSNS (2), /* cost of a lea instruction */
587 587 COSTS_N_INSNS (1), /* variable shift costs */
588 588 COSTS_N_INSNS (1), /* constant shift costs */
589 589 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
590 590 COSTS_N_INSNS (5), /* HI */
591 591 COSTS_N_INSNS (5), /* SI */
592 592 COSTS_N_INSNS (5), /* DI */
593 593 COSTS_N_INSNS (5)}, /* other */
594 594 0, /* cost of multiply per each bit set */
595 595 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
596 596 COSTS_N_INSNS (26), /* HI */
597 597 COSTS_N_INSNS (42), /* SI */
598 598 COSTS_N_INSNS (74), /* DI */
599 599 COSTS_N_INSNS (74)}, /* other */
600 600 COSTS_N_INSNS (1), /* cost of movsx */
601 601 COSTS_N_INSNS (1), /* cost of movzx */
602 602 8, /* "large" insn */
603 603 9, /* MOVE_RATIO */
604 604 4, /* cost for loading QImode using movzbl */
605 605 {3, 4, 3}, /* cost of loading integer registers
606 606 in QImode, HImode and SImode.
607 607 Relative to reg-reg move (2). */
608 608 {3, 4, 3}, /* cost of storing integer registers */
609 609 4, /* cost of reg,reg fld/fst */
610 610 {4, 4, 12}, /* cost of loading fp registers
611 611 in SFmode, DFmode and XFmode */
612 612 {6, 6, 8}, /* cost of storing fp registers
613 613 in SFmode, DFmode and XFmode */
614 614 2, /* cost of moving MMX register */
615 615 {4, 4}, /* cost of loading MMX registers
616 616 in SImode and DImode */
617 617 {4, 4}, /* cost of storing MMX registers
618 618 in SImode and DImode */
619 619 2, /* cost of moving SSE register */
620 620 {4, 4, 6}, /* cost of loading SSE registers
621 621 in SImode, DImode and TImode */
622 622 {4, 4, 5}, /* cost of storing SSE registers
623 623 in SImode, DImode and TImode */
624 624 5, /* MMX or SSE register to integer */
625 625 64, /* size of l1 cache. */
626 626 256, /* size of l2 cache. */
627 627 64, /* size of prefetch block */
628 628 6, /* number of parallel prefetches */
629 629 5, /* Branch cost */
630 630 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
631 631 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
632 632 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
633 633 COSTS_N_INSNS (2), /* cost of FABS instruction. */
634 634 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
635 635 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
636 636 /* For some reason, Athlon deals better with REP prefix (relative to loops)
637 637 compared to K8. Alignment becomes important after 8 bytes for memcpy and
638 638 128 bytes for memset. */
639 639 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
640 640 DUMMY_STRINGOP_ALGS},
641 641 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
642 642 DUMMY_STRINGOP_ALGS},
643 643 1, /* scalar_stmt_cost. */
644 644 1, /* scalar load_cost. */
645 645 1, /* scalar_store_cost. */
646 646 1, /* vec_stmt_cost. */
647 647 1, /* vec_to_scalar_cost. */
648 648 1, /* scalar_to_vec_cost. */
649 649 1, /* vec_align_load_cost. */
650 650 2, /* vec_unalign_load_cost. */
651 651 1, /* vec_store_cost. */
652 652 3, /* cond_taken_branch_cost. */
653 653 1, /* cond_not_taken_branch_cost. */
654 654 };
655 655
656 656 static const
657 657 struct processor_costs k8_cost = {
658 658 COSTS_N_INSNS (1), /* cost of an add instruction */
659 659 COSTS_N_INSNS (2), /* cost of a lea instruction */
660 660 COSTS_N_INSNS (1), /* variable shift costs */
661 661 COSTS_N_INSNS (1), /* constant shift costs */
662 662 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
663 663 COSTS_N_INSNS (4), /* HI */
664 664 COSTS_N_INSNS (3), /* SI */
665 665 COSTS_N_INSNS (4), /* DI */
666 666 COSTS_N_INSNS (5)}, /* other */
667 667 0, /* cost of multiply per each bit set */
668 668 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
669 669 COSTS_N_INSNS (26), /* HI */
670 670 COSTS_N_INSNS (42), /* SI */
671 671 COSTS_N_INSNS (74), /* DI */
672 672 COSTS_N_INSNS (74)}, /* other */
673 673 COSTS_N_INSNS (1), /* cost of movsx */
674 674 COSTS_N_INSNS (1), /* cost of movzx */
675 675 8, /* "large" insn */
676 676 9, /* MOVE_RATIO */
677 677 4, /* cost for loading QImode using movzbl */
678 678 {3, 4, 3}, /* cost of loading integer registers
679 679 in QImode, HImode and SImode.
680 680 Relative to reg-reg move (2). */
681 681 {3, 4, 3}, /* cost of storing integer registers */
682 682 4, /* cost of reg,reg fld/fst */
683 683 {4, 4, 12}, /* cost of loading fp registers
684 684 in SFmode, DFmode and XFmode */
685 685 {6, 6, 8}, /* cost of storing fp registers
686 686 in SFmode, DFmode and XFmode */
687 687 2, /* cost of moving MMX register */
688 688 {3, 3}, /* cost of loading MMX registers
689 689 in SImode and DImode */
690 690 {4, 4}, /* cost of storing MMX registers
691 691 in SImode and DImode */
692 692 2, /* cost of moving SSE register */
693 693 {4, 3, 6}, /* cost of loading SSE registers
694 694 in SImode, DImode and TImode */
695 695 {4, 4, 5}, /* cost of storing SSE registers
696 696 in SImode, DImode and TImode */
697 697 5, /* MMX or SSE register to integer */
698 698 64, /* size of l1 cache. */
699 699 512, /* size of l2 cache. */
700 700 64, /* size of prefetch block */
701 701 /* New AMD processors never drop prefetches; if they cannot be performed
702 702 immediately, they are queued. We set number of simultaneous prefetches
703 703 to a large constant to reflect this (it probably is not a good idea not
704 704 to limit number of prefetches at all, as their execution also takes some
705 705 time). */
706 706 100, /* number of parallel prefetches */
707 707 3, /* Branch cost */
708 708 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
709 709 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
710 710 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
711 711 COSTS_N_INSNS (2), /* cost of FABS instruction. */
712 712 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
713 713 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
714 714 /* K8 has optimized REP instruction for medium sized blocks, but for very small
715 715 blocks it is better to use loop. For large blocks, libcall can do
716 716 nontemporary accesses and beat inline considerably. */
717 717 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
718 718 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
719 719 {{libcall, {{8, loop}, {24, unrolled_loop},
720 720 {2048, rep_prefix_4_byte}, {-1, libcall}}},
721 721 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
722 722 4, /* scalar_stmt_cost. */
723 723 2, /* scalar load_cost. */
724 724 2, /* scalar_store_cost. */
725 725 5, /* vec_stmt_cost. */
726 726 0, /* vec_to_scalar_cost. */
727 727 2, /* scalar_to_vec_cost. */
728 728 2, /* vec_align_load_cost. */
729 729 3, /* vec_unalign_load_cost. */
730 730 3, /* vec_store_cost. */
731 731 3, /* cond_taken_branch_cost. */
732 732 2, /* cond_not_taken_branch_cost. */
733 733 };
734 734
735 735 struct processor_costs amdfam10_cost = {
736 736 COSTS_N_INSNS (1), /* cost of an add instruction */
737 737 COSTS_N_INSNS (2), /* cost of a lea instruction */
738 738 COSTS_N_INSNS (1), /* variable shift costs */
739 739 COSTS_N_INSNS (1), /* constant shift costs */
740 740 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
741 741 COSTS_N_INSNS (4), /* HI */
742 742 COSTS_N_INSNS (3), /* SI */
743 743 COSTS_N_INSNS (4), /* DI */
744 744 COSTS_N_INSNS (5)}, /* other */
745 745 0, /* cost of multiply per each bit set */
746 746 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
747 747 COSTS_N_INSNS (35), /* HI */
748 748 COSTS_N_INSNS (51), /* SI */
749 749 COSTS_N_INSNS (83), /* DI */
750 750 COSTS_N_INSNS (83)}, /* other */
751 751 COSTS_N_INSNS (1), /* cost of movsx */
752 752 COSTS_N_INSNS (1), /* cost of movzx */
753 753 8, /* "large" insn */
754 754 9, /* MOVE_RATIO */
755 755 4, /* cost for loading QImode using movzbl */
756 756 {3, 4, 3}, /* cost of loading integer registers
757 757 in QImode, HImode and SImode.
758 758 Relative to reg-reg move (2). */
759 759 {3, 4, 3}, /* cost of storing integer registers */
760 760 4, /* cost of reg,reg fld/fst */
761 761 {4, 4, 12}, /* cost of loading fp registers
762 762 in SFmode, DFmode and XFmode */
763 763 {6, 6, 8}, /* cost of storing fp registers
764 764 in SFmode, DFmode and XFmode */
765 765 2, /* cost of moving MMX register */
766 766 {3, 3}, /* cost of loading MMX registers
767 767 in SImode and DImode */
768 768 {4, 4}, /* cost of storing MMX registers
769 769 in SImode and DImode */
770 770 2, /* cost of moving SSE register */
771 771 {4, 4, 3}, /* cost of loading SSE registers
772 772 in SImode, DImode and TImode */
773 773 {4, 4, 5}, /* cost of storing SSE registers
774 774 in SImode, DImode and TImode */
775 775 3, /* MMX or SSE register to integer */
776 776 /* On K8
777 777 MOVD reg64, xmmreg Double FSTORE 4
778 778 MOVD reg32, xmmreg Double FSTORE 4
779 779 On AMDFAM10
780 780 MOVD reg64, xmmreg Double FADD 3
781 781 1/1 1/1
782 782 MOVD reg32, xmmreg Double FADD 3
783 783 1/1 1/1 */
784 784 64, /* size of l1 cache. */
785 785 512, /* size of l2 cache. */
786 786 64, /* size of prefetch block */
787 787 /* New AMD processors never drop prefetches; if they cannot be performed
788 788 immediately, they are queued. We set number of simultaneous prefetches
789 789 to a large constant to reflect this (it probably is not a good idea not
790 790 to limit number of prefetches at all, as their execution also takes some
791 791 time). */
792 792 100, /* number of parallel prefetches */
793 793 2, /* Branch cost */
794 794 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
795 795 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
796 796 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
797 797 COSTS_N_INSNS (2), /* cost of FABS instruction. */
798 798 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
799 799 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
800 800
801 801 /* AMDFAM10 has optimized REP instruction for medium sized blocks, but for
802 802 very small blocks it is better to use loop. For large blocks, libcall can
803 803 do nontemporary accesses and beat inline considerably. */
804 804 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
805 805 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
806 806 {{libcall, {{8, loop}, {24, unrolled_loop},
807 807 {2048, rep_prefix_4_byte}, {-1, libcall}}},
808 808 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
809 809 4, /* scalar_stmt_cost. */
810 810 2, /* scalar load_cost. */
811 811 2, /* scalar_store_cost. */
812 812 6, /* vec_stmt_cost. */
813 813 0, /* vec_to_scalar_cost. */
814 814 2, /* scalar_to_vec_cost. */
815 815 2, /* vec_align_load_cost. */
816 816 2, /* vec_unalign_load_cost. */
817 817 2, /* vec_store_cost. */
818 818 2, /* cond_taken_branch_cost. */
819 819 1, /* cond_not_taken_branch_cost. */
820 820 };
821 821
822 822 static const
823 823 struct processor_costs pentium4_cost = {
824 824 COSTS_N_INSNS (1), /* cost of an add instruction */
825 825 COSTS_N_INSNS (3), /* cost of a lea instruction */
826 826 COSTS_N_INSNS (4), /* variable shift costs */
827 827 COSTS_N_INSNS (4), /* constant shift costs */
828 828 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
829 829 COSTS_N_INSNS (15), /* HI */
830 830 COSTS_N_INSNS (15), /* SI */
831 831 COSTS_N_INSNS (15), /* DI */
832 832 COSTS_N_INSNS (15)}, /* other */
833 833 0, /* cost of multiply per each bit set */
834 834 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
835 835 COSTS_N_INSNS (56), /* HI */
836 836 COSTS_N_INSNS (56), /* SI */
837 837 COSTS_N_INSNS (56), /* DI */
838 838 COSTS_N_INSNS (56)}, /* other */
839 839 COSTS_N_INSNS (1), /* cost of movsx */
840 840 COSTS_N_INSNS (1), /* cost of movzx */
841 841 16, /* "large" insn */
842 842 6, /* MOVE_RATIO */
843 843 2, /* cost for loading QImode using movzbl */
844 844 {4, 5, 4}, /* cost of loading integer registers
845 845 in QImode, HImode and SImode.
846 846 Relative to reg-reg move (2). */
847 847 {2, 3, 2}, /* cost of storing integer registers */
848 848 2, /* cost of reg,reg fld/fst */
849 849 {2, 2, 6}, /* cost of loading fp registers
850 850 in SFmode, DFmode and XFmode */
851 851 {4, 4, 6}, /* cost of storing fp registers
852 852 in SFmode, DFmode and XFmode */
853 853 2, /* cost of moving MMX register */
854 854 {2, 2}, /* cost of loading MMX registers
855 855 in SImode and DImode */
856 856 {2, 2}, /* cost of storing MMX registers
857 857 in SImode and DImode */
858 858 12, /* cost of moving SSE register */
859 859 {12, 12, 12}, /* cost of loading SSE registers
860 860 in SImode, DImode and TImode */
861 861 {2, 2, 8}, /* cost of storing SSE registers
862 862 in SImode, DImode and TImode */
863 863 10, /* MMX or SSE register to integer */
864 864 8, /* size of l1 cache. */
865 865 256, /* size of l2 cache. */
866 866 64, /* size of prefetch block */
867 867 6, /* number of parallel prefetches */
868 868 2, /* Branch cost */
869 869 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
870 870 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
871 871 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
872 872 COSTS_N_INSNS (2), /* cost of FABS instruction. */
873 873 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
874 874 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
875 875 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
876 876 DUMMY_STRINGOP_ALGS},
877 877 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
878 878 {-1, libcall}}},
879 879 DUMMY_STRINGOP_ALGS},
880 880 1, /* scalar_stmt_cost. */
881 881 1, /* scalar load_cost. */
882 882 1, /* scalar_store_cost. */
883 883 1, /* vec_stmt_cost. */
884 884 1, /* vec_to_scalar_cost. */
885 885 1, /* scalar_to_vec_cost. */
886 886 1, /* vec_align_load_cost. */
887 887 2, /* vec_unalign_load_cost. */
888 888 1, /* vec_store_cost. */
889 889 3, /* cond_taken_branch_cost. */
890 890 1, /* cond_not_taken_branch_cost. */
891 891 };
892 892
893 893 static const
894 894 struct processor_costs nocona_cost = {
895 895 COSTS_N_INSNS (1), /* cost of an add instruction */
896 896 COSTS_N_INSNS (1), /* cost of a lea instruction */
897 897 COSTS_N_INSNS (1), /* variable shift costs */
898 898 COSTS_N_INSNS (1), /* constant shift costs */
899 899 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
900 900 COSTS_N_INSNS (10), /* HI */
901 901 COSTS_N_INSNS (10), /* SI */
902 902 COSTS_N_INSNS (10), /* DI */
903 903 COSTS_N_INSNS (10)}, /* other */
904 904 0, /* cost of multiply per each bit set */
905 905 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
906 906 COSTS_N_INSNS (66), /* HI */
907 907 COSTS_N_INSNS (66), /* SI */
908 908 COSTS_N_INSNS (66), /* DI */
909 909 COSTS_N_INSNS (66)}, /* other */
910 910 COSTS_N_INSNS (1), /* cost of movsx */
911 911 COSTS_N_INSNS (1), /* cost of movzx */
912 912 16, /* "large" insn */
913 913 17, /* MOVE_RATIO */
914 914 4, /* cost for loading QImode using movzbl */
915 915 {4, 4, 4}, /* cost of loading integer registers
916 916 in QImode, HImode and SImode.
917 917 Relative to reg-reg move (2). */
918 918 {4, 4, 4}, /* cost of storing integer registers */
919 919 3, /* cost of reg,reg fld/fst */
920 920 {12, 12, 12}, /* cost of loading fp registers
921 921 in SFmode, DFmode and XFmode */
922 922 {4, 4, 4}, /* cost of storing fp registers
923 923 in SFmode, DFmode and XFmode */
924 924 6, /* cost of moving MMX register */
925 925 {12, 12}, /* cost of loading MMX registers
926 926 in SImode and DImode */
927 927 {12, 12}, /* cost of storing MMX registers
928 928 in SImode and DImode */
929 929 6, /* cost of moving SSE register */
930 930 {12, 12, 12}, /* cost of loading SSE registers
931 931 in SImode, DImode and TImode */
932 932 {12, 12, 12}, /* cost of storing SSE registers
933 933 in SImode, DImode and TImode */
934 934 8, /* MMX or SSE register to integer */
935 935 8, /* size of l1 cache. */
936 936 1024, /* size of l2 cache. */
937 937 128, /* size of prefetch block */
938 938 8, /* number of parallel prefetches */
939 939 1, /* Branch cost */
940 940 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
941 941 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
942 942 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
943 943 COSTS_N_INSNS (3), /* cost of FABS instruction. */
944 944 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
945 945 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
946 946 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
947 947 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
948 948 {100000, unrolled_loop}, {-1, libcall}}}},
949 949 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
950 950 {-1, libcall}}},
951 951 {libcall, {{24, loop}, {64, unrolled_loop},
952 952 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
953 953 1, /* scalar_stmt_cost. */
954 954 1, /* scalar load_cost. */
955 955 1, /* scalar_store_cost. */
956 956 1, /* vec_stmt_cost. */
957 957 1, /* vec_to_scalar_cost. */
958 958 1, /* scalar_to_vec_cost. */
959 959 1, /* vec_align_load_cost. */
960 960 2, /* vec_unalign_load_cost. */
961 961 1, /* vec_store_cost. */
962 962 3, /* cond_taken_branch_cost. */
963 963 1, /* cond_not_taken_branch_cost. */
964 964 };
965 965
966 966 static const
967 967 struct processor_costs core2_cost = {
968 968 COSTS_N_INSNS (1), /* cost of an add instruction */
969 969 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
970 970 COSTS_N_INSNS (1), /* variable shift costs */
971 971 COSTS_N_INSNS (1), /* constant shift costs */
972 972 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
973 973 COSTS_N_INSNS (3), /* HI */
974 974 COSTS_N_INSNS (3), /* SI */
975 975 COSTS_N_INSNS (3), /* DI */
976 976 COSTS_N_INSNS (3)}, /* other */
977 977 0, /* cost of multiply per each bit set */
978 978 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
979 979 COSTS_N_INSNS (22), /* HI */
980 980 COSTS_N_INSNS (22), /* SI */
981 981 COSTS_N_INSNS (22), /* DI */
982 982 COSTS_N_INSNS (22)}, /* other */
983 983 COSTS_N_INSNS (1), /* cost of movsx */
984 984 COSTS_N_INSNS (1), /* cost of movzx */
985 985 8, /* "large" insn */
986 986 16, /* MOVE_RATIO */
987 987 2, /* cost for loading QImode using movzbl */
988 988 {6, 6, 6}, /* cost of loading integer registers
989 989 in QImode, HImode and SImode.
990 990 Relative to reg-reg move (2). */
991 991 {4, 4, 4}, /* cost of storing integer registers */
992 992 2, /* cost of reg,reg fld/fst */
993 993 {6, 6, 6}, /* cost of loading fp registers
994 994 in SFmode, DFmode and XFmode */
995 995 {4, 4, 4}, /* cost of storing fp registers
996 996 in SFmode, DFmode and XFmode */
997 997 2, /* cost of moving MMX register */
998 998 {6, 6}, /* cost of loading MMX registers
999 999 in SImode and DImode */
1000 1000 {4, 4}, /* cost of storing MMX registers
1001 1001 in SImode and DImode */
1002 1002 2, /* cost of moving SSE register */
1003 1003 {6, 6, 6}, /* cost of loading SSE registers
1004 1004 in SImode, DImode and TImode */
1005 1005 {4, 4, 4}, /* cost of storing SSE registers
1006 1006 in SImode, DImode and TImode */
1007 1007 2, /* MMX or SSE register to integer */
1008 1008 32, /* size of l1 cache. */
1009 1009 2048, /* size of l2 cache. */
1010 1010 128, /* size of prefetch block */
1011 1011 8, /* number of parallel prefetches */
1012 1012 3, /* Branch cost */
1013 1013 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
1014 1014 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
1015 1015 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
1016 1016 COSTS_N_INSNS (1), /* cost of FABS instruction. */
1017 1017 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
1018 1018 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
1019 1019 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
1020 1020 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
1021 1021 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1022 1022 {{libcall, {{8, loop}, {15, unrolled_loop},
1023 1023 {2048, rep_prefix_4_byte}, {-1, libcall}}},
1024 1024 {libcall, {{24, loop}, {32, unrolled_loop},
1025 1025 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1026 1026 1, /* scalar_stmt_cost. */
1027 1027 1, /* scalar load_cost. */
1028 1028 1, /* scalar_store_cost. */
1029 1029 1, /* vec_stmt_cost. */
1030 1030 1, /* vec_to_scalar_cost. */
1031 1031 1, /* scalar_to_vec_cost. */
1032 1032 1, /* vec_align_load_cost. */
1033 1033 2, /* vec_unalign_load_cost. */
1034 1034 1, /* vec_store_cost. */
1035 1035 3, /* cond_taken_branch_cost. */
1036 1036 1, /* cond_not_taken_branch_cost. */
1037 1037 };
1038 1038
1039 1039 /* Generic64 should produce code tuned for Nocona and K8. */
1040 1040 static const
1041 1041 struct processor_costs generic64_cost = {
1042 1042 COSTS_N_INSNS (1), /* cost of an add instruction */
1043 1043 /* On all chips taken into consideration lea is 2 cycles and more. With
1044 1044 this cost however our current implementation of synth_mult results in
1045 1045 use of unnecessary temporary registers causing regression on several
1046 1046 SPECfp benchmarks. */
1047 1047 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1048 1048 COSTS_N_INSNS (1), /* variable shift costs */
1049 1049 COSTS_N_INSNS (1), /* constant shift costs */
1050 1050 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1051 1051 COSTS_N_INSNS (4), /* HI */
1052 1052 COSTS_N_INSNS (3), /* SI */
1053 1053 COSTS_N_INSNS (4), /* DI */
1054 1054 COSTS_N_INSNS (2)}, /* other */
1055 1055 0, /* cost of multiply per each bit set */
1056 1056 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1057 1057 COSTS_N_INSNS (26), /* HI */
1058 1058 COSTS_N_INSNS (42), /* SI */
1059 1059 COSTS_N_INSNS (74), /* DI */
1060 1060 COSTS_N_INSNS (74)}, /* other */
1061 1061 COSTS_N_INSNS (1), /* cost of movsx */
1062 1062 COSTS_N_INSNS (1), /* cost of movzx */
1063 1063 8, /* "large" insn */
1064 1064 17, /* MOVE_RATIO */
1065 1065 4, /* cost for loading QImode using movzbl */
1066 1066 {4, 4, 4}, /* cost of loading integer registers
1067 1067 in QImode, HImode and SImode.
1068 1068 Relative to reg-reg move (2). */
1069 1069 {4, 4, 4}, /* cost of storing integer registers */
1070 1070 4, /* cost of reg,reg fld/fst */
1071 1071 {12, 12, 12}, /* cost of loading fp registers
1072 1072 in SFmode, DFmode and XFmode */
1073 1073 {6, 6, 8}, /* cost of storing fp registers
1074 1074 in SFmode, DFmode and XFmode */
1075 1075 2, /* cost of moving MMX register */
1076 1076 {8, 8}, /* cost of loading MMX registers
1077 1077 in SImode and DImode */
1078 1078 {8, 8}, /* cost of storing MMX registers
1079 1079 in SImode and DImode */
1080 1080 2, /* cost of moving SSE register */
1081 1081 {8, 8, 8}, /* cost of loading SSE registers
1082 1082 in SImode, DImode and TImode */
1083 1083 {8, 8, 8}, /* cost of storing SSE registers
1084 1084 in SImode, DImode and TImode */
1085 1085 5, /* MMX or SSE register to integer */
1086 1086 32, /* size of l1 cache. */
1087 1087 512, /* size of l2 cache. */
1088 1088 64, /* size of prefetch block */
1089 1089 6, /* number of parallel prefetches */
1090 1090 /* Benchmarks shows large regressions on K8 sixtrack benchmark when this value
1091 1091 is increased to perhaps more appropriate value of 5. */
1092 1092 3, /* Branch cost */
1093 1093 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1094 1094 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1095 1095 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1096 1096 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1097 1097 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1098 1098 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1099 1099 {DUMMY_STRINGOP_ALGS,
1100 1100 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1101 1101 {DUMMY_STRINGOP_ALGS,
1102 1102 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
1103 1103 1, /* scalar_stmt_cost. */
1104 1104 1, /* scalar load_cost. */
1105 1105 1, /* scalar_store_cost. */
1106 1106 1, /* vec_stmt_cost. */
1107 1107 1, /* vec_to_scalar_cost. */
1108 1108 1, /* scalar_to_vec_cost. */
1109 1109 1, /* vec_align_load_cost. */
1110 1110 2, /* vec_unalign_load_cost. */
1111 1111 1, /* vec_store_cost. */
1112 1112 3, /* cond_taken_branch_cost. */
1113 1113 1, /* cond_not_taken_branch_cost. */
1114 1114 };
1115 1115
1116 1116 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
1117 1117 static const
1118 1118 struct processor_costs generic32_cost = {
1119 1119 COSTS_N_INSNS (1), /* cost of an add instruction */
1120 1120 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
1121 1121 COSTS_N_INSNS (1), /* variable shift costs */
1122 1122 COSTS_N_INSNS (1), /* constant shift costs */
1123 1123 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
1124 1124 COSTS_N_INSNS (4), /* HI */
1125 1125 COSTS_N_INSNS (3), /* SI */
1126 1126 COSTS_N_INSNS (4), /* DI */
1127 1127 COSTS_N_INSNS (2)}, /* other */
1128 1128 0, /* cost of multiply per each bit set */
1129 1129 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
1130 1130 COSTS_N_INSNS (26), /* HI */
1131 1131 COSTS_N_INSNS (42), /* SI */
1132 1132 COSTS_N_INSNS (74), /* DI */
1133 1133 COSTS_N_INSNS (74)}, /* other */
1134 1134 COSTS_N_INSNS (1), /* cost of movsx */
1135 1135 COSTS_N_INSNS (1), /* cost of movzx */
1136 1136 8, /* "large" insn */
1137 1137 17, /* MOVE_RATIO */
1138 1138 4, /* cost for loading QImode using movzbl */
1139 1139 {4, 4, 4}, /* cost of loading integer registers
1140 1140 in QImode, HImode and SImode.
1141 1141 Relative to reg-reg move (2). */
1142 1142 {4, 4, 4}, /* cost of storing integer registers */
1143 1143 4, /* cost of reg,reg fld/fst */
1144 1144 {12, 12, 12}, /* cost of loading fp registers
1145 1145 in SFmode, DFmode and XFmode */
1146 1146 {6, 6, 8}, /* cost of storing fp registers
1147 1147 in SFmode, DFmode and XFmode */
1148 1148 2, /* cost of moving MMX register */
1149 1149 {8, 8}, /* cost of loading MMX registers
1150 1150 in SImode and DImode */
1151 1151 {8, 8}, /* cost of storing MMX registers
1152 1152 in SImode and DImode */
1153 1153 2, /* cost of moving SSE register */
1154 1154 {8, 8, 8}, /* cost of loading SSE registers
1155 1155 in SImode, DImode and TImode */
1156 1156 {8, 8, 8}, /* cost of storing SSE registers
1157 1157 in SImode, DImode and TImode */
1158 1158 5, /* MMX or SSE register to integer */
1159 1159 32, /* size of l1 cache. */
1160 1160 256, /* size of l2 cache. */
1161 1161 64, /* size of prefetch block */
1162 1162 6, /* number of parallel prefetches */
1163 1163 3, /* Branch cost */
1164 1164 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1165 1165 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1166 1166 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1167 1167 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1168 1168 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1169 1169 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1170 1170 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1171 1171 DUMMY_STRINGOP_ALGS},
1172 1172 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1173 1173 DUMMY_STRINGOP_ALGS},
1174 1174 1, /* scalar_stmt_cost. */
1175 1175 1, /* scalar load_cost. */
1176 1176 1, /* scalar_store_cost. */
1177 1177 1, /* vec_stmt_cost. */
1178 1178 1, /* vec_to_scalar_cost. */
1179 1179 1, /* scalar_to_vec_cost. */
1180 1180 1, /* vec_align_load_cost. */
1181 1181 2, /* vec_unalign_load_cost. */
1182 1182 1, /* vec_store_cost. */
1183 1183 3, /* cond_taken_branch_cost. */
1184 1184 1, /* cond_not_taken_branch_cost. */
1185 1185 };
1186 1186
1187 1187 const struct processor_costs *ix86_cost = &pentium_cost;
1188 1188
1189 1189 /* Processor feature/optimization bitmasks. */
1190 1190 #define m_386 (1<<PROCESSOR_I386)
1191 1191 #define m_486 (1<<PROCESSOR_I486)
1192 1192 #define m_PENT (1<<PROCESSOR_PENTIUM)
1193 1193 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1194 1194 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1195 1195 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1196 1196 #define m_CORE2 (1<<PROCESSOR_CORE2)
1197 1197
1198 1198 #define m_GEODE (1<<PROCESSOR_GEODE)
1199 1199 #define m_K6 (1<<PROCESSOR_K6)
1200 1200 #define m_K6_GEODE (m_K6 | m_GEODE)
1201 1201 #define m_K8 (1<<PROCESSOR_K8)
1202 1202 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1203 1203 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1204 1204 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1205 1205 #define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)
1206 1206
1207 1207 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1208 1208 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1209 1209
1210 1210 /* Generic instruction choice should be common subset of supported CPUs
1211 1211 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1212 1212 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1213 1213
1214 1214 /* Feature tests against the various tunings. */
1215 1215 unsigned char ix86_tune_features[X86_TUNE_LAST];
1216 1216
1217 1217 /* Feature tests against the various tunings used to create ix86_tune_features
1218 1218 based on the processor mask. */
1219 1219 static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
1220 1220 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1221 1221 negatively, so enabling for Generic64 seems like good code size
1222 1222 tradeoff. We can't enable it for 32bit generic because it does not
1223 1223 work well with PPro base chips. */
1224 1224 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,
1225 1225
1226 1226 /* X86_TUNE_PUSH_MEMORY */
1227 1227 m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
1228 1228 | m_NOCONA | m_CORE2 | m_GENERIC,
1229 1229
1230 1230 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1231 1231 m_486 | m_PENT,
1232 1232
1233 1233 /* X86_TUNE_UNROLL_STRLEN */
1234 1234 m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,
1235 1235
1236 1236 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1237 1237 m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,
1238 1238
1239 1239 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1240 1240 on simulation result. But after P4 was made, no performance benefit
1241 1241 was observed with branch hints. It also increases the code size.
1242 1242 As a result, icc never generates branch hints. */
1243 1243 0,
1244 1244
1245 1245 /* X86_TUNE_DOUBLE_WITH_ADD */
1246 1246 ~m_386,
1247 1247
1248 1248 /* X86_TUNE_USE_SAHF */
1249 1249 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1250 1250 | m_NOCONA | m_CORE2 | m_GENERIC,
1251 1251
1252 1252 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1253 1253 partial dependencies. */
1254 1254 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
1255 1255 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1256 1256
1257 1257 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1258 1258 register stalls on Generic32 compilation setting as well. However
1259 1259 in current implementation the partial register stalls are not eliminated
1260 1260 very well - they can be introduced via subregs synthesized by combine
1261 1261 and can happen in caller/callee saving sequences. Because this option
1262 1262 pays back little on PPro based chips and is in conflict with partial reg
1263 1263 dependencies used by Athlon/P4 based chips, it is better to leave it off
1264 1264 for generic32 for now. */
1265 1265 m_PPRO,
1266 1266
1267 1267 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1268 1268 m_CORE2 | m_GENERIC,
1269 1269
1270 1270 /* X86_TUNE_USE_HIMODE_FIOP */
1271 1271 m_386 | m_486 | m_K6_GEODE,
1272 1272
1273 1273 /* X86_TUNE_USE_SIMODE_FIOP */
1274 1274 ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),
1275 1275
1276 1276 /* X86_TUNE_USE_MOV0 */
1277 1277 m_K6,
1278 1278
1279 1279 /* X86_TUNE_USE_CLTD */
1280 1280 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1281 1281
1282 1282 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1283 1283 m_PENT4,
1284 1284
1285 1285 /* X86_TUNE_SPLIT_LONG_MOVES */
1286 1286 m_PPRO,
1287 1287
1288 1288 /* X86_TUNE_READ_MODIFY_WRITE */
1289 1289 ~m_PENT,
1290 1290
1291 1291 /* X86_TUNE_READ_MODIFY */
1292 1292 ~(m_PENT | m_PPRO),
1293 1293
1294 1294 /* X86_TUNE_PROMOTE_QIMODE */
1295 1295 m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
1296 1296 | m_GENERIC /* | m_PENT4 ? */,
1297 1297
1298 1298 /* X86_TUNE_FAST_PREFIX */
1299 1299 ~(m_PENT | m_486 | m_386),
1300 1300
1301 1301 /* X86_TUNE_SINGLE_STRINGOP */
1302 1302 m_386 | m_PENT4 | m_NOCONA,
1303 1303
1304 1304 /* X86_TUNE_QIMODE_MATH */
1305 1305 ~0,
1306 1306
1307 1307 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1308 1308 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1309 1309 might be considered for Generic32 if our scheme for avoiding partial
1310 1310 stalls was more effective. */
1311 1311 ~m_PPRO,
1312 1312
1313 1313 /* X86_TUNE_PROMOTE_QI_REGS */
1314 1314 0,
1315 1315
1316 1316 /* X86_TUNE_PROMOTE_HI_REGS */
1317 1317 m_PPRO,
1318 1318
1319 1319 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1320 1320 m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1321 1321
1322 1322 /* X86_TUNE_ADD_ESP_8 */
1323 1323 m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
1324 1324 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1325 1325
1326 1326 /* X86_TUNE_SUB_ESP_4 */
1327 1327 m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1328 1328
1329 1329 /* X86_TUNE_SUB_ESP_8 */
1330 1330 m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
1331 1331 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1332 1332
1333 1333 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1334 1334 for DFmode copies */
1335 1335 ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1336 1336 | m_GENERIC | m_GEODE),
1337 1337
1338 1338 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1339 1339 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1340 1340
1341 1341 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1342 1342 conflict here between PPro/Pentium4 based chips that treat 128bit
1343 1343 SSE registers as single units versus K8 based chips that divide SSE
1344 1344 registers into two 64bit halves. This knob promotes all store destinations
1345 1345 to be 128bit to allow register renaming on 128bit SSE units, but usually
1346 1346 results in one extra microop on 64bit SSE units. Experimental results
1347 1347 show that disabling this option on P4 brings over 20% SPECfp regression,
1348 1348 while enabling it on K8 brings roughly 2.4% regression that can be partly
1349 1349 masked by careful scheduling of moves. */
1350 1350 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1351 1351
1352 1352 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1353 1353 m_AMDFAM10,
1354 1354
1355 1355 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1356 1356 are resolved on SSE register parts instead of whole registers, so we may
1357 1357 maintain just the lower part of scalar values in the proper format, leaving the
1358 1358 upper part undefined. */
1359 1359 m_ATHLON_K8,
1360 1360
1361 1361 /* X86_TUNE_SSE_TYPELESS_STORES */
1362 1362 m_AMD_MULTIPLE,
1363 1363
1364 1364 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1365 1365 m_PPRO | m_PENT4 | m_NOCONA,
1366 1366
1367 1367 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1368 1368 m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1369 1369
1370 1370 /* X86_TUNE_PROLOGUE_USING_MOVE */
1371 1371 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1372 1372
1373 1373 /* X86_TUNE_EPILOGUE_USING_MOVE */
1374 1374 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1375 1375
1376 1376 /* X86_TUNE_SHIFT1 */
1377 1377 ~m_486,
1378 1378
1379 1379 /* X86_TUNE_USE_FFREEP */
1380 1380 m_AMD_MULTIPLE,
1381 1381
1382 1382 /* X86_TUNE_INTER_UNIT_MOVES */
1383 1383 ~(m_AMD_MULTIPLE | m_GENERIC),
1384 1384
1385 1385 /* X86_TUNE_INTER_UNIT_CONVERSIONS */
1386 1386 ~(m_AMDFAM10),
1387 1387
1388 1388 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1389 1389 than 4 branch instructions in the 16 byte window. */
1390 1390 m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1391 1391
1392 1392 /* X86_TUNE_SCHEDULE */
1393 1393 m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1394 1394
1395 1395 /* X86_TUNE_USE_BT */
1396 1396 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1397 1397
1398 1398 /* X86_TUNE_USE_INCDEC */
1399 1399 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1400 1400
1401 1401 /* X86_TUNE_PAD_RETURNS */
1402 1402 m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,
1403 1403
1404 1404 /* X86_TUNE_EXT_80387_CONSTANTS */
1405 1405 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1406 1406
1407 1407 /* X86_TUNE_SHORTEN_X87_SSE */
1408 1408 ~m_K8,
1409 1409
1410 1410 /* X86_TUNE_AVOID_VECTOR_DECODE */
1411 1411 m_K8 | m_GENERIC64,
1412 1412
1413 1413 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1414 1414 and SImode multiply, but 386 and 486 do HImode multiply faster. */
1415 1415 ~(m_386 | m_486),
1416 1416
1417 1417 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1418 1418 vector path on AMD machines. */
1419 1419 m_K8 | m_GENERIC64 | m_AMDFAM10,
1420 1420
1421 1421 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1422 1422 machines. */
1423 1423 m_K8 | m_GENERIC64 | m_AMDFAM10,
1424 1424
1425 1425 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1426 1426 than a MOV. */
1427 1427 m_PENT,
1428 1428
1429 1429 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1430 1430 but one byte longer. */
1431 1431 m_PENT,
1432 1432
1433 1433 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1434 1434 operand that cannot be represented using a modRM byte. The XOR
1435 1435 replacement is long decoded, so this split helps here as well. */
1436 1436 m_K6,
1437 1437
1438 1438 /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
1439 1439 from FP to FP. */
1440 1440 m_AMDFAM10 | m_GENERIC,
1441 1441
1442 1442 /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
1443 1443 from integer to FP. */
1444 1444 m_AMDFAM10,
1445 1445
1446 1446 /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
1447 1447 with a subsequent conditional jump instruction into a single
1448 1448 compare-and-branch uop. */
1449 1449 m_CORE2,
1450 1450 };
1451 1451
1452 1452 /* Feature tests against the various architecture variations. */
1453 1453 unsigned char ix86_arch_features[X86_ARCH_LAST];
1454 1454
1455 1455 /* Feature tests against the various architecture variations, used to create
1456 1456 ix86_arch_features based on the processor mask. */
1457 1457 static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
1458 1458 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1459 1459 ~(m_386 | m_486 | m_PENT | m_K6),
1460 1460
1461 1461 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1462 1462 ~m_386,
1463 1463
1464 1464 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1465 1465 ~(m_386 | m_486),
1466 1466
1467 1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1468 1468 ~m_386,
1469 1469
1470 1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1471 1471 ~m_386,
1472 1472 };
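
The two tables above share one idiom: each entry is a bitmask of processors (built from the m_* defines) for which a feature applies, and once the active -march/-mtune processor is known the table is collapsed into a plain boolean array indexed by feature. A minimal standalone sketch of that idiom, with hypothetical names rather than the real GCC tables:

#include <stdio.h>

/* Hypothetical processor ids and per-processor masks, mirroring the
   m_* defines above.  */
enum proc { PROC_386, PROC_486, PROC_PENT, PROC_MAX };
#define M_386  (1 << PROC_386)
#define M_486  (1 << PROC_486)
#define M_PENT (1 << PROC_PENT)

/* One mask per feature: the processors that want the feature on.  */
enum feat { FEAT_USE_LEAVE, FEAT_ZERO_EXTEND_WITH_AND, FEAT_MAX };
static const unsigned int initial_features[FEAT_MAX] = {
  M_386,                /* FEAT_USE_LEAVE */
  M_486 | M_PENT,       /* FEAT_ZERO_EXTEND_WITH_AND */
};

static unsigned char features[FEAT_MAX];

int
main (void)
{
  unsigned int tune_mask = 1 << PROC_486;
  int i;

  /* Collapse the per-processor masks into per-feature booleans, the same
     way ix86_tune_features is filled from initial_ix86_tune_features.  */
  for (i = 0; i < FEAT_MAX; i++)
    features[i] = (initial_features[i] & tune_mask) != 0;

  printf ("use leave: %d, zero extend with and: %d\n",
          features[FEAT_USE_LEAVE], features[FEAT_ZERO_EXTEND_WITH_AND]);
  return 0;
}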
1473 1473
1474 1474 static const unsigned int x86_accumulate_outgoing_args
1475 1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1476 1476
1477 1477 static const unsigned int x86_arch_always_fancy_math_387
1478 1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4
1479 1479 | m_NOCONA | m_CORE2 | m_GENERIC;
1480 1480
1481 1481 static enum stringop_alg stringop_alg = no_stringop;
1482 1482
1483 1483 /* In case the average insn count for a single function invocation is
1484 1484 lower than this constant, emit fast (but longer) prologue and
1485 1485 epilogue code. */
1486 1486 #define FAST_PROLOGUE_INSN_COUNT 20
1487 1487
1488 1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1489 1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1490 1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1491 1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1492 1492
1493 1493 /* Array of the smallest class containing reg number REGNO, indexed by
1494 1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1495 1495
1496 1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1497 1497 {
1498 1498 /* ax, dx, cx, bx */
1499 1499 AREG, DREG, CREG, BREG,
1500 1500 /* si, di, bp, sp */
1501 1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1502 1502 /* FP registers */
1503 1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1504 1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1505 1505 /* arg pointer */
1506 1506 NON_Q_REGS,
1507 1507 /* flags, fpsr, fpcr, frame */
1508 1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1509 1509 /* SSE registers */
1510 1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1511 1511 SSE_REGS, SSE_REGS,
1512 1512 /* MMX registers */
1513 1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1514 1514 MMX_REGS, MMX_REGS,
1515 1515 /* REX registers */
1516 1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1517 1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1518 1518 /* SSE REX registers */
1519 1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1520 1520 SSE_REGS, SSE_REGS,
1521 1521 };
1522 1522
1523 1523 /* The "default" register map used in 32bit mode. */
1524 1524
1525 1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1526 1526 {
1527 1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1528 1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1529 1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1530 1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1531 1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1532 1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1533 1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1534 1534 };
1535 1535
1536 1536 /* The "default" register map used in 64bit mode. */
1537 1537
1538 1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1539 1539 {
1540 1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1541 1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1542 1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1543 1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1544 1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1545 1545 8,9,10,11,12,13,14,15, /* extended integer registers */
1546 1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1547 1547 };
1548 1548
1549 1549 /* Define the register numbers to be used in Dwarf debugging information.
1550 1550 The SVR4 reference port C compiler uses the following register numbers
1551 1551 in its Dwarf output code:
1552 1552 0 for %eax (gcc regno = 0)
1553 1553 1 for %ecx (gcc regno = 2)
1554 1554 2 for %edx (gcc regno = 1)
1555 1555 3 for %ebx (gcc regno = 3)
1556 1556 4 for %esp (gcc regno = 7)
1557 1557 5 for %ebp (gcc regno = 6)
1558 1558 6 for %esi (gcc regno = 4)
1559 1559 7 for %edi (gcc regno = 5)
1560 1560 The following three DWARF register numbers are never generated by
1561 1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1562 1562 believes these numbers have these meanings.
1563 1563 8 for %eip (no gcc equivalent)
1564 1564 9 for %eflags (gcc regno = 17)
1565 1565 10 for %trapno (no gcc equivalent)
1566 1566 It is not at all clear how we should number the FP stack registers
1567 1567 for the x86 architecture. If the version of SDB on x86/svr4 were
1568 1568 a bit less brain dead with respect to floating-point then we would
1569 1569 have a precedent to follow with respect to DWARF register numbers
1570 1570 for x86 FP registers, but the SDB on x86/svr4 is so completely
1571 1571 broken with respect to FP registers that it is hardly worth thinking
1572 1572 of it as something to strive for compatibility with.
1573 1573 The version of x86/svr4 SDB I have at the moment does (partially)
1574 1574 seem to believe that DWARF register number 11 is associated with
1575 1575 the x86 register %st(0), but that's about all. Higher DWARF
1576 1576 register numbers don't seem to be associated with anything in
1577 1577 particular, and even for DWARF regno 11, SDB only seems to under-
1578 1578 stand that it should say that a variable lives in %st(0) (when
1579 1579 asked via an `=' command) if we said it was in DWARF regno 11,
1580 1580 but SDB still prints garbage when asked for the value of the
1581 1581 variable in question (via a `/' command).
1582 1582 (Also note that the labels SDB prints for various FP stack regs
1583 1583 when doing an `x' command are all wrong.)
1584 1584 Note that these problems generally don't affect the native SVR4
1585 1585 C compiler because it doesn't allow the use of -O with -g and
1586 1586 because when it is *not* optimizing, it allocates a memory
1587 1587 location for each floating-point variable, and the memory
1588 1588 location is what gets described in the DWARF AT_location
1589 1589 attribute for the variable in question.
1590 1590 Regardless of the severe mental illness of the x86/svr4 SDB, we
1591 1591 do something sensible here and we use the following DWARF
1592 1592 register numbers. Note that these are all stack-top-relative
1593 1593 numbers.
1594 1594 11 for %st(0) (gcc regno = 8)
1595 1595 12 for %st(1) (gcc regno = 9)
1596 1596 13 for %st(2) (gcc regno = 10)
1597 1597 14 for %st(3) (gcc regno = 11)
1598 1598 15 for %st(4) (gcc regno = 12)
1599 1599 16 for %st(5) (gcc regno = 13)
1600 1600 17 for %st(6) (gcc regno = 14)
1601 1601 18 for %st(7) (gcc regno = 15)
1602 1602 */
1603 1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1604 1604 {
1605 1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1606 1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1607 1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1608 1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1609 1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1610 1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1611 1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1612 1612 };
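
In practice such a map is nothing more than a table lookup from GCC's internal register number to the number written into the DWARF output. A tiny standalone illustration using a few of the pairs documented in the comment above (the helper is hypothetical, not GCC code):

#include <stdio.h>

/* A few gcc-regno -> DWARF-regno pairs from the SVR4 map above:
   %eax (gcc 0) -> 0, %ecx (gcc 2) -> 1, %ebp (gcc 6) -> 5,
   %st(0) (gcc 8) -> 11.  */
static const int svr4_map[] = { 0, 2, 1, 3, 6, 7, 5, 4, 11, 12 };

static int
dwarf_regno (int gcc_regno)
{
  return svr4_map[gcc_regno];
}

int
main (void)
{
  printf ("%%eax -> %d, %%ebp -> %d, %%st(0) -> %d\n",
          dwarf_regno (0), dwarf_regno (6), dwarf_regno (8));
  return 0;
}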
1613 1613
1614 1614 /* Test and compare insns in i386.md store the information needed to
1615 1615 generate branch and scc insns here. */
1616 1616
1617 1617 rtx ix86_compare_op0 = NULL_RTX;
1618 1618 rtx ix86_compare_op1 = NULL_RTX;
1619 1619 rtx ix86_compare_emitted = NULL_RTX;
1620 1620
1621 1621 /* Define parameter passing and return registers. */
1622 1622
1623 1623 static int const x86_64_int_parameter_registers[6] =
1624 1624 {
1625 1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG
1626 1626 };
1627 1627
1628 1628 static int const x86_64_ms_abi_int_parameter_registers[4] =
1629 1629 {
1630 1630 CX_REG, DX_REG, R8_REG, R9_REG
1631 1631 };
1632 1632
1633 1633 static int const x86_64_int_return_registers[4] =
1634 1634 {
1635 1635 AX_REG, DX_REG, DI_REG, SI_REG
1636 1636 };
1637 1637
1638 1638 /* Define the structure for the machine field in struct function. */
1639 1639
1640 1640 struct stack_local_entry GTY(())
1641 1641 {
1642 1642 unsigned short mode;
1643 1643 unsigned short n;
1644 1644 rtx rtl;
1645 1645 struct stack_local_entry *next;
1646 1646 };
1647 1647
1648 1648 /* Structure describing stack frame layout.
1649 1649 Stack grows downward:
1650 1650
1651 1651 [arguments]
1652 1652 <- ARG_POINTER
1653 1653 saved pc
1654 1654
1655 1655 saved frame pointer if frame_pointer_needed
1656 1656 <- HARD_FRAME_POINTER
1657 1657 [-msave-args]
1658 1658
1659 1659 [padding0]
1660 1660
1661 1661 [saved regs]
1662 1662
1663 1663 [padding05]
1664 1664
1665 1665 [saved SSE regs]
1666 1666
1667 1667 [padding1] \
1668 1668 )
1669 1669 [va_arg registers] (
1670 1670 > to_allocate <- FRAME_POINTER
1671 1671 [frame] (
1672 1672 )
1673 1673 [padding2] /
1674 1674 */
1675 1675 struct ix86_frame
1676 1676 {
1677 1677 int nmsave_args;
1678 1678 int padding0;
1679 1679 int nsseregs;
1680 1680 int padding05;
1681 1681 int nregs;
1682 1682 int padding1;
1683 1683 int va_arg_size;
1684 1684 HOST_WIDE_INT frame;
1685 1685 int padding2;
1686 1686 int outgoing_arguments_size;
1687 1687 int red_zone_size;
1688 1688
1689 1689 HOST_WIDE_INT to_allocate;
1690 1690 /* The offsets relative to ARG_POINTER. */
1691 1691 HOST_WIDE_INT frame_pointer_offset;
1692 1692 HOST_WIDE_INT hard_frame_pointer_offset;
1693 1693 HOST_WIDE_INT stack_pointer_offset;
1694 1694
1695 1695 /* When save_regs_using_mov is set, emit prologue using
1696 1696 move instead of push instructions. */
1697 1697 bool save_regs_using_mov;
1698 1698 };
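
The offsets in ix86_frame are all measured from ARG_POINTER at the top of the picture, growing as the prologue pushes and allocates downward. A deliberately simplified, hypothetical sketch of that accounting (32-bit words, no padding, no SSE saves, no -msave-args); the real logic lives in ix86_compute_frame_layout:

#include <stdio.h>

struct frame
{
  int nregs;                       /* call-saved integer registers to save */
  int va_arg_size;                 /* registers spilled for va_arg */
  long frame;                      /* local variables */
  long to_allocate;                /* what the prologue subtracts from %esp */
  long hard_frame_pointer_offset;  /* offsets from ARG_POINTER */
  long frame_pointer_offset;
};

static void
compute_layout (struct frame *f, int frame_pointer_needed)
{
  long offset = 4;                 /* saved return address */

  if (frame_pointer_needed)
    offset += 4;                   /* saved frame pointer */
  f->hard_frame_pointer_offset = offset;

  offset += f->nregs * 4;          /* saved registers */
  f->frame_pointer_offset = offset + f->va_arg_size;
  f->to_allocate = f->va_arg_size + f->frame;
}

int
main (void)
{
  struct frame f = { 3, 0, 40, 0, 0, 0 };

  compute_layout (&f, 1);
  printf ("allocate %ld bytes, FRAME_POINTER at offset %ld\n",
          f.to_allocate, f.frame_pointer_offset);
  return 0;
}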
1699 1699
1700 1700 /* Code model option. */
1701 1701 enum cmodel ix86_cmodel;
1702 1702 /* Asm dialect. */
1703 1703 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1704 1704 /* TLS dialects. */
1705 1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1706 1706
1707 1707 /* Which unit we are generating floating point math for. */
1708 1708 enum fpmath_unit ix86_fpmath;
1709 1709
1710 1710 /* Which cpu are we scheduling for. */
1711 1711 enum attr_cpu ix86_schedule;
1712 1712
1713 1713 /* Which cpu are we optimizing for. */
1714 1714 enum processor_type ix86_tune;
1715 1715
1716 1716 /* Which instruction set architecture to use. */
1717 1717 enum processor_type ix86_arch;
1718 1718
1719 1719 /* True if the SSE prefetch instruction is not a NOP. */
1720 1720 int x86_prefetch_sse;
1721 1721
1722 1722 /* ix86_regparm_string as a number */
1723 1723 static int ix86_regparm;
1724 1724
1725 1725 /* -mstackrealign option */
1726 1726 extern int ix86_force_align_arg_pointer;
1727 1727 static const char ix86_force_align_arg_pointer_string[]
1728 1728 = "force_align_arg_pointer";
1729 1729
1730 1730 static rtx (*ix86_gen_leave) (void);
1731 1731 static rtx (*ix86_gen_pop1) (rtx);
1732 1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx);
1733 1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx);
1734 1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx);
1735 1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx);
1736 1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx);
1737 1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx);
1738 1738
1739 1739 /* Preferred alignment for stack boundary in bits. */
1740 1740 unsigned int ix86_preferred_stack_boundary;
1741 1741
1742 1742 /* Alignment for incoming stack boundary in bits specified at
1743 1743 command line. */
1744 1744 static unsigned int ix86_user_incoming_stack_boundary;
1745 1745
1746 1746 /* Default alignment for incoming stack boundary in bits. */
1747 1747 static unsigned int ix86_default_incoming_stack_boundary;
1748 1748
1749 1749 /* Alignment for incoming stack boundary in bits. */
1750 1750 unsigned int ix86_incoming_stack_boundary;
1751 1751
1752 1752 /* Values 1-5: see jump.c */
1753 1753 int ix86_branch_cost;
1754 1754
1755 1755 /* Calling abi specific va_list type nodes. */
1756 1756 static GTY(()) tree sysv_va_list_type_node;
1757 1757 static GTY(()) tree ms_va_list_type_node;
1758 1758
1759 1759 /* Variables which are this size or smaller are put in the data/bss
1760 1760 or ldata/lbss sections. */
1761 1761
1762 1762 int ix86_section_threshold = 65536;
1763 1763
1764 1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1765 1765 char internal_label_prefix[16];
1766 1766 int internal_label_prefix_len;
1767 1767
1768 1768 /* Fence to use after loop using movnt. */
1769 1769 tree x86_mfence;
1770 1770
1771 1771 static int ix86_nsaved_args (void);
1772 1772
1773 1773 /* Register class used for passing a given 64bit part of the argument.
1774 1774 These represent classes as documented by the psABI, with the exception of
1775 1775 the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1776 1776 uses SFmode or DFmode moves instead of DImode to avoid reformatting penalties.
1777 1777
1778 1778 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1779 1779 whenever possible (upper half does contain padding). */
1780 1780 enum x86_64_reg_class
1781 1781 {
1782 1782 X86_64_NO_CLASS,
1783 1783 X86_64_INTEGER_CLASS,
1784 1784 X86_64_INTEGERSI_CLASS,
1785 1785 X86_64_SSE_CLASS,
1786 1786 X86_64_SSESF_CLASS,
1787 1787 X86_64_SSEDF_CLASS,
1788 1788 X86_64_SSEUP_CLASS,
1789 1789 X86_64_X87_CLASS,
1790 1790 X86_64_X87UP_CLASS,
1791 1791 X86_64_COMPLEX_X87_CLASS,
1792 1792 X86_64_MEMORY_CLASS
1793 1793 };
1794 1794
1795 1795 #define MAX_CLASSES 4
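
Classification looks at an argument in 8-byte chunks ("eightbytes") and assigns each chunk one of the classes above; MAX_CLASSES bounds how many chunks one argument may contribute. The toy classifier below only shows the flavor of the idea (floating-point chunks go to SSE, integer chunks to general registers, large aggregates to memory); the real rules are the psABI algorithm implemented later in this file:

#include <stdio.h>

enum toy_class { TOY_INTEGER, TOY_SSE, TOY_MEMORY };

/* Hypothetical, heavily simplified per-eightbyte classification.  */
static int
toy_classify (int size_bytes, int is_float, enum toy_class classes[2])
{
  int nchunks = (size_bytes + 7) / 8;
  int i;

  if (nchunks > 2)
    return 0;                      /* too big: passed in memory */
  for (i = 0; i < nchunks; i++)
    classes[i] = is_float ? TOY_SSE : TOY_INTEGER;
  return nchunks;
}

int
main (void)
{
  enum toy_class c[2];
  int n = toy_classify (16, 1, c); /* e.g. struct { double x, y; } */

  printf ("%d eightbytes, first chunk in %s registers\n",
          n, c[0] == TOY_SSE ? "SSE" : "integer");
  return 0;
}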
1796 1796
1797 1797 /* Table of constants used by fldpi, fldln2, etc.... */
1798 1798 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1799 1799 static bool ext_80387_constants_init = 0;
1800 1800
1801 1801
1802 1802 static struct machine_function * ix86_init_machine_status (void);
1803 1803 static rtx ix86_function_value (const_tree, const_tree, bool);
1804 1804 static int ix86_function_regparm (const_tree, const_tree);
1805 1805 static void ix86_compute_frame_layout (struct ix86_frame *);
1806 1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1807 1807 rtx, rtx, int);
1808 1808 static void ix86_add_new_builtins (int);
1809 1809
1810 1810 enum ix86_function_specific_strings
1811 1811 {
1812 1812 IX86_FUNCTION_SPECIFIC_ARCH,
1813 1813 IX86_FUNCTION_SPECIFIC_TUNE,
1814 1814 IX86_FUNCTION_SPECIFIC_FPMATH,
1815 1815 IX86_FUNCTION_SPECIFIC_MAX
1816 1816 };
1817 1817
1818 1818 static char *ix86_target_string (int, int, const char *, const char *,
1819 1819 const char *, bool);
1820 1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED;
1821 1821 static void ix86_function_specific_save (struct cl_target_option *);
1822 1822 static void ix86_function_specific_restore (struct cl_target_option *);
1823 1823 static void ix86_function_specific_print (FILE *, int,
1824 1824 struct cl_target_option *);
1825 1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int);
1826 1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]);
1827 1827 static bool ix86_can_inline_p (tree, tree);
1828 1828 static void ix86_set_current_function (tree);
1829 1829
1830 1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int);
1831 1831
1832 1832
1833 1833 /* The svr4 ABI for the i386 says that records and unions are returned
1834 1834 in memory. */
1835 1835 #ifndef DEFAULT_PCC_STRUCT_RETURN
1836 1836 #define DEFAULT_PCC_STRUCT_RETURN 1
1837 1837 #endif
1838 1838
1839 1839 /* Whether -mtune= or -march= were specified */
1840 1840 static int ix86_tune_defaulted;
1841 1841 static int ix86_arch_specified;
1842 1842
1843 1843 /* Bit flags that specify the ISA we are compiling for. */
1844 1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1845 1845
1846 1846 /* A mask of ix86_isa_flags that includes bit X if X
1847 1847 was set or cleared on the command line. */
1848 1848 static int ix86_isa_flags_explicit;
1849 1849
1850 1850 /* Define a set of ISAs which are available when a given ISA is
1851 1851 enabled. MMX and SSE ISAs are handled separately. */
1852 1852
1853 1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX
1854 1854 #define OPTION_MASK_ISA_3DNOW_SET \
1855 1855 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET)
1856 1856
1857 1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE
1858 1858 #define OPTION_MASK_ISA_SSE2_SET \
1859 1859 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET)
1860 1860 #define OPTION_MASK_ISA_SSE3_SET \
1861 1861 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET)
1862 1862 #define OPTION_MASK_ISA_SSSE3_SET \
1863 1863 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET)
1864 1864 #define OPTION_MASK_ISA_SSE4_1_SET \
1865 1865 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET)
1866 1866 #define OPTION_MASK_ISA_SSE4_2_SET \
1867 1867 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET)
1868 1868 #define OPTION_MASK_ISA_AVX_SET \
1869 1869 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET)
1870 1870 #define OPTION_MASK_ISA_FMA_SET \
1871 1871 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET)
1872 1872
1873 1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1874 1874 as -msse4.2. */
1875 1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET
1876 1876
1877 1877 #define OPTION_MASK_ISA_SSE4A_SET \
1878 1878 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET)
1879 1879 #define OPTION_MASK_ISA_SSE5_SET \
1880 1880 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET)
1881 1881
1882 1882 /* AES and PCLMUL need SSE2 because they use xmm registers */
1883 1883 #define OPTION_MASK_ISA_AES_SET \
1884 1884 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET)
1885 1885 #define OPTION_MASK_ISA_PCLMUL_SET \
1886 1886 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET)
1887 1887
1888 1888 #define OPTION_MASK_ISA_ABM_SET \
1889 1889 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT)
1890 1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT
1891 1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16
1892 1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF
1893 1893
1894 1894 /* Define a set of ISAs which aren't available when a given ISA is
1895 1895 disabled. MMX and SSE ISAs are handled separately. */
1896 1896
1897 1897 #define OPTION_MASK_ISA_MMX_UNSET \
1898 1898 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET)
1899 1899 #define OPTION_MASK_ISA_3DNOW_UNSET \
1900 1900 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET)
1901 1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A
1902 1902
1903 1903 #define OPTION_MASK_ISA_SSE_UNSET \
1904 1904 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET)
1905 1905 #define OPTION_MASK_ISA_SSE2_UNSET \
1906 1906 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET)
1907 1907 #define OPTION_MASK_ISA_SSE3_UNSET \
1908 1908 (OPTION_MASK_ISA_SSE3 \
1909 1909 | OPTION_MASK_ISA_SSSE3_UNSET \
1910 1910 | OPTION_MASK_ISA_SSE4A_UNSET )
1911 1911 #define OPTION_MASK_ISA_SSSE3_UNSET \
1912 1912 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET)
1913 1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1914 1914 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET)
1915 1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \
1916 1916 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET )
1917 1917 #define OPTION_MASK_ISA_AVX_UNSET \
1918 1918 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET)
1919 1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA
1920 1920
1921 1921 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should be the same
1922 1922 as -mno-sse4.1. */
1923 1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1924 1924
1925 1925 #define OPTION_MASK_ISA_SSE4A_UNSET \
1926 1926 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET)
1927 1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5
1928 1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES
1929 1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL
1930 1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM
1931 1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT
1932 1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16
1933 1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF
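
The _SET/_UNSET pairs encode implication closure in both directions: enabling an ISA also enables everything it depends on, and disabling an ISA also disables everything layered on top of it. ix86_handle_option below simply ORs in a _SET mask or ANDs out an _UNSET mask. A standalone sketch of the same idea, with hypothetical bits:

#include <stdio.h>

#define ISA_SSE   (1 << 0)
#define ISA_SSE2  (1 << 1)
#define ISA_SSE3  (1 << 2)

/* Enabling an ISA drags in its prerequisites...  */
#define ISA_SSE_SET    ISA_SSE
#define ISA_SSE2_SET   (ISA_SSE2 | ISA_SSE_SET)
#define ISA_SSE3_SET   (ISA_SSE3 | ISA_SSE2_SET)

/* ...and disabling one also drops everything built on it.  */
#define ISA_SSE3_UNSET ISA_SSE3
#define ISA_SSE2_UNSET (ISA_SSE2 | ISA_SSE3_UNSET)
#define ISA_SSE_UNSET  (ISA_SSE | ISA_SSE2_UNSET)

int
main (void)
{
  unsigned int isa = 0;

  isa |= ISA_SSE3_SET;             /* like -msse3: SSE and SSE2 come along */
  isa &= ~ISA_SSE2_UNSET;          /* like -mno-sse2: SSE3 goes away too */

  printf ("sse=%d sse2=%d sse3=%d\n",
          !!(isa & ISA_SSE), !!(isa & ISA_SSE2), !!(isa & ISA_SSE3));
  return 0;
}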
1934 1934
1935 1935 /* Vectorization library interface and handlers. */
1936 1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1937 1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree);
1938 1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1939 1939
1940 1940 /* Processor target table, indexed by processor number */
1941 1941 struct ptt
1942 1942 {
1943 1943 const struct processor_costs *cost; /* Processor costs */
1944 1944 const int align_loop; /* Default alignments. */
1945 1945 const int align_loop_max_skip;
1946 1946 const int align_jump;
1947 1947 const int align_jump_max_skip;
1948 1948 const int align_func;
1949 1949 };
1950 1950
1951 1951 static const struct ptt processor_target_table[PROCESSOR_max] =
1952 1952 {
1953 1953 {&i386_cost, 4, 3, 4, 3, 4},
1954 1954 {&i486_cost, 16, 15, 16, 15, 16},
1955 1955 {&pentium_cost, 16, 7, 16, 7, 16},
1956 1956 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1957 1957 {&geode_cost, 0, 0, 0, 0, 0},
1958 1958 {&k6_cost, 32, 7, 32, 7, 32},
1959 1959 {&athlon_cost, 16, 7, 16, 7, 16},
1960 1960 {&pentium4_cost, 0, 0, 0, 0, 0},
1961 1961 {&k8_cost, 16, 7, 16, 7, 16},
1962 1962 {&nocona_cost, 0, 0, 0, 0, 0},
1963 1963 {&core2_cost, 16, 10, 16, 10, 16},
1964 1964 {&generic32_cost, 16, 7, 16, 7, 16},
1965 1965 {&generic64_cost, 16, 10, 16, 10, 16},
1966 1966 {&amdfam10_cost, 32, 24, 32, 7, 32}
1967 1967 };
1968 1968
1969 1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
1970 1970 {
1971 1971 "generic",
1972 1972 "i386",
1973 1973 "i486",
1974 1974 "pentium",
1975 1975 "pentium-mmx",
1976 1976 "pentiumpro",
1977 1977 "pentium2",
1978 1978 "pentium3",
1979 1979 "pentium4",
1980 1980 "pentium-m",
1981 1981 "prescott",
1982 1982 "nocona",
1983 1983 "core2",
1984 1984 "geode",
1985 1985 "k6",
1986 1986 "k6-2",
1987 1987 "k6-3",
1988 1988 "athlon",
1989 1989 "athlon-4",
1990 1990 "k8",
1991 1991 "amdfam10"
1992 1992 };
1993 1993
1994 1994 /* Implement TARGET_HANDLE_OPTION. */
1995 1995
1996 1996 static bool
1997 1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1998 1998 {
1999 1999 switch (code)
2000 2000 {
2001 2001 case OPT_mmmx:
2002 2002 if (value)
2003 2003 {
2004 2004 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET;
2005 2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET;
2006 2006 }
2007 2007 else
2008 2008 {
2009 2009 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
2010 2010 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
2011 2011 }
2012 2012 return true;
2013 2013
2014 2014 case OPT_m3dnow:
2015 2015 if (value)
2016 2016 {
2017 2017 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET;
2018 2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET;
2019 2019 }
2020 2020 else
2021 2021 {
2022 2022 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
2023 2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
2024 2024 }
2025 2025 return true;
2026 2026
2027 2027 case OPT_m3dnowa:
2028 2028 return false;
2029 2029
2030 2030 case OPT_msse:
2031 2031 if (value)
2032 2032 {
2033 2033 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET;
2034 2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET;
2035 2035 }
2036 2036 else
2037 2037 {
2038 2038 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
2039 2039 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
2040 2040 }
2041 2041 return true;
2042 2042
2043 2043 case OPT_msse2:
2044 2044 if (value)
2045 2045 {
2046 2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET;
2047 2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET;
2048 2048 }
2049 2049 else
2050 2050 {
2051 2051 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
2052 2052 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
2053 2053 }
2054 2054 return true;
2055 2055
2056 2056 case OPT_msse3:
2057 2057 if (value)
2058 2058 {
2059 2059 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET;
2060 2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET;
2061 2061 }
2062 2062 else
2063 2063 {
2064 2064 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
2065 2065 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
2066 2066 }
2067 2067 return true;
2068 2068
2069 2069 case OPT_mssse3:
2070 2070 if (value)
2071 2071 {
2072 2072 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET;
2073 2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET;
2074 2074 }
2075 2075 else
2076 2076 {
2077 2077 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
2078 2078 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
2079 2079 }
2080 2080 return true;
2081 2081
2082 2082 case OPT_msse4_1:
2083 2083 if (value)
2084 2084 {
2085 2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET;
2086 2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET;
2087 2087 }
2088 2088 else
2089 2089 {
2090 2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
2091 2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
2092 2092 }
2093 2093 return true;
2094 2094
2095 2095 case OPT_msse4_2:
2096 2096 if (value)
2097 2097 {
2098 2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET;
2099 2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET;
2100 2100 }
2101 2101 else
2102 2102 {
2103 2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
2104 2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
2105 2105 }
2106 2106 return true;
2107 2107
2108 2108 case OPT_mavx:
2109 2109 if (value)
2110 2110 {
2111 2111 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET;
2112 2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET;
2113 2113 }
2114 2114 else
2115 2115 {
2116 2116 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET;
2117 2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET;
2118 2118 }
2119 2119 return true;
2120 2120
2121 2121 case OPT_mfma:
2122 2122 if (value)
2123 2123 {
2124 2124 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET;
2125 2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET;
2126 2126 }
2127 2127 else
2128 2128 {
2129 2129 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET;
2130 2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET;
2131 2131 }
2132 2132 return true;
2133 2133
2134 2134 case OPT_msse4:
2135 2135 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET;
2136 2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET;
2137 2137 return true;
2138 2138
2139 2139 case OPT_mno_sse4:
2140 2140 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
2141 2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
2142 2142 return true;
2143 2143
2144 2144 case OPT_msse4a:
2145 2145 if (value)
2146 2146 {
2147 2147 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET;
2148 2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET;
2149 2149 }
2150 2150 else
2151 2151 {
2152 2152 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
2153 2153 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
2154 2154 }
2155 2155 return true;
2156 2156
2157 2157 case OPT_msse5:
2158 2158 if (value)
2159 2159 {
2160 2160 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET;
2161 2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET;
2162 2162 }
2163 2163 else
2164 2164 {
2165 2165 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET;
2166 2166 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET;
2167 2167 }
2168 2168 return true;
2169 2169
2170 2170 case OPT_mabm:
2171 2171 if (value)
2172 2172 {
2173 2173 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET;
2174 2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET;
2175 2175 }
2176 2176 else
2177 2177 {
2178 2178 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET;
2179 2179 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET;
2180 2180 }
2181 2181 return true;
2182 2182
2183 2183 case OPT_mpopcnt:
2184 2184 if (value)
2185 2185 {
2186 2186 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET;
2187 2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET;
2188 2188 }
2189 2189 else
2190 2190 {
2191 2191 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET;
2192 2192 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET;
2193 2193 }
2194 2194 return true;
2195 2195
2196 2196 case OPT_msahf:
2197 2197 if (value)
2198 2198 {
2199 2199 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET;
2200 2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET;
2201 2201 }
2202 2202 else
2203 2203 {
2204 2204 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET;
2205 2205 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET;
2206 2206 }
2207 2207 return true;
2208 2208
2209 2209 case OPT_mcx16:
2210 2210 if (value)
2211 2211 {
2212 2212 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET;
2213 2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET;
2214 2214 }
2215 2215 else
2216 2216 {
2217 2217 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET;
2218 2218 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET;
2219 2219 }
2220 2220 return true;
2221 2221
2222 2222 case OPT_maes:
2223 2223 if (value)
2224 2224 {
2225 2225 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET;
2226 2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET;
2227 2227 }
2228 2228 else
2229 2229 {
2230 2230 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET;
2231 2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET;
2232 2232 }
2233 2233 return true;
2234 2234
2235 2235 case OPT_mpclmul:
2236 2236 if (value)
2237 2237 {
2238 2238 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET;
2239 2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET;
2240 2240 }
2241 2241 else
2242 2242 {
2243 2243 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET;
2244 2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET;
2245 2245 }
2246 2246 return true;
2247 2247
2248 2248 default:
2249 2249 return true;
2250 2250 }
2251 2251 }
2252 2252
2253 2253 /* Return a string that documents the current -m options. The caller is
2254 2254 responsible for freeing the string. */
2255 2255
2256 2256 static char *
2257 2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune,
2258 2258 const char *fpmath, bool add_nl_p)
2259 2259 {
2260 2260 struct ix86_target_opts
2261 2261 {
2262 2262 const char *option; /* option string */
2263 2263 int mask; /* isa mask options */
2264 2264 };
2265 2265
2266 2266 /* This table is ordered so that options like -msse5 or -msse4.2, which imply
2267 2267 preceding options, will match those first. */
2268 2268 static struct ix86_target_opts isa_opts[] =
2269 2269 {
2270 2270 { "-m64", OPTION_MASK_ISA_64BIT },
2271 2271 { "-msse5", OPTION_MASK_ISA_SSE5 },
2272 2272 { "-msse4a", OPTION_MASK_ISA_SSE4A },
2273 2273 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 },
2274 2274 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 },
2275 2275 { "-mssse3", OPTION_MASK_ISA_SSSE3 },
2276 2276 { "-msse3", OPTION_MASK_ISA_SSE3 },
2277 2277 { "-msse2", OPTION_MASK_ISA_SSE2 },
2278 2278 { "-msse", OPTION_MASK_ISA_SSE },
2279 2279 { "-m3dnow", OPTION_MASK_ISA_3DNOW },
2280 2280 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A },
2281 2281 { "-mmmx", OPTION_MASK_ISA_MMX },
2282 2282 { "-mabm", OPTION_MASK_ISA_ABM },
2283 2283 { "-mpopcnt", OPTION_MASK_ISA_POPCNT },
2284 2284 { "-maes", OPTION_MASK_ISA_AES },
2285 2285 { "-mpclmul", OPTION_MASK_ISA_PCLMUL },
2286 2286 };
2287 2287
2288 2288 /* Flag options. */
2289 2289 static struct ix86_target_opts flag_opts[] =
2290 2290 {
2291 2291 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE },
2292 2292 { "-m80387", MASK_80387 },
2293 2293 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS },
2294 2294 { "-malign-double", MASK_ALIGN_DOUBLE },
2295 2295 { "-mcld", MASK_CLD },
2296 2296 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS },
2297 2297 { "-mieee-fp", MASK_IEEE_FP },
2298 2298 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS },
2299 2299 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY },
2300 2300 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT },
2301 2301 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS },
2302 2302 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 },
2303 2303 { "-mno-fused-madd", MASK_NO_FUSED_MADD },
2304 2304 { "-mno-push-args", MASK_NO_PUSH_ARGS },
2305 2305 { "-mno-red-zone", MASK_NO_RED_ZONE },
2306 2306 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER },
2307 2307 { "-mrecip", MASK_RECIP },
2308 2308 { "-mrtd", MASK_RTD },
2309 2309 { "-msseregparm", MASK_SSEREGPARM },
2310 2310 { "-mstack-arg-probe", MASK_STACK_PROBE },
2311 2311 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS },
2312 2312 };
2313 2313
2314 2314 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2];
2315 2315
2316 2316 char isa_other[40];
2317 2317 char target_other[40];
2318 2318 unsigned num = 0;
2319 2319 unsigned i, j;
2320 2320 char *ret;
2321 2321 char *ptr;
2322 2322 size_t len;
2323 2323 size_t line_len;
2324 2324 size_t sep_len;
2325 2325
2326 2326 memset (opts, '\0', sizeof (opts));
2327 2327
2328 2328 /* Add -march= option. */
2329 2329 if (arch)
2330 2330 {
2331 2331 opts[num][0] = "-march=";
2332 2332 opts[num++][1] = arch;
2333 2333 }
2334 2334
2335 2335 /* Add -mtune= option. */
2336 2336 if (tune)
2337 2337 {
2338 2338 opts[num][0] = "-mtune=";
2339 2339 opts[num++][1] = tune;
2340 2340 }
2341 2341
2342 2342 /* Pick out the options in isa options. */
2343 2343 for (i = 0; i < ARRAY_SIZE (isa_opts); i++)
2344 2344 {
2345 2345 if ((isa & isa_opts[i].mask) != 0)
2346 2346 {
2347 2347 opts[num++][0] = isa_opts[i].option;
2348 2348 isa &= ~ isa_opts[i].mask;
2349 2349 }
2350 2350 }
2351 2351
2352 2352 if (isa && add_nl_p)
2353 2353 {
2354 2354 opts[num++][0] = isa_other;
2355 2355 sprintf (isa_other, "(other isa: 0x%x)", isa);
2356 2356 }
2357 2357
2358 2358 /* Add flag options. */
2359 2359 for (i = 0; i < ARRAY_SIZE (flag_opts); i++)
2360 2360 {
2361 2361 if ((flags & flag_opts[i].mask) != 0)
2362 2362 {
2363 2363 opts[num++][0] = flag_opts[i].option;
2364 2364 flags &= ~ flag_opts[i].mask;
2365 2365 }
2366 2366 }
2367 2367
2368 2368 if (flags && add_nl_p)
2369 2369 {
2370 2370 opts[num++][0] = target_other;
2371 2371 sprintf (target_other, "(other flags: 0x%x)", flags);
2372 2372 }
2373 2373
2374 2374 /* Add -mfpmath= option. */
2375 2375 if (fpmath)
2376 2376 {
2377 2377 opts[num][0] = "-mfpmath=";
2378 2378 opts[num++][1] = fpmath;
2379 2379 }
2380 2380
2381 2381 /* Any options? */
2382 2382 if (num == 0)
2383 2383 return NULL;
2384 2384
2385 2385 gcc_assert (num < ARRAY_SIZE (opts));
2386 2386
2387 2387 /* Size the string. */
2388 2388 len = 0;
2389 2389 sep_len = (add_nl_p) ? 3 : 1;
2390 2390 for (i = 0; i < num; i++)
2391 2391 {
2392 2392 len += sep_len;
2393 2393 for (j = 0; j < 2; j++)
2394 2394 if (opts[i][j])
2395 2395 len += strlen (opts[i][j]);
2396 2396 }
2397 2397
2398 2398 /* Build the string. */
2399 2399 ret = ptr = (char *) xmalloc (len);
2400 2400 line_len = 0;
2401 2401
2402 2402 for (i = 0; i < num; i++)
2403 2403 {
2404 2404 size_t len2[2];
2405 2405
2406 2406 for (j = 0; j < 2; j++)
2407 2407 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0;
2408 2408
2409 2409 if (i != 0)
2410 2410 {
2411 2411 *ptr++ = ' ';
2412 2412 line_len++;
2413 2413
2414 2414 if (add_nl_p && line_len + len2[0] + len2[1] > 70)
2415 2415 {
2416 2416 *ptr++ = '\\';
2417 2417 *ptr++ = '\n';
2418 2418 line_len = 0;
2419 2419 }
2420 2420 }
2421 2421
2422 2422 for (j = 0; j < 2; j++)
2423 2423 if (opts[i][j])
2424 2424 {
2425 2425 memcpy (ptr, opts[i][j], len2[j]);
2426 2426 ptr += len2[j];
2427 2427 line_len += len2[j];
2428 2428 }
2429 2429 }
2430 2430
2431 2431 *ptr = '\0';
2432 2432 gcc_assert (ret + len >= ptr);
2433 2433
2434 2434 return ret;
2435 2435 }
2436 2436
2437 2437 /* Function that is callable from the debugger to print the current
2438 2438 options. */
2439 2439 void
2440 2440 ix86_debug_options (void)
2441 2441 {
2442 2442 char *opts = ix86_target_string (ix86_isa_flags, target_flags,
2443 2443 ix86_arch_string, ix86_tune_string,
2444 2444 ix86_fpmath_string, true);
2445 2445
2446 2446 if (opts)
2447 2447 {
2448 2448 fprintf (stderr, "%s\n\n", opts);
2449 2449 free (opts);
2450 2450 }
2451 2451 else
2452 2452 fprintf (stderr, "<no options>\n\n");
2453 2453
2454 2454 return;
2455 2455 }
2456 2456
2457 2457 /* Sometimes certain combinations of command options do not make
2458 2458 sense on a particular target machine. You can define a macro
2459 2459 `OVERRIDE_OPTIONS' to take account of this. This macro, if
2460 2460 defined, is executed once just after all the command options have
2461 2461 been parsed.
2462 2462
2463 2463 Don't use this macro to turn on various extra optimizations for
2464 2464 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
2465 2465
2466 2466 void
2467 2467 override_options (bool main_args_p)
2468 2468 {
2469 2469 int i;
2470 2470 unsigned int ix86_arch_mask, ix86_tune_mask;
2471 2471 const char *prefix;
2472 2472 const char *suffix;
2473 2473 const char *sw;
2474 2474
2475 2475 /* Comes from final.c -- no real reason to change it. */
2476 2476 #define MAX_CODE_ALIGN 16
2477 2477
2478 2478 enum pta_flags
2479 2479 {
2480 2480 PTA_SSE = 1 << 0,
2481 2481 PTA_SSE2 = 1 << 1,
2482 2482 PTA_SSE3 = 1 << 2,
2483 2483 PTA_MMX = 1 << 3,
2484 2484 PTA_PREFETCH_SSE = 1 << 4,
2485 2485 PTA_3DNOW = 1 << 5,
2486 2486 PTA_3DNOW_A = 1 << 6,
2487 2487 PTA_64BIT = 1 << 7,
2488 2488 PTA_SSSE3 = 1 << 8,
2489 2489 PTA_CX16 = 1 << 9,
2490 2490 PTA_POPCNT = 1 << 10,
2491 2491 PTA_ABM = 1 << 11,
2492 2492 PTA_SSE4A = 1 << 12,
2493 2493 PTA_NO_SAHF = 1 << 13,
2494 2494 PTA_SSE4_1 = 1 << 14,
2495 2495 PTA_SSE4_2 = 1 << 15,
2496 2496 PTA_SSE5 = 1 << 16,
2497 2497 PTA_AES = 1 << 17,
2498 2498 PTA_PCLMUL = 1 << 18,
2499 2499 PTA_AVX = 1 << 19,
2500 2500 PTA_FMA = 1 << 20
2501 2501 };
2502 2502
2503 2503 static struct pta
2504 2504 {
2505 2505 const char *const name; /* processor name or nickname. */
2506 2506 const enum processor_type processor;
2507 2507 const enum attr_cpu schedule;
2508 2508 const unsigned /*enum pta_flags*/ flags;
2509 2509 }
2510 2510 const processor_alias_table[] =
2511 2511 {
2512 2512 {"i386", PROCESSOR_I386, CPU_NONE, 0},
2513 2513 {"i486", PROCESSOR_I486, CPU_NONE, 0},
2514 2514 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2515 2515 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0},
2516 2516 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX},
2517 2517 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX},
2518 2518 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2519 2519 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW},
2520 2520 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE},
2521 2521 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2522 2522 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0},
2523 2523 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX},
2524 2524 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2525 2525 PTA_MMX | PTA_SSE},
2526 2526 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2527 2527 PTA_MMX | PTA_SSE},
2528 2528 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO,
2529 2529 PTA_MMX | PTA_SSE | PTA_SSE2},
2530 2530 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE,
2531 2531 PTA_MMX | PTA_SSE | PTA_SSE2},
2532 2532 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE,
2533 2533 PTA_MMX | PTA_SSE | PTA_SSE2},
2534 2534 {"prescott", PROCESSOR_NOCONA, CPU_NONE,
2535 2535 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
2536 2536 {"nocona", PROCESSOR_NOCONA, CPU_NONE,
2537 2537 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2538 2538 | PTA_CX16 | PTA_NO_SAHF},
2539 2539 {"core2", PROCESSOR_CORE2, CPU_CORE2,
2540 2540 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
2541 2541 | PTA_SSSE3 | PTA_CX16},
2542 2542 {"geode", PROCESSOR_GEODE, CPU_GEODE,
2543 2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2544 2544 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
2545 2545 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2546 2546 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW},
2547 2547 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON,
2548 2548 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2549 2549 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON,
2550 2550 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE},
2551 2551 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON,
2552 2552 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2553 2553 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON,
2554 2554 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2555 2555 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON,
2556 2556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE},
2557 2557 {"x86-64", PROCESSOR_K8, CPU_K8,
2558 2558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
2559 2559 {"k8", PROCESSOR_K8, CPU_K8,
2560 2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2561 2561 | PTA_SSE2 | PTA_NO_SAHF},
2562 2562 {"k8-sse3", PROCESSOR_K8, CPU_K8,
2563 2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2564 2564 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2565 2565 {"opteron", PROCESSOR_K8, CPU_K8,
2566 2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2567 2567 | PTA_SSE2 | PTA_NO_SAHF},
2568 2568 {"opteron-sse3", PROCESSOR_K8, CPU_K8,
2569 2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2570 2570 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2571 2571 {"athlon64", PROCESSOR_K8, CPU_K8,
2572 2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2573 2573 | PTA_SSE2 | PTA_NO_SAHF},
2574 2574 {"athlon64-sse3", PROCESSOR_K8, CPU_K8,
2575 2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2576 2576 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF},
2577 2577 {"athlon-fx", PROCESSOR_K8, CPU_K8,
2578 2578 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2579 2579 | PTA_SSE2 | PTA_NO_SAHF},
2580 2580 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2581 2581 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2582 2582 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2583 2583 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10,
2584 2584 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE
2585 2585 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM},
2586 2586 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO,
2587 2587 0 /* flags are only used for -march switch. */ },
2588 2588 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64,
2589 2589 PTA_64BIT /* flags are only used for -march switch. */ },
2590 2590 };
2591 2591
2592 2592 int const pta_size = ARRAY_SIZE (processor_alias_table);
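
The rest of override_options searches this table by the -march/-mtune name and ORs the matching entry's PTA_* bits into the ISA flags, skipping any bit the user already set or cleared explicitly (see the loop over processor_alias_table further down). A stripped-down sketch of that lookup; the masks here are hypothetical stand-ins whose values differ from the real pta_flags enum:

#include <stdio.h>
#include <string.h>

#define PTA_MMX  (1 << 0)   /* stand-in values, not the real enum */
#define PTA_SSE  (1 << 1)
#define PTA_SSE2 (1 << 2)

struct alias { const char *name; unsigned int flags; };

static const struct alias alias_table[] = {
  { "pentium",   0 },
  { "pentium3",  PTA_MMX | PTA_SSE },
  { "pentium-m", PTA_MMX | PTA_SSE | PTA_SSE2 },
};

int
main (void)
{
  const char *arch = "pentium3";
  unsigned int isa_flags = 0;
  unsigned int explicit_flags = PTA_SSE;   /* pretend -mno-sse was given */
  size_t i;

  for (i = 0; i < sizeof alias_table / sizeof alias_table[0]; i++)
    if (!strcmp (arch, alias_table[i].name))
      {
        /* Only turn on bits the user has not touched explicitly.  */
        isa_flags |= alias_table[i].flags & ~explicit_flags;
        break;
      }

  printf ("mmx=%d sse=%d\n",
          !!(isa_flags & PTA_MMX), !!(isa_flags & PTA_SSE));
  return 0;
}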
2593 2593
2594 2594 /* Set up prefix/suffix so the error messages refer to either the command
2595 2595 line argument, or the attribute(target). */
2596 2596 if (main_args_p)
2597 2597 {
2598 2598 prefix = "-m";
2599 2599 suffix = "";
2600 2600 sw = "switch";
2601 2601 }
2602 2602 else
2603 2603 {
2604 2604 prefix = "option(\"";
2605 2605 suffix = "\")";
2606 2606 sw = "attribute";
2607 2607 }
2608 2608
2609 2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2610 2610 SUBTARGET_OVERRIDE_OPTIONS;
2611 2611 #endif
2612 2612
2613 2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2614 2614 SUBSUBTARGET_OVERRIDE_OPTIONS;
2615 2615 #endif
2616 2616
2617 2617 /* -fPIC is the default for x86_64. */
2618 2618 if (TARGET_MACHO && TARGET_64BIT)
2619 2619 flag_pic = 2;
2620 2620
2621 2621 /* Set the default values for switches whose default depends on TARGET_64BIT
2622 2622 in case they weren't overwritten by command line options. */
2623 2623 if (TARGET_64BIT)
2624 2624 {
2625 2625 /* Mach-O doesn't support omitting the frame pointer for now. */
2626 2626 if (flag_omit_frame_pointer == 2)
2627 2627 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2628 2628 if (flag_asynchronous_unwind_tables == 2)
2629 2629 flag_asynchronous_unwind_tables = 1;
2630 2630 if (flag_pcc_struct_return == 2)
2631 2631 flag_pcc_struct_return = 0;
2632 2632 }
2633 2633 else
2634 2634 {
2635 2635 if (flag_omit_frame_pointer == 2)
2636 2636 flag_omit_frame_pointer = 0;
2637 2637 if (flag_asynchronous_unwind_tables == 2)
2638 2638 flag_asynchronous_unwind_tables = 0;
2639 2639 if (flag_pcc_struct_return == 2)
2640 2640 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2641 2641 }
2642 2642
2643 2643 /* Need to check -mtune=generic first. */
2644 2644 if (ix86_tune_string)
2645 2645 {
2646 2646 if (!strcmp (ix86_tune_string, "generic")
2647 2647 || !strcmp (ix86_tune_string, "i686")
2648 2648 /* As special support for cross compilers we read -mtune=native
2649 2649 as -mtune=generic. With native compilers we won't see the
2650 2650 -mtune=native, as it was changed by the driver. */
2651 2651 || !strcmp (ix86_tune_string, "native"))
2652 2652 {
2653 2653 if (TARGET_64BIT)
2654 2654 ix86_tune_string = "generic64";
2655 2655 else
2656 2656 ix86_tune_string = "generic32";
2657 2657 }
2658 2658 /* If this call is for setting the option attribute, allow the
2659 2659 generic32/generic64 that was previously set. */
2660 2660 else if (!main_args_p
2661 2661 && (!strcmp (ix86_tune_string, "generic32")
2662 2662 || !strcmp (ix86_tune_string, "generic64")))
2663 2663 ;
2664 2664 else if (!strncmp (ix86_tune_string, "generic", 7))
2665 2665 error ("bad value (%s) for %stune=%s %s",
2666 2666 ix86_tune_string, prefix, suffix, sw);
2667 2667 }
2668 2668 else
2669 2669 {
2670 2670 if (ix86_arch_string)
2671 2671 ix86_tune_string = ix86_arch_string;
2672 2672 if (!ix86_tune_string)
2673 2673 {
2674 2674 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT];
2675 2675 ix86_tune_defaulted = 1;
2676 2676 }
2677 2677
2678 2678 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2679 2679 need to use a sensible tune option. */
2680 2680 if (!strcmp (ix86_tune_string, "generic")
2681 2681 || !strcmp (ix86_tune_string, "x86-64")
2682 2682 || !strcmp (ix86_tune_string, "i686"))
2683 2683 {
2684 2684 if (TARGET_64BIT)
2685 2685 ix86_tune_string = "generic64";
2686 2686 else
2687 2687 ix86_tune_string = "generic32";
2688 2688 }
2689 2689 }
2690 2690 if (ix86_stringop_string)
2691 2691 {
2692 2692 if (!strcmp (ix86_stringop_string, "rep_byte"))
2693 2693 stringop_alg = rep_prefix_1_byte;
2694 2694 else if (!strcmp (ix86_stringop_string, "libcall"))
2695 2695 stringop_alg = libcall;
2696 2696 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2697 2697 stringop_alg = rep_prefix_4_byte;
2698 2698 else if (!strcmp (ix86_stringop_string, "rep_8byte")
2699 2699 && TARGET_64BIT)
2700 2700 /* rep; movq isn't available in 32-bit code. */
2701 2701 stringop_alg = rep_prefix_8_byte;
2702 2702 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2703 2703 stringop_alg = loop_1_byte;
2704 2704 else if (!strcmp (ix86_stringop_string, "loop"))
2705 2705 stringop_alg = loop;
2706 2706 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2707 2707 stringop_alg = unrolled_loop;
2708 2708 else
2709 2709 error ("bad value (%s) for %sstringop-strategy=%s %s",
2710 2710 ix86_stringop_string, prefix, suffix, sw);
2711 2711 }
2712 2712 if (!strcmp (ix86_tune_string, "x86-64"))
2713 2713 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use "
2714 2714 "%stune=k8%s or %stune=generic%s instead as appropriate.",
2715 2715 prefix, suffix, prefix, suffix, prefix, suffix);
2716 2716
2717 2717 if (!ix86_arch_string)
2718 2718 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2719 2719 else
2720 2720 ix86_arch_specified = 1;
2721 2721
2722 2722 if (!strcmp (ix86_arch_string, "generic"))
2723 2723 error ("generic CPU can be used only for %stune=%s %s",
2724 2724 prefix, suffix, sw);
2725 2725 if (!strncmp (ix86_arch_string, "generic", 7))
2726 2726 error ("bad value (%s) for %sarch=%s %s",
2727 2727 ix86_arch_string, prefix, suffix, sw);
2728 2728
2729 2729 if (ix86_cmodel_string != 0)
2730 2730 {
2731 2731 if (!strcmp (ix86_cmodel_string, "small"))
2732 2732 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2733 2733 else if (!strcmp (ix86_cmodel_string, "medium"))
2734 2734 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2735 2735 else if (!strcmp (ix86_cmodel_string, "large"))
2736 2736 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2737 2737 else if (flag_pic)
2738 2738 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2739 2739 else if (!strcmp (ix86_cmodel_string, "32"))
2740 2740 ix86_cmodel = CM_32;
2741 2741 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2742 2742 ix86_cmodel = CM_KERNEL;
2743 2743 else
2744 2744 error ("bad value (%s) for %scmodel=%s %s",
2745 2745 ix86_cmodel_string, prefix, suffix, sw);
2746 2746 }
2747 2747 else
2748 2748 {
2749 2749 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the
2750 2750 use of rip-relative addressing. This eliminates fixups that
2751 2751 would otherwise be needed if this object is to be placed in a
2752 2752 DLL, and is essentially just as efficient as direct addressing. */
2753 2753 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
2754 2754 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2755 2755 else if (TARGET_64BIT)
2756 2756 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2757 2757 else
2758 2758 ix86_cmodel = CM_32;
2759 2759 }
2760 2760 if (ix86_asm_string != 0)
2761 2761 {
2762 2762 if (! TARGET_MACHO
2763 2763 && !strcmp (ix86_asm_string, "intel"))
2764 2764 ix86_asm_dialect = ASM_INTEL;
2765 2765 else if (!strcmp (ix86_asm_string, "att"))
2766 2766 ix86_asm_dialect = ASM_ATT;
2767 2767 else
2768 2768 error ("bad value (%s) for %sasm=%s %s",
2769 2769 ix86_asm_string, prefix, suffix, sw);
2770 2770 }
2771 2771 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2772 2772 error ("code model %qs not supported in the %s bit mode",
2773 2773 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2774 2774 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2775 2775 sorry ("%i-bit mode not compiled in",
2776 2776 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2777 2777
2778 2778 for (i = 0; i < pta_size; i++)
2779 2779 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2780 2780 {
2781 2781 ix86_schedule = processor_alias_table[i].schedule;
2782 2782 ix86_arch = processor_alias_table[i].processor;
2783 2783 /* Default cpu tuning to the architecture. */
2784 2784 ix86_tune = ix86_arch;
2785 2785
2786 2786 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2787 2787 error ("CPU you selected does not support x86-64 "
2788 2788 "instruction set");
2789 2789
2790 2790 if (processor_alias_table[i].flags & PTA_MMX
2791 2791 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2792 2792 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2793 2793 if (processor_alias_table[i].flags & PTA_3DNOW
2794 2794 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2795 2795 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2796 2796 if (processor_alias_table[i].flags & PTA_3DNOW_A
2797 2797 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2798 2798 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2799 2799 if (processor_alias_table[i].flags & PTA_SSE
2800 2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2801 2801 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2802 2802 if (processor_alias_table[i].flags & PTA_SSE2
2803 2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2804 2804 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2805 2805 if (processor_alias_table[i].flags & PTA_SSE3
2806 2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2807 2807 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2808 2808 if (processor_alias_table[i].flags & PTA_SSSE3
2809 2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2810 2810 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2811 2811 if (processor_alias_table[i].flags & PTA_SSE4_1
2812 2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2813 2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2814 2814 if (processor_alias_table[i].flags & PTA_SSE4_2
2815 2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2816 2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2817 2817 if (processor_alias_table[i].flags & PTA_AVX
2818 2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX))
2819 2819 ix86_isa_flags |= OPTION_MASK_ISA_AVX;
2820 2820 if (processor_alias_table[i].flags & PTA_FMA
2821 2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA))
2822 2822 ix86_isa_flags |= OPTION_MASK_ISA_FMA;
2823 2823 if (processor_alias_table[i].flags & PTA_SSE4A
2824 2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2825 2825 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2826 2826 if (processor_alias_table[i].flags & PTA_SSE5
2827 2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5))
2828 2828 ix86_isa_flags |= OPTION_MASK_ISA_SSE5;
2829 2829 if (processor_alias_table[i].flags & PTA_ABM
2830 2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM))
2831 2831 ix86_isa_flags |= OPTION_MASK_ISA_ABM;
2832 2832 if (processor_alias_table[i].flags & PTA_CX16
2833 2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16))
2834 2834 ix86_isa_flags |= OPTION_MASK_ISA_CX16;
2835 2835 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM)
2836 2836 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT))
2837 2837 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT;
2838 2838 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF))
2839 2839 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF))
2840 2840 ix86_isa_flags |= OPTION_MASK_ISA_SAHF;
2841 2841 if (processor_alias_table[i].flags & PTA_AES
2842 2842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES))
2843 2843 ix86_isa_flags |= OPTION_MASK_ISA_AES;
2844 2844 if (processor_alias_table[i].flags & PTA_PCLMUL
2845 2845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL))
2846 2846 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL;
2847 2847 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2848 2848 x86_prefetch_sse = true;
2849 2849
2850 2850 break;
2851 2851 }
2852 2852
2853 2853 if (i == pta_size)
2854 2854 error ("bad value (%s) for %sarch=%s %s",
2855 2855 ix86_arch_string, prefix, suffix, sw);
2856 2856
2857 2857 ix86_arch_mask = 1u << ix86_arch;
2858 2858 for (i = 0; i < X86_ARCH_LAST; ++i)
2859 2859 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
2860 2860
2861 2861 for (i = 0; i < pta_size; i++)
2862 2862 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2863 2863 {
2864 2864 ix86_schedule = processor_alias_table[i].schedule;
2865 2865 ix86_tune = processor_alias_table[i].processor;
2866 2866 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2867 2867 {
2868 2868 if (ix86_tune_defaulted)
2869 2869 {
2870 2870 ix86_tune_string = "x86-64";
2871 2871 for (i = 0; i < pta_size; i++)
2872 2872 if (! strcmp (ix86_tune_string,
2873 2873 processor_alias_table[i].name))
2874 2874 break;
2875 2875 ix86_schedule = processor_alias_table[i].schedule;
2876 2876 ix86_tune = processor_alias_table[i].processor;
2877 2877 }
2878 2878 else
2879 2879 error ("CPU you selected does not support x86-64 "
2880 2880 "instruction set");
2881 2881 }
2882 2882
2883 2883 /* Intel CPUs have always interpreted SSE prefetch instructions as
2884 2884 NOPs; so, we can enable SSE prefetch instructions even when
2885 2885 -mtune (rather than -march) points us to a processor that has them.
2886 2886 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2887 2887 higher processors. */
2888 2888 if (TARGET_CMOVE
2889 2889 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2890 2890 x86_prefetch_sse = true;
2891 2891 break;
2892 2892 }
2893 2893 if (i == pta_size)
2894 2894 error ("bad value (%s) for %stune=%s %s",
2895 2895 ix86_tune_string, prefix, suffix, sw);
2896 2896
2897 2897 ix86_tune_mask = 1u << ix86_tune;
2898 2898 for (i = 0; i < X86_TUNE_LAST; ++i)
2899 2899 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
2900 2900
2901 2901 if (optimize_size)
2902 2902 ix86_cost = &ix86_size_cost;
2903 2903 else
2904 2904 ix86_cost = processor_target_table[ix86_tune].cost;
2905 2905
2906 2906 /* Arrange to set up i386_stack_locals for all functions. */
2907 2907 init_machine_status = ix86_init_machine_status;
2908 2908
2909 2909 /* Validate -mregparm= value. */
2910 2910 if (ix86_regparm_string)
2911 2911 {
2912 2912 if (TARGET_64BIT)
2913 2913 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix);
2914 2914 i = atoi (ix86_regparm_string);
2915 2915 if (i < 0 || i > REGPARM_MAX)
2916 2916 error ("%sregparm=%d%s is not between 0 and %d",
2917 2917 prefix, i, suffix, REGPARM_MAX);
2918 2918 else
2919 2919 ix86_regparm = i;
2920 2920 }
2921 2921 if (TARGET_64BIT)
2922 2922 ix86_regparm = REGPARM_MAX;
2923 2923
2924 2924 /* If the user has provided any of the -malign-* options,
2925 2925 warn and use that value only if -falign-* is not set.
2926 2926 Remove this code in GCC 3.2 or later. */
2927 2927 if (ix86_align_loops_string)
2928 2928 {
2929 2929 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s",
2930 2930 prefix, suffix, suffix);
2931 2931 if (align_loops == 0)
2932 2932 {
2933 2933 i = atoi (ix86_align_loops_string);
2934 2934 if (i < 0 || i > MAX_CODE_ALIGN)
2935 2935 error ("%salign-loops=%d%s is not between 0 and %d",
2936 2936 prefix, i, suffix, MAX_CODE_ALIGN);
2937 2937 else
2938 2938 align_loops = 1 << i;
2939 2939 }
2940 2940 }
2941 2941
2942 2942 if (ix86_align_jumps_string)
2943 2943 {
2944 2944 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s",
2945 2945 prefix, suffix, suffix);
2946 2946 if (align_jumps == 0)
2947 2947 {
2948 2948 i = atoi (ix86_align_jumps_string);
2949 2949 if (i < 0 || i > MAX_CODE_ALIGN)
2950 2950 	    error ("%salign-jumps=%d%s is not between 0 and %d",
2951 2951 prefix, i, suffix, MAX_CODE_ALIGN);
2952 2952 else
2953 2953 align_jumps = 1 << i;
2954 2954 }
2955 2955 }
2956 2956
2957 2957 if (ix86_align_funcs_string)
2958 2958 {
2959 2959 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s",
2960 2960 prefix, suffix, suffix);
2961 2961 if (align_functions == 0)
2962 2962 {
2963 2963 i = atoi (ix86_align_funcs_string);
2964 2964 if (i < 0 || i > MAX_CODE_ALIGN)
2965 2965 	    error ("%salign-functions=%d%s is not between 0 and %d",
2966 2966 prefix, i, suffix, MAX_CODE_ALIGN);
2967 2967 else
2968 2968 align_functions = 1 << i;
2969 2969 }
2970 2970 }
2971 2971
2972 2972 /* Default align_* from the processor table. */
2973 2973 if (align_loops == 0)
2974 2974 {
2975 2975 align_loops = processor_target_table[ix86_tune].align_loop;
2976 2976 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2977 2977 }
2978 2978 if (align_jumps == 0)
2979 2979 {
2980 2980 align_jumps = processor_target_table[ix86_tune].align_jump;
2981 2981 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2982 2982 }
2983 2983 if (align_functions == 0)
2984 2984 {
2985 2985 align_functions = processor_target_table[ix86_tune].align_func;
2986 2986 }
2987 2987
2988 2988 /* Validate -mbranch-cost= value, or provide default. */
2989 2989 ix86_branch_cost = ix86_cost->branch_cost;
2990 2990 if (ix86_branch_cost_string)
2991 2991 {
2992 2992 i = atoi (ix86_branch_cost_string);
2993 2993 if (i < 0 || i > 5)
2994 2994 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix);
2995 2995 else
2996 2996 ix86_branch_cost = i;
2997 2997 }
2998 2998 if (ix86_section_threshold_string)
2999 2999 {
3000 3000 i = atoi (ix86_section_threshold_string);
3001 3001 if (i < 0)
3002 3002 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix);
3003 3003 else
3004 3004 ix86_section_threshold = i;
3005 3005 }
3006 3006
3007 3007 if (ix86_tls_dialect_string)
3008 3008 {
3009 3009 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
3010 3010 ix86_tls_dialect = TLS_DIALECT_GNU;
3011 3011 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
3012 3012 ix86_tls_dialect = TLS_DIALECT_GNU2;
3013 3013 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
3014 3014 ix86_tls_dialect = TLS_DIALECT_SUN;
3015 3015 else
3016 3016 error ("bad value (%s) for %stls-dialect=%s %s",
3017 3017 ix86_tls_dialect_string, prefix, suffix, sw);
3018 3018 }
3019 3019
3020 3020 if (ix87_precision_string)
3021 3021 {
3022 3022 i = atoi (ix87_precision_string);
3023 3023 if (i != 32 && i != 64 && i != 80)
3024 3024 	error ("pc%d is not a valid precision setting (32, 64 or 80)", i);
3025 3025 }
3026 3026
3027 3027 if (TARGET_64BIT)
3028 3028 {
3029 3029 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
3030 3030
3031 3031 /* Enable by default the SSE and MMX builtins. Do allow the user to
3032 3032 explicitly disable any of these. In particular, disabling SSE and
3033 3033 MMX for kernel code is extremely useful. */
3034 3034 if (!ix86_arch_specified)
3035 3035 ix86_isa_flags
3036 3036 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
3037 3037 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
3038 3038
3039 3039 if (TARGET_RTD)
3040 3040 	warning (0, "%srtd%s is ignored in 64-bit mode", prefix, suffix);
3041 3041 }
3042 3042 else
3043 3043 {
3044 3044 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
3045 3045
3046 3046 if (!ix86_arch_specified)
3047 3047 ix86_isa_flags
3048 3048 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
3049 3049
3050 3050       /* The i386 ABI does not specify a red zone.  It still makes sense to use it
3051 3051          when the programmer takes care to keep the stack from being destroyed.  */
3052 3052 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
3053 3053 target_flags |= MASK_NO_RED_ZONE;
3054 3054 }
3055 3055
3056 3056 /* Keep nonleaf frame pointers. */
3057 3057 if (flag_omit_frame_pointer)
3058 3058 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
3059 3059 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
3060 3060 flag_omit_frame_pointer = 1;
3061 3061
3062 3062 /* If we're doing fast math, we don't care about comparison order
3063 3063 wrt NaNs. This lets us use a shorter comparison sequence. */
3064 3064 if (flag_finite_math_only)
3065 3065 target_flags &= ~MASK_IEEE_FP;
3066 3066
3067 3067 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
3068 3068 since the insns won't need emulation. */
3069 3069 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
3070 3070 target_flags &= ~MASK_NO_FANCY_MATH_387;
3071 3071
3072 3072 /* Likewise, if the target doesn't have a 387, or we've specified
3073 3073 software floating point, don't use 387 inline intrinsics. */
3074 3074 if (!TARGET_80387)
3075 3075 target_flags |= MASK_NO_FANCY_MATH_387;
3076 3076
3077 3077 /* Turn on MMX builtins for -msse. */
3078 3078 if (TARGET_SSE)
3079 3079 {
3080 3080 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
3081 3081 x86_prefetch_sse = true;
3082 3082 }
3083 3083
3084 3084 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
3085 3085 if (TARGET_SSE4_2 || TARGET_ABM)
3086 3086 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit;
3087 3087
3088 3088 if (!TARGET_64BIT && TARGET_SAVE_ARGS)
3089 3089     error ("-msave-args makes no sense in 32-bit mode");
3090 3090
3091 3091 /* Validate -mpreferred-stack-boundary= value or default it to
3092 3092 PREFERRED_STACK_BOUNDARY_DEFAULT. */
3093 3093 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT;
3094 3094 if (ix86_preferred_stack_boundary_string)
3095 3095 {
3096 3096 i = atoi (ix86_preferred_stack_boundary_string);
3097 3097 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3098 3098 error ("%spreferred-stack-boundary=%d%s is not between %d and 12",
3099 3099 prefix, i, suffix, TARGET_64BIT ? 4 : 2);
3100 3100 else
3101 3101 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
3102 3102 }
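  /* The value parsed above is an exponent, not a byte count: for example,
     -mpreferred-stack-boundary=4 yields (1 << 4) * BITS_PER_UNIT = 128 bits,
     i.e. a 16-byte stack alignment.  */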
3103 3103
3104 3104 /* Set the default value for -mstackrealign. */
3105 3105 if (ix86_force_align_arg_pointer == -1)
3106 3106 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT;
3107 3107
3108 3108 /* Validate -mincoming-stack-boundary= value or default it to
3109 3109 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */
3110 3110 if (ix86_force_align_arg_pointer)
3111 3111 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY;
3112 3112 else
3113 3113 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY;
3114 3114 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary;
3115 3115 if (ix86_incoming_stack_boundary_string)
3116 3116 {
3117 3117 i = atoi (ix86_incoming_stack_boundary_string);
3118 3118 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
3119 3119 error ("-mincoming-stack-boundary=%d is not between %d and 12",
3120 3120 i, TARGET_64BIT ? 4 : 2);
3121 3121 else
3122 3122 {
3123 3123 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT;
3124 3124 ix86_incoming_stack_boundary
3125 3125 = ix86_user_incoming_stack_boundary;
3126 3126 }
3127 3127 }
3128 3128
3129 3129 /* Accept -msseregparm only if at least SSE support is enabled. */
3130 3130 if (TARGET_SSEREGPARM
3131 3131 && ! TARGET_SSE)
3132 3132 error ("%ssseregparm%s used without SSE enabled", prefix, suffix);
3133 3133
3134 3134 ix86_fpmath = TARGET_FPMATH_DEFAULT;
3135 3135 if (ix86_fpmath_string != 0)
3136 3136 {
3137 3137 if (! strcmp (ix86_fpmath_string, "387"))
3138 3138 ix86_fpmath = FPMATH_387;
3139 3139 else if (! strcmp (ix86_fpmath_string, "sse"))
3140 3140 {
3141 3141 if (!TARGET_SSE)
3142 3142 {
3143 3143 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3144 3144 ix86_fpmath = FPMATH_387;
3145 3145 }
3146 3146 else
3147 3147 ix86_fpmath = FPMATH_SSE;
3148 3148 }
3149 3149 else if (! strcmp (ix86_fpmath_string, "387,sse")
3150 3150 || ! strcmp (ix86_fpmath_string, "387+sse")
3151 3151 || ! strcmp (ix86_fpmath_string, "sse,387")
3152 3152 || ! strcmp (ix86_fpmath_string, "sse+387")
3153 3153 || ! strcmp (ix86_fpmath_string, "both"))
3154 3154 {
3155 3155 if (!TARGET_SSE)
3156 3156 {
3157 3157 warning (0, "SSE instruction set disabled, using 387 arithmetics");
3158 3158 ix86_fpmath = FPMATH_387;
3159 3159 }
3160 3160 else if (!TARGET_80387)
3161 3161 {
3162 3162 warning (0, "387 instruction set disabled, using SSE arithmetics");
3163 3163 ix86_fpmath = FPMATH_SSE;
3164 3164 }
3165 3165 else
3166 3166 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
3167 3167 }
3168 3168 else
3169 3169 error ("bad value (%s) for %sfpmath=%s %s",
3170 3170 ix86_fpmath_string, prefix, suffix, sw);
3171 3171 }
3172 3172
3173 3173 /* If the i387 is disabled, then do not return values in it. */
3174 3174 if (!TARGET_80387)
3175 3175 target_flags &= ~MASK_FLOAT_RETURNS;
3176 3176
3177 3177 /* Use external vectorized library in vectorizing intrinsics. */
3178 3178 if (ix86_veclibabi_string)
3179 3179 {
3180 3180 if (strcmp (ix86_veclibabi_string, "svml") == 0)
3181 3181 ix86_veclib_handler = ix86_veclibabi_svml;
3182 3182 else if (strcmp (ix86_veclibabi_string, "acml") == 0)
3183 3183 ix86_veclib_handler = ix86_veclibabi_acml;
3184 3184 else
3185 3185 error ("unknown vectorization library ABI type (%s) for "
3186 3186 "%sveclibabi=%s %s", ix86_veclibabi_string,
3187 3187 prefix, suffix, sw);
3188 3188 }
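  /* Illustrative usage: -mveclibabi=svml or -mveclibabi=acml makes the
     vectorizer call out to the corresponding external math library (Intel's
     SVML or AMD's ACML) instead of open-coding vectorized math builtins.  */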
3189 3189
3190 3190 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
3191 3191 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3192 3192 && !optimize_size)
3193 3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3194 3194
3195 3195 /* ??? Unwind info is not correct around the CFG unless either a frame
3196 3196 pointer is present or M_A_O_A is set. Fixing this requires rewriting
3197 3197 unwind info generation to be aware of the CFG and propagating states
3198 3198 around edges. */
3199 3199 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
3200 3200 || flag_exceptions || flag_non_call_exceptions)
3201 3201 && flag_omit_frame_pointer
3202 3202 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3203 3203 {
3204 3204 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3205 3205 warning (0, "unwind tables currently require either a frame pointer "
3206 3206 "or %saccumulate-outgoing-args%s for correctness",
3207 3207 prefix, suffix);
3208 3208 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3209 3209 }
3210 3210
3211 3211 /* If stack probes are required, the space used for large function
3212 3212 arguments on the stack must also be probed, so enable
3213 3213 -maccumulate-outgoing-args so this happens in the prologue. */
3214 3214 if (TARGET_STACK_PROBE
3215 3215 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
3216 3216 {
3217 3217 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
3218 3218 warning (0, "stack probing requires %saccumulate-outgoing-args%s "
3219 3219 "for correctness", prefix, suffix);
3220 3220 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
3221 3221 }
3222 3222
3223 3223   /* For sane SSE instruction set generation we need the fcomi instruction.
3224 3224 It is safe to enable all CMOVE instructions. */
3225 3225 if (TARGET_SSE)
3226 3226 TARGET_CMOVE = 1;
3227 3227
3228 3228 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
3229 3229 {
3230 3230 char *p;
3231 3231 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
3232 3232 p = strchr (internal_label_prefix, 'X');
3233 3233 internal_label_prefix_len = p - internal_label_prefix;
3234 3234 *p = '\0';
3235 3235 }
3236 3236
3237 3237 /* When scheduling description is not available, disable scheduler pass
3238 3238 so it won't slow down the compilation and make x87 code slower. */
3239 3239 if (!TARGET_SCHEDULE)
3240 3240 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
3241 3241
3242 3242 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
3243 3243 set_param_value ("simultaneous-prefetches",
3244 3244 ix86_cost->simultaneous_prefetches);
3245 3245 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
3246 3246 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
3247 3247 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
3248 3248 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
3249 3249 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
3250 3250 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
3251 3251
3252 3252 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0)
3253 3253 can be optimized to ap = __builtin_next_arg (0). */
3254 3254 if (!TARGET_64BIT)
3255 3255 targetm.expand_builtin_va_start = NULL;
3256 3256
3257 3257 if (TARGET_64BIT)
3258 3258 {
3259 3259 ix86_gen_leave = gen_leave_rex64;
3260 3260 ix86_gen_pop1 = gen_popdi1;
3261 3261 ix86_gen_add3 = gen_adddi3;
3262 3262 ix86_gen_sub3 = gen_subdi3;
3263 3263 ix86_gen_sub3_carry = gen_subdi3_carry_rex64;
3264 3264 ix86_gen_one_cmpl2 = gen_one_cmpldi2;
3265 3265 ix86_gen_monitor = gen_sse3_monitor64;
3266 3266 ix86_gen_andsp = gen_anddi3;
3267 3267 }
3268 3268 else
3269 3269 {
3270 3270 ix86_gen_leave = gen_leave;
3271 3271 ix86_gen_pop1 = gen_popsi1;
3272 3272 ix86_gen_add3 = gen_addsi3;
3273 3273 ix86_gen_sub3 = gen_subsi3;
3274 3274 ix86_gen_sub3_carry = gen_subsi3_carry;
3275 3275 ix86_gen_one_cmpl2 = gen_one_cmplsi2;
3276 3276 ix86_gen_monitor = gen_sse3_monitor;
3277 3277 ix86_gen_andsp = gen_andsi3;
3278 3278 }
3279 3279
3280 3280 #ifdef USE_IX86_CLD
3281 3281 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */
3282 3282 if (!TARGET_64BIT)
3283 3283 target_flags |= MASK_CLD & ~target_flags_explicit;
3284 3284 #endif
3285 3285
3286 3286   /* Save the initial options in case the user uses function-specific options.  */
3287 3287 if (main_args_p)
3288 3288 target_option_default_node = target_option_current_node
3289 3289 = build_target_option_node ();
3290 3290 }
3291 3291
3292 3292 /* Update register usage after having seen the compiler flags. */
3293 3293
3294 3294 void
3295 3295 ix86_conditional_register_usage (void)
3296 3296 {
3297 3297 int i;
3298 3298 unsigned int j;
3299 3299
3300 3300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3301 3301 {
3302 3302 if (fixed_regs[i] > 1)
3303 3303 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2));
3304 3304 if (call_used_regs[i] > 1)
3305 3305 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2));
3306 3306 }
3307 3307
3308 3308 /* The PIC register, if it exists, is fixed. */
3309 3309 j = PIC_OFFSET_TABLE_REGNUM;
3310 3310 if (j != INVALID_REGNUM)
3311 3311 fixed_regs[j] = call_used_regs[j] = 1;
3312 3312
3313 3313 /* The MS_ABI changes the set of call-used registers. */
3314 3314 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI)
3315 3315 {
3316 3316 call_used_regs[SI_REG] = 0;
3317 3317 call_used_regs[DI_REG] = 0;
3318 3318 call_used_regs[XMM6_REG] = 0;
3319 3319 call_used_regs[XMM7_REG] = 0;
3320 3320 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3321 3321 call_used_regs[i] = 0;
3322 3322 }
3323 3323
3324 3324 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the
3325 3325 other call-clobbered regs for 64-bit. */
3326 3326 if (TARGET_64BIT)
3327 3327 {
3328 3328 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
3329 3329
3330 3330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3331 3331 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i)
3332 3332 && call_used_regs[i])
3333 3333 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i);
3334 3334 }
3335 3335
3336 3336 /* If MMX is disabled, squash the registers. */
3337 3337 if (! TARGET_MMX)
3338 3338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3339 3339 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i))
3340 3340 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3341 3341
3342 3342 /* If SSE is disabled, squash the registers. */
3343 3343 if (! TARGET_SSE)
3344 3344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3345 3345 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i))
3346 3346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3347 3347
3348 3348 /* If the FPU is disabled, squash the registers. */
3349 3349 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387))
3350 3350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
3351 3351 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i))
3352 3352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
3353 3353
3354 3354 /* If 32-bit, squash the 64-bit registers. */
3355 3355 if (! TARGET_64BIT)
3356 3356 {
3357 3357 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++)
3358 3358 reg_names[i] = "";
3359 3359 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
3360 3360 reg_names[i] = "";
3361 3361 }
3362 3362 }
3363 3363
3364 3364
3365 3365 /* Save the current options */
3366 3366
3367 3367 static void
3368 3368 ix86_function_specific_save (struct cl_target_option *ptr)
3369 3369 {
3370 3370 gcc_assert (IN_RANGE (ix86_arch, 0, 255));
3371 3371 gcc_assert (IN_RANGE (ix86_schedule, 0, 255));
3372 3372 gcc_assert (IN_RANGE (ix86_tune, 0, 255));
3373 3373 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255));
3374 3374 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255));
3375 3375
3376 3376 ptr->arch = ix86_arch;
3377 3377 ptr->schedule = ix86_schedule;
3378 3378 ptr->tune = ix86_tune;
3379 3379 ptr->fpmath = ix86_fpmath;
3380 3380 ptr->branch_cost = ix86_branch_cost;
3381 3381 ptr->tune_defaulted = ix86_tune_defaulted;
3382 3382 ptr->arch_specified = ix86_arch_specified;
3383 3383 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit;
3384 3384 ptr->target_flags_explicit = target_flags_explicit;
3385 3385 }
3386 3386
3387 3387 /* Restore the current options */
3388 3388
3389 3389 static void
3390 3390 ix86_function_specific_restore (struct cl_target_option *ptr)
3391 3391 {
3392 3392 enum processor_type old_tune = ix86_tune;
3393 3393 enum processor_type old_arch = ix86_arch;
3394 3394 unsigned int ix86_arch_mask, ix86_tune_mask;
3395 3395 int i;
3396 3396
3397 3397 ix86_arch = ptr->arch;
3398 3398 ix86_schedule = ptr->schedule;
3399 3399 ix86_tune = ptr->tune;
3400 3400 ix86_fpmath = ptr->fpmath;
3401 3401 ix86_branch_cost = ptr->branch_cost;
3402 3402 ix86_tune_defaulted = ptr->tune_defaulted;
3403 3403 ix86_arch_specified = ptr->arch_specified;
3404 3404 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit;
3405 3405 target_flags_explicit = ptr->target_flags_explicit;
3406 3406
3407 3407 /* Recreate the arch feature tests if the arch changed */
3408 3408 if (old_arch != ix86_arch)
3409 3409 {
3410 3410 ix86_arch_mask = 1u << ix86_arch;
3411 3411 for (i = 0; i < X86_ARCH_LAST; ++i)
3412 3412 ix86_arch_features[i]
3413 3413 = !!(initial_ix86_arch_features[i] & ix86_arch_mask);
3414 3414 }
3415 3415
3416 3416 /* Recreate the tune optimization tests */
3417 3417 if (old_tune != ix86_tune)
3418 3418 {
3419 3419 ix86_tune_mask = 1u << ix86_tune;
3420 3420 for (i = 0; i < X86_TUNE_LAST; ++i)
3421 3421 ix86_tune_features[i]
3422 3422 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);
3423 3423 }
3424 3424 }
3425 3425
3426 3426 /* Print the current options */
3427 3427
3428 3428 static void
3429 3429 ix86_function_specific_print (FILE *file, int indent,
3430 3430 struct cl_target_option *ptr)
3431 3431 {
3432 3432 char *target_string
3433 3433 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags,
3434 3434 NULL, NULL, NULL, false);
3435 3435
3436 3436 fprintf (file, "%*sarch = %d (%s)\n",
3437 3437 indent, "",
3438 3438 ptr->arch,
3439 3439 ((ptr->arch < TARGET_CPU_DEFAULT_max)
3440 3440 ? cpu_names[ptr->arch]
3441 3441 : "<unknown>"));
3442 3442
3443 3443 fprintf (file, "%*stune = %d (%s)\n",
3444 3444 indent, "",
3445 3445 ptr->tune,
3446 3446 ((ptr->tune < TARGET_CPU_DEFAULT_max)
3447 3447 ? cpu_names[ptr->tune]
3448 3448 : "<unknown>"));
3449 3449
3450 3450 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath,
3451 3451 (ptr->fpmath & FPMATH_387) ? ", 387" : "",
3452 3452 (ptr->fpmath & FPMATH_SSE) ? ", sse" : "");
3453 3453 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost);
3454 3454
3455 3455 if (target_string)
3456 3456 {
3457 3457 fprintf (file, "%*s%s\n", indent, "", target_string);
3458 3458 free (target_string);
3459 3459 }
3460 3460 }
3461 3461
3462 3462
3463 3463 /* Inner function to process the attribute((target(...))), take an argument and
3464 3464 set the current options from the argument. If we have a list, recursively go
3465 3465 over the list. */
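/* For example, a declaration such as

       int foo (void) __attribute__((target("sse4.1,no-fancy-math-387")));

   reaches this function with ARGS holding the string
   "sse4.1,no-fancy-math-387", which is split on commas, checked for a "no-"
   prefix, and matched against the attrs[] table below (illustrative; the
   attribute names come from that table).  */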
3466 3466
3467 3467 static bool
3468 3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[])
3469 3469 {
3470 3470 char *next_optstr;
3471 3471 bool ret = true;
3472 3472
3473 3473 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 }
3474 3474 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 }
3475 3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M }
3476 3476 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M }
3477 3477
3478 3478 enum ix86_opt_type
3479 3479 {
3480 3480 ix86_opt_unknown,
3481 3481 ix86_opt_yes,
3482 3482 ix86_opt_no,
3483 3483 ix86_opt_str,
3484 3484 ix86_opt_isa
3485 3485 };
3486 3486
3487 3487 static const struct
3488 3488 {
3489 3489 const char *string;
3490 3490 size_t len;
3491 3491 enum ix86_opt_type type;
3492 3492 int opt;
3493 3493 int mask;
3494 3494 } attrs[] = {
3495 3495 /* isa options */
3496 3496 IX86_ATTR_ISA ("3dnow", OPT_m3dnow),
3497 3497 IX86_ATTR_ISA ("abm", OPT_mabm),
3498 3498 IX86_ATTR_ISA ("aes", OPT_maes),
3499 3499 IX86_ATTR_ISA ("avx", OPT_mavx),
3500 3500 IX86_ATTR_ISA ("mmx", OPT_mmmx),
3501 3501 IX86_ATTR_ISA ("pclmul", OPT_mpclmul),
3502 3502 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt),
3503 3503 IX86_ATTR_ISA ("sse", OPT_msse),
3504 3504 IX86_ATTR_ISA ("sse2", OPT_msse2),
3505 3505 IX86_ATTR_ISA ("sse3", OPT_msse3),
3506 3506 IX86_ATTR_ISA ("sse4", OPT_msse4),
3507 3507 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1),
3508 3508 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2),
3509 3509 IX86_ATTR_ISA ("sse4a", OPT_msse4a),
3510 3510 IX86_ATTR_ISA ("sse5", OPT_msse5),
3511 3511 IX86_ATTR_ISA ("ssse3", OPT_mssse3),
3512 3512
3513 3513 /* string options */
3514 3514 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH),
3515 3515 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH),
3516 3516 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE),
3517 3517
3518 3518 /* flag options */
3519 3519 IX86_ATTR_YES ("cld",
3520 3520 OPT_mcld,
3521 3521 MASK_CLD),
3522 3522
3523 3523 IX86_ATTR_NO ("fancy-math-387",
3524 3524 OPT_mfancy_math_387,
3525 3525 MASK_NO_FANCY_MATH_387),
3526 3526
3527 3527 IX86_ATTR_NO ("fused-madd",
3528 3528 OPT_mfused_madd,
3529 3529 MASK_NO_FUSED_MADD),
3530 3530
3531 3531 IX86_ATTR_YES ("ieee-fp",
3532 3532 OPT_mieee_fp,
3533 3533 MASK_IEEE_FP),
3534 3534
3535 3535 IX86_ATTR_YES ("inline-all-stringops",
3536 3536 OPT_minline_all_stringops,
3537 3537 MASK_INLINE_ALL_STRINGOPS),
3538 3538
3539 3539 IX86_ATTR_YES ("inline-stringops-dynamically",
3540 3540 OPT_minline_stringops_dynamically,
3541 3541 MASK_INLINE_STRINGOPS_DYNAMICALLY),
3542 3542
3543 3543 IX86_ATTR_NO ("align-stringops",
3544 3544 OPT_mno_align_stringops,
3545 3545 MASK_NO_ALIGN_STRINGOPS),
3546 3546
3547 3547 IX86_ATTR_YES ("recip",
3548 3548 OPT_mrecip,
3549 3549 MASK_RECIP),
3550 3550
3551 3551 };
3552 3552
3553 3553 /* If this is a list, recurse to get the options. */
3554 3554 if (TREE_CODE (args) == TREE_LIST)
3555 3555 {
3556 3556 bool ret = true;
3557 3557
3558 3558 for (; args; args = TREE_CHAIN (args))
3559 3559 if (TREE_VALUE (args)
3560 3560 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings))
3561 3561 ret = false;
3562 3562
3563 3563 return ret;
3564 3564 }
3565 3565
3566 3566 else if (TREE_CODE (args) != STRING_CST)
3567 3567 gcc_unreachable ();
3568 3568
3569 3569 /* Handle multiple arguments separated by commas. */
3570 3570 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
3571 3571
3572 3572 while (next_optstr && *next_optstr != '\0')
3573 3573 {
3574 3574 char *p = next_optstr;
3575 3575 char *orig_p = p;
3576 3576 char *comma = strchr (next_optstr, ',');
3577 3577 const char *opt_string;
3578 3578 size_t len, opt_len;
3579 3579 int opt;
3580 3580 bool opt_set_p;
3581 3581 char ch;
3582 3582 unsigned i;
3583 3583 enum ix86_opt_type type = ix86_opt_unknown;
3584 3584 int mask = 0;
3585 3585
3586 3586 if (comma)
3587 3587 {
3588 3588 *comma = '\0';
3589 3589 len = comma - next_optstr;
3590 3590 next_optstr = comma + 1;
3591 3591 }
3592 3592 else
3593 3593 {
3594 3594 len = strlen (p);
3595 3595 next_optstr = NULL;
3596 3596 }
3597 3597
3598 3598 /* Recognize no-xxx. */
3599 3599 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
3600 3600 {
3601 3601 opt_set_p = false;
3602 3602 p += 3;
3603 3603 len -= 3;
3604 3604 }
3605 3605 else
3606 3606 opt_set_p = true;
3607 3607
3608 3608 /* Find the option. */
3609 3609 ch = *p;
3610 3610 opt = N_OPTS;
3611 3611 for (i = 0; i < ARRAY_SIZE (attrs); i++)
3612 3612 {
3613 3613 type = attrs[i].type;
3614 3614 opt_len = attrs[i].len;
3615 3615 if (ch == attrs[i].string[0]
3616 3616 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len)
3617 3617 && memcmp (p, attrs[i].string, opt_len) == 0)
3618 3618 {
3619 3619 opt = attrs[i].opt;
3620 3620 mask = attrs[i].mask;
3621 3621 opt_string = attrs[i].string;
3622 3622 break;
3623 3623 }
3624 3624 }
3625 3625
3626 3626 /* Process the option. */
3627 3627 if (opt == N_OPTS)
3628 3628 {
3629 3629 error ("attribute(target(\"%s\")) is unknown", orig_p);
3630 3630 ret = false;
3631 3631 }
3632 3632
3633 3633 else if (type == ix86_opt_isa)
3634 3634 ix86_handle_option (opt, p, opt_set_p);
3635 3635
3636 3636 else if (type == ix86_opt_yes || type == ix86_opt_no)
3637 3637 {
3638 3638 if (type == ix86_opt_no)
3639 3639 opt_set_p = !opt_set_p;
3640 3640
3641 3641 if (opt_set_p)
3642 3642 target_flags |= mask;
3643 3643 else
3644 3644 target_flags &= ~mask;
3645 3645 }
3646 3646
3647 3647 else if (type == ix86_opt_str)
3648 3648 {
3649 3649 if (p_strings[opt])
3650 3650 {
3651 3651 error ("option(\"%s\") was already specified", opt_string);
3652 3652 ret = false;
3653 3653 }
3654 3654 else
3655 3655 p_strings[opt] = xstrdup (p + opt_len);
3656 3656 }
3657 3657
3658 3658 else
3659 3659 gcc_unreachable ();
3660 3660 }
3661 3661
3662 3662 return ret;
3663 3663 }
3664 3664
3665 3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
3666 3666
3667 3667 tree
3668 3668 ix86_valid_target_attribute_tree (tree args)
3669 3669 {
3670 3670 const char *orig_arch_string = ix86_arch_string;
3671 3671 const char *orig_tune_string = ix86_tune_string;
3672 3672 const char *orig_fpmath_string = ix86_fpmath_string;
3673 3673 int orig_tune_defaulted = ix86_tune_defaulted;
3674 3674 int orig_arch_specified = ix86_arch_specified;
3675 3675 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL };
3676 3676 tree t = NULL_TREE;
3677 3677 int i;
3678 3678 struct cl_target_option *def
3679 3679 = TREE_TARGET_OPTION (target_option_default_node);
3680 3680
3681 3681 /* Process each of the options on the chain. */
3682 3682 if (! ix86_valid_target_attribute_inner_p (args, option_strings))
3683 3683 return NULL_TREE;
3684 3684
3685 3685 /* If the changed options are different from the default, rerun override_options,
3686 3686      and then save the options away.  The string options are attribute options,
3687 3687 and will be undone when we copy the save structure. */
3688 3688 if (ix86_isa_flags != def->ix86_isa_flags
3689 3689 || target_flags != def->target_flags
3690 3690 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH]
3691 3691 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE]
3692 3692 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3693 3693 {
3694 3694 /* If we are using the default tune= or arch=, undo the string assigned,
3695 3695 and use the default. */
3696 3696 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH])
3697 3697 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH];
3698 3698 else if (!orig_arch_specified)
3699 3699 ix86_arch_string = NULL;
3700 3700
3701 3701 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE])
3702 3702 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE];
3703 3703 else if (orig_tune_defaulted)
3704 3704 ix86_tune_string = NULL;
3705 3705
3706 3706 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */
3707 3707 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH])
3708 3708 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH];
3709 3709 else if (!TARGET_64BIT && TARGET_SSE)
3710 3710 ix86_fpmath_string = "sse,387";
3711 3711
3712 3712 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
3713 3713 override_options (false);
3714 3714
3715 3715 /* Add any builtin functions with the new isa if any. */
3716 3716 ix86_add_new_builtins (ix86_isa_flags);
3717 3717
3718 3718 /* Save the current options unless we are validating options for
3719 3719 #pragma. */
3720 3720 t = build_target_option_node ();
3721 3721
3722 3722 ix86_arch_string = orig_arch_string;
3723 3723 ix86_tune_string = orig_tune_string;
3724 3724 ix86_fpmath_string = orig_fpmath_string;
3725 3725
3726 3726 /* Free up memory allocated to hold the strings */
3727 3727 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++)
3728 3728 if (option_strings[i])
3729 3729 free (option_strings[i]);
3730 3730 }
3731 3731
3732 3732 return t;
3733 3733 }
3734 3734
3735 3735 /* Hook to validate attribute((target("string"))). */
3736 3736
3737 3737 static bool
3738 3738 ix86_valid_target_attribute_p (tree fndecl,
3739 3739 tree ARG_UNUSED (name),
3740 3740 tree args,
3741 3741 int ARG_UNUSED (flags))
3742 3742 {
3743 3743 struct cl_target_option cur_target;
3744 3744 bool ret = true;
3745 3745 tree old_optimize = build_optimization_node ();
3746 3746 tree new_target, new_optimize;
3747 3747 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
3748 3748
3749 3749 /* If the function changed the optimization levels as well as setting target
3750 3750 options, start with the optimizations specified. */
3751 3751 if (func_optimize && func_optimize != old_optimize)
3752 3752 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize));
3753 3753
3754 3754 /* The target attributes may also change some optimization flags, so update
3755 3755 the optimization options if necessary. */
3756 3756 cl_target_option_save (&cur_target);
3757 3757 new_target = ix86_valid_target_attribute_tree (args);
3758 3758 new_optimize = build_optimization_node ();
3759 3759
3760 3760 if (!new_target)
3761 3761 ret = false;
3762 3762
3763 3763 else if (fndecl)
3764 3764 {
3765 3765 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
3766 3766
3767 3767 if (old_optimize != new_optimize)
3768 3768 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
3769 3769 }
3770 3770
3771 3771 cl_target_option_restore (&cur_target);
3772 3772
3773 3773 if (old_optimize != new_optimize)
3774 3774 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize));
3775 3775
3776 3776 return ret;
3777 3777 }
3778 3778
3779 3779
3780 3780 /* Hook to determine if one function can safely inline another. */
3781 3781
3782 3782 static bool
3783 3783 ix86_can_inline_p (tree caller, tree callee)
3784 3784 {
3785 3785 bool ret = false;
3786 3786 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
3787 3787 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
3788 3788
3789 3789 /* If callee has no option attributes, then it is ok to inline. */
3790 3790 if (!callee_tree)
3791 3791 ret = true;
3792 3792
3793 3793 /* If caller has no option attributes, but callee does then it is not ok to
3794 3794 inline. */
3795 3795 else if (!caller_tree)
3796 3796 ret = false;
3797 3797
3798 3798 else
3799 3799 {
3800 3800 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
3801 3801 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
3802 3802
3803 3803       /* Callee's isa options should be a subset of the caller's, i.e. an SSE5 function
3804 3804 	 can inline an SSE2 function but an SSE2 function can't inline an SSE5
3805 3805 function. */
3806 3806 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags)
3807 3807 != callee_opts->ix86_isa_flags)
3808 3808 ret = false;
3809 3809
3810 3810 /* See if we have the same non-isa options. */
3811 3811 else if (caller_opts->target_flags != callee_opts->target_flags)
3812 3812 ret = false;
3813 3813
3814 3814 /* See if arch, tune, etc. are the same. */
3815 3815 else if (caller_opts->arch != callee_opts->arch)
3816 3816 ret = false;
3817 3817
3818 3818 else if (caller_opts->tune != callee_opts->tune)
3819 3819 ret = false;
3820 3820
3821 3821 else if (caller_opts->fpmath != callee_opts->fpmath)
3822 3822 ret = false;
3823 3823
3824 3824 else if (caller_opts->branch_cost != callee_opts->branch_cost)
3825 3825 ret = false;
3826 3826
3827 3827 else
3828 3828 ret = true;
3829 3829 }
3830 3830
3831 3831 return ret;
3832 3832 }
3833 3833
3834 3834
3835 3835 /* Remember the last target of ix86_set_current_function. */
3836 3836 static GTY(()) tree ix86_previous_fndecl;
3837 3837
3838 3838 /* Establish appropriate back-end context for processing the function
3839 3839 FNDECL. The argument might be NULL to indicate processing at top
3840 3840 level, outside of any function scope. */
3841 3841 static void
3842 3842 ix86_set_current_function (tree fndecl)
3843 3843 {
3844 3844 /* Only change the context if the function changes. This hook is called
3845 3845 several times in the course of compiling a function, and we don't want to
3846 3846 slow things down too much or call target_reinit when it isn't safe. */
3847 3847 if (fndecl && fndecl != ix86_previous_fndecl)
3848 3848 {
3849 3849 tree old_tree = (ix86_previous_fndecl
3850 3850 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl)
3851 3851 : NULL_TREE);
3852 3852
3853 3853 tree new_tree = (fndecl
3854 3854 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
3855 3855 : NULL_TREE);
3856 3856
3857 3857 ix86_previous_fndecl = fndecl;
3858 3858 if (old_tree == new_tree)
3859 3859 ;
3860 3860
3861 3861 else if (new_tree)
3862 3862 {
3863 3863 cl_target_option_restore (TREE_TARGET_OPTION (new_tree));
3864 3864 target_reinit ();
3865 3865 }
3866 3866
3867 3867 else if (old_tree)
3868 3868 {
3869 3869 struct cl_target_option *def
3870 3870 = TREE_TARGET_OPTION (target_option_current_node);
3871 3871
3872 3872 cl_target_option_restore (def);
3873 3873 target_reinit ();
3874 3874 }
3875 3875 }
3876 3876 }
3877 3877
3878 3878
3879 3879 /* Return true if this goes in large data/bss. */
3880 3880
3881 3881 static bool
3882 3882 ix86_in_large_data_p (tree exp)
3883 3883 {
3884 3884 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
3885 3885 return false;
3886 3886
3887 3887 /* Functions are never large data. */
3888 3888 if (TREE_CODE (exp) == FUNCTION_DECL)
3889 3889 return false;
3890 3890
3891 3891 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
3892 3892 {
3893 3893 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
3894 3894 if (strcmp (section, ".ldata") == 0
3895 3895 || strcmp (section, ".lbss") == 0)
3896 3896 return true;
3897 3897 return false;
3898 3898 }
3899 3899 else
3900 3900 {
3901 3901 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
3902 3902
3903 3903 /* If this is an incomplete type with size 0, then we can't put it
3904 3904 in data because it might be too big when completed. */
3905 3905 if (!size || size > ix86_section_threshold)
3906 3906 return true;
3907 3907 }
3908 3908
3909 3909 return false;
3910 3910 }
3911 3911
3912 3912 /* Switch to the appropriate section for output of DECL.
3913 3913 DECL is either a `VAR_DECL' node or a constant of some sort.
3914 3914 RELOC indicates whether forming the initial value of DECL requires
3915 3915 link-time relocations. */
3916 3916
3917 3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
3918 3918 ATTRIBUTE_UNUSED;
3919 3919
3920 3920 static section *
3921 3921 x86_64_elf_select_section (tree decl, int reloc,
3922 3922 unsigned HOST_WIDE_INT align)
3923 3923 {
3924 3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3925 3925 && ix86_in_large_data_p (decl))
3926 3926 {
3927 3927 const char *sname = NULL;
3928 3928 unsigned int flags = SECTION_WRITE;
3929 3929 switch (categorize_decl_for_section (decl, reloc))
3930 3930 {
3931 3931 case SECCAT_DATA:
3932 3932 sname = ".ldata";
3933 3933 break;
3934 3934 case SECCAT_DATA_REL:
3935 3935 sname = ".ldata.rel";
3936 3936 break;
3937 3937 case SECCAT_DATA_REL_LOCAL:
3938 3938 sname = ".ldata.rel.local";
3939 3939 break;
3940 3940 case SECCAT_DATA_REL_RO:
3941 3941 sname = ".ldata.rel.ro";
3942 3942 break;
3943 3943 case SECCAT_DATA_REL_RO_LOCAL:
3944 3944 sname = ".ldata.rel.ro.local";
3945 3945 break;
3946 3946 case SECCAT_BSS:
3947 3947 sname = ".lbss";
3948 3948 flags |= SECTION_BSS;
3949 3949 break;
3950 3950 case SECCAT_RODATA:
3951 3951 case SECCAT_RODATA_MERGE_STR:
3952 3952 case SECCAT_RODATA_MERGE_STR_INIT:
3953 3953 case SECCAT_RODATA_MERGE_CONST:
3954 3954 sname = ".lrodata";
3955 3955 flags = 0;
3956 3956 break;
3957 3957 case SECCAT_SRODATA:
3958 3958 case SECCAT_SDATA:
3959 3959 case SECCAT_SBSS:
3960 3960 gcc_unreachable ();
3961 3961 case SECCAT_TEXT:
3962 3962 case SECCAT_TDATA:
3963 3963 case SECCAT_TBSS:
3964 3964 	  /* We don't split these for the medium model.  Place them into
3965 3965 	     default sections and hope for the best.  */
3966 3966 break;
3967 3967 case SECCAT_EMUTLS_VAR:
3968 3968 case SECCAT_EMUTLS_TMPL:
3969 3969 gcc_unreachable ();
3970 3970 }
3971 3971 if (sname)
3972 3972 {
3973 3973 /* We might get called with string constants, but get_named_section
3974 3974 doesn't like them as they are not DECLs. Also, we need to set
3975 3975 flags in that case. */
3976 3976 if (!DECL_P (decl))
3977 3977 return get_section (sname, flags, NULL);
3978 3978 return get_named_section (decl, sname, reloc);
3979 3979 }
3980 3980 }
3981 3981 return default_elf_select_section (decl, reloc, align);
3982 3982 }
3983 3983
3984 3984 /* Build up a unique section name, expressed as a
3985 3985 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
3986 3986 RELOC indicates whether the initial value of EXP requires
3987 3987 link-time relocations. */
3988 3988
3989 3989 static void ATTRIBUTE_UNUSED
3990 3990 x86_64_elf_unique_section (tree decl, int reloc)
3991 3991 {
3992 3992 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
3993 3993 && ix86_in_large_data_p (decl))
3994 3994 {
3995 3995 const char *prefix = NULL;
3996 3996 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
3997 3997 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
3998 3998
3999 3999 switch (categorize_decl_for_section (decl, reloc))
4000 4000 {
4001 4001 case SECCAT_DATA:
4002 4002 case SECCAT_DATA_REL:
4003 4003 case SECCAT_DATA_REL_LOCAL:
4004 4004 case SECCAT_DATA_REL_RO:
4005 4005 case SECCAT_DATA_REL_RO_LOCAL:
4006 4006 prefix = one_only ? ".ld" : ".ldata";
4007 4007 break;
4008 4008 case SECCAT_BSS:
4009 4009 prefix = one_only ? ".lb" : ".lbss";
4010 4010 break;
4011 4011 case SECCAT_RODATA:
4012 4012 case SECCAT_RODATA_MERGE_STR:
4013 4013 case SECCAT_RODATA_MERGE_STR_INIT:
4014 4014 case SECCAT_RODATA_MERGE_CONST:
4015 4015 prefix = one_only ? ".lr" : ".lrodata";
4016 4016 break;
4017 4017 case SECCAT_SRODATA:
4018 4018 case SECCAT_SDATA:
4019 4019 case SECCAT_SBSS:
4020 4020 gcc_unreachable ();
4021 4021 case SECCAT_TEXT:
4022 4022 case SECCAT_TDATA:
4023 4023 case SECCAT_TBSS:
4024 4024 	  /* We don't split these for the medium model.  Place them into
4025 4025 	     default sections and hope for the best.  */
4026 4026 break;
4027 4027 case SECCAT_EMUTLS_VAR:
4028 4028 prefix = targetm.emutls.var_section;
4029 4029 break;
4030 4030 case SECCAT_EMUTLS_TMPL:
4031 4031 prefix = targetm.emutls.tmpl_section;
4032 4032 break;
4033 4033 }
4034 4034 if (prefix)
4035 4035 {
4036 4036 const char *name, *linkonce;
4037 4037 char *string;
4038 4038
4039 4039 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
4040 4040 name = targetm.strip_name_encoding (name);
4041 4041
4042 4042 /* If we're using one_only, then there needs to be a .gnu.linkonce
4043 4043 prefix to the section name. */
4044 4044 linkonce = one_only ? ".gnu.linkonce" : "";
4045 4045
4046 4046 string = ACONCAT ((linkonce, prefix, ".", name, NULL));
4047 4047
4048 4048 DECL_SECTION_NAME (decl) = build_string (strlen (string), string);
4049 4049 return;
4050 4050 }
4051 4051 }
4052 4052 default_unique_section (decl, reloc);
4053 4053 }
4054 4054
4055 4055 #ifdef COMMON_ASM_OP
4056 4056 /* This says how to output assembler code to declare an
4057 4057 uninitialized external linkage data object.
4058 4058
4059 4059    For the medium model on x86-64 we need to use the .largecomm directive for
4060 4060 large objects. */
4061 4061 void
4062 4062 x86_elf_aligned_common (FILE *file,
4063 4063 const char *name, unsigned HOST_WIDE_INT size,
4064 4064 int align)
4065 4065 {
4066 4066 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4067 4067 && size > (unsigned int)ix86_section_threshold)
4068 4068 fprintf (file, ".largecomm\t");
4069 4069 else
4070 4070 fprintf (file, "%s", COMMON_ASM_OP);
4071 4071 assemble_name (file, name);
4072 4072 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
4073 4073 size, align / BITS_PER_UNIT);
4074 4074 }
4075 4075 #endif
4076 4076
4077 4077 /* Utility function for targets to use in implementing
4078 4078 ASM_OUTPUT_ALIGNED_BSS. */
4079 4079
4080 4080 void
4081 4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
4082 4082 const char *name, unsigned HOST_WIDE_INT size,
4083 4083 int align)
4084 4084 {
4085 4085 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
4086 4086 && size > (unsigned int)ix86_section_threshold)
4087 4087 switch_to_section (get_named_section (decl, ".lbss", 0));
4088 4088 else
4089 4089 switch_to_section (bss_section);
4090 4090 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
4091 4091 #ifdef ASM_DECLARE_OBJECT_NAME
4092 4092 last_assemble_variable_decl = decl;
4093 4093 ASM_DECLARE_OBJECT_NAME (file, name, decl);
4094 4094 #else
4095 4095 /* The standard thing is just to output a label for the object.  */
4096 4096 ASM_OUTPUT_LABEL (file, name);
4097 4097 #endif /* ASM_DECLARE_OBJECT_NAME */
4098 4098 ASM_OUTPUT_SKIP (file, size ? size : 1);
4099 4099 }
4100 4100
4101 4101 void
4102 4102 optimization_options (int level, int size ATTRIBUTE_UNUSED)
4103 4103 {
4104 4104 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
4105 4105 make the problem with not enough registers even worse. */
4106 4106 #ifdef INSN_SCHEDULING
4107 4107 if (level > 1)
4108 4108 flag_schedule_insns = 0;
4109 4109 #endif
4110 4110
4111 4111 if (TARGET_MACHO)
4112 4112 /* The Darwin libraries never set errno, so we might as well
4113 4113 avoid calling them when that's the only reason we would. */
4114 4114 flag_errno_math = 0;
4115 4115
4116 4116   /* The default values of these switches depend on TARGET_64BIT,
4117 4117      which is not known at this moment.  Mark these values with 2 and
4118 4118      let the user override them.  If there is no command-line option
4119 4119      specifying them, we will set the defaults in override_options.  */
4120 4120 if (optimize >= 1)
4121 4121 flag_omit_frame_pointer = 2;
4122 4122 flag_pcc_struct_return = 2;
4123 4123 flag_asynchronous_unwind_tables = 2;
4124 4124 flag_vect_cost_model = 1;
4125 4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
4126 4126 SUBTARGET_OPTIMIZATION_OPTIONS;
4127 4127 #endif
4128 4128 }
4129 4129
4130 4130 /* Decide whether we can make a sibling call to a function. DECL is the
4131 4131 declaration of the function being targeted by the call and EXP is the
4132 4132 CALL_EXPR representing the call. */
4133 4133
4134 4134 static bool
4135 4135 ix86_function_ok_for_sibcall (tree decl, tree exp)
4136 4136 {
4137 4137 tree type, decl_or_type;
4138 4138 rtx a, b;
4139 4139
4140 4140 /* If we are generating position-independent code, we cannot sibcall
4141 4141 optimize any indirect call, or a direct call to a global function,
4142 4142 as the PLT requires %ebx be live. */
4143 4143 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
4144 4144 return false;
4145 4145
4146 4146 /* If we need to align the outgoing stack, then sibcalling would
4147 4147 unalign the stack, which may break the called function. */
4148 4148 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY)
4149 4149 return false;
4150 4150
4151 4151 if (decl)
4152 4152 {
4153 4153 decl_or_type = decl;
4154 4154 type = TREE_TYPE (decl);
4155 4155 }
4156 4156 else
4157 4157 {
4158 4158 /* We're looking at the CALL_EXPR, we need the type of the function. */
4159 4159 type = CALL_EXPR_FN (exp); /* pointer expression */
4160 4160 type = TREE_TYPE (type); /* pointer type */
4161 4161 type = TREE_TYPE (type); /* function type */
4162 4162 decl_or_type = type;
4163 4163 }
4164 4164
4165 4165   /* Check that the return value locations are the same.  For example,
4166 4166 if we are returning floats on the 80387 register stack, we cannot
4167 4167 make a sibcall from a function that doesn't return a float to a
4168 4168 function that does or, conversely, from a function that does return
4169 4169 a float to a function that doesn't; the necessary stack adjustment
4170 4170 would not be executed. This is also the place we notice
4171 4171 differences in the return value ABI. Note that it is ok for one
4172 4172 of the functions to have void return type as long as the return
4173 4173 value of the other is passed in a register. */
4174 4174 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false);
4175 4175 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
4176 4176 cfun->decl, false);
4177 4177 if (STACK_REG_P (a) || STACK_REG_P (b))
4178 4178 {
4179 4179 if (!rtx_equal_p (a, b))
4180 4180 return false;
4181 4181 }
4182 4182 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
4183 4183 ;
4184 4184 else if (!rtx_equal_p (a, b))
4185 4185 return false;
4186 4186
4187 4187 if (TARGET_64BIT)
4188 4188 {
4189 4189 /* The SYSV ABI has more call-clobbered registers;
4190 4190 disallow sibcalls from MS to SYSV. */
4191 4191 if (cfun->machine->call_abi == MS_ABI
4192 4192 && ix86_function_type_abi (type) == SYSV_ABI)
4193 4193 return false;
4194 4194 }
4195 4195 else
4196 4196 {
4197 4197 /* If this call is indirect, we'll need to be able to use a
4198 4198 call-clobbered register for the address of the target function.
4199 4199 Make sure that all such registers are not used for passing
4200 4200 parameters. Note that DLLIMPORT functions are indirect. */
4201 4201 if (!decl
4202 4202 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl)))
4203 4203 {
4204 4204 if (ix86_function_regparm (type, NULL) >= 3)
4205 4205 {
4206 4206 /* ??? Need to count the actual number of registers to be used,
4207 4207 not the possible number of registers. Fix later. */
4208 4208 return false;
4209 4209 }
4210 4210 }
4211 4211 }
4212 4212
4213 4213 /* Otherwise okay. That also includes certain types of indirect calls. */
4214 4214 return true;
4215 4215 }
4216 4216
4217 4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
4218 4218 calling convention attributes;
4219 4219 arguments as in struct attribute_spec.handler. */
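/* For example (illustrative declarations only):

       int __attribute__((regparm(3))) f (int a, int b, int c);
       int __attribute__((fastcall)) g (int a, int b);

   regparm takes an integer constant no larger than REGPARM_MAX, and the
   checks below reject incompatible combinations such as fastcall+regparm
   or stdcall+cdecl on the same function type.  */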
4220 4220
4221 4221 static tree
4222 4222 ix86_handle_cconv_attribute (tree *node, tree name,
4223 4223 tree args,
4224 4224 int flags ATTRIBUTE_UNUSED,
4225 4225 bool *no_add_attrs)
4226 4226 {
4227 4227 if (TREE_CODE (*node) != FUNCTION_TYPE
4228 4228 && TREE_CODE (*node) != METHOD_TYPE
4229 4229 && TREE_CODE (*node) != FIELD_DECL
4230 4230 && TREE_CODE (*node) != TYPE_DECL)
4231 4231 {
4232 4232 warning (OPT_Wattributes, "%qs attribute only applies to functions",
4233 4233 IDENTIFIER_POINTER (name));
4234 4234 *no_add_attrs = true;
4235 4235 return NULL_TREE;
4236 4236 }
4237 4237
4238 4238 /* Can combine regparm with all attributes but fastcall. */
4239 4239 if (is_attribute_p ("regparm", name))
4240 4240 {
4241 4241 tree cst;
4242 4242
4243 4243 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4244 4244 {
4245 4245 error ("fastcall and regparm attributes are not compatible");
4246 4246 }
4247 4247
4248 4248 cst = TREE_VALUE (args);
4249 4249 if (TREE_CODE (cst) != INTEGER_CST)
4250 4250 {
4251 4251 warning (OPT_Wattributes,
4252 4252 "%qs attribute requires an integer constant argument",
4253 4253 IDENTIFIER_POINTER (name));
4254 4254 *no_add_attrs = true;
4255 4255 }
4256 4256 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
4257 4257 {
4258 4258 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
4259 4259 IDENTIFIER_POINTER (name), REGPARM_MAX);
4260 4260 *no_add_attrs = true;
4261 4261 }
4262 4262
4263 4263 return NULL_TREE;
4264 4264 }
4265 4265
4266 4266 if (TARGET_64BIT)
4267 4267 {
4268 4268 /* Do not warn when emulating the MS ABI. */
4269 4269 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI)
4270 4270 warning (OPT_Wattributes, "%qs attribute ignored",
4271 4271 IDENTIFIER_POINTER (name));
4272 4272 *no_add_attrs = true;
4273 4273 return NULL_TREE;
4274 4274 }
4275 4275
4276 4276 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
4277 4277 if (is_attribute_p ("fastcall", name))
4278 4278 {
4279 4279 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4280 4280 {
4281 4281 error ("fastcall and cdecl attributes are not compatible");
4282 4282 }
4283 4283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4284 4284 {
4285 4285 error ("fastcall and stdcall attributes are not compatible");
4286 4286 }
4287 4287 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
4288 4288 {
4289 4289 error ("fastcall and regparm attributes are not compatible");
4290 4290 }
4291 4291 }
4292 4292
4293 4293 /* Can combine stdcall with fastcall (redundant), regparm and
4294 4294 sseregparm. */
4295 4295 else if (is_attribute_p ("stdcall", name))
4296 4296 {
4297 4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
4298 4298 {
4299 4299 error ("stdcall and cdecl attributes are not compatible");
4300 4300 }
4301 4301 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4302 4302 {
4303 4303 error ("stdcall and fastcall attributes are not compatible");
4304 4304 }
4305 4305 }
4306 4306
4307 4307 /* Can combine cdecl with regparm and sseregparm. */
4308 4308 else if (is_attribute_p ("cdecl", name))
4309 4309 {
4310 4310 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
4311 4311 {
4312 4312 error ("stdcall and cdecl attributes are not compatible");
4313 4313 }
4314 4314 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
4315 4315 {
4316 4316 error ("fastcall and cdecl attributes are not compatible");
4317 4317 }
4318 4318 }
4319 4319
4320 4320 /* Can combine sseregparm with all attributes. */
4321 4321
4322 4322 return NULL_TREE;
4323 4323 }
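
The handler above enforces which calling-convention attributes may be combined. A minimal sketch of user declarations exercising those checks (function names are illustrative only, not part of the patch):

    /* Sketch: declarations exercising ix86_handle_cconv_attribute on i386. */
    int __attribute__((stdcall, regparm(2))) ok_combo (int a, int b);   /* accepted: stdcall combines with regparm */
    int __attribute__((fastcall, regparm(3))) bad_combo (int a, int b); /* error: fastcall and regparm not compatible */
    int __attribute__((stdcall, cdecl)) also_bad (int a);               /* error: stdcall and cdecl not compatible */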
4324 4324
4325 4325 /* Return 0 if the attributes for two types are incompatible, 1 if they
4326 4326 are compatible, and 2 if they are nearly compatible (which causes a
4327 4327 warning to be generated). */
4328 4328
4329 4329 static int
4330 4330 ix86_comp_type_attributes (const_tree type1, const_tree type2)
4331 4331 {
4332 4332 /* Check for mismatch of non-default calling convention. */
4333 4333 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
4334 4334
4335 4335 if (TREE_CODE (type1) != FUNCTION_TYPE
4336 4336 && TREE_CODE (type1) != METHOD_TYPE)
4337 4337 return 1;
4338 4338
4339 4339 /* Check for mismatched fastcall/regparm types. */
4340 4340 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
4341 4341 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
4342 4342 || (ix86_function_regparm (type1, NULL)
4343 4343 != ix86_function_regparm (type2, NULL)))
4344 4344 return 0;
4345 4345
4346 4346 /* Check for mismatched sseregparm types. */
4347 4347 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
4348 4348 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
4349 4349 return 0;
4350 4350
4351 4351 /* Check for mismatched return types (cdecl vs stdcall). */
4352 4352 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
4353 4353 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
4354 4354 return 0;
4355 4355
4356 4356 return 1;
4357 4357 }
4358 4358
4359 4359 /* Return the regparm value for a function with the indicated TYPE and DECL.
4360 4360 DECL may be NULL when calling function indirectly
4361 4361 or considering a libcall. */
4362 4362
4363 4363 static int
4364 4364 ix86_function_regparm (const_tree type, const_tree decl)
4365 4365 {
4366 4366 tree attr;
4367 4367 int regparm;
4368 4368
4369 4369 static bool error_issued;
4370 4370
4371 4371 if (TARGET_64BIT)
4372 4372 return (ix86_function_type_abi (type) == SYSV_ABI
4373 4373 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX);
4374 4374
4375 4375 regparm = ix86_regparm;
4376 4376 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
4377 4377 if (attr)
4378 4378 {
4379 4379 regparm
4380 4380 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
4381 4381
4382 4382 if (decl && TREE_CODE (decl) == FUNCTION_DECL)
4383 4383 {
4384 4384 /* We can't use regparm(3) for nested functions because
4385 4385 these pass static chain pointer in %ecx register. */
4386 4386 if (!error_issued && regparm == 3
4387 4387 && decl_function_context (decl)
4388 4388 && !DECL_NO_STATIC_CHAIN (decl))
4389 4389 {
4390 4390 error ("nested functions are limited to 2 register parameters");
4391 4391 error_issued = true;
4392 4392 return 0;
4393 4393 }
4394 4394 }
4395 4395
4395 lines elided
4396 4396 return regparm;
4397 4397 }
4398 4398
4399 4399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
4400 4400 return 2;
4401 4401
4402 4402 /* Use register calling convention for local functions when possible. */
4403 4403 if (decl
4404 4404 && TREE_CODE (decl) == FUNCTION_DECL
4405 4405 && optimize
4406 + && (TARGET_64BIT || !flag_strict_calling_conventions)
4406 4407 && !profile_flag)
4407 4408 {
4408 4409 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4409 4410 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4410 4411 if (i && i->local)
4411 4412 {
4412 4413 int local_regparm, globals = 0, regno;
4413 4414 struct function *f;
4414 4415
4415 4416 /* Make sure no regparm register is taken by a
4416 4417 fixed register variable. */
4417 4418 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++)
4418 4419 if (fixed_regs[local_regparm])
4419 4420 break;
4420 4421
4421 4422 /* We can't use regparm(3) for nested functions as these use
4422 4423 static chain pointer in third argument. */
4423 4424 if (local_regparm == 3
4424 4425 && decl_function_context (decl)
4425 4426 && !DECL_NO_STATIC_CHAIN (decl))
4426 4427 local_regparm = 2;
4427 4428
4428 4429        /* If the function realigns its stack pointer, the prologue will
4429 4430 clobber %ecx. If we've already generated code for the callee,
4430 4431 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
4431 4432 scanning the attributes for the self-realigning property. */
4432 4433 f = DECL_STRUCT_FUNCTION (decl);
4433 4434        /* Since the current internal arg pointer won't conflict with
4434 4435           parameter-passing regs, there is no need to change stack
4435 4436           realignment or adjust the regparm number.
4436 4437
4437 4438 Each fixed register usage increases register pressure,
4438 4439 so less registers should be used for argument passing.
4439 4440         This functionality can be overridden by an explicit
4440 4441 regparm value. */
4441 4442 for (regno = 0; regno <= DI_REG; regno++)
4442 4443 if (fixed_regs[regno])
4443 4444 globals++;
4444 4445
4445 4446 local_regparm
4446 4447 = globals < local_regparm ? local_regparm - globals : 0;
4447 4448
4448 4449 if (local_regparm > regparm)
4449 4450 regparm = local_regparm;
4450 4451 }
4451 4452 }
4452 4453
4453 4454 return regparm;
4454 4455 }
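
The hunk above is the heart of the change: the local-function regparm promotion is now skipped on i386 when strict calling conventions are in effect (flag_strict_calling_conventions, added by this patch). A hedged sketch of the observable effect, assuming the callee is not inlined:

    /* Sketch: 'local' is static and only called within this unit, so stock
       GCC may promote it to a register-passing convention on i386; with the
       new flag in effect it keeps the standard stack-based ABI.
       Names are illustrative only. */
    static int local (int a, int b) { return a + b; }
    int caller (void) { return local (1, 2); }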
4455 4456
4456 4457 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
4457 4458 DFmode (2) arguments in SSE registers for a function with the
4458 4459 indicated TYPE and DECL. DECL may be NULL when calling function
4459 4460 indirectly or considering a libcall. Otherwise return 0. */
4460 4461
4461 4462 static int
4462 4463 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn)
4463 4464 {
4464 4465 gcc_assert (!TARGET_64BIT);
4465 4466
4466 4467 /* Use SSE registers to pass SFmode and DFmode arguments if requested
4467 4468 by the sseregparm attribute. */
4468 4469 if (TARGET_SSEREGPARM
4469 4470 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
4470 4471 {
4471 4472 if (!TARGET_SSE)
4472 4473 {
4473 4474 if (warn)
4474 4475 {
4475 4476 if (decl)
4476 4477 error ("Calling %qD with attribute sseregparm without "
4477 4478 "SSE/SSE2 enabled", decl);
4478 4479 else
4479 4480 error ("Calling %qT with attribute sseregparm without "
64 lines elided
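
The same guard is applied below to the SSE promotion for local functions: under strict calling conventions, SFmode/DFmode arguments of i386-local functions stay on the standard ABI path instead of moving into SSE registers. A brief sketch, assuming -mfpmath=sse and optimization (names illustrative):

    /* Sketch: without the added check, 'scale' could receive 'x' in an SSE
       register as a local-call optimization; the new flag suppresses that
       on i386.  Illustrative only. */
    static double scale (double x) { return x * 2.0; }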
4480 4481 "SSE/SSE2 enabled", type);
4481 4482 }
4482 4483 return 0;
4483 4484 }
4484 4485
4485 4486 return 2;
4486 4487 }
4487 4488
4488 4489 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
4489 4490 (and DFmode for SSE2) arguments in SSE registers. */
4490 - if (decl && TARGET_SSE_MATH && optimize && !profile_flag)
4491 + if (decl && TARGET_SSE_MATH && optimize && !profile_flag &&
4492 + (TARGET_64BIT || !flag_strict_calling_conventions))
4491 4493 {
4492 4494 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
4493 4495 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
4494 4496 if (i && i->local)
4495 4497 return TARGET_SSE2 ? 2 : 1;
4496 4498 }
4497 4499
4498 4500 return 0;
4499 4501 }
4500 4502
4501 4503 /* Return true if EAX is live at the start of the function. Used by
4502 4504 ix86_expand_prologue to determine if we need special help before
4503 4505 calling allocate_stack_worker. */
4504 4506
4505 4507 static bool
4506 4508 ix86_eax_live_at_start_p (void)
4507 4509 {
4508 4510 /* Cheat. Don't bother working forward from ix86_function_regparm
4509 4511 to the function type to whether an actual argument is located in
4510 4512 eax. Instead just look at cfg info, which is still close enough
4511 4513 to correct at this point. This gives false positives for broken
4512 4514 functions that might use uninitialized data that happens to be
4513 4515 allocated in eax, but who cares? */
4514 4516 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
4515 4517 }
4516 4518
4517 4519 /* Value is the number of bytes of arguments automatically
4518 4520 popped when returning from a subroutine call.
4519 4521 FUNDECL is the declaration node of the function (as a tree),
4520 4522 FUNTYPE is the data type of the function (as a tree),
4521 4523 or for a library call it is an identifier node for the subroutine name.
4522 4524 SIZE is the number of bytes of arguments passed on the stack.
4523 4525
4524 4526 On the 80386, the RTD insn may be used to pop them if the number
4525 4527 of args is fixed, but if the number is variable then the caller
4526 4528 must pop them all. RTD can't be used for library calls now
4527 4529 because the library is compiled with the Unix compiler.
4528 4530 Use of RTD is a selectable option, since it is incompatible with
4529 4531 standard Unix calling sequences. If the option is not selected,
4530 4532 the caller must always pop the args.
4531 4533
4532 4534 The attribute stdcall is equivalent to RTD on a per module basis. */
4533 4535
4534 4536 int
4535 4537 ix86_return_pops_args (tree fundecl, tree funtype, int size)
4536 4538 {
4537 4539 int rtd;
4538 4540
4539 4541 /* None of the 64-bit ABIs pop arguments. */
4540 4542 if (TARGET_64BIT)
4541 4543 return 0;
4542 4544
4543 4545 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
4544 4546
4545 4547 /* Cdecl functions override -mrtd, and never pop the stack. */
4546 4548 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
4547 4549 {
4548 4550 /* Stdcall and fastcall functions will pop the stack if not
4549 4551 variable args. */
4550 4552 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
4551 4553 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
4552 4554 rtd = 1;
4553 4555
4554 4556 if (rtd && ! stdarg_p (funtype))
4555 4557 return size;
4556 4558 }
4557 4559
4558 4560 /* Lose any fake structure return argument if it is passed on the stack. */
4559 4561 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
4560 4562 && !KEEP_AGGREGATE_RETURN_POINTER)
4561 4563 {
4562 4564 int nregs = ix86_function_regparm (funtype, fundecl);
4563 4565 if (nregs == 0)
4564 4566 return GET_MODE_SIZE (Pmode);
4565 4567 }
4566 4568
4567 4569 return 0;
4568 4570 }
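
ix86_return_pops_args encodes the callee-pop rules described in its comment: stdcall/fastcall (and -mrtd) functions pop their fixed arguments, while cdecl and variadic functions leave that to the caller. A small sketch of declarations this distinction applies to (illustrative only):

    /* Sketch: on i386 the stdcall callee returns with 'ret 8' (SIZE bytes
       popped); the cdecl callee returns with plain 'ret' (caller pops).
       A variadic stdcall function falls back to caller-pop. */
    int __attribute__((stdcall)) callee_pops (int a, int b);
    int __attribute__((cdecl))   caller_pops (int a, int b);
    int __attribute__((stdcall)) vararg_case (int a, ...);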
4569 4571
4570 4572 /* Argument support functions. */
4571 4573
4572 4574 /* Return true when register may be used to pass function parameters. */
4573 4575 bool
4574 4576 ix86_function_arg_regno_p (int regno)
4575 4577 {
4576 4578 int i;
4577 4579 const int *parm_regs;
4578 4580
4579 4581 if (!TARGET_64BIT)
4580 4582 {
4581 4583 if (TARGET_MACHO)
4582 4584 return (regno < REGPARM_MAX
4583 4585 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
4584 4586 else
4585 4587 return (regno < REGPARM_MAX
4586 4588 || (TARGET_MMX && MMX_REGNO_P (regno)
4587 4589 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
4588 4590 || (TARGET_SSE && SSE_REGNO_P (regno)
4589 4591 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
4590 4592 }
4591 4593
4592 4594 if (TARGET_MACHO)
4593 4595 {
4594 4596 if (SSE_REGNO_P (regno) && TARGET_SSE)
4595 4597 return true;
4596 4598 }
4597 4599 else
4598 4600 {
4599 4601 if (TARGET_SSE && SSE_REGNO_P (regno)
4600 4602 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
4601 4603 return true;
4602 4604 }
4603 4605
4604 4606 /* TODO: The function should depend on current function ABI but
4605 4607 builtins.c would need updating then. Therefore we use the
4606 4608 default ABI. */
4607 4609
4608 4610 /* RAX is used as hidden argument to va_arg functions. */
4609 4611 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG)
4610 4612 return true;
4611 4613
4612 4614 if (DEFAULT_ABI == MS_ABI)
4613 4615 parm_regs = x86_64_ms_abi_int_parameter_registers;
4614 4616 else
4615 4617 parm_regs = x86_64_int_parameter_registers;
4616 4618 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX
4617 4619 : X86_64_REGPARM_MAX); i++)
4618 4620 if (regno == parm_regs[i])
4619 4621 return true;
4620 4622 return false;
4621 4623 }
4622 4624
4623 4625 /* Return true if we do not know how to pass TYPE solely in registers. */
4624 4626
4625 4627 static bool
4626 4628 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
4627 4629 {
4628 4630 if (must_pass_in_stack_var_size_or_pad (mode, type))
4629 4631 return true;
4630 4632
4631 4633 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
4632 4634 The layout_type routine is crafty and tries to trick us into passing
4633 4635 currently unsupported vector types on the stack by using TImode. */
4634 4636 return (!TARGET_64BIT && mode == TImode
4635 4637 && type && TREE_CODE (type) != VECTOR_TYPE);
4636 4638 }
4637 4639
4638 4640 /* Return the size, in bytes, of the area reserved for arguments passed
4639 4641    in registers for the function represented by FNDECL, depending on the
4640 4642    ABI in use. */
4641 4643 int
4642 4644 ix86_reg_parm_stack_space (const_tree fndecl)
4643 4645 {
4644 4646 int call_abi = SYSV_ABI;
4645 4647 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL)
4646 4648 call_abi = ix86_function_abi (fndecl);
4647 4649 else
4648 4650 call_abi = ix86_function_type_abi (fndecl);
4649 4651 if (call_abi == MS_ABI)
4650 4652 return 32;
4651 4653 return 0;
4652 4654 }
4653 4655
4654 4656 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the
4655 4657 call abi used. */
4656 4658 int
4657 4659 ix86_function_type_abi (const_tree fntype)
4658 4660 {
4659 4661 if (TARGET_64BIT && fntype != NULL)
4660 4662 {
4661 4663 int abi;
4662 4664 if (DEFAULT_ABI == SYSV_ABI)
4663 4665 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI;
4664 4666 else
4665 4667 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI;
4666 4668
4667 4669 return abi;
4668 4670 }
4669 4671 return DEFAULT_ABI;
4670 4672 }
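
ix86_function_type_abi lets individual 64-bit functions override the default calling ABI through attributes. A sketch of declarations it would distinguish (illustrative only):

    /* Sketch: on x86-64 these attributes flip the per-function ABI reported
       by ix86_function_type_abi. */
    void __attribute__((ms_abi))   uses_ms_abi   (int a, int b, int c, int d);
    void __attribute__((sysv_abi)) uses_sysv_abi (int a, int b, int c, int d);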
4671 4673
4672 4674 int
4673 4675 ix86_function_abi (const_tree fndecl)
4674 4676 {
4675 4677 if (! fndecl)
4676 4678 return DEFAULT_ABI;
4677 4679 return ix86_function_type_abi (TREE_TYPE (fndecl));
4678 4680 }
4679 4681
4680 4682 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the
4681 4683 call abi used. */
4682 4684 int
4683 4685 ix86_cfun_abi (void)
4684 4686 {
4685 4687 if (! cfun || ! TARGET_64BIT)
4686 4688 return DEFAULT_ABI;
4687 4689 return cfun->machine->call_abi;
4688 4690 }
4689 4691
4690 4692 /* regclass.c */
4691 4693 extern void init_regs (void);
4692 4694
4693 4695 /* Implementation of call abi switching target hook. Specific to FNDECL
4694 4696 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE
4695 4697 for more details. */
4696 4698 void
4697 4699 ix86_call_abi_override (const_tree fndecl)
4698 4700 {
4699 4701 if (fndecl == NULL_TREE)
4700 4702 cfun->machine->call_abi = DEFAULT_ABI;
4701 4703 else
4702 4704 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl));
4703 4705 }
4704 4706
4705 4707 /* MS and SYSV ABI have different set of call used registers. Avoid expensive
4706 4708 re-initialization of init_regs each time we switch function context since
4707 4709 this is needed only during RTL expansion. */
4708 4710 static void
4709 4711 ix86_maybe_switch_abi (void)
4710 4712 {
4711 4713 if (TARGET_64BIT &&
4712 4714 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI))
4713 4715 reinit_regs ();
4714 4716 }
4715 4717
4716 4718 /* Initialize a variable CUM of type CUMULATIVE_ARGS
4717 4719 for a call to a function whose data type is FNTYPE.
4718 4720 For a library call, FNTYPE is 0. */
4719 4721
4720 4722 void
4721 4723 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
4722 4724 tree fntype, /* tree ptr for function decl */
4723 4725 rtx libname, /* SYMBOL_REF of library name or 0 */
4724 4726 tree fndecl)
4725 4727 {
4726 4728 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL;
4727 4729 memset (cum, 0, sizeof (*cum));
4728 4730
4729 4731 if (fndecl)
4730 4732 cum->call_abi = ix86_function_abi (fndecl);
4731 4733 else
4732 4734 cum->call_abi = ix86_function_type_abi (fntype);
4733 4735 /* Set up the number of registers to use for passing arguments. */
4734 4736
4735 4737 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS)
4736 4738 sorry ("ms_abi attribute requires -maccumulate-outgoing-args "
4737 4739 "or subtarget optimization implying it");
4738 4740 cum->nregs = ix86_regparm;
4739 4741 if (TARGET_64BIT)
4740 4742 {
4741 4743 if (cum->call_abi != DEFAULT_ABI)
4742 4744 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX
4743 4745 : X64_REGPARM_MAX;
4744 4746 }
4745 4747 if (TARGET_SSE)
4746 4748 {
4747 4749 cum->sse_nregs = SSE_REGPARM_MAX;
4748 4750 if (TARGET_64BIT)
4749 4751 {
4750 4752 if (cum->call_abi != DEFAULT_ABI)
4751 4753 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
4752 4754 : X64_SSE_REGPARM_MAX;
4753 4755 }
4754 4756 }
4755 4757 if (TARGET_MMX)
4756 4758 cum->mmx_nregs = MMX_REGPARM_MAX;
4757 4759 cum->warn_avx = true;
4758 4760 cum->warn_sse = true;
4759 4761 cum->warn_mmx = true;
4760 4762
4761 4763    /* Because the type might mismatch between caller and callee, we need to
4762 4764      use the actual type of the function for local calls.
4763 4765 FIXME: cgraph_analyze can be told to actually record if function uses
4764 4766 va_start so for local functions maybe_vaarg can be made aggressive
4765 4767 helping K&R code.
4766 4768      FIXME: once the type system is fixed, we won't need this code anymore. */
4767 4769 if (i && i->local)
4768 4770 fntype = TREE_TYPE (fndecl);
4769 4771 cum->maybe_vaarg = (fntype
4770 4772 ? (!prototype_p (fntype) || stdarg_p (fntype))
4771 4773 : !libname);
4772 4774
4773 4775 if (!TARGET_64BIT)
4774 4776 {
4775 4777 /* If there are variable arguments, then we won't pass anything
4776 4778 in registers in 32-bit mode. */
4777 4779 if (stdarg_p (fntype))
4778 4780 {
4779 4781 cum->nregs = 0;
4780 4782 cum->sse_nregs = 0;
4781 4783 cum->mmx_nregs = 0;
4782 4784 cum->warn_avx = 0;
4783 4785 cum->warn_sse = 0;
4784 4786 cum->warn_mmx = 0;
4785 4787 return;
4786 4788 }
4787 4789
4788 4790 /* Use ecx and edx registers if function has fastcall attribute,
4789 4791 else look for regparm information. */
4790 4792 if (fntype)
4791 4793 {
4792 4794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
4793 4795 {
4794 4796 cum->nregs = 2;
4795 4797 cum->fastcall = 1;
4796 4798 }
4797 4799 else
4798 4800 cum->nregs = ix86_function_regparm (fntype, fndecl);
4799 4801 }
4800 4802
4801 4803 /* Set up the number of SSE registers used for passing SFmode
4802 4804 and DFmode arguments. Warn for mismatching ABI. */
4803 4805 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true);
4804 4806 }
4805 4807 }
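
As the 32-bit branch of init_cumulative_args shows, a variadic prototype zeroes all register counts, so every argument goes on the stack regardless of regparm settings. A one-line sketch (illustrative declaration):

    /* Sketch: on i386, all arguments of a variadic function are passed on
       the stack, even when regparm is otherwise in effect. */
    extern int sum_all (int count, ...);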
4806 4808
4807 4809 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
4808 4810 But in the case of vector types, it is some vector mode.
4809 4811
4810 4812 When we have only some of our vector isa extensions enabled, then there
4811 4813 are some modes for which vector_mode_supported_p is false. For these
4812 4814 modes, the generic vector support in gcc will choose some non-vector mode
4813 4815 in order to implement the type. By computing the natural mode, we'll
4814 4816 select the proper ABI location for the operand and not depend on whatever
4815 4817 the middle-end decides to do with these vector types.
4816 4818
4817 4819    The middle-end can't deal with vector types > 16 bytes. In this
4818 4820 case, we return the original mode and warn ABI change if CUM isn't
4819 4821 NULL. */
4820 4822
4821 4823 static enum machine_mode
4822 4824 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum)
4823 4825 {
4824 4826 enum machine_mode mode = TYPE_MODE (type);
4825 4827
4826 4828 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
4827 4829 {
4828 4830 HOST_WIDE_INT size = int_size_in_bytes (type);
4829 4831 if ((size == 8 || size == 16 || size == 32)
4830 4832 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
4831 4833 && TYPE_VECTOR_SUBPARTS (type) > 1)
4832 4834 {
4833 4835 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
4834 4836
4835 4837 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
4836 4838 mode = MIN_MODE_VECTOR_FLOAT;
4837 4839 else
4838 4840 mode = MIN_MODE_VECTOR_INT;
4839 4841
4840 4842 /* Get the mode which has this inner mode and number of units. */
4841 4843 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
4842 4844 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
4843 4845 && GET_MODE_INNER (mode) == innermode)
4844 4846 {
4845 4847 if (size == 32 && !TARGET_AVX)
4846 4848 {
4847 4849 static bool warnedavx;
4848 4850
4849 4851 if (cum
4850 4852 && !warnedavx
4851 4853 && cum->warn_avx)
4852 4854 {
4853 4855 warnedavx = true;
4854 4856 warning (0, "AVX vector argument without AVX "
4855 4857 "enabled changes the ABI");
4856 4858 }
4857 4859 return TYPE_MODE (type);
4858 4860 }
4859 4861 else
4860 4862 return mode;
4861 4863 }
4862 4864
4863 4865 gcc_unreachable ();
4864 4866 }
4865 4867 }
4866 4868
4867 4869 return mode;
4868 4870 }
4869 4871
4870 4872 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
4871 4873 this may not agree with the mode that the type system has chosen for the
4872 4874 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
4873 4875 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
4874 4876
4875 4877 static rtx
4876 4878 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
4877 4879 unsigned int regno)
4878 4880 {
4879 4881 rtx tmp;
4880 4882
4881 4883 if (orig_mode != BLKmode)
4882 4884 tmp = gen_rtx_REG (orig_mode, regno);
4883 4885 else
4884 4886 {
4885 4887 tmp = gen_rtx_REG (mode, regno);
4886 4888 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
4887 4889 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
4888 4890 }
4889 4891
4890 4892 return tmp;
4891 4893 }
4892 4894
4893 4895 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
4894 4896 of this code is to classify each 8bytes of incoming argument by the register
4895 4897 class and assign registers accordingly. */
4896 4898
4897 4899 /* Return the union class of CLASS1 and CLASS2.
4898 4900 See the x86-64 PS ABI for details. */
4899 4901
4900 4902 static enum x86_64_reg_class
4901 4903 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
4902 4904 {
4903 4905 /* Rule #1: If both classes are equal, this is the resulting class. */
4904 4906 if (class1 == class2)
4905 4907 return class1;
4906 4908
4907 4909 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
4908 4910 the other class. */
4909 4911 if (class1 == X86_64_NO_CLASS)
4910 4912 return class2;
4911 4913 if (class2 == X86_64_NO_CLASS)
4912 4914 return class1;
4913 4915
4914 4916 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
4915 4917 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
4916 4918 return X86_64_MEMORY_CLASS;
4917 4919
4918 4920 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
4919 4921 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
4920 4922 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
4921 4923 return X86_64_INTEGERSI_CLASS;
4922 4924 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
4923 4925 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
4924 4926 return X86_64_INTEGER_CLASS;
4925 4927
4926 4928 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
4927 4929 MEMORY is used. */
4928 4930 if (class1 == X86_64_X87_CLASS
4929 4931 || class1 == X86_64_X87UP_CLASS
4930 4932 || class1 == X86_64_COMPLEX_X87_CLASS
4931 4933 || class2 == X86_64_X87_CLASS
4932 4934 || class2 == X86_64_X87UP_CLASS
4933 4935 || class2 == X86_64_COMPLEX_X87_CLASS)
4934 4936 return X86_64_MEMORY_CLASS;
4935 4937
4936 4938 /* Rule #6: Otherwise class SSE is used. */
4937 4939 return X86_64_SSE_CLASS;
4938 4940 }
4939 4941
4940 4942 /* Classify the argument of type TYPE and mode MODE.
4941 4943 CLASSES will be filled by the register class used to pass each word
4942 4944 of the operand. The number of words is returned. In case the parameter
4943 4945 should be passed in memory, 0 is returned. As a special case for zero
4944 4946 sized containers, classes[0] will be NO_CLASS and 1 is returned.
4945 4947
4946 4948 BIT_OFFSET is used internally for handling records and specifies offset
4947 4949 of the offset in bits modulo 256 to avoid overflow cases.
4948 4950
4949 4951 See the x86-64 PS ABI for details.
4950 4952 */
4951 4953
4952 4954 static int
4953 4955 classify_argument (enum machine_mode mode, const_tree type,
4954 4956 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
4955 4957 {
4956 4958 HOST_WIDE_INT bytes =
4957 4959 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4958 4960 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4959 4961
4960 4962 /* Variable sized entities are always passed/returned in memory. */
4961 4963 if (bytes < 0)
4962 4964 return 0;
4963 4965
4964 4966 if (mode != VOIDmode
4965 4967 && targetm.calls.must_pass_in_stack (mode, type))
4966 4968 return 0;
4967 4969
4968 4970 if (type && AGGREGATE_TYPE_P (type))
4969 4971 {
4970 4972 int i;
4971 4973 tree field;
4972 4974 enum x86_64_reg_class subclasses[MAX_CLASSES];
4973 4975
4974 4976 /* On x86-64 we pass structures larger than 32 bytes on the stack. */
4975 4977 if (bytes > 32)
4976 4978 return 0;
4977 4979
4978 4980 for (i = 0; i < words; i++)
4979 4981 classes[i] = X86_64_NO_CLASS;
4980 4982
4981 4983 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
4982 4984         signal the memory class, so handle it as a special case. */
4983 4985 if (!words)
4984 4986 {
4985 4987 classes[0] = X86_64_NO_CLASS;
4986 4988 return 1;
4987 4989 }
4988 4990
4989 4991 /* Classify each field of record and merge classes. */
4990 4992 switch (TREE_CODE (type))
4991 4993 {
4992 4994 case RECORD_TYPE:
4993 4995 /* And now merge the fields of structure. */
4994 4996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4995 4997 {
4996 4998 if (TREE_CODE (field) == FIELD_DECL)
4997 4999 {
4998 5000 int num;
4999 5001
5000 5002 if (TREE_TYPE (field) == error_mark_node)
5001 5003 continue;
5002 5004
5003 5005 /* Bitfields are always classified as integer. Handle them
5004 5006 early, since later code would consider them to be
5005 5007 misaligned integers. */
5006 5008 if (DECL_BIT_FIELD (field))
5007 5009 {
5008 5010 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5009 5011 i < ((int_bit_position (field) + (bit_offset % 64))
5010 5012 + tree_low_cst (DECL_SIZE (field), 0)
5011 5013 + 63) / 8 / 8; i++)
5012 5014 classes[i] =
5013 5015 merge_classes (X86_64_INTEGER_CLASS,
5014 5016 classes[i]);
5015 5017 }
5016 5018 else
5017 5019 {
5018 5020 type = TREE_TYPE (field);
5019 5021
5020 5022 /* Flexible array member is ignored. */
5021 5023 if (TYPE_MODE (type) == BLKmode
5022 5024 && TREE_CODE (type) == ARRAY_TYPE
5023 5025 && TYPE_SIZE (type) == NULL_TREE
5024 5026 && TYPE_DOMAIN (type) != NULL_TREE
5025 5027 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type))
5026 5028 == NULL_TREE))
5027 5029 {
5028 5030 static bool warned;
5029 5031
5030 5032 if (!warned && warn_psabi)
5031 5033 {
5032 5034 warned = true;
5033 5035 inform (input_location,
5034 5036 "The ABI of passing struct with"
5035 5037 " a flexible array member has"
5036 5038 " changed in GCC 4.4");
5037 5039 }
5038 5040 continue;
5039 5041 }
5040 5042 num = classify_argument (TYPE_MODE (type), type,
5041 5043 subclasses,
5042 5044 (int_bit_position (field)
5043 5045 + bit_offset) % 256);
5044 5046 if (!num)
5045 5047 return 0;
5046 5048 for (i = 0; i < num; i++)
5047 5049 {
5048 5050 int pos =
5049 5051 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
5050 5052 classes[i + pos] =
5051 5053 merge_classes (subclasses[i], classes[i + pos]);
5052 5054 }
5053 5055 }
5054 5056 }
5055 5057 }
5056 5058 break;
5057 5059
5058 5060 case ARRAY_TYPE:
5059 5061 /* Arrays are handled as small records. */
5060 5062 {
5061 5063 int num;
5062 5064 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
5063 5065 TREE_TYPE (type), subclasses, bit_offset);
5064 5066 if (!num)
5065 5067 return 0;
5066 5068
5067 5069 /* The partial classes are now full classes. */
5068 5070 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
5069 5071 subclasses[0] = X86_64_SSE_CLASS;
5070 5072 if (subclasses[0] == X86_64_INTEGERSI_CLASS
5071 5073 && !((bit_offset % 64) == 0 && bytes == 4))
5072 5074 subclasses[0] = X86_64_INTEGER_CLASS;
5073 5075
5074 5076 for (i = 0; i < words; i++)
5075 5077 classes[i] = subclasses[i % num];
5076 5078
5077 5079 break;
5078 5080 }
5079 5081 case UNION_TYPE:
5080 5082 case QUAL_UNION_TYPE:
5081 5083 /* Unions are similar to RECORD_TYPE but offset is always 0.
5082 5084 */
5083 5085 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
5084 5086 {
5085 5087 if (TREE_CODE (field) == FIELD_DECL)
5086 5088 {
5087 5089 int num;
5088 5090
5089 5091 if (TREE_TYPE (field) == error_mark_node)
5090 5092 continue;
5091 5093
5092 5094 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
5093 5095 TREE_TYPE (field), subclasses,
5094 5096 bit_offset);
5095 5097 if (!num)
5096 5098 return 0;
5097 5099 for (i = 0; i < num; i++)
5098 5100 classes[i] = merge_classes (subclasses[i], classes[i]);
5099 5101 }
5100 5102 }
5101 5103 break;
5102 5104
5103 5105 default:
5104 5106 gcc_unreachable ();
5105 5107 }
5106 5108
5107 5109 if (words > 2)
5108 5110 {
5109 5111 /* When size > 16 bytes, if the first one isn't
5110 5112 X86_64_SSE_CLASS or any other ones aren't
5111 5113 X86_64_SSEUP_CLASS, everything should be passed in
5112 5114 memory. */
5113 5115 if (classes[0] != X86_64_SSE_CLASS)
5114 5116 return 0;
5115 5117
5116 5118 for (i = 1; i < words; i++)
5117 5119 if (classes[i] != X86_64_SSEUP_CLASS)
5118 5120 return 0;
5119 5121 }
5120 5122
5121 5123 /* Final merger cleanup. */
5122 5124 for (i = 0; i < words; i++)
5123 5125 {
5124 5126 /* If one class is MEMORY, everything should be passed in
5125 5127 memory. */
5126 5128 if (classes[i] == X86_64_MEMORY_CLASS)
5127 5129 return 0;
5128 5130
5129 5131 /* The X86_64_SSEUP_CLASS should be always preceded by
5130 5132 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */
5131 5133 if (classes[i] == X86_64_SSEUP_CLASS
5132 5134 && classes[i - 1] != X86_64_SSE_CLASS
5133 5135 && classes[i - 1] != X86_64_SSEUP_CLASS)
5134 5136 {
5135 5137 /* The first one should never be X86_64_SSEUP_CLASS. */
5136 5138 gcc_assert (i != 0);
5137 5139 classes[i] = X86_64_SSE_CLASS;
5138 5140 }
5139 5141
5140 5142 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS,
5141 5143 everything should be passed in memory. */
5142 5144 if (classes[i] == X86_64_X87UP_CLASS
5143 5145 && (classes[i - 1] != X86_64_X87_CLASS))
5144 5146 {
5145 5147 static bool warned;
5146 5148
5147 5149 /* The first one should never be X86_64_X87UP_CLASS. */
5148 5150 gcc_assert (i != 0);
5149 5151 if (!warned && warn_psabi)
5150 5152 {
5151 5153 warned = true;
5152 5154 inform (input_location,
5153 5155 "The ABI of passing union with long double"
5154 5156 " has changed in GCC 4.4");
5155 5157 }
5156 5158 return 0;
5157 5159 }
5158 5160 }
5159 5161 return words;
5160 5162 }
5161 5163
5162 5164 /* Compute alignment needed. We align all types to natural boundaries with
5163 5165 exception of XFmode that is aligned to 64bits. */
5164 5166 if (mode != VOIDmode && mode != BLKmode)
5165 5167 {
5166 5168 int mode_alignment = GET_MODE_BITSIZE (mode);
5167 5169
5168 5170 if (mode == XFmode)
5169 5171 mode_alignment = 128;
5170 5172 else if (mode == XCmode)
5171 5173 mode_alignment = 256;
5172 5174 if (COMPLEX_MODE_P (mode))
5173 5175 mode_alignment /= 2;
5174 5176 /* Misaligned fields are always returned in memory. */
5175 5177 if (bit_offset % mode_alignment)
5176 5178 return 0;
5177 5179 }
5178 5180
5179 5181 /* for V1xx modes, just use the base mode */
5180 5182 if (VECTOR_MODE_P (mode) && mode != V1DImode
5181 5183 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
5182 5184 mode = GET_MODE_INNER (mode);
5183 5185
5184 5186 /* Classification of atomic types. */
5185 5187 switch (mode)
5186 5188 {
5187 5189 case SDmode:
5188 5190 case DDmode:
5189 5191 classes[0] = X86_64_SSE_CLASS;
5190 5192 return 1;
5191 5193 case TDmode:
5192 5194 classes[0] = X86_64_SSE_CLASS;
5193 5195 classes[1] = X86_64_SSEUP_CLASS;
5194 5196 return 2;
5195 5197 case DImode:
5196 5198 case SImode:
5197 5199 case HImode:
5198 5200 case QImode:
5199 5201 case CSImode:
5200 5202 case CHImode:
5201 5203 case CQImode:
5202 5204 {
5203 5205 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode);
5204 5206
5205 5207 if (size <= 32)
5206 5208 {
5207 5209 classes[0] = X86_64_INTEGERSI_CLASS;
5208 5210 return 1;
5209 5211 }
5210 5212 else if (size <= 64)
5211 5213 {
5212 5214 classes[0] = X86_64_INTEGER_CLASS;
5213 5215 return 1;
5214 5216 }
5215 5217 else if (size <= 64+32)
5216 5218 {
5217 5219 classes[0] = X86_64_INTEGER_CLASS;
5218 5220 classes[1] = X86_64_INTEGERSI_CLASS;
5219 5221 return 2;
5220 5222 }
5221 5223 else if (size <= 64+64)
5222 5224 {
5223 5225 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5224 5226 return 2;
5225 5227 }
5226 5228 else
5227 5229 gcc_unreachable ();
5228 5230 }
5229 5231 case CDImode:
5230 5232 case TImode:
5231 5233 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
5232 5234 return 2;
5233 5235 case COImode:
5234 5236 case OImode:
5235 5237 /* OImode shouldn't be used directly. */
5236 5238 gcc_unreachable ();
5237 5239 case CTImode:
5238 5240 return 0;
5239 5241 case SFmode:
5240 5242 if (!(bit_offset % 64))
5241 5243 classes[0] = X86_64_SSESF_CLASS;
5242 5244 else
5243 5245 classes[0] = X86_64_SSE_CLASS;
5244 5246 return 1;
5245 5247 case DFmode:
5246 5248 classes[0] = X86_64_SSEDF_CLASS;
5247 5249 return 1;
5248 5250 case XFmode:
5249 5251 classes[0] = X86_64_X87_CLASS;
5250 5252 classes[1] = X86_64_X87UP_CLASS;
5251 5253 return 2;
5252 5254 case TFmode:
5253 5255 classes[0] = X86_64_SSE_CLASS;
5254 5256 classes[1] = X86_64_SSEUP_CLASS;
5255 5257 return 2;
5256 5258 case SCmode:
5257 5259 classes[0] = X86_64_SSE_CLASS;
5258 5260 if (!(bit_offset % 64))
5259 5261 return 1;
5260 5262 else
5261 5263 {
5262 5264 static bool warned;
5263 5265
5264 5266 if (!warned && warn_psabi)
5265 5267 {
5266 5268 warned = true;
5267 5269 inform (input_location,
5268 5270 "The ABI of passing structure with complex float"
5269 5271 " member has changed in GCC 4.4");
5270 5272 }
5271 5273 classes[1] = X86_64_SSESF_CLASS;
5272 5274 return 2;
5273 5275 }
5274 5276 case DCmode:
5275 5277 classes[0] = X86_64_SSEDF_CLASS;
5276 5278 classes[1] = X86_64_SSEDF_CLASS;
5277 5279 return 2;
5278 5280 case XCmode:
5279 5281 classes[0] = X86_64_COMPLEX_X87_CLASS;
5280 5282 return 1;
5281 5283 case TCmode:
5282 5284      /* This mode is larger than 16 bytes. */
5283 5285 return 0;
5284 5286 case V8SFmode:
5285 5287 case V8SImode:
5286 5288 case V32QImode:
5287 5289 case V16HImode:
5288 5290 case V4DFmode:
5289 5291 case V4DImode:
5290 5292 classes[0] = X86_64_SSE_CLASS;
5291 5293 classes[1] = X86_64_SSEUP_CLASS;
5292 5294 classes[2] = X86_64_SSEUP_CLASS;
5293 5295 classes[3] = X86_64_SSEUP_CLASS;
5294 5296 return 4;
5295 5297 case V4SFmode:
5296 5298 case V4SImode:
5297 5299 case V16QImode:
5298 5300 case V8HImode:
5299 5301 case V2DFmode:
5300 5302 case V2DImode:
5301 5303 classes[0] = X86_64_SSE_CLASS;
5302 5304 classes[1] = X86_64_SSEUP_CLASS;
5303 5305 return 2;
5304 5306 case V1DImode:
5305 5307 case V2SFmode:
5306 5308 case V2SImode:
5307 5309 case V4HImode:
5308 5310 case V8QImode:
5309 5311 classes[0] = X86_64_SSE_CLASS;
5310 5312 return 1;
5311 5313 case BLKmode:
5312 5314 case VOIDmode:
5313 5315 return 0;
5314 5316 default:
5315 5317 gcc_assert (VECTOR_MODE_P (mode));
5316 5318
5317 5319 if (bytes > 16)
5318 5320 return 0;
5319 5321
5320 5322 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
5321 5323
5322 5324 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
5323 5325 classes[0] = X86_64_INTEGERSI_CLASS;
5324 5326 else
5325 5327 classes[0] = X86_64_INTEGER_CLASS;
5326 5328 classes[1] = X86_64_INTEGER_CLASS;
5327 5329 return 1 + (bytes > 8);
5328 5330 }
5329 5331 }
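
As a worked example of the classification above, a 16-byte struct containing a double followed by two ints splits into two eightbytes: the first classifies as SSEDF (the double), the second as INTEGER (the two ints merged), so the whole aggregate travels in one SSE register and one integer register. A hedged sketch (type and function names are hypothetical):

    /* Sketch: hypothetical aggregate illustrating classify_argument on x86-64.
       Eightbyte 0 (the double) -> X86_64_SSEDF_CLASS;
       eightbyte 1 (the two ints) -> X86_64_INTEGER_CLASS. */
    struct pair { double d; int lo; int hi; };
    extern void take_pair (struct pair p);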
5330 5332
5331 5333 /* Examine the argument and return set number of register required in each
5332 5334 class. Return 0 iff parameter should be passed in memory. */
5333 5335 static int
5334 5336 examine_argument (enum machine_mode mode, const_tree type, int in_return,
5335 5337 int *int_nregs, int *sse_nregs)
5336 5338 {
5337 5339 enum x86_64_reg_class regclass[MAX_CLASSES];
5338 5340 int n = classify_argument (mode, type, regclass, 0);
5339 5341
5340 5342 *int_nregs = 0;
5341 5343 *sse_nregs = 0;
5342 5344 if (!n)
5343 5345 return 0;
5344 5346 for (n--; n >= 0; n--)
5345 5347 switch (regclass[n])
5346 5348 {
5347 5349 case X86_64_INTEGER_CLASS:
5348 5350 case X86_64_INTEGERSI_CLASS:
5349 5351 (*int_nregs)++;
5350 5352 break;
5351 5353 case X86_64_SSE_CLASS:
5352 5354 case X86_64_SSESF_CLASS:
5353 5355 case X86_64_SSEDF_CLASS:
5354 5356 (*sse_nregs)++;
5355 5357 break;
5356 5358 case X86_64_NO_CLASS:
5357 5359 case X86_64_SSEUP_CLASS:
5358 5360 break;
5359 5361 case X86_64_X87_CLASS:
5360 5362 case X86_64_X87UP_CLASS:
5361 5363 if (!in_return)
5362 5364 return 0;
5363 5365 break;
5364 5366 case X86_64_COMPLEX_X87_CLASS:
5365 5367 return in_return ? 2 : 0;
5366 5368 case X86_64_MEMORY_CLASS:
5367 5369 gcc_unreachable ();
5368 5370 }
5369 5371 return 1;
5370 5372 }
5371 5373
5372 5374 /* Construct container for the argument used by GCC interface. See
5373 5375 FUNCTION_ARG for the detailed description. */
5374 5376
5375 5377 static rtx
5376 5378 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
5377 5379 const_tree type, int in_return, int nintregs, int nsseregs,
5378 5380 const int *intreg, int sse_regno)
5379 5381 {
5380 5382 /* The following variables hold the static issued_error state. */
5381 5383 static bool issued_sse_arg_error;
5382 5384 static bool issued_sse_ret_error;
5383 5385 static bool issued_x87_ret_error;
5384 5386
5385 5387 enum machine_mode tmpmode;
5386 5388 int bytes =
5387 5389 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
5388 5390 enum x86_64_reg_class regclass[MAX_CLASSES];
5389 5391 int n;
5390 5392 int i;
5391 5393 int nexps = 0;
5392 5394 int needed_sseregs, needed_intregs;
5393 5395 rtx exp[MAX_CLASSES];
5394 5396 rtx ret;
5395 5397
5396 5398 n = classify_argument (mode, type, regclass, 0);
5397 5399 if (!n)
5398 5400 return NULL;
5399 5401 if (!examine_argument (mode, type, in_return, &needed_intregs,
5400 5402 &needed_sseregs))
5401 5403 return NULL;
5402 5404 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
5403 5405 return NULL;
5404 5406
5405 5407 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
5406 5408 some less clueful developer tries to use floating-point anyway. */
5407 5409 if (needed_sseregs && !TARGET_SSE)
5408 5410 {
5409 5411 if (in_return)
5410 5412 {
5411 5413 if (!issued_sse_ret_error)
5412 5414 {
5413 5415 error ("SSE register return with SSE disabled");
5414 5416 issued_sse_ret_error = true;
5415 5417 }
5416 5418 }
5417 5419 else if (!issued_sse_arg_error)
5418 5420 {
5419 5421 error ("SSE register argument with SSE disabled");
5420 5422 issued_sse_arg_error = true;
5421 5423 }
5422 5424 return NULL;
5423 5425 }
5424 5426
5425 5427 /* Likewise, error if the ABI requires us to return values in the
5426 5428 x87 registers and the user specified -mno-80387. */
5427 5429 if (!TARGET_80387 && in_return)
5428 5430 for (i = 0; i < n; i++)
5429 5431 if (regclass[i] == X86_64_X87_CLASS
5430 5432 || regclass[i] == X86_64_X87UP_CLASS
5431 5433 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
5432 5434 {
5433 5435 if (!issued_x87_ret_error)
5434 5436 {
5435 5437 error ("x87 register return with x87 disabled");
5436 5438 issued_x87_ret_error = true;
5437 5439 }
5438 5440 return NULL;
5439 5441 }
5440 5442
5441 5443 /* First construct simple cases. Avoid SCmode, since we want to use
5442 5444 single register to pass this type. */
5443 5445 if (n == 1 && mode != SCmode)
5444 5446 switch (regclass[0])
5445 5447 {
5446 5448 case X86_64_INTEGER_CLASS:
5447 5449 case X86_64_INTEGERSI_CLASS:
5448 5450 return gen_rtx_REG (mode, intreg[0]);
5449 5451 case X86_64_SSE_CLASS:
5450 5452 case X86_64_SSESF_CLASS:
5451 5453 case X86_64_SSEDF_CLASS:
5452 5454 if (mode != BLKmode)
5453 5455 return gen_reg_or_parallel (mode, orig_mode,
5454 5456 SSE_REGNO (sse_regno));
5455 5457 break;
5456 5458 case X86_64_X87_CLASS:
5457 5459 case X86_64_COMPLEX_X87_CLASS:
5458 5460 return gen_rtx_REG (mode, FIRST_STACK_REG);
5459 5461 case X86_64_NO_CLASS:
5460 5462 /* Zero sized array, struct or class. */
5461 5463 return NULL;
5462 5464 default:
5463 5465 gcc_unreachable ();
5464 5466 }
5465 5467 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
5466 5468 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
5467 5469 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5468 5470 if (n == 4
5469 5471 && regclass[0] == X86_64_SSE_CLASS
5470 5472 && regclass[1] == X86_64_SSEUP_CLASS
5471 5473 && regclass[2] == X86_64_SSEUP_CLASS
5472 5474 && regclass[3] == X86_64_SSEUP_CLASS
5473 5475 && mode != BLKmode)
5474 5476 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
5475 5477
5476 5478 if (n == 2
5477 5479 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
5478 5480 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
5479 5481 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
5480 5482 && regclass[1] == X86_64_INTEGER_CLASS
5481 5483 && (mode == CDImode || mode == TImode || mode == TFmode)
5482 5484 && intreg[0] + 1 == intreg[1])
5483 5485 return gen_rtx_REG (mode, intreg[0]);
5484 5486
5485 5487 /* Otherwise figure out the entries of the PARALLEL. */
5486 5488 for (i = 0; i < n; i++)
5487 5489 {
5488 5490 int pos;
5489 5491
5490 5492 switch (regclass[i])
5491 5493 {
5492 5494 case X86_64_NO_CLASS:
5493 5495 break;
5494 5496 case X86_64_INTEGER_CLASS:
5495 5497 case X86_64_INTEGERSI_CLASS:
5496 5498 /* Merge TImodes on aligned occasions here too. */
5497 5499 if (i * 8 + 8 > bytes)
5498 5500 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
5499 5501 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
5500 5502 tmpmode = SImode;
5501 5503 else
5502 5504 tmpmode = DImode;
5503 5505 /* We've requested 24 bytes we don't have mode for. Use DImode. */
5504 5506 if (tmpmode == BLKmode)
5505 5507 tmpmode = DImode;
5506 5508 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5507 5509 gen_rtx_REG (tmpmode, *intreg),
5508 5510 GEN_INT (i*8));
5509 5511 intreg++;
5510 5512 break;
5511 5513 case X86_64_SSESF_CLASS:
5512 5514 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5513 5515 gen_rtx_REG (SFmode,
5514 5516 SSE_REGNO (sse_regno)),
5515 5517 GEN_INT (i*8));
5516 5518 sse_regno++;
5517 5519 break;
5518 5520 case X86_64_SSEDF_CLASS:
5519 5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5520 5522 gen_rtx_REG (DFmode,
5521 5523 SSE_REGNO (sse_regno)),
5522 5524 GEN_INT (i*8));
5523 5525 sse_regno++;
5524 5526 break;
5525 5527 case X86_64_SSE_CLASS:
5526 5528 pos = i;
5527 5529 switch (n)
5528 5530 {
5529 5531 case 1:
5530 5532 tmpmode = DImode;
5531 5533 break;
5532 5534 case 2:
5533 5535 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS)
5534 5536 {
5535 5537 tmpmode = TImode;
5536 5538 i++;
5537 5539 }
5538 5540 else
5539 5541 tmpmode = DImode;
5540 5542 break;
5541 5543 case 4:
5542 5544 gcc_assert (i == 0
5543 5545 && regclass[1] == X86_64_SSEUP_CLASS
5544 5546 && regclass[2] == X86_64_SSEUP_CLASS
5545 5547 && regclass[3] == X86_64_SSEUP_CLASS);
5546 5548 tmpmode = OImode;
5547 5549 i += 3;
5548 5550 break;
5549 5551 default:
5550 5552 gcc_unreachable ();
5551 5553 }
5552 5554 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
5553 5555 gen_rtx_REG (tmpmode,
5554 5556 SSE_REGNO (sse_regno)),
5555 5557 GEN_INT (pos*8));
5556 5558 sse_regno++;
5557 5559 break;
5558 5560 default:
5559 5561 gcc_unreachable ();
5560 5562 }
5561 5563 }
5562 5564
5563 5565 /* Empty aligned struct, union or class. */
5564 5566 if (nexps == 0)
5565 5567 return NULL;
5566 5568
5567 5569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
5568 5570 for (i = 0; i < nexps; i++)
5569 5571 XVECEXP (ret, 0, i) = exp [i];
5570 5572 return ret;
5571 5573 }
5572 5574
5573 5575 /* Update the data in CUM to advance over an argument of mode MODE
5574 5576 and data type TYPE. (TYPE is null for libcalls where that information
5575 5577 may not be available.) */
5576 5578
5577 5579 static void
5578 5580 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5579 5581 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5580 5582 {
5581 5583 switch (mode)
5582 5584 {
5583 5585 default:
5584 5586 break;
5585 5587
5586 5588 case BLKmode:
5587 5589 if (bytes < 0)
5588 5590 break;
5589 5591 /* FALLTHRU */
5590 5592
5591 5593 case DImode:
5592 5594 case SImode:
5593 5595 case HImode:
5594 5596 case QImode:
5595 5597 cum->words += words;
5596 5598 cum->nregs -= words;
5597 5599 cum->regno += words;
5598 5600
5599 5601 if (cum->nregs <= 0)
5600 5602 {
5601 5603 cum->nregs = 0;
5602 5604 cum->regno = 0;
5603 5605 }
5604 5606 break;
5605 5607
5606 5608 case OImode:
5607 5609 /* OImode shouldn't be used directly. */
5608 5610 gcc_unreachable ();
5609 5611
5610 5612 case DFmode:
5611 5613 if (cum->float_in_sse < 2)
5612 5614 break;
5613 5615 case SFmode:
5614 5616 if (cum->float_in_sse < 1)
5615 5617 break;
5616 5618 /* FALLTHRU */
5617 5619
5618 5620 case V8SFmode:
5619 5621 case V8SImode:
5620 5622 case V32QImode:
5621 5623 case V16HImode:
5622 5624 case V4DFmode:
5623 5625 case V4DImode:
5624 5626 case TImode:
5625 5627 case V16QImode:
5626 5628 case V8HImode:
5627 5629 case V4SImode:
5628 5630 case V2DImode:
5629 5631 case V4SFmode:
5630 5632 case V2DFmode:
5631 5633 if (!type || !AGGREGATE_TYPE_P (type))
5632 5634 {
5633 5635 cum->sse_words += words;
5634 5636 cum->sse_nregs -= 1;
5635 5637 cum->sse_regno += 1;
5636 5638 if (cum->sse_nregs <= 0)
5637 5639 {
5638 5640 cum->sse_nregs = 0;
5639 5641 cum->sse_regno = 0;
5640 5642 }
5641 5643 }
5642 5644 break;
5643 5645
5644 5646 case V8QImode:
5645 5647 case V4HImode:
5646 5648 case V2SImode:
5647 5649 case V2SFmode:
5648 5650 case V1DImode:
5649 5651 if (!type || !AGGREGATE_TYPE_P (type))
5650 5652 {
5651 5653 cum->mmx_words += words;
5652 5654 cum->mmx_nregs -= 1;
5653 5655 cum->mmx_regno += 1;
5654 5656 if (cum->mmx_nregs <= 0)
5655 5657 {
5656 5658 cum->mmx_nregs = 0;
5657 5659 cum->mmx_regno = 0;
5658 5660 }
5659 5661 }
5660 5662 break;
5661 5663 }
5662 5664 }
5663 5665
5664 5666 static void
5665 5667 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5666 5668 tree type, HOST_WIDE_INT words, int named)
5667 5669 {
5668 5670 int int_nregs, sse_nregs;
5669 5671
5670 5672 /* Unnamed 256bit vector mode parameters are passed on stack. */
5671 5673 if (!named && VALID_AVX256_REG_MODE (mode))
5672 5674 return;
5673 5675
5674 5676 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
5675 5677 cum->words += words;
5676 5678 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
5677 5679 {
5678 5680 cum->nregs -= int_nregs;
5679 5681 cum->sse_nregs -= sse_nregs;
5680 5682 cum->regno += int_nregs;
5681 5683 cum->sse_regno += sse_nregs;
5682 5684 }
5683 5685 else
5684 5686 cum->words += words;
5685 5687 }
5686 5688
5687 5689 static void
5688 5690 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
5689 5691 HOST_WIDE_INT words)
5690 5692 {
5691 5693   /* Otherwise, this should be passed indirectly. */
5692 5694 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
5693 5695
5694 5696 cum->words += words;
5695 5697 if (cum->nregs > 0)
5696 5698 {
5697 5699 cum->nregs -= 1;
5698 5700 cum->regno += 1;
5699 5701 }
5700 5702 }
5701 5703
5702 5704 void
5703 5705 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5704 5706 tree type, int named)
5705 5707 {
5706 5708 HOST_WIDE_INT bytes, words;
5707 5709
5708 5710 if (mode == BLKmode)
5709 5711 bytes = int_size_in_bytes (type);
5710 5712 else
5711 5713 bytes = GET_MODE_SIZE (mode);
5712 5714 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5713 5715
5714 5716 if (type)
5715 5717 mode = type_natural_mode (type, NULL);
5716 5718
5717 5719 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5718 5720 function_arg_advance_ms_64 (cum, bytes, words);
5719 5721 else if (TARGET_64BIT)
5720 5722 function_arg_advance_64 (cum, mode, type, words, named);
5721 5723 else
5722 5724 function_arg_advance_32 (cum, mode, type, bytes, words);
5723 5725 }
5724 5726
5725 5727 /* Define where to put the arguments to a function.
5726 5728 Value is zero to push the argument on the stack,
5727 5729 or a hard register in which to store the argument.
5728 5730
5729 5731 MODE is the argument's machine mode.
5730 5732 TYPE is the data type of the argument (as a tree).
5731 5733 This is null for libcalls where that information may
5732 5734 not be available.
5733 5735 CUM is a variable of type CUMULATIVE_ARGS which gives info about
5734 5736 the preceding args and about the function being called.
5735 5737 NAMED is nonzero if this argument is a named parameter
5736 5738 (otherwise it is an extra parameter matching an ellipsis). */
5737 5739
5738 5740 static rtx
5739 5741 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5740 5742 enum machine_mode orig_mode, tree type,
5741 5743 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
5742 5744 {
5743 5745 static bool warnedsse, warnedmmx;
5744 5746
5745 5747 /* Avoid the AL settings for the Unix64 ABI. */
5746 5748 if (mode == VOIDmode)
5747 5749 return constm1_rtx;
5748 5750
5749 5751 switch (mode)
5750 5752 {
5751 5753 default:
5752 5754 break;
5753 5755
5754 5756 case BLKmode:
5755 5757 if (bytes < 0)
5756 5758 break;
5757 5759 /* FALLTHRU */
5758 5760 case DImode:
5759 5761 case SImode:
5760 5762 case HImode:
5761 5763 case QImode:
5762 5764 if (words <= cum->nregs)
5763 5765 {
5764 5766 int regno = cum->regno;
5765 5767
5766 5768 /* Fastcall allocates the first two DWORD (SImode) or
5767 5769 smaller arguments to ECX and EDX if it isn't an
5768 5770         aggregate type. */
5769 5771 if (cum->fastcall)
5770 5772 {
5771 5773 if (mode == BLKmode
5772 5774 || mode == DImode
5773 5775 || (type && AGGREGATE_TYPE_P (type)))
5774 5776 break;
5775 5777
5776 5778 /* ECX not EAX is the first allocated register. */
5777 5779 if (regno == AX_REG)
5778 5780 regno = CX_REG;
5779 5781 }
5780 5782 return gen_rtx_REG (mode, regno);
5781 5783 }
5782 5784 break;
5783 5785
5784 5786 case DFmode:
5785 5787 if (cum->float_in_sse < 2)
5786 5788 break;
5787 5789 case SFmode:
5788 5790 if (cum->float_in_sse < 1)
5789 5791 break;
5790 5792 /* FALLTHRU */
5791 5793 case TImode:
5792 5794 /* In 32bit, we pass TImode in xmm registers. */
5793 5795 case V16QImode:
5794 5796 case V8HImode:
5795 5797 case V4SImode:
5796 5798 case V2DImode:
5797 5799 case V4SFmode:
5798 5800 case V2DFmode:
5799 5801 if (!type || !AGGREGATE_TYPE_P (type))
5800 5802 {
5801 5803 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
5802 5804 {
5803 5805 warnedsse = true;
5804 5806 warning (0, "SSE vector argument without SSE enabled "
5805 5807 "changes the ABI");
5806 5808 }
5807 5809 if (cum->sse_nregs)
5808 5810 return gen_reg_or_parallel (mode, orig_mode,
5809 5811 cum->sse_regno + FIRST_SSE_REG);
5810 5812 }
5811 5813 break;
5812 5814
5813 5815 case OImode:
5814 5816 /* OImode shouldn't be used directly. */
5815 5817 gcc_unreachable ();
5816 5818
5817 5819 case V8SFmode:
5818 5820 case V8SImode:
5819 5821 case V32QImode:
5820 5822 case V16HImode:
5821 5823 case V4DFmode:
5822 5824 case V4DImode:
5823 5825 if (!type || !AGGREGATE_TYPE_P (type))
5824 5826 {
5825 5827 if (cum->sse_nregs)
5826 5828 return gen_reg_or_parallel (mode, orig_mode,
5827 5829 cum->sse_regno + FIRST_SSE_REG);
5828 5830 }
5829 5831 break;
5830 5832
5831 5833 case V8QImode:
5832 5834 case V4HImode:
5833 5835 case V2SImode:
5834 5836 case V2SFmode:
5835 5837 case V1DImode:
5836 5838 if (!type || !AGGREGATE_TYPE_P (type))
5837 5839 {
5838 5840 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
5839 5841 {
5840 5842 warnedmmx = true;
5841 5843 warning (0, "MMX vector argument without MMX enabled "
5842 5844 "changes the ABI");
5843 5845 }
5844 5846 if (cum->mmx_nregs)
5845 5847 return gen_reg_or_parallel (mode, orig_mode,
5846 5848 cum->mmx_regno + FIRST_MMX_REG);
5847 5849 }
5848 5850 break;
5849 5851 }
5850 5852
5851 5853 return NULL_RTX;
5852 5854 }
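
function_arg_32 also implements the fastcall rule noted in its comment: the first two word-sized, non-aggregate arguments are allocated to %ecx and %edx, with %ecx first. A short sketch (illustrative declaration):

    /* Sketch: with fastcall on i386, 'a' lands in %ecx, 'b' in %edx, and a
       third integer argument goes on the stack. */
    int __attribute__((fastcall)) fast2 (int a, int b, int c);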
5853 5855
5854 5856 static rtx
5855 5857 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5856 5858 enum machine_mode orig_mode, tree type, int named)
5857 5859 {
5858 5860 /* Handle a hidden AL argument containing number of registers
5859 5861 for varargs x86-64 functions. */
5860 5862 if (mode == VOIDmode)
5861 5863 return GEN_INT (cum->maybe_vaarg
5862 5864 ? (cum->sse_nregs < 0
5863 5865 ? (cum->call_abi == DEFAULT_ABI
5864 5866 ? SSE_REGPARM_MAX
5865 5867 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX
5866 5868 : X64_SSE_REGPARM_MAX))
5867 5869 : cum->sse_regno)
5868 5870 : -1);
5869 5871
5870 5872 switch (mode)
5871 5873 {
5872 5874 default:
5873 5875 break;
5874 5876
5875 5877 case V8SFmode:
5876 5878 case V8SImode:
5877 5879 case V32QImode:
5878 5880 case V16HImode:
5879 5881 case V4DFmode:
5880 5882 case V4DImode:
5881 5883 /* Unnamed 256bit vector mode parameters are passed on stack. */
5882 5884 if (!named)
5883 5885 return NULL;
5884 5886 break;
5885 5887 }
5886 5888
5887 5889 return construct_container (mode, orig_mode, type, 0, cum->nregs,
5888 5890 cum->sse_nregs,
5889 5891 &x86_64_int_parameter_registers [cum->regno],
5890 5892 cum->sse_regno);
5891 5893 }
5892 5894
5893 5895 static rtx
5894 5896 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
5895 5897 enum machine_mode orig_mode, int named,
5896 5898 HOST_WIDE_INT bytes)
5897 5899 {
5898 5900 unsigned int regno;
5899 5901
5900 5902 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call.
5901 5903 We use value of -2 to specify that current function call is MSABI. */
5902 5904 if (mode == VOIDmode)
5903 5905 return GEN_INT (-2);
5904 5906
5905 5907 /* If we've run out of registers, it goes on the stack. */
5906 5908 if (cum->nregs == 0)
5907 5909 return NULL_RTX;
5908 5910
5909 5911 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
5910 5912
5911 5913 /* Only floating point modes are passed in anything but integer regs. */
5912 5914 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
5913 5915 {
5914 5916 if (named)
5915 5917 regno = cum->regno + FIRST_SSE_REG;
5916 5918 else
5917 5919 {
5918 5920 rtx t1, t2;
5919 5921
5920 5922 /* Unnamed floating parameters are passed in both the
5921 5923 SSE and integer registers. */
5922 5924 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
5923 5925 t2 = gen_rtx_REG (mode, regno);
5924 5926 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
5925 5927 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
5926 5928 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
5927 5929 }
5928 5930 }
5929 5931   /* Handle aggregate types passed in registers.  */
5930 5932 if (orig_mode == BLKmode)
5931 5933 {
5932 5934 if (bytes > 0 && bytes <= 8)
5933 5935 mode = (bytes > 4 ? DImode : SImode);
5934 5936 if (mode == BLKmode)
5935 5937 mode = DImode;
5936 5938 }
5937 5939
5938 5940 return gen_reg_or_parallel (mode, orig_mode, regno);
5939 5941 }
5940 5942
5941 5943 rtx
5942 5944 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
5943 5945 tree type, int named)
5944 5946 {
5945 5947 enum machine_mode mode = omode;
5946 5948 HOST_WIDE_INT bytes, words;
5947 5949
5948 5950 if (mode == BLKmode)
5949 5951 bytes = int_size_in_bytes (type);
5950 5952 else
5951 5953 bytes = GET_MODE_SIZE (mode);
5952 5954 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
5953 5955
5954 5956 /* To simplify the code below, represent vector types with a vector mode
5955 5957 even if MMX/SSE are not active. */
5956 5958 if (type && TREE_CODE (type) == VECTOR_TYPE)
5957 5959 mode = type_natural_mode (type, cum);
5958 5960
5959 5961 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5960 5962 return function_arg_ms_64 (cum, mode, omode, named, bytes);
5961 5963 else if (TARGET_64BIT)
5962 5964 return function_arg_64 (cum, mode, omode, type, named);
5963 5965 else
5964 5966 return function_arg_32 (cum, mode, omode, type, bytes, words);
5965 5967 }
5966 5968
5967 5969 /* A C expression that indicates when an argument must be passed by
5968 5970 reference. If nonzero for an argument, a copy of that argument is
5969 5971 made in memory and a pointer to the argument is passed instead of
5970 5972 the argument itself. The pointer is passed in whatever way is
5971 5973 appropriate for passing a pointer to that type. */
5972 5974
5973 5975 static bool
5974 5976 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
5975 5977 enum machine_mode mode ATTRIBUTE_UNUSED,
5976 5978 const_tree type, bool named ATTRIBUTE_UNUSED)
5977 5979 {
5978 5980 /* See Windows x64 Software Convention. */
5979 5981 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI)
5980 5982 {
5981 5983 int msize = (int) GET_MODE_SIZE (mode);
5982 5984 if (type)
5983 5985 {
5984 5986 /* Arrays are passed by reference. */
5985 5987 if (TREE_CODE (type) == ARRAY_TYPE)
5986 5988 return true;
5987 5989
5988 5990 if (AGGREGATE_TYPE_P (type))
5989 5991 {
5990 5992 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
5991 5993 are passed by reference. */
5992 5994 msize = int_size_in_bytes (type);
5993 5995 }
5994 5996 }
5995 5997
5996 5998 /* __m128 is passed by reference. */
5997 5999 switch (msize) {
5998 6000 case 1: case 2: case 4: case 8:
5999 6001 break;
6000 6002 default:
6001 6003 return true;
6002 6004 }
6003 6005 }
6004 6006 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
6005 6007 return 1;
6006 6008
6007 6009 return 0;
6008 6010 }
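
A caller-side sketch (not part of the patch; the struct names are illustrative) of the Win64 rule implemented above when targeting the MS ABI: aggregates whose size is not 1, 2, 4 or 8 bytes, and any __m128, are passed by reference, i.e. the callee receives a pointer to a temporary copy, while the small sizes travel directly in registers.

#include <stdio.h>

struct by_value { int a, b; };      /* 8 bytes: passed directly in a register */
struct by_ref   { int a, b, c; };   /* 12 bytes: copied, pointer passed       */

static int sum2 (struct by_value v) { return v.a + v.b; }
static int sum3 (struct by_ref r)   { return r.a + r.b + r.c; }

int
main (void)
{
  struct by_value v = { 1, 2 };
  struct by_ref r = { 1, 2, 3 };
  printf ("%d %d\n", sum2 (v), sum3 (r));   /* prints 3 6 */
  return 0;
}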
6009 6011
6010 6012 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
6011 6013 ABI. */
6012 6014 static bool
6013 6015 contains_aligned_value_p (tree type)
6014 6016 {
6015 6017 enum machine_mode mode = TYPE_MODE (type);
6016 6018 if (((TARGET_SSE && SSE_REG_MODE_P (mode))
6017 6019 || mode == TDmode
6018 6020 || mode == TFmode
6019 6021 || mode == TCmode)
6020 6022 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
6021 6023 return true;
6022 6024 if (TYPE_ALIGN (type) < 128)
6023 6025 return false;
6024 6026
6025 6027 if (AGGREGATE_TYPE_P (type))
6026 6028 {
6027 6029 /* Walk the aggregates recursively. */
6028 6030 switch (TREE_CODE (type))
6029 6031 {
6030 6032 case RECORD_TYPE:
6031 6033 case UNION_TYPE:
6032 6034 case QUAL_UNION_TYPE:
6033 6035 {
6034 6036 tree field;
6035 6037
6036 6038 /* Walk all the structure fields. */
6037 6039 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6038 6040 {
6039 6041 if (TREE_CODE (field) == FIELD_DECL
6040 6042 && contains_aligned_value_p (TREE_TYPE (field)))
6041 6043 return true;
6042 6044 }
6043 6045 break;
6044 6046 }
6045 6047
6046 6048 case ARRAY_TYPE:
6047 6049 	/* Just for use if some languages pass arrays by value.  */
6048 6050 if (contains_aligned_value_p (TREE_TYPE (type)))
6049 6051 return true;
6050 6052 break;
6051 6053
6052 6054 default:
6053 6055 gcc_unreachable ();
6054 6056 }
6055 6057 }
6056 6058 return false;
6057 6059 }
6058 6060
6059 6061 /* Gives the alignment boundary, in bits, of an argument with the
6060 6062 specified mode and type. */
6061 6063
6062 6064 int
6063 6065 ix86_function_arg_boundary (enum machine_mode mode, tree type)
6064 6066 {
6065 6067 int align;
6066 6068 if (type)
6067 6069 {
6068 6070 /* Since canonical type is used for call, we convert it to
6069 6071 canonical type if needed. */
6070 6072 if (!TYPE_STRUCTURAL_EQUALITY_P (type))
6071 6073 type = TYPE_CANONICAL (type);
6072 6074 align = TYPE_ALIGN (type);
6073 6075 }
6074 6076 else
6075 6077 align = GET_MODE_ALIGNMENT (mode);
6076 6078 if (align < PARM_BOUNDARY)
6077 6079 align = PARM_BOUNDARY;
6078 6080 /* In 32bit, only _Decimal128 and __float128 are aligned to their
6079 6081 natural boundaries. */
6080 6082 if (!TARGET_64BIT && mode != TDmode && mode != TFmode)
6081 6083 {
6082 6084 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
6083 6085 make an exception for SSE modes since these require 128bit
6084 6086 alignment.
6085 6087
6086 6088 The handling here differs from field_alignment. ICC aligns MMX
6087 6089 arguments to 4 byte boundaries, while structure fields are aligned
6088 6090 to 8 byte boundaries. */
6089 6091 if (!type)
6090 6092 {
6091 6093 if (!(TARGET_SSE && SSE_REG_MODE_P (mode)))
6092 6094 align = PARM_BOUNDARY;
6093 6095 }
6094 6096 else
6095 6097 {
6096 6098 if (!contains_aligned_value_p (type))
6097 6099 align = PARM_BOUNDARY;
6098 6100 }
6099 6101 }
6100 6102 if (align > BIGGEST_ALIGNMENT)
6101 6103 align = BIGGEST_ALIGNMENT;
6102 6104 return align;
6103 6105 }
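
A hedged illustration of the 32-bit rules above; the program only prints the type alignments that contains_aligned_value_p consults, so it is a sketch rather than a direct probe of the argument boundary. The struct names are made up; compile with -msse on i386, where the SSE-containing type reports 16-byte alignment (hence a 128-bit argument boundary) and the plain aggregate falls back to PARM_BOUNDARY.

#include <stdio.h>
#include <xmmintrin.h>

struct has_vec { __m128 v; };   /* contains an SSE mode: 128-bit boundary */
struct plain   { int a, b; };   /* ordinary aggregate: PARM_BOUNDARY      */

int
main (void)
{
  printf ("%zu %zu\n",
          (size_t) __alignof__ (struct has_vec),    /* 16 */
          (size_t) __alignof__ (struct plain));     /* 4  */
  return 0;
}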
6104 6106
6105 6107 /* Return true if N is a possible register number of function value. */
6106 6108
6107 6109 bool
6108 6110 ix86_function_value_regno_p (int regno)
6109 6111 {
6110 6112 switch (regno)
6111 6113 {
6112 6114 case 0:
6113 6115 return true;
6114 6116
6115 6117 case FIRST_FLOAT_REG:
6116 6118 /* TODO: The function should depend on current function ABI but
6117 6119 builtins.c would need updating then. Therefore we use the
6118 6120 default ABI. */
6119 6121 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI)
6120 6122 return false;
6121 6123 return TARGET_FLOAT_RETURNS_IN_80387;
6122 6124
6123 6125 case FIRST_SSE_REG:
6124 6126 return TARGET_SSE;
6125 6127
6126 6128 case FIRST_MMX_REG:
6127 6129 if (TARGET_MACHO || TARGET_64BIT)
6128 6130 return false;
6129 6131 return TARGET_MMX;
6130 6132 }
6131 6133
6132 6134 return false;
6133 6135 }
6134 6136
6135 6137 /* Define how to find the value returned by a function.
6136 6138 VALTYPE is the data type of the value (as a tree).
6137 6139 If the precise function being called is known, FUNC is its FUNCTION_DECL;
6138 6140 otherwise, FUNC is 0. */
6139 6141
6140 6142 static rtx
6141 6143 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
6142 6144 const_tree fntype, const_tree fn)
6143 6145 {
6144 6146 unsigned int regno;
6145 6147
6146 6148 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
6147 6149 we normally prevent this case when mmx is not available. However
6148 6150 some ABIs may require the result to be returned like DImode. */
6149 6151 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6150 6152 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
6151 6153
6152 6154 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
6153 6155 we prevent this case when sse is not available. However some ABIs
6154 6156 may require the result to be returned like integer TImode. */
6155 6157 else if (mode == TImode
6156 6158 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6157 6159 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
6158 6160
6159 6161 /* 32-byte vector modes in %ymm0. */
6160 6162 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32)
6161 6163 regno = TARGET_AVX ? FIRST_SSE_REG : 0;
6162 6164
6163 6165 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
6164 6166 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
6165 6167 regno = FIRST_FLOAT_REG;
6166 6168 else
6167 6169 /* Most things go in %eax. */
6168 6170 regno = AX_REG;
6169 6171
6170 6172 /* Override FP return register with %xmm0 for local functions when
6171 6173 SSE math is enabled or for functions with sseregparm attribute. */
6172 6174 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
6173 6175 {
6174 6176 int sse_level = ix86_function_sseregparm (fntype, fn, false);
6175 6177 if ((sse_level >= 1 && mode == SFmode)
6176 6178 || (sse_level == 2 && mode == DFmode))
6177 6179 regno = FIRST_SSE_REG;
6178 6180 }
6179 6181
6180 6182 /* OImode shouldn't be used directly. */
6181 6183 gcc_assert (mode != OImode);
6182 6184
6183 6185 return gen_rtx_REG (orig_mode, regno);
6184 6186 }
6185 6187
6186 6188 static rtx
6187 6189 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
6188 6190 const_tree valtype)
6189 6191 {
6190 6192 rtx ret;
6191 6193
6192 6194 /* Handle libcalls, which don't provide a type node. */
6193 6195 if (valtype == NULL)
6194 6196 {
6195 6197 switch (mode)
6196 6198 {
6197 6199 case SFmode:
6198 6200 case SCmode:
6199 6201 case DFmode:
6200 6202 case DCmode:
6201 6203 case TFmode:
6202 6204 case SDmode:
6203 6205 case DDmode:
6204 6206 case TDmode:
6205 6207 return gen_rtx_REG (mode, FIRST_SSE_REG);
6206 6208 case XFmode:
6207 6209 case XCmode:
6208 6210 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
6209 6211 case TCmode:
6210 6212 return NULL;
6211 6213 default:
6212 6214 return gen_rtx_REG (mode, AX_REG);
6213 6215 }
6214 6216 }
6215 6217
6216 6218 ret = construct_container (mode, orig_mode, valtype, 1,
6217 6219 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX,
6218 6220 x86_64_int_return_registers, 0);
6219 6221
6220 6222   /* For zero-sized structures, construct_container returns NULL, but we
6221 6223      need to keep the rest of the compiler happy by returning a meaningful value.  */
6222 6224 if (!ret)
6223 6225 ret = gen_rtx_REG (orig_mode, AX_REG);
6224 6226
6225 6227 return ret;
6226 6228 }
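
For reference, a hedged sketch (per the SysV x86-64 psABI) of where the container built above places a few common return types; the declarations are purely illustrative.

double       ret_f  (void);                 /* DFmode            -> %xmm0        */
long double  ret_ld (void);                 /* XFmode            -> %st(0)       */
struct dpair { double a, b; };
struct dpair ret_dp (void);                 /* two SSE words     -> %xmm0, %xmm1 */
struct ipair { long a, b; };
struct ipair ret_ip (void);                 /* two INTEGER words -> %rax, %rdx   */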
6227 6229
6228 6230 static rtx
6229 6231 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
6230 6232 {
6231 6233 unsigned int regno = AX_REG;
6232 6234
6233 6235 if (TARGET_SSE)
6234 6236 {
6235 6237 switch (GET_MODE_SIZE (mode))
6236 6238 {
6237 6239 case 16:
6238 6240 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6239 6241 && !COMPLEX_MODE_P (mode))
6240 6242 regno = FIRST_SSE_REG;
6241 6243 break;
6242 6244 case 8:
6243 6245 case 4:
6244 6246 if (mode == SFmode || mode == DFmode)
6245 6247 regno = FIRST_SSE_REG;
6246 6248 break;
6247 6249 default:
6248 6250 break;
6249 6251 }
6250 6252 }
6251 6253 return gen_rtx_REG (orig_mode, regno);
6252 6254 }
6253 6255
6254 6256 static rtx
6255 6257 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
6256 6258 enum machine_mode orig_mode, enum machine_mode mode)
6257 6259 {
6258 6260 const_tree fn, fntype;
6259 6261
6260 6262 fn = NULL_TREE;
6261 6263 if (fntype_or_decl && DECL_P (fntype_or_decl))
6262 6264 fn = fntype_or_decl;
6263 6265 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
6264 6266
6265 6267 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI)
6266 6268 return function_value_ms_64 (orig_mode, mode);
6267 6269 else if (TARGET_64BIT)
6268 6270 return function_value_64 (orig_mode, mode, valtype);
6269 6271 else
6270 6272 return function_value_32 (orig_mode, mode, fntype, fn);
6271 6273 }
6272 6274
6273 6275 static rtx
6274 6276 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
6275 6277 bool outgoing ATTRIBUTE_UNUSED)
6276 6278 {
6277 6279 enum machine_mode mode, orig_mode;
6278 6280
6279 6281 orig_mode = TYPE_MODE (valtype);
6280 6282 mode = type_natural_mode (valtype, NULL);
6281 6283 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
6282 6284 }
6283 6285
6284 6286 rtx
6285 6287 ix86_libcall_value (enum machine_mode mode)
6286 6288 {
6287 6289 return ix86_function_value_1 (NULL, NULL, mode, mode);
6288 6290 }
6289 6291
6290 6292 /* Return true iff type is returned in memory. */
6291 6293
6292 6294 static int ATTRIBUTE_UNUSED
6293 6295 return_in_memory_32 (const_tree type, enum machine_mode mode)
6294 6296 {
6295 6297 HOST_WIDE_INT size;
6296 6298
6297 6299 if (mode == BLKmode)
6298 6300 return 1;
6299 6301
6300 6302 size = int_size_in_bytes (type);
6301 6303
6302 6304 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
6303 6305 return 0;
6304 6306
6305 6307 if (VECTOR_MODE_P (mode) || mode == TImode)
6306 6308 {
6307 6309 /* User-created vectors small enough to fit in EAX. */
6308 6310 if (size < 8)
6309 6311 return 0;
6310 6312
6311 6313       /* MMX/3dNow values are returned in MM0,
6312 6314 	 except when it doesn't exist.  */
6313 6315 if (size == 8)
6314 6316 return (TARGET_MMX ? 0 : 1);
6315 6317
6316 6318 /* SSE values are returned in XMM0, except when it doesn't exist. */
6317 6319 if (size == 16)
6318 6320 return (TARGET_SSE ? 0 : 1);
6319 6321
6320 6322 /* AVX values are returned in YMM0, except when it doesn't exist. */
6321 6323 if (size == 32)
6322 6324 return TARGET_AVX ? 0 : 1;
6323 6325 }
6324 6326
6325 6327 if (mode == XFmode)
6326 6328 return 0;
6327 6329
6328 6330 if (size > 12)
6329 6331 return 1;
6330 6332
6331 6333 /* OImode shouldn't be used directly. */
6332 6334 gcc_assert (mode != OImode);
6333 6335
6334 6336 return 0;
6335 6337 }
6336 6338
6337 6339 static int ATTRIBUTE_UNUSED
6338 6340 return_in_memory_64 (const_tree type, enum machine_mode mode)
6339 6341 {
6340 6342 int needed_intregs, needed_sseregs;
6341 6343 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
6342 6344 }
6343 6345
6344 6346 static int ATTRIBUTE_UNUSED
6345 6347 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
6346 6348 {
6347 6349 HOST_WIDE_INT size = int_size_in_bytes (type);
6348 6350
6349 6351 /* __m128 is returned in xmm0. */
6350 6352 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode))
6351 6353 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16))
6352 6354 return 0;
6353 6355
6354 6356   /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes.  */
6355 6357 return (size != 1 && size != 2 && size != 4 && size != 8);
6356 6358 }
6357 6359
6358 6360 static bool
6359 6361 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6360 6362 {
6361 6363 #ifdef SUBTARGET_RETURN_IN_MEMORY
6362 6364 return SUBTARGET_RETURN_IN_MEMORY (type, fntype);
6363 6365 #else
6364 6366 const enum machine_mode mode = type_natural_mode (type, NULL);
6365 6367
6366 6368 if (TARGET_64BIT)
6367 6369 {
6368 6370 if (ix86_function_type_abi (fntype) == MS_ABI)
6369 6371 return return_in_memory_ms_64 (type, mode);
6370 6372 else
6371 6373 return return_in_memory_64 (type, mode);
6372 6374 }
6373 6375 else
6374 6376 return return_in_memory_32 (type, mode);
6375 6377 #endif
6376 6378 }
6377 6379
6378 6380 /* Return true iff TYPE is returned in memory.  This version is used
6379 6381 on Solaris 10. It is similar to the generic ix86_return_in_memory,
6380 6382 but differs notably in that when MMX is available, 8-byte vectors
6381 6383 are returned in memory, rather than in MMX registers. */
6382 6384
6383 6385 bool
6384 6386 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
6385 6387 {
6386 6388 int size;
6387 6389 enum machine_mode mode = type_natural_mode (type, NULL);
6388 6390
6389 6391 if (TARGET_64BIT)
6390 6392 return return_in_memory_64 (type, mode);
6391 6393
6392 6394 if (mode == BLKmode)
6393 6395 return 1;
6394 6396
6395 6397 size = int_size_in_bytes (type);
6396 6398
6397 6399 if (VECTOR_MODE_P (mode))
6398 6400 {
6399 6401 /* Return in memory only if MMX registers *are* available. This
6400 6402 seems backwards, but it is consistent with the existing
6401 6403 Solaris x86 ABI. */
6402 6404 if (size == 8)
6403 6405 return TARGET_MMX;
6404 6406 if (size == 16)
6405 6407 return !TARGET_SSE;
6406 6408 }
6407 6409 else if (mode == TImode)
6408 6410 return !TARGET_SSE;
6409 6411 else if (mode == XFmode)
6410 6412 return 0;
6411 6413
6412 6414 return size > 12;
6413 6415 }
6414 6416
6415 6417 /* When returning SSE vector types, we have a choice of either
6416 6418 (1) being abi incompatible with a -march switch, or
6417 6419 (2) generating an error.
6418 6420 Given no good solution, I think the safest thing is one warning.
6419 6421 The user won't be able to use -Werror, but....
6420 6422
6421 6423 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
6422 6424 called in response to actually generating a caller or callee that
6423 6425 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called
6424 6426 via aggregate_value_p for general type probing from tree-ssa. */
6425 6427
6426 6428 static rtx
6427 6429 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
6428 6430 {
6429 6431 static bool warnedsse, warnedmmx;
6430 6432
6431 6433 if (!TARGET_64BIT && type)
6432 6434 {
6433 6435 /* Look at the return type of the function, not the function type. */
6434 6436 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
6435 6437
6436 6438 if (!TARGET_SSE && !warnedsse)
6437 6439 {
6438 6440 if (mode == TImode
6439 6441 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
6440 6442 {
6441 6443 warnedsse = true;
6442 6444 warning (0, "SSE vector return without SSE enabled "
6443 6445 "changes the ABI");
6444 6446 }
6445 6447 }
6446 6448
6447 6449 if (!TARGET_MMX && !warnedmmx)
6448 6450 {
6449 6451 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
6450 6452 {
6451 6453 warnedmmx = true;
6452 6454 warning (0, "MMX vector return without MMX enabled "
6453 6455 "changes the ABI");
6454 6456 }
6455 6457 }
6456 6458 }
6457 6459
6458 6460 return NULL;
6459 6461 }
6460 6462
6461 6463
6462 6464 /* Create the va_list data type. */
6463 6465
6464 6466 /* Returns the calling convention specific va_list data type.
6465 6467 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */
6466 6468
6467 6469 static tree
6468 6470 ix86_build_builtin_va_list_abi (enum calling_abi abi)
6469 6471 {
6470 6472 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
6471 6473
6472 6474 /* For i386 we use plain pointer to argument area. */
6473 6475 if (!TARGET_64BIT || abi == MS_ABI)
6474 6476 return build_pointer_type (char_type_node);
6475 6477
6476 6478 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6477 6479 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
6478 6480
6479 6481 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
6480 6482 unsigned_type_node);
6481 6483 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
6482 6484 unsigned_type_node);
6483 6485 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
6484 6486 ptr_type_node);
6485 6487 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
6486 6488 ptr_type_node);
6487 6489
6488 6490 va_list_gpr_counter_field = f_gpr;
6489 6491 va_list_fpr_counter_field = f_fpr;
6490 6492
6491 6493 DECL_FIELD_CONTEXT (f_gpr) = record;
6492 6494 DECL_FIELD_CONTEXT (f_fpr) = record;
6493 6495 DECL_FIELD_CONTEXT (f_ovf) = record;
6494 6496 DECL_FIELD_CONTEXT (f_sav) = record;
6495 6497
6496 6498 TREE_CHAIN (record) = type_decl;
6497 6499 TYPE_NAME (record) = type_decl;
6498 6500 TYPE_FIELDS (record) = f_gpr;
6499 6501 TREE_CHAIN (f_gpr) = f_fpr;
6500 6502 TREE_CHAIN (f_fpr) = f_ovf;
6501 6503 TREE_CHAIN (f_ovf) = f_sav;
6502 6504
6503 6505 layout_type (record);
6504 6506
6505 6507 /* The correct type is an array type of one element. */
6506 6508 return build_array_type (record, build_index_type (size_zero_node));
6507 6509 }
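
The record built above is the familiar SysV x86-64 va_list; a rough C equivalent (the real builtin uses the tag __va_list_tag, the names here are only for illustration):

typedef struct sysv_va_list_tag
{
  unsigned int gp_offset;        /* byte offset of the next GPR slot in reg_save_area */
  unsigned int fp_offset;        /* byte offset of the next XMM slot in reg_save_area */
  void *overflow_arg_area;       /* next stack-passed argument                        */
  void *reg_save_area;           /* register block saved by the prologue              */
} sysv_va_list_tag;

typedef sysv_va_list_tag sysv_va_list[1];   /* an array of one element, as returned above */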
6508 6510
6509 6511 /* Setup the builtin va_list data type and for 64-bit the additional
6510 6512 calling convention specific va_list data types. */
6511 6513
6512 6514 static tree
6513 6515 ix86_build_builtin_va_list (void)
6514 6516 {
6515 6517 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI);
6516 6518
6517 6519 /* Initialize abi specific va_list builtin types. */
6518 6520 if (TARGET_64BIT)
6519 6521 {
6520 6522 tree t;
6521 6523 if (DEFAULT_ABI == MS_ABI)
6522 6524 {
6523 6525 t = ix86_build_builtin_va_list_abi (SYSV_ABI);
6524 6526 if (TREE_CODE (t) != RECORD_TYPE)
6525 6527 t = build_variant_type_copy (t);
6526 6528 sysv_va_list_type_node = t;
6527 6529 }
6528 6530 else
6529 6531 {
6530 6532 t = ret;
6531 6533 if (TREE_CODE (t) != RECORD_TYPE)
6532 6534 t = build_variant_type_copy (t);
6533 6535 sysv_va_list_type_node = t;
6534 6536 }
6535 6537 if (DEFAULT_ABI != MS_ABI)
6536 6538 {
6537 6539 t = ix86_build_builtin_va_list_abi (MS_ABI);
6538 6540 if (TREE_CODE (t) != RECORD_TYPE)
6539 6541 t = build_variant_type_copy (t);
6540 6542 ms_va_list_type_node = t;
6541 6543 }
6542 6544 else
6543 6545 {
6544 6546 t = ret;
6545 6547 if (TREE_CODE (t) != RECORD_TYPE)
6546 6548 t = build_variant_type_copy (t);
6547 6549 ms_va_list_type_node = t;
6548 6550 }
6549 6551 }
6550 6552
6551 6553 return ret;
6552 6554 }
6553 6555
6554 6556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
6555 6557
6556 6558 static void
6557 6559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
6558 6560 {
6559 6561 rtx save_area, mem;
6560 6562 rtx label;
6561 6563 rtx label_ref;
6562 6564 rtx tmp_reg;
6563 6565 rtx nsse_reg;
6564 6566 alias_set_type set;
6565 6567 int i;
6566 6568 int regparm = ix86_regparm;
6567 6569
6568 6570 if (cum->call_abi != DEFAULT_ABI)
6569 6571 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX;
6570 6572
6571 6573 /* GPR size of varargs save area. */
6572 6574 if (cfun->va_list_gpr_size)
6573 6575 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD;
6574 6576 else
6575 6577 ix86_varargs_gpr_size = 0;
6576 6578
6577 6579 /* FPR size of varargs save area. We don't need it if we don't pass
6578 6580 anything in SSE registers. */
6579 6581 if (cum->sse_nregs && cfun->va_list_fpr_size)
6580 6582 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16;
6581 6583 else
6582 6584 ix86_varargs_fpr_size = 0;
6583 6585
6584 6586 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size)
6585 6587 return;
6586 6588
6587 6589 save_area = frame_pointer_rtx;
6588 6590 set = get_varargs_alias_set ();
6589 6591
6590 6592 for (i = cum->regno;
6591 6593 i < regparm
6592 6594 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
6593 6595 i++)
6594 6596 {
6595 6597 mem = gen_rtx_MEM (Pmode,
6596 6598 plus_constant (save_area, i * UNITS_PER_WORD));
6597 6599 MEM_NOTRAP_P (mem) = 1;
6598 6600 set_mem_alias_set (mem, set);
6599 6601 emit_move_insn (mem, gen_rtx_REG (Pmode,
6600 6602 x86_64_int_parameter_registers[i]));
6601 6603 }
6602 6604
6603 6605 if (ix86_varargs_fpr_size)
6604 6606 {
6605 6607       /* The stack must be aligned to 16 bytes for the FP register save area.  */
6606 6608 if (crtl->stack_alignment_needed < 128)
6607 6609 crtl->stack_alignment_needed = 128;
6608 6610
6609 6611       /* Now emit code to save SSE registers.  The AX parameter contains the number
6610 6612 	 of SSE parameter registers used to call this function.  We use the
6611 6613 	 sse_prologue_save insn template, which produces a computed jump across
6612 6614 	 the SSE saves.  We need some preparation work to get this working.  */
6613 6615
6614 6616 label = gen_label_rtx ();
6615 6617 label_ref = gen_rtx_LABEL_REF (Pmode, label);
6616 6618
6617 6619       /* Compute the address to jump to:
6618 6620 	 label - eax*4 + nnamed_sse_arguments*4, or
6619 6621 	 label - eax*5 + nnamed_sse_arguments*5 for AVX.  */
6620 6622 tmp_reg = gen_reg_rtx (Pmode);
6621 6623 nsse_reg = gen_reg_rtx (Pmode);
6622 6624 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG)));
6623 6625 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6624 6626 gen_rtx_MULT (Pmode, nsse_reg,
6625 6627 GEN_INT (4))));
6626 6628
6627 6629 /* vmovaps is one byte longer than movaps. */
6628 6630 if (TARGET_AVX)
6629 6631 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6630 6632 gen_rtx_PLUS (Pmode, tmp_reg,
6631 6633 nsse_reg)));
6632 6634
6633 6635 if (cum->sse_regno)
6634 6636 emit_move_insn
6635 6637 (nsse_reg,
6636 6638 gen_rtx_CONST (DImode,
6637 6639 gen_rtx_PLUS (DImode,
6638 6640 label_ref,
6639 6641 GEN_INT (cum->sse_regno
6640 6642 * (TARGET_AVX ? 5 : 4)))));
6641 6643 else
6642 6644 emit_move_insn (nsse_reg, label_ref);
6643 6645 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
6644 6646
6645 6647       /* Compute the address of the memory block we save into.  We always use a
6646 6648 	 pointer pointing 127 bytes past the first byte to store - this is needed
6647 6649 	 to keep the instruction size limited to 4 bytes (5 bytes for AVX) with a
6648 6650 	 one byte displacement.  */
6649 6651 tmp_reg = gen_reg_rtx (Pmode);
6650 6652 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
6651 6653 plus_constant (save_area,
6652 6654 ix86_varargs_gpr_size + 127)));
6653 6655 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
6654 6656 MEM_NOTRAP_P (mem) = 1;
6655 6657 set_mem_alias_set (mem, set);
6656 6658 set_mem_align (mem, BITS_PER_WORD);
6657 6659
6658 6660 /* And finally do the dirty job! */
6659 6661 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
6660 6662 GEN_INT (cum->sse_regno), label));
6661 6663 }
6662 6664 }
6663 6665
6664 6666 static void
6665 6667 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
6666 6668 {
6667 6669 alias_set_type set = get_varargs_alias_set ();
6668 6670 int i;
6669 6671
6670 6672 for (i = cum->regno; i < X64_REGPARM_MAX; i++)
6671 6673 {
6672 6674 rtx reg, mem;
6673 6675
6674 6676 mem = gen_rtx_MEM (Pmode,
6675 6677 plus_constant (virtual_incoming_args_rtx,
6676 6678 i * UNITS_PER_WORD));
6677 6679 MEM_NOTRAP_P (mem) = 1;
6678 6680 set_mem_alias_set (mem, set);
6679 6681
6680 6682 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
6681 6683 emit_move_insn (mem, reg);
6682 6684 }
6683 6685 }
6684 6686
6685 6687 static void
6686 6688 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6687 6689 tree type, int *pretend_size ATTRIBUTE_UNUSED,
6688 6690 int no_rtl)
6689 6691 {
6690 6692 CUMULATIVE_ARGS next_cum;
6691 6693 tree fntype;
6692 6694
6693 6695 /* This argument doesn't appear to be used anymore. Which is good,
6694 6696 because the old code here didn't suppress rtl generation. */
6695 6697 gcc_assert (!no_rtl);
6696 6698
6697 6699 if (!TARGET_64BIT)
6698 6700 return;
6699 6701
6700 6702 fntype = TREE_TYPE (current_function_decl);
6701 6703
6702 6704 /* For varargs, we do not want to skip the dummy va_dcl argument.
6703 6705 For stdargs, we do want to skip the last named argument. */
6704 6706 next_cum = *cum;
6705 6707 if (stdarg_p (fntype))
6706 6708 function_arg_advance (&next_cum, mode, type, 1);
6707 6709
6708 6710 if (cum->call_abi == MS_ABI)
6709 6711 setup_incoming_varargs_ms_64 (&next_cum);
6710 6712 else
6711 6713 setup_incoming_varargs_64 (&next_cum);
6712 6714 }
6713 6715
6714 6716 /* Checks if TYPE is of kind va_list char *. */
6715 6717
6716 6718 static bool
6717 6719 is_va_list_char_pointer (tree type)
6718 6720 {
6719 6721 tree canonic;
6720 6722
6721 6723 /* For 32-bit it is always true. */
6722 6724 if (!TARGET_64BIT)
6723 6725 return true;
6724 6726 canonic = ix86_canonical_va_list_type (type);
6725 6727 return (canonic == ms_va_list_type_node
6726 6728 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node));
6727 6729 }
6728 6730
6729 6731 /* Implement va_start. */
6730 6732
6731 6733 static void
6732 6734 ix86_va_start (tree valist, rtx nextarg)
6733 6735 {
6734 6736 HOST_WIDE_INT words, n_gpr, n_fpr;
6735 6737 tree f_gpr, f_fpr, f_ovf, f_sav;
6736 6738 tree gpr, fpr, ovf, sav, t;
6737 6739 tree type;
6738 6740
6739 6741 /* Only 64bit target needs something special. */
6740 6742 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6741 6743 {
6742 6744 std_expand_builtin_va_start (valist, nextarg);
6743 6745 return;
6744 6746 }
6745 6747
6746 6748 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6747 6749 f_fpr = TREE_CHAIN (f_gpr);
6748 6750 f_ovf = TREE_CHAIN (f_fpr);
6749 6751 f_sav = TREE_CHAIN (f_ovf);
6750 6752
6751 6753 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
6752 6754 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
6753 6755 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6754 6756 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6755 6757 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6756 6758
6757 6759 /* Count number of gp and fp argument registers used. */
6758 6760 words = crtl->args.info.words;
6759 6761 n_gpr = crtl->args.info.regno;
6760 6762 n_fpr = crtl->args.info.sse_regno;
6761 6763
6762 6764 if (cfun->va_list_gpr_size)
6763 6765 {
6764 6766 type = TREE_TYPE (gpr);
6765 6767 t = build2 (MODIFY_EXPR, type,
6766 6768 gpr, build_int_cst (type, n_gpr * 8));
6767 6769 TREE_SIDE_EFFECTS (t) = 1;
6768 6770 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6769 6771 }
6770 6772
6771 6773 if (TARGET_SSE && cfun->va_list_fpr_size)
6772 6774 {
6773 6775 type = TREE_TYPE (fpr);
6774 6776 t = build2 (MODIFY_EXPR, type, fpr,
6775 6777 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX));
6776 6778 TREE_SIDE_EFFECTS (t) = 1;
6777 6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6778 6780 }
6779 6781
6780 6782 /* Find the overflow area. */
6781 6783 type = TREE_TYPE (ovf);
6782 6784 t = make_tree (type, crtl->args.internal_arg_pointer);
6783 6785 if (words != 0)
6784 6786 t = build2 (POINTER_PLUS_EXPR, type, t,
6785 6787 size_int (words * UNITS_PER_WORD));
6786 6788 t = build2 (MODIFY_EXPR, type, ovf, t);
6787 6789 TREE_SIDE_EFFECTS (t) = 1;
6788 6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6789 6791
6790 6792 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size)
6791 6793 {
6792 6794       /* Find the register save area.
6793 6795 	 The prologue of the function saves it right above the stack frame.  */
6794 6796 type = TREE_TYPE (sav);
6795 6797 t = make_tree (type, frame_pointer_rtx);
6796 6798 if (!ix86_varargs_gpr_size)
6797 6799 t = build2 (POINTER_PLUS_EXPR, type, t,
6798 6800 size_int (-8 * X86_64_REGPARM_MAX));
6799 6801 t = build2 (MODIFY_EXPR, type, sav, t);
6800 6802 TREE_SIDE_EFFECTS (t) = 1;
6801 6803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6802 6804 }
6803 6805 }
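
A worked example of the counters set above, assuming the usual SysV x86-64 value X86_64_REGPARM_MAX == 6:

  int f (int a, int b, ...);     /* two named GPR arguments, no named SSE arguments */

  gp_offset = n_gpr * 8                           = 2 * 8       = 16
  fp_offset = n_fpr * 16 + 8 * X86_64_REGPARM_MAX = 0 * 16 + 48 = 48

overflow_arg_area then points just past the named stack words, and reg_save_area points at the block saved by setup_incoming_varargs_64.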
6804 6806
6805 6807 /* Implement va_arg. */
6806 6808
6807 6809 static tree
6808 6810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
6809 6811 gimple_seq *post_p)
6810 6812 {
6811 6813 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
6812 6814 tree f_gpr, f_fpr, f_ovf, f_sav;
6813 6815 tree gpr, fpr, ovf, sav, t;
6814 6816 int size, rsize;
6815 6817 tree lab_false, lab_over = NULL_TREE;
6816 6818 tree addr, t2;
6817 6819 rtx container;
6818 6820 int indirect_p = 0;
6819 6821 tree ptrtype;
6820 6822 enum machine_mode nat_mode;
6821 6823 int arg_boundary;
6822 6824
6823 6825 /* Only 64bit target needs something special. */
6824 6826 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist)))
6825 6827 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
6826 6828
6827 6829 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node));
6828 6830 f_fpr = TREE_CHAIN (f_gpr);
6829 6831 f_ovf = TREE_CHAIN (f_fpr);
6830 6832 f_sav = TREE_CHAIN (f_ovf);
6831 6833
6832 6834 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr),
6833 6835 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE);
6834 6836 valist = build_va_arg_indirect_ref (valist);
6835 6837 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
6836 6838 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
6837 6839 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
6838 6840
6839 6841 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6840 6842 if (indirect_p)
6841 6843 type = build_pointer_type (type);
6842 6844 size = int_size_in_bytes (type);
6843 6845 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6844 6846
6845 6847 nat_mode = type_natural_mode (type, NULL);
6846 6848 switch (nat_mode)
6847 6849 {
6848 6850 case V8SFmode:
6849 6851 case V8SImode:
6850 6852 case V32QImode:
6851 6853 case V16HImode:
6852 6854 case V4DFmode:
6853 6855 case V4DImode:
6854 6856 /* Unnamed 256bit vector mode parameters are passed on stack. */
6855 6857 if (ix86_cfun_abi () == SYSV_ABI)
6856 6858 {
6857 6859 container = NULL;
6858 6860 break;
6859 6861 }
6860 6862
6861 6863 default:
6862 6864 container = construct_container (nat_mode, TYPE_MODE (type),
6863 6865 type, 0, X86_64_REGPARM_MAX,
6864 6866 X86_64_SSE_REGPARM_MAX, intreg,
6865 6867 0);
6866 6868 break;
6867 6869 }
6868 6870
6869 6871 /* Pull the value out of the saved registers. */
6870 6872
6871 6873 addr = create_tmp_var (ptr_type_node, "addr");
6872 6874 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
6873 6875
6874 6876 if (container)
6875 6877 {
6876 6878 int needed_intregs, needed_sseregs;
6877 6879 bool need_temp;
6878 6880 tree int_addr, sse_addr;
6879 6881
6880 6882 lab_false = create_artificial_label ();
6881 6883 lab_over = create_artificial_label ();
6882 6884
6883 6885 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
6884 6886
6885 6887 need_temp = (!REG_P (container)
6886 6888 && ((needed_intregs && TYPE_ALIGN (type) > 64)
6887 6889 || TYPE_ALIGN (type) > 128));
6888 6890
6889 6891       /* In case we are passing a structure, verify that it is a consecutive block
6890 6892          on the register save area.  If not, we need to do moves.  */
6891 6893 if (!need_temp && !REG_P (container))
6892 6894 {
6893 6895 /* Verify that all registers are strictly consecutive */
6894 6896 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
6895 6897 {
6896 6898 int i;
6897 6899
6898 6900 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6899 6901 {
6900 6902 rtx slot = XVECEXP (container, 0, i);
6901 6903 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
6902 6904 || INTVAL (XEXP (slot, 1)) != i * 16)
6903 6905 need_temp = 1;
6904 6906 }
6905 6907 }
6906 6908 else
6907 6909 {
6908 6910 int i;
6909 6911
6910 6912 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
6911 6913 {
6912 6914 rtx slot = XVECEXP (container, 0, i);
6913 6915 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
6914 6916 || INTVAL (XEXP (slot, 1)) != i * 8)
6915 6917 need_temp = 1;
6916 6918 }
6917 6919 }
6918 6920 }
6919 6921 if (!need_temp)
6920 6922 {
6921 6923 int_addr = addr;
6922 6924 sse_addr = addr;
6923 6925 }
6924 6926 else
6925 6927 {
6926 6928 int_addr = create_tmp_var (ptr_type_node, "int_addr");
6927 6929 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
6928 6930 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
6929 6931 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
6930 6932 }
6931 6933
6932 6934 /* First ensure that we fit completely in registers. */
6933 6935 if (needed_intregs)
6934 6936 {
6935 6937 t = build_int_cst (TREE_TYPE (gpr),
6936 6938 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8);
6937 6939 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
6938 6940 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6939 6941 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6940 6942 gimplify_and_add (t, pre_p);
6941 6943 }
6942 6944 if (needed_sseregs)
6943 6945 {
6944 6946 t = build_int_cst (TREE_TYPE (fpr),
6945 6947 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16
6946 6948 + X86_64_REGPARM_MAX * 8);
6947 6949 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
6948 6950 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
6949 6951 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
6950 6952 gimplify_and_add (t, pre_p);
6951 6953 }
6952 6954
6953 6955 /* Compute index to start of area used for integer regs. */
6954 6956 if (needed_intregs)
6955 6957 {
6956 6958 /* int_addr = gpr + sav; */
6957 6959 t = fold_convert (sizetype, gpr);
6958 6960 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6959 6961 gimplify_assign (int_addr, t, pre_p);
6960 6962 }
6961 6963 if (needed_sseregs)
6962 6964 {
6963 6965 /* sse_addr = fpr + sav; */
6964 6966 t = fold_convert (sizetype, fpr);
6965 6967 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
6966 6968 gimplify_assign (sse_addr, t, pre_p);
6967 6969 }
6968 6970 if (need_temp)
6969 6971 {
6970 6972 int i;
6971 6973 tree temp = create_tmp_var (type, "va_arg_tmp");
6972 6974
6973 6975 /* addr = &temp; */
6974 6976 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
6975 6977 gimplify_assign (addr, t, pre_p);
6976 6978
6977 6979 for (i = 0; i < XVECLEN (container, 0); i++)
6978 6980 {
6979 6981 rtx slot = XVECEXP (container, 0, i);
6980 6982 rtx reg = XEXP (slot, 0);
6981 6983 enum machine_mode mode = GET_MODE (reg);
6982 6984 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
6983 6985 tree addr_type = build_pointer_type (piece_type);
6984 6986 tree daddr_type = build_pointer_type_for_mode (piece_type,
6985 6987 ptr_mode, true);
6986 6988 tree src_addr, src;
6987 6989 int src_offset;
6988 6990 tree dest_addr, dest;
6989 6991
6990 6992 if (SSE_REGNO_P (REGNO (reg)))
6991 6993 {
6992 6994 src_addr = sse_addr;
6993 6995 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
6994 6996 }
6995 6997 else
6996 6998 {
6997 6999 src_addr = int_addr;
6998 7000 src_offset = REGNO (reg) * 8;
6999 7001 }
7000 7002 src_addr = fold_convert (addr_type, src_addr);
7001 7003 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
7002 7004 size_int (src_offset));
7003 7005 src = build_va_arg_indirect_ref (src_addr);
7004 7006
7005 7007 dest_addr = fold_convert (daddr_type, addr);
7006 7008 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr,
7007 7009 size_int (INTVAL (XEXP (slot, 1))));
7008 7010 dest = build_va_arg_indirect_ref (dest_addr);
7009 7011
7010 7012 gimplify_assign (dest, src, pre_p);
7011 7013 }
7012 7014 }
7013 7015
7014 7016 if (needed_intregs)
7015 7017 {
7016 7018 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
7017 7019 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
7018 7020 gimplify_assign (gpr, t, pre_p);
7019 7021 }
7020 7022
7021 7023 if (needed_sseregs)
7022 7024 {
7023 7025 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
7024 7026 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
7025 7027 gimplify_assign (fpr, t, pre_p);
7026 7028 }
7027 7029
7028 7030 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
7029 7031
7030 7032 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
7031 7033 }
7032 7034
7033 7035 /* ... otherwise out of the overflow area. */
7034 7036
7035 7037   /* When we align a parameter on the stack for the caller, if its
7036 7038      alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be
7037 7039      aligned at MAX_SUPPORTED_STACK_ALIGNMENT.  We match the callee
7038 7040      here with the caller.  */
7039 7041 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type);
7040 7042 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
7041 7043 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
7042 7044
7043 7045 /* Care for on-stack alignment if needed. */
7044 7046 if (arg_boundary <= 64
7045 7047 || integer_zerop (TYPE_SIZE (type)))
7046 7048 t = ovf;
7047 7049 else
7048 7050 {
7049 7051 HOST_WIDE_INT align = arg_boundary / 8;
7050 7052 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
7051 7053 size_int (align - 1));
7052 7054 t = fold_convert (sizetype, t);
7053 7055 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
7054 7056 size_int (-align));
7055 7057 t = fold_convert (TREE_TYPE (ovf), t);
7056 7058 }
7057 7059 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
7058 7060 gimplify_assign (addr, t, pre_p);
7059 7061
7060 7062 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
7061 7063 size_int (rsize * UNITS_PER_WORD));
7062 7064 gimplify_assign (unshare_expr (ovf), t, pre_p);
7063 7065
7064 7066 if (container)
7065 7067 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
7066 7068
7067 7069 ptrtype = build_pointer_type (type);
7068 7070 addr = fold_convert (ptrtype, addr);
7069 7071
7070 7072 if (indirect_p)
7071 7073 addr = build_va_arg_indirect_ref (addr);
7072 7074 return build_va_arg_indirect_ref (addr);
7073 7075 }
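
A minimal usage sketch of the machinery expanded by ix86_va_start and ix86_gimplify_va_arg above: on SysV x86-64 the named count travels in a GPR, the double arguments in SSE registers, and va_arg pulls them back out of the register save area or the overflow area.

#include <stdarg.h>
#include <stdio.h>

static double
sum (int n, ...)
{
  va_list ap;
  double s = 0.0;

  va_start (ap, n);               /* expanded by ix86_va_start        */
  while (n-- > 0)
    s += va_arg (ap, double);     /* expanded by ix86_gimplify_va_arg */
  va_end (ap);
  return s;
}

int
main (void)
{
  printf ("%g\n", sum (3, 1.0, 2.0, 3.5));   /* prints 6.5 */
  return 0;
}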
7074 7076
7075 7077 /* Return nonzero if OPNUM's MEM should be matched
7076 7078 in movabs* patterns. */
7077 7079
7078 7080 int
7079 7081 ix86_check_movabs (rtx insn, int opnum)
7080 7082 {
7081 7083 rtx set, mem;
7082 7084
7083 7085 set = PATTERN (insn);
7084 7086 if (GET_CODE (set) == PARALLEL)
7085 7087 set = XVECEXP (set, 0, 0);
7086 7088 gcc_assert (GET_CODE (set) == SET);
7087 7089 mem = XEXP (set, opnum);
7088 7090 while (GET_CODE (mem) == SUBREG)
7089 7091 mem = SUBREG_REG (mem);
7090 7092 gcc_assert (MEM_P (mem));
7091 7093 return (volatile_ok || !MEM_VOLATILE_P (mem));
7092 7094 }
7093 7095
7094 7096 /* Initialize the table of extra 80387 mathematical constants. */
7095 7097
7096 7098 static void
7097 7099 init_ext_80387_constants (void)
7098 7100 {
7099 7101 static const char * cst[5] =
7100 7102 {
7101 7103 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
7102 7104 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
7103 7105 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
7104 7106 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
7105 7107 "3.1415926535897932385128089594061862044", /* 4: fldpi */
7106 7108 };
7107 7109 int i;
7108 7110
7109 7111 for (i = 0; i < 5; i++)
7110 7112 {
7111 7113 real_from_string (&ext_80387_constants_table[i], cst[i]);
7112 7114 /* Ensure each constant is rounded to XFmode precision. */
7113 7115 real_convert (&ext_80387_constants_table[i],
7114 7116 XFmode, &ext_80387_constants_table[i]);
7115 7117 }
7116 7118
7117 7119 ext_80387_constants_init = 1;
7118 7120 }
7119 7121
7120 7122 /* Return true if the constant is something that can be loaded with
7121 7123 a special instruction. */
7122 7124
7123 7125 int
7124 7126 standard_80387_constant_p (rtx x)
7125 7127 {
7126 7128 enum machine_mode mode = GET_MODE (x);
7127 7129
7128 7130 REAL_VALUE_TYPE r;
7129 7131
7130 7132 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
7131 7133 return -1;
7132 7134
7133 7135 if (x == CONST0_RTX (mode))
7134 7136 return 1;
7135 7137 if (x == CONST1_RTX (mode))
7136 7138 return 2;
7137 7139
7138 7140 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7139 7141
7140 7142 /* For XFmode constants, try to find a special 80387 instruction when
7141 7143 optimizing for size or on those CPUs that benefit from them. */
7142 7144 if (mode == XFmode
7143 7145 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS))
7144 7146 {
7145 7147 int i;
7146 7148
7147 7149 if (! ext_80387_constants_init)
7148 7150 init_ext_80387_constants ();
7149 7151
7150 7152 for (i = 0; i < 5; i++)
7151 7153 if (real_identical (&r, &ext_80387_constants_table[i]))
7152 7154 return i + 3;
7153 7155 }
7154 7156
7155 7157 /* Load of the constant -0.0 or -1.0 will be split as
7156 7158 fldz;fchs or fld1;fchs sequence. */
7157 7159 if (real_isnegzero (&r))
7158 7160 return 8;
7159 7161 if (real_identical (&r, &dconstm1))
7160 7162 return 9;
7161 7163
7162 7164 return 0;
7163 7165 }
7164 7166
7165 7167 /* Return the opcode of the special instruction to be used to load
7166 7168 the constant X. */
7167 7169
7168 7170 const char *
7169 7171 standard_80387_constant_opcode (rtx x)
7170 7172 {
7171 7173 switch (standard_80387_constant_p (x))
7172 7174 {
7173 7175 case 1:
7174 7176 return "fldz";
7175 7177 case 2:
7176 7178 return "fld1";
7177 7179 case 3:
7178 7180 return "fldlg2";
7179 7181 case 4:
7180 7182 return "fldln2";
7181 7183 case 5:
7182 7184 return "fldl2e";
7183 7185 case 6:
7184 7186 return "fldl2t";
7185 7187 case 7:
7186 7188 return "fldpi";
7187 7189 case 8:
7188 7190 case 9:
7189 7191 return "#";
7190 7192 default:
7191 7193 gcc_unreachable ();
7192 7194 }
7193 7195 }
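
For quick reference, the return values of standard_80387_constant_p map to the opcodes emitted above as follows (taken directly from the two functions):

  1 -> fldz   (0.0)          2 -> fld1   (1.0)
  3 -> fldlg2 (log10 2)      4 -> fldln2 (ln 2)
  5 -> fldl2e (log2 e)       6 -> fldl2t (log2 10)
  7 -> fldpi  (pi)           8, 9 -> fldz or fld1 followed by fchs (-0.0, -1.0)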
7194 7196
7195 7197 /* Return the CONST_DOUBLE representing the 80387 constant that is
7196 7198 loaded by the specified special instruction. The argument IDX
7197 7199 matches the return value from standard_80387_constant_p. */
7198 7200
7199 7201 rtx
7200 7202 standard_80387_constant_rtx (int idx)
7201 7203 {
7202 7204 int i;
7203 7205
7204 7206 if (! ext_80387_constants_init)
7205 7207 init_ext_80387_constants ();
7206 7208
7207 7209 switch (idx)
7208 7210 {
7209 7211 case 3:
7210 7212 case 4:
7211 7213 case 5:
7212 7214 case 6:
7213 7215 case 7:
7214 7216 i = idx - 3;
7215 7217 break;
7216 7218
7217 7219 default:
7218 7220 gcc_unreachable ();
7219 7221 }
7220 7222
7221 7223 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
7222 7224 XFmode);
7223 7225 }
7224 7226
7225 7227 /* Return 1 if mode is a valid mode for sse. */
7226 7228 static int
7227 7229 standard_sse_mode_p (enum machine_mode mode)
7228 7230 {
7229 7231 switch (mode)
7230 7232 {
7231 7233 case V16QImode:
7232 7234 case V8HImode:
7233 7235 case V4SImode:
7234 7236 case V2DImode:
7235 7237 case V4SFmode:
7236 7238 case V2DFmode:
7237 7239 return 1;
7238 7240
7239 7241 default:
7240 7242 return 0;
7241 7243 }
7242 7244 }
7243 7245
7244 7246 /* Return 1 if X is all 0s.  If X is all 1s, return 2 when X is in a
7245 7247    128bit SSE mode and SSE2 is enabled, or 3 when X is in a 256bit AVX
7246 7248    mode and AVX is enabled.  */
7247 7249
7248 7250 int
7249 7251 standard_sse_constant_p (rtx x)
7250 7252 {
7251 7253 enum machine_mode mode = GET_MODE (x);
7252 7254
7253 7255 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
7254 7256 return 1;
7255 7257 if (vector_all_ones_operand (x, mode))
7256 7258 {
7257 7259 if (standard_sse_mode_p (mode))
7258 7260 return TARGET_SSE2 ? 2 : -2;
7259 7261 else if (VALID_AVX256_REG_MODE (mode))
7260 7262 return TARGET_AVX ? 3 : -3;
7261 7263 }
7262 7264
7263 7265 return 0;
7264 7266 }
7265 7267
7266 7268 /* Return the opcode of the special instruction to be used to load
7267 7269 the constant X. */
7268 7270
7269 7271 const char *
7270 7272 standard_sse_constant_opcode (rtx insn, rtx x)
7271 7273 {
7272 7274 switch (standard_sse_constant_p (x))
7273 7275 {
7274 7276 case 1:
7275 7277 switch (get_attr_mode (insn))
7276 7278 {
7277 7279 case MODE_V4SF:
7278 7280 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0";
7279 7281 case MODE_V2DF:
7280 7282 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0";
7281 7283 case MODE_TI:
7282 7284 return TARGET_AVX ? "vpxor\t%0, %0, %0" : "pxor\t%0, %0";
7283 7285 case MODE_V8SF:
7284 7286 return "vxorps\t%x0, %x0, %x0";
7285 7287 case MODE_V4DF:
7286 7288 return "vxorpd\t%x0, %x0, %x0";
7287 7289 case MODE_OI:
7288 7290 return "vpxor\t%x0, %x0, %x0";
7289 7291 default:
7290 7292 gcc_unreachable ();
7291 7293 }
7292 7294 case 2:
7293 7295 if (TARGET_AVX)
7294 7296 switch (get_attr_mode (insn))
7295 7297 {
7296 7298 case MODE_V4SF:
7297 7299 case MODE_V2DF:
7298 7300 case MODE_TI:
7299 7301 return "vpcmpeqd\t%0, %0, %0";
7300 7302 break;
7301 7303 default:
7302 7304 gcc_unreachable ();
7303 7305 }
7304 7306 else
7305 7307 return "pcmpeqd\t%0, %0";
7306 7308 }
7307 7309 gcc_unreachable ();
7308 7310 }
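
A hedged intrinsics-level illustration of the two idioms above (requires SSE2; the function names are made up): an all-zero vector is materialized by xoring a register with itself, and an all-ones vector by comparing a register with itself.

#include <emmintrin.h>

__m128i
all_zeros (void)
{
  return _mm_setzero_si128 ();      /* pxor    %xmm0, %xmm0 (vpxor under AVX)    */
}

__m128i
all_ones (__m128i x)
{
  return _mm_cmpeq_epi32 (x, x);    /* pcmpeqd %xmm0, %xmm0 (vpcmpeqd under AVX) */
}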
7309 7311
7310 7312 /* Returns 1 if OP contains a symbol reference */
7311 7313
7312 7314 int
7313 7315 symbolic_reference_mentioned_p (rtx op)
7314 7316 {
7315 7317 const char *fmt;
7316 7318 int i;
7317 7319
7318 7320 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
7319 7321 return 1;
7320 7322
7321 7323 fmt = GET_RTX_FORMAT (GET_CODE (op));
7322 7324 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
7323 7325 {
7324 7326 if (fmt[i] == 'E')
7325 7327 {
7326 7328 int j;
7327 7329
7328 7330 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
7329 7331 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
7330 7332 return 1;
7331 7333 }
7332 7334
7333 7335 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
7334 7336 return 1;
7335 7337 }
7336 7338
7337 7339 return 0;
7338 7340 }
7339 7341
7340 7342 /* Return 1 if it is appropriate to emit `ret' instructions in the
7341 7343 body of a function. Do this only if the epilogue is simple, needing a
7342 7344 couple of insns. Prior to reloading, we can't tell how many registers
7343 7345 must be saved, so return 0 then. Return 0 if there is no frame
7344 7346 marker to de-allocate. */
7345 7347
7346 7348 int
7347 7349 ix86_can_use_return_insn_p (void)
7348 7350 {
7349 7351 struct ix86_frame frame;
7350 7352
7351 7353 if (! reload_completed || frame_pointer_needed)
7352 7354 return 0;
7353 7355
7354 7356   /* Don't allow more than 32768 bytes of pops, since that's all we can do
7355 7357      with one instruction.  */
7356 7358 if (crtl->args.pops_args
7357 7359 && crtl->args.size >= 32768)
7358 7360 return 0;
7359 7361
7360 7362 ix86_compute_frame_layout (&frame);
7361 7363 return frame.to_allocate == 0 && frame.padding05 == 0 &&
7362 7364 frame.nmsave_args == 0 && (frame.nregs + frame.nsseregs) == 0;
7363 7365 }
7364 7366
7365 7367 /* Value should be nonzero if functions must have frame pointers.
7366 7368 Zero means the frame pointer need not be set up (and parms may
7367 7369 be accessed via the stack pointer) in functions that seem suitable. */
7368 7370
7369 7371 int
7370 7372 ix86_frame_pointer_required (void)
7371 7373 {
7372 7374 /* If we accessed previous frames, then the generated code expects
7373 7375 to be able to access the saved ebp value in our frame. */
7374 7376 if (cfun->machine->accesses_prev_frame)
7375 7377 return 1;
7376 7378
7377 7379 /* Several x86 os'es need a frame pointer for other reasons,
7378 7380 usually pertaining to setjmp. */
7379 7381 if (SUBTARGET_FRAME_POINTER_REQUIRED)
7380 7382 return 1;
7381 7383
7382 7384 if (TARGET_SAVE_ARGS)
7383 7385 return 1;
7384 7386
7385 7387 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
7386 7388 the frame pointer by default. Turn it back on now if we've not
7387 7389 got a leaf function. */
7388 7390 if (TARGET_OMIT_LEAF_FRAME_POINTER
7389 7391 && (!current_function_is_leaf
7390 7392 || ix86_current_function_calls_tls_descriptor))
7391 7393 return 1;
7392 7394
7393 7395 if (crtl->profile)
7394 7396 return 1;
7395 7397
7396 7398 return 0;
7397 7399 }
7398 7400
7399 7401 /* Record that the current function accesses previous call frames. */
7400 7402
7401 7403 void
7402 7404 ix86_setup_frame_addresses (void)
7403 7405 {
7404 7406 cfun->machine->accesses_prev_frame = 1;
7405 7407 }
7406 7408
7407 7409 #ifndef USE_HIDDEN_LINKONCE
7408 7410 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
7409 7411 # define USE_HIDDEN_LINKONCE 1
7410 7412 # else
7411 7413 # define USE_HIDDEN_LINKONCE 0
7412 7414 # endif
7413 7415 #endif
7414 7416
7415 7417 static int pic_labels_used;
7416 7418
7417 7419 /* Fills in the label name that should be used for a pc thunk for
7418 7420 the given register. */
7419 7421
7420 7422 static void
7421 7423 get_pc_thunk_name (char name[32], unsigned int regno)
7422 7424 {
7423 7425 gcc_assert (!TARGET_64BIT);
7424 7426
7425 7427 if (USE_HIDDEN_LINKONCE)
7426 7428 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
7427 7429 else
7428 7430 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
7429 7431 }
7430 7432
7431 7433
7432 7434 /* This function generates code for -fpic that loads %ebx with
7433 7435 the return address of the caller and then returns. */
7434 7436
7435 7437 void
7436 7438 ix86_file_end (void)
7437 7439 {
7438 7440 rtx xops[2];
7439 7441 int regno;
7440 7442
7441 7443 for (regno = 0; regno < 8; ++regno)
7442 7444 {
7443 7445 char name[32];
7444 7446
7445 7447 if (! ((pic_labels_used >> regno) & 1))
7446 7448 continue;
7447 7449
7448 7450 get_pc_thunk_name (name, regno);
7449 7451
7450 7452 #if TARGET_MACHO
7451 7453 if (TARGET_MACHO)
7452 7454 {
7453 7455 switch_to_section (darwin_sections[text_coal_section]);
7454 7456 fputs ("\t.weak_definition\t", asm_out_file);
7455 7457 assemble_name (asm_out_file, name);
7456 7458 fputs ("\n\t.private_extern\t", asm_out_file);
7457 7459 assemble_name (asm_out_file, name);
7458 7460 fputs ("\n", asm_out_file);
7459 7461 ASM_OUTPUT_LABEL (asm_out_file, name);
7460 7462 }
7461 7463 else
7462 7464 #endif
7463 7465 if (USE_HIDDEN_LINKONCE)
7464 7466 {
7465 7467 tree decl;
7466 7468
7467 7469 decl = build_decl (FUNCTION_DECL, get_identifier (name),
7468 7470 error_mark_node);
7469 7471 TREE_PUBLIC (decl) = 1;
7470 7472 TREE_STATIC (decl) = 1;
7471 7473 DECL_ONE_ONLY (decl) = 1;
7472 7474
7473 7475 (*targetm.asm_out.unique_section) (decl, 0);
7474 7476 switch_to_section (get_named_section (decl, NULL, 0));
7475 7477
7476 7478 (*targetm.asm_out.globalize_label) (asm_out_file, name);
7477 7479 fputs ("\t.hidden\t", asm_out_file);
7478 7480 assemble_name (asm_out_file, name);
7479 7481 fputc ('\n', asm_out_file);
7480 7482 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
7481 7483 }
7482 7484 else
7483 7485 {
7484 7486 switch_to_section (text_section);
7485 7487 ASM_OUTPUT_LABEL (asm_out_file, name);
7486 7488 }
7487 7489
7488 7490 xops[0] = gen_rtx_REG (Pmode, regno);
7489 7491 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx);
7490 7492 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops);
7491 7493 output_asm_insn ("ret", xops);
7492 7494 }
7493 7495
7494 7496 if (NEED_INDICATE_EXEC_STACK)
7495 7497 file_end_indicate_exec_stack ();
7496 7498 }
7497 7499
7498 7500 /* Emit code for the SET_GOT patterns. */
7499 7501
7500 7502 const char *
7501 7503 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
7502 7504 {
7503 7505 rtx xops[3];
7504 7506
7505 7507 xops[0] = dest;
7506 7508
7507 7509 if (TARGET_VXWORKS_RTP && flag_pic)
7508 7510 {
7509 7511 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
7510 7512 xops[2] = gen_rtx_MEM (Pmode,
7511 7513 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
7512 7514 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
7513 7515
7514 7516 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
7515 7517 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
7516 7518 an unadorned address. */
7517 7519 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7518 7520 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
7519 7521 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
7520 7522 return "";
7521 7523 }
7522 7524
7523 7525 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
7524 7526
7525 7527 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
7526 7528 {
7527 7529 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
7528 7530
7529 7531 if (!flag_pic)
7530 7532 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops);
7531 7533 else
7532 7534 output_asm_insn ("call\t%a2", xops);
7533 7535
7534 7536 #if TARGET_MACHO
7535 7537 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7536 7538 is what will be referenced by the Mach-O PIC subsystem. */
7537 7539 if (!label)
7538 7540 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7539 7541 #endif
7540 7542
7541 7543 (*targetm.asm_out.internal_label) (asm_out_file, "L",
7542 7544 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
7543 7545
7544 7546 if (flag_pic)
7545 7547 output_asm_insn ("pop%z0\t%0", xops);
7546 7548 }
7547 7549 else
7548 7550 {
7549 7551 char name[32];
7550 7552 get_pc_thunk_name (name, REGNO (dest));
7551 7553 pic_labels_used |= 1 << REGNO (dest);
7552 7554
7553 7555 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
7554 7556 xops[2] = gen_rtx_MEM (QImode, xops[2]);
7555 7557 output_asm_insn ("call\t%X2", xops);
7556 7558 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
7557 7559 is what will be referenced by the Mach-O PIC subsystem. */
7558 7560 #if TARGET_MACHO
7559 7561 if (!label)
7560 7562 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME);
7561 7563 else
7562 7564 targetm.asm_out.internal_label (asm_out_file, "L",
7563 7565 CODE_LABEL_NUMBER (label));
7564 7566 #endif
7565 7567 }
7566 7568
7567 7569 if (TARGET_MACHO)
7568 7570 return "";
7569 7571
7570 7572 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
7571 7573 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops);
7572 7574 else
7573 7575 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
7574 7576
7575 7577 return "";
7576 7578 }
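/* For illustration only, a rough sketch of the 32-bit -fpic sequences the
   function above can emit (the label and thunk names are examples and may
   differ on a given target):

     without deep branch prediction:
	call	.L2
     .L2:
	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

     with deep branch prediction, via a pc-loading thunk:
	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */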
7577 7579
7578 7580 /* Generate a "push" pattern for input ARG. */
7579 7581
7580 7582 static rtx
7581 7583 gen_push (rtx arg)
7582 7584 {
7583 7585 return gen_rtx_SET (VOIDmode,
7584 7586 gen_rtx_MEM (Pmode,
7585 7587 gen_rtx_PRE_DEC (Pmode,
7586 7588 stack_pointer_rtx)),
7587 7589 arg);
7588 7590 }
7589 7591
7590 7592 /* Return >= 0 if there is an unused call-clobbered register available
7591 7593 for the entire function. */
7592 7594
7593 7595 static unsigned int
7594 7596 ix86_select_alt_pic_regnum (void)
7595 7597 {
7596 7598 if (current_function_is_leaf && !crtl->profile
7597 7599 && !ix86_current_function_calls_tls_descriptor)
7598 7600 {
7599 7601 int i, drap;
7600 7602 /* Can't use the same register for both PIC and DRAP. */
7601 7603 if (crtl->drap_reg)
7602 7604 drap = REGNO (crtl->drap_reg);
7603 7605 else
7604 7606 drap = -1;
7605 7607 for (i = 2; i >= 0; --i)
7606 7608 if (i != drap && !df_regs_ever_live_p (i))
7607 7609 return i;
7608 7610 }
7609 7611
7610 7612 return INVALID_REGNUM;
7611 7613 }
7612 7614
7613 7615 /* Return 1 if we need to save REGNO. */
7614 7616 static int
7615 7617 ix86_save_reg (unsigned int regno, int maybe_eh_return)
7616 7618 {
7617 7619 if (pic_offset_table_rtx
7618 7620 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
7619 7621 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
7620 7622 || crtl->profile
7621 7623 || crtl->calls_eh_return
7622 7624 || crtl->uses_const_pool))
7623 7625 {
7624 7626 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
7625 7627 return 0;
7626 7628 return 1;
7627 7629 }
7628 7630
7629 7631 if (crtl->calls_eh_return && maybe_eh_return)
7630 7632 {
7631 7633 unsigned i;
7632 7634 for (i = 0; ; i++)
7633 7635 {
7634 7636 unsigned test = EH_RETURN_DATA_REGNO (i);
7635 7637 if (test == INVALID_REGNUM)
7636 7638 break;
7637 7639 if (test == regno)
7638 7640 return 1;
7639 7641 }
7640 7642 }
7641 7643
7642 7644 if (crtl->drap_reg
7643 7645 && regno == REGNO (crtl->drap_reg))
7644 7646 return 1;
7645 7647
7646 7648 return (df_regs_ever_live_p (regno)
7647 7649 && !call_used_regs[regno]
7648 7650 && !fixed_regs[regno]
7649 7651 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
7650 7652 }
7651 7653
7652 7654 /* Return number of saved general purpose registers. */
7653 7655
7654 7656 static int
7655 7657 ix86_nsaved_regs (void)
7656 7658 {
7657 7659 int nregs = 0;
7658 7660 int regno;
7659 7661
7660 7662 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7661 7663 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7662 7664 nregs ++;
7663 7665 return nregs;
7664 7666 }
7665 7667
7666 7668 /* Return number of saved SSE registers. */
7667 7669
7668 7670 static int
7669 7671 ix86_nsaved_sseregs (void)
7670 7672 {
7671 7673 int nregs = 0;
7672 7674 int regno;
7673 7675
7674 7676 if (ix86_cfun_abi () != MS_ABI)
7675 7677 return 0;
7676 7678 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7677 7679 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7678 7680 nregs ++;
7679 7681 return nregs;
7680 7682 }
7681 7683
7682 7684 /* Given FROM and TO register numbers, say whether this elimination is
7683 7685 allowed. If stack alignment is needed, we can only replace argument
7684 7686 pointer with hard frame pointer, or replace frame pointer with stack
7685 7687 pointer. Otherwise, frame pointer elimination is automatically
7686 7688 handled and all other eliminations are valid. */
7687 7689
7688 7690 int
7689 7691 ix86_can_eliminate (int from, int to)
7690 7692 {
7691 7693 if (stack_realign_fp)
7692 7694 return ((from == ARG_POINTER_REGNUM
7693 7695 && to == HARD_FRAME_POINTER_REGNUM)
7694 7696 || (from == FRAME_POINTER_REGNUM
7695 7697 && to == STACK_POINTER_REGNUM));
7696 7698 else
7697 7699 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1;
7698 7700 }
7699 7701
7700 7702 /* Return the offset between two registers, one to be eliminated, and the other
7701 7703 its replacement, at the start of a routine. */
7702 7704
7703 7705 HOST_WIDE_INT
7704 7706 ix86_initial_elimination_offset (int from, int to)
7705 7707 {
7706 7708 struct ix86_frame frame;
7707 7709 ix86_compute_frame_layout (&frame);
7708 7710
7709 7711 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7710 7712 return frame.hard_frame_pointer_offset;
7711 7713 else if (from == FRAME_POINTER_REGNUM
7712 7714 && to == HARD_FRAME_POINTER_REGNUM)
7713 7715 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
7714 7716 else
7715 7717 {
7716 7718 gcc_assert (to == STACK_POINTER_REGNUM);
7717 7719
7718 7720 if (from == ARG_POINTER_REGNUM)
7719 7721 return frame.stack_pointer_offset;
7720 7722
7721 7723 gcc_assert (from == FRAME_POINTER_REGNUM);
7722 7724 return frame.stack_pointer_offset - frame.frame_pointer_offset;
7723 7725 }
7724 7726 }
7725 7727
7726 7728 /* In a dynamically-aligned function, we can't know the offset from
7727 7729 stack pointer to frame pointer, so we must ensure that setjmp
7728 7730 eliminates fp against the hard fp (%ebp) rather than trying to
7729 7731 index from %esp up to the top of the frame across a gap that is
7730 7732 of unknown (at compile-time) size. */
7731 7733 static rtx
7732 7734 ix86_builtin_setjmp_frame_value (void)
7733 7735 {
7734 7736 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx;
7735 7737 }
7736 7738
7737 7739 /* Fill the ix86_frame structure describing the frame of the function currently being compiled. */
7738 7740
7739 7741 static void
7740 7742 ix86_compute_frame_layout (struct ix86_frame *frame)
7741 7743 {
7742 7744 HOST_WIDE_INT total_size;
7743 7745 unsigned int stack_alignment_needed;
7744 7746 HOST_WIDE_INT offset;
7745 7747 unsigned int preferred_alignment;
7746 7748 HOST_WIDE_INT size = get_frame_size ();
7747 7749
7748 7750 frame->nregs = ix86_nsaved_regs ();
7749 7751 frame->nsseregs = ix86_nsaved_sseregs ();
7750 7752 frame->nmsave_args = ix86_nsaved_args ();
7751 7753 total_size = size;
7752 7754
7753 7755 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT;
7754 7756 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT;
7755 7757
7756 7758   /* The MS ABI seems to require the stack alignment to always be 16, except in
7757 7759      function prologues. */
7758 7760 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16)
7759 7761 {
7760 7762 preferred_alignment = 16;
7761 7763 stack_alignment_needed = 16;
7762 7764 crtl->preferred_stack_boundary = 128;
7763 7765 crtl->stack_alignment_needed = 128;
7764 7766 }
7765 7767
7766 7768 gcc_assert (!size || stack_alignment_needed);
7767 7769 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
7768 7770 gcc_assert (preferred_alignment <= stack_alignment_needed);
7769 7771
7770 7772   /* During reload the number of registers saved can change.  Recompute the
7771 7773      value as needed.  Do not recompute when the number of registers didn't
7772 7774      change, as reload makes multiple calls to this function and does not
7773 7775      expect the decision to change within a single iteration. */
7774 7776 if (!optimize_function_for_size_p (cfun)
7775 7777 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
7776 7778 {
7777 7779 int count = frame->nregs;
7778 7780
7779 7781 cfun->machine->use_fast_prologue_epilogue_nregs = count;
7780 7782 /* The fast prologue uses move instead of push to save registers. This
7781 7783 is significantly longer, but also executes faster as modern hardware
7782 7784 can execute the moves in parallel, but can't do that for push/pop.
7783 7785
7784 7786 	 Be careful about choosing which prologue to emit:  when the function
7785 7787 	 takes many instructions to execute we may use the slow version, as well
7786 7788 	 as when the function is known to be outside a hot spot (this is known
7787 7789 	 with feedback only).  Weight the size of the function by the number of
7788 7790 	 registers to save, as it is cheap to use one or two push instructions
7789 7791 	 but very slow to use many of them. */
7790 7792 if (count)
7791 7793 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
7792 7794 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
7793 7795 || (flag_branch_probabilities
7794 7796 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
7795 7797 cfun->machine->use_fast_prologue_epilogue = false;
7796 7798 else
7797 7799 cfun->machine->use_fast_prologue_epilogue
7798 7800 = !expensive_function_p (count);
7799 7801 }
7800 7802 if (TARGET_PROLOGUE_USING_MOVE
7801 7803 && cfun->machine->use_fast_prologue_epilogue)
7802 7804 frame->save_regs_using_mov = true;
7803 7805 else
7804 7806 frame->save_regs_using_mov = false;
7805 7807
7806 7808 if (TARGET_SAVE_ARGS)
7807 7809 {
7808 7810 cfun->machine->use_fast_prologue_epilogue = true;
7809 7811 frame->save_regs_using_mov = true;
7810 7812 }
7811 7813
7812 7814 /* Skip return address and saved base pointer. */
7813 7815 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
7814 7816
7815 7817 frame->hard_frame_pointer_offset = offset;
7816 7818
7817 7819   /* Round the offset up to the needed alignment, because the realigned
7818 7820      frame starts here. */
7819 7821 if (stack_realign_fp)
7820 7822 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed;
7821 7823
7822 7824 /* Argument save area */
7823 7825 if (TARGET_SAVE_ARGS)
7824 7826 {
7825 7827 offset += frame->nmsave_args * UNITS_PER_WORD;
7826 7828 frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD;
7827 7829 offset += frame->padding0;
7828 7830 }
7829 7831 else
7830 7832 frame->padding0 = 0;
7831 7833
7832 7834 /* Register save area */
7833 7835 offset += frame->nregs * UNITS_PER_WORD;
7834 7836
7835 7837 /* Align SSE reg save area. */
7836 7838 if (frame->nsseregs)
7837 7839 frame->padding05 = ((offset + 16 - 1) & -16) - offset;
7838 7840 else
7839 7841 frame->padding05 = 0;
7840 7842
7841 7843 /* SSE register save area. */
7842 7844 offset += frame->padding05 + frame->nsseregs * 16;
7843 7845
7844 7846 /* Va-arg area */
7845 7847 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size;
7846 7848 offset += frame->va_arg_size;
7847 7849
7848 7850 /* Align start of frame for local function. */
7849 7851 frame->padding1 = ((offset + stack_alignment_needed - 1)
7850 7852 & -stack_alignment_needed) - offset;
7851 7853
7852 7854 offset += frame->padding1;
7853 7855
7854 7856 /* Frame pointer points here. */
7855 7857 frame->frame_pointer_offset = offset;
7856 7858
7857 7859 offset += size;
7858 7860
7859 7861   /* Add the outgoing arguments area.  It can be skipped if we eliminated
7860 7862      all the function calls as dead code.
7861 7863      Skipping is however impossible when the function calls alloca.  The
7862 7864      alloca expander assumes that the last crtl->outgoing_args_size
7863 7865      bytes of the stack frame are unused. */
7864 7866 if (ACCUMULATE_OUTGOING_ARGS
7865 7867 && (!current_function_is_leaf || cfun->calls_alloca
7866 7868 || ix86_current_function_calls_tls_descriptor))
7867 7869 {
7868 7870 offset += crtl->outgoing_args_size;
7869 7871 frame->outgoing_arguments_size = crtl->outgoing_args_size;
7870 7872 }
7871 7873 else
7872 7874 frame->outgoing_arguments_size = 0;
7873 7875
7874 7876 /* Align stack boundary. Only needed if we're calling another function
7875 7877 or using alloca. */
7876 7878 if (!current_function_is_leaf || cfun->calls_alloca
7877 7879 || ix86_current_function_calls_tls_descriptor)
7878 7880 frame->padding2 = ((offset + preferred_alignment - 1)
7879 7881 & -preferred_alignment) - offset;
7880 7882 else
7881 7883 frame->padding2 = 0;
7882 7884
7883 7885 offset += frame->padding2;
7884 7886
7885 7887 /* We've reached end of stack frame. */
7886 7888 frame->stack_pointer_offset = offset;
7887 7889
7888 7890 /* Size prologue needs to allocate. */
7889 7891 frame->to_allocate =
7890 7892 (size + frame->padding1 + frame->padding2
7891 7893 + frame->outgoing_arguments_size + frame->va_arg_size);
7892 7894
7893 7895 if (!TARGET_SAVE_ARGS
7894 7896 && ((!frame->to_allocate && frame->nregs <= 1)
7895 7897 || (TARGET_64BIT
7896 7898 && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000)))
7897 7899 frame->save_regs_using_mov = false;
7898 7900
7899 7901 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
7900 7902 && current_function_sp_is_unchanging
7901 7903 && current_function_is_leaf
7902 7904 && !ix86_current_function_calls_tls_descriptor)
7903 7905 {
7904 7906 frame->red_zone_size = frame->to_allocate;
7905 7907 if (frame->save_regs_using_mov)
7906 7908 {
7907 7909 frame->red_zone_size
7908 7910 += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD;
7909 7911 frame->red_zone_size += frame->padding0;
7910 7912 }
7911 7913 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
7912 7914 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
7913 7915 }
7914 7916 else
7915 7917 frame->red_zone_size = 0;
7916 7918 frame->to_allocate -= frame->red_zone_size;
7917 7919 frame->stack_pointer_offset -= frame->red_zone_size;
7918 7920 #if 0
7919 7921 fprintf (stderr, "\n");
7920 7922 fprintf (stderr, "size: %ld\n", (long)size);
7921 7923 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
7922 7924 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs);
7923 7925 fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args);
7924 7926 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0);
7925 7927   fprintf (stderr, "padding05: %ld\n", (long)frame->padding05);
7926 7928 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
7927 7929 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
7928 7930 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
7929 7931 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
7930 7932 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
7931 7933 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
7932 7934 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
7933 7935 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
7934 7936 (long)frame->hard_frame_pointer_offset);
7935 7937 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
7936 7938 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
7937 7939 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca);
7938 7940 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
7939 7941 #endif
7940 7942 }
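/* For illustration only, a rough worked example of the layout computed
   above for a 32-bit leaf function (UNITS_PER_WORD == 4), assuming a
   frame pointer, two saved call-preserved registers, 8 bytes of locals,
   a required stack alignment of 16, and no saved args, SSE saves,
   va-args or outgoing arguments:

     offset = 8				return address + saved %ebp
     hard_frame_pointer_offset = 8
     offset += 2 * 4 = 16		register save area
     padding1 = 0			16 is already 16-byte aligned
     frame_pointer_offset = 16
     offset += 8 = 24			locals
     stack_pointer_offset = 24
     to_allocate = 8			locals + paddings + outgoing args  */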
7941 7943
7942 7944
7943 7945 /* Emit code to save registers in the prologue. */
7944 7946
7945 7947 static void
7946 7948 ix86_emit_save_regs (void)
7947 7949 {
7948 7950 unsigned int regno;
7949 7951 rtx insn;
7950 7952
7951 7953 if (TARGET_SAVE_ARGS)
7952 7954 {
7953 7955 int i;
7954 7956 int nsaved = ix86_nsaved_args ();
7955 7957 int start = cfun->returns_struct;
7956 7958 for (i = start; i < start + nsaved; i++)
7957 7959 {
7958 7960 regno = x86_64_int_parameter_registers[i];
7959 7961 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7960 7962 RTX_FRAME_RELATED_P (insn) = 1;
7961 7963 }
7962 7964 if (nsaved % 2 != 0)
7963 7965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
7964 7966 GEN_INT (-UNITS_PER_WORD), -1);
7965 7967 }
7966 7968
7967 7969 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; )
7968 7970 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7969 7971 {
7970 7972 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
7971 7973 RTX_FRAME_RELATED_P (insn) = 1;
7972 7974 }
7973 7975 }
7974 7976
7975 7977 /* Emit code to save registers using MOV insns.  The first register
7976 7978    is stored at POINTER + OFFSET. */
7977 7979 static void
7978 7980 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
7979 7981 {
7980 7982 unsigned int regno;
7981 7983 rtx insn;
7982 7984
7983 7985 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
7984 7986 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
7985 7987 {
7986 7988 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
7987 7989 Pmode, offset),
7988 7990 gen_rtx_REG (Pmode, regno));
7989 7991 RTX_FRAME_RELATED_P (insn) = 1;
7990 7992 offset += UNITS_PER_WORD;
7991 7993 }
7992 7994
7993 7995 if (TARGET_SAVE_ARGS)
7994 7996 {
7995 7997 int i;
7996 7998 int nsaved = ix86_nsaved_args ();
7997 7999 int start = cfun->returns_struct;
7998 8000 if (nsaved % 2 != 0)
7999 8001 offset += UNITS_PER_WORD;
8000 8002 for (i = start + nsaved - 1; i >= start; i--)
8001 8003 {
8002 8004 regno = x86_64_int_parameter_registers[i];
8003 8005 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
8004 8006 Pmode, offset),
8005 8007 gen_rtx_REG (Pmode, regno));
8006 8008 RTX_FRAME_RELATED_P (insn) = 1;
8007 8009 offset += UNITS_PER_WORD;
8008 8010 }
8009 8011 }
8010 8012 }
8011 8013
8012 8014 /* Emit code to save SSE registers using MOV insns.  The first register
8013 8015    is stored at POINTER + OFFSET. */
8014 8016 static void
8015 8017 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
8016 8018 {
8017 8019 unsigned int regno;
8018 8020 rtx insn;
8019 8021 rtx mem;
8020 8022
8021 8023 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8022 8024 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true))
8023 8025 {
8024 8026 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset);
8025 8027 set_mem_align (mem, 128);
8026 8028 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno));
8027 8029 RTX_FRAME_RELATED_P (insn) = 1;
8028 8030 offset += 16;
8029 8031 }
8030 8032 }
8031 8033
8032 8034 /* Expand prologue or epilogue stack adjustment.
8033 8035    The pattern exists to put a dependency on all ebp-based memory accesses.
8034 8036 STYLE should be negative if instructions should be marked as frame related,
8035 8037 zero if %r11 register is live and cannot be freely used and positive
8036 8038 otherwise. */
8037 8039
8038 8040 static void
8039 8041 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
8040 8042 {
8041 8043 rtx insn;
8042 8044
8043 8045 if (! TARGET_64BIT)
8044 8046 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
8045 8047 else if (x86_64_immediate_operand (offset, DImode))
8046 8048 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
8047 8049 else
8048 8050 {
8049 8051 rtx r11;
8050 8052 /* r11 is used by indirect sibcall return as well, set before the
8051 8053 epilogue and used after the epilogue. ATM indirect sibcall
8052 8054 shouldn't be used together with huge frame sizes in one
8053 8055 function because of the frame_size check in sibcall.c. */
8054 8056 gcc_assert (style);
8055 8057 r11 = gen_rtx_REG (DImode, R11_REG);
8056 8058 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
8057 8059 if (style < 0)
8058 8060 RTX_FRAME_RELATED_P (insn) = 1;
8059 8061 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
8060 8062 offset));
8061 8063 }
8062 8064 if (style < 0)
8063 8065 RTX_FRAME_RELATED_P (insn) = 1;
8064 8066 }
8065 8067
8066 8068 /* Find an available register to be used as the dynamic realign argument
8067 8069    pointer register.  Such a register will be written in the prologue and
8068 8070    used at the beginning of the body, so it must not be
8069 8071 1. parameter passing register.
8070 8072 2. GOT pointer.
8071 8073 We reuse static-chain register if it is available. Otherwise, we
8072 8074 use DI for i386 and R13 for x86-64. We chose R13 since it has
8073 8075 shorter encoding.
8074 8076
8075 8077 Return: the regno of chosen register. */
8076 8078
8077 8079 static unsigned int
8078 8080 find_drap_reg (void)
8079 8081 {
8080 8082 tree decl = cfun->decl;
8081 8083
8082 8084 if (TARGET_64BIT)
8083 8085 {
8084 8086       /* Use R13 for a nested function or a function that needs a static chain.
8085 8087 	 Since a function with a tail call may use any caller-saved register
8086 8088 	 in the epilogue, DRAP must not use a caller-saved register in such
8087 8089 	 a case. */
8088 8090 if ((decl_function_context (decl)
8089 8091 && !DECL_NO_STATIC_CHAIN (decl))
8090 8092 || crtl->tail_call_emit)
8091 8093 return R13_REG;
8092 8094
8093 8095 return R10_REG;
8094 8096 }
8095 8097 else
8096 8098 {
8097 8099       /* Use DI for a nested function or a function that needs a static chain.
8098 8100 	 Since a function with a tail call may use any caller-saved register
8099 8101 	 in the epilogue, DRAP must not use a caller-saved register in such
8100 8102 	 a case. */
8101 8103 if ((decl_function_context (decl)
8102 8104 && !DECL_NO_STATIC_CHAIN (decl))
8103 8105 || crtl->tail_call_emit)
8104 8106 return DI_REG;
8105 8107
8106 8108 /* Reuse static chain register if it isn't used for parameter
8107 8109 passing. */
8108 8110 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2
8109 8111 && !lookup_attribute ("fastcall",
8110 8112 TYPE_ATTRIBUTES (TREE_TYPE (decl))))
8111 8113 return CX_REG;
8112 8114 else
8113 8115 return DI_REG;
8114 8116 }
8115 8117 }
8116 8118
8117 8119 /* Update incoming stack boundary and estimated stack alignment. */
8118 8120
8119 8121 static void
8120 8122 ix86_update_stack_boundary (void)
8121 8123 {
8122 8124 /* Prefer the one specified at command line. */
8123 8125 ix86_incoming_stack_boundary
8124 8126 = (ix86_user_incoming_stack_boundary
8125 8127 ? ix86_user_incoming_stack_boundary
8126 8128 : ix86_default_incoming_stack_boundary);
8127 8129
8128 8130 /* Incoming stack alignment can be changed on individual functions
8129 8131 via force_align_arg_pointer attribute. We use the smallest
8130 8132 incoming stack boundary. */
8131 8133 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY
8132 8134 && lookup_attribute (ix86_force_align_arg_pointer_string,
8133 8135 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
8134 8136 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY;
8135 8137
8136 8138 /* The incoming stack frame has to be aligned at least at
8137 8139 parm_stack_boundary. */
8138 8140 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary)
8139 8141 ix86_incoming_stack_boundary = crtl->parm_stack_boundary;
8140 8142
8141 8143 /* Stack at entrance of main is aligned by runtime. We use the
8142 8144 smallest incoming stack boundary. */
8143 8145 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY
8144 8146 && DECL_NAME (current_function_decl)
8145 8147 && MAIN_NAME_P (DECL_NAME (current_function_decl))
8146 8148 && DECL_FILE_SCOPE_P (current_function_decl))
8147 8149 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY;
8148 8150
8149 8151   /* An x86_64 vararg function needs 16-byte stack alignment for the
8150 8152      register save area. */
8151 8153 if (TARGET_64BIT
8152 8154 && cfun->stdarg
8153 8155 && crtl->stack_alignment_estimated < 128)
8154 8156 crtl->stack_alignment_estimated = 128;
8155 8157 }
8156 8158
8157 8159 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is
8158 8160 needed or an rtx for DRAP otherwise. */
8159 8161
8160 8162 static rtx
8161 8163 ix86_get_drap_rtx (void)
8162 8164 {
8163 8165 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS)
8164 8166 crtl->need_drap = true;
8165 8167
8166 8168 if (stack_realign_drap)
8167 8169 {
8168 8170       /* Assign DRAP to vDRAP and return vDRAP. */
8169 8171 unsigned int regno = find_drap_reg ();
8170 8172 rtx drap_vreg;
8171 8173 rtx arg_ptr;
8172 8174 rtx seq, insn;
8173 8175
8174 8176 arg_ptr = gen_rtx_REG (Pmode, regno);
8175 8177 crtl->drap_reg = arg_ptr;
8176 8178
8177 8179 start_sequence ();
8178 8180 drap_vreg = copy_to_reg (arg_ptr);
8179 8181 seq = get_insns ();
8180 8182 end_sequence ();
8181 8183
8182 8184 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ()));
8183 8185 RTX_FRAME_RELATED_P (insn) = 1;
8184 8186 return drap_vreg;
8185 8187 }
8186 8188 else
8187 8189 return NULL;
8188 8190 }
8189 8191
8190 8192 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
8191 8193
8192 8194 static rtx
8193 8195 ix86_internal_arg_pointer (void)
8194 8196 {
8195 8197 return virtual_incoming_args_rtx;
8196 8198 }
8197 8199
8198 8200 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
8199 8201 This is called from dwarf2out.c to emit call frame instructions
8200 8202 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
8201 8203 static void
8202 8204 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
8203 8205 {
8204 8206 rtx unspec = SET_SRC (pattern);
8205 8207 gcc_assert (GET_CODE (unspec) == UNSPEC);
8206 8208
8207 8209 switch (index)
8208 8210 {
8209 8211 case UNSPEC_REG_SAVE:
8210 8212 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
8211 8213 SET_DEST (pattern));
8212 8214 break;
8213 8215 case UNSPEC_DEF_CFA:
8214 8216 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
8215 8217 INTVAL (XVECEXP (unspec, 0, 0)));
8216 8218 break;
8217 8219 default:
8218 8220 gcc_unreachable ();
8219 8221 }
8220 8222 }
8221 8223
8222 8224 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue
8223 8225 to be generated in correct form. */
8224 8226 static void
8225 8227 ix86_finalize_stack_realign_flags (void)
8226 8228 {
8227 8229   /* Check whether stack realignment is really needed after reload, and
8228 8230      store the result in cfun. */
8229 8231 unsigned int incoming_stack_boundary
8230 8232 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary
8231 8233 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary);
8232 8234 unsigned int stack_realign = (incoming_stack_boundary
8233 8235 < (current_function_is_leaf
8234 8236 ? crtl->max_used_stack_slot_alignment
8235 8237 : crtl->stack_alignment_needed));
8236 8238
8237 8239 if (crtl->stack_realign_finalized)
8238 8240 {
8239 8241       /* After stack_realign_needed is finalized, we can no longer
8240 8242 	 change it. */
8241 8243 gcc_assert (crtl->stack_realign_needed == stack_realign);
8242 8244 }
8243 8245 else
8244 8246 {
8245 8247 crtl->stack_realign_needed = stack_realign;
8246 8248 crtl->stack_realign_finalized = true;
8247 8249 }
8248 8250 }
8249 8251
8250 8252 /* Expand the prologue into a bunch of separate insns. */
8251 8253
8252 8254 void
8253 8255 ix86_expand_prologue (void)
8254 8256 {
8255 8257 rtx insn;
8256 8258 bool pic_reg_used;
8257 8259 struct ix86_frame frame;
8258 8260 HOST_WIDE_INT allocate;
8259 8261
8260 8262 ix86_finalize_stack_realign_flags ();
8261 8263
8262 8264 /* DRAP should not coexist with stack_realign_fp */
8263 8265 gcc_assert (!(crtl->drap_reg && stack_realign_fp));
8264 8266
8265 8267 ix86_compute_frame_layout (&frame);
8266 8268
8267 8269   /* Emit prologue code to adjust stack alignment and set up DRAP, in case
8268 8270      DRAP is needed and stack realignment is really needed after reload. */
8269 8271 if (crtl->drap_reg && crtl->stack_realign_needed)
8270 8272 {
8271 8273 rtx x, y;
8272 8274 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8273 8275 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8274 8276 ? 0 : UNITS_PER_WORD);
8275 8277
8276 8278 gcc_assert (stack_realign_drap);
8277 8279
8278 8280 /* Grab the argument pointer. */
8279 8281 x = plus_constant (stack_pointer_rtx,
8280 8282 (UNITS_PER_WORD + param_ptr_offset));
8281 8283 y = crtl->drap_reg;
8282 8284
8283 8285       /* Only need to push the parameter pointer reg if it is a
8284 8286 	 caller-saved reg. */
8285 8287 if (!call_used_regs[REGNO (crtl->drap_reg)])
8286 8288 {
8287 8289 /* Push arg pointer reg */
8288 8290 insn = emit_insn (gen_push (y));
8289 8291 RTX_FRAME_RELATED_P (insn) = 1;
8290 8292 }
8291 8293
8292 8294 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
8293 8295 RTX_FRAME_RELATED_P (insn) = 1;
8294 8296
8295 8297 /* Align the stack. */
8296 8298 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8297 8299 stack_pointer_rtx,
8298 8300 GEN_INT (-align_bytes)));
8299 8301 RTX_FRAME_RELATED_P (insn) = 1;
8300 8302
8301 8303 /* Replicate the return address on the stack so that return
8302 8304 address can be reached via (argp - 1) slot. This is needed
8303 8305 to implement macro RETURN_ADDR_RTX and intrinsic function
8304 8306 expand_builtin_return_addr etc. */
8305 8307 x = crtl->drap_reg;
8306 8308 x = gen_frame_mem (Pmode,
8307 8309 plus_constant (x, -UNITS_PER_WORD));
8308 8310 insn = emit_insn (gen_push (x));
8309 8311 RTX_FRAME_RELATED_P (insn) = 1;
8310 8312 }
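      /* For illustration only, when the DRAP register is call-clobbered
	 (say %ecx) and the needed alignment is 16, the 32-bit sequence
	 emitted by the block above is roughly:

	    leal  4(%esp), %ecx		grab the argument pointer
	    andl  $-16, %esp		align the stack
	    pushl -4(%ecx)		replicate the return address  */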
8311 8313
8312 8314 /* Note: AT&T enter does NOT have reversed args. Enter is probably
8313 8315 slower on all targets. Also sdb doesn't like it. */
8314 8316
8315 8317 if (frame_pointer_needed)
8316 8318 {
8317 8319 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
8318 8320 RTX_FRAME_RELATED_P (insn) = 1;
8319 8321
8320 8322 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
8321 8323 RTX_FRAME_RELATED_P (insn) = 1;
8322 8324 }
8323 8325
8324 8326 if (stack_realign_fp)
8325 8327 {
8326 8328 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT;
8327 8329 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT);
8328 8330
8329 8331 /* Align the stack. */
8330 8332 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx,
8331 8333 stack_pointer_rtx,
8332 8334 GEN_INT (-align_bytes)));
8333 8335 RTX_FRAME_RELATED_P (insn) = 1;
8334 8336 }
8335 8337
8336 8338 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05;
8337 8339
8338 8340 if (!frame.save_regs_using_mov)
8339 8341 ix86_emit_save_regs ();
8340 8342 else
8341 8343 allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD
8342 8344 + frame.padding0;
8343 8345
8344 8346   /* When using the red zone we may start register saving before allocating
8345 8347      the stack frame, saving one cycle of the prologue.  However, avoid doing
8346 8348      this if we are going to have to probe the stack, since at least on
8347 8349      x86_64 the stack probe can turn into a call that clobbers a red zone
8348 8350      location. */
8349 8351 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov
8350 8352 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))
8351 8353 ix86_emit_save_regs_using_mov ((frame_pointer_needed
8352 8354 && !crtl->stack_realign_needed)
8353 8355 ? hard_frame_pointer_rtx
8354 8356 : stack_pointer_rtx,
8355 8357 -(frame.nregs + frame.nmsave_args)
8356 8358 * UNITS_PER_WORD - frame.padding0);
8357 8359
8358 8360 if (allocate == 0)
8359 8361 ;
8360 8362 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
8361 8363 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8362 8364 GEN_INT (-allocate), -1);
8363 8365 else
8364 8366 {
8365 8367 rtx eax = gen_rtx_REG (Pmode, AX_REG);
8366 8368 bool eax_live;
8367 8369 rtx t;
8368 8370
8369 8371 if (cfun->machine->call_abi == MS_ABI)
8370 8372 eax_live = false;
8371 8373 else
8372 8374 eax_live = ix86_eax_live_at_start_p ();
8373 8375
8374 8376 if (eax_live)
8375 8377 {
8376 8378 emit_insn (gen_push (eax));
8377 8379 allocate -= UNITS_PER_WORD;
8378 8380 }
8379 8381
8380 8382 emit_move_insn (eax, GEN_INT (allocate));
8381 8383
8382 8384 if (TARGET_64BIT)
8383 8385 insn = gen_allocate_stack_worker_64 (eax, eax);
8384 8386 else
8385 8387 insn = gen_allocate_stack_worker_32 (eax, eax);
8386 8388 insn = emit_insn (insn);
8387 8389 RTX_FRAME_RELATED_P (insn) = 1;
8388 8390 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
8389 8391 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
8390 8392 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
8391 8393 t, REG_NOTES (insn));
8392 8394
8393 8395 if (eax_live)
8394 8396 {
8395 8397 if (frame_pointer_needed)
8396 8398 t = plus_constant (hard_frame_pointer_rtx,
8397 8399 allocate
8398 8400 - frame.to_allocate
8399 8401 - frame.nregs * UNITS_PER_WORD);
8400 8402 else
8401 8403 t = plus_constant (stack_pointer_rtx, allocate);
8402 8404 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
8403 8405 }
8404 8406 }
8405 8407
8406 8408 if (frame.save_regs_using_mov
8407 8409 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE
8408 8410 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)))
8409 8411 {
8410 8412 if (!TARGET_SAVE_ARGS &&
8411 8413 (!frame_pointer_needed
8412 8414 || !(frame.to_allocate + frame.padding05)
8413 8415 || crtl->stack_realign_needed))
8414 8416 ix86_emit_save_regs_using_mov (stack_pointer_rtx,
8415 8417 frame.to_allocate
8416 8418 + frame.nsseregs * 16 + frame.padding05);
8417 8419 else
8418 8420 /* XXX: Does this need help for SSE? */
8419 8421 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
8420 8422 -(frame.nregs + frame.nmsave_args)
8421 8423 * UNITS_PER_WORD - frame.padding0);
8422 8424 }
8423 8425   /* XXX: Do these need help for save-args? */
8424 8426 if (!frame_pointer_needed
8425 8427 || !(frame.to_allocate + frame.padding0)
8426 8428 || crtl->stack_realign_needed)
8427 8429 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx,
8428 8430 frame.to_allocate);
8429 8431 else
8430 8432 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx,
8431 8433 - frame.nregs * UNITS_PER_WORD
8432 8434 - frame.nsseregs * 16
8433 8435 - frame.padding05);
8434 8436
8435 8437 pic_reg_used = false;
8436 8438 if (pic_offset_table_rtx
8437 8439 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
8438 8440 || crtl->profile))
8439 8441 {
8440 8442 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
8441 8443
8442 8444 if (alt_pic_reg_used != INVALID_REGNUM)
8443 8445 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
8444 8446
8445 8447 pic_reg_used = true;
8446 8448 }
8447 8449
8448 8450 if (pic_reg_used)
8449 8451 {
8450 8452 if (TARGET_64BIT)
8451 8453 {
8452 8454 if (ix86_cmodel == CM_LARGE_PIC)
8453 8455 {
8454 8456 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG);
8455 8457 rtx label = gen_label_rtx ();
8456 8458 emit_label (label);
8457 8459 LABEL_PRESERVE_P (label) = 1;
8458 8460 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
8459 8461 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
8460 8462 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
8461 8463 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
8462 8464 pic_offset_table_rtx, tmp_reg));
8463 8465 }
8464 8466 else
8465 8467 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
8466 8468 }
8467 8469 else
8468 8470 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
8469 8471 }
8470 8472
8471 8473 /* In the pic_reg_used case, make sure that the got load isn't deleted
8472 8474 when mcount needs it. Blockage to avoid call movement across mcount
8473 8475 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END
8474 8476 note. */
8475 8477 if (crtl->profile && pic_reg_used)
8476 8478 emit_insn (gen_prologue_use (pic_offset_table_rtx));
8477 8479
8478 8480 if (crtl->drap_reg && !crtl->stack_realign_needed)
8479 8481 {
8480 8482       /* vDRAP is set up, but after reload it turns out that stack realignment
8481 8483 	 isn't necessary; here we emit prologue code to set up DRAP without
8482 8484 	 the stack realignment adjustment. */
8483 8485 int drap_bp_offset = UNITS_PER_WORD * 2;
8484 8486 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset);
8485 8487 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x));
8486 8488 }
8487 8489
8488 8490   /* Prevent instructions from being scheduled into the register save push
8489 8491      sequence when access to the red zone area is done through the frame
8490 8492      pointer.  The offset between the frame pointer and the stack pointer is
8491 8493      calculated relative to the value of the stack pointer at the end of the
8492 8494      function prologue, and moving instructions that access the red zone area
8493 8495      via the frame pointer inside the push sequence violates this assumption. */
8494 8496 if (frame_pointer_needed && frame.red_zone_size)
8495 8497 emit_insn (gen_memory_blockage ());
8496 8498
8497 8499 /* Emit cld instruction if stringops are used in the function. */
8498 8500 if (TARGET_CLD && ix86_current_function_needs_cld)
8499 8501 emit_insn (gen_cld ());
8500 8502 }
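/* For illustration only, a typical 32-bit prologue produced by the
   function above, assuming a frame pointer, one call-preserved register
   saved with push and a small frame (the register choice and N are just
   examples):

     pushl %ebp
     movl  %esp, %ebp
     pushl %ebx
     subl  $N, %esp			N == frame.to_allocate

   With save_regs_using_mov the pushes become movs into the already
   allocated frame, and with a very large N the allocation instead goes
   through the stack-probing worker using %eax.  */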
8501 8503
8502 8504 /* Emit code to restore saved registers using MOV insns. First register
8503 8505 is restored from POINTER + OFFSET. */
8504 8506 static void
8505 8507 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8506 8508 int maybe_eh_return)
8507 8509 {
8508 8510 int regno;
8509 8511 rtx base_address = gen_rtx_MEM (Pmode, pointer);
8510 8512
8511 8513 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8512 8514 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8513 8515 {
8514 8516 /* Ensure that adjust_address won't be forced to produce pointer
8515 8517 out of range allowed by x86-64 instruction set. */
8516 8518 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8517 8519 {
8518 8520 rtx r11;
8519 8521
8520 8522 r11 = gen_rtx_REG (DImode, R11_REG);
8521 8523 emit_move_insn (r11, GEN_INT (offset));
8522 8524 emit_insn (gen_adddi3 (r11, r11, pointer));
8523 8525 base_address = gen_rtx_MEM (Pmode, r11);
8524 8526 offset = 0;
8525 8527 }
8526 8528 emit_move_insn (gen_rtx_REG (Pmode, regno),
8527 8529 adjust_address (base_address, Pmode, offset));
8528 8530 offset += UNITS_PER_WORD;
8529 8531 }
8530 8532 }
8531 8533
8532 8534 /* Emit code to restore saved registers using MOV insns. First register
8533 8535 is restored from POINTER + OFFSET. */
8534 8536 static void
8535 8537 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
8536 8538 int maybe_eh_return)
8537 8539 {
8538 8540 int regno;
8539 8541 rtx base_address = gen_rtx_MEM (TImode, pointer);
8540 8542 rtx mem;
8541 8543
8542 8544 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8543 8545 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return))
8544 8546 {
8545 8547 /* Ensure that adjust_address won't be forced to produce pointer
8546 8548 out of range allowed by x86-64 instruction set. */
8547 8549 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
8548 8550 {
8549 8551 rtx r11;
8550 8552
8551 8553 r11 = gen_rtx_REG (DImode, R11_REG);
8552 8554 emit_move_insn (r11, GEN_INT (offset));
8553 8555 emit_insn (gen_adddi3 (r11, r11, pointer));
8554 8556 base_address = gen_rtx_MEM (TImode, r11);
8555 8557 offset = 0;
8556 8558 }
8557 8559 mem = adjust_address (base_address, TImode, offset);
8558 8560 set_mem_align (mem, 128);
8559 8561 emit_move_insn (gen_rtx_REG (TImode, regno), mem);
8560 8562 offset += 16;
8561 8563 }
8562 8564 }
8563 8565
8564 8566 /* Restore function stack, frame, and registers. */
8565 8567
8566 8568 void
8567 8569 ix86_expand_epilogue (int style)
8568 8570 {
8569 8571 int regno;
8570 8572 int sp_valid;
8571 8573 struct ix86_frame frame;
8572 8574 HOST_WIDE_INT offset;
8573 8575
8574 8576 ix86_finalize_stack_realign_flags ();
8575 8577
8576 8578 /* When stack is realigned, SP must be valid. */
8577 8579 sp_valid = (!frame_pointer_needed
8578 8580 || current_function_sp_is_unchanging
8579 8581 || stack_realign_fp);
8580 8582
8581 8583 ix86_compute_frame_layout (&frame);
8582 8584
8583 8585 /* See the comment about red zone and frame
8584 8586 pointer usage in ix86_expand_prologue. */
8585 8587 if (frame_pointer_needed && frame.red_zone_size)
8586 8588 emit_insn (gen_memory_blockage ());
8587 8589
8588 8590 /* Calculate start of saved registers relative to ebp. Special care
8589 8591 must be taken for the normal return case of a function using
8590 8592 eh_return: the eax and edx registers are marked as saved, but not
8591 8593 restored along this path. */
8592 8594 offset = frame.nregs + frame.nmsave_args;
8593 8595 if (crtl->calls_eh_return && style != 2)
8594 8596 offset -= 2;
8595 8597 offset *= -UNITS_PER_WORD;
8596 8598 offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0;
8597 8599
8598 8600   /* If we're only restoring one register and sp is not valid, then use a
8599 8601      move instruction to restore the register, since it's less work than
8600 8602      reloading sp and popping the register.
8601 8603 
8602 8604      The default code results in a stack adjustment using an add/lea
8603 8605      instruction, while this code results in a LEAVE instruction (or discrete
8604 8606      equivalent), so it is profitable in some other cases as well, especially
8605 8607      when there are no registers to restore.  We also use this code when
8606 8608      TARGET_USE_LEAVE and there is exactly one register to pop.  This
8607 8609      heuristic may need some tuning in future. */
8608 8610 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1)
8609 8611 || (TARGET_EPILOGUE_USING_MOVE
8610 8612 && cfun->machine->use_fast_prologue_epilogue
8611 8613 && ((frame.nregs + frame.nsseregs) > 1
8612 8614 || (frame.to_allocate + frame.padding0) != 0))
8613 8615 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs)
8614 8616 && (frame.to_allocate + frame.padding0) != 0)
8615 8617 || (frame_pointer_needed && TARGET_USE_LEAVE
8616 8618 && cfun->machine->use_fast_prologue_epilogue
8617 8619 && (frame.nregs + frame.nsseregs) == 1)
8618 8620 || crtl->calls_eh_return)
8619 8621 {
8620 8622       /* Restore registers.  We can use ebp or esp to address the memory
8621 8623 	 locations.  If both are available, default to ebp, since offsets
8622 8624 	 are known to be small.  The only exception is esp pointing directly
8623 8625 	 to the end of the block of saved registers, where we may simplify
8624 8626 	 the addressing mode.
8625 8627 
8626 8628 	 If we are realigning the stack with bp and sp, the register restores
8627 8629 	 can't be addressed by bp; sp must be used instead. */
8628 8630
8629 8631 if (!frame_pointer_needed
8630 8632 || (sp_valid && !(frame.to_allocate + frame.padding0))
8631 8633 || stack_realign_fp)
8632 8634 {
8633 8635 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8634 8636 frame.to_allocate, style == 2);
8635 8637 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
8636 8638 frame.to_allocate
8637 8639 + frame.nsseregs * 16
8638 8640 + frame.padding05, style == 2);
8639 8641 }
8640 8642 else
8641 8643 {
8642 8644 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx,
8643 8645 offset, style == 2);
8644 8646 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
8645 8647 offset
8646 8648 + frame.nsseregs * 16
8647 8649 + frame.padding05, style == 2);
8648 8650 }
8649 8651
8650 8652 /* eh_return epilogues need %ecx added to the stack pointer. */
8651 8653 if (style == 2)
8652 8654 {
8653 8655 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
8654 8656
8655 8657 /* Stack align doesn't work with eh_return. */
8656 8658 gcc_assert (!crtl->stack_realign_needed);
8657 8659
8658 8660 if (frame_pointer_needed)
8659 8661 {
8660 8662 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
8661 8663 tmp = plus_constant (tmp, UNITS_PER_WORD);
8662 8664 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
8663 8665
8664 8666 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
8665 8667 emit_move_insn (hard_frame_pointer_rtx, tmp);
8666 8668
8667 8669 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
8668 8670 const0_rtx, style);
8669 8671 }
8670 8672 else
8671 8673 {
8672 8674 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
8673 8675 tmp = plus_constant (tmp, (frame.to_allocate
8674 8676 + (frame.nregs + frame.nmsave_args)
8675 8677 * UNITS_PER_WORD
8676 8678 + frame.nsseregs * 16
8677 8679 + frame.padding05 + frame.padding0));
8678 8680 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
8679 8681 }
8680 8682 }
8681 8683 else if (!frame_pointer_needed)
8682 8684 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8683 8685 GEN_INT (frame.to_allocate
8684 8686 + (frame.nregs + frame.nmsave_args)
8685 8687 * UNITS_PER_WORD
8686 8688 + frame.nsseregs * 16
8687 8689 + frame.padding05 + frame.padding0),
8688 8690 style);
8689 8691 /* If not an i386, mov & pop is faster than "leave". */
8690 8692 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun)
8691 8693 || !cfun->machine->use_fast_prologue_epilogue)
8692 8694 emit_insn ((*ix86_gen_leave) ());
8693 8695 else
8694 8696 {
8695 8697 pro_epilogue_adjust_stack (stack_pointer_rtx,
8696 8698 hard_frame_pointer_rtx,
8697 8699 const0_rtx, style);
8698 8700
8699 8701 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8700 8702 }
8701 8703 }
8702 8704 else
8703 8705 {
8704 8706       /* The first step is to deallocate the stack frame so that we can
8705 8707 	 pop the registers.
8706 8708 
8707 8709 	 If we realign the stack with the frame pointer, then the stack pointer
8708 8710 	 can't be recovered via lea $offset(%bp), %sp, because there is a
8709 8711 	 padding area between bp and sp for the realignment.
8710 8712 	 "add $to_allocate, %sp" must be used instead. */
8711 8713 if (!sp_valid)
8712 8714 {
8713 8715 gcc_assert (frame_pointer_needed);
8714 8716 gcc_assert (!stack_realign_fp);
8715 8717 pro_epilogue_adjust_stack (stack_pointer_rtx,
8716 8718 hard_frame_pointer_rtx,
8717 8719 GEN_INT (offset), style);
8718 8720 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8719 8721 0, style == 2);
8720 8722 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8721 8723 GEN_INT (frame.nsseregs * 16 +
8722 8724 frame.padding0), style);
8723 8725 }
8724 8726 else if (frame.to_allocate || frame.padding0 || frame.nsseregs)
8725 8727 {
8726 8728 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx,
8727 8729 frame.to_allocate,
8728 8730 style == 2);
8729 8731 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8730 8732 GEN_INT (frame.to_allocate
8731 8733 + frame.nsseregs * 16
8732 8734 + frame.padding05), style);
8733 8735 }
8734 8736
8735 8737 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
8736 8738 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false))
8737 8739 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno)));
8738 8740
8739 8741 /* XXX: Needs adjustment for SSE regs? */
8740 8742 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
8741 8743 GEN_INT (frame.nmsave_args * UNITS_PER_WORD
8742 8744 + frame.padding0), style);
8743 8745 if (frame_pointer_needed)
8744 8746 {
8745 8747 /* Leave results in shorter dependency chains on CPUs that are
8746 8748 able to grok it fast. */
8747 8749 if (TARGET_USE_LEAVE)
8748 8750 emit_insn ((*ix86_gen_leave) ());
8749 8751 else
8750 8752 {
8751 8753 	      /* If stack realignment really happened, recovering the stack
8752 8754 		 pointer from the hard frame pointer is a must, if not using
8753 8755 		 leave. */
8754 8756 if (stack_realign_fp)
8755 8757 pro_epilogue_adjust_stack (stack_pointer_rtx,
8756 8758 hard_frame_pointer_rtx,
8757 8759 const0_rtx, style);
8758 8760 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx));
8759 8761 }
8760 8762 }
8761 8763 }
8762 8764
8763 8765 if (crtl->drap_reg && crtl->stack_realign_needed)
8764 8766 {
8765 8767 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)]
8766 8768 ? 0 : UNITS_PER_WORD);
8767 8769 gcc_assert (stack_realign_drap);
8768 8770 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx,
8769 8771 crtl->drap_reg,
8770 8772 GEN_INT (-(UNITS_PER_WORD
8771 8773 + param_ptr_offset))));
8772 8774 if (!call_used_regs[REGNO (crtl->drap_reg)])
8773 8775 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg));
8774 8776
8775 8777 }
8776 8778
8777 8779 /* Sibcall epilogues don't want a return instruction. */
8778 8780 if (style == 0)
8779 8781 return;
8780 8782
8781 8783 if (crtl->args.pops_args && crtl->args.size)
8782 8784 {
8783 8785 rtx popc = GEN_INT (crtl->args.pops_args);
8784 8786
8785 8787 /* i386 can only pop 64K bytes. If asked to pop more, pop
8786 8788 return address, do explicit add, and jump indirectly to the
8787 8789 caller. */
8788 8790
8789 8791 if (crtl->args.pops_args >= 65536)
8790 8792 {
8791 8793 rtx ecx = gen_rtx_REG (SImode, CX_REG);
8792 8794
8793 8795 /* There is no "pascal" calling convention in any 64bit ABI. */
8794 8796 gcc_assert (!TARGET_64BIT);
8795 8797
8796 8798 emit_insn (gen_popsi1 (ecx));
8797 8799 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
8798 8800 emit_jump_insn (gen_return_indirect_internal (ecx));
8799 8801 }
8800 8802 else
8801 8803 emit_jump_insn (gen_return_pop_internal (popc));
8802 8804 }
8803 8805 else
8804 8806 emit_jump_insn (gen_return_internal ());
8805 8807 }
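/* For illustration only, two common 32-bit epilogue shapes the function
   above can produce with a frame pointer (OFFSET, N and the register are
   just examples; the heuristics above choose between the shapes):

     restore with moves, then leave:
	movl  -OFFSET(%ebp), %ebx
	leave
	ret

     deallocate, then pop:
	addl  $N, %esp
	popl  %ebx
	leave				(or popl %ebp without TARGET_USE_LEAVE)
	ret  */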
8806 8808
8807 8809 /* Reset from the function's potential modifications. */
8808 8810
8809 8811 static void
8810 8812 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
8811 8813 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
8812 8814 {
8813 8815 if (pic_offset_table_rtx)
8814 8816 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
8815 8817 #if TARGET_MACHO
8816 8818 /* Mach-O doesn't support labels at the end of objects, so if
8817 8819 it looks like we might want one, insert a NOP. */
8818 8820 {
8819 8821 rtx insn = get_last_insn ();
8820 8822 while (insn
8821 8823 && NOTE_P (insn)
8822 8824 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
8823 8825 insn = PREV_INSN (insn);
8824 8826 if (insn
8825 8827 && (LABEL_P (insn)
8826 8828 || (NOTE_P (insn)
8827 8829 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
8828 8830 fputs ("\tnop\n", file);
8829 8831 }
8830 8832 #endif
8831 8833
8832 8834 }
8833 8835
8834 8836 /* Extract the parts of an RTL expression that is a valid memory address
8835 8837 for an instruction. Return 0 if the structure of the address is
8836 8838 grossly off. Return -1 if the address contains ASHIFT, so it is not
8837 8839 strictly valid, but still used for computing length of lea instruction. */
8838 8840
8839 8841 int
8840 8842 ix86_decompose_address (rtx addr, struct ix86_address *out)
8841 8843 {
8842 8844 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
8843 8845 rtx base_reg, index_reg;
8844 8846 HOST_WIDE_INT scale = 1;
8845 8847 rtx scale_rtx = NULL_RTX;
8846 8848 int retval = 1;
8847 8849 enum ix86_address_seg seg = SEG_DEFAULT;
8848 8850
8849 8851 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
8850 8852 base = addr;
8851 8853 else if (GET_CODE (addr) == PLUS)
8852 8854 {
8853 8855 rtx addends[4], op;
8854 8856 int n = 0, i;
8855 8857
8856 8858 op = addr;
8857 8859 do
8858 8860 {
8859 8861 if (n >= 4)
8860 8862 return 0;
8861 8863 addends[n++] = XEXP (op, 1);
8862 8864 op = XEXP (op, 0);
8863 8865 }
8864 8866 while (GET_CODE (op) == PLUS);
8865 8867 if (n >= 4)
8866 8868 return 0;
8867 8869 addends[n] = op;
8868 8870
8869 8871 for (i = n; i >= 0; --i)
8870 8872 {
8871 8873 op = addends[i];
8872 8874 switch (GET_CODE (op))
8873 8875 {
8874 8876 case MULT:
8875 8877 if (index)
8876 8878 return 0;
8877 8879 index = XEXP (op, 0);
8878 8880 scale_rtx = XEXP (op, 1);
8879 8881 break;
8880 8882
8881 8883 case UNSPEC:
8882 8884 if (XINT (op, 1) == UNSPEC_TP
8883 8885 && TARGET_TLS_DIRECT_SEG_REFS
8884 8886 && seg == SEG_DEFAULT)
8885 8887 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
8886 8888 else
8887 8889 return 0;
8888 8890 break;
8889 8891
8890 8892 case REG:
8891 8893 case SUBREG:
8892 8894 if (!base)
8893 8895 base = op;
8894 8896 else if (!index)
8895 8897 index = op;
8896 8898 else
8897 8899 return 0;
8898 8900 break;
8899 8901
8900 8902 case CONST:
8901 8903 case CONST_INT:
8902 8904 case SYMBOL_REF:
8903 8905 case LABEL_REF:
8904 8906 if (disp)
8905 8907 return 0;
8906 8908 disp = op;
8907 8909 break;
8908 8910
8909 8911 default:
8910 8912 return 0;
8911 8913 }
8912 8914 }
8913 8915 }
8914 8916 else if (GET_CODE (addr) == MULT)
8915 8917 {
8916 8918 index = XEXP (addr, 0); /* index*scale */
8917 8919 scale_rtx = XEXP (addr, 1);
8918 8920 }
8919 8921 else if (GET_CODE (addr) == ASHIFT)
8920 8922 {
8921 8923 rtx tmp;
8922 8924
8923 8925 /* We're called for lea too, which implements ashift on occasion. */
8924 8926 index = XEXP (addr, 0);
8925 8927 tmp = XEXP (addr, 1);
8926 8928 if (!CONST_INT_P (tmp))
8927 8929 return 0;
8928 8930 scale = INTVAL (tmp);
8929 8931 if ((unsigned HOST_WIDE_INT) scale > 3)
8930 8932 return 0;
8931 8933 scale = 1 << scale;
8932 8934 retval = -1;
8933 8935 }
8934 8936 else
8935 8937 disp = addr; /* displacement */
8936 8938
8937 8939 /* Extract the integral value of scale. */
8938 8940 if (scale_rtx)
8939 8941 {
8940 8942 if (!CONST_INT_P (scale_rtx))
8941 8943 return 0;
8942 8944 scale = INTVAL (scale_rtx);
8943 8945 }
8944 8946
8945 8947 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
8946 8948 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
8947 8949
8948 8950   /* Allow arg pointer and stack pointer as index if there is no scaling. */
8949 8951 if (base_reg && index_reg && scale == 1
8950 8952 && (index_reg == arg_pointer_rtx
8951 8953 || index_reg == frame_pointer_rtx
8952 8954 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
8953 8955 {
8954 8956 rtx tmp;
8955 8957 tmp = base, base = index, index = tmp;
8956 8958 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
8957 8959 }
8958 8960
8959 8961 /* Special case: %ebp cannot be encoded as a base without a displacement. */
8960 8962 if ((base_reg == hard_frame_pointer_rtx
8961 8963 || base_reg == frame_pointer_rtx
8962 8964 || base_reg == arg_pointer_rtx) && !disp)
8963 8965 disp = const0_rtx;
8964 8966
8965 8967 /* Special case: on K6, [%esi] makes the instruction vector decoded.
8966 8968 Avoid this by transforming to [%esi+0].
8967 8969 Reload calls address legitimization without cfun defined, so we need
8968 8970 to test cfun for being non-NULL. */
8969 8971 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun)
8970 8972 && base_reg && !index_reg && !disp
8971 8973 && REG_P (base_reg)
8972 8974 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
8973 8975 disp = const0_rtx;
8974 8976
8975 8977 /* Special case: encode reg+reg instead of reg*2. */
8976 8978 if (!base && index && scale && scale == 2)
8977 8979 base = index, base_reg = index_reg, scale = 1;
8978 8980
8979 8981 /* Special case: scaling cannot be encoded without base or displacement. */
8980 8982 if (!base && !disp && index && scale != 1)
8981 8983 disp = const0_rtx;
8982 8984
8983 8985 out->base = base;
8984 8986 out->index = index;
8985 8987 out->disp = disp;
8986 8988 out->scale = scale;
8987 8989 out->seg = seg;
8988 8990
8989 8991 return retval;
8990 8992 }
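/* For illustration only, decomposing the operand of "12(%ebx,%eax,4)",
   i.e. (plus (plus (mult (reg %eax) (const_int 4)) (reg %ebx))
	      (const_int 12)),
   with the function above yields base == %ebx, index == %eax,
   scale == 4, disp == (const_int 12), seg == SEG_DEFAULT and a return
   value of 1.  */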
8991 8993
8992 8994 /* Return the cost of the memory address x.
8993 8995    For i386, it is better to use a complex address than let gcc copy
8994 8996    the address into a reg and make a new pseudo.  But not if the address
8995 8997    requires two regs - that would mean more pseudos with longer
8996 8998    lifetimes. */
8997 8999 static int
8998 9000 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED)
8999 9001 {
9000 9002 struct ix86_address parts;
9001 9003 int cost = 1;
9002 9004 int ok = ix86_decompose_address (x, &parts);
9003 9005
9004 9006 gcc_assert (ok);
9005 9007
9006 9008 if (parts.base && GET_CODE (parts.base) == SUBREG)
9007 9009 parts.base = SUBREG_REG (parts.base);
9008 9010 if (parts.index && GET_CODE (parts.index) == SUBREG)
9009 9011 parts.index = SUBREG_REG (parts.index);
9010 9012
9011 9013 /* Attempt to minimize number of registers in the address. */
9012 9014 if ((parts.base
9013 9015 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
9014 9016 || (parts.index
9015 9017 && (!REG_P (parts.index)
9016 9018 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
9017 9019 cost++;
9018 9020
9019 9021 if (parts.base
9020 9022 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
9021 9023 && parts.index
9022 9024 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
9023 9025 && parts.base != parts.index)
9024 9026 cost++;
9025 9027
9026 9028   /* The AMD K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
9027 9029      since its predecode logic can't detect the length of such instructions
9028 9030      and they degenerate to vector decoding.  Increase the cost of such
9029 9031      addresses here.  The penalty is minimally 2 cycles.  It may be worthwhile
9030 9032      to split such addresses or even refuse them at all.
9031 9033 
9032 9034      The following addressing modes are affected:
9033 9035       [base+scale*index]
9034 9036       [scale*index+disp]
9035 9037       [base+index]
9036 9038 
9037 9039      The first and last cases may be avoidable by explicitly coding the zero
9038 9040      in the memory address, but I don't have an AMD K6 machine handy to check
9039 9041      this theory. */
9040 9042
9041 9043 if (TARGET_K6
9042 9044 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
9043 9045 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
9044 9046 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
9045 9047 cost += 10;
9046 9048
9047 9049 return cost;
9048 9050 }
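/* For illustration, a sketch (assuming typical AT&T-syntax output) of the
   three penalized forms listed above:

     movl (%ebx,%ecx,4), %eax     [base+scale*index]
     movl 16(,%ecx,4), %eax       [scale*index+disp]
     movl (%ebx,%ecx), %eax       [base+index]

   whereas coding an explicit zero displacement, e.g.
   movl 0(%ebx,%ecx,4), %eax, may avoid the penalty, as the comment
   speculates.  */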
9049 9051
9050 9052 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
9051 9053 this is used to form addresses to local data when -fPIC is in
9052 9054 use. */
9053 9055
9054 9056 static bool
9055 9057 darwin_local_data_pic (rtx disp)
9056 9058 {
9057 9059 return (GET_CODE (disp) == UNSPEC
9058 9060 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET);
9059 9061 }
9060 9062
9061 9063 /* Determine if a given RTX is a valid constant. We already know this
9062 9064 satisfies CONSTANT_P. */
9063 9065
9064 9066 bool
9065 9067 legitimate_constant_p (rtx x)
9066 9068 {
9067 9069 switch (GET_CODE (x))
9068 9070 {
9069 9071 case CONST:
9070 9072 x = XEXP (x, 0);
9071 9073
9072 9074 if (GET_CODE (x) == PLUS)
9073 9075 {
9074 9076 if (!CONST_INT_P (XEXP (x, 1)))
9075 9077 return false;
9076 9078 x = XEXP (x, 0);
9077 9079 }
9078 9080
9079 9081 if (TARGET_MACHO && darwin_local_data_pic (x))
9080 9082 return true;
9081 9083
9082 9084 /* Only some unspecs are valid as "constants". */
9083 9085 if (GET_CODE (x) == UNSPEC)
9084 9086 switch (XINT (x, 1))
9085 9087 {
9086 9088 case UNSPEC_GOT:
9087 9089 case UNSPEC_GOTOFF:
9088 9090 case UNSPEC_PLTOFF:
9089 9091 return TARGET_64BIT;
9090 9092 case UNSPEC_TPOFF:
9091 9093 case UNSPEC_NTPOFF:
9092 9094 x = XVECEXP (x, 0, 0);
9093 9095 return (GET_CODE (x) == SYMBOL_REF
9094 9096 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9095 9097 case UNSPEC_DTPOFF:
9096 9098 x = XVECEXP (x, 0, 0);
9097 9099 return (GET_CODE (x) == SYMBOL_REF
9098 9100 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
9099 9101 default:
9100 9102 return false;
9101 9103 }
9102 9104
9103 9105 /* We must have drilled down to a symbol. */
9104 9106 if (GET_CODE (x) == LABEL_REF)
9105 9107 return true;
9106 9108 if (GET_CODE (x) != SYMBOL_REF)
9107 9109 return false;
9108 9110 /* FALLTHRU */
9109 9111
9110 9112 case SYMBOL_REF:
9111 9113 /* TLS symbols are never valid. */
9112 9114 if (SYMBOL_REF_TLS_MODEL (x))
9113 9115 return false;
9114 9116
9115 9117 /* DLLIMPORT symbols are never valid. */
9116 9118 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9117 9119 && SYMBOL_REF_DLLIMPORT_P (x))
9118 9120 return false;
9119 9121 break;
9120 9122
9121 9123 case CONST_DOUBLE:
9122 9124 if (GET_MODE (x) == TImode
9123 9125 && x != CONST0_RTX (TImode)
9124 9126 && !TARGET_64BIT)
9125 9127 return false;
9126 9128 break;
9127 9129
9128 9130 case CONST_VECTOR:
9129 9131 if (x == CONST0_RTX (GET_MODE (x)))
9130 9132 return true;
9131 9133 return false;
9132 9134
9133 9135 default:
9134 9136 break;
9135 9137 }
9136 9138
9137 9139 /* Otherwise we handle everything else in the move patterns. */
9138 9140 return true;
9139 9141 }
9140 9142
9141 9143 /* Determine if it's legal to put X into the constant pool. This
9142 9144 is not possible for the address of thread-local symbols, which
9143 9145 is checked above. */
9144 9146
9145 9147 static bool
9146 9148 ix86_cannot_force_const_mem (rtx x)
9147 9149 {
9148 9150 /* We can always put integral constants and vectors in memory. */
9149 9151 switch (GET_CODE (x))
9150 9152 {
9151 9153 case CONST_INT:
9152 9154 case CONST_DOUBLE:
9153 9155 case CONST_VECTOR:
9154 9156 return false;
9155 9157
9156 9158 default:
9157 9159 break;
9158 9160 }
9159 9161 return !legitimate_constant_p (x);
9160 9162 }
9161 9163
9162 9164 /* Determine if a given RTX is a valid constant address. */
9163 9165
9164 9166 bool
9165 9167 constant_address_p (rtx x)
9166 9168 {
9167 9169 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
9168 9170 }
9169 9171
9170 9172 /* Return the number of arguments to be saved on the stack with
9171 9173 -msave-args. */
9172 9174
9173 9175 static int
9174 9176 ix86_nsaved_args (void)
9175 9177 {
9176 9178 if (TARGET_SAVE_ARGS)
9177 9179 return crtl->args.info.regno - cfun->returns_struct;
9178 9180 else
9179 9181 return 0;
9180 9182 }
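/* A rough sketch of the value computed above: crtl->args.info.regno is the
   number of integer registers consumed by the incoming arguments, and the
   hidden struct-return pointer (cfun->returns_struct) is not counted.  For
   example, assuming the 64-bit register-passing ABI, a hypothetical

     int f (int a, int b);

   consumes two argument registers, so two arguments would be saved.  */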
9181 9183
9182 9184 /* Nonzero if the constant value X is a legitimate general operand
9183 9185 when generating PIC code. It is given that flag_pic is on and
9184 9186 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
9185 9187 bool
9186 9188 legitimate_pic_operand_p (rtx x)
9187 9189 {
9188 9190 rtx inner;
9189 9191
9190 9192 switch (GET_CODE (x))
9191 9193 {
9192 9194 case CONST:
9193 9195 inner = XEXP (x, 0);
9194 9196 if (GET_CODE (inner) == PLUS
9195 9197 && CONST_INT_P (XEXP (inner, 1)))
9196 9198 inner = XEXP (inner, 0);
9197 9199
9198 9200 /* Only some unspecs are valid as "constants". */
9199 9201 if (GET_CODE (inner) == UNSPEC)
9200 9202 switch (XINT (inner, 1))
9201 9203 {
9202 9204 case UNSPEC_GOT:
9203 9205 case UNSPEC_GOTOFF:
9204 9206 case UNSPEC_PLTOFF:
9205 9207 return TARGET_64BIT;
9206 9208 case UNSPEC_TPOFF:
9207 9209 x = XVECEXP (inner, 0, 0);
9208 9210 return (GET_CODE (x) == SYMBOL_REF
9209 9211 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
9210 9212 case UNSPEC_MACHOPIC_OFFSET:
9211 9213 return legitimate_pic_address_disp_p (x);
9212 9214 default:
9213 9215 return false;
9214 9216 }
9215 9217 /* FALLTHRU */
9216 9218
9217 9219 case SYMBOL_REF:
9218 9220 case LABEL_REF:
9219 9221 return legitimate_pic_address_disp_p (x);
9220 9222
9221 9223 default:
9222 9224 return true;
9223 9225 }
9224 9226 }
9225 9227
9226 9228 /* Determine if a given CONST RTX is a valid memory displacement
9227 9229 in PIC mode. */
9228 9230
9229 9231 int
9230 9232 legitimate_pic_address_disp_p (rtx disp)
9231 9233 {
9232 9234 bool saw_plus;
9233 9235
9234 9236 /* In 64bit mode we can allow direct addresses of symbols and labels
9235 9237 when they are not dynamic symbols. */
9236 9238 if (TARGET_64BIT)
9237 9239 {
9238 9240 rtx op0 = disp, op1;
9239 9241
9240 9242 switch (GET_CODE (disp))
9241 9243 {
9242 9244 case LABEL_REF:
9243 9245 return true;
9244 9246
9245 9247 case CONST:
9246 9248 if (GET_CODE (XEXP (disp, 0)) != PLUS)
9247 9249 break;
9248 9250 op0 = XEXP (XEXP (disp, 0), 0);
9249 9251 op1 = XEXP (XEXP (disp, 0), 1);
9250 9252 if (!CONST_INT_P (op1)
9251 9253 || INTVAL (op1) >= 16*1024*1024
9252 9254 || INTVAL (op1) < -16*1024*1024)
9253 9255 break;
9254 9256 if (GET_CODE (op0) == LABEL_REF)
9255 9257 return true;
9256 9258 if (GET_CODE (op0) != SYMBOL_REF)
9257 9259 break;
9258 9260 /* FALLTHRU */
9259 9261
9260 9262 case SYMBOL_REF:
9261 9263 /* TLS references should always be enclosed in UNSPEC. */
9262 9264 if (SYMBOL_REF_TLS_MODEL (op0))
9263 9265 return false;
9264 9266 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
9265 9267 && ix86_cmodel != CM_LARGE_PIC)
9266 9268 return true;
9267 9269 break;
9268 9270
9269 9271 default:
9270 9272 break;
9271 9273 }
9272 9274 }
9273 9275 if (GET_CODE (disp) != CONST)
9274 9276 return 0;
9275 9277 disp = XEXP (disp, 0);
9276 9278
9277 9279 if (TARGET_64BIT)
9278 9280 {
9279 9281 /* It is unsafe to allow PLUS expressions, since the allowed distance of
9280 9282 GOT table references is limited. We should not need these anyway. */
9281 9283 if (GET_CODE (disp) != UNSPEC
9282 9284 || (XINT (disp, 1) != UNSPEC_GOTPCREL
9283 9285 && XINT (disp, 1) != UNSPEC_GOTOFF
9284 9286 && XINT (disp, 1) != UNSPEC_PLTOFF))
9285 9287 return 0;
9286 9288
9287 9289 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
9288 9290 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
9289 9291 return 0;
9290 9292 return 1;
9291 9293 }
9292 9294
9293 9295 saw_plus = false;
9294 9296 if (GET_CODE (disp) == PLUS)
9295 9297 {
9296 9298 if (!CONST_INT_P (XEXP (disp, 1)))
9297 9299 return 0;
9298 9300 disp = XEXP (disp, 0);
9299 9301 saw_plus = true;
9300 9302 }
9301 9303
9302 9304 if (TARGET_MACHO && darwin_local_data_pic (disp))
9303 9305 return 1;
9304 9306
9305 9307 if (GET_CODE (disp) != UNSPEC)
9306 9308 return 0;
9307 9309
9308 9310 switch (XINT (disp, 1))
9309 9311 {
9310 9312 case UNSPEC_GOT:
9311 9313 if (saw_plus)
9312 9314 return false;
9313 9315 /* We need to check for both symbols and labels because VxWorks loads
9314 9316 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
9315 9317 details. */
9316 9318 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9317 9319 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
9318 9320 case UNSPEC_GOTOFF:
9319 9321 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
9320 9322 While the ABI also specifies a 32bit relocation, we don't produce it in
9321 9323 the small PIC model at all. */
9322 9324 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
9323 9325 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
9324 9326 && !TARGET_64BIT)
9325 9327 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
9326 9328 return false;
9327 9329 case UNSPEC_GOTTPOFF:
9328 9330 case UNSPEC_GOTNTPOFF:
9329 9331 case UNSPEC_INDNTPOFF:
9330 9332 if (saw_plus)
9331 9333 return false;
9332 9334 disp = XVECEXP (disp, 0, 0);
9333 9335 return (GET_CODE (disp) == SYMBOL_REF
9334 9336 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
9335 9337 case UNSPEC_NTPOFF:
9336 9338 disp = XVECEXP (disp, 0, 0);
9337 9339 return (GET_CODE (disp) == SYMBOL_REF
9338 9340 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
9339 9341 case UNSPEC_DTPOFF:
9340 9342 disp = XVECEXP (disp, 0, 0);
9341 9343 return (GET_CODE (disp) == SYMBOL_REF
9342 9344 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
9343 9345 }
9344 9346
9345 9347 return 0;
9346 9348 }
9347 9349
9348 9350 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
9349 9351 memory address for an instruction. The MODE argument is the machine mode
9350 9352 for the MEM expression that wants to use this address.
9351 9353
9352 9354 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
9353 9355 convert common non-canonical forms to canonical form so that they will
9354 9356 be recognized. */
9355 9357
9356 9358 int
9357 9359 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
9358 9360 rtx addr, int strict)
9359 9361 {
9360 9362 struct ix86_address parts;
9361 9363 rtx base, index, disp;
9362 9364 HOST_WIDE_INT scale;
9363 9365 const char *reason = NULL;
9364 9366 rtx reason_rtx = NULL_RTX;
9365 9367
9366 9368 if (ix86_decompose_address (addr, &parts) <= 0)
9367 9369 {
9368 9370 reason = "decomposition failed";
9369 9371 goto report_error;
9370 9372 }
9371 9373
9372 9374 base = parts.base;
9373 9375 index = parts.index;
9374 9376 disp = parts.disp;
9375 9377 scale = parts.scale;
9376 9378
9377 9379 /* Validate base register.
9378 9380
9379 9381 Don't allow SUBREGs that span more than a word here. It can lead to spill
9380 9382 failures when the base is one word out of a two-word structure, which is
9381 9383 represented internally as a DImode int.
9382 9384
9383 9385 if (base)
9384 9386 {
9385 9387 rtx reg;
9386 9388 reason_rtx = base;
9387 9389
9388 9390 if (REG_P (base))
9389 9391 reg = base;
9390 9392 else if (GET_CODE (base) == SUBREG
9391 9393 && REG_P (SUBREG_REG (base))
9392 9394 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
9393 9395 <= UNITS_PER_WORD)
9394 9396 reg = SUBREG_REG (base);
9395 9397 else
9396 9398 {
9397 9399 reason = "base is not a register";
9398 9400 goto report_error;
9399 9401 }
9400 9402
9401 9403 if (GET_MODE (base) != Pmode)
9402 9404 {
9403 9405 reason = "base is not in Pmode";
9404 9406 goto report_error;
9405 9407 }
9406 9408
9407 9409 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
9408 9410 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
9409 9411 {
9410 9412 reason = "base is not valid";
9411 9413 goto report_error;
9412 9414 }
9413 9415 }
9414 9416
9415 9417 /* Validate index register.
9416 9418
9417 9419 Don't allow SUBREGs that span more than a word here -- same as above. */
9418 9420
9419 9421 if (index)
9420 9422 {
9421 9423 rtx reg;
9422 9424 reason_rtx = index;
9423 9425
9424 9426 if (REG_P (index))
9425 9427 reg = index;
9426 9428 else if (GET_CODE (index) == SUBREG
9427 9429 && REG_P (SUBREG_REG (index))
9428 9430 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
9429 9431 <= UNITS_PER_WORD)
9430 9432 reg = SUBREG_REG (index);
9431 9433 else
9432 9434 {
9433 9435 reason = "index is not a register";
9434 9436 goto report_error;
9435 9437 }
9436 9438
9437 9439 if (GET_MODE (index) != Pmode)
9438 9440 {
9439 9441 reason = "index is not in Pmode";
9440 9442 goto report_error;
9441 9443 }
9442 9444
9443 9445 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
9444 9446 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
9445 9447 {
9446 9448 reason = "index is not valid";
9447 9449 goto report_error;
9448 9450 }
9449 9451 }
9450 9452
9451 9453 /* Validate scale factor. */
9452 9454 if (scale != 1)
9453 9455 {
9454 9456 reason_rtx = GEN_INT (scale);
9455 9457 if (!index)
9456 9458 {
9457 9459 reason = "scale without index";
9458 9460 goto report_error;
9459 9461 }
9460 9462
9461 9463 if (scale != 2 && scale != 4 && scale != 8)
9462 9464 {
9463 9465 reason = "scale is not a valid multiplier";
9464 9466 goto report_error;
9465 9467 }
9466 9468 }
9467 9469
9468 9470 /* Validate displacement. */
9469 9471 if (disp)
9470 9472 {
9471 9473 reason_rtx = disp;
9472 9474
9473 9475 if (GET_CODE (disp) == CONST
9474 9476 && GET_CODE (XEXP (disp, 0)) == UNSPEC
9475 9477 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET)
9476 9478 switch (XINT (XEXP (disp, 0), 1))
9477 9479 {
9478 9480 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
9479 9481 used. While the ABI also specifies 32bit relocations, we don't produce
9480 9482 them at all and use IP-relative addressing instead. */
9481 9483 case UNSPEC_GOT:
9482 9484 case UNSPEC_GOTOFF:
9483 9485 gcc_assert (flag_pic);
9484 9486 if (!TARGET_64BIT)
9485 9487 goto is_legitimate_pic;
9486 9488 reason = "64bit address unspec";
9487 9489 goto report_error;
9488 9490
9489 9491 case UNSPEC_GOTPCREL:
9490 9492 gcc_assert (flag_pic);
9491 9493 goto is_legitimate_pic;
9492 9494
9493 9495 case UNSPEC_GOTTPOFF:
9494 9496 case UNSPEC_GOTNTPOFF:
9495 9497 case UNSPEC_INDNTPOFF:
9496 9498 case UNSPEC_NTPOFF:
9497 9499 case UNSPEC_DTPOFF:
9498 9500 break;
9499 9501
9500 9502 default:
9501 9503 reason = "invalid address unspec";
9502 9504 goto report_error;
9503 9505 }
9504 9506
9505 9507 else if (SYMBOLIC_CONST (disp)
9506 9508 && (flag_pic
9507 9509 || (TARGET_MACHO
9508 9510 #if TARGET_MACHO
9509 9511 && MACHOPIC_INDIRECT
9510 9512 && !machopic_operand_p (disp)
9511 9513 #endif
9512 9514 )))
9513 9515 {
9514 9516
9515 9517 is_legitimate_pic:
9516 9518 if (TARGET_64BIT && (index || base))
9517 9519 {
9518 9520 /* foo@dtpoff(%rX) is ok. */
9519 9521 if (GET_CODE (disp) != CONST
9520 9522 || GET_CODE (XEXP (disp, 0)) != PLUS
9521 9523 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
9522 9524 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
9523 9525 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
9524 9526 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
9525 9527 {
9526 9528 reason = "non-constant pic memory reference";
9527 9529 goto report_error;
9528 9530 }
9529 9531 }
9530 9532 else if (! legitimate_pic_address_disp_p (disp))
9531 9533 {
9532 9534 reason = "displacement is an invalid pic construct";
9533 9535 goto report_error;
9534 9536 }
9535 9537
9536 9538 /* This code used to verify that a symbolic pic displacement
9537 9539 includes the pic_offset_table_rtx register.
9538 9540
9539 9541 While this is a good idea, unfortunately these constructs may
9540 9542 be created by the "adds using lea" optimization for incorrect
9541 9543 code like:
9542 9544
9543 9545 int a;
9544 9546 int foo(int i)
9545 9547 {
9546 9548 return *(&a+i);
9547 9549 }
9548 9550
9549 9551 This code is nonsensical, but results in addressing the
9550 9552 GOT table with pic_offset_table_rtx as the base. We can't
9551 9553 just refuse it easily, since it gets matched by the
9552 9554 "addsi3" pattern, which later gets split to an lea when the
9553 9555 output register differs from the input. While this
9554 9556 could be handled by a separate addsi pattern for this case
9555 9557 that never results in an lea, disabling this test seems to be
9556 9558 the easier and correct fix for the crash. */
9557 9559 }
9558 9560 else if (GET_CODE (disp) != LABEL_REF
9559 9561 && !CONST_INT_P (disp)
9560 9562 && (GET_CODE (disp) != CONST
9561 9563 || !legitimate_constant_p (disp))
9562 9564 && (GET_CODE (disp) != SYMBOL_REF
9563 9565 || !legitimate_constant_p (disp)))
9564 9566 {
9565 9567 reason = "displacement is not constant";
9566 9568 goto report_error;
9567 9569 }
9568 9570 else if (TARGET_64BIT
9569 9571 && !x86_64_immediate_operand (disp, VOIDmode))
9570 9572 {
9571 9573 reason = "displacement is out of range";
9572 9574 goto report_error;
9573 9575 }
9574 9576 }
9575 9577
9576 9578 /* Everything looks valid. */
9577 9579 return TRUE;
9578 9580
9579 9581 report_error:
9580 9582 return FALSE;
9581 9583 }
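/* A sketch of what the checks above accept (illustrative only): base and
   index must be Pmode registers (possibly wrapped in a word-sized SUBREG),
   the scale must be 1, 2, 4 or 8, and a scale other than 1 requires an
   index.  So, for example,

     (plus (reg) (const_int 8))                  is accepted,
     (plus (reg) (mult (reg) (const_int 4)))     is accepted, and
     (mult (reg) (const_int 3))                  is rejected with
                                "scale is not a valid multiplier".  */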
9582 9584
9583 9585 /* Return a unique alias set for the GOT. */
9584 9586
9585 9587 static alias_set_type
9586 9588 ix86_GOT_alias_set (void)
9587 9589 {
9588 9590 static alias_set_type set = -1;
9589 9591 if (set == -1)
9590 9592 set = new_alias_set ();
9591 9593 return set;
9592 9594 }
9593 9595
9594 9596 /* Return a legitimate reference for ORIG (an address) using the
9595 9597 register REG. If REG is 0, a new pseudo is generated.
9596 9598
9597 9599 There are two types of references that must be handled:
9598 9600
9599 9601 1. Global data references must load the address from the GOT, via
9600 9602 the PIC reg. An insn is emitted to do this load, and the reg is
9601 9603 returned.
9602 9604
9603 9605 2. Static data references, constant pool addresses, and code labels
9604 9606 compute the address as an offset from the GOT, whose base is in
9605 9607 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
9606 9608 differentiate them from global data objects. The returned
9607 9609 address is the PIC reg + an unspec constant.
9608 9610
9609 9611 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
9610 9612 reg also appears in the address. */
9611 9613
9612 9614 static rtx
9613 9615 legitimize_pic_address (rtx orig, rtx reg)
9614 9616 {
9615 9617 rtx addr = orig;
9616 9618 rtx new_rtx = orig;
9617 9619 rtx base;
9618 9620
9619 9621 #if TARGET_MACHO
9620 9622 if (TARGET_MACHO && !TARGET_64BIT)
9621 9623 {
9622 9624 if (reg == 0)
9623 9625 reg = gen_reg_rtx (Pmode);
9624 9626 /* Use the generic Mach-O PIC machinery. */
9625 9627 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
9626 9628 }
9627 9629 #endif
9628 9630
9629 9631 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
9630 9632 new_rtx = addr;
9631 9633 else if (TARGET_64BIT
9632 9634 && ix86_cmodel != CM_SMALL_PIC
9633 9635 && gotoff_operand (addr, Pmode))
9634 9636 {
9635 9637 rtx tmpreg;
9636 9638 /* This symbol may be referenced via a displacement from the PIC
9637 9639 base address (@GOTOFF). */
9638 9640
9639 9641 if (reload_in_progress)
9640 9642 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9641 9643 if (GET_CODE (addr) == CONST)
9642 9644 addr = XEXP (addr, 0);
9643 9645 if (GET_CODE (addr) == PLUS)
9644 9646 {
9645 9647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9646 9648 UNSPEC_GOTOFF);
9647 9649 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9648 9650 }
9649 9651 else
9650 9652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9651 9653 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9652 9654 if (!reg)
9653 9655 tmpreg = gen_reg_rtx (Pmode);
9654 9656 else
9655 9657 tmpreg = reg;
9656 9658 emit_move_insn (tmpreg, new_rtx);
9657 9659
9658 9660 if (reg != 0)
9659 9661 {
9660 9662 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
9661 9663 tmpreg, 1, OPTAB_DIRECT);
9662 9664 new_rtx = reg;
9663 9665 }
9664 9666 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
9665 9667 }
9666 9668 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
9667 9669 {
9668 9670 /* This symbol may be referenced via a displacement from the PIC
9669 9671 base address (@GOTOFF). */
9670 9672
9671 9673 if (reload_in_progress)
9672 9674 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9673 9675 if (GET_CODE (addr) == CONST)
9674 9676 addr = XEXP (addr, 0);
9675 9677 if (GET_CODE (addr) == PLUS)
9676 9678 {
9677 9679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
9678 9680 UNSPEC_GOTOFF);
9679 9681 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
9680 9682 }
9681 9683 else
9682 9684 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
9683 9685 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9684 9686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9685 9687
9686 9688 if (reg != 0)
9687 9689 {
9688 9690 emit_move_insn (reg, new_rtx);
9689 9691 new_rtx = reg;
9690 9692 }
9691 9693 }
9692 9694 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
9693 9695 /* We can't use @GOTOFF for text labels on VxWorks;
9694 9696 see gotoff_operand. */
9695 9697 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
9696 9698 {
9697 9699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
9698 9700 {
9699 9701 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr))
9700 9702 return legitimize_dllimport_symbol (addr, true);
9701 9703 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
9702 9704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF
9703 9705 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0)))
9704 9706 {
9705 9707 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true);
9706 9708 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1));
9707 9709 }
9708 9710 }
9709 9711
9710 9712 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
9711 9713 {
9712 9714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
9713 9715 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9714 9716 new_rtx = gen_const_mem (Pmode, new_rtx);
9715 9717 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9716 9718
9717 9719 if (reg == 0)
9718 9720 reg = gen_reg_rtx (Pmode);
9719 9721 /* Use gen_movsi directly, otherwise the address is loaded
9720 9722 into a register for CSE. We don't want to CSE these addresses;
9721 9723 instead we CSE addresses from the GOT table, so skip this. */
9722 9724 emit_insn (gen_movsi (reg, new_rtx));
9723 9725 new_rtx = reg;
9724 9726 }
9725 9727 else
9726 9728 {
9727 9729 /* This symbol must be referenced via a load from the
9728 9730 Global Offset Table (@GOT). */
9729 9731
9730 9732 if (reload_in_progress)
9731 9733 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9732 9734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
9733 9735 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9734 9736 if (TARGET_64BIT)
9735 9737 new_rtx = force_reg (Pmode, new_rtx);
9736 9738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9737 9739 new_rtx = gen_const_mem (Pmode, new_rtx);
9738 9740 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
9739 9741
9740 9742 if (reg == 0)
9741 9743 reg = gen_reg_rtx (Pmode);
9742 9744 emit_move_insn (reg, new_rtx);
9743 9745 new_rtx = reg;
9744 9746 }
9745 9747 }
9746 9748 else
9747 9749 {
9748 9750 if (CONST_INT_P (addr)
9749 9751 && !x86_64_immediate_operand (addr, VOIDmode))
9750 9752 {
9751 9753 if (reg)
9752 9754 {
9753 9755 emit_move_insn (reg, addr);
9754 9756 new_rtx = reg;
9755 9757 }
9756 9758 else
9757 9759 new_rtx = force_reg (Pmode, addr);
9758 9760 }
9759 9761 else if (GET_CODE (addr) == CONST)
9760 9762 {
9761 9763 addr = XEXP (addr, 0);
9762 9764
9763 9765 /* We must match stuff we generate before. Assume the only
9764 9766 unspecs that can get here are ours. Not that we could do
9765 9767 anything with them anyway.... */
9766 9768 if (GET_CODE (addr) == UNSPEC
9767 9769 || (GET_CODE (addr) == PLUS
9768 9770 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
9769 9771 return orig;
9770 9772 gcc_assert (GET_CODE (addr) == PLUS);
9771 9773 }
9772 9774 if (GET_CODE (addr) == PLUS)
9773 9775 {
9774 9776 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
9775 9777
9776 9778 /* Check first to see if this is a constant offset from a @GOTOFF
9777 9779 symbol reference. */
9778 9780 if (gotoff_operand (op0, Pmode)
9779 9781 && CONST_INT_P (op1))
9780 9782 {
9781 9783 if (!TARGET_64BIT)
9782 9784 {
9783 9785 if (reload_in_progress)
9784 9786 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9785 9787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
9786 9788 UNSPEC_GOTOFF);
9787 9789 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
9788 9790 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
9789 9791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
9790 9792
9791 9793 if (reg != 0)
9792 9794 {
9793 9795 emit_move_insn (reg, new_rtx);
9794 9796 new_rtx = reg;
9795 9797 }
9796 9798 }
9797 9799 else
9798 9800 {
9799 9801 if (INTVAL (op1) < -16*1024*1024
9800 9802 || INTVAL (op1) >= 16*1024*1024)
9801 9803 {
9802 9804 if (!x86_64_immediate_operand (op1, Pmode))
9803 9805 op1 = force_reg (Pmode, op1);
9804 9806 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
9805 9807 }
9806 9808 }
9807 9809 }
9808 9810 else
9809 9811 {
9810 9812 base = legitimize_pic_address (XEXP (addr, 0), reg);
9811 9813 new_rtx = legitimize_pic_address (XEXP (addr, 1),
9812 9814 base == reg ? NULL_RTX : reg);
9813 9815
9814 9816 if (CONST_INT_P (new_rtx))
9815 9817 new_rtx = plus_constant (base, INTVAL (new_rtx));
9816 9818 else
9817 9819 {
9818 9820 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
9819 9821 {
9820 9822 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
9821 9823 new_rtx = XEXP (new_rtx, 1);
9822 9824 }
9823 9825 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
9824 9826 }
9825 9827 }
9826 9828 }
9827 9829 }
9828 9830 return new_rtx;
9829 9831 }
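/* An illustrative sketch of the two reference kinds handled above for
   32-bit PIC (the exact code depends on target flags).  Given

     static int s;
     extern int g;

   the addresses are formed roughly as

     &s  ->  leal s@GOTOFF(%ebx), %eax
     &g  ->  movl g@GOT(%ebx), %eax

   where %ebx holds pic_offset_table_rtx, the PIC register.  */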
9830 9832
9831 9833 /* Load the thread pointer. If TO_REG is true, force it into a register. */
9832 9834
9833 9835 static rtx
9834 9836 get_thread_pointer (int to_reg)
9835 9837 {
9836 9838 rtx tp, reg, insn;
9837 9839
9838 9840 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
9839 9841 if (!to_reg)
9840 9842 return tp;
9841 9843
9842 9844 reg = gen_reg_rtx (Pmode);
9843 9845 insn = gen_rtx_SET (VOIDmode, reg, tp);
9844 9846 insn = emit_insn (insn);
9845 9847
9846 9848 return reg;
9847 9849 }
9848 9850
9849 9851 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
9850 9852 false if we expect this to be used for a memory address and true if
9851 9853 we expect to load the address into a register. */
9852 9854
9853 9855 static rtx
9854 9856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
9855 9857 {
9856 9858 rtx dest, base, off, pic, tp;
9857 9859 int type;
9858 9860
9859 9861 switch (model)
9860 9862 {
9861 9863 case TLS_MODEL_GLOBAL_DYNAMIC:
9862 9864 dest = gen_reg_rtx (Pmode);
9863 9865 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9864 9866
9865 9867 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9866 9868 {
9867 9869 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns;
9868 9870
9869 9871 start_sequence ();
9870 9872 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
9871 9873 insns = get_insns ();
9872 9874 end_sequence ();
9873 9875
9874 9876 RTL_CONST_CALL_P (insns) = 1;
9875 9877 emit_libcall_block (insns, dest, rax, x);
9876 9878 }
9877 9879 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9878 9880 emit_insn (gen_tls_global_dynamic_64 (dest, x));
9879 9881 else
9880 9882 emit_insn (gen_tls_global_dynamic_32 (dest, x));
9881 9883
9882 9884 if (TARGET_GNU2_TLS)
9883 9885 {
9884 9886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
9885 9887
9886 9888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9887 9889 }
9888 9890 break;
9889 9891
9890 9892 case TLS_MODEL_LOCAL_DYNAMIC:
9891 9893 base = gen_reg_rtx (Pmode);
9892 9894 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
9893 9895
9894 9896 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
9895 9897 {
9896 9898 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note;
9897 9899
9898 9900 start_sequence ();
9899 9901 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
9900 9902 insns = get_insns ();
9901 9903 end_sequence ();
9902 9904
9903 9905 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
9904 9906 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
9905 9907 RTL_CONST_CALL_P (insns) = 1;
9906 9908 emit_libcall_block (insns, base, rax, note);
9907 9909 }
9908 9910 else if (TARGET_64BIT && TARGET_GNU2_TLS)
9909 9911 emit_insn (gen_tls_local_dynamic_base_64 (base));
9910 9912 else
9911 9913 emit_insn (gen_tls_local_dynamic_base_32 (base));
9912 9914
9913 9915 if (TARGET_GNU2_TLS)
9914 9916 {
9915 9917 rtx x = ix86_tls_module_base ();
9916 9918
9917 9919 set_unique_reg_note (get_last_insn (), REG_EQUIV,
9918 9920 gen_rtx_MINUS (Pmode, x, tp));
9919 9921 }
9920 9922
9921 9923 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
9922 9924 off = gen_rtx_CONST (Pmode, off);
9923 9925
9924 9926 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
9925 9927
9926 9928 if (TARGET_GNU2_TLS)
9927 9929 {
9928 9930 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
9929 9931
9930 9932 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
9931 9933 }
9932 9934
9933 9935 break;
9934 9936
9935 9937 case TLS_MODEL_INITIAL_EXEC:
9936 9938 if (TARGET_64BIT)
9937 9939 {
9938 9940 pic = NULL;
9939 9941 type = UNSPEC_GOTNTPOFF;
9940 9942 }
9941 9943 else if (flag_pic)
9942 9944 {
9943 9945 if (reload_in_progress)
9944 9946 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
9945 9947 pic = pic_offset_table_rtx;
9946 9948 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
9947 9949 }
9948 9950 else if (!TARGET_ANY_GNU_TLS)
9949 9951 {
9950 9952 pic = gen_reg_rtx (Pmode);
9951 9953 emit_insn (gen_set_got (pic));
9952 9954 type = UNSPEC_GOTTPOFF;
9953 9955 }
9954 9956 else
9955 9957 {
9956 9958 pic = NULL;
9957 9959 type = UNSPEC_INDNTPOFF;
9958 9960 }
9959 9961
9960 9962 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
9961 9963 off = gen_rtx_CONST (Pmode, off);
9962 9964 if (pic)
9963 9965 off = gen_rtx_PLUS (Pmode, pic, off);
9964 9966 off = gen_const_mem (Pmode, off);
9965 9967 set_mem_alias_set (off, ix86_GOT_alias_set ());
9966 9968
9967 9969 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9968 9970 {
9969 9971 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9970 9972 off = force_reg (Pmode, off);
9971 9973 return gen_rtx_PLUS (Pmode, base, off);
9972 9974 }
9973 9975 else
9974 9976 {
9975 9977 base = get_thread_pointer (true);
9976 9978 dest = gen_reg_rtx (Pmode);
9977 9979 emit_insn (gen_subsi3 (dest, base, off));
9978 9980 }
9979 9981 break;
9980 9982
9981 9983 case TLS_MODEL_LOCAL_EXEC:
9982 9984 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
9983 9985 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9984 9986 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
9985 9987 off = gen_rtx_CONST (Pmode, off);
9986 9988
9987 9989 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
9988 9990 {
9989 9991 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
9990 9992 return gen_rtx_PLUS (Pmode, base, off);
9991 9993 }
9992 9994 else
9993 9995 {
9994 9996 base = get_thread_pointer (true);
9995 9997 dest = gen_reg_rtx (Pmode);
9996 9998 emit_insn (gen_subsi3 (dest, base, off));
9997 9999 }
9998 10000 break;
9999 10001
10000 10002 default:
10001 10003 gcc_unreachable ();
10002 10004 }
10003 10005
10004 10006 return dest;
10005 10007 }
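/* A sketch of how the TLS models handled above map to source code
   (illustrative; the exact sequences depend on flags such as
   TARGET_GNU2_TLS).  For

     __thread int t;
     int get (void) { return t; }

   code built for a shared object typically uses the global- or
   local-dynamic sequences via the TLS runtime helper, while code known
   to live in the executable can use the initial-exec
   (@GOTTPOFF/@GOTNTPOFF) or local-exec (@TPOFF/@NTPOFF) forms
   emitted here.  */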
10006 10008
10007 10009 /* Create or return the unique __imp_DECL dllimport symbol corresponding
10008 10010 to symbol DECL. */
10009 10011
10010 10012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
10011 10013 htab_t dllimport_map;
10012 10014
10013 10015 static tree
10014 10016 get_dllimport_decl (tree decl)
10015 10017 {
10016 10018 struct tree_map *h, in;
10017 10019 void **loc;
10018 10020 const char *name;
10019 10021 const char *prefix;
10020 10022 size_t namelen, prefixlen;
10021 10023 char *imp_name;
10022 10024 tree to;
10023 10025 rtx rtl;
10024 10026
10025 10027 if (!dllimport_map)
10026 10028 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
10027 10029
10028 10030 in.hash = htab_hash_pointer (decl);
10029 10031 in.base.from = decl;
10030 10032 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
10031 10033 h = (struct tree_map *) *loc;
10032 10034 if (h)
10033 10035 return h->to;
10034 10036
10035 10037 *loc = h = GGC_NEW (struct tree_map);
10036 10038 h->hash = in.hash;
10037 10039 h->base.from = decl;
10038 10040 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
10039 10041 DECL_ARTIFICIAL (to) = 1;
10040 10042 DECL_IGNORED_P (to) = 1;
10041 10043 DECL_EXTERNAL (to) = 1;
10042 10044 TREE_READONLY (to) = 1;
10043 10045
10044 10046 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
10045 10047 name = targetm.strip_name_encoding (name);
10046 10048 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0
10047 10049 ? "*__imp_" : "*__imp__";
10048 10050 namelen = strlen (name);
10049 10051 prefixlen = strlen (prefix);
10050 10052 imp_name = (char *) alloca (namelen + prefixlen + 1);
10051 10053 memcpy (imp_name, prefix, prefixlen);
10052 10054 memcpy (imp_name + prefixlen, name, namelen + 1);
10053 10055
10054 10056 name = ggc_alloc_string (imp_name, namelen + prefixlen);
10055 10057 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
10056 10058 SET_SYMBOL_REF_DECL (rtl, to);
10057 10059 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
10058 10060
10059 10061 rtl = gen_const_mem (Pmode, rtl);
10060 10062 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
10061 10063
10062 10064 SET_DECL_RTL (to, rtl);
10063 10065 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name));
10064 10066
10065 10067 return to;
10066 10068 }
10067 10069
10068 10070 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
10069 10071 true if we require the result be a register. */
10070 10072
10071 10073 static rtx
10072 10074 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
10073 10075 {
10074 10076 tree imp_decl;
10075 10077 rtx x;
10076 10078
10077 10079 gcc_assert (SYMBOL_REF_DECL (symbol));
10078 10080 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
10079 10081
10080 10082 x = DECL_RTL (imp_decl);
10081 10083 if (want_reg)
10082 10084 x = force_reg (Pmode, x);
10083 10085 return x;
10084 10086 }
10085 10087
10086 10088 /* Try machine-dependent ways of modifying an illegitimate address
10087 10089 to be legitimate. If we find one, return the new, valid address.
10088 10090 This macro is used in only one place: `memory_address' in explow.c.
10089 10091
10090 10092 OLDX is the address as it was before break_out_memory_refs was called.
10091 10093 In some cases it is useful to look at this to decide what needs to be done.
10092 10094
10093 10095 MODE and WIN are passed so that this macro can use
10094 10096 GO_IF_LEGITIMATE_ADDRESS.
10095 10097
10096 10098 It is always safe for this macro to do nothing. It exists to recognize
10097 10099 opportunities to optimize the output.
10098 10100
10099 10101 For the 80386, we handle X+REG by loading X into a register R and
10100 10102 using R+REG. R will go in a general reg and indexing will be used.
10101 10103 However, if REG is a broken-out memory address or multiplication,
10102 10104 nothing needs to be done because REG can certainly go in a general reg.
10103 10105
10104 10106 When -fpic is used, special handling is needed for symbolic references.
10105 10107 See comments by legitimize_pic_address in i386.c for details. */
10106 10108
10107 10109 rtx
10108 10110 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
10109 10111 {
10110 10112 int changed = 0;
10111 10113 unsigned log;
10112 10114
10113 10115 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
10114 10116 if (log)
10115 10117 return legitimize_tls_address (x, (enum tls_model) log, false);
10116 10118 if (GET_CODE (x) == CONST
10117 10119 && GET_CODE (XEXP (x, 0)) == PLUS
10118 10120 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10119 10121 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
10120 10122 {
10121 10123 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
10122 10124 (enum tls_model) log, false);
10123 10125 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10124 10126 }
10125 10127
10126 10128 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
10127 10129 {
10128 10130 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
10129 10131 return legitimize_dllimport_symbol (x, true);
10130 10132 if (GET_CODE (x) == CONST
10131 10133 && GET_CODE (XEXP (x, 0)) == PLUS
10132 10134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
10133 10135 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
10134 10136 {
10135 10137 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
10136 10138 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
10137 10139 }
10138 10140 }
10139 10141
10140 10142 if (flag_pic && SYMBOLIC_CONST (x))
10141 10143 return legitimize_pic_address (x, 0);
10142 10144
10143 10145 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
10144 10146 if (GET_CODE (x) == ASHIFT
10145 10147 && CONST_INT_P (XEXP (x, 1))
10146 10148 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
10147 10149 {
10148 10150 changed = 1;
10149 10151 log = INTVAL (XEXP (x, 1));
10150 10152 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
10151 10153 GEN_INT (1 << log));
10152 10154 }
10153 10155
10154 10156 if (GET_CODE (x) == PLUS)
10155 10157 {
10156 10158 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
10157 10159
10158 10160 if (GET_CODE (XEXP (x, 0)) == ASHIFT
10159 10161 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10160 10162 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
10161 10163 {
10162 10164 changed = 1;
10163 10165 log = INTVAL (XEXP (XEXP (x, 0), 1));
10164 10166 XEXP (x, 0) = gen_rtx_MULT (Pmode,
10165 10167 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
10166 10168 GEN_INT (1 << log));
10167 10169 }
10168 10170
10169 10171 if (GET_CODE (XEXP (x, 1)) == ASHIFT
10170 10172 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10171 10173 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
10172 10174 {
10173 10175 changed = 1;
10174 10176 log = INTVAL (XEXP (XEXP (x, 1), 1));
10175 10177 XEXP (x, 1) = gen_rtx_MULT (Pmode,
10176 10178 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
10177 10179 GEN_INT (1 << log));
10178 10180 }
10179 10181
10180 10182 /* Put multiply first if it isn't already. */
10181 10183 if (GET_CODE (XEXP (x, 1)) == MULT)
10182 10184 {
10183 10185 rtx tmp = XEXP (x, 0);
10184 10186 XEXP (x, 0) = XEXP (x, 1);
10185 10187 XEXP (x, 1) = tmp;
10186 10188 changed = 1;
10187 10189 }
10188 10190
10189 10191 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
10190 10192 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
10191 10193 created by virtual register instantiation, register elimination, and
10192 10194 similar optimizations. */
10193 10195 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
10194 10196 {
10195 10197 changed = 1;
10196 10198 x = gen_rtx_PLUS (Pmode,
10197 10199 gen_rtx_PLUS (Pmode, XEXP (x, 0),
10198 10200 XEXP (XEXP (x, 1), 0)),
10199 10201 XEXP (XEXP (x, 1), 1));
10200 10202 }
10201 10203
10202 10204 /* Canonicalize
10203 10205 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
10204 10206 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
10205 10207 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
10206 10208 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10207 10209 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
10208 10210 && CONSTANT_P (XEXP (x, 1)))
10209 10211 {
10210 10212 rtx constant;
10211 10213 rtx other = NULL_RTX;
10212 10214
10213 10215 if (CONST_INT_P (XEXP (x, 1)))
10214 10216 {
10215 10217 constant = XEXP (x, 1);
10216 10218 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
10217 10219 }
10218 10220 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
10219 10221 {
10220 10222 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
10221 10223 other = XEXP (x, 1);
10222 10224 }
10223 10225 else
10224 10226 constant = 0;
10225 10227
10226 10228 if (constant)
10227 10229 {
10228 10230 changed = 1;
10229 10231 x = gen_rtx_PLUS (Pmode,
10230 10232 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
10231 10233 XEXP (XEXP (XEXP (x, 0), 1), 0)),
10232 10234 plus_constant (other, INTVAL (constant)));
10233 10235 }
10234 10236 }
10235 10237
10236 10238 if (changed && legitimate_address_p (mode, x, FALSE))
10237 10239 return x;
10238 10240
10239 10241 if (GET_CODE (XEXP (x, 0)) == MULT)
10240 10242 {
10241 10243 changed = 1;
10242 10244 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
10243 10245 }
10244 10246
10245 10247 if (GET_CODE (XEXP (x, 1)) == MULT)
10246 10248 {
10247 10249 changed = 1;
10248 10250 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
10249 10251 }
10250 10252
10251 10253 if (changed
10252 10254 && REG_P (XEXP (x, 1))
10253 10255 && REG_P (XEXP (x, 0)))
10254 10256 return x;
10255 10257
10256 10258 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
10257 10259 {
10258 10260 changed = 1;
10259 10261 x = legitimize_pic_address (x, 0);
10260 10262 }
10261 10263
10262 10264 if (changed && legitimate_address_p (mode, x, FALSE))
10263 10265 return x;
10264 10266
10265 10267 if (REG_P (XEXP (x, 0)))
10266 10268 {
10267 10269 rtx temp = gen_reg_rtx (Pmode);
10268 10270 rtx val = force_operand (XEXP (x, 1), temp);
10269 10271 if (val != temp)
10270 10272 emit_move_insn (temp, val);
10271 10273
10272 10274 XEXP (x, 1) = temp;
10273 10275 return x;
10274 10276 }
10275 10277
10276 10278 else if (REG_P (XEXP (x, 1)))
10277 10279 {
10278 10280 rtx temp = gen_reg_rtx (Pmode);
10279 10281 rtx val = force_operand (XEXP (x, 0), temp);
10280 10282 if (val != temp)
10281 10283 emit_move_insn (temp, val);
10282 10284
10283 10285 XEXP (x, 0) = temp;
10284 10286 return x;
10285 10287 }
10286 10288 }
10287 10289
10288 10290 return x;
10289 10291 }
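/* A sketch of one canonicalization performed above: a shift by 0..3
   used as an address term is rewritten as a multiply so that the
   scaled-index addressing mode can be recognized, e.g.

     (plus (ashift (reg) (const_int 2)) (reg))

   becomes

     (plus (mult (reg) (const_int 4)) (reg))

   which matches the base + index*scale form.  */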
10290 10292
10291 10293 /* Print an integer constant expression in assembler syntax. Addition
10292 10294 and subtraction are the only arithmetic that may appear in these
10293 10295 expressions. FILE is the stdio stream to write to, X is the rtx, and
10294 10296 CODE is the operand print code from the output string. */
10295 10297
10296 10298 static void
10297 10299 output_pic_addr_const (FILE *file, rtx x, int code)
10298 10300 {
10299 10301 char buf[256];
10300 10302
10301 10303 switch (GET_CODE (x))
10302 10304 {
10303 10305 case PC:
10304 10306 gcc_assert (flag_pic);
10305 10307 putc ('.', file);
10306 10308 break;
10307 10309
10308 10310 case SYMBOL_REF:
10309 10311 if (! TARGET_MACHO || TARGET_64BIT)
10310 10312 output_addr_const (file, x);
10311 10313 else
10312 10314 {
10313 10315 const char *name = XSTR (x, 0);
10314 10316
10315 10317 /* Mark the decl as referenced so that cgraph will
10316 10318 output the function. */
10317 10319 if (SYMBOL_REF_DECL (x))
10318 10320 mark_decl_referenced (SYMBOL_REF_DECL (x));
10319 10321
10320 10322 #if TARGET_MACHO
10321 10323 if (MACHOPIC_INDIRECT
10322 10324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
10323 10325 name = machopic_indirection_name (x, /*stub_p=*/true);
10324 10326 #endif
10325 10327 assemble_name (file, name);
10326 10328 }
10327 10329 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI)
10328 10330 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
10329 10331 fputs ("@PLT", file);
10330 10332 break;
10331 10333
10332 10334 case LABEL_REF:
10333 10335 x = XEXP (x, 0);
10334 10336 /* FALLTHRU */
10335 10337 case CODE_LABEL:
10336 10338 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
10337 10339 assemble_name (asm_out_file, buf);
10338 10340 break;
10339 10341
10340 10342 case CONST_INT:
10341 10343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
10342 10344 break;
10343 10345
10344 10346 case CONST:
10345 10347 /* This used to output parentheses around the expression,
10346 10348 but that does not work on the 386 (either ATT or BSD assembler). */
10347 10349 output_pic_addr_const (file, XEXP (x, 0), code);
10348 10350 break;
10349 10351
10350 10352 case CONST_DOUBLE:
10351 10353 if (GET_MODE (x) == VOIDmode)
10352 10354 {
10353 10355 /* We can use %d if the number is <32 bits and positive. */
10354 10356 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
10355 10357 fprintf (file, "0x%lx%08lx",
10356 10358 (unsigned long) CONST_DOUBLE_HIGH (x),
10357 10359 (unsigned long) CONST_DOUBLE_LOW (x));
10358 10360 else
10359 10361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
10360 10362 }
10361 10363 else
10362 10364 /* We can't handle floating point constants;
10363 10365 PRINT_OPERAND must handle them. */
10364 10366 output_operand_lossage ("floating constant misused");
10365 10367 break;
10366 10368
10367 10369 case PLUS:
10368 10370 /* Some assemblers need integer constants to appear first. */
10369 10371 if (CONST_INT_P (XEXP (x, 0)))
10370 10372 {
10371 10373 output_pic_addr_const (file, XEXP (x, 0), code);
10372 10374 putc ('+', file);
10373 10375 output_pic_addr_const (file, XEXP (x, 1), code);
10374 10376 }
10375 10377 else
10376 10378 {
10377 10379 gcc_assert (CONST_INT_P (XEXP (x, 1)));
10378 10380 output_pic_addr_const (file, XEXP (x, 1), code);
10379 10381 putc ('+', file);
10380 10382 output_pic_addr_const (file, XEXP (x, 0), code);
10381 10383 }
10382 10384 break;
10383 10385
10384 10386 case MINUS:
10385 10387 if (!TARGET_MACHO)
10386 10388 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
10387 10389 output_pic_addr_const (file, XEXP (x, 0), code);
10388 10390 putc ('-', file);
10389 10391 output_pic_addr_const (file, XEXP (x, 1), code);
10390 10392 if (!TARGET_MACHO)
10391 10393 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
10392 10394 break;
10393 10395
10394 10396 case UNSPEC:
10395 10397 gcc_assert (XVECLEN (x, 0) == 1);
10396 10398 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
10397 10399 switch (XINT (x, 1))
10398 10400 {
10399 10401 case UNSPEC_GOT:
10400 10402 fputs ("@GOT", file);
10401 10403 break;
10402 10404 case UNSPEC_GOTOFF:
10403 10405 fputs ("@GOTOFF", file);
10404 10406 break;
10405 10407 case UNSPEC_PLTOFF:
10406 10408 fputs ("@PLTOFF", file);
10407 10409 break;
10408 10410 case UNSPEC_GOTPCREL:
10409 10411 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10410 10412 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file);
10411 10413 break;
10412 10414 case UNSPEC_GOTTPOFF:
10413 10415 /* FIXME: This might be @TPOFF in Sun ld too. */
10414 10416 fputs ("@GOTTPOFF", file);
10415 10417 break;
10416 10418 case UNSPEC_TPOFF:
10417 10419 fputs ("@TPOFF", file);
10418 10420 break;
10419 10421 case UNSPEC_NTPOFF:
10420 10422 if (TARGET_64BIT)
10421 10423 fputs ("@TPOFF", file);
10422 10424 else
10423 10425 fputs ("@NTPOFF", file);
10424 10426 break;
10425 10427 case UNSPEC_DTPOFF:
10426 10428 fputs ("@DTPOFF", file);
10427 10429 break;
10428 10430 case UNSPEC_GOTNTPOFF:
10429 10431 if (TARGET_64BIT)
10430 10432 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
10431 10433 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file);
10432 10434 else
10433 10435 fputs ("@GOTNTPOFF", file);
10434 10436 break;
10435 10437 case UNSPEC_INDNTPOFF:
10436 10438 fputs ("@INDNTPOFF", file);
10437 10439 break;
10438 10440 #if TARGET_MACHO
10439 10441 case UNSPEC_MACHOPIC_OFFSET:
10440 10442 putc ('-', file);
10441 10443 machopic_output_function_base_name (file);
10442 10444 break;
10443 10445 #endif
10444 10446 default:
10445 10447 output_operand_lossage ("invalid UNSPEC as operand");
10446 10448 break;
10447 10449 }
10448 10450 break;
10449 10451
10450 10452 default:
10451 10453 output_operand_lossage ("invalid expression as operand");
10452 10454 }
10453 10455 }
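/* For illustration, the suffixes emitted above yield operands such as
   foo@GOT, foo@GOTOFF, foo@PLTOFF and, on 64-bit targets,
   foo@GOTPCREL(%rip); the assembler turns these into the corresponding
   relocations.  */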
10454 10456
10455 10457 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
10456 10458 We need to emit DTP-relative relocations. */
10457 10459
10458 10460 static void ATTRIBUTE_UNUSED
10459 10461 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
10460 10462 {
10461 10463 fputs (ASM_LONG, file);
10462 10464 output_addr_const (file, x);
10463 10465 fputs ("@DTPOFF", file);
10464 10466 switch (size)
10465 10467 {
10466 10468 case 4:
10467 10469 break;
10468 10470 case 8:
10469 10471 fputs (", 0", file);
10470 10472 break;
10471 10473 default:
10472 10474 gcc_unreachable ();
10473 10475 }
10474 10476 }
10475 10477
10476 10478 /* Return true if X is a representation of the PIC register. This copes
10477 10479 with calls from ix86_find_base_term, where the register might have
10478 10480 been replaced by a cselib value. */
10479 10481
10480 10482 static bool
10481 10483 ix86_pic_register_p (rtx x)
10482 10484 {
10483 10485 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x))
10484 10486 return (pic_offset_table_rtx
10485 10487 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx));
10486 10488 else
10487 10489 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM;
10488 10490 }
10489 10491
10490 10492 /* In the name of slightly smaller debug output, and to cater to
10491 10493 general assembler lossage, recognize PIC+GOTOFF and turn it back
10492 10494 into a direct symbol reference.
10493 10495
10494 10496 On Darwin, this is necessary to avoid a crash, because Darwin
10495 10497 has a different PIC label for each routine but the DWARF debugging
10496 10498 information is not associated with any particular routine, so it's
10497 10499 necessary to remove references to the PIC label from RTL stored by
10498 10500 the DWARF output code. */
10499 10501
10500 10502 static rtx
10501 10503 ix86_delegitimize_address (rtx orig_x)
10502 10504 {
10503 10505 rtx x = orig_x;
10504 10506 /* reg_addend is NULL or a multiple of some register. */
10505 10507 rtx reg_addend = NULL_RTX;
10506 10508 /* const_addend is NULL or a const_int. */
10507 10509 rtx const_addend = NULL_RTX;
10508 10510 /* This is the result, or NULL. */
10509 10511 rtx result = NULL_RTX;
10510 10512
10511 10513 if (MEM_P (x))
10512 10514 x = XEXP (x, 0);
10513 10515
10514 10516 if (TARGET_64BIT)
10515 10517 {
10516 10518 if (GET_CODE (x) != CONST
10517 10519 || GET_CODE (XEXP (x, 0)) != UNSPEC
10518 10520 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
10519 10521 || !MEM_P (orig_x))
10520 10522 return orig_x;
10521 10523 return XVECEXP (XEXP (x, 0), 0, 0);
10522 10524 }
10523 10525
10524 10526 if (GET_CODE (x) != PLUS
10525 10527 || GET_CODE (XEXP (x, 1)) != CONST)
10526 10528 return orig_x;
10527 10529
10528 10530 if (ix86_pic_register_p (XEXP (x, 0)))
10529 10531 /* %ebx + GOT/GOTOFF */
10530 10532 ;
10531 10533 else if (GET_CODE (XEXP (x, 0)) == PLUS)
10532 10534 {
10533 10535 /* %ebx + %reg * scale + GOT/GOTOFF */
10534 10536 reg_addend = XEXP (x, 0);
10535 10537 if (ix86_pic_register_p (XEXP (reg_addend, 0)))
10536 10538 reg_addend = XEXP (reg_addend, 1);
10537 10539 else if (ix86_pic_register_p (XEXP (reg_addend, 1)))
10538 10540 reg_addend = XEXP (reg_addend, 0);
10539 10541 else
10540 10542 return orig_x;
10541 10543 if (!REG_P (reg_addend)
10542 10544 && GET_CODE (reg_addend) != MULT
10543 10545 && GET_CODE (reg_addend) != ASHIFT)
10544 10546 return orig_x;
10545 10547 }
10546 10548 else
10547 10549 return orig_x;
10548 10550
10549 10551 x = XEXP (XEXP (x, 1), 0);
10550 10552 if (GET_CODE (x) == PLUS
10551 10553 && CONST_INT_P (XEXP (x, 1)))
10552 10554 {
10553 10555 const_addend = XEXP (x, 1);
10554 10556 x = XEXP (x, 0);
10555 10557 }
10556 10558
10557 10559 if (GET_CODE (x) == UNSPEC
10558 10560 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
10559 10561 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
10560 10562 result = XVECEXP (x, 0, 0);
10561 10563
10562 10564 if (TARGET_MACHO && darwin_local_data_pic (x)
10563 10565 && !MEM_P (orig_x))
10564 10566 result = XVECEXP (x, 0, 0);
10565 10567
10566 10568 if (! result)
10567 10569 return orig_x;
10568 10570
10569 10571 if (const_addend)
10570 10572 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend));
10571 10573 if (reg_addend)
10572 10574 result = gen_rtx_PLUS (Pmode, reg_addend, result);
10573 10575 return result;
10574 10576 }
10575 10577
10576 10578 /* If X is a machine specific address (i.e. a symbol or label being
10577 10579 referenced as a displacement from the GOT implemented using an
10578 10580 UNSPEC), then return the base term. Otherwise return X. */
10579 10581
10580 10582 rtx
10581 10583 ix86_find_base_term (rtx x)
10582 10584 {
10583 10585 rtx term;
10584 10586
10585 10587 if (TARGET_64BIT)
10586 10588 {
10587 10589 if (GET_CODE (x) != CONST)
10588 10590 return x;
10589 10591 term = XEXP (x, 0);
10590 10592 if (GET_CODE (term) == PLUS
10591 10593 && (CONST_INT_P (XEXP (term, 1))
10592 10594 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
10593 10595 term = XEXP (term, 0);
10594 10596 if (GET_CODE (term) != UNSPEC
10595 10597 || XINT (term, 1) != UNSPEC_GOTPCREL)
10596 10598 return x;
10597 10599
10598 10600 return XVECEXP (term, 0, 0);
10599 10601 }
10600 10602
10601 10603 return ix86_delegitimize_address (x);
10602 10604 }
10603 10605
10604 10606 static void
10605 10607 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
10606 10608 int fp, FILE *file)
10607 10609 {
10608 10610 const char *suffix;
10609 10611
10610 10612 if (mode == CCFPmode || mode == CCFPUmode)
10611 10613 {
10612 10614 enum rtx_code second_code, bypass_code;
10613 10615 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
10614 10616 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
10615 10617 code = ix86_fp_compare_code_to_integer (code);
10616 10618 mode = CCmode;
10617 10619 }
10618 10620 if (reverse)
10619 10621 code = reverse_condition (code);
10620 10622
10621 10623 switch (code)
10622 10624 {
10623 10625 case EQ:
10624 10626 switch (mode)
10625 10627 {
10626 10628 case CCAmode:
10627 10629 suffix = "a";
10628 10630 break;
10629 10631
10630 10632 case CCCmode:
10631 10633 suffix = "c";
10632 10634 break;
10633 10635
10634 10636 case CCOmode:
10635 10637 suffix = "o";
10636 10638 break;
10637 10639
10638 10640 case CCSmode:
10639 10641 suffix = "s";
10640 10642 break;
10641 10643
10642 10644 default:
10643 10645 suffix = "e";
10644 10646 }
10645 10647 break;
10646 10648 case NE:
10647 10649 switch (mode)
10648 10650 {
10649 10651 case CCAmode:
10650 10652 suffix = "na";
10651 10653 break;
10652 10654
10653 10655 case CCCmode:
10654 10656 suffix = "nc";
10655 10657 break;
10656 10658
10657 10659 case CCOmode:
10658 10660 suffix = "no";
10659 10661 break;
10660 10662
10661 10663 case CCSmode:
10662 10664 suffix = "ns";
10663 10665 break;
10664 10666
10665 10667 default:
10666 10668 suffix = "ne";
10667 10669 }
10668 10670 break;
10669 10671 case GT:
10670 10672 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
10671 10673 suffix = "g";
10672 10674 break;
10673 10675 case GTU:
10674 10676 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
10675 10677 Those same assemblers have the same but opposite lossage on cmov. */
10676 10678 if (mode == CCmode)
10677 10679 suffix = fp ? "nbe" : "a";
10678 10680 else if (mode == CCCmode)
10679 10681 suffix = "b";
10680 10682 else
10681 10683 gcc_unreachable ();
10682 10684 break;
10683 10685 case LT:
10684 10686 switch (mode)
10685 10687 {
10686 10688 case CCNOmode:
10687 10689 case CCGOCmode:
10688 10690 suffix = "s";
10689 10691 break;
10690 10692
10691 10693 case CCmode:
10692 10694 case CCGCmode:
10693 10695 suffix = "l";
10694 10696 break;
10695 10697
10696 10698 default:
10697 10699 gcc_unreachable ();
10698 10700 }
10699 10701 break;
10700 10702 case LTU:
10701 10703 gcc_assert (mode == CCmode || mode == CCCmode);
10702 10704 suffix = "b";
10703 10705 break;
10704 10706 case GE:
10705 10707 switch (mode)
10706 10708 {
10707 10709 case CCNOmode:
10708 10710 case CCGOCmode:
10709 10711 suffix = "ns";
10710 10712 break;
10711 10713
10712 10714 case CCmode:
10713 10715 case CCGCmode:
10714 10716 suffix = "ge";
10715 10717 break;
10716 10718
10717 10719 default:
10718 10720 gcc_unreachable ();
10719 10721 }
10720 10722 break;
10721 10723 case GEU:
10722 10724 /* ??? As above. */
10723 10725 gcc_assert (mode == CCmode || mode == CCCmode);
10724 10726 suffix = fp ? "nb" : "ae";
10725 10727 break;
10726 10728 case LE:
10727 10729 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
10728 10730 suffix = "le";
10729 10731 break;
10730 10732 case LEU:
10731 10733 /* ??? As above. */
10732 10734 if (mode == CCmode)
10733 10735 suffix = "be";
10734 10736 else if (mode == CCCmode)
10735 10737 suffix = fp ? "nb" : "ae";
10736 10738 else
10737 10739 gcc_unreachable ();
10738 10740 break;
10739 10741 case UNORDERED:
10740 10742 suffix = fp ? "u" : "p";
10741 10743 break;
10742 10744 case ORDERED:
10743 10745 suffix = fp ? "nu" : "np";
10744 10746 break;
10745 10747 default:
10746 10748 gcc_unreachable ();
10747 10749 }
10748 10750 fputs (suffix, file);
10749 10751 }
10750 10752
10751 10753 /* Print the name of register X to FILE based on its machine mode and number.
10752 10754 If CODE is 'w', pretend the mode is HImode.
10753 10755 If CODE is 'b', pretend the mode is QImode.
10754 10756 If CODE is 'k', pretend the mode is SImode.
10755 10757 If CODE is 'q', pretend the mode is DImode.
10756 10758 If CODE is 'x', pretend the mode is V4SFmode.
10757 10759 If CODE is 't', pretend the mode is V8SFmode.
10758 10760 If CODE is 'h', pretend the reg is the 'high' byte register.
10759 10761 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op.
10760 10762 If CODE is 'd', duplicate the operand for AVX instruction.
10761 10763 */
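/* For example, with operand 0 in the AX register, "%k0" in an output
   template prints "%eax" (the SImode name) under AT&T syntax, "%b0"
   prints "%al", and "%h0" prints "%ah".  */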
10762 10764
10763 10765 void
10764 10766 print_reg (rtx x, int code, FILE *file)
10765 10767 {
10766 10768 const char *reg;
10767 10769 bool duplicated = code == 'd' && TARGET_AVX;
10768 10770
10769 10771 gcc_assert (x == pc_rtx
10770 10772 || (REGNO (x) != ARG_POINTER_REGNUM
10771 10773 && REGNO (x) != FRAME_POINTER_REGNUM
10772 10774 && REGNO (x) != FLAGS_REG
10773 10775 && REGNO (x) != FPSR_REG
10774 10776 && REGNO (x) != FPCR_REG));
10775 10777
10776 10778 if (ASSEMBLER_DIALECT == ASM_ATT)
10777 10779 putc ('%', file);
10778 10780
10779 10781 if (x == pc_rtx)
10780 10782 {
10781 10783 gcc_assert (TARGET_64BIT);
10782 10784 fputs ("rip", file);
10783 10785 return;
10784 10786 }
10785 10787
10786 10788 if (code == 'w' || MMX_REG_P (x))
10787 10789 code = 2;
10788 10790 else if (code == 'b')
10789 10791 code = 1;
10790 10792 else if (code == 'k')
10791 10793 code = 4;
10792 10794 else if (code == 'q')
10793 10795 code = 8;
10794 10796 else if (code == 'y')
10795 10797 code = 3;
10796 10798 else if (code == 'h')
10797 10799 code = 0;
10798 10800 else if (code == 'x')
10799 10801 code = 16;
10800 10802 else if (code == 't')
10801 10803 code = 32;
10802 10804 else
10803 10805 code = GET_MODE_SIZE (GET_MODE (x));
10804 10806
10805 10807 /* Irritatingly, AMD extended registers use a different naming convention
10806 10808 from the normal registers. */
10807 10809 if (REX_INT_REG_P (x))
10808 10810 {
10809 10811 gcc_assert (TARGET_64BIT);
10810 10812 switch (code)
10811 10813 {
10812 10814 case 0:
10813 10815 error ("extended registers have no high halves");
10814 10816 break;
10815 10817 case 1:
10816 10818 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
10817 10819 break;
10818 10820 case 2:
10819 10821 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
10820 10822 break;
10821 10823 case 4:
10822 10824 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
10823 10825 break;
10824 10826 case 8:
10825 10827 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
10826 10828 break;
10827 10829 default:
10828 10830 error ("unsupported operand size for extended register");
10829 10831 break;
10830 10832 }
10831 10833 return;
10832 10834 }
10833 10835
10834 10836 reg = NULL;
10835 10837 switch (code)
10836 10838 {
10837 10839 case 3:
10838 10840 if (STACK_TOP_P (x))
10839 10841 {
10840 10842 reg = "st(0)";
10841 10843 break;
10842 10844 }
10843 10845 /* FALLTHRU */
10844 10846 case 8:
10845 10847 case 4:
10846 10848 case 12:
10847 10849 if (! ANY_FP_REG_P (x))
10848 10850 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
10849 10851 /* FALLTHRU */
10850 10852 case 16:
10851 10853 case 2:
10852 10854 normal:
10853 10855 reg = hi_reg_name[REGNO (x)];
10854 10856 break;
10855 10857 case 1:
10856 10858 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
10857 10859 goto normal;
10858 10860 reg = qi_reg_name[REGNO (x)];
10859 10861 break;
10860 10862 case 0:
10861 10863 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
10862 10864 goto normal;
10863 10865 reg = qi_high_reg_name[REGNO (x)];
10864 10866 break;
10865 10867 case 32:
10866 10868 if (SSE_REG_P (x))
10867 10869 {
10868 10870 gcc_assert (!duplicated);
10869 10871 putc ('y', file);
10870 10872 fputs (hi_reg_name[REGNO (x)] + 1, file);
10871 10873 return;
10872 10874 }
10873 10875 break;
10874 10876 default:
10875 10877 gcc_unreachable ();
10876 10878 }
10877 10879
10878 10880 fputs (reg, file);
10879 10881 if (duplicated)
10880 10882 {
10881 10883 if (ASSEMBLER_DIALECT == ASM_ATT)
10882 10884 fprintf (file, ", %%%s", reg);
10883 10885 else
10884 10886 fprintf (file, ", %s", reg);
10885 10887 }
10886 10888 }
10887 10889
10888 10890 /* Locate some local-dynamic symbol still in use by this function
10889 10891 so that we can print its name in some tls_local_dynamic_base
10890 10892 pattern. */
10891 10893
10892 10894 static int
10893 10895 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
10894 10896 {
10895 10897 rtx x = *px;
10896 10898
10897 10899 if (GET_CODE (x) == SYMBOL_REF
10898 10900 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
10899 10901 {
10900 10902 cfun->machine->some_ld_name = XSTR (x, 0);
10901 10903 return 1;
10902 10904 }
10903 10905
10904 10906 return 0;
10905 10907 }
10906 10908
10907 10909 static const char *
10908 10910 get_some_local_dynamic_name (void)
10909 10911 {
10910 10912 rtx insn;
10911 10913
10912 10914 if (cfun->machine->some_ld_name)
10913 10915 return cfun->machine->some_ld_name;
10914 10916
10915 10917 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
10916 10918 if (INSN_P (insn)
10917 10919 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
10918 10920 return cfun->machine->some_ld_name;
10919 10921
10920 10922 gcc_unreachable ();
10921 10923 }
10922 10924
10923 10925 /* Meaning of CODE:
10924 10926 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
10925 10927 C -- print opcode suffix for set/cmov insn.
10926 10928 c -- like C, but print reversed condition
10927 10929 F,f -- likewise, but for floating-point.
10928 10930 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
10929 10931 otherwise nothing
10930 10932 R -- print the prefix for register names.
10931 10933 z -- print the opcode suffix for the size of the current operand.
10932 10934 * -- print a star (in certain assembler syntax)
10933 10935 A -- print an absolute memory reference.
10934 10936 w -- print the operand as if it's a "word" (HImode) even if it isn't.
10935 10937 s -- print a shift double count, followed by the assembler's argument
10936 10938 delimiter.
10937 10939 b -- print the QImode name of the register for the indicated operand.
10938 10940 %b0 would print %al if operands[0] is reg 0.
10939 10941 w -- likewise, print the HImode name of the register.
10940 10942 k -- likewise, print the SImode name of the register.
10941 10943 q -- likewise, print the DImode name of the register.
10942 10944 x -- likewise, print the V4SFmode name of the register.
10943 10945 t -- likewise, print the V8SFmode name of the register.
10944 10946 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
10945 10947 y -- print "st(0)" instead of "st" as a register.
10946 10948 d -- print duplicated register operand for AVX instruction.
10947 10949 D -- print condition for SSE cmp instruction.
10948 10950 P -- if PIC, print an @PLT suffix.
10949 10951 X -- don't print any sort of PIC '@' suffix for a symbol.
10950 10952 & -- print some in-use local-dynamic symbol name.
10951 10953 H -- print a memory address offset by 8; used for sse high-parts
10952 10954 Y -- print condition for SSE5 com* instruction.
10953 10955 + -- print a branch hint as 'cs' or 'ds' prefix
10954 10956 ; -- print a semicolon (after prefixes due to bug in older gas).
10955 10957 */
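/* For instance, a hypothetical output template "mov%z0\t{%1, %0|%0, %1}"
   would emit "movl" for an SImode register operand under AT&T syntax and
   plain "mov" under Intel syntax; the text before '|' is used for AT&T
   and the text after it for Intel.  */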
10956 10958
10957 10959 void
10958 10960 print_operand (FILE *file, rtx x, int code)
10959 10961 {
10960 10962 if (code)
10961 10963 {
10962 10964 switch (code)
10963 10965 {
10964 10966 case '*':
10965 10967 if (ASSEMBLER_DIALECT == ASM_ATT)
10966 10968 putc ('*', file);
10967 10969 return;
10968 10970
10969 10971 case '&':
10970 10972 assemble_name (file, get_some_local_dynamic_name ());
10971 10973 return;
10972 10974
10973 10975 case 'A':
10974 10976 switch (ASSEMBLER_DIALECT)
10975 10977 {
10976 10978 case ASM_ATT:
10977 10979 putc ('*', file);
10978 10980 break;
10979 10981
10980 10982 case ASM_INTEL:
10981 10983 /* Intel syntax. For absolute addresses, registers should not
10982 10984 be surrounded by brackets. */
10983 10985 if (!REG_P (x))
10984 10986 {
10985 10987 putc ('[', file);
10986 10988 PRINT_OPERAND (file, x, 0);
10987 10989 putc (']', file);
10988 10990 return;
10989 10991 }
10990 10992 break;
10991 10993
10992 10994 default:
10993 10995 gcc_unreachable ();
10994 10996 }
10995 10997
10996 10998 PRINT_OPERAND (file, x, 0);
10997 10999 return;
10998 11000
10999 11001
11000 11002 case 'L':
11001 11003 if (ASSEMBLER_DIALECT == ASM_ATT)
11002 11004 putc ('l', file);
11003 11005 return;
11004 11006
11005 11007 case 'W':
11006 11008 if (ASSEMBLER_DIALECT == ASM_ATT)
11007 11009 putc ('w', file);
11008 11010 return;
11009 11011
11010 11012 case 'B':
11011 11013 if (ASSEMBLER_DIALECT == ASM_ATT)
11012 11014 putc ('b', file);
11013 11015 return;
11014 11016
11015 11017 case 'Q':
11016 11018 if (ASSEMBLER_DIALECT == ASM_ATT)
11017 11019 putc ('l', file);
11018 11020 return;
11019 11021
11020 11022 case 'S':
11021 11023 if (ASSEMBLER_DIALECT == ASM_ATT)
11022 11024 putc ('s', file);
11023 11025 return;
11024 11026
11025 11027 case 'T':
11026 11028 if (ASSEMBLER_DIALECT == ASM_ATT)
11027 11029 putc ('t', file);
11028 11030 return;
11029 11031
11030 11032 case 'z':
11031 11033 /* 387 opcodes don't get size suffixes if the operands are
11032 11034 registers. */
11033 11035 if (STACK_REG_P (x))
11034 11036 return;
11035 11037
11036 11038 /* Likewise if using Intel opcodes. */
11037 11039 if (ASSEMBLER_DIALECT == ASM_INTEL)
11038 11040 return;
11039 11041
11040 11042 /* Derive the opcode size suffix from the size of the operand. */
11041 11043 switch (GET_MODE_SIZE (GET_MODE (x)))
11042 11044 {
11043 11045 case 1:
11044 11046 putc ('b', file);
11045 11047 return;
11046 11048
11047 11049 case 2:
11048 11050 if (MEM_P (x))
11049 11051 {
11050 11052 #ifdef HAVE_GAS_FILDS_FISTS
11051 11053 putc ('s', file);
11052 11054 #endif
11053 11055 return;
11054 11056 }
11055 11057 else
11056 11058 putc ('w', file);
11057 11059 return;
11058 11060
11059 11061 case 4:
11060 11062 if (GET_MODE (x) == SFmode)
11061 11063 {
11062 11064 putc ('s', file);
11063 11065 return;
11064 11066 }
11065 11067 else
11066 11068 putc ('l', file);
11067 11069 return;
11068 11070
11069 11071 case 12:
11070 11072 case 16:
11071 11073 putc ('t', file);
11072 11074 return;
11073 11075
11074 11076 case 8:
11075 11077 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
11076 11078 {
11077 11079 if (MEM_P (x))
11078 11080 {
11079 11081 #ifdef GAS_MNEMONICS
11080 11082 putc ('q', file);
11081 11083 #else
11082 11084 putc ('l', file);
11083 11085 putc ('l', file);
11084 11086 #endif
11085 11087 }
11086 11088 else
11087 11089 putc ('q', file);
11088 11090 }
11089 11091 else
11090 11092 putc ('l', file);
11091 11093 return;
11092 11094
11093 11095 default:
11094 11096 gcc_unreachable ();
11095 11097 }
11096 11098
11097 11099 case 'd':
11098 11100 case 'b':
11099 11101 case 'w':
11100 11102 case 'k':
11101 11103 case 'q':
11102 11104 case 'h':
11103 11105 case 't':
11104 11106 case 'y':
11105 11107 case 'x':
11106 11108 case 'X':
11107 11109 case 'P':
11108 11110 break;
11109 11111
11110 11112 case 's':
11111 11113 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
11112 11114 {
11113 11115 PRINT_OPERAND (file, x, 0);
11114 11116 fputs (", ", file);
11115 11117 }
11116 11118 return;
11117 11119
11118 11120 case 'D':
11119 11121 /* A little bit of braindamage here: the SSE compare instructions
11120 11122 use completely different names for the comparisons than the
11121 11123 fp conditional moves do. */
11122 11124 if (TARGET_AVX)
11123 11125 {
11124 11126 switch (GET_CODE (x))
11125 11127 {
11126 11128 case EQ:
11127 11129 fputs ("eq", file);
11128 11130 break;
11129 11131 case UNEQ:
11130 11132 fputs ("eq_us", file);
11131 11133 break;
11132 11134 case LT:
11133 11135 fputs ("lt", file);
11134 11136 break;
11135 11137 case UNLT:
11136 11138 fputs ("nge", file);
11137 11139 break;
11138 11140 case LE:
11139 11141 fputs ("le", file);
11140 11142 break;
11141 11143 case UNLE:
11142 11144 fputs ("ngt", file);
11143 11145 break;
11144 11146 case UNORDERED:
11145 11147 fputs ("unord", file);
11146 11148 break;
11147 11149 case NE:
11148 11150 fputs ("neq", file);
11149 11151 break;
11150 11152 case LTGT:
11151 11153 fputs ("neq_oq", file);
11152 11154 break;
11153 11155 case GE:
11154 11156 fputs ("ge", file);
11155 11157 break;
11156 11158 case UNGE:
11157 11159 fputs ("nlt", file);
11158 11160 break;
11159 11161 case GT:
11160 11162 fputs ("gt", file);
11161 11163 break;
11162 11164 case UNGT:
11163 11165 fputs ("nle", file);
11164 11166 break;
11165 11167 case ORDERED:
11166 11168 fputs ("ord", file);
11167 11169 break;
11168 11170 default:
11169 11171 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11170 11172 return;
11171 11173 }
11172 11174 }
11173 11175 else
11174 11176 {
11175 11177 switch (GET_CODE (x))
11176 11178 {
11177 11179 case EQ:
11178 11180 case UNEQ:
11179 11181 fputs ("eq", file);
11180 11182 break;
11181 11183 case LT:
11182 11184 case UNLT:
11183 11185 fputs ("lt", file);
11184 11186 break;
11185 11187 case LE:
11186 11188 case UNLE:
11187 11189 fputs ("le", file);
11188 11190 break;
11189 11191 case UNORDERED:
11190 11192 fputs ("unord", file);
11191 11193 break;
11192 11194 case NE:
11193 11195 case LTGT:
11194 11196 fputs ("neq", file);
11195 11197 break;
11196 11198 case UNGE:
11197 11199 case GE:
11198 11200 fputs ("nlt", file);
11199 11201 break;
11200 11202 case UNGT:
11201 11203 case GT:
11202 11204 fputs ("nle", file);
11203 11205 break;
11204 11206 case ORDERED:
11205 11207 fputs ("ord", file);
11206 11208 break;
11207 11209 default:
11208 11210 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'");
11209 11211 return;
11210 11212 }
11211 11213 }
11212 11214 return;
11213 11215 case 'O':
11214 11216 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11215 11217 if (ASSEMBLER_DIALECT == ASM_ATT)
11216 11218 {
11217 11219 switch (GET_MODE (x))
11218 11220 {
11219 11221 case HImode: putc ('w', file); break;
11220 11222 case SImode:
11221 11223 case SFmode: putc ('l', file); break;
11222 11224 case DImode:
11223 11225 case DFmode: putc ('q', file); break;
11224 11226 default: gcc_unreachable ();
11225 11227 }
11226 11228 putc ('.', file);
11227 11229 }
11228 11230 #endif
11229 11231 return;
11230 11232 case 'C':
11231 11233 if (!COMPARISON_P (x))
11232 11234 {
11233 11235 output_operand_lossage ("operand is neither a constant nor a "
11234 11236 "condition code, invalid operand code "
11235 11237 "'C'");
11236 11238 return;
11237 11239 }
11238 11240 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
11239 11241 return;
11240 11242 case 'F':
11241 11243 if (!COMPARISON_P (x))
11242 11244 {
11243 11245 output_operand_lossage ("operand is neither a constant nor a "
11244 11246 "condition code, invalid operand code "
11245 11247 "'F'");
11246 11248 return;
11247 11249 }
11248 11250 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11249 11251 if (ASSEMBLER_DIALECT == ASM_ATT)
11250 11252 putc ('.', file);
11251 11253 #endif
11252 11254 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
11253 11255 return;
11254 11256
11255 11257 /* Like above, but reverse condition */
11256 11258 case 'c':
11257 11259 /* Check to see if argument to %c is really a constant
11258 11260 and not a condition code which needs to be reversed. */
11259 11261 if (!COMPARISON_P (x))
11260 11262 {
11261 11263 output_operand_lossage ("operand is neither a constant nor a "
11262 11264 "condition code, invalid operand "
11263 11265 "code 'c'");
11264 11266 return;
11265 11267 }
11266 11268 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
11267 11269 return;
11268 11270 case 'f':
11269 11271 if (!COMPARISON_P (x))
11270 11272 {
11271 11273 output_operand_lossage ("operand is neither a constant nor a "
11272 11274 "condition code, invalid operand "
11273 11275 "code 'f'");
11274 11276 return;
11275 11277 }
11276 11278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
11277 11279 if (ASSEMBLER_DIALECT == ASM_ATT)
11278 11280 putc ('.', file);
11279 11281 #endif
11280 11282 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
11281 11283 return;
11282 11284
11283 11285 case 'H':
11284 11286 /* It doesn't actually matter what mode we use here, as we're
11285 11287 only going to use this for printing. */
11286 11288 x = adjust_address_nv (x, DImode, 8);
11287 11289 break;
11288 11290
11289 11291 case '+':
11290 11292 {
11291 11293 rtx x;
11292 11294
11293 11295 if (!optimize
11294 11296 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS)
11295 11297 return;
11296 11298
11297 11299 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
11298 11300 if (x)
11299 11301 {
11300 11302 int pred_val = INTVAL (XEXP (x, 0));
11301 11303
11302 11304 if (pred_val < REG_BR_PROB_BASE * 45 / 100
11303 11305 || pred_val > REG_BR_PROB_BASE * 55 / 100)
11304 11306 {
11305 11307 int taken = pred_val > REG_BR_PROB_BASE / 2;
11306 11308 int cputaken = final_forward_branch_p (current_output_insn) == 0;
11307 11309
11308 11310 /* Emit hints only in the case where the default branch prediction
11309 11311 heuristics would fail. */
11310 11312 if (taken != cputaken)
11311 11313 {
11312 11314 /* We use 3e (DS) prefix for taken branches and
11313 11315 2e (CS) prefix for not taken branches. */
11314 11316 if (taken)
11315 11317 fputs ("ds ; ", file);
11316 11318 else
11317 11319 fputs ("cs ; ", file);
11318 11320 }
11319 11321 }
11320 11322 }
11321 11323 return;
11322 11324 }
11323 11325
11324 11326 case 'Y':
11325 11327 switch (GET_CODE (x))
11326 11328 {
11327 11329 case NE:
11328 11330 fputs ("neq", file);
11329 11331 break;
11330 11332 case EQ:
11331 11333 fputs ("eq", file);
11332 11334 break;
11333 11335 case GE:
11334 11336 case GEU:
11335 11337 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file);
11336 11338 break;
11337 11339 case GT:
11338 11340 case GTU:
11339 11341 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file);
11340 11342 break;
11341 11343 case LE:
11342 11344 case LEU:
11343 11345 fputs ("le", file);
11344 11346 break;
11345 11347 case LT:
11346 11348 case LTU:
11347 11349 fputs ("lt", file);
11348 11350 break;
11349 11351 case UNORDERED:
11350 11352 fputs ("unord", file);
11351 11353 break;
11352 11354 case ORDERED:
11353 11355 fputs ("ord", file);
11354 11356 break;
11355 11357 case UNEQ:
11356 11358 fputs ("ueq", file);
11357 11359 break;
11358 11360 case UNGE:
11359 11361 fputs ("nlt", file);
11360 11362 break;
11361 11363 case UNGT:
11362 11364 fputs ("nle", file);
11363 11365 break;
11364 11366 case UNLE:
11365 11367 fputs ("ule", file);
11366 11368 break;
11367 11369 case UNLT:
11368 11370 fputs ("ult", file);
11369 11371 break;
11370 11372 case LTGT:
11371 11373 fputs ("une", file);
11372 11374 break;
11373 11375 default:
11374 11376 output_operand_lossage ("operand is not a condition code, invalid operand code 'Y'");
11375 11377 return;
11376 11378 }
11377 11379 return;
11378 11380
11379 11381 case ';':
11380 11382 #if TARGET_MACHO
11381 11383 fputs (" ; ", file);
11382 11384 #else
11383 11385 fputc (' ', file);
11384 11386 #endif
11385 11387 return;
11386 11388
11387 11389 default:
11388 11390 output_operand_lossage ("invalid operand code '%c'", code);
11389 11391 }
11390 11392 }
11391 11393
11392 11394 if (REG_P (x))
11393 11395 print_reg (x, code, file);
11394 11396
11395 11397 else if (MEM_P (x))
11396 11398 {
11397 11399 /* No `byte ptr' prefix for call instructions or BLKmode operands. */
11398 11400 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P'
11399 11401 && GET_MODE (x) != BLKmode)
11400 11402 {
11401 11403 const char * size;
11402 11404 switch (GET_MODE_SIZE (GET_MODE (x)))
11403 11405 {
11404 11406 case 1: size = "BYTE"; break;
11405 11407 case 2: size = "WORD"; break;
11406 11408 case 4: size = "DWORD"; break;
11407 11409 case 8: size = "QWORD"; break;
11408 11410 case 12: size = "TBYTE"; break;
11409 11411 case 16:
11410 11412 if (GET_MODE (x) == XFmode)
11411 11413 size = "TBYTE";
11412 11414 else
11413 11415 size = "XMMWORD";
11414 11416 break;
11415 11417 case 32: size = "YMMWORD"; break;
11416 11418 default:
11417 11419 gcc_unreachable ();
11418 11420 }
11419 11421
11420 11422 /* Check for explicit size override (codes 'b', 'w' and 'k') */
11421 11423 if (code == 'b')
11422 11424 size = "BYTE";
11423 11425 else if (code == 'w')
11424 11426 size = "WORD";
11425 11427 else if (code == 'k')
11426 11428 size = "DWORD";
11427 11429
11428 11430 fputs (size, file);
11429 11431 fputs (" PTR ", file);
11430 11432 }
11431 11433
11432 11434 x = XEXP (x, 0);
11433 11435 /* Avoid (%rip) for call operands. */
11434 11436 if (CONSTANT_ADDRESS_P (x) && code == 'P'
11435 11437 && !CONST_INT_P (x))
11436 11438 output_addr_const (file, x);
11437 11439 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
11438 11440 output_operand_lossage ("invalid constraints for operand");
11439 11441 else
11440 11442 output_address (x);
11441 11443 }
11442 11444
11443 11445 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
11444 11446 {
11445 11447 REAL_VALUE_TYPE r;
11446 11448 long l;
11447 11449
11448 11450 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
11449 11451 REAL_VALUE_TO_TARGET_SINGLE (r, l);
11450 11452
11451 11453 if (ASSEMBLER_DIALECT == ASM_ATT)
11452 11454 putc ('$', file);
11453 11455 fprintf (file, "0x%08lx", (long unsigned int) l);
11454 11456 }
11455 11457
11456 11458 /* These float cases don't actually occur as immediate operands. */
11457 11459 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
11458 11460 {
11459 11461 char dstr[30];
11460 11462
11461 11463 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11462 11464 fprintf (file, "%s", dstr);
11463 11465 }
11464 11466
11465 11467 else if (GET_CODE (x) == CONST_DOUBLE
11466 11468 && GET_MODE (x) == XFmode)
11467 11469 {
11468 11470 char dstr[30];
11469 11471
11470 11472 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
11471 11473 fprintf (file, "%s", dstr);
11472 11474 }
11473 11475
11474 11476 else
11475 11477 {
11476 11478 /* We have patterns that allow zero sets of memory, for instance.
11477 11479 In 64-bit mode, we should probably support all 8-byte vectors,
11478 11480 since we can in fact encode that into an immediate. */
11479 11481 if (GET_CODE (x) == CONST_VECTOR)
11480 11482 {
11481 11483 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
11482 11484 x = const0_rtx;
11483 11485 }
11484 11486
11485 11487 if (code != 'P')
11486 11488 {
11487 11489 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
11488 11490 {
11489 11491 if (ASSEMBLER_DIALECT == ASM_ATT)
11490 11492 putc ('$', file);
11491 11493 }
11492 11494 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
11493 11495 || GET_CODE (x) == LABEL_REF)
11494 11496 {
11495 11497 if (ASSEMBLER_DIALECT == ASM_ATT)
11496 11498 putc ('$', file);
11497 11499 else
11498 11500 fputs ("OFFSET FLAT:", file);
11499 11501 }
11500 11502 }
11501 11503 if (CONST_INT_P (x))
11502 11504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
11503 11505 else if (flag_pic)
11504 11506 output_pic_addr_const (file, x, code);
11505 11507 else
11506 11508 output_addr_const (file, x);
11507 11509 }
11508 11510 }
11509 11511
11510 11512 /* Print a memory operand whose address is ADDR. */
11511 11513
11512 11514 void
11513 11515 print_operand_address (FILE *file, rtx addr)
11514 11516 {
11515 11517 struct ix86_address parts;
11516 11518 rtx base, index, disp;
11517 11519 int scale;
11518 11520 int ok = ix86_decompose_address (addr, &parts);
11519 11521
11520 11522 gcc_assert (ok);
11521 11523
11522 11524 base = parts.base;
11523 11525 index = parts.index;
11524 11526 disp = parts.disp;
11525 11527 scale = parts.scale;
11526 11528
11527 11529 switch (parts.seg)
11528 11530 {
11529 11531 case SEG_DEFAULT:
11530 11532 break;
11531 11533 case SEG_FS:
11532 11534 case SEG_GS:
11533 11535 if (ASSEMBLER_DIALECT == ASM_ATT)
11534 11536 putc ('%', file);
11535 11537 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
11536 11538 break;
11537 11539 default:
11538 11540 gcc_unreachable ();
11539 11541 }
11540 11542
11541 11543 /* Use the one-byte-shorter RIP-relative addressing for 64-bit mode. */
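/* Setting BASE to pc_rtx below makes print_reg emit "rip", so the address
   is printed as symbol(%rip); the RIP-relative form needs no SIB byte and
   is therefore one byte shorter than the absolute disp32 form.  */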
11542 11544 if (TARGET_64BIT && !base && !index)
11543 11545 {
11544 11546 rtx symbol = disp;
11545 11547
11546 11548 if (GET_CODE (disp) == CONST
11547 11549 && GET_CODE (XEXP (disp, 0)) == PLUS
11548 11550 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11549 11551 symbol = XEXP (XEXP (disp, 0), 0);
11550 11552
11551 11553 if (GET_CODE (symbol) == LABEL_REF
11552 11554 || (GET_CODE (symbol) == SYMBOL_REF
11553 11555 && SYMBOL_REF_TLS_MODEL (symbol) == 0))
11554 11556 base = pc_rtx;
11555 11557 }
11556 11558 if (!base && !index)
11557 11559 {
11558 11560 /* A displacement-only address requires special attention. */
11559 11561
11560 11562 if (CONST_INT_P (disp))
11561 11563 {
11562 11564 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
11563 11565 fputs ("ds:", file);
11564 11566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
11565 11567 }
11566 11568 else if (flag_pic)
11567 11569 output_pic_addr_const (file, disp, 0);
11568 11570 else
11569 11571 output_addr_const (file, disp);
11570 11572 }
11571 11573 else
11572 11574 {
11573 11575 if (ASSEMBLER_DIALECT == ASM_ATT)
11574 11576 {
11575 11577 if (disp)
11576 11578 {
11577 11579 if (flag_pic)
11578 11580 output_pic_addr_const (file, disp, 0);
11579 11581 else if (GET_CODE (disp) == LABEL_REF)
11580 11582 output_asm_label (disp);
11581 11583 else
11582 11584 output_addr_const (file, disp);
11583 11585 }
11584 11586
11585 11587 putc ('(', file);
11586 11588 if (base)
11587 11589 print_reg (base, 0, file);
11588 11590 if (index)
11589 11591 {
11590 11592 putc (',', file);
11591 11593 print_reg (index, 0, file);
11592 11594 if (scale != 1)
11593 11595 fprintf (file, ",%d", scale);
11594 11596 }
11595 11597 putc (')', file);
11596 11598 }
11597 11599 else
11598 11600 {
11599 11601 rtx offset = NULL_RTX;
11600 11602
11601 11603 if (disp)
11602 11604 {
11603 11605 /* Pull out the offset of a symbol; print any symbol itself. */
11604 11606 if (GET_CODE (disp) == CONST
11605 11607 && GET_CODE (XEXP (disp, 0)) == PLUS
11606 11608 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
11607 11609 {
11608 11610 offset = XEXP (XEXP (disp, 0), 1);
11609 11611 disp = gen_rtx_CONST (VOIDmode,
11610 11612 XEXP (XEXP (disp, 0), 0));
11611 11613 }
11612 11614
11613 11615 if (flag_pic)
11614 11616 output_pic_addr_const (file, disp, 0);
11615 11617 else if (GET_CODE (disp) == LABEL_REF)
11616 11618 output_asm_label (disp);
11617 11619 else if (CONST_INT_P (disp))
11618 11620 offset = disp;
11619 11621 else
11620 11622 output_addr_const (file, disp);
11621 11623 }
11622 11624
11623 11625 putc ('[', file);
11624 11626 if (base)
11625 11627 {
11626 11628 print_reg (base, 0, file);
11627 11629 if (offset)
11628 11630 {
11629 11631 if (INTVAL (offset) >= 0)
11630 11632 putc ('+', file);
11631 11633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11632 11634 }
11633 11635 }
11634 11636 else if (offset)
11635 11637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
11636 11638 else
11637 11639 putc ('0', file);
11638 11640
11639 11641 if (index)
11640 11642 {
11641 11643 putc ('+', file);
11642 11644 print_reg (index, 0, file);
11643 11645 if (scale != 1)
11644 11646 fprintf (file, "*%d", scale);
11645 11647 }
11646 11648 putc (']', file);
11647 11649 }
11648 11650 }
11649 11651 }
11650 11652
11651 11653 bool
11652 11654 output_addr_const_extra (FILE *file, rtx x)
11653 11655 {
11654 11656 rtx op;
11655 11657
11656 11658 if (GET_CODE (x) != UNSPEC)
11657 11659 return false;
11658 11660
11659 11661 op = XVECEXP (x, 0, 0);
11660 11662 switch (XINT (x, 1))
11661 11663 {
11662 11664 case UNSPEC_GOTTPOFF:
11663 11665 output_addr_const (file, op);
11664 11666 /* FIXME: This might be @TPOFF in Sun ld. */
11665 11667 fputs ("@GOTTPOFF", file);
11666 11668 break;
11667 11669 case UNSPEC_TPOFF:
11668 11670 output_addr_const (file, op);
11669 11671 fputs ("@TPOFF", file);
11670 11672 break;
11671 11673 case UNSPEC_NTPOFF:
11672 11674 output_addr_const (file, op);
11673 11675 if (TARGET_64BIT)
11674 11676 fputs ("@TPOFF", file);
11675 11677 else
11676 11678 fputs ("@NTPOFF", file);
11677 11679 break;
11678 11680 case UNSPEC_DTPOFF:
11679 11681 output_addr_const (file, op);
11680 11682 fputs ("@DTPOFF", file);
11681 11683 break;
11682 11684 case UNSPEC_GOTNTPOFF:
11683 11685 output_addr_const (file, op);
11684 11686 if (TARGET_64BIT)
11685 11687 fputs (ASSEMBLER_DIALECT == ASM_ATT ?
11686 11688 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file);
11687 11689 else
11688 11690 fputs ("@GOTNTPOFF", file);
11689 11691 break;
11690 11692 case UNSPEC_INDNTPOFF:
11691 11693 output_addr_const (file, op);
11692 11694 fputs ("@INDNTPOFF", file);
11693 11695 break;
11694 11696 #if TARGET_MACHO
11695 11697 case UNSPEC_MACHOPIC_OFFSET:
11696 11698 output_addr_const (file, op);
11697 11699 putc ('-', file);
11698 11700 machopic_output_function_base_name (file);
11699 11701 break;
11700 11702 #endif
11701 11703
11702 11704 default:
11703 11705 return false;
11704 11706 }
11705 11707
11706 11708 return true;
11707 11709 }
11708 11710
11709 11711 /* Split one or more DImode RTL references into pairs of SImode
11710 11712 references. The RTL can be REG, offsettable MEM, integer constant, or
11711 11713 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
11712 11714 split and "num" is its length. lo_half and hi_half are output arrays
11713 11715 that parallel "operands". */
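/* For instance, splitting the DImode constant 0x100000002 produces an
   SImode low half of 2 and a high half of 1; x86 is little-endian, so
   byte offset 0 is the low half and offset 4 the high half.  */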
11714 11716
11715 11717 void
11716 11718 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11717 11719 {
11718 11720 while (num--)
11719 11721 {
11720 11722 rtx op = operands[num];
11721 11723
11722 11724 /* simplify_subreg refuses to split volatile memory addresses,
11723 11725 but we still have to handle them. */
11724 11726 if (MEM_P (op))
11725 11727 {
11726 11728 lo_half[num] = adjust_address (op, SImode, 0);
11727 11729 hi_half[num] = adjust_address (op, SImode, 4);
11728 11730 }
11729 11731 else
11730 11732 {
11731 11733 lo_half[num] = simplify_gen_subreg (SImode, op,
11732 11734 GET_MODE (op) == VOIDmode
11733 11735 ? DImode : GET_MODE (op), 0);
11734 11736 hi_half[num] = simplify_gen_subreg (SImode, op,
11735 11737 GET_MODE (op) == VOIDmode
11736 11738 ? DImode : GET_MODE (op), 4);
11737 11739 }
11738 11740 }
11739 11741 }
11740 11742 /* Split one or more TImode RTL references into pairs of DImode
11741 11743 references. The RTL can be REG, offsettable MEM, integer constant, or
11742 11744 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
11743 11745 split and "num" is its length. lo_half and hi_half are output arrays
11744 11746 that parallel "operands". */
11745 11747
11746 11748 void
11747 11749 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
11748 11750 {
11749 11751 while (num--)
11750 11752 {
11751 11753 rtx op = operands[num];
11752 11754
11753 11755 /* simplify_subreg refuses to split volatile memory addresses, but we
11754 11756 still have to handle them. */
11755 11757 if (MEM_P (op))
11756 11758 {
11757 11759 lo_half[num] = adjust_address (op, DImode, 0);
11758 11760 hi_half[num] = adjust_address (op, DImode, 8);
11759 11761 }
11760 11762 else
11761 11763 {
11762 11764 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
11763 11765 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
11764 11766 }
11765 11767 }
11766 11768 }
11767 11769
11768 11770 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
11769 11771 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
11770 11772 is the expression of the binary operation. The output may either be
11771 11773 emitted here, or returned to the caller, like all output_* functions.
11772 11774
11773 11775 There is no guarantee that the operands are the same mode, as they
11774 11776 might be within FLOAT or FLOAT_EXTEND expressions. */
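/* As an illustration, for a PLUS with an SFmode memory operand 2 on the
   387 stack, the code below builds "fadd%z2\t%2", which prints as "fadds"
   followed by the memory operand; the non-AVX SSE path instead builds
   "addss\t{%2, %0|%0, %2}".  */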
11775 11777
11776 11778 #ifndef SYSV386_COMPAT
11777 11779 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
11778 11780 wants to fix the assemblers because that causes incompatibility
11779 11781 with gcc. No-one wants to fix gcc because that causes
11780 11782 incompatibility with assemblers... You can use the option of
11781 11783 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
11782 11784 #define SYSV386_COMPAT 1
11783 11785 #endif
11784 11786
11785 11787 const char *
11786 11788 output_387_binary_op (rtx insn, rtx *operands)
11787 11789 {
11788 11790 static char buf[40];
11789 11791 const char *p;
11790 11792 const char *ssep;
11791 11793 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
11792 11794
11793 11795 #ifdef ENABLE_CHECKING
11794 11796 /* Even if we do not want to check the inputs, this documents input
11795 11797 constraints, which helps in understanding the following code. */
11796 11798 if (STACK_REG_P (operands[0])
11797 11799 && ((REG_P (operands[1])
11798 11800 && REGNO (operands[0]) == REGNO (operands[1])
11799 11801 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
11800 11802 || (REG_P (operands[2])
11801 11803 && REGNO (operands[0]) == REGNO (operands[2])
11802 11804 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
11803 11805 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
11804 11806 ; /* ok */
11805 11807 else
11806 11808 gcc_assert (is_sse);
11807 11809 #endif
11808 11810
11809 11811 switch (GET_CODE (operands[3]))
11810 11812 {
11811 11813 case PLUS:
11812 11814 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11813 11815 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11814 11816 p = "fiadd";
11815 11817 else
11816 11818 p = "fadd";
11817 11819 ssep = "vadd";
11818 11820 break;
11819 11821
11820 11822 case MINUS:
11821 11823 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11822 11824 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11823 11825 p = "fisub";
11824 11826 else
11825 11827 p = "fsub";
11826 11828 ssep = "vsub";
11827 11829 break;
11828 11830
11829 11831 case MULT:
11830 11832 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11831 11833 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11832 11834 p = "fimul";
11833 11835 else
11834 11836 p = "fmul";
11835 11837 ssep = "vmul";
11836 11838 break;
11837 11839
11838 11840 case DIV:
11839 11841 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
11840 11842 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
11841 11843 p = "fidiv";
11842 11844 else
11843 11845 p = "fdiv";
11844 11846 ssep = "vdiv";
11845 11847 break;
11846 11848
11847 11849 default:
11848 11850 gcc_unreachable ();
11849 11851 }
11850 11852
11851 11853 if (is_sse)
11852 11854 {
11853 11855 if (TARGET_AVX)
11854 11856 {
11855 11857 strcpy (buf, ssep);
11856 11858 if (GET_MODE (operands[0]) == SFmode)
11857 11859 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}");
11858 11860 else
11859 11861 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}");
11860 11862 }
11861 11863 else
11862 11864 {
11863 11865 strcpy (buf, ssep + 1);
11864 11866 if (GET_MODE (operands[0]) == SFmode)
11865 11867 strcat (buf, "ss\t{%2, %0|%0, %2}");
11866 11868 else
11867 11869 strcat (buf, "sd\t{%2, %0|%0, %2}");
11868 11870 }
11869 11871 return buf;
11870 11872 }
11871 11873 strcpy (buf, p);
11872 11874
11873 11875 switch (GET_CODE (operands[3]))
11874 11876 {
11875 11877 case MULT:
11876 11878 case PLUS:
11877 11879 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
11878 11880 {
11879 11881 rtx temp = operands[2];
11880 11882 operands[2] = operands[1];
11881 11883 operands[1] = temp;
11882 11884 }
11883 11885
11884 11886 /* We know operands[0] == operands[1]. */
11885 11887
11886 11888 if (MEM_P (operands[2]))
11887 11889 {
11888 11890 p = "%z2\t%2";
11889 11891 break;
11890 11892 }
11891 11893
11892 11894 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11893 11895 {
11894 11896 if (STACK_TOP_P (operands[0]))
11895 11897 /* How is it that we are storing to a dead operand[2]?
11896 11898 Well, presumably operands[1] is dead too. We can't
11897 11899 store the result to st(0) as st(0) gets popped on this
11898 11900 instruction. Instead store to operands[2] (which I
11899 11901 think has to be st(1)). st(1) will be popped later.
11900 11902 gcc <= 2.8.1 didn't have this check and generated
11901 11903 assembly code that the Unixware assembler rejected. */
11902 11904 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11903 11905 else
11904 11906 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11905 11907 break;
11906 11908 }
11907 11909
11908 11910 if (STACK_TOP_P (operands[0]))
11909 11911 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11910 11912 else
11911 11913 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11912 11914 break;
11913 11915
11914 11916 case MINUS:
11915 11917 case DIV:
11916 11918 if (MEM_P (operands[1]))
11917 11919 {
11918 11920 p = "r%z1\t%1";
11919 11921 break;
11920 11922 }
11921 11923
11922 11924 if (MEM_P (operands[2]))
11923 11925 {
11924 11926 p = "%z2\t%2";
11925 11927 break;
11926 11928 }
11927 11929
11928 11930 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
11929 11931 {
11930 11932 #if SYSV386_COMPAT
11931 11933 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
11932 11934 derived assemblers, confusingly reverse the direction of
11933 11935 the operation for fsub{r} and fdiv{r} when the
11934 11936 destination register is not st(0). The Intel assembler
11935 11937 doesn't have this brain damage. Read !SYSV386_COMPAT to
11936 11938 figure out what the hardware really does. */
11937 11939 if (STACK_TOP_P (operands[0]))
11938 11940 p = "{p\t%0, %2|rp\t%2, %0}";
11939 11941 else
11940 11942 p = "{rp\t%2, %0|p\t%0, %2}";
11941 11943 #else
11942 11944 if (STACK_TOP_P (operands[0]))
11943 11945 /* As above for fmul/fadd, we can't store to st(0). */
11944 11946 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
11945 11947 else
11946 11948 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
11947 11949 #endif
11948 11950 break;
11949 11951 }
11950 11952
11951 11953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
11952 11954 {
11953 11955 #if SYSV386_COMPAT
11954 11956 if (STACK_TOP_P (operands[0]))
11955 11957 p = "{rp\t%0, %1|p\t%1, %0}";
11956 11958 else
11957 11959 p = "{p\t%1, %0|rp\t%0, %1}";
11958 11960 #else
11959 11961 if (STACK_TOP_P (operands[0]))
11960 11962 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
11961 11963 else
11962 11964 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
11963 11965 #endif
11964 11966 break;
11965 11967 }
11966 11968
11967 11969 if (STACK_TOP_P (operands[0]))
11968 11970 {
11969 11971 if (STACK_TOP_P (operands[1]))
11970 11972 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
11971 11973 else
11972 11974 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
11973 11975 break;
11974 11976 }
11975 11977 else if (STACK_TOP_P (operands[1]))
11976 11978 {
11977 11979 #if SYSV386_COMPAT
11978 11980 p = "{\t%1, %0|r\t%0, %1}";
11979 11981 #else
11980 11982 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
11981 11983 #endif
11982 11984 }
11983 11985 else
11984 11986 {
11985 11987 #if SYSV386_COMPAT
11986 11988 p = "{r\t%2, %0|\t%0, %2}";
11987 11989 #else
11988 11990 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
11989 11991 #endif
11990 11992 }
11991 11993 break;
11992 11994
11993 11995 default:
11994 11996 gcc_unreachable ();
11995 11997 }
11996 11998
11997 11999 strcat (buf, p);
11998 12000 return buf;
11999 12001 }
12000 12002
12001 12003 /* Return needed mode for entity in optimize_mode_switching pass. */
12002 12004
12003 12005 int
12004 12006 ix86_mode_needed (int entity, rtx insn)
12005 12007 {
12006 12008 enum attr_i387_cw mode;
12007 12009
12008 12010 /* The mode UNINITIALIZED is used to store the control word after a
12009 12011 function call or ASM pattern. The mode ANY specifies that the function
12010 12012 has no requirements on the control word and makes no changes to the
12011 12013 bits we are interested in. */
12012 12014
12013 12015 if (CALL_P (insn)
12014 12016 || (NONJUMP_INSN_P (insn)
12015 12017 && (asm_noperands (PATTERN (insn)) >= 0
12016 12018 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
12017 12019 return I387_CW_UNINITIALIZED;
12018 12020
12019 12021 if (recog_memoized (insn) < 0)
12020 12022 return I387_CW_ANY;
12021 12023
12022 12024 mode = get_attr_i387_cw (insn);
12023 12025
12024 12026 switch (entity)
12025 12027 {
12026 12028 case I387_TRUNC:
12027 12029 if (mode == I387_CW_TRUNC)
12028 12030 return mode;
12029 12031 break;
12030 12032
12031 12033 case I387_FLOOR:
12032 12034 if (mode == I387_CW_FLOOR)
12033 12035 return mode;
12034 12036 break;
12035 12037
12036 12038 case I387_CEIL:
12037 12039 if (mode == I387_CW_CEIL)
12038 12040 return mode;
12039 12041 break;
12040 12042
12041 12043 case I387_MASK_PM:
12042 12044 if (mode == I387_CW_MASK_PM)
12043 12045 return mode;
12044 12046 break;
12045 12047
12046 12048 default:
12047 12049 gcc_unreachable ();
12048 12050 }
12049 12051
12050 12052 return I387_CW_ANY;
12051 12053 }
12052 12054
12053 12055 /* Output code to initialize the control word copies used by trunc?f?i and
12054 12056 rounding patterns. MODE selects the control word to set up; the
12055 12057 adjusted copy is stored in the stack slot for that mode. */
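/* The rounding-control field occupies bits 10-11 of the x87 control word:
   00 = round to nearest, 01 = round down, 10 = round up, 11 = round toward
   zero.  That is why the code below ORs in 0x0400, 0x0800 or 0x0c00;
   0x0020 sets the precision-exception mask bit.  */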
12056 12058
12057 12059 void
12058 12060 emit_i387_cw_initialization (int mode)
12059 12061 {
12060 12062 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
12061 12063 rtx new_mode;
12062 12064
12063 12065 enum ix86_stack_slot slot;
12064 12066
12065 12067 rtx reg = gen_reg_rtx (HImode);
12066 12068
12067 12069 emit_insn (gen_x86_fnstcw_1 (stored_mode));
12068 12070 emit_move_insn (reg, copy_rtx (stored_mode));
12069 12071
12070 12072 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL
12071 12073 || optimize_function_for_size_p (cfun))
12072 12074 {
12073 12075 switch (mode)
12074 12076 {
12075 12077 case I387_CW_TRUNC:
12076 12078 /* round toward zero (truncate) */
12077 12079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
12078 12080 slot = SLOT_CW_TRUNC;
12079 12081 break;
12080 12082
12081 12083 case I387_CW_FLOOR:
12082 12084 /* round down toward -oo */
12083 12085 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12084 12086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
12085 12087 slot = SLOT_CW_FLOOR;
12086 12088 break;
12087 12089
12088 12090 case I387_CW_CEIL:
12089 12091 /* round up toward +oo */
12090 12092 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
12091 12093 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
12092 12094 slot = SLOT_CW_CEIL;
12093 12095 break;
12094 12096
12095 12097 case I387_CW_MASK_PM:
12096 12098 /* mask precision exception for nearbyint() */
12097 12099 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12098 12100 slot = SLOT_CW_MASK_PM;
12099 12101 break;
12100 12102
12101 12103 default:
12102 12104 gcc_unreachable ();
12103 12105 }
12104 12106 }
12105 12107 else
12106 12108 {
12107 12109 switch (mode)
12108 12110 {
12109 12111 case I387_CW_TRUNC:
12110 12112 /* round toward zero (truncate) */
12111 12113 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
12112 12114 slot = SLOT_CW_TRUNC;
12113 12115 break;
12114 12116
12115 12117 case I387_CW_FLOOR:
12116 12118 /* round down toward -oo */
12117 12119 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
12118 12120 slot = SLOT_CW_FLOOR;
12119 12121 break;
12120 12122
12121 12123 case I387_CW_CEIL:
12122 12124 /* round up toward +oo */
12123 12125 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
12124 12126 slot = SLOT_CW_CEIL;
12125 12127 break;
12126 12128
12127 12129 case I387_CW_MASK_PM:
12128 12130 /* mask precision exception for nearbyint() */
12129 12131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
12130 12132 slot = SLOT_CW_MASK_PM;
12131 12133 break;
12132 12134
12133 12135 default:
12134 12136 gcc_unreachable ();
12135 12137 }
12136 12138 }
12137 12139
12138 12140 gcc_assert (slot < MAX_386_STACK_LOCALS);
12139 12141
12140 12142 new_mode = assign_386_stack_local (HImode, slot);
12141 12143 emit_move_insn (new_mode, reg);
12142 12144 }
12143 12145
12144 12146 /* Output code for INSN to convert a float to a signed int. OPERANDS
12145 12147 are the insn operands. The output may be [HSD]Imode and the input
12146 12148 operand may be [SDX]Fmode. */
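/* With SSE3's fisttp the store truncates regardless of the control word;
   otherwise the control word is temporarily switched to the adjusted copy
   in operand 3 and restored from the saved copy in operand 2 around the
   fist/fistp.  */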
12147 12149
12148 12150 const char *
12149 12151 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
12150 12152 {
12151 12153 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12152 12154 int dimode_p = GET_MODE (operands[0]) == DImode;
12153 12155 int round_mode = get_attr_i387_cw (insn);
12154 12156
12155 12157 /* Jump through a hoop or two for DImode, since the hardware has no
12156 12158 non-popping instruction. We used to do this a different way, but
12157 12159 that was somewhat fragile and broke with post-reload splitters. */
12158 12160 if ((dimode_p || fisttp) && !stack_top_dies)
12159 12161 output_asm_insn ("fld\t%y1", operands);
12160 12162
12161 12163 gcc_assert (STACK_TOP_P (operands[1]));
12162 12164 gcc_assert (MEM_P (operands[0]));
12163 12165 gcc_assert (GET_MODE (operands[1]) != TFmode);
12164 12166
12165 12167 if (fisttp)
12166 12168 output_asm_insn ("fisttp%z0\t%0", operands);
12167 12169 else
12168 12170 {
12169 12171 if (round_mode != I387_CW_ANY)
12170 12172 output_asm_insn ("fldcw\t%3", operands);
12171 12173 if (stack_top_dies || dimode_p)
12172 12174 output_asm_insn ("fistp%z0\t%0", operands);
12173 12175 else
12174 12176 output_asm_insn ("fist%z0\t%0", operands);
12175 12177 if (round_mode != I387_CW_ANY)
12176 12178 output_asm_insn ("fldcw\t%2", operands);
12177 12179 }
12178 12180
12179 12181 return "";
12180 12182 }
12181 12183
12182 12184 /* Output code for x87 ffreep insn. The OPNO argument, which may only
12183 12185 have the values zero or one, indicates the ffreep insn's operand
12184 12186 from the OPERANDS array. */
12185 12187
12186 12188 static const char *
12187 12189 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
12188 12190 {
12189 12191 if (TARGET_USE_FFREEP)
12190 12192 #ifdef HAVE_AS_IX86_FFREEP
12191 12193 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
12192 12194 #else
12193 12195 {
12194 12196 static char retval[32];
12195 12197 int regno = REGNO (operands[opno]);
12196 12198
12197 12199 gcc_assert (FP_REGNO_P (regno));
12198 12200
12199 12201 regno -= FIRST_STACK_REG;
12200 12202
12201 12203 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno);
12202 12204 return retval;
12203 12205 }
12204 12206 #endif
12205 12207
12206 12208 return opno ? "fstp\t%y1" : "fstp\t%y0";
12207 12209 }
12208 12210
12209 12211
12210 12212 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
12211 12213 should be used. UNORDERED_P is true when fucom should be used. */
12212 12214
12213 12215 const char *
12214 12216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
12215 12217 {
12216 12218 int stack_top_dies;
12217 12219 rtx cmp_op0, cmp_op1;
12218 12220 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
12219 12221
12220 12222 if (eflags_p)
12221 12223 {
12222 12224 cmp_op0 = operands[0];
12223 12225 cmp_op1 = operands[1];
12224 12226 }
12225 12227 else
12226 12228 {
12227 12229 cmp_op0 = operands[1];
12228 12230 cmp_op1 = operands[2];
12229 12231 }
12230 12232
12231 12233 if (is_sse)
12232 12234 {
12233 12235 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}";
12234 12236 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}";
12235 12237 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}";
12236 12238 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}";
12237 12239
12238 12240 if (GET_MODE (operands[0]) == SFmode)
12239 12241 if (unordered_p)
12240 12242 return &ucomiss[TARGET_AVX ? 0 : 1];
12241 12243 else
12242 12244 return &comiss[TARGET_AVX ? 0 : 1];
12243 12245 else
12244 12246 if (unordered_p)
12245 12247 return &ucomisd[TARGET_AVX ? 0 : 1];
12246 12248 else
12247 12249 return &comisd[TARGET_AVX ? 0 : 1];
12248 12250 }
12249 12251
12250 12252 gcc_assert (STACK_TOP_P (cmp_op0));
12251 12253
12252 12254 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
12253 12255
12254 12256 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
12255 12257 {
12256 12258 if (stack_top_dies)
12257 12259 {
12258 12260 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
12259 12261 return output_387_ffreep (operands, 1);
12260 12262 }
12261 12263 else
12262 12264 return "ftst\n\tfnstsw\t%0";
12263 12265 }
12264 12266
12265 12267 if (STACK_REG_P (cmp_op1)
12266 12268 && stack_top_dies
12267 12269 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
12268 12270 && REGNO (cmp_op1) != FIRST_STACK_REG)
12269 12271 {
12270 12272 /* If the top of the 387 stack dies, and the other operand
12271 12273 is also a stack register that dies, then this must be an
12272 12274 `fcompp' float compare. */
12273 12275
12274 12276 if (eflags_p)
12275 12277 {
12276 12278 /* There is no double popping fcomi variant. Fortunately,
12277 12279 eflags is immune from the fstp's cc clobbering. */
12278 12280 if (unordered_p)
12279 12281 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
12280 12282 else
12281 12283 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
12282 12284 return output_387_ffreep (operands, 0);
12283 12285 }
12284 12286 else
12285 12287 {
12286 12288 if (unordered_p)
12287 12289 return "fucompp\n\tfnstsw\t%0";
12288 12290 else
12289 12291 return "fcompp\n\tfnstsw\t%0";
12290 12292 }
12291 12293 }
12292 12294 else
12293 12295 {
12294 12296 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
12295 12297
12296 12298 static const char * const alt[16] =
12297 12299 {
12298 12300 "fcom%z2\t%y2\n\tfnstsw\t%0",
12299 12301 "fcomp%z2\t%y2\n\tfnstsw\t%0",
12300 12302 "fucom%z2\t%y2\n\tfnstsw\t%0",
12301 12303 "fucomp%z2\t%y2\n\tfnstsw\t%0",
12302 12304
12303 12305 "ficom%z2\t%y2\n\tfnstsw\t%0",
12304 12306 "ficomp%z2\t%y2\n\tfnstsw\t%0",
12305 12307 NULL,
12306 12308 NULL,
12307 12309
12308 12310 "fcomi\t{%y1, %0|%0, %y1}",
12309 12311 "fcomip\t{%y1, %0|%0, %y1}",
12310 12312 "fucomi\t{%y1, %0|%0, %y1}",
12311 12313 "fucomip\t{%y1, %0|%0, %y1}",
12312 12314
12313 12315 NULL,
12314 12316 NULL,
12315 12317 NULL,
12316 12318 NULL
12317 12319 };
12318 12320
12319 12321 int mask;
12320 12322 const char *ret;
12321 12323
12322 12324 mask = eflags_p << 3;
12323 12325 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
12324 12326 mask |= unordered_p << 1;
12325 12327 mask |= stack_top_dies;
12326 12328
12327 12329 gcc_assert (mask < 16);
12328 12330 ret = alt[mask];
12329 12331 gcc_assert (ret);
12330 12332
12331 12333 return ret;
12332 12334 }
12333 12335 }
12334 12336
12335 12337 void
12336 12338 ix86_output_addr_vec_elt (FILE *file, int value)
12337 12339 {
12338 12340 const char *directive = ASM_LONG;
12339 12341
12340 12342 #ifdef ASM_QUAD
12341 12343 if (TARGET_64BIT)
12342 12344 directive = ASM_QUAD;
12343 12345 #else
12344 12346 gcc_assert (!TARGET_64BIT);
12345 12347 #endif
12346 12348
12347 12349 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
12348 12350 }
12349 12351
12350 12352 void
12351 12353 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
12352 12354 {
12353 12355 const char *directive = ASM_LONG;
12354 12356
12355 12357 #ifdef ASM_QUAD
12356 12358 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
12357 12359 directive = ASM_QUAD;
12358 12360 #else
12359 12361 gcc_assert (!TARGET_64BIT);
12360 12362 #endif
12361 12363 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
12362 12364 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
12363 12365 fprintf (file, "%s%s%d-%s%d\n",
12364 12366 directive, LPREFIX, value, LPREFIX, rel);
12365 12367 else if (HAVE_AS_GOTOFF_IN_DATA)
12366 12368 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
12367 12369 #if TARGET_MACHO
12368 12370 else if (TARGET_MACHO)
12369 12371 {
12370 12372 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
12371 12373 machopic_output_function_base_name (file);
12372 12374 fprintf(file, "\n");
12373 12375 }
12374 12376 #endif
12375 12377 else
12376 12378 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
12377 12379 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
12378 12380 }
12379 12381
12380 12382 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
12381 12383 for the target. */
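/* "xor reg, reg" is smaller than "mov $0, reg" but clobbers the flags,
   which is why the xor form below is wrapped in a PARALLEL with an
   explicit CLOBBER of FLAGS_REG.  */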
12382 12384
12383 12385 void
12384 12386 ix86_expand_clear (rtx dest)
12385 12387 {
12386 12388 rtx tmp;
12387 12389
12388 12390 /* We play register width games, which are only valid after reload. */
12389 12391 gcc_assert (reload_completed);
12390 12392
12391 12393 /* Avoid HImode and its attendant prefix byte. */
12392 12394 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
12393 12395 dest = gen_rtx_REG (SImode, REGNO (dest));
12394 12396 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
12395 12397
12396 12398 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
12397 12399 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ()))
12398 12400 {
12399 12401 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12400 12402 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
12401 12403 }
12402 12404
12403 12405 emit_insn (tmp);
12404 12406 }
12405 12407
12406 12408 /* X is an unchanging MEM. If it is a constant pool reference, return
12407 12409 the constant pool rtx, else NULL. */
12408 12410
12409 12411 rtx
12410 12412 maybe_get_pool_constant (rtx x)
12411 12413 {
12412 12414 x = ix86_delegitimize_address (XEXP (x, 0));
12413 12415
12414 12416 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
12415 12417 return get_pool_constant (x);
12416 12418
12417 12419 return NULL_RTX;
12418 12420 }
12419 12421
12420 12422 void
12421 12423 ix86_expand_move (enum machine_mode mode, rtx operands[])
12422 12424 {
12423 12425 rtx op0, op1;
12424 12426 enum tls_model model;
12425 12427
12426 12428 op0 = operands[0];
12427 12429 op1 = operands[1];
12428 12430
12429 12431 if (GET_CODE (op1) == SYMBOL_REF)
12430 12432 {
12431 12433 model = SYMBOL_REF_TLS_MODEL (op1);
12432 12434 if (model)
12433 12435 {
12434 12436 op1 = legitimize_tls_address (op1, model, true);
12435 12437 op1 = force_operand (op1, op0);
12436 12438 if (op1 == op0)
12437 12439 return;
12438 12440 }
12439 12441 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12440 12442 && SYMBOL_REF_DLLIMPORT_P (op1))
12441 12443 op1 = legitimize_dllimport_symbol (op1, false);
12442 12444 }
12443 12445 else if (GET_CODE (op1) == CONST
12444 12446 && GET_CODE (XEXP (op1, 0)) == PLUS
12445 12447 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
12446 12448 {
12447 12449 rtx addend = XEXP (XEXP (op1, 0), 1);
12448 12450 rtx symbol = XEXP (XEXP (op1, 0), 0);
12449 12451 rtx tmp = NULL;
12450 12452
12451 12453 model = SYMBOL_REF_TLS_MODEL (symbol);
12452 12454 if (model)
12453 12455 tmp = legitimize_tls_address (symbol, model, true);
12454 12456 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
12455 12457 && SYMBOL_REF_DLLIMPORT_P (symbol))
12456 12458 tmp = legitimize_dllimport_symbol (symbol, true);
12457 12459
12458 12460 if (tmp)
12459 12461 {
12460 12462 tmp = force_operand (tmp, NULL);
12461 12463 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
12462 12464 op0, 1, OPTAB_DIRECT);
12463 12465 if (tmp == op0)
12464 12466 return;
12465 12467 }
12466 12468 }
12467 12469
12468 12470 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
12469 12471 {
12470 12472 if (TARGET_MACHO && !TARGET_64BIT)
12471 12473 {
12472 12474 #if TARGET_MACHO
12473 12475 if (MACHOPIC_PURE)
12474 12476 {
12475 12477 rtx temp = ((reload_in_progress
12476 12478 || ((op0 && REG_P (op0))
12477 12479 && mode == Pmode))
12478 12480 ? op0 : gen_reg_rtx (Pmode));
12479 12481 op1 = machopic_indirect_data_reference (op1, temp);
12480 12482 op1 = machopic_legitimize_pic_address (op1, mode,
12481 12483 temp == op1 ? 0 : temp);
12482 12484 }
12483 12485 else if (MACHOPIC_INDIRECT)
12484 12486 op1 = machopic_indirect_data_reference (op1, 0);
12485 12487 if (op0 == op1)
12486 12488 return;
12487 12489 #endif
12488 12490 }
12489 12491 else
12490 12492 {
12491 12493 if (MEM_P (op0))
12492 12494 op1 = force_reg (Pmode, op1);
12493 12495 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
12494 12496 {
12495 12497 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
12496 12498 op1 = legitimize_pic_address (op1, reg);
12497 12499 if (op0 == op1)
12498 12500 return;
12499 12501 }
12500 12502 }
12501 12503 }
12502 12504 else
12503 12505 {
12504 12506 if (MEM_P (op0)
12505 12507 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
12506 12508 || !push_operand (op0, mode))
12507 12509 && MEM_P (op1))
12508 12510 op1 = force_reg (mode, op1);
12509 12511
12510 12512 if (push_operand (op0, mode)
12511 12513 && ! general_no_elim_operand (op1, mode))
12512 12514 op1 = copy_to_mode_reg (mode, op1);
12513 12515
12514 12516       /* Force large constants in 64-bit compilation into a register
12515 12517 	 to get them CSEed.  */
12516 12518 if (can_create_pseudo_p ()
12517 12519 && (mode == DImode) && TARGET_64BIT
12518 12520 && immediate_operand (op1, mode)
12519 12521 && !x86_64_zext_immediate_operand (op1, VOIDmode)
12520 12522 && !register_operand (op0, mode)
12521 12523 && optimize)
12522 12524 op1 = copy_to_mode_reg (mode, op1);
12523 12525
12524 12526 if (can_create_pseudo_p ()
12525 12527 && FLOAT_MODE_P (mode)
12526 12528 && GET_CODE (op1) == CONST_DOUBLE)
12527 12529 {
12528 12530 /* If we are loading a floating point constant to a register,
12529 12531 force the value to memory now, since we'll get better code
12530 12532 	     out of the back end.  */
12531 12533
12532 12534 op1 = validize_mem (force_const_mem (mode, op1));
12533 12535 if (!register_operand (op0, mode))
12534 12536 {
12535 12537 rtx temp = gen_reg_rtx (mode);
12536 12538 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
12537 12539 emit_move_insn (op0, temp);
12538 12540 return;
12539 12541 }
12540 12542 }
12541 12543 }
12542 12544
12543 12545 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12544 12546 }
12545 12547
12546 12548 void
12547 12549 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
12548 12550 {
12549 12551 rtx op0 = operands[0], op1 = operands[1];
12550 12552 unsigned int align = GET_MODE_ALIGNMENT (mode);
12551 12553
12552 12554 /* Force constants other than zero into memory. We do not know how
12553 12555 the instructions used to build constants modify the upper 64 bits
12554 12556      of the register; once we have that information we may be able
12555 12557 to handle some of them more efficiently. */
12556 12558 if (can_create_pseudo_p ()
12557 12559 && register_operand (op0, mode)
12558 12560 && (CONSTANT_P (op1)
12559 12561 || (GET_CODE (op1) == SUBREG
12560 12562 && CONSTANT_P (SUBREG_REG (op1))))
12561 12563 && standard_sse_constant_p (op1) <= 0)
12562 12564 op1 = validize_mem (force_const_mem (mode, op1));
12563 12565
12564 12566   /* We need to check memory alignment for SSE mode since attributes
12565 12567 can make operands unaligned. */
12566 12568 if (can_create_pseudo_p ()
12567 12569 && SSE_REG_MODE_P (mode)
12568 12570 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
12569 12571 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
12570 12572 {
12571 12573 rtx tmp[2];
12572 12574
12573 12575 /* ix86_expand_vector_move_misalign() does not like constants ... */
12574 12576 if (CONSTANT_P (op1)
12575 12577 || (GET_CODE (op1) == SUBREG
12576 12578 && CONSTANT_P (SUBREG_REG (op1))))
12577 12579 op1 = validize_mem (force_const_mem (mode, op1));
12578 12580
12579 12581 /* ... nor both arguments in memory. */
12580 12582 if (!register_operand (op0, mode)
12581 12583 && !register_operand (op1, mode))
12582 12584 op1 = force_reg (mode, op1);
12583 12585
12584 12586 tmp[0] = op0; tmp[1] = op1;
12585 12587 ix86_expand_vector_move_misalign (mode, tmp);
12586 12588 return;
12587 12589 }
12588 12590
12589 12591 /* Make operand1 a register if it isn't already. */
12590 12592 if (can_create_pseudo_p ()
12591 12593 && !register_operand (op0, mode)
12592 12594 && !register_operand (op1, mode))
12593 12595 {
12594 12596 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
12595 12597 return;
12596 12598 }
12597 12599
12598 12600 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
12599 12601 }
12600 12602
12601 12603 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
12602 12604 straight to ix86_expand_vector_move. */
12603 12605 /* Code generation for scalar reg-reg moves of single and double precision data:
12604 12606 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
12605 12607 movaps reg, reg
12606 12608 else
12607 12609 movss reg, reg
12608 12610 if (x86_sse_partial_reg_dependency == true)
12609 12611 movapd reg, reg
12610 12612 else
12611 12613 movsd reg, reg
12612 12614
12613 12615 Code generation for scalar loads of double precision data:
12614 12616 if (x86_sse_split_regs == true)
12615 12617 movlpd mem, reg (gas syntax)
12616 12618 else
12617 12619 movsd mem, reg
12618 12620
12619 12621 Code generation for unaligned packed loads of single precision data
12620 12622 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
12621 12623 if (x86_sse_unaligned_move_optimal)
12622 12624 movups mem, reg
12623 12625
12624 12626 if (x86_sse_partial_reg_dependency == true)
12625 12627 {
12626 12628 xorps reg, reg
12627 12629 movlps mem, reg
12628 12630 movhps mem+8, reg
12629 12631 }
12630 12632 else
12631 12633 {
12632 12634 movlps mem, reg
12633 12635 movhps mem+8, reg
12634 12636 }
12635 12637
12636 12638 Code generation for unaligned packed loads of double precision data
12637 12639 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
12638 12640 if (x86_sse_unaligned_move_optimal)
12639 12641 movupd mem, reg
12640 12642
12641 12643 if (x86_sse_split_regs == true)
12642 12644 {
12643 12645 movlpd mem, reg
12644 12646 movhpd mem+8, reg
12645 12647 }
12646 12648 else
12647 12649 {
12648 12650 movsd mem, reg
12649 12651 movhpd mem+8, reg
12650 12652 }
12651 12653 */
12652 12654
12653 12655 void
12654 12656 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
12655 12657 {
12656 12658 rtx op0, op1, m;
12657 12659
12658 12660 op0 = operands[0];
12659 12661 op1 = operands[1];
12660 12662
12661 12663 if (TARGET_AVX)
12662 12664 {
12663 12665 switch (GET_MODE_CLASS (mode))
12664 12666 {
12665 12667 case MODE_VECTOR_INT:
12666 12668 case MODE_INT:
12667 12669 switch (GET_MODE_SIZE (mode))
12668 12670 {
12669 12671 case 16:
12670 12672 op0 = gen_lowpart (V16QImode, op0);
12671 12673 op1 = gen_lowpart (V16QImode, op1);
12672 12674 emit_insn (gen_avx_movdqu (op0, op1));
12673 12675 break;
12674 12676 case 32:
12675 12677 op0 = gen_lowpart (V32QImode, op0);
12676 12678 op1 = gen_lowpart (V32QImode, op1);
12677 12679 emit_insn (gen_avx_movdqu256 (op0, op1));
12678 12680 break;
12679 12681 default:
12680 12682 gcc_unreachable ();
12681 12683 }
12682 12684 break;
12683 12685 case MODE_VECTOR_FLOAT:
12684 12686 op0 = gen_lowpart (mode, op0);
12685 12687 op1 = gen_lowpart (mode, op1);
12686 12688
12687 12689 switch (mode)
12688 12690 {
12689 12691 case V4SFmode:
12690 12692 emit_insn (gen_avx_movups (op0, op1));
12691 12693 break;
12692 12694 case V8SFmode:
12693 12695 emit_insn (gen_avx_movups256 (op0, op1));
12694 12696 break;
12695 12697 case V2DFmode:
12696 12698 emit_insn (gen_avx_movupd (op0, op1));
12697 12699 break;
12698 12700 case V4DFmode:
12699 12701 emit_insn (gen_avx_movupd256 (op0, op1));
12700 12702 break;
12701 12703 default:
12702 12704 gcc_unreachable ();
12703 12705 }
12704 12706 break;
12705 12707
12706 12708 default:
12707 12709 gcc_unreachable ();
12708 12710 }
12709 12711
12710 12712 return;
12711 12713 }
12712 12714
12713 12715 if (MEM_P (op1))
12714 12716 {
12715 12717 /* If we're optimizing for size, movups is the smallest. */
12716 12718 if (optimize_insn_for_size_p ())
12717 12719 {
12718 12720 op0 = gen_lowpart (V4SFmode, op0);
12719 12721 op1 = gen_lowpart (V4SFmode, op1);
12720 12722 emit_insn (gen_sse_movups (op0, op1));
12721 12723 return;
12722 12724 }
12723 12725
12724 12726 /* ??? If we have typed data, then it would appear that using
12725 12727 movdqu is the only way to get unaligned data loaded with
12726 12728 integer type. */
12727 12729 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12728 12730 {
12729 12731 op0 = gen_lowpart (V16QImode, op0);
12730 12732 op1 = gen_lowpart (V16QImode, op1);
12731 12733 emit_insn (gen_sse2_movdqu (op0, op1));
12732 12734 return;
12733 12735 }
12734 12736
12735 12737 if (TARGET_SSE2 && mode == V2DFmode)
12736 12738 {
12737 12739 rtx zero;
12738 12740
12739 12741 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12740 12742 {
12741 12743 op0 = gen_lowpart (V2DFmode, op0);
12742 12744 op1 = gen_lowpart (V2DFmode, op1);
12743 12745 emit_insn (gen_sse2_movupd (op0, op1));
12744 12746 return;
12745 12747 }
12746 12748
12747 12749 /* When SSE registers are split into halves, we can avoid
12748 12750 writing to the top half twice. */
12749 12751 if (TARGET_SSE_SPLIT_REGS)
12750 12752 {
12751 12753 emit_clobber (op0);
12752 12754 zero = op0;
12753 12755 }
12754 12756 else
12755 12757 {
12756 12758 /* ??? Not sure about the best option for the Intel chips.
12757 12759 The following would seem to satisfy; the register is
12758 12760 entirely cleared, breaking the dependency chain. We
12759 12761 then store to the upper half, with a dependency depth
12760 12762 of one. A rumor has it that Intel recommends two movsd
12761 12763 followed by an unpacklpd, but this is unconfirmed. And
12762 12764 given that the dependency depth of the unpacklpd would
12763 12765 still be one, I'm not sure why this would be better. */
12764 12766 zero = CONST0_RTX (V2DFmode);
12765 12767 }
12766 12768
12767 12769 m = adjust_address (op1, DFmode, 0);
12768 12770 emit_insn (gen_sse2_loadlpd (op0, zero, m));
12769 12771 m = adjust_address (op1, DFmode, 8);
12770 12772 emit_insn (gen_sse2_loadhpd (op0, op0, m));
12771 12773 }
12772 12774 else
12773 12775 {
12774 12776 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
12775 12777 {
12776 12778 op0 = gen_lowpart (V4SFmode, op0);
12777 12779 op1 = gen_lowpart (V4SFmode, op1);
12778 12780 emit_insn (gen_sse_movups (op0, op1));
12779 12781 return;
12780 12782 }
12781 12783
12782 12784 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
12783 12785 emit_move_insn (op0, CONST0_RTX (mode));
12784 12786 else
12785 12787 emit_clobber (op0);
12786 12788
12787 12789 if (mode != V4SFmode)
12788 12790 op0 = gen_lowpart (V4SFmode, op0);
12789 12791 m = adjust_address (op1, V2SFmode, 0);
12790 12792 emit_insn (gen_sse_loadlps (op0, op0, m));
12791 12793 m = adjust_address (op1, V2SFmode, 8);
12792 12794 emit_insn (gen_sse_loadhps (op0, op0, m));
12793 12795 }
12794 12796 }
12795 12797 else if (MEM_P (op0))
12796 12798 {
12797 12799 /* If we're optimizing for size, movups is the smallest. */
12798 12800 if (optimize_insn_for_size_p ())
12799 12801 {
12800 12802 op0 = gen_lowpart (V4SFmode, op0);
12801 12803 op1 = gen_lowpart (V4SFmode, op1);
12802 12804 emit_insn (gen_sse_movups (op0, op1));
12803 12805 return;
12804 12806 }
12805 12807
12806 12808 /* ??? Similar to above, only less clear because of quote
12807 12809 typeless stores unquote. */
12808 12810 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
12809 12811 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12810 12812 {
12811 12813 op0 = gen_lowpart (V16QImode, op0);
12812 12814 op1 = gen_lowpart (V16QImode, op1);
12813 12815 emit_insn (gen_sse2_movdqu (op0, op1));
12814 12816 return;
12815 12817 }
12816 12818
12817 12819 if (TARGET_SSE2 && mode == V2DFmode)
12818 12820 {
12819 12821 m = adjust_address (op0, DFmode, 0);
12820 12822 emit_insn (gen_sse2_storelpd (m, op1));
12821 12823 m = adjust_address (op0, DFmode, 8);
12822 12824 emit_insn (gen_sse2_storehpd (m, op1));
12823 12825 }
12824 12826 else
12825 12827 {
12826 12828 if (mode != V4SFmode)
12827 12829 op1 = gen_lowpart (V4SFmode, op1);
12828 12830 m = adjust_address (op0, V2SFmode, 0);
12829 12831 emit_insn (gen_sse_storelps (m, op1));
12830 12832 m = adjust_address (op0, V2SFmode, 8);
12831 12833 emit_insn (gen_sse_storehps (m, op1));
12832 12834 }
12833 12835 }
12834 12836 else
12835 12837 gcc_unreachable ();
12836 12838 }
12837 12839
12838 12840 /* Expand a push in MODE. This is some mode for which we do not support
12839 12841 proper push instructions, at least from the registers that we expect
12840 12842 the value to live in. */
12841 12843
12842 12844 void
12843 12845 ix86_expand_push (enum machine_mode mode, rtx x)
12844 12846 {
12845 12847 rtx tmp;
12846 12848
12847 12849 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
12848 12850 GEN_INT (-GET_MODE_SIZE (mode)),
12849 12851 stack_pointer_rtx, 1, OPTAB_DIRECT);
12850 12852 if (tmp != stack_pointer_rtx)
12851 12853 emit_move_insn (stack_pointer_rtx, tmp);
12852 12854
12853 12855 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
12854 12856
12855 12857   /* When we push an operand onto the stack, it has to be aligned at least
12856 12858      at the function argument boundary.  However, since we don't have
12857 12859 the argument type, we can't determine the actual argument
12858 12860 boundary. */
12859 12861 emit_move_insn (tmp, x);
12860 12862 }
12861 12863
12862 12864 /* Helper function of ix86_fixup_binary_operands to canonicalize
12863 12865 operand order. Returns true if the operands should be swapped. */
12864 12866
12865 12867 static bool
12866 12868 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
12867 12869 rtx operands[])
12868 12870 {
12869 12871 rtx dst = operands[0];
12870 12872 rtx src1 = operands[1];
12871 12873 rtx src2 = operands[2];
12872 12874
12873 12875 /* If the operation is not commutative, we can't do anything. */
12874 12876 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
12875 12877 return false;
12876 12878
12877 12879 /* Highest priority is that src1 should match dst. */
12878 12880 if (rtx_equal_p (dst, src1))
12879 12881 return false;
12880 12882 if (rtx_equal_p (dst, src2))
12881 12883 return true;
12882 12884
12883 12885 /* Next highest priority is that immediate constants come second. */
12884 12886 if (immediate_operand (src2, mode))
12885 12887 return false;
12886 12888 if (immediate_operand (src1, mode))
12887 12889 return true;
12888 12890
12889 12891 /* Lowest priority is that memory references should come second. */
12890 12892 if (MEM_P (src2))
12891 12893 return false;
12892 12894 if (MEM_P (src1))
12893 12895 return true;
12894 12896
12895 12897 return false;
12896 12898 }
12897 12899
12898 12900
12899 12901 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
12900 12902 destination to use for the operation. If different from the true
12901 12903 destination in operands[0], a copy operation will be required. */
12902 12904
12903 12905 rtx
12904 12906 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
12905 12907 rtx operands[])
12906 12908 {
12907 12909 rtx dst = operands[0];
12908 12910 rtx src1 = operands[1];
12909 12911 rtx src2 = operands[2];
12910 12912
12911 12913 /* Canonicalize operand order. */
12912 12914 if (ix86_swap_binary_operands_p (code, mode, operands))
12913 12915 {
12914 12916 rtx temp;
12915 12917
12916 12918 /* It is invalid to swap operands of different modes. */
12917 12919 gcc_assert (GET_MODE (src1) == GET_MODE (src2));
12918 12920
12919 12921 temp = src1;
12920 12922 src1 = src2;
12921 12923 src2 = temp;
12922 12924 }
12923 12925
12924 12926 /* Both source operands cannot be in memory. */
12925 12927 if (MEM_P (src1) && MEM_P (src2))
12926 12928 {
12927 12929 /* Optimization: Only read from memory once. */
12928 12930 if (rtx_equal_p (src1, src2))
12929 12931 {
12930 12932 src2 = force_reg (mode, src2);
12931 12933 src1 = src2;
12932 12934 }
12933 12935 else
12934 12936 src2 = force_reg (mode, src2);
12935 12937 }
12936 12938
12937 12939 /* If the destination is memory, and we do not have matching source
12938 12940 operands, do things in registers. */
12939 12941 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
12940 12942 dst = gen_reg_rtx (mode);
12941 12943
12942 12944 /* Source 1 cannot be a constant. */
12943 12945 if (CONSTANT_P (src1))
12944 12946 src1 = force_reg (mode, src1);
12945 12947
12946 12948 /* Source 1 cannot be a non-matching memory. */
12947 12949 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
12948 12950 src1 = force_reg (mode, src1);
12949 12951
12950 12952 operands[1] = src1;
12951 12953 operands[2] = src2;
12952 12954 return dst;
12953 12955 }
12954 12956
12955 12957 /* Similarly, but assume that the destination has already been
12956 12958 set up properly. */
12957 12959
12958 12960 void
12959 12961 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
12960 12962 enum machine_mode mode, rtx operands[])
12961 12963 {
12962 12964 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
12963 12965 gcc_assert (dst == operands[0]);
12964 12966 }
12965 12967
12966 12968 /* Attempt to expand a binary operator. Make the expansion closer to the
12967 12969    actual machine than just general_operand, which will allow 3 separate
12968 12970 memory references (one output, two input) in a single insn. */
12969 12971
12970 12972 void
12971 12973 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
12972 12974 rtx operands[])
12973 12975 {
12974 12976 rtx src1, src2, dst, op, clob;
12975 12977
12976 12978 dst = ix86_fixup_binary_operands (code, mode, operands);
12977 12979 src1 = operands[1];
12978 12980 src2 = operands[2];
12979 12981
12980 12982 /* Emit the instruction. */
12981 12983
12982 12984 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
12983 12985 if (reload_in_progress)
12984 12986 {
12985 12987 /* Reload doesn't know about the flags register, and doesn't know that
12986 12988 it doesn't want to clobber it. We can only do this with PLUS. */
12987 12989 gcc_assert (code == PLUS);
12988 12990 emit_insn (op);
12989 12991 }
12990 12992 else
12991 12993 {
12992 12994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
12993 12995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
12994 12996 }
12995 12997
12996 12998 /* Fix up the destination if needed. */
12997 12999 if (dst != operands[0])
12998 13000 emit_move_insn (operands[0], dst);
12999 13001 }
13000 13002
13001 13003 /* Return TRUE or FALSE depending on whether the binary operator meets the
13002 13004 appropriate constraints. */
13003 13005
13004 13006 int
13005 13007 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
13006 13008 rtx operands[3])
13007 13009 {
13008 13010 rtx dst = operands[0];
13009 13011 rtx src1 = operands[1];
13010 13012 rtx src2 = operands[2];
13011 13013
13012 13014 /* Both source operands cannot be in memory. */
13013 13015 if (MEM_P (src1) && MEM_P (src2))
13014 13016 return 0;
13015 13017
13016 13018 /* Canonicalize operand order for commutative operators. */
13017 13019 if (ix86_swap_binary_operands_p (code, mode, operands))
13018 13020 {
13019 13021 rtx temp = src1;
13020 13022 src1 = src2;
13021 13023 src2 = temp;
13022 13024 }
13023 13025
13024 13026 /* If the destination is memory, we must have a matching source operand. */
13025 13027 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
13026 13028 return 0;
13027 13029
13028 13030 /* Source 1 cannot be a constant. */
13029 13031 if (CONSTANT_P (src1))
13030 13032 return 0;
13031 13033
13032 13034 /* Source 1 cannot be a non-matching memory. */
13033 13035 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
13034 13036 return 0;
13035 13037
13036 13038 return 1;
13037 13039 }
13038 13040
13039 13041 /* Attempt to expand a unary operator. Make the expansion closer to the
13040 13042    actual machine than just general_operand, which will allow 2 separate
13041 13043 memory references (one output, one input) in a single insn. */
13042 13044
13043 13045 void
13044 13046 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
13045 13047 rtx operands[])
13046 13048 {
13047 13049 int matching_memory;
13048 13050 rtx src, dst, op, clob;
13049 13051
13050 13052 dst = operands[0];
13051 13053 src = operands[1];
13052 13054
13053 13055 /* If the destination is memory, and we do not have matching source
13054 13056 operands, do things in registers. */
13055 13057 matching_memory = 0;
13056 13058 if (MEM_P (dst))
13057 13059 {
13058 13060 if (rtx_equal_p (dst, src))
13059 13061 matching_memory = 1;
13060 13062 else
13061 13063 dst = gen_reg_rtx (mode);
13062 13064 }
13063 13065
13064 13066 /* When source operand is memory, destination must match. */
13065 13067 if (MEM_P (src) && !matching_memory)
13066 13068 src = force_reg (mode, src);
13067 13069
13068 13070 /* Emit the instruction. */
13069 13071
13070 13072 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
13071 13073 if (reload_in_progress || code == NOT)
13072 13074 {
13073 13075 /* Reload doesn't know about the flags register, and doesn't know that
13074 13076 it doesn't want to clobber it. */
13075 13077 gcc_assert (code == NOT);
13076 13078 emit_insn (op);
13077 13079 }
13078 13080 else
13079 13081 {
13080 13082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13081 13083 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
13082 13084 }
13083 13085
13084 13086 /* Fix up the destination if needed. */
13085 13087 if (dst != operands[0])
13086 13088 emit_move_insn (operands[0], dst);
13087 13089 }
13088 13090
13089 13091 /* Return TRUE or FALSE depending on whether the unary operator meets the
13090 13092 appropriate constraints. */
13091 13093
13092 13094 int
13093 13095 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
13094 13096 enum machine_mode mode ATTRIBUTE_UNUSED,
13095 13097 rtx operands[2] ATTRIBUTE_UNUSED)
13096 13098 {
13097 13099   /* If one of the operands is memory, source and destination must match.  */
13098 13100 if ((MEM_P (operands[0])
13099 13101 || MEM_P (operands[1]))
13100 13102 && ! rtx_equal_p (operands[0], operands[1]))
13101 13103 return FALSE;
13102 13104 return TRUE;
13103 13105 }
13104 13106
13105 13107 /* Post-reload splitter for converting an SF or DFmode value in an
13106 13108 SSE register into an unsigned SImode. */
13107 13109
13108 13110 void
13109 13111 ix86_split_convert_uns_si_sse (rtx operands[])
13110 13112 {
13111 13113 enum machine_mode vecmode;
13112 13114 rtx value, large, zero_or_two31, input, two31, x;
13113 13115
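  /* The input may lie in [2**31, 2**32), beyond the range of the signed
     truncating conversion, so compare it against 2**31, conditionally
     subtract 2**31 before converting, and xor the sign bit back into the
     integer result afterwards.  */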
13114 13116 large = operands[1];
13115 13117 zero_or_two31 = operands[2];
13116 13118 input = operands[3];
13117 13119 two31 = operands[4];
13118 13120 vecmode = GET_MODE (large);
13119 13121 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
13120 13122
13121 13123 /* Load up the value into the low element. We must ensure that the other
13122 13124 elements are valid floats -- zero is the easiest such value. */
13123 13125 if (MEM_P (input))
13124 13126 {
13125 13127 if (vecmode == V4SFmode)
13126 13128 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
13127 13129 else
13128 13130 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
13129 13131 }
13130 13132 else
13131 13133 {
13132 13134 input = gen_rtx_REG (vecmode, REGNO (input));
13133 13135 emit_move_insn (value, CONST0_RTX (vecmode));
13134 13136 if (vecmode == V4SFmode)
13135 13137 emit_insn (gen_sse_movss (value, value, input));
13136 13138 else
13137 13139 emit_insn (gen_sse2_movsd (value, value, input));
13138 13140 }
13139 13141
13140 13142 emit_move_insn (large, two31);
13141 13143 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
13142 13144
13143 13145 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
13144 13146 emit_insn (gen_rtx_SET (VOIDmode, large, x));
13145 13147
13146 13148 x = gen_rtx_AND (vecmode, zero_or_two31, large);
13147 13149 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
13148 13150
13149 13151 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
13150 13152 emit_insn (gen_rtx_SET (VOIDmode, value, x));
13151 13153
13152 13154 large = gen_rtx_REG (V4SImode, REGNO (large));
13153 13155 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
13154 13156
13155 13157 x = gen_rtx_REG (V4SImode, REGNO (value));
13156 13158 if (vecmode == V4SFmode)
13157 13159 emit_insn (gen_sse2_cvttps2dq (x, value));
13158 13160 else
13159 13161 emit_insn (gen_sse2_cvttpd2dq (x, value));
13160 13162 value = x;
13161 13163
13162 13164 emit_insn (gen_xorv4si3 (value, value, large));
13163 13165 }
13164 13166
13165 13167 /* Convert an unsigned DImode value into a DFmode, using only SSE.
13166 13168 Expects the 64-bit DImode to be supplied in a pair of integral
13167 13169 registers. Requires SSE2; will use SSE3 if available. For x86_32,
13168 13170 -mfpmath=sse, !optimize_size only. */
13169 13171
13170 13172 void
13171 13173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
13172 13174 {
13173 13175 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
13174 13176 rtx int_xmm, fp_xmm;
13175 13177 rtx biases, exponents;
13176 13178 rtx x;
13177 13179
13178 13180 int_xmm = gen_reg_rtx (V4SImode);
13179 13181 if (TARGET_INTER_UNIT_MOVES)
13180 13182 emit_insn (gen_movdi_to_sse (int_xmm, input));
13181 13183 else if (TARGET_SSE_SPLIT_REGS)
13182 13184 {
13183 13185 emit_clobber (int_xmm);
13184 13186 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
13185 13187 }
13186 13188 else
13187 13189 {
13188 13190 x = gen_reg_rtx (V2DImode);
13189 13191 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
13190 13192 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
13191 13193 }
13192 13194
13193 13195 x = gen_rtx_CONST_VECTOR (V4SImode,
13194 13196 gen_rtvec (4, GEN_INT (0x43300000UL),
13195 13197 GEN_INT (0x45300000UL),
13196 13198 const0_rtx, const0_rtx));
13197 13199 exponents = validize_mem (force_const_mem (V4SImode, x));
13198 13200
13199 13201 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
13200 13202 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
13201 13203
13202 13204 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
13203 13205 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
13204 13206 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
13205 13207 (0x1.0p84 + double(fp_value_hi_xmm)).
13206 13208 Note these exponents differ by 32. */
13207 13209
13208 13210 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
13209 13211
13210 13212 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
13211 13213 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
13212 13214 real_ldexp (&bias_lo_rvt, &dconst1, 52);
13213 13215 real_ldexp (&bias_hi_rvt, &dconst1, 84);
13214 13216 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
13215 13217 x = const_double_from_real_value (bias_hi_rvt, DFmode);
13216 13218 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
13217 13219 biases = validize_mem (force_const_mem (V2DFmode, biases));
13218 13220 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
13219 13221
13220 13222 /* Add the upper and lower DFmode values together. */
13221 13223 if (TARGET_SSE3)
13222 13224 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
13223 13225 else
13224 13226 {
13225 13227 x = copy_to_mode_reg (V2DFmode, fp_xmm);
13226 13228 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
13227 13229 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
13228 13230 }
13229 13231
13230 13232 ix86_expand_vector_extract (false, target, fp_xmm, 0);
13231 13233 }
13232 13234
13233 13235 /* Not used, but eases macroization of patterns. */
13234 13236 void
13235 13237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED,
13236 13238 rtx input ATTRIBUTE_UNUSED)
13237 13239 {
13238 13240 gcc_unreachable ();
13239 13241 }
13240 13242
13241 13243 /* Convert an unsigned SImode value into a DFmode. Only currently used
13242 13244 for SSE, but applicable anywhere. */
13243 13245
13244 13246 void
13245 13247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
13246 13248 {
13247 13249 REAL_VALUE_TYPE TWO31r;
13248 13250 rtx x, fp;
13249 13251
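  /* Bias the unsigned input by -2**31 so it fits in signed SImode, convert
     it with the ordinary signed floatsidf2, then add 2**31 back as a
     DFmode constant.  */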
13250 13252 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
13251 13253 NULL, 1, OPTAB_DIRECT);
13252 13254
13253 13255 fp = gen_reg_rtx (DFmode);
13254 13256 emit_insn (gen_floatsidf2 (fp, x));
13255 13257
13256 13258 real_ldexp (&TWO31r, &dconst1, 31);
13257 13259 x = const_double_from_real_value (TWO31r, DFmode);
13258 13260
13259 13261 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
13260 13262 if (x != target)
13261 13263 emit_move_insn (target, x);
13262 13264 }
13263 13265
13264 13266 /* Convert a signed DImode value into a DFmode. Only used for SSE in
13265 13267 32-bit mode; otherwise we have a direct convert instruction. */
13266 13268
13267 13269 void
13268 13270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
13269 13271 {
13270 13272 REAL_VALUE_TYPE TWO32r;
13271 13273 rtx fp_lo, fp_hi, x;
13272 13274
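  /* Convert the signed high word and scale it by 2**32, convert the low
     word as unsigned, then add the two DFmode partial results.  */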
13273 13275 fp_lo = gen_reg_rtx (DFmode);
13274 13276 fp_hi = gen_reg_rtx (DFmode);
13275 13277
13276 13278 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
13277 13279
13278 13280 real_ldexp (&TWO32r, &dconst1, 32);
13279 13281 x = const_double_from_real_value (TWO32r, DFmode);
13280 13282 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
13281 13283
13282 13284 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
13283 13285
13284 13286 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
13285 13287 0, OPTAB_DIRECT);
13286 13288 if (x != target)
13287 13289 emit_move_insn (target, x);
13288 13290 }
13289 13291
13290 13292 /* Convert an unsigned SImode value into a SFmode, using only SSE.
13291 13293 For x86_32, -mfpmath=sse, !optimize_size only. */
13292 13294 void
13293 13295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
13294 13296 {
13295 13297 REAL_VALUE_TYPE ONE16r;
13296 13298 rtx fp_hi, fp_lo, int_hi, int_lo, x;
13297 13299
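  /* Split the 32-bit input into 16-bit halves; each half converts exactly
     to SFmode, so the result is recombined as fp_hi * 2**16 + fp_lo.  */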
13298 13300 real_ldexp (&ONE16r, &dconst1, 16);
13299 13301 x = const_double_from_real_value (ONE16r, SFmode);
13300 13302 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
13301 13303 NULL, 0, OPTAB_DIRECT);
13302 13304 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
13303 13305 NULL, 0, OPTAB_DIRECT);
13304 13306 fp_hi = gen_reg_rtx (SFmode);
13305 13307 fp_lo = gen_reg_rtx (SFmode);
13306 13308 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
13307 13309 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
13308 13310 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
13309 13311 0, OPTAB_DIRECT);
13310 13312 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
13311 13313 0, OPTAB_DIRECT);
13312 13314 if (!rtx_equal_p (target, fp_hi))
13313 13315 emit_move_insn (target, fp_hi);
13314 13316 }
13315 13317
13316 13318 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true,
13317 13319 then replicate the value for all elements of the vector
13318 13320 register. */
13319 13321
13320 13322 rtx
13321 13323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
13322 13324 {
13323 13325 rtvec v;
13324 13326 switch (mode)
13325 13327 {
13326 13328 case SImode:
13327 13329 gcc_assert (vect);
13328 13330 v = gen_rtvec (4, value, value, value, value);
13329 13331 return gen_rtx_CONST_VECTOR (V4SImode, v);
13330 13332
13331 13333 case DImode:
13332 13334 gcc_assert (vect);
13333 13335 v = gen_rtvec (2, value, value);
13334 13336 return gen_rtx_CONST_VECTOR (V2DImode, v);
13335 13337
13336 13338 case SFmode:
13337 13339 if (vect)
13338 13340 v = gen_rtvec (4, value, value, value, value);
13339 13341 else
13340 13342 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
13341 13343 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13342 13344 return gen_rtx_CONST_VECTOR (V4SFmode, v);
13343 13345
13344 13346 case DFmode:
13345 13347 if (vect)
13346 13348 v = gen_rtvec (2, value, value);
13347 13349 else
13348 13350 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
13349 13351 return gen_rtx_CONST_VECTOR (V2DFmode, v);
13350 13352
13351 13353 default:
13352 13354 gcc_unreachable ();
13353 13355 }
13354 13356 }
13355 13357
13356 13358 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
13357 13359 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
13358 13360 for an SSE register. If VECT is true, then replicate the mask for
13359 13361 all elements of the vector register. If INVERT is true, then create
13360 13362 a mask excluding the sign bit. */
13361 13363
13362 13364 rtx
13363 13365 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
13364 13366 {
13365 13367 enum machine_mode vec_mode, imode;
13366 13368 HOST_WIDE_INT hi, lo;
13367 13369 int shift = 63;
13368 13370 rtx v;
13369 13371 rtx mask;
13370 13372
13371 13373 /* Find the sign bit, sign extended to 2*HWI. */
13372 13374 switch (mode)
13373 13375 {
13374 13376 case SImode:
13375 13377 case SFmode:
13376 13378 imode = SImode;
13377 13379 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
13378 13380 lo = 0x80000000, hi = lo < 0;
13379 13381 break;
13380 13382
13381 13383 case DImode:
13382 13384 case DFmode:
13383 13385 imode = DImode;
13384 13386 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
13385 13387 if (HOST_BITS_PER_WIDE_INT >= 64)
13386 13388 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
13387 13389 else
13388 13390 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13389 13391 break;
13390 13392
13391 13393 case TImode:
13392 13394 case TFmode:
13393 13395 vec_mode = VOIDmode;
13394 13396 if (HOST_BITS_PER_WIDE_INT >= 64)
13395 13397 {
13396 13398 imode = TImode;
13397 13399 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
13398 13400 }
13399 13401 else
13400 13402 {
13401 13403 rtvec vec;
13402 13404
13403 13405 imode = DImode;
13404 13406 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
13405 13407
13406 13408 if (invert)
13407 13409 {
13408 13410 lo = ~lo, hi = ~hi;
13409 13411 v = constm1_rtx;
13410 13412 }
13411 13413 else
13412 13414 v = const0_rtx;
13413 13415
13414 13416 mask = immed_double_const (lo, hi, imode);
13415 13417
13416 13418 vec = gen_rtvec (2, v, mask);
13417 13419 v = gen_rtx_CONST_VECTOR (V2DImode, vec);
13418 13420 v = copy_to_mode_reg (mode, gen_lowpart (mode, v));
13419 13421
13420 13422 return v;
13421 13423 }
13422 13424 break;
13423 13425
13424 13426 default:
13425 13427 gcc_unreachable ();
13426 13428 }
13427 13429
13428 13430 if (invert)
13429 13431 lo = ~lo, hi = ~hi;
13430 13432
13431 13433 /* Force this value into the low part of a fp vector constant. */
13432 13434 mask = immed_double_const (lo, hi, imode);
13433 13435 mask = gen_lowpart (mode, mask);
13434 13436
13435 13437 if (vec_mode == VOIDmode)
13436 13438 return force_reg (mode, mask);
13437 13439
13438 13440 v = ix86_build_const_vector (mode, vect, mask);
13439 13441 return force_reg (vec_mode, v);
13440 13442 }
13441 13443
13442 13444 /* Generate code for floating point ABS or NEG. */
13443 13445
13444 13446 void
13445 13447 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
13446 13448 rtx operands[])
13447 13449 {
13448 13450 rtx mask, set, use, clob, dst, src;
13449 13451 bool use_sse = false;
13450 13452 bool vector_mode = VECTOR_MODE_P (mode);
13451 13453 enum machine_mode elt_mode = mode;
13452 13454
13453 13455 if (vector_mode)
13454 13456 {
13455 13457 elt_mode = GET_MODE_INNER (mode);
13456 13458 use_sse = true;
13457 13459 }
13458 13460 else if (mode == TFmode)
13459 13461 use_sse = true;
13460 13462 else if (TARGET_SSE_MATH)
13461 13463 use_sse = SSE_FLOAT_MODE_P (mode);
13462 13464
13463 13465 /* NEG and ABS performed with SSE use bitwise mask operations.
13464 13466 Create the appropriate mask now. */
13465 13467 if (use_sse)
13466 13468 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
13467 13469 else
13468 13470 mask = NULL_RTX;
13469 13471
13470 13472 dst = operands[0];
13471 13473 src = operands[1];
13472 13474
13473 13475 if (vector_mode)
13474 13476 {
13475 13477 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
13476 13478 set = gen_rtx_SET (VOIDmode, dst, set);
13477 13479 emit_insn (set);
13478 13480 }
13479 13481 else
13480 13482 {
13481 13483 set = gen_rtx_fmt_e (code, mode, src);
13482 13484 set = gen_rtx_SET (VOIDmode, dst, set);
13483 13485 if (mask)
13484 13486 {
13485 13487 use = gen_rtx_USE (VOIDmode, mask);
13486 13488 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
13487 13489 emit_insn (gen_rtx_PARALLEL (VOIDmode,
13488 13490 gen_rtvec (3, set, use, clob)));
13489 13491 }
13490 13492 else
13491 13493 emit_insn (set);
13492 13494 }
13493 13495 }
13494 13496
13495 13497 /* Expand a copysign operation. Special case operand 0 being a constant. */
13496 13498
13497 13499 void
13498 13500 ix86_expand_copysign (rtx operands[])
13499 13501 {
13500 13502 enum machine_mode mode;
13501 13503 rtx dest, op0, op1, mask, nmask;
13502 13504
13503 13505 dest = operands[0];
13504 13506 op0 = operands[1];
13505 13507 op1 = operands[2];
13506 13508
13507 13509 mode = GET_MODE (dest);
13508 13510
13509 13511 if (GET_CODE (op0) == CONST_DOUBLE)
13510 13512 {
13511 13513 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
13512 13514
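      /* Only op1 contributes the sign of the result, so a constant op0 can
	 be canonicalized to its absolute value.  */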
13513 13515 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
13514 13516 op0 = simplify_unary_operation (ABS, mode, op0, mode);
13515 13517
13516 13518 if (mode == SFmode || mode == DFmode)
13517 13519 {
13518 13520 enum machine_mode vmode;
13519 13521
13520 13522 vmode = mode == SFmode ? V4SFmode : V2DFmode;
13521 13523
13522 13524 if (op0 == CONST0_RTX (mode))
13523 13525 op0 = CONST0_RTX (vmode);
13524 13526 else
13525 13527 {
13526 13528 rtvec v;
13527 13529
13528 13530 if (mode == SFmode)
13529 13531 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
13530 13532 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
13531 13533 else
13532 13534 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
13533 13535
13534 13536 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
13535 13537 }
13536 13538 }
13537 13539 else if (op0 != CONST0_RTX (mode))
13538 13540 op0 = force_reg (mode, op0);
13539 13541
13540 13542 mask = ix86_build_signbit_mask (mode, 0, 0);
13541 13543
13542 13544 if (mode == SFmode)
13543 13545 copysign_insn = gen_copysignsf3_const;
13544 13546 else if (mode == DFmode)
13545 13547 copysign_insn = gen_copysigndf3_const;
13546 13548 else
13547 13549 copysign_insn = gen_copysigntf3_const;
13548 13550
13549 13551 emit_insn (copysign_insn (dest, op0, op1, mask));
13550 13552 }
13551 13553 else
13552 13554 {
13553 13555 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
13554 13556
13555 13557 nmask = ix86_build_signbit_mask (mode, 0, 1);
13556 13558 mask = ix86_build_signbit_mask (mode, 0, 0);
13557 13559
13558 13560 if (mode == SFmode)
13559 13561 copysign_insn = gen_copysignsf3_var;
13560 13562 else if (mode == DFmode)
13561 13563 copysign_insn = gen_copysigndf3_var;
13562 13564 else
13563 13565 copysign_insn = gen_copysigntf3_var;
13564 13566
13565 13567 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
13566 13568 }
13567 13569 }
13568 13570
13569 13571 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
13570 13572 be a constant, and so has already been expanded into a vector constant. */
13571 13573
13572 13574 void
13573 13575 ix86_split_copysign_const (rtx operands[])
13574 13576 {
13575 13577 enum machine_mode mode, vmode;
13576 13578 rtx dest, op0, op1, mask, x;
13577 13579
13578 13580 dest = operands[0];
13579 13581 op0 = operands[1];
13580 13582 op1 = operands[2];
13581 13583 mask = operands[3];
13582 13584
13583 13585 mode = GET_MODE (dest);
13584 13586 vmode = GET_MODE (mask);
13585 13587
13586 13588 dest = simplify_gen_subreg (vmode, dest, mode, 0);
13587 13589 x = gen_rtx_AND (vmode, dest, mask);
13588 13590 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13589 13591
13590 13592 if (op0 != CONST0_RTX (vmode))
13591 13593 {
13592 13594 x = gen_rtx_IOR (vmode, dest, op0);
13593 13595 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13594 13596 }
13595 13597 }
13596 13598
13597 13599 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
13598 13600 so we have to do two masks. */
13599 13601
13600 13602 void
13601 13603 ix86_split_copysign_var (rtx operands[])
13602 13604 {
13603 13605 enum machine_mode mode, vmode;
13604 13606 rtx dest, scratch, op0, op1, mask, nmask, x;
13605 13607
13606 13608 dest = operands[0];
13607 13609 scratch = operands[1];
13608 13610 op0 = operands[2];
13609 13611 op1 = operands[3];
13610 13612 nmask = operands[4];
13611 13613 mask = operands[5];
13612 13614
13613 13615 mode = GET_MODE (dest);
13614 13616 vmode = GET_MODE (mask);
13615 13617
13616 13618 if (rtx_equal_p (op0, op1))
13617 13619 {
13618 13620 /* Shouldn't happen often (it's useless, obviously), but when it does
13619 13621 we'd generate incorrect code if we continue below. */
13620 13622 emit_move_insn (dest, op0);
13621 13623 return;
13622 13624 }
13623 13625
13624 13626 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
13625 13627 {
13626 13628 gcc_assert (REGNO (op1) == REGNO (scratch));
13627 13629
13628 13630 x = gen_rtx_AND (vmode, scratch, mask);
13629 13631 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13630 13632
13631 13633 dest = mask;
13632 13634 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13633 13635 x = gen_rtx_NOT (vmode, dest);
13634 13636 x = gen_rtx_AND (vmode, x, op0);
13635 13637 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13636 13638 }
13637 13639 else
13638 13640 {
13639 13641 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
13640 13642 {
13641 13643 x = gen_rtx_AND (vmode, scratch, mask);
13642 13644 }
13643 13645 else /* alternative 2,4 */
13644 13646 {
13645 13647 gcc_assert (REGNO (mask) == REGNO (scratch));
13646 13648 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
13647 13649 x = gen_rtx_AND (vmode, scratch, op1);
13648 13650 }
13649 13651 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
13650 13652
13651 13653 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
13652 13654 {
13653 13655 dest = simplify_gen_subreg (vmode, op0, mode, 0);
13654 13656 x = gen_rtx_AND (vmode, dest, nmask);
13655 13657 }
13656 13658 else /* alternative 3,4 */
13657 13659 {
13658 13660 gcc_assert (REGNO (nmask) == REGNO (dest));
13659 13661 dest = nmask;
13660 13662 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
13661 13663 x = gen_rtx_AND (vmode, dest, op0);
13662 13664 }
13663 13665 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13664 13666 }
13665 13667
13666 13668 x = gen_rtx_IOR (vmode, dest, scratch);
13667 13669 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
13668 13670 }
13669 13671
13670 13672 /* Return TRUE or FALSE depending on whether the first SET in INSN
13671 13673    has source and destination with matching CC modes, and whether the
13672 13674 CC mode is at least as constrained as REQ_MODE. */
13673 13675
13674 13676 int
13675 13677 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
13676 13678 {
13677 13679 rtx set;
13678 13680 enum machine_mode set_mode;
13679 13681
13680 13682 set = PATTERN (insn);
13681 13683 if (GET_CODE (set) == PARALLEL)
13682 13684 set = XVECEXP (set, 0, 0);
13683 13685 gcc_assert (GET_CODE (set) == SET);
13684 13686 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
13685 13687
13686 13688 set_mode = GET_MODE (SET_DEST (set));
13687 13689 switch (set_mode)
13688 13690 {
13689 13691 case CCNOmode:
13690 13692 if (req_mode != CCNOmode
13691 13693 && (req_mode != CCmode
13692 13694 || XEXP (SET_SRC (set), 1) != const0_rtx))
13693 13695 return 0;
13694 13696 break;
13695 13697 case CCmode:
13696 13698 if (req_mode == CCGCmode)
13697 13699 return 0;
13698 13700 /* FALLTHRU */
13699 13701 case CCGCmode:
13700 13702 if (req_mode == CCGOCmode || req_mode == CCNOmode)
13701 13703 return 0;
13702 13704 /* FALLTHRU */
13703 13705 case CCGOCmode:
13704 13706 if (req_mode == CCZmode)
13705 13707 return 0;
13706 13708 /* FALLTHRU */
13707 13709 case CCAmode:
13708 13710 case CCCmode:
13709 13711 case CCOmode:
13710 13712 case CCSmode:
13711 13713 case CCZmode:
13712 13714 break;
13713 13715
13714 13716 default:
13715 13717 gcc_unreachable ();
13716 13718 }
13717 13719
13718 13720 return (GET_MODE (SET_SRC (set)) == set_mode);
13719 13721 }
13720 13722
13721 13723 /* Generate insn patterns to do an integer compare of OPERANDS. */
13722 13724
13723 13725 static rtx
13724 13726 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
13725 13727 {
13726 13728 enum machine_mode cmpmode;
13727 13729 rtx tmp, flags;
13728 13730
13729 13731 cmpmode = SELECT_CC_MODE (code, op0, op1);
13730 13732 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
13731 13733
13732 13734 /* This is very simple, but making the interface the same as in the
13733 13735 FP case makes the rest of the code easier. */
13734 13736 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
13735 13737 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
13736 13738
13737 13739 /* Return the test that should be put into the flags user, i.e.
13738 13740 the bcc, scc, or cmov instruction. */
13739 13741 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
13740 13742 }
13741 13743
13742 13744 /* Figure out whether to use ordered or unordered fp comparisons.
13743 13745 Return the appropriate mode to use. */
13744 13746
13745 13747 enum machine_mode
13746 13748 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
13747 13749 {
13748 13750 /* ??? In order to make all comparisons reversible, we do all comparisons
13749 13751 non-trapping when compiling for IEEE. Once gcc is able to distinguish
13750 13752      all forms of trapping and nontrapping comparisons, we can make inequality
13751 13753 comparisons trapping again, since it results in better code when using
13752 13754 FCOM based compares. */
13753 13755 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
13754 13756 }
13755 13757
13756 13758 enum machine_mode
13757 13759 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
13758 13760 {
13759 13761 enum machine_mode mode = GET_MODE (op0);
13760 13762
13761 13763 if (SCALAR_FLOAT_MODE_P (mode))
13762 13764 {
13763 13765 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
13764 13766 return ix86_fp_compare_mode (code);
13765 13767 }
13766 13768
13767 13769 switch (code)
13768 13770 {
13769 13771 /* Only zero flag is needed. */
13770 13772 case EQ: /* ZF=0 */
13771 13773 case NE: /* ZF!=0 */
13772 13774 return CCZmode;
13773 13775 /* Codes needing carry flag. */
13774 13776 case GEU: /* CF=0 */
13775 13777 case LTU: /* CF=1 */
13776 13778 /* Detect overflow checks. They need just the carry flag. */
13777 13779 if (GET_CODE (op0) == PLUS
13778 13780 && rtx_equal_p (op1, XEXP (op0, 0)))
13779 13781 return CCCmode;
13780 13782 else
13781 13783 return CCmode;
13782 13784 case GTU: /* CF=0 & ZF=0 */
13783 13785 case LEU: /* CF=1 | ZF=1 */
13784 13786 /* Detect overflow checks. They need just the carry flag. */
13785 13787 if (GET_CODE (op0) == MINUS
13786 13788 && rtx_equal_p (op1, XEXP (op0, 0)))
13787 13789 return CCCmode;
13788 13790 else
13789 13791 return CCmode;
13790 13792 /* Codes possibly doable only with sign flag when
13791 13793 comparing against zero. */
13792 13794 case GE: /* SF=OF or SF=0 */
13793 13795 case LT: /* SF<>OF or SF=1 */
13794 13796 if (op1 == const0_rtx)
13795 13797 return CCGOCmode;
13796 13798 else
13797 13799 /* For other cases Carry flag is not required. */
13798 13800 return CCGCmode;
13799 13801 /* Codes doable only with sign flag when comparing
13800 13802 	 against zero, but we miss the jump instruction for it,
13801 13803 	 so we need to use relational tests against overflow,
13802 13804 	 which thus needs to be zero.  */
13803 13805 case GT: /* ZF=0 & SF=OF */
13804 13806 case LE: /* ZF=1 | SF<>OF */
13805 13807 if (op1 == const0_rtx)
13806 13808 return CCNOmode;
13807 13809 else
13808 13810 return CCGCmode;
13809 13811     /* The strcmp pattern does (use flags) and combine may ask us for the
13810 13812        proper mode.  */
13811 13813 case USE:
13812 13814 return CCmode;
13813 13815 default:
13814 13816 gcc_unreachable ();
13815 13817 }
13816 13818 }
13817 13819
13818 13820 /* Return the fixed registers used for condition codes. */
13819 13821
13820 13822 static bool
13821 13823 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13822 13824 {
13823 13825 *p1 = FLAGS_REG;
13824 13826 *p2 = FPSR_REG;
13825 13827 return true;
13826 13828 }
13827 13829
13828 13830 /* If two condition code modes are compatible, return a condition code
13829 13831 mode which is compatible with both. Otherwise, return
13830 13832 VOIDmode. */
13831 13833
13832 13834 static enum machine_mode
13833 13835 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
13834 13836 {
13835 13837 if (m1 == m2)
13836 13838 return m1;
13837 13839
13838 13840 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
13839 13841 return VOIDmode;
13840 13842
13841 13843 if ((m1 == CCGCmode && m2 == CCGOCmode)
13842 13844 || (m1 == CCGOCmode && m2 == CCGCmode))
13843 13845 return CCGCmode;
13844 13846
13845 13847 switch (m1)
13846 13848 {
13847 13849 default:
13848 13850 gcc_unreachable ();
13849 13851
13850 13852 case CCmode:
13851 13853 case CCGCmode:
13852 13854 case CCGOCmode:
13853 13855 case CCNOmode:
13854 13856 case CCAmode:
13855 13857 case CCCmode:
13856 13858 case CCOmode:
13857 13859 case CCSmode:
13858 13860 case CCZmode:
13859 13861 switch (m2)
13860 13862 {
13861 13863 default:
13862 13864 return VOIDmode;
13863 13865
13864 13866 case CCmode:
13865 13867 case CCGCmode:
13866 13868 case CCGOCmode:
13867 13869 case CCNOmode:
13868 13870 case CCAmode:
13869 13871 case CCCmode:
13870 13872 case CCOmode:
13871 13873 case CCSmode:
13872 13874 case CCZmode:
13873 13875 return CCmode;
13874 13876 }
13875 13877
13876 13878 case CCFPmode:
13877 13879 case CCFPUmode:
13878 13880 /* These are only compatible with themselves, which we already
13879 13881 checked above. */
13880 13882 return VOIDmode;
13881 13883 }
13882 13884 }
13883 13885
13884 13886 /* Split comparison code CODE into comparisons we can do using branch
13885 13887 instructions. BYPASS_CODE is comparison code for branch that will
13886 13888    instructions.  BYPASS_CODE is the comparison code for the branch that will
13887 13889    branch around FIRST_CODE and SECOND_CODE.  If one of the branches
13888 13890    is not required, its value is set to UNKNOWN.
13889 13891
13890 13892 void
13891 13893 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
13892 13894 enum rtx_code *first_code,
13893 13895 enum rtx_code *second_code)
13894 13896 {
13895 13897 *first_code = code;
13896 13898 *bypass_code = UNKNOWN;
13897 13899 *second_code = UNKNOWN;
13898 13900
13899 13901 /* The fcomi comparison sets flags as follows:
13900 13902
13901 13903 cmp ZF PF CF
13902 13904 > 0 0 0
13903 13905 < 0 0 1
13904 13906 = 1 0 0
13905 13907 un 1 1 1 */
13906 13908
13907 13909 switch (code)
13908 13910 {
13909 13911 case GT: /* GTU - CF=0 & ZF=0 */
13910 13912 case GE: /* GEU - CF=0 */
13911 13913 case ORDERED: /* PF=0 */
13912 13914 case UNORDERED: /* PF=1 */
13913 13915 case UNEQ: /* EQ - ZF=1 */
13914 13916 case UNLT: /* LTU - CF=1 */
13915 13917 case UNLE: /* LEU - CF=1 | ZF=1 */
13916 13918 case LTGT: /* EQ - ZF=0 */
13917 13919 break;
13918 13920 case LT: /* LTU - CF=1 - fails on unordered */
13919 13921 *first_code = UNLT;
13920 13922 *bypass_code = UNORDERED;
13921 13923 break;
13922 13924 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
13923 13925 *first_code = UNLE;
13924 13926 *bypass_code = UNORDERED;
13925 13927 break;
13926 13928 case EQ: /* EQ - ZF=1 - fails on unordered */
13927 13929 *first_code = UNEQ;
13928 13930 *bypass_code = UNORDERED;
13929 13931 break;
13930 13932 case NE: /* NE - ZF=0 - fails on unordered */
13931 13933 *first_code = LTGT;
13932 13934 *second_code = UNORDERED;
13933 13935 break;
13934 13936 case UNGE: /* GEU - CF=0 - fails on unordered */
13935 13937 *first_code = GE;
13936 13938 *second_code = UNORDERED;
13937 13939 break;
13938 13940 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
13939 13941 *first_code = GT;
13940 13942 *second_code = UNORDERED;
13941 13943 break;
13942 13944 default:
13943 13945 gcc_unreachable ();
13944 13946 }
13945 13947 if (!TARGET_IEEE_FP)
13946 13948 {
13947 13949 *second_code = UNKNOWN;
13948 13950 *bypass_code = UNKNOWN;
13949 13951 }
13950 13952 }
13951 13953
13952 13954 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
13953 13955    All following functions use the number of instructions as a cost metric.
13954 13956    In the future this should be tweaked to compute bytes for optimize_size and
13955 13957    take into account the performance of various instructions on various CPUs.  */
13956 13958 static int
13957 13959 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
13958 13960 {
13959 13961 if (!TARGET_IEEE_FP)
13960 13962 return 4;
13961 13963 /* The cost of code output by ix86_expand_fp_compare. */
13962 13964 switch (code)
13963 13965 {
13964 13966 case UNLE:
13965 13967 case UNLT:
13966 13968 case LTGT:
13967 13969 case GT:
13968 13970 case GE:
13969 13971 case UNORDERED:
13970 13972 case ORDERED:
13971 13973 case UNEQ:
13972 13974 return 4;
13973 13975 break;
13974 13976 case LT:
13975 13977 case NE:
13976 13978 case EQ:
13977 13979 case UNGE:
13978 13980 return 5;
13979 13981 break;
13980 13982 case LE:
13981 13983 case UNGT:
13982 13984 return 6;
13983 13985 break;
13984 13986 default:
13985 13987 gcc_unreachable ();
13986 13988 }
13987 13989 }
13988 13990
13989 13991 /* Return cost of comparison done using fcomi operation.
13990 13992 See ix86_fp_comparison_arithmetics_cost for the metrics. */
13991 13993 static int
13992 13994 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
13993 13995 {
13994 13996 enum rtx_code bypass_code, first_code, second_code;
13995 13997 /* Return arbitrarily high cost when instruction is not supported - this
13996 13998 prevents gcc from using it. */
13997 13999 if (!TARGET_CMOVE)
13998 14000 return 1024;
13999 14001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14000 14002 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
14001 14003 }
14002 14004
14003 14005 /* Return cost of comparison done using sahf operation.
14004 14006 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14005 14007 static int
14006 14008 ix86_fp_comparison_sahf_cost (enum rtx_code code)
14007 14009 {
14008 14010 enum rtx_code bypass_code, first_code, second_code;
14009 14011 /* Return arbitrarily high cost when instruction is not preferred - this
14010 14012      prevents gcc from using it.  */
14011 14013 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())))
14012 14014 return 1024;
14013 14015 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14014 14016 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
14015 14017 }
14016 14018
14017 14019 /* Compute cost of the comparison done using any method.
14018 14020 See ix86_fp_comparison_arithmetics_cost for the metrics. */
14019 14021 static int
14020 14022 ix86_fp_comparison_cost (enum rtx_code code)
14021 14023 {
14022 14024 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
14023 14025 int min;
14024 14026
14025 14027 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
14026 14028 sahf_cost = ix86_fp_comparison_sahf_cost (code);
14027 14029
14028 14030 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
14029 14031 if (min > sahf_cost)
14030 14032 min = sahf_cost;
14031 14033 if (min > fcomi_cost)
14032 14034 min = fcomi_cost;
14033 14035 return min;
14034 14036 }
14035 14037
14036 14038 /* Return true if we should use an FCOMI instruction for this
14037 14039 fp comparison. */
14038 14040
14039 14041 int
14040 14042 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
14041 14043 {
14042 14044 enum rtx_code swapped_code = swap_condition (code);
14043 14045
14044 14046 return ((ix86_fp_comparison_cost (code)
14045 14047 == ix86_fp_comparison_fcomi_cost (code))
14046 14048 || (ix86_fp_comparison_cost (swapped_code)
14047 14049 == ix86_fp_comparison_fcomi_cost (swapped_code)));
14048 14050 }
14049 14051
14050 14052 /* Swap, force into registers, or otherwise massage the two operands
14051 14053 to a fp comparison. The operands are updated in place; the new
14052 14054 comparison code is returned. */
14053 14055
14054 14056 static enum rtx_code
14055 14057 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
14056 14058 {
14057 14059 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
14058 14060 rtx op0 = *pop0, op1 = *pop1;
14059 14061 enum machine_mode op_mode = GET_MODE (op0);
14060 14062 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
14061 14063
14062 14064 /* All of the unordered compare instructions only work on registers.
14063 14065 The same is true of the fcomi compare instructions. The XFmode
14064 14066 compare instructions require registers except when comparing
14065 14067 against zero or when converting operand 1 from fixed point to
14066 14068 floating point. */
14067 14069
14068 14070 if (!is_sse
14069 14071 && (fpcmp_mode == CCFPUmode
14070 14072 || (op_mode == XFmode
14071 14073 && ! (standard_80387_constant_p (op0) == 1
14072 14074 || standard_80387_constant_p (op1) == 1)
14073 14075 && GET_CODE (op1) != FLOAT)
14074 14076 || ix86_use_fcomi_compare (code)))
14075 14077 {
14076 14078 op0 = force_reg (op_mode, op0);
14077 14079 op1 = force_reg (op_mode, op1);
14078 14080 }
14079 14081 else
14080 14082 {
14081 14083 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
14082 14084 things around if they appear profitable, otherwise force op0
14083 14085 into a register. */
14084 14086
14085 14087 if (standard_80387_constant_p (op0) == 0
14086 14088 || (MEM_P (op0)
14087 14089 && ! (standard_80387_constant_p (op1) == 0
14088 14090 || MEM_P (op1))))
14089 14091 {
14090 14092 rtx tmp;
14091 14093 tmp = op0, op0 = op1, op1 = tmp;
14092 14094 code = swap_condition (code);
14093 14095 }
14094 14096
14095 14097 if (!REG_P (op0))
14096 14098 op0 = force_reg (op_mode, op0);
14097 14099
14098 14100 if (CONSTANT_P (op1))
14099 14101 {
14100 14102 int tmp = standard_80387_constant_p (op1);
14101 14103 if (tmp == 0)
14102 14104 op1 = validize_mem (force_const_mem (op_mode, op1));
14103 14105 else if (tmp == 1)
14104 14106 {
14105 14107 if (TARGET_CMOVE)
14106 14108 op1 = force_reg (op_mode, op1);
14107 14109 }
14108 14110 else
14109 14111 op1 = force_reg (op_mode, op1);
14110 14112 }
14111 14113 }
14112 14114
14113 14115 /* Try to rearrange the comparison to make it cheaper. */
14114 14116 if (ix86_fp_comparison_cost (code)
14115 14117 > ix86_fp_comparison_cost (swap_condition (code))
14116 14118 && (REG_P (op1) || can_create_pseudo_p ()))
14117 14119 {
14118 14120 rtx tmp;
14119 14121 tmp = op0, op0 = op1, op1 = tmp;
14120 14122 code = swap_condition (code);
14121 14123 if (!REG_P (op0))
14122 14124 op0 = force_reg (op_mode, op0);
14123 14125 }
14124 14126
14125 14127 *pop0 = op0;
14126 14128 *pop1 = op1;
14127 14129 return code;
14128 14130 }
14129 14131
14130 14132 /* Convert comparison codes we use to represent FP comparison to integer
14131 14133 code that will result in proper branch. Return UNKNOWN if no such code
14132 14134 is available. */
14133 14135
14134 14136 enum rtx_code
14135 14137 ix86_fp_compare_code_to_integer (enum rtx_code code)
14136 14138 {
14137 14139 switch (code)
14138 14140 {
14139 14141 case GT:
14140 14142 return GTU;
14141 14143 case GE:
14142 14144 return GEU;
14143 14145 case ORDERED:
14144 14146 case UNORDERED:
14145 14147 return code;
14146 14148 break;
14147 14149 case UNEQ:
14148 14150 return EQ;
14149 14151 break;
14150 14152 case UNLT:
14151 14153 return LTU;
14152 14154 break;
14153 14155 case UNLE:
14154 14156 return LEU;
14155 14157 break;
14156 14158 case LTGT:
14157 14159 return NE;
14158 14160 break;
14159 14161 default:
14160 14162 return UNKNOWN;
14161 14163 }
14162 14164 }
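/* Note on the mapping above: fcomi/fucomi (and fnstsw + sahf) load the
   x87 condition bits C3/C2/C0 into ZF/PF/CF, so an ordered result reads
   back exactly like an unsigned integer compare of op0 against op1:

     op0 >  op1             ->  CF = 0, ZF = 0   ->  GTU
     op0 >= op1             ->  CF = 0           ->  GEU
     op0 == op1, unordered  ->  ZF = 1           ->  EQ  (hence UNEQ -> EQ)

   which is why GT/GE and the UN* variants are rewritten to their
   unsigned counterparts here.  */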
14163 14165
14164 14166 /* Generate insn patterns to do a floating point compare of OPERANDS. */
14165 14167
14166 14168 static rtx
14167 14169 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
14168 14170 rtx *second_test, rtx *bypass_test)
14169 14171 {
14170 14172 enum machine_mode fpcmp_mode, intcmp_mode;
14171 14173 rtx tmp, tmp2;
14172 14174 int cost = ix86_fp_comparison_cost (code);
14173 14175 enum rtx_code bypass_code, first_code, second_code;
14174 14176
14175 14177 fpcmp_mode = ix86_fp_compare_mode (code);
14176 14178 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
14177 14179
14178 14180 if (second_test)
14179 14181 *second_test = NULL_RTX;
14180 14182 if (bypass_test)
14181 14183 *bypass_test = NULL_RTX;
14182 14184
14183 14185 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14184 14186
14185 14187 /* Do fcomi/sahf based test when profitable. */
14186 14188 if (ix86_fp_comparison_arithmetics_cost (code) > cost
14187 14189 && (bypass_code == UNKNOWN || bypass_test)
14188 14190 && (second_code == UNKNOWN || second_test))
14189 14191 {
14190 14192 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14191 14193 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
14192 14194 tmp);
14193 14195 if (TARGET_CMOVE)
14194 14196 emit_insn (tmp);
14195 14197 else
14196 14198 {
14197 14199 gcc_assert (TARGET_SAHF);
14198 14200
14199 14201 if (!scratch)
14200 14202 scratch = gen_reg_rtx (HImode);
14201 14203 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
14202 14204
14203 14205 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
14204 14206 }
14205 14207
14206 14208 /* The FP codes work out to act like unsigned. */
14207 14209 intcmp_mode = fpcmp_mode;
14208 14210 code = first_code;
14209 14211 if (bypass_code != UNKNOWN)
14210 14212 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
14211 14213 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14212 14214 const0_rtx);
14213 14215 if (second_code != UNKNOWN)
14214 14216 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
14215 14217 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14216 14218 const0_rtx);
14217 14219 }
14218 14220 else
14219 14221 {
14220 14222 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
14221 14223 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
14222 14224 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
14223 14225 if (!scratch)
14224 14226 scratch = gen_reg_rtx (HImode);
14225 14227 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
14226 14228
14227 14229 /* In the unordered case, we have to check C2 for NaN's, which
14228 14230 doesn't happen to work out to anything nice combination-wise.
14229 14231 So do some bit twiddling on the value we've got in AH to come
14230 14232 up with an appropriate set of condition codes. */
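      /* For reference (illustrative note): after fnstsw the high byte of
         the FPU status word ends up in AH, where C0 = 0x01, C2 = 0x04 and
         C3 = 0x40.  The masks used below therefore read as

           0x45 = C0|C2|C3   (all result bits)
           0x44 = C2|C3
           0x05 = C0|C2

         e.g. "test $0x45, %ah; je" is true only when C0 = C2 = C3 = 0,
         i.e. op0 > op1 with ordered operands.  */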
14231 14233
14232 14234 intcmp_mode = CCNOmode;
14233 14235 switch (code)
14234 14236 {
14235 14237 case GT:
14236 14238 case UNGT:
14237 14239 if (code == GT || !TARGET_IEEE_FP)
14238 14240 {
14239 14241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14240 14242 code = EQ;
14241 14243 }
14242 14244 else
14243 14245 {
14244 14246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14245 14247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14246 14248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
14247 14249 intcmp_mode = CCmode;
14248 14250 code = GEU;
14249 14251 }
14250 14252 break;
14251 14253 case LT:
14252 14254 case UNLT:
14253 14255 if (code == LT && TARGET_IEEE_FP)
14254 14256 {
14255 14257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14256 14258 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
14257 14259 intcmp_mode = CCmode;
14258 14260 code = EQ;
14259 14261 }
14260 14262 else
14261 14263 {
14262 14264 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
14263 14265 code = NE;
14264 14266 }
14265 14267 break;
14266 14268 case GE:
14267 14269 case UNGE:
14268 14270 if (code == GE || !TARGET_IEEE_FP)
14269 14271 {
14270 14272 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
14271 14273 code = EQ;
14272 14274 }
14273 14275 else
14274 14276 {
14275 14277 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14276 14278 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14277 14279 GEN_INT (0x01)));
14278 14280 code = NE;
14279 14281 }
14280 14282 break;
14281 14283 case LE:
14282 14284 case UNLE:
14283 14285 if (code == LE && TARGET_IEEE_FP)
14284 14286 {
14285 14287 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14286 14288 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
14287 14289 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14288 14290 intcmp_mode = CCmode;
14289 14291 code = LTU;
14290 14292 }
14291 14293 else
14292 14294 {
14293 14295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
14294 14296 code = NE;
14295 14297 }
14296 14298 break;
14297 14299 case EQ:
14298 14300 case UNEQ:
14299 14301 if (code == EQ && TARGET_IEEE_FP)
14300 14302 {
14301 14303 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14302 14304 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
14303 14305 intcmp_mode = CCmode;
14304 14306 code = EQ;
14305 14307 }
14306 14308 else
14307 14309 {
14308 14310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14309 14311 code = NE;
14310 14312 break;
14311 14313 }
14312 14314 break;
14313 14315 case NE:
14314 14316 case LTGT:
14315 14317 if (code == NE && TARGET_IEEE_FP)
14316 14318 {
14317 14319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
14318 14320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
14319 14321 GEN_INT (0x40)));
14320 14322 code = NE;
14321 14323 }
14322 14324 else
14323 14325 {
14324 14326 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
14325 14327 code = EQ;
14326 14328 }
14327 14329 break;
14328 14330
14329 14331 case UNORDERED:
14330 14332 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14331 14333 code = NE;
14332 14334 break;
14333 14335 case ORDERED:
14334 14336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
14335 14337 code = EQ;
14336 14338 break;
14337 14339
14338 14340 default:
14339 14341 gcc_unreachable ();
14340 14342 }
14341 14343 }
14342 14344
14343 14345 /* Return the test that should be put into the flags user, i.e.
14344 14346 the bcc, scc, or cmov instruction. */
14345 14347 return gen_rtx_fmt_ee (code, VOIDmode,
14346 14348 gen_rtx_REG (intcmp_mode, FLAGS_REG),
14347 14349 const0_rtx);
14348 14350 }
14349 14351
14350 14352 rtx
14351 14353 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
14352 14354 {
14353 14355 rtx op0, op1, ret;
14354 14356 op0 = ix86_compare_op0;
14355 14357 op1 = ix86_compare_op1;
14356 14358
14357 14359 if (second_test)
14358 14360 *second_test = NULL_RTX;
14359 14361 if (bypass_test)
14360 14362 *bypass_test = NULL_RTX;
14361 14363
14362 14364 if (ix86_compare_emitted)
14363 14365 {
14364 14366 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
14365 14367 ix86_compare_emitted = NULL_RTX;
14366 14368 }
14367 14369 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
14368 14370 {
14369 14371 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
14370 14372 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14371 14373 second_test, bypass_test);
14372 14374 }
14373 14375 else
14374 14376 ret = ix86_expand_int_compare (code, op0, op1);
14375 14377
14376 14378 return ret;
14377 14379 }
14378 14380
14379 14381 /* Return true if the CODE will result in nontrivial jump sequence. */
14380 14382 bool
14381 14383 ix86_fp_jump_nontrivial_p (enum rtx_code code)
14382 14384 {
14383 14385 enum rtx_code bypass_code, first_code, second_code;
14384 14386 if (!TARGET_CMOVE)
14385 14387 return true;
14386 14388 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14387 14389 return bypass_code != UNKNOWN || second_code != UNKNOWN;
14388 14390 }
14389 14391
14390 14392 void
14391 14393 ix86_expand_branch (enum rtx_code code, rtx label)
14392 14394 {
14393 14395 rtx tmp;
14394 14396
14395 14397 /* If we have emitted a compare insn, go straight to simple.
14396 14398 ix86_expand_compare won't emit anything if ix86_compare_emitted
14397 14399      is non-NULL.  */
14398 14400 if (ix86_compare_emitted)
14399 14401 goto simple;
14400 14402
14401 14403 switch (GET_MODE (ix86_compare_op0))
14402 14404 {
14403 14405 case QImode:
14404 14406 case HImode:
14405 14407 case SImode:
14406 14408 simple:
14407 14409 tmp = ix86_expand_compare (code, NULL, NULL);
14408 14410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14409 14411 gen_rtx_LABEL_REF (VOIDmode, label),
14410 14412 pc_rtx);
14411 14413 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14412 14414 return;
14413 14415
14414 14416 case SFmode:
14415 14417 case DFmode:
14416 14418 case XFmode:
14417 14419 {
14418 14420 rtvec vec;
14419 14421 int use_fcomi;
14420 14422 enum rtx_code bypass_code, first_code, second_code;
14421 14423
14422 14424 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
14423 14425 &ix86_compare_op1);
14424 14426
14425 14427 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
14426 14428
14427 14429 /* Check whether we will use the natural sequence with one jump. If
14428 14430 so, we can expand jump early. Otherwise delay expansion by
14429 14431 creating compound insn to not confuse optimizers. */
14430 14432 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
14431 14433 {
14432 14434 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
14433 14435 gen_rtx_LABEL_REF (VOIDmode, label),
14434 14436 pc_rtx, NULL_RTX, NULL_RTX);
14435 14437 }
14436 14438 else
14437 14439 {
14438 14440 tmp = gen_rtx_fmt_ee (code, VOIDmode,
14439 14441 ix86_compare_op0, ix86_compare_op1);
14440 14442 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14441 14443 gen_rtx_LABEL_REF (VOIDmode, label),
14442 14444 pc_rtx);
14443 14445 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
14444 14446
14445 14447 use_fcomi = ix86_use_fcomi_compare (code);
14446 14448 vec = rtvec_alloc (3 + !use_fcomi);
14447 14449 RTVEC_ELT (vec, 0) = tmp;
14448 14450 RTVEC_ELT (vec, 1)
14449 14451 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
14450 14452 RTVEC_ELT (vec, 2)
14451 14453 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
14452 14454 if (! use_fcomi)
14453 14455 RTVEC_ELT (vec, 3)
14454 14456 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
14455 14457
14456 14458 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
14457 14459 }
14458 14460 return;
14459 14461 }
14460 14462
14461 14463 case DImode:
14462 14464 if (TARGET_64BIT)
14463 14465 goto simple;
14464 14466 case TImode:
14465 14467 /* Expand DImode branch into multiple compare+branch. */
14466 14468 {
14467 14469 rtx lo[2], hi[2], label2;
14468 14470 enum rtx_code code1, code2, code3;
14469 14471 enum machine_mode submode;
14470 14472
14471 14473 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
14472 14474 {
14473 14475 tmp = ix86_compare_op0;
14474 14476 ix86_compare_op0 = ix86_compare_op1;
14475 14477 ix86_compare_op1 = tmp;
14476 14478 code = swap_condition (code);
14477 14479 }
14478 14480 if (GET_MODE (ix86_compare_op0) == DImode)
14479 14481 {
14480 14482 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
14481 14483 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
14482 14484 submode = SImode;
14483 14485 }
14484 14486 else
14485 14487 {
14486 14488 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
14487 14489 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
14488 14490 submode = DImode;
14489 14491 }
14490 14492
14491 14493 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
14492 14494 avoid two branches. This costs one extra insn, so disable when
14493 14495 optimizing for size. */
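	  /* In plain C the trick reads roughly as (a minimal sketch,
	     assuming a 32-bit word split of the 64-bit operands):

	       int eq64 (unsigned hi0, unsigned lo0,
			 unsigned hi1, unsigned lo1)
	       {
		 return ((hi0 ^ hi1) | (lo0 ^ lo1)) == 0;
	       }

	     one OR-combined value, one compare against zero, one branch.  */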
14494 14496
14495 14497 if ((code == EQ || code == NE)
14496 14498 && (!optimize_insn_for_size_p ()
14497 14499 || hi[1] == const0_rtx || lo[1] == const0_rtx))
14498 14500 {
14499 14501 rtx xor0, xor1;
14500 14502
14501 14503 xor1 = hi[0];
14502 14504 if (hi[1] != const0_rtx)
14503 14505 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
14504 14506 NULL_RTX, 0, OPTAB_WIDEN);
14505 14507
14506 14508 xor0 = lo[0];
14507 14509 if (lo[1] != const0_rtx)
14508 14510 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
14509 14511 NULL_RTX, 0, OPTAB_WIDEN);
14510 14512
14511 14513 tmp = expand_binop (submode, ior_optab, xor1, xor0,
14512 14514 NULL_RTX, 0, OPTAB_WIDEN);
14513 14515
14514 14516 ix86_compare_op0 = tmp;
14515 14517 ix86_compare_op1 = const0_rtx;
14516 14518 ix86_expand_branch (code, label);
14517 14519 return;
14518 14520 }
14519 14521
14520 14522 /* Otherwise, if we are doing less-than or greater-or-equal-than,
14521 14523 op1 is a constant and the low word is zero, then we can just
14522 14524 examine the high word. Similarly for low word -1 and
14523 14525 less-or-equal-than or greater-than. */
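	/* Small worked example (illustrative): for a signed 64-bit compare
	   a < (5 << 32) on a 32-bit target, lo(b) == 0, so

	     a < b   <=>   hi(a) < 5

	   because the low words cannot flip the result; likewise
	   a <= 0x4FFFFFFFF (lo(b) == -1) reduces to hi(a) <= 4.  */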
14524 14526
14525 14527 if (CONST_INT_P (hi[1]))
14526 14528 switch (code)
14527 14529 {
14528 14530 case LT: case LTU: case GE: case GEU:
14529 14531 if (lo[1] == const0_rtx)
14530 14532 {
14531 14533 ix86_compare_op0 = hi[0];
14532 14534 ix86_compare_op1 = hi[1];
14533 14535 ix86_expand_branch (code, label);
14534 14536 return;
14535 14537 }
14536 14538 break;
14537 14539 case LE: case LEU: case GT: case GTU:
14538 14540 if (lo[1] == constm1_rtx)
14539 14541 {
14540 14542 ix86_compare_op0 = hi[0];
14541 14543 ix86_compare_op1 = hi[1];
14542 14544 ix86_expand_branch (code, label);
14543 14545 return;
14544 14546 }
14545 14547 break;
14546 14548 default:
14547 14549 break;
14548 14550 }
14549 14551
14550 14552 /* Otherwise, we need two or three jumps. */
14551 14553
14552 14554 label2 = gen_label_rtx ();
14553 14555
14554 14556 code1 = code;
14555 14557 code2 = swap_condition (code);
14556 14558 code3 = unsigned_condition (code);
14557 14559
14558 14560 switch (code)
14559 14561 {
14560 14562 case LT: case GT: case LTU: case GTU:
14561 14563 break;
14562 14564
14563 14565 case LE: code1 = LT; code2 = GT; break;
14564 14566 case GE: code1 = GT; code2 = LT; break;
14565 14567 case LEU: code1 = LTU; code2 = GTU; break;
14566 14568 case GEU: code1 = GTU; code2 = LTU; break;
14567 14569
14568 14570 case EQ: code1 = UNKNOWN; code2 = NE; break;
14569 14571 case NE: code2 = UNKNOWN; break;
14570 14572
14571 14573 default:
14572 14574 gcc_unreachable ();
14573 14575 }
14574 14576
14575 14577 /*
14576 14578 * a < b =>
14577 14579 * if (hi(a) < hi(b)) goto true;
14578 14580 * if (hi(a) > hi(b)) goto false;
14579 14581 * if (lo(a) < lo(b)) goto true;
14580 14582 * false:
14581 14583 */
14582 14584
14583 14585 ix86_compare_op0 = hi[0];
14584 14586 ix86_compare_op1 = hi[1];
14585 14587
14586 14588 if (code1 != UNKNOWN)
14587 14589 ix86_expand_branch (code1, label);
14588 14590 if (code2 != UNKNOWN)
14589 14591 ix86_expand_branch (code2, label2);
14590 14592
14591 14593 ix86_compare_op0 = lo[0];
14592 14594 ix86_compare_op1 = lo[1];
14593 14595 ix86_expand_branch (code3, label);
14594 14596
14595 14597 if (code2 != UNKNOWN)
14596 14598 emit_label (label2);
14597 14599 return;
14598 14600 }
14599 14601
14600 14602 default:
14601 14603 gcc_unreachable ();
14602 14604 }
14603 14605 }
14604 14606
14605 14607 /* Split branch based on floating point condition. */
14606 14608 void
14607 14609 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
14608 14610 rtx target1, rtx target2, rtx tmp, rtx pushed)
14609 14611 {
14610 14612 rtx second, bypass;
14611 14613 rtx label = NULL_RTX;
14612 14614 rtx condition;
14613 14615 int bypass_probability = -1, second_probability = -1, probability = -1;
14614 14616 rtx i;
14615 14617
14616 14618 if (target2 != pc_rtx)
14617 14619 {
14618 14620 rtx tmp = target2;
14619 14621 code = reverse_condition_maybe_unordered (code);
14620 14622 target2 = target1;
14621 14623 target1 = tmp;
14622 14624 }
14623 14625
14624 14626 condition = ix86_expand_fp_compare (code, op1, op2,
14625 14627 tmp, &second, &bypass);
14626 14628
14627 14629 /* Remove pushed operand from stack. */
14628 14630 if (pushed)
14629 14631 ix86_free_from_memory (GET_MODE (pushed));
14630 14632
14631 14633 if (split_branch_probability >= 0)
14632 14634 {
14633 14635 /* Distribute the probabilities across the jumps.
14634 14636          Assume that BYPASS and SECOND always test
14635 14637          for UNORDERED.  */
14636 14638 probability = split_branch_probability;
14637 14639
14638 14640       /* A value of 1 is low enough that there is no need to update the
14639 14641          probability.  Later we may run some experiments and see
14640 14642 if unordered values are more frequent in practice. */
14641 14643 if (bypass)
14642 14644 bypass_probability = 1;
14643 14645 if (second)
14644 14646 second_probability = 1;
14645 14647 }
14646 14648 if (bypass != NULL_RTX)
14647 14649 {
14648 14650 label = gen_label_rtx ();
14649 14651 i = emit_jump_insn (gen_rtx_SET
14650 14652 (VOIDmode, pc_rtx,
14651 14653 gen_rtx_IF_THEN_ELSE (VOIDmode,
14652 14654 bypass,
14653 14655 gen_rtx_LABEL_REF (VOIDmode,
14654 14656 label),
14655 14657 pc_rtx)));
14656 14658 if (bypass_probability >= 0)
14657 14659 REG_NOTES (i)
14658 14660 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14659 14661 GEN_INT (bypass_probability),
14660 14662 REG_NOTES (i));
14661 14663 }
14662 14664 i = emit_jump_insn (gen_rtx_SET
14663 14665 (VOIDmode, pc_rtx,
14664 14666 gen_rtx_IF_THEN_ELSE (VOIDmode,
14665 14667 condition, target1, target2)));
14666 14668 if (probability >= 0)
14667 14669 REG_NOTES (i)
14668 14670 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14669 14671 GEN_INT (probability),
14670 14672 REG_NOTES (i));
14671 14673 if (second != NULL_RTX)
14672 14674 {
14673 14675 i = emit_jump_insn (gen_rtx_SET
14674 14676 (VOIDmode, pc_rtx,
14675 14677 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
14676 14678 target2)));
14677 14679 if (second_probability >= 0)
14678 14680 REG_NOTES (i)
14679 14681 = gen_rtx_EXPR_LIST (REG_BR_PROB,
14680 14682 GEN_INT (second_probability),
14681 14683 REG_NOTES (i));
14682 14684 }
14683 14685 if (label != NULL_RTX)
14684 14686 emit_label (label);
14685 14687 }
14686 14688
14687 14689 int
14688 14690 ix86_expand_setcc (enum rtx_code code, rtx dest)
14689 14691 {
14690 14692 rtx ret, tmp, tmpreg, equiv;
14691 14693 rtx second_test, bypass_test;
14692 14694
14693 14695 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
14694 14696 return 0; /* FAIL */
14695 14697
14696 14698 gcc_assert (GET_MODE (dest) == QImode);
14697 14699
14698 14700 ret = ix86_expand_compare (code, &second_test, &bypass_test);
14699 14701 PUT_MODE (ret, QImode);
14700 14702
14701 14703 tmp = dest;
14702 14704 tmpreg = dest;
14703 14705
14704 14706 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
14705 14707 if (bypass_test || second_test)
14706 14708 {
14707 14709 rtx test = second_test;
14708 14710 int bypass = 0;
14709 14711 rtx tmp2 = gen_reg_rtx (QImode);
14710 14712 if (bypass_test)
14711 14713 {
14712 14714 gcc_assert (!second_test);
14713 14715 test = bypass_test;
14714 14716 bypass = 1;
14715 14717 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
14716 14718 }
14717 14719 PUT_MODE (test, QImode);
14718 14720 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
14719 14721
14720 14722 if (bypass)
14721 14723 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
14722 14724 else
14723 14725 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
14724 14726 }
14725 14727
14726 14728 /* Attach a REG_EQUAL note describing the comparison result. */
14727 14729 if (ix86_compare_op0 && ix86_compare_op1)
14728 14730 {
14729 14731 equiv = simplify_gen_relational (code, QImode,
14730 14732 GET_MODE (ix86_compare_op0),
14731 14733 ix86_compare_op0, ix86_compare_op1);
14732 14734 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
14733 14735 }
14734 14736
14735 14737 return 1; /* DONE */
14736 14738 }
14737 14739
14738 14740 /* Expand comparison setting or clearing carry flag. Return true when
14739 14741 successful and set pop for the operation. */
14740 14742 static bool
14741 14743 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
14742 14744 {
14743 14745 enum machine_mode mode =
14744 14746 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
14745 14747
14746 14748   /* Do not handle DImode compares that go through a special path.  */
14747 14749 if (mode == (TARGET_64BIT ? TImode : DImode))
14748 14750 return false;
14749 14751
14750 14752 if (SCALAR_FLOAT_MODE_P (mode))
14751 14753 {
14752 14754 rtx second_test = NULL, bypass_test = NULL;
14753 14755 rtx compare_op, compare_seq;
14754 14756
14755 14757 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
14756 14758
14757 14759 /* Shortcut: following common codes never translate
14758 14760 into carry flag compares. */
14759 14761 if (code == EQ || code == NE || code == UNEQ || code == LTGT
14760 14762 || code == ORDERED || code == UNORDERED)
14761 14763 return false;
14762 14764
14763 14765 /* These comparisons require zero flag; swap operands so they won't. */
14764 14766 if ((code == GT || code == UNLE || code == LE || code == UNGT)
14765 14767 && !TARGET_IEEE_FP)
14766 14768 {
14767 14769 rtx tmp = op0;
14768 14770 op0 = op1;
14769 14771 op1 = tmp;
14770 14772 code = swap_condition (code);
14771 14773 }
14772 14774
14773 14775       /* Try to expand the comparison and verify that we end up with
14774 14776          a carry flag based comparison.  This fails to be true only when
14775 14777          we decide to expand the comparison using arithmetic, which is not
14776 14778          a common scenario.  */
14777 14779 start_sequence ();
14778 14780 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
14779 14781 &second_test, &bypass_test);
14780 14782 compare_seq = get_insns ();
14781 14783 end_sequence ();
14782 14784
14783 14785 if (second_test || bypass_test)
14784 14786 return false;
14785 14787
14786 14788 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14787 14789 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14788 14790 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
14789 14791 else
14790 14792 code = GET_CODE (compare_op);
14791 14793
14792 14794 if (code != LTU && code != GEU)
14793 14795 return false;
14794 14796
14795 14797 emit_insn (compare_seq);
14796 14798 *pop = compare_op;
14797 14799 return true;
14798 14800 }
14799 14801
14800 14802 if (!INTEGRAL_MODE_P (mode))
14801 14803 return false;
14802 14804
14803 14805 switch (code)
14804 14806 {
14805 14807 case LTU:
14806 14808 case GEU:
14807 14809 break;
14808 14810
14809 14811 /* Convert a==0 into (unsigned)a<1. */
14810 14812 case EQ:
14811 14813 case NE:
14812 14814 if (op1 != const0_rtx)
14813 14815 return false;
14814 14816 op1 = const1_rtx;
14815 14817 code = (code == EQ ? LTU : GEU);
14816 14818 break;
14817 14819
14818 14820     /* Convert a>b into b<a or a>=b+1.  */
14819 14821 case GTU:
14820 14822 case LEU:
14821 14823 if (CONST_INT_P (op1))
14822 14824 {
14823 14825 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
14824 14826 /* Bail out on overflow. We still can swap operands but that
14825 14827 would force loading of the constant into register. */
14826 14828 if (op1 == const0_rtx
14827 14829 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
14828 14830 return false;
14829 14831 code = (code == GTU ? GEU : LTU);
14830 14832 }
14831 14833 else
14832 14834 {
14833 14835 rtx tmp = op1;
14834 14836 op1 = op0;
14835 14837 op0 = tmp;
14836 14838 code = (code == GTU ? LTU : GEU);
14837 14839 }
14838 14840 break;
14839 14841
14840 14842 /* Convert a>=0 into (unsigned)a<0x80000000. */
14841 14843 case LT:
14842 14844 case GE:
14843 14845 if (mode == DImode || op1 != const0_rtx)
14844 14846 return false;
14845 14847 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14846 14848 code = (code == LT ? GEU : LTU);
14847 14849 break;
14848 14850 case LE:
14849 14851 case GT:
14850 14852 if (mode == DImode || op1 != constm1_rtx)
14851 14853 return false;
14852 14854 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
14853 14855 code = (code == LE ? GEU : LTU);
14854 14856 break;
14855 14857
14856 14858 default:
14857 14859 return false;
14858 14860 }
14859 14861 /* Swapping operands may cause constant to appear as first operand. */
14860 14862 if (!nonimmediate_operand (op0, VOIDmode))
14861 14863 {
14862 14864 if (!can_create_pseudo_p ())
14863 14865 return false;
14864 14866 op0 = force_reg (mode, op0);
14865 14867 }
14866 14868 ix86_compare_op0 = op0;
14867 14869 ix86_compare_op1 = op1;
14868 14870 *pop = ix86_expand_compare (code, NULL, NULL);
14869 14871 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
14870 14872 return true;
14871 14873 }
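/* Illustrative note on why a carry-flag compare is worth recognizing:
   once the condition is expressed as LTU/GEU, the caller can materialize
   -1/0 without a branch.  E.g. for x == 0 the EQ case above rewrites the
   test as (unsigned) x < 1, so

     cmpl  $1, %eax        # CF = 1 iff eax == 0
     sbbl  %edx, %edx      # edx = -CF, i.e. -1 or 0

   which is the basis of the sbb sequences in ix86_expand_int_movcc.  */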
14872 14874
14873 14875 int
14874 14876 ix86_expand_int_movcc (rtx operands[])
14875 14877 {
14876 14878 enum rtx_code code = GET_CODE (operands[1]), compare_code;
14877 14879 rtx compare_seq, compare_op;
14878 14880 rtx second_test, bypass_test;
14879 14881 enum machine_mode mode = GET_MODE (operands[0]);
14880 14882   bool sign_bit_compare_p = false;
14881 14883
14882 14884 start_sequence ();
14883 14885 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
14884 14886 compare_seq = get_insns ();
14885 14887 end_sequence ();
14886 14888
14887 14889 compare_code = GET_CODE (compare_op);
14888 14890
14889 14891 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
14890 14892 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
14891 14893 sign_bit_compare_p = true;
14892 14894
14893 14895 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
14894 14896 HImode insns, we'd be swallowed in word prefix ops. */
14895 14897
14896 14898 if ((mode != HImode || TARGET_FAST_PREFIX)
14897 14899 && (mode != (TARGET_64BIT ? TImode : DImode))
14898 14900 && CONST_INT_P (operands[2])
14899 14901 && CONST_INT_P (operands[3]))
14900 14902 {
14901 14903 rtx out = operands[0];
14902 14904 HOST_WIDE_INT ct = INTVAL (operands[2]);
14903 14905 HOST_WIDE_INT cf = INTVAL (operands[3]);
14904 14906 HOST_WIDE_INT diff;
14905 14907
14906 14908 diff = ct - cf;
14907 14909       /* Sign bit compares are better done using shifts than by using
14908 14910          sbb.  */
14909 14911 if (sign_bit_compare_p
14910 14912 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
14911 14913 ix86_compare_op1, &compare_op))
14912 14914 {
14913 14915 /* Detect overlap between destination and compare sources. */
14914 14916 rtx tmp = out;
14915 14917
14916 14918 if (!sign_bit_compare_p)
14917 14919 {
14918 14920 bool fpcmp = false;
14919 14921
14920 14922 compare_code = GET_CODE (compare_op);
14921 14923
14922 14924 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
14923 14925 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
14924 14926 {
14925 14927 fpcmp = true;
14926 14928 compare_code = ix86_fp_compare_code_to_integer (compare_code);
14927 14929 }
14928 14930
14929 14931 /* To simplify rest of code, restrict to the GEU case. */
14930 14932 if (compare_code == LTU)
14931 14933 {
14932 14934 HOST_WIDE_INT tmp = ct;
14933 14935 ct = cf;
14934 14936 cf = tmp;
14935 14937 compare_code = reverse_condition (compare_code);
14936 14938 code = reverse_condition (code);
14937 14939 }
14938 14940 else
14939 14941 {
14940 14942 if (fpcmp)
14941 14943 PUT_CODE (compare_op,
14942 14944 reverse_condition_maybe_unordered
14943 14945 (GET_CODE (compare_op)));
14944 14946 else
14945 14947 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
14946 14948 }
14947 14949 diff = ct - cf;
14948 14950
14949 14951 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
14950 14952 || reg_overlap_mentioned_p (out, ix86_compare_op1))
14951 14953 tmp = gen_reg_rtx (mode);
14952 14954
14953 14955 if (mode == DImode)
14954 14956 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
14955 14957 else
14956 14958 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
14957 14959 }
14958 14960 else
14959 14961 {
14960 14962 if (code == GT || code == GE)
14961 14963 code = reverse_condition (code);
14962 14964 else
14963 14965 {
14964 14966 HOST_WIDE_INT tmp = ct;
14965 14967 ct = cf;
14966 14968 cf = tmp;
14967 14969 diff = ct - cf;
14968 14970 }
14969 14971 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
14970 14972 ix86_compare_op1, VOIDmode, 0, -1);
14971 14973 }
14972 14974
14973 14975 if (diff == 1)
14974 14976 {
14975 14977 /*
14976 14978 * cmpl op0,op1
14977 14979 * sbbl dest,dest
14978 14980 * [addl dest, ct]
14979 14981 *
14980 14982 * Size 5 - 8.
14981 14983 */
14982 14984 if (ct)
14983 14985 tmp = expand_simple_binop (mode, PLUS,
14984 14986 tmp, GEN_INT (ct),
14985 14987 copy_rtx (tmp), 1, OPTAB_DIRECT);
14986 14988 }
14987 14989 else if (cf == -1)
14988 14990 {
14989 14991 /*
14990 14992 * cmpl op0,op1
14991 14993 * sbbl dest,dest
14992 14994 * orl $ct, dest
14993 14995 *
14994 14996 * Size 8.
14995 14997 */
14996 14998 tmp = expand_simple_binop (mode, IOR,
14997 14999 tmp, GEN_INT (ct),
14998 15000 copy_rtx (tmp), 1, OPTAB_DIRECT);
14999 15001 }
15000 15002 else if (diff == -1 && ct)
15001 15003 {
15002 15004 /*
15003 15005 * cmpl op0,op1
15004 15006 * sbbl dest,dest
15005 15007 * notl dest
15006 15008 * [addl dest, cf]
15007 15009 *
15008 15010 * Size 8 - 11.
15009 15011 */
15010 15012 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15011 15013 if (cf)
15012 15014 tmp = expand_simple_binop (mode, PLUS,
15013 15015 copy_rtx (tmp), GEN_INT (cf),
15014 15016 copy_rtx (tmp), 1, OPTAB_DIRECT);
15015 15017 }
15016 15018 else
15017 15019 {
15018 15020 /*
15019 15021 * cmpl op0,op1
15020 15022 * sbbl dest,dest
15021 15023 * [notl dest]
15022 15024 * andl cf - ct, dest
15023 15025 * [addl dest, ct]
15024 15026 *
15025 15027 * Size 8 - 11.
15026 15028 */
15027 15029
15028 15030 if (cf == 0)
15029 15031 {
15030 15032 cf = ct;
15031 15033 ct = 0;
15032 15034 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
15033 15035 }
15034 15036
15035 15037 tmp = expand_simple_binop (mode, AND,
15036 15038 copy_rtx (tmp),
15037 15039 gen_int_mode (cf - ct, mode),
15038 15040 copy_rtx (tmp), 1, OPTAB_DIRECT);
15039 15041 if (ct)
15040 15042 tmp = expand_simple_binop (mode, PLUS,
15041 15043 copy_rtx (tmp), GEN_INT (ct),
15042 15044 copy_rtx (tmp), 1, OPTAB_DIRECT);
15043 15045 }
15044 15046
15045 15047 if (!rtx_equal_p (tmp, out))
15046 15048 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
15047 15049
15048 15050 return 1; /* DONE */
15049 15051 }
15050 15052
15051 15053 if (diff < 0)
15052 15054 {
15053 15055 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15054 15056
15055 15057 HOST_WIDE_INT tmp;
15056 15058 tmp = ct, ct = cf, cf = tmp;
15057 15059 diff = -diff;
15058 15060
15059 15061 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15060 15062 {
15061 15063 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15062 15064
15063 15065              /* We may be reversing an unordered compare to a normal compare, which
15064 15066                 is not valid in general (we may convert a non-trapping condition
15065 15067                 to a trapping one); however, on i386 we currently emit all
15066 15068                 comparisons unordered.  */
15067 15069 compare_code = reverse_condition_maybe_unordered (compare_code);
15068 15070 code = reverse_condition_maybe_unordered (code);
15069 15071 }
15070 15072 else
15071 15073 {
15072 15074 compare_code = reverse_condition (compare_code);
15073 15075 code = reverse_condition (code);
15074 15076 }
15075 15077 }
15076 15078
15077 15079 compare_code = UNKNOWN;
15078 15080 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
15079 15081 && CONST_INT_P (ix86_compare_op1))
15080 15082 {
15081 15083 if (ix86_compare_op1 == const0_rtx
15082 15084 && (code == LT || code == GE))
15083 15085 compare_code = code;
15084 15086 else if (ix86_compare_op1 == constm1_rtx)
15085 15087 {
15086 15088 if (code == LE)
15087 15089 compare_code = LT;
15088 15090 else if (code == GT)
15089 15091 compare_code = GE;
15090 15092 }
15091 15093 }
15092 15094
15093 15095 /* Optimize dest = (op0 < 0) ? -1 : cf. */
15094 15096 if (compare_code != UNKNOWN
15095 15097 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
15096 15098 && (cf == -1 || ct == -1))
15097 15099 {
15098 15100 /* If lea code below could be used, only optimize
15099 15101 if it results in a 2 insn sequence. */
15100 15102
15101 15103 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
15102 15104 || diff == 3 || diff == 5 || diff == 9)
15103 15105 || (compare_code == LT && ct == -1)
15104 15106 || (compare_code == GE && cf == -1))
15105 15107 {
15106 15108 /*
15107 15109 * notl op1 (if necessary)
15108 15110 * sarl $31, op1
15109 15111 * orl cf, op1
15110 15112 */
15111 15113 if (ct != -1)
15112 15114 {
15113 15115 cf = ct;
15114 15116 ct = -1;
15115 15117 code = reverse_condition (code);
15116 15118 }
15117 15119
15118 15120 out = emit_store_flag (out, code, ix86_compare_op0,
15119 15121 ix86_compare_op1, VOIDmode, 0, -1);
15120 15122
15121 15123 out = expand_simple_binop (mode, IOR,
15122 15124 out, GEN_INT (cf),
15123 15125 out, 1, OPTAB_DIRECT);
15124 15126 if (out != operands[0])
15125 15127 emit_move_insn (operands[0], out);
15126 15128
15127 15129 return 1; /* DONE */
15128 15130 }
15129 15131 }
15130 15132
15131 15133
15132 15134 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
15133 15135 || diff == 3 || diff == 5 || diff == 9)
15134 15136 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
15135 15137 && (mode != DImode
15136 15138 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
15137 15139 {
15138 15140 /*
15139 15141 * xorl dest,dest
15140 15142 * cmpl op1,op2
15141 15143 * setcc dest
15142 15144 * lea cf(dest*(ct-cf)),dest
15143 15145 *
15144 15146 * Size 14.
15145 15147 *
15146 15148 * This also catches the degenerate setcc-only case.
15147 15149 */
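	  /* Concrete instance (illustrative): dest = cond ? 5 : 2 gives
	     ct = 5, cf = 2, diff = 3, so after the setcc dest holds 0 or 1
	     and

	       leal  2(%eax,%eax,2), %eax    # eax*3 + 2  ->  2 or 5

	     selects the result with no branch; lea also leaves the flags
	     untouched.  */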
15148 15150
15149 15151 rtx tmp;
15150 15152 int nops;
15151 15153
15152 15154 out = emit_store_flag (out, code, ix86_compare_op0,
15153 15155 ix86_compare_op1, VOIDmode, 0, 1);
15154 15156
15155 15157 nops = 0;
15156 15158 /* On x86_64 the lea instruction operates on Pmode, so we need
15157 15159          to get the arithmetic done in the proper mode to match.  */
15158 15160 if (diff == 1)
15159 15161 tmp = copy_rtx (out);
15160 15162 else
15161 15163 {
15162 15164 rtx out1;
15163 15165 out1 = copy_rtx (out);
15164 15166 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
15165 15167 nops++;
15166 15168 if (diff & 1)
15167 15169 {
15168 15170 tmp = gen_rtx_PLUS (mode, tmp, out1);
15169 15171 nops++;
15170 15172 }
15171 15173 }
15172 15174 if (cf != 0)
15173 15175 {
15174 15176 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
15175 15177 nops++;
15176 15178 }
15177 15179 if (!rtx_equal_p (tmp, out))
15178 15180 {
15179 15181 if (nops == 1)
15180 15182 out = force_operand (tmp, copy_rtx (out));
15181 15183 else
15182 15184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
15183 15185 }
15184 15186 if (!rtx_equal_p (out, operands[0]))
15185 15187 emit_move_insn (operands[0], copy_rtx (out));
15186 15188
15187 15189 return 1; /* DONE */
15188 15190 }
15189 15191
15190 15192 /*
15191 15193 * General case: Jumpful:
15192 15194 * xorl dest,dest cmpl op1, op2
15193 15195 * cmpl op1, op2 movl ct, dest
15194 15196 * setcc dest jcc 1f
15195 15197 * decl dest movl cf, dest
15196 15198 * andl (cf-ct),dest 1:
15197 15199 * addl ct,dest
15198 15200 *
15199 15201 * Size 20. Size 14.
15200 15202 *
15201 15203 * This is reasonably steep, but branch mispredict costs are
15202 15204 * high on modern cpus, so consider failing only if optimizing
15203 15205 * for space.
15204 15206 */
15205 15207
15206 15208 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15207 15209 && BRANCH_COST (optimize_insn_for_speed_p (),
15208 15210 false) >= 2)
15209 15211 {
15210 15212 if (cf == 0)
15211 15213 {
15212 15214 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
15213 15215
15214 15216 cf = ct;
15215 15217 ct = 0;
15216 15218
15217 15219 if (SCALAR_FLOAT_MODE_P (cmp_mode))
15218 15220 {
15219 15221 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
15220 15222
15221 15223              /* We may be reversing an unordered compare to a normal compare,
15222 15224                 which is not valid in general (we may convert a non-trapping
15223 15225                 condition to a trapping one); however, on i386 we currently
15224 15226                 emit all comparisons unordered.  */
15225 15227 code = reverse_condition_maybe_unordered (code);
15226 15228 }
15227 15229 else
15228 15230 {
15229 15231 code = reverse_condition (code);
15230 15232 if (compare_code != UNKNOWN)
15231 15233 compare_code = reverse_condition (compare_code);
15232 15234 }
15233 15235 }
15234 15236
15235 15237 if (compare_code != UNKNOWN)
15236 15238 {
15237 15239 /* notl op1 (if needed)
15238 15240 sarl $31, op1
15239 15241 andl (cf-ct), op1
15240 15242 addl ct, op1
15241 15243
15242 15244 For x < 0 (resp. x <= -1) there will be no notl,
15243 15245 so if possible swap the constants to get rid of the
15244 15246 complement.
15245 15247 True/false will be -1/0 while code below (store flag
15246 15248 followed by decrement) is 0/-1, so the constants need
15247 15249 to be exchanged once more. */
15248 15250
15249 15251 if (compare_code == GE || !cf)
15250 15252 {
15251 15253 code = reverse_condition (code);
15252 15254 compare_code = LT;
15253 15255 }
15254 15256 else
15255 15257 {
15256 15258 HOST_WIDE_INT tmp = cf;
15257 15259 cf = ct;
15258 15260 ct = tmp;
15259 15261 }
15260 15262
15261 15263 out = emit_store_flag (out, code, ix86_compare_op0,
15262 15264 ix86_compare_op1, VOIDmode, 0, -1);
15263 15265 }
15264 15266 else
15265 15267 {
15266 15268 out = emit_store_flag (out, code, ix86_compare_op0,
15267 15269 ix86_compare_op1, VOIDmode, 0, 1);
15268 15270
15269 15271 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
15270 15272 copy_rtx (out), 1, OPTAB_DIRECT);
15271 15273 }
15272 15274
15273 15275 out = expand_simple_binop (mode, AND, copy_rtx (out),
15274 15276 gen_int_mode (cf - ct, mode),
15275 15277 copy_rtx (out), 1, OPTAB_DIRECT);
15276 15278 if (ct)
15277 15279 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
15278 15280 copy_rtx (out), 1, OPTAB_DIRECT);
15279 15281 if (!rtx_equal_p (out, operands[0]))
15280 15282 emit_move_insn (operands[0], copy_rtx (out));
15281 15283
15282 15284 return 1; /* DONE */
15283 15285 }
15284 15286 }
15285 15287
15286 15288 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
15287 15289 {
15288 15290 /* Try a few things more with specific constants and a variable. */
15289 15291
15290 15292 optab op;
15291 15293 rtx var, orig_out, out, tmp;
15292 15294
15293 15295 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2)
15294 15296 return 0; /* FAIL */
15295 15297
15296 15298 /* If one of the two operands is an interesting constant, load a
15297 15299 constant with the above and mask it in with a logical operation. */
15298 15300
15299 15301 if (CONST_INT_P (operands[2]))
15300 15302 {
15301 15303 var = operands[3];
15302 15304 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
15303 15305 operands[3] = constm1_rtx, op = and_optab;
15304 15306 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
15305 15307 operands[3] = const0_rtx, op = ior_optab;
15306 15308 else
15307 15309 return 0; /* FAIL */
15308 15310 }
15309 15311 else if (CONST_INT_P (operands[3]))
15310 15312 {
15311 15313 var = operands[2];
15312 15314 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
15313 15315 operands[2] = constm1_rtx, op = and_optab;
15314 15316          else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
15315 15317 operands[2] = const0_rtx, op = ior_optab;
15316 15318 else
15317 15319 return 0; /* FAIL */
15318 15320 }
15319 15321 else
15320 15322 return 0; /* FAIL */
15321 15323
15322 15324 orig_out = operands[0];
15323 15325 tmp = gen_reg_rtx (mode);
15324 15326 operands[0] = tmp;
15325 15327
15326 15328 /* Recurse to get the constant loaded. */
15327 15329 if (ix86_expand_int_movcc (operands) == 0)
15328 15330 return 0; /* FAIL */
15329 15331
15330 15332 /* Mask in the interesting variable. */
15331 15333 out = expand_binop (mode, op, var, tmp, orig_out, 0,
15332 15334 OPTAB_WIDEN);
15333 15335 if (!rtx_equal_p (out, orig_out))
15334 15336 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
15335 15337
15336 15338 return 1; /* DONE */
15337 15339 }
15338 15340
15339 15341 /*
15340 15342 * For comparison with above,
15341 15343 *
15342 15344 * movl cf,dest
15343 15345 * movl ct,tmp
15344 15346 * cmpl op1,op2
15345 15347 * cmovcc tmp,dest
15346 15348 *
15347 15349 * Size 15.
15348 15350 */
15349 15351
15350 15352 if (! nonimmediate_operand (operands[2], mode))
15351 15353 operands[2] = force_reg (mode, operands[2]);
15352 15354 if (! nonimmediate_operand (operands[3], mode))
15353 15355 operands[3] = force_reg (mode, operands[3]);
15354 15356
15355 15357 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15356 15358 {
15357 15359 rtx tmp = gen_reg_rtx (mode);
15358 15360 emit_move_insn (tmp, operands[3]);
15359 15361 operands[3] = tmp;
15360 15362 }
15361 15363 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15362 15364 {
15363 15365 rtx tmp = gen_reg_rtx (mode);
15364 15366 emit_move_insn (tmp, operands[2]);
15365 15367 operands[2] = tmp;
15366 15368 }
15367 15369
15368 15370 if (! register_operand (operands[2], VOIDmode)
15369 15371 && (mode == QImode
15370 15372 || ! register_operand (operands[3], VOIDmode)))
15371 15373 operands[2] = force_reg (mode, operands[2]);
15372 15374
15373 15375 if (mode == QImode
15374 15376 && ! register_operand (operands[3], VOIDmode))
15375 15377 operands[3] = force_reg (mode, operands[3]);
15376 15378
15377 15379 emit_insn (compare_seq);
15378 15380 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15379 15381 gen_rtx_IF_THEN_ELSE (mode,
15380 15382 compare_op, operands[2],
15381 15383 operands[3])));
15382 15384 if (bypass_test)
15383 15385 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15384 15386 gen_rtx_IF_THEN_ELSE (mode,
15385 15387 bypass_test,
15386 15388 copy_rtx (operands[3]),
15387 15389 copy_rtx (operands[0]))));
15388 15390 if (second_test)
15389 15391 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
15390 15392 gen_rtx_IF_THEN_ELSE (mode,
15391 15393 second_test,
15392 15394 copy_rtx (operands[2]),
15393 15395 copy_rtx (operands[0]))));
15394 15396
15395 15397 return 1; /* DONE */
15396 15398 }
15397 15399
15398 15400 /* Swap, force into registers, or otherwise massage the two operands
15399 15401 to an sse comparison with a mask result. Thus we differ a bit from
15400 15402 ix86_prepare_fp_compare_args which expects to produce a flags result.
15401 15403
15402 15404 The DEST operand exists to help determine whether to commute commutative
15403 15405 operators. The POP0/POP1 operands are updated in place. The new
15404 15406 comparison code is returned, or UNKNOWN if not implementable. */
15405 15407
15406 15408 static enum rtx_code
15407 15409 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
15408 15410 rtx *pop0, rtx *pop1)
15409 15411 {
15410 15412 rtx tmp;
15411 15413
15412 15414 switch (code)
15413 15415 {
15414 15416 case LTGT:
15415 15417 case UNEQ:
15416 15418 /* We have no LTGT as an operator. We could implement it with
15417 15419 NE & ORDERED, but this requires an extra temporary. It's
15418 15420 not clear that it's worth it. */
15419 15421 return UNKNOWN;
15420 15422
15421 15423 case LT:
15422 15424 case LE:
15423 15425 case UNGT:
15424 15426 case UNGE:
15425 15427 /* These are supported directly. */
15426 15428 break;
15427 15429
15428 15430 case EQ:
15429 15431 case NE:
15430 15432 case UNORDERED:
15431 15433 case ORDERED:
15432 15434 /* For commutative operators, try to canonicalize the destination
15433 15435 operand to be first in the comparison - this helps reload to
15434 15436 avoid extra moves. */
15435 15437 if (!dest || !rtx_equal_p (dest, *pop1))
15436 15438 break;
15437 15439 /* FALLTHRU */
15438 15440
15439 15441 case GE:
15440 15442 case GT:
15441 15443 case UNLE:
15442 15444 case UNLT:
15443 15445 /* These are not supported directly. Swap the comparison operands
15444 15446 to transform into something that is supported. */
15445 15447 tmp = *pop0;
15446 15448 *pop0 = *pop1;
15447 15449 *pop1 = tmp;
15448 15450 code = swap_condition (code);
15449 15451 break;
15450 15452
15451 15453 default:
15452 15454 gcc_unreachable ();
15453 15455 }
15454 15456
15455 15457 return code;
15456 15458 }
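/* For instance (illustrative), GE has no direct SSE comparison encoding,
   so a >= b is emitted as b <= a (CMPLEPS / CMPLESD with the operands
   swapped); the resulting mask is identical.  */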
15457 15459
15458 15460 /* Detect conditional moves that exactly match min/max operational
15459 15461 semantics. Note that this is IEEE safe, as long as we don't
15460 15462 interchange the operands.
15461 15463
15462 15464 Returns FALSE if this conditional move doesn't match a MIN/MAX,
15463 15465 and TRUE if the operation is successful and instructions are emitted. */
15464 15466
15465 15467 static bool
15466 15468 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
15467 15469 rtx cmp_op1, rtx if_true, rtx if_false)
15468 15470 {
15469 15471 enum machine_mode mode;
15470 15472 bool is_min;
15471 15473 rtx tmp;
15472 15474
15473 15475 if (code == LT)
15474 15476 ;
15475 15477 else if (code == UNGE)
15476 15478 {
15477 15479 tmp = if_true;
15478 15480 if_true = if_false;
15479 15481 if_false = tmp;
15480 15482 }
15481 15483 else
15482 15484 return false;
15483 15485
15484 15486 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
15485 15487 is_min = true;
15486 15488 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
15487 15489 is_min = false;
15488 15490 else
15489 15491 return false;
15490 15492
15491 15493 mode = GET_MODE (dest);
15492 15494
15493 15495 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
15494 15496 but MODE may be a vector mode and thus not appropriate. */
15495 15497 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
15496 15498 {
15497 15499 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
15498 15500 rtvec v;
15499 15501
15500 15502 if_true = force_reg (mode, if_true);
15501 15503 v = gen_rtvec (2, if_true, if_false);
15502 15504 tmp = gen_rtx_UNSPEC (mode, v, u);
15503 15505 }
15504 15506 else
15505 15507 {
15506 15508 code = is_min ? SMIN : SMAX;
15507 15509 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
15508 15510 }
15509 15511
15510 15512 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
15511 15513 return true;
15512 15514 }
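/* Background for the LT/UNGE restriction above (illustrative note): the
   SSE min/max instructions compute exactly "a < b ? a : b" (respectively
   "a > b ? a : b"); when the operands compare unordered, or are zeros of
   opposite sign, the second operand is returned.  A conditional move
   therefore matches MINSS and friends only when its condition is LT, or
   UNGE with the arms swapped, which is what is checked for above.  */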
15513 15515
15514 15516 /* Expand an sse vector comparison. Return the register with the result. */
15515 15517
15516 15518 static rtx
15517 15519 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
15518 15520 rtx op_true, rtx op_false)
15519 15521 {
15520 15522 enum machine_mode mode = GET_MODE (dest);
15521 15523 rtx x;
15522 15524
15523 15525 cmp_op0 = force_reg (mode, cmp_op0);
15524 15526 if (!nonimmediate_operand (cmp_op1, mode))
15525 15527 cmp_op1 = force_reg (mode, cmp_op1);
15526 15528
15527 15529 if (optimize
15528 15530 || reg_overlap_mentioned_p (dest, op_true)
15529 15531 || reg_overlap_mentioned_p (dest, op_false))
15530 15532 dest = gen_reg_rtx (mode);
15531 15533
15532 15534 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
15533 15535 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15534 15536
15535 15537 return dest;
15536 15538 }
15537 15539
15538 15540 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
15539 15541 operations. This is used for both scalar and vector conditional moves. */
15540 15542
15541 15543 static void
15542 15544 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
15543 15545 {
15544 15546 enum machine_mode mode = GET_MODE (dest);
15545 15547 rtx t2, t3, x;
15546 15548
15547 15549 if (op_false == CONST0_RTX (mode))
15548 15550 {
15549 15551 op_true = force_reg (mode, op_true);
15550 15552 x = gen_rtx_AND (mode, cmp, op_true);
15551 15553 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15552 15554 }
15553 15555 else if (op_true == CONST0_RTX (mode))
15554 15556 {
15555 15557 op_false = force_reg (mode, op_false);
15556 15558 x = gen_rtx_NOT (mode, cmp);
15557 15559 x = gen_rtx_AND (mode, x, op_false);
15558 15560 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15559 15561 }
15560 15562 else if (TARGET_SSE5)
15561 15563 {
15562 15564 rtx pcmov = gen_rtx_SET (mode, dest,
15563 15565 gen_rtx_IF_THEN_ELSE (mode, cmp,
15564 15566 op_true,
15565 15567 op_false));
15566 15568 emit_insn (pcmov);
15567 15569 }
15568 15570 else
15569 15571 {
15570 15572 op_true = force_reg (mode, op_true);
15571 15573 op_false = force_reg (mode, op_false);
15572 15574
15573 15575 t2 = gen_reg_rtx (mode);
15574 15576 if (optimize)
15575 15577 t3 = gen_reg_rtx (mode);
15576 15578 else
15577 15579 t3 = dest;
15578 15580
15579 15581 x = gen_rtx_AND (mode, op_true, cmp);
15580 15582 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
15581 15583
15582 15584 x = gen_rtx_NOT (mode, cmp);
15583 15585 x = gen_rtx_AND (mode, x, op_false);
15584 15586 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
15585 15587
15586 15588 x = gen_rtx_IOR (mode, t3, t2);
15587 15589 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
15588 15590 }
15589 15591 }
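/* The general branch above is the classic SSE mask blend; per lane it
   amounts to (a minimal scalar sketch, mask being all-ones or all-zeros):

     unsigned blend (unsigned mask, unsigned t, unsigned f)
     {
       return (mask & t) | (~mask & f);
     }

   With TARGET_SSE5 the same selection is expressed directly as a single
   IF_THEN_ELSE (pcmov-style) pattern instead.  */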
15590 15592
15591 15593 /* Expand a floating-point conditional move. Return true if successful. */
15592 15594
15593 15595 int
15594 15596 ix86_expand_fp_movcc (rtx operands[])
15595 15597 {
15596 15598 enum machine_mode mode = GET_MODE (operands[0]);
15597 15599 enum rtx_code code = GET_CODE (operands[1]);
15598 15600 rtx tmp, compare_op, second_test, bypass_test;
15599 15601
15600 15602 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
15601 15603 {
15602 15604 enum machine_mode cmode;
15603 15605
15604 15606 /* Since we've no cmove for sse registers, don't force bad register
15605 15607 allocation just to gain access to it. Deny movcc when the
15606 15608 comparison mode doesn't match the move mode. */
15607 15609 cmode = GET_MODE (ix86_compare_op0);
15608 15610 if (cmode == VOIDmode)
15609 15611 cmode = GET_MODE (ix86_compare_op1);
15610 15612 if (cmode != mode)
15611 15613 return 0;
15612 15614
15613 15615 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15614 15616 &ix86_compare_op0,
15615 15617 &ix86_compare_op1);
15616 15618 if (code == UNKNOWN)
15617 15619 return 0;
15618 15620
15619 15621 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
15620 15622 ix86_compare_op1, operands[2],
15621 15623 operands[3]))
15622 15624 return 1;
15623 15625
15624 15626 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
15625 15627 ix86_compare_op1, operands[2], operands[3]);
15626 15628 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
15627 15629 return 1;
15628 15630 }
15629 15631
15630 15632 /* The floating point conditional move instructions don't directly
15631 15633 support conditions resulting from a signed integer comparison. */
15632 15634
15633 15635 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15634 15636
15635 15637 /* The floating point conditional move instructions don't directly
15636 15638 support signed integer comparisons. */
15637 15639
15638 15640 if (!fcmov_comparison_operator (compare_op, VOIDmode))
15639 15641 {
15640 15642 gcc_assert (!second_test && !bypass_test);
15641 15643 tmp = gen_reg_rtx (QImode);
15642 15644 ix86_expand_setcc (code, tmp);
15643 15645 code = NE;
15644 15646 ix86_compare_op0 = tmp;
15645 15647 ix86_compare_op1 = const0_rtx;
15646 15648 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
15647 15649 }
15648 15650 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
15649 15651 {
15650 15652 tmp = gen_reg_rtx (mode);
15651 15653 emit_move_insn (tmp, operands[3]);
15652 15654 operands[3] = tmp;
15653 15655 }
15654 15656 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
15655 15657 {
15656 15658 tmp = gen_reg_rtx (mode);
15657 15659 emit_move_insn (tmp, operands[2]);
15658 15660 operands[2] = tmp;
15659 15661 }
15660 15662
15661 15663 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15662 15664 gen_rtx_IF_THEN_ELSE (mode, compare_op,
15663 15665 operands[2], operands[3])));
15664 15666 if (bypass_test)
15665 15667 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15666 15668 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
15667 15669 operands[3], operands[0])));
15668 15670 if (second_test)
15669 15671 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
15670 15672 gen_rtx_IF_THEN_ELSE (mode, second_test,
15671 15673 operands[2], operands[0])));
15672 15674
15673 15675 return 1;
15674 15676 }
15675 15677
15676 15678 /* Expand a floating-point vector conditional move; a vcond operation
15677 15679 rather than a movcc operation. */
15678 15680
15679 15681 bool
15680 15682 ix86_expand_fp_vcond (rtx operands[])
15681 15683 {
15682 15684 enum rtx_code code = GET_CODE (operands[3]);
15683 15685 rtx cmp;
15684 15686
15685 15687 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
15686 15688 &operands[4], &operands[5]);
15687 15689 if (code == UNKNOWN)
15688 15690 return false;
15689 15691
15690 15692 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
15691 15693 operands[5], operands[1], operands[2]))
15692 15694 return true;
15693 15695
15694 15696 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
15695 15697 operands[1], operands[2]);
15696 15698 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
15697 15699 return true;
15698 15700 }
15699 15701
15700 15702 /* Expand a signed/unsigned integral vector conditional move. */
15701 15703
15702 15704 bool
15703 15705 ix86_expand_int_vcond (rtx operands[])
15704 15706 {
15705 15707 enum machine_mode mode = GET_MODE (operands[0]);
15706 15708 enum rtx_code code = GET_CODE (operands[3]);
15707 15709 bool negate = false;
15708 15710 rtx x, cop0, cop1;
15709 15711
15710 15712 cop0 = operands[4];
15711 15713 cop1 = operands[5];
15712 15714
15713 15715 /* SSE5 supports all of the comparisons on all vector int types. */
15714 15716 if (!TARGET_SSE5)
15715 15717 {
15716 15718 /* Canonicalize the comparison to EQ, GT, GTU. */
15717 15719 switch (code)
15718 15720 {
15719 15721 case EQ:
15720 15722 case GT:
15721 15723 case GTU:
15722 15724 break;
15723 15725
15724 15726 case NE:
15725 15727 case LE:
15726 15728 case LEU:
15727 15729 code = reverse_condition (code);
15728 15730 negate = true;
15729 15731 break;
15730 15732
15731 15733 case GE:
15732 15734 case GEU:
15733 15735 code = reverse_condition (code);
15734 15736 negate = true;
15735 15737 /* FALLTHRU */
15736 15738
15737 15739 case LT:
15738 15740 case LTU:
15739 15741 code = swap_condition (code);
15740 15742 x = cop0, cop0 = cop1, cop1 = x;
15741 15743 break;
15742 15744
15743 15745 default:
15744 15746 gcc_unreachable ();
15745 15747 }
15746 15748
15747 15749 /* Only SSE4.1/SSE4.2 supports V2DImode. */
15748 15750 if (mode == V2DImode)
15749 15751 {
15750 15752 switch (code)
15751 15753 {
15752 15754 case EQ:
15753 15755 /* SSE4.1 supports EQ. */
15754 15756 if (!TARGET_SSE4_1)
15755 15757 return false;
15756 15758 break;
15757 15759
15758 15760 case GT:
15759 15761 case GTU:
15760 15762 /* SSE4.2 supports GT/GTU. */
15761 15763 if (!TARGET_SSE4_2)
15762 15764 return false;
15763 15765 break;
15764 15766
15765 15767 default:
15766 15768 gcc_unreachable ();
15767 15769 }
15768 15770 }
15769 15771
15770 15772 /* Unsigned parallel compare is not supported by the hardware.
15771 15773 Play some tricks to turn this into a signed comparison
15772 15774 against 0. */
15773 15775 if (code == GTU)
15774 15776 {
15775 15777 cop0 = force_reg (mode, cop0);
15776 15778
15777 15779 switch (mode)
15778 15780 {
15779 15781 case V4SImode:
15780 15782 case V2DImode:
15781 15783 {
15782 15784 rtx t1, t2, mask;
15783 15785 rtx (*gen_sub3) (rtx, rtx, rtx);
15784 15786
15785 15787 /* Subtract (-(INT MAX) - 1) from both operands to make
15786 15788 them signed. */
15787 15789 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
15788 15790 true, false);
15789 15791 gen_sub3 = (mode == V4SImode
15790 15792 ? gen_subv4si3 : gen_subv2di3);
15791 15793 t1 = gen_reg_rtx (mode);
15792 15794 emit_insn (gen_sub3 (t1, cop0, mask));
15793 15795
15794 15796 t2 = gen_reg_rtx (mode);
15795 15797 emit_insn (gen_sub3 (t2, cop1, mask));
15796 15798
15797 15799 cop0 = t1;
15798 15800 cop1 = t2;
15799 15801 code = GT;
15800 15802 }
15801 15803 break;
15802 15804
15803 15805 case V16QImode:
15804 15806 case V8HImode:
15805 15807 /* Perform a parallel unsigned saturating subtraction. */
15806 15808 x = gen_reg_rtx (mode);
15807 15809 emit_insn (gen_rtx_SET (VOIDmode, x,
15808 15810 gen_rtx_US_MINUS (mode, cop0, cop1)));
15809 15811
15810 15812 cop0 = x;
15811 15813 cop1 = CONST0_RTX (mode);
15812 15814 code = EQ;
15813 15815 negate = !negate;
15814 15816 break;
15815 15817
15816 15818 default:
15817 15819 gcc_unreachable ();
15818 15820 }
15819 15821 }
15820 15822 }
15821 15823
15822 15824 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
15823 15825 operands[1+negate], operands[2-negate]);
15824 15826
15825 15827 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
15826 15828 operands[2-negate]);
15827 15829 return true;
15828 15830 }
15829 15831
15830 15832 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
15831 15833 true if we should do zero extension, else sign extension. HIGH_P is
15832 15834 true if we want the N/2 high elements, else the low elements. */
15833 15835
15834 15836 void
15835 15837 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15836 15838 {
15837 15839 enum machine_mode imode = GET_MODE (operands[1]);
15838 15840 rtx (*unpack)(rtx, rtx, rtx);
15839 15841 rtx se, dest;
15840 15842
15841 15843 switch (imode)
15842 15844 {
15843 15845 case V16QImode:
15844 15846 if (high_p)
15845 15847 unpack = gen_vec_interleave_highv16qi;
15846 15848 else
15847 15849 unpack = gen_vec_interleave_lowv16qi;
15848 15850 break;
15849 15851 case V8HImode:
15850 15852 if (high_p)
15851 15853 unpack = gen_vec_interleave_highv8hi;
15852 15854 else
15853 15855 unpack = gen_vec_interleave_lowv8hi;
15854 15856 break;
15855 15857 case V4SImode:
15856 15858 if (high_p)
15857 15859 unpack = gen_vec_interleave_highv4si;
15858 15860 else
15859 15861 unpack = gen_vec_interleave_lowv4si;
15860 15862 break;
15861 15863 default:
15862 15864 gcc_unreachable ();
15863 15865 }
15864 15866
15865 15867 dest = gen_lowpart (imode, operands[0]);
15866 15868
15867 15869 if (unsigned_p)
15868 15870 se = force_reg (imode, CONST0_RTX (imode));
15869 15871 else
15870 15872 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
15871 15873 operands[1], pc_rtx, pc_rtx);
15872 15874
15873 15875 emit_insn (unpack (dest, operands[1], se));
15874 15876 }
15875 15877
15876 15878 /* This function performs the same task as ix86_expand_sse_unpack,
15877 15879 but with SSE4.1 instructions. */
15878 15880
15879 15881 void
15880 15882 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15881 15883 {
15882 15884 enum machine_mode imode = GET_MODE (operands[1]);
15883 15885 rtx (*unpack)(rtx, rtx);
15884 15886 rtx src, dest;
15885 15887
15886 15888 switch (imode)
15887 15889 {
15888 15890 case V16QImode:
15889 15891 if (unsigned_p)
15890 15892 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
15891 15893 else
15892 15894 unpack = gen_sse4_1_extendv8qiv8hi2;
15893 15895 break;
15894 15896 case V8HImode:
15895 15897 if (unsigned_p)
15896 15898 unpack = gen_sse4_1_zero_extendv4hiv4si2;
15897 15899 else
15898 15900 unpack = gen_sse4_1_extendv4hiv4si2;
15899 15901 break;
15900 15902 case V4SImode:
15901 15903 if (unsigned_p)
15902 15904 unpack = gen_sse4_1_zero_extendv2siv2di2;
15903 15905 else
15904 15906 unpack = gen_sse4_1_extendv2siv2di2;
15905 15907 break;
15906 15908 default:
15907 15909 gcc_unreachable ();
15908 15910 }
15909 15911
15910 15912 dest = operands[0];
15911 15913 if (high_p)
15912 15914 {
15913 15915 /* Shift higher 8 bytes to lower 8 bytes. */
15914 15916 src = gen_reg_rtx (imode);
15915 15917 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
15916 15918 gen_lowpart (TImode, operands[1]),
15917 15919 GEN_INT (64)));
15918 15920 }
15919 15921 else
15920 15922 src = operands[1];
15921 15923
15922 15924 emit_insn (unpack (dest, src));
15923 15925 }
15924 15926
15925 15927 /* This function performs the same task as ix86_expand_sse_unpack,
15926 15928 but with sse5 instructions. */
15927 15929
15928 15930 void
15929 15931 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p)
15930 15932 {
15931 15933 enum machine_mode imode = GET_MODE (operands[1]);
15932 15934 int pperm_bytes[16];
15933 15935 int i;
15934 15936 int h = (high_p) ? 8 : 0;
15935 15937 int h2;
15936 15938 int sign_extend;
15937 15939 rtvec v = rtvec_alloc (16);
15938 15940 rtvec vs;
15939 15941 rtx x, p;
15940 15942 rtx op0 = operands[0], op1 = operands[1];
15941 15943
15942 15944 switch (imode)
15943 15945 {
15944 15946 case V16QImode:
15945 15947 vs = rtvec_alloc (8);
15946 15948 h2 = (high_p) ? 8 : 0;
15947 15949 for (i = 0; i < 8; i++)
15948 15950 {
15949 15951 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h;
15950 15952 pperm_bytes[2*i+1] = ((unsigned_p)
15951 15953 ? PPERM_ZERO
15952 15954 : PPERM_SIGN | PPERM_SRC2 | i | h);
15953 15955 }
15954 15956
15955 15957 for (i = 0; i < 16; i++)
15956 15958 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15957 15959
15958 15960 for (i = 0; i < 8; i++)
15959 15961 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15960 15962
15961 15963 p = gen_rtx_PARALLEL (VOIDmode, vs);
15962 15964 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15963 15965 if (unsigned_p)
15964 15966 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x));
15965 15967 else
15966 15968 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x));
15967 15969 break;
15968 15970
15969 15971 case V8HImode:
15970 15972 vs = rtvec_alloc (4);
15971 15973 h2 = (high_p) ? 4 : 0;
15972 15974 for (i = 0; i < 4; i++)
15973 15975 {
15974 15976 sign_extend = ((unsigned_p)
15975 15977 ? PPERM_ZERO
15976 15978 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h));
15977 15979 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h);
15978 15980 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h);
15979 15981 pperm_bytes[4*i+2] = sign_extend;
15980 15982 pperm_bytes[4*i+3] = sign_extend;
15981 15983 }
15982 15984
15983 15985 for (i = 0; i < 16; i++)
15984 15986 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
15985 15987
15986 15988 for (i = 0; i < 4; i++)
15987 15989 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
15988 15990
15989 15991 p = gen_rtx_PARALLEL (VOIDmode, vs);
15990 15992 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
15991 15993 if (unsigned_p)
15992 15994 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x));
15993 15995 else
15994 15996 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x));
15995 15997 break;
15996 15998
15997 15999 case V4SImode:
15998 16000 vs = rtvec_alloc (2);
15999 16001 h2 = (high_p) ? 2 : 0;
16000 16002 for (i = 0; i < 2; i++)
16001 16003 {
16002 16004 sign_extend = ((unsigned_p)
16003 16005 ? PPERM_ZERO
16004 16006 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h));
16005 16007 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h);
16006 16008 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h);
16007 16009 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h);
16008 16010 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h);
16009 16011 pperm_bytes[8*i+4] = sign_extend;
16010 16012 pperm_bytes[8*i+5] = sign_extend;
16011 16013 pperm_bytes[8*i+6] = sign_extend;
16012 16014 pperm_bytes[8*i+7] = sign_extend;
16013 16015 }
16014 16016
16015 16017 for (i = 0; i < 16; i++)
16016 16018 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16017 16019
16018 16020 for (i = 0; i < 2; i++)
16019 16021 RTVEC_ELT (vs, i) = GEN_INT (i + h2);
16020 16022
16021 16023 p = gen_rtx_PARALLEL (VOIDmode, vs);
16022 16024 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16023 16025 if (unsigned_p)
16024 16026 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x));
16025 16027 else
16026 16028 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x));
16027 16029 break;
16028 16030
16029 16031 default:
16030 16032 gcc_unreachable ();
16031 16033 }
16032 16034
16033 16035 return;
16034 16036 }
16035 16037
16036 16038 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the
16037 16039 next narrower integer vector type */
16038 16040 void
16039 16041 ix86_expand_sse5_pack (rtx operands[3])
16040 16042 {
16041 16043 enum machine_mode imode = GET_MODE (operands[0]);
16042 16044 int pperm_bytes[16];
16043 16045 int i;
16044 16046 rtvec v = rtvec_alloc (16);
16045 16047 rtx x;
16046 16048 rtx op0 = operands[0];
16047 16049 rtx op1 = operands[1];
16048 16050 rtx op2 = operands[2];
16049 16051
16050 16052 switch (imode)
16051 16053 {
16052 16054 case V16QImode:
16053 16055 for (i = 0; i < 8; i++)
16054 16056 {
16055 16057 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2);
16056 16058 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2);
16057 16059 }
16058 16060
16059 16061 for (i = 0; i < 16; i++)
16060 16062 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16061 16063
16062 16064 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16063 16065 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x));
16064 16066 break;
16065 16067
16066 16068 case V8HImode:
16067 16069 for (i = 0; i < 4; i++)
16068 16070 {
16069 16071 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0);
16070 16072 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1);
16071 16073 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0);
16072 16074 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1);
16073 16075 }
16074 16076
16075 16077 for (i = 0; i < 16; i++)
16076 16078 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16077 16079
16078 16080 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16079 16081 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x));
16080 16082 break;
16081 16083
16082 16084 case V4SImode:
16083 16085 for (i = 0; i < 2; i++)
16084 16086 {
16085 16087 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0);
16086 16088 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1);
16087 16089 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2);
16088 16090 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3);
16089 16091 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0);
16090 16092 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1);
16091 16093 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2);
16092 16094 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3);
16093 16095 }
16094 16096
16095 16097 for (i = 0; i < 16; i++)
16096 16098 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]);
16097 16099
16098 16100 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v));
16099 16101 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x));
16100 16102 break;
16101 16103
16102 16104 default:
16103 16105 gcc_unreachable ();
16104 16106 }
16105 16107
16106 16108 return;
16107 16109 }
16108 16110
16109 16111 /* Expand conditional increment or decrement using adc/sbb instructions.
16110 16112 The default case using setcc followed by the conditional move can be
16111 16113 done by generic code. */
16112 16114 int
16113 16115 ix86_expand_int_addcc (rtx operands[])
16114 16116 {
16115 16117 enum rtx_code code = GET_CODE (operands[1]);
16116 16118 rtx compare_op;
16117 16119 rtx val = const0_rtx;
16118 16120 bool fpcmp = false;
16119 16121 enum machine_mode mode = GET_MODE (operands[0]);
16120 16122
16121 16123 if (operands[3] != const1_rtx
16122 16124 && operands[3] != constm1_rtx)
16123 16125 return 0;
16124 16126 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
16125 16127 ix86_compare_op1, &compare_op))
16126 16128 return 0;
16127 16129 code = GET_CODE (compare_op);
16128 16130
16129 16131 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
16130 16132 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
16131 16133 {
16132 16134 fpcmp = true;
16133 16135 code = ix86_fp_compare_code_to_integer (code);
16134 16136 }
16135 16137
16136 16138 if (code != LTU)
16137 16139 {
16138 16140 val = constm1_rtx;
16139 16141 if (fpcmp)
16140 16142 PUT_CODE (compare_op,
16141 16143 reverse_condition_maybe_unordered
16142 16144 (GET_CODE (compare_op)));
16143 16145 else
16144 16146 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
16145 16147 }
16146 16148 PUT_MODE (compare_op, mode);
16147 16149
16148 16150 /* Construct either adc or sbb insn. */
16149 16151 if ((code == LTU) == (operands[3] == constm1_rtx))
16150 16152 {
16151 16153 switch (GET_MODE (operands[0]))
16152 16154 {
16153 16155 case QImode:
16154 16156 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
16155 16157 break;
16156 16158 case HImode:
16157 16159 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
16158 16160 break;
16159 16161 case SImode:
16160 16162 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
16161 16163 break;
16162 16164 case DImode:
16163 16165 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16164 16166 break;
16165 16167 default:
16166 16168 gcc_unreachable ();
16167 16169 }
16168 16170 }
16169 16171 else
16170 16172 {
16171 16173 switch (GET_MODE (operands[0]))
16172 16174 {
16173 16175 case QImode:
16174 16176 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
16175 16177 break;
16176 16178 case HImode:
16177 16179 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
16178 16180 break;
16179 16181 case SImode:
16180 16182 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
16181 16183 break;
16182 16184 case DImode:
16183 16185 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
16184 16186 break;
16185 16187 default:
16186 16188 gcc_unreachable ();
16187 16189 }
16188 16190 }
16189 16191 return 1; /* DONE */
16190 16192 }
16191 16193
16192 16194
16193 16195 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
16194 16196 works for floating point parameters and non-offsettable memories.
16195 16197 For pushes, it returns just stack offsets; the values will be saved
16196 16198 in the right order. At most four parts are generated. */
16197 16199
16198 16200 static int
16199 16201 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
16200 16202 {
16201 16203 int size;
16202 16204
16203 16205 if (!TARGET_64BIT)
16204 16206 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
16205 16207 else
16206 16208 size = (GET_MODE_SIZE (mode) + 4) / 8;
16207 16209
16208 16210 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
16209 16211 gcc_assert (size >= 2 && size <= 4);
16210 16212
16211 16213 /* Optimize constant pool reference to immediates. This is used by fp
16212 16214 moves, that force all constants to memory to allow combining. */
16213 16215 if (MEM_P (operand) && MEM_READONLY_P (operand))
16214 16216 {
16215 16217 rtx tmp = maybe_get_pool_constant (operand);
16216 16218 if (tmp)
16217 16219 operand = tmp;
16218 16220 }
16219 16221
16220 16222 if (MEM_P (operand) && !offsettable_memref_p (operand))
16221 16223 {
16222 16224 /* The only non-offsettable memories we handle are pushes. */
16223 16225 int ok = push_operand (operand, VOIDmode);
16224 16226
16225 16227 gcc_assert (ok);
16226 16228
16227 16229 operand = copy_rtx (operand);
16228 16230 PUT_MODE (operand, Pmode);
16229 16231 parts[0] = parts[1] = parts[2] = parts[3] = operand;
16230 16232 return size;
16231 16233 }
16232 16234
16233 16235 if (GET_CODE (operand) == CONST_VECTOR)
16234 16236 {
16235 16237 enum machine_mode imode = int_mode_for_mode (mode);
16236 16238 /* Caution: if we looked through a constant pool memory above,
16237 16239 the operand may actually have a different mode now. That's
16238 16240 ok, since we want to pun this all the way back to an integer. */
16239 16241 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
16240 16242 gcc_assert (operand != NULL);
16241 16243 mode = imode;
16242 16244 }
16243 16245
16244 16246 if (!TARGET_64BIT)
16245 16247 {
16246 16248 if (mode == DImode)
16247 16249 split_di (&operand, 1, &parts[0], &parts[1]);
16248 16250 else
16249 16251 {
16250 16252 int i;
16251 16253
16252 16254 if (REG_P (operand))
16253 16255 {
16254 16256 gcc_assert (reload_completed);
16255 16257 for (i = 0; i < size; i++)
16256 16258 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i);
16257 16259 }
16258 16260 else if (offsettable_memref_p (operand))
16259 16261 {
16260 16262 operand = adjust_address (operand, SImode, 0);
16261 16263 parts[0] = operand;
16262 16264 for (i = 1; i < size; i++)
16263 16265 parts[i] = adjust_address (operand, SImode, 4 * i);
16264 16266 }
16265 16267 else if (GET_CODE (operand) == CONST_DOUBLE)
16266 16268 {
16267 16269 REAL_VALUE_TYPE r;
16268 16270 long l[4];
16269 16271
16270 16272 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16271 16273 switch (mode)
16272 16274 {
16273 16275 case TFmode:
16274 16276 real_to_target (l, &r, mode);
16275 16277 parts[3] = gen_int_mode (l[3], SImode);
16276 16278 parts[2] = gen_int_mode (l[2], SImode);
16277 16279 break;
16278 16280 case XFmode:
16279 16281 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
16280 16282 parts[2] = gen_int_mode (l[2], SImode);
16281 16283 break;
16282 16284 case DFmode:
16283 16285 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
16284 16286 break;
16285 16287 default:
16286 16288 gcc_unreachable ();
16287 16289 }
16288 16290 parts[1] = gen_int_mode (l[1], SImode);
16289 16291 parts[0] = gen_int_mode (l[0], SImode);
16290 16292 }
16291 16293 else
16292 16294 gcc_unreachable ();
16293 16295 }
16294 16296 }
16295 16297 else
16296 16298 {
16297 16299 if (mode == TImode)
16298 16300 split_ti (&operand, 1, &parts[0], &parts[1]);
16299 16301 if (mode == XFmode || mode == TFmode)
16300 16302 {
16301 16303 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
16302 16304 if (REG_P (operand))
16303 16305 {
16304 16306 gcc_assert (reload_completed);
16305 16307 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
16306 16308 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
16307 16309 }
16308 16310 else if (offsettable_memref_p (operand))
16309 16311 {
16310 16312 operand = adjust_address (operand, DImode, 0);
16311 16313 parts[0] = operand;
16312 16314 parts[1] = adjust_address (operand, upper_mode, 8);
16313 16315 }
16314 16316 else if (GET_CODE (operand) == CONST_DOUBLE)
16315 16317 {
16316 16318 REAL_VALUE_TYPE r;
16317 16319 long l[4];
16318 16320
16319 16321 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
16320 16322 real_to_target (l, &r, mode);
16321 16323
16322 16324 /* Do not use shift by 32 to avoid warning on 32bit systems. */
16323 16325 if (HOST_BITS_PER_WIDE_INT >= 64)
16324 16326 parts[0]
16325 16327 = gen_int_mode
16326 16328 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
16327 16329 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
16328 16330 DImode);
16329 16331 else
16330 16332 parts[0] = immed_double_const (l[0], l[1], DImode);
16331 16333
16332 16334 if (upper_mode == SImode)
16333 16335 parts[1] = gen_int_mode (l[2], SImode);
16334 16336 else if (HOST_BITS_PER_WIDE_INT >= 64)
16335 16337 parts[1]
16336 16338 = gen_int_mode
16337 16339 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
16338 16340 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
16339 16341 DImode);
16340 16342 else
16341 16343 parts[1] = immed_double_const (l[2], l[3], DImode);
16342 16344 }
16343 16345 else
16344 16346 gcc_unreachable ();
16345 16347 }
16346 16348 }
16347 16349
16348 16350 return size;
16349 16351 }
16350 16352
16351 16353 /* Emit insns to perform a move or push of DI, DF, XF, and TF values.
16352 16354 Return false when normal moves are needed; true when all required
16353 16355 insns have been emitted. Operands 2-4 contain the input values
16354 16356 in the correct order; operands 5-7 contain the output values. */
16355 16357
16356 16358 void
16357 16359 ix86_split_long_move (rtx operands[])
16358 16360 {
16359 16361 rtx part[2][4];
16360 16362 int nparts, i, j;
16361 16363 int push = 0;
16362 16364 int collisions = 0;
16363 16365 enum machine_mode mode = GET_MODE (operands[0]);
16364 16366 bool collisionparts[4];
16365 16367
16366 16368 /* The DFmode expanders may ask us to move double.
16367 16369 For 64bit target this is single move. By hiding the fact
16368 16370 here we simplify i386.md splitters. */
16369 16371 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
16370 16372 {
16371 16373 /* Optimize constant pool reference to immediates. This is used by
16372 16374 fp moves, that force all constants to memory to allow combining. */
16373 16375
16374 16376 if (MEM_P (operands[1])
16375 16377 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
16376 16378 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
16377 16379 operands[1] = get_pool_constant (XEXP (operands[1], 0));
16378 16380 if (push_operand (operands[0], VOIDmode))
16379 16381 {
16380 16382 operands[0] = copy_rtx (operands[0]);
16381 16383 PUT_MODE (operands[0], Pmode);
16382 16384 }
16383 16385 else
16384 16386 operands[0] = gen_lowpart (DImode, operands[0]);
16385 16387 operands[1] = gen_lowpart (DImode, operands[1]);
16386 16388 emit_move_insn (operands[0], operands[1]);
16387 16389 return;
16388 16390 }
16389 16391
16390 16392 /* The only non-offsettable memory we handle is push. */
16391 16393 if (push_operand (operands[0], VOIDmode))
16392 16394 push = 1;
16393 16395 else
16394 16396 gcc_assert (!MEM_P (operands[0])
16395 16397 || offsettable_memref_p (operands[0]));
16396 16398
16397 16399 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
16398 16400 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
16399 16401
16400 16402 /* When emitting push, take care for source operands on the stack. */
16401 16403 if (push && MEM_P (operands[1])
16402 16404 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
16403 16405 {
16404 16406 rtx src_base = XEXP (part[1][nparts - 1], 0);
16405 16407
16406 16408 /* Compensate for the stack decrement by 4. */
16407 16409 if (!TARGET_64BIT && nparts == 3
16408 16410 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE)
16409 16411 src_base = plus_constant (src_base, 4);
16410 16412
16411 16413 /* src_base refers to the stack pointer and is
16412 16414 automatically decreased by emitted push. */
16413 16415 for (i = 0; i < nparts; i++)
16414 16416 part[1][i] = change_address (part[1][i],
16415 16417 GET_MODE (part[1][i]), src_base);
16416 16418 }
16417 16419
16418 16420 /* We need to do copy in the right order in case an address register
16419 16421 of the source overlaps the destination. */
16420 16422 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
16421 16423 {
16422 16424 rtx tmp;
16423 16425
16424 16426 for (i = 0; i < nparts; i++)
16425 16427 {
16426 16428 collisionparts[i]
16427 16429 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0));
16428 16430 if (collisionparts[i])
16429 16431 collisions++;
16430 16432 }
16431 16433
16432 16434 /* Collision in the middle part can be handled by reordering. */
16433 16435 if (collisions == 1 && nparts == 3 && collisionparts [1])
16434 16436 {
16435 16437 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16436 16438 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16437 16439 }
16438 16440 else if (collisions == 1
16439 16441 && nparts == 4
16440 16442 && (collisionparts [1] || collisionparts [2]))
16441 16443 {
16442 16444 if (collisionparts [1])
16443 16445 {
16444 16446 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
16445 16447 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
16446 16448 }
16447 16449 else
16448 16450 {
16449 16451 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp;
16450 16452 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp;
16451 16453 }
16452 16454 }
16453 16455
16454 16456 /* If there are more collisions, we can't handle it by reordering.
16455 16457 Do an lea to the last part and use only one colliding move. */
16456 16458 else if (collisions > 1)
16457 16459 {
16458 16460 rtx base;
16459 16461
16460 16462 collisions = 1;
16461 16463
16462 16464 base = part[0][nparts - 1];
16463 16465
16464 16466 /* Handle the case when the last part isn't valid for lea.
16465 16467 Happens in 64-bit mode storing the 12-byte XFmode. */
16466 16468 if (GET_MODE (base) != Pmode)
16467 16469 base = gen_rtx_REG (Pmode, REGNO (base));
16468 16470
16469 16471 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
16470 16472 part[1][0] = replace_equiv_address (part[1][0], base);
16471 16473 for (i = 1; i < nparts; i++)
16472 16474 {
16473 16475 tmp = plus_constant (base, UNITS_PER_WORD * i);
16474 16476 part[1][i] = replace_equiv_address (part[1][i], tmp);
16475 16477 }
16476 16478 }
16477 16479 }
16478 16480
16479 16481 if (push)
16480 16482 {
16481 16483 if (!TARGET_64BIT)
16482 16484 {
16483 16485 if (nparts == 3)
16484 16486 {
16485 16487 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
16486 16488 emit_insn (gen_addsi3 (stack_pointer_rtx,
16487 16489 stack_pointer_rtx, GEN_INT (-4)));
16488 16490 emit_move_insn (part[0][2], part[1][2]);
16489 16491 }
16490 16492 else if (nparts == 4)
16491 16493 {
16492 16494 emit_move_insn (part[0][3], part[1][3]);
16493 16495 emit_move_insn (part[0][2], part[1][2]);
16494 16496 }
16495 16497 }
16496 16498 else
16497 16499 {
16498 16500 /* In 64bit mode we don't have 32bit push available. In case this is a
16499 16501 register, it is OK - we will just use the larger counterpart. We also
16500 16502 retype memory - this comes from an attempt to avoid a REX prefix on
16501 16503 the move of the second half of a TFmode value. */
16502 16504 if (GET_MODE (part[1][1]) == SImode)
16503 16505 {
16504 16506 switch (GET_CODE (part[1][1]))
16505 16507 {
16506 16508 case MEM:
16507 16509 part[1][1] = adjust_address (part[1][1], DImode, 0);
16508 16510 break;
16509 16511
16510 16512 case REG:
16511 16513 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
16512 16514 break;
16513 16515
16514 16516 default:
16515 16517 gcc_unreachable ();
16516 16518 }
16517 16519
16518 16520 if (GET_MODE (part[1][0]) == SImode)
16519 16521 part[1][0] = part[1][1];
16520 16522 }
16521 16523 }
16522 16524 emit_move_insn (part[0][1], part[1][1]);
16523 16525 emit_move_insn (part[0][0], part[1][0]);
16524 16526 return;
16525 16527 }
16526 16528
16527 16529 /* Choose correct order to not overwrite the source before it is copied. */
16528 16530 if ((REG_P (part[0][0])
16529 16531 && REG_P (part[1][1])
16530 16532 && (REGNO (part[0][0]) == REGNO (part[1][1])
16531 16533 || (nparts == 3
16532 16534 && REGNO (part[0][0]) == REGNO (part[1][2]))
16533 16535 || (nparts == 4
16534 16536 && REGNO (part[0][0]) == REGNO (part[1][3]))))
16535 16537 || (collisions > 0
16536 16538 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
16537 16539 {
16538 16540 for (i = 0, j = nparts - 1; i < nparts; i++, j--)
16539 16541 {
16540 16542 operands[2 + i] = part[0][j];
16541 16543 operands[6 + i] = part[1][j];
16542 16544 }
16543 16545 }
16544 16546 else
16545 16547 {
16546 16548 for (i = 0; i < nparts; i++)
16547 16549 {
16548 16550 operands[2 + i] = part[0][i];
16549 16551 operands[6 + i] = part[1][i];
16550 16552 }
16551 16553 }
16552 16554
16553 16555 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
16554 16556 if (optimize_insn_for_size_p ())
16555 16557 {
16556 16558 for (j = 0; j < nparts - 1; j++)
16557 16559 if (CONST_INT_P (operands[6 + j])
16558 16560 && operands[6 + j] != const0_rtx
16559 16561 && REG_P (operands[2 + j]))
16560 16562 for (i = j; i < nparts - 1; i++)
16561 16563 if (CONST_INT_P (operands[7 + i])
16562 16564 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j]))
16563 16565 operands[7 + i] = operands[2 + j];
16564 16566 }
16565 16567
16566 16568 for (i = 0; i < nparts; i++)
16567 16569 emit_move_insn (operands[2 + i], operands[6 + i]);
16568 16570
16569 16571 return;
16570 16572 }
16571 16573
16572 16574 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
16573 16575 left shift by a constant, either using a single shift or
16574 16576 a sequence of add instructions. */
16575 16577
16576 16578 static void
16577 16579 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
16578 16580 {
16579 16581 if (count == 1)
16580 16582 {
16581 16583 emit_insn ((mode == DImode
16582 16584 ? gen_addsi3
16583 16585 : gen_adddi3) (operand, operand, operand));
16584 16586 }
16585 16587 else if (!optimize_insn_for_size_p ()
16586 16588 && count * ix86_cost->add <= ix86_cost->shift_const)
16587 16589 {
16588 16590 int i;
16589 16591 for (i=0; i<count; i++)
16590 16592 {
16591 16593 emit_insn ((mode == DImode
16592 16594 ? gen_addsi3
16593 16595 : gen_adddi3) (operand, operand, operand));
16594 16596 }
16595 16597 }
16596 16598 else
16597 16599 emit_insn ((mode == DImode
16598 16600 ? gen_ashlsi3
16599 16601 : gen_ashldi3) (operand, operand, GEN_INT (count)));
16600 16602 }
16601 16603
16602 16604 void
16603 16605 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
16604 16606 {
16605 16607 rtx low[2], high[2];
16606 16608 int count;
16607 16609 const int single_width = mode == DImode ? 32 : 64;
16608 16610
16609 16611 if (CONST_INT_P (operands[2]))
16610 16612 {
16611 16613 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16612 16614 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16613 16615
16614 16616 if (count >= single_width)
16615 16617 {
16616 16618 emit_move_insn (high[0], low[1]);
16617 16619 emit_move_insn (low[0], const0_rtx);
16618 16620
16619 16621 if (count > single_width)
16620 16622 ix86_expand_ashl_const (high[0], count - single_width, mode);
16621 16623 }
16622 16624 else
16623 16625 {
16624 16626 if (!rtx_equal_p (operands[0], operands[1]))
16625 16627 emit_move_insn (operands[0], operands[1]);
16626 16628 emit_insn ((mode == DImode
16627 16629 ? gen_x86_shld
16628 16630 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
16629 16631 ix86_expand_ashl_const (low[0], count, mode);
16630 16632 }
16631 16633 return;
16632 16634 }
16633 16635
16634 16636 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16635 16637
16636 16638 if (operands[1] == const1_rtx)
16637 16639 {
16638 16640 /* Assuming we've chosen QImode-capable registers, then 1 << N
16639 16641 can be done with two 32/64-bit shifts, no branches, no cmoves. */
16640 16642 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
16641 16643 {
16642 16644 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
16643 16645
16644 16646 ix86_expand_clear (low[0]);
16645 16647 ix86_expand_clear (high[0]);
16646 16648 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
16647 16649
16648 16650 d = gen_lowpart (QImode, low[0]);
16649 16651 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16650 16652 s = gen_rtx_EQ (QImode, flags, const0_rtx);
16651 16653 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16652 16654
16653 16655 d = gen_lowpart (QImode, high[0]);
16654 16656 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
16655 16657 s = gen_rtx_NE (QImode, flags, const0_rtx);
16656 16658 emit_insn (gen_rtx_SET (VOIDmode, d, s));
16657 16659 }
16658 16660
16659 16661 /* Otherwise, we can get the same results by manually performing
16660 16662 a bit extract operation on bit 5/6, and then performing the two
16661 16663 shifts. The two methods of getting 0/1 into low/high are exactly
16662 16664 the same size. Avoiding the shift in the bit extract case helps
16663 16665 pentium4 a bit; no one else seems to care much either way. */
16664 16666 else
16665 16667 {
16666 16668 rtx x;
16667 16669
16668 16670 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ())
16669 16671 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
16670 16672 else
16671 16673 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
16672 16674 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
16673 16675
16674 16676 emit_insn ((mode == DImode
16675 16677 ? gen_lshrsi3
16676 16678 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
16677 16679 emit_insn ((mode == DImode
16678 16680 ? gen_andsi3
16679 16681 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
16680 16682 emit_move_insn (low[0], high[0]);
16681 16683 emit_insn ((mode == DImode
16682 16684 ? gen_xorsi3
16683 16685 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
16684 16686 }
16685 16687
16686 16688 emit_insn ((mode == DImode
16687 16689 ? gen_ashlsi3
16688 16690 : gen_ashldi3) (low[0], low[0], operands[2]));
16689 16691 emit_insn ((mode == DImode
16690 16692 ? gen_ashlsi3
16691 16693 : gen_ashldi3) (high[0], high[0], operands[2]));
16692 16694 return;
16693 16695 }
16694 16696
16695 16697 if (operands[1] == constm1_rtx)
16696 16698 {
16697 16699 /* For -1 << N, we can avoid the shld instruction, because we
16698 16700 know that we're shifting 0...31/63 ones into a -1. */
16699 16701 emit_move_insn (low[0], constm1_rtx);
16700 16702 if (optimize_insn_for_size_p ())
16701 16703 emit_move_insn (high[0], low[0]);
16702 16704 else
16703 16705 emit_move_insn (high[0], constm1_rtx);
16704 16706 }
16705 16707 else
16706 16708 {
16707 16709 if (!rtx_equal_p (operands[0], operands[1]))
16708 16710 emit_move_insn (operands[0], operands[1]);
16709 16711
16710 16712 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16711 16713 emit_insn ((mode == DImode
16712 16714 ? gen_x86_shld
16713 16715 : gen_x86_64_shld) (high[0], low[0], operands[2]));
16714 16716 }
16715 16717
16716 16718 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
16717 16719
16718 16720 if (TARGET_CMOVE && scratch)
16719 16721 {
16720 16722 ix86_expand_clear (scratch);
16721 16723 emit_insn ((mode == DImode
16722 16724 ? gen_x86_shift_adj_1
16723 16725 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2],
16724 16726 scratch));
16725 16727 }
16726 16728 else
16727 16729 emit_insn ((mode == DImode
16728 16730 ? gen_x86_shift_adj_2
16729 16731 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2]));
16730 16732 }
16731 16733
16732 16734 void
16733 16735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
16734 16736 {
16735 16737 rtx low[2], high[2];
16736 16738 int count;
16737 16739 const int single_width = mode == DImode ? 32 : 64;
16738 16740
16739 16741 if (CONST_INT_P (operands[2]))
16740 16742 {
16741 16743 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16742 16744 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16743 16745
16744 16746 if (count == single_width * 2 - 1)
16745 16747 {
16746 16748 emit_move_insn (high[0], high[1]);
16747 16749 emit_insn ((mode == DImode
16748 16750 ? gen_ashrsi3
16749 16751 : gen_ashrdi3) (high[0], high[0],
16750 16752 GEN_INT (single_width - 1)));
16751 16753 emit_move_insn (low[0], high[0]);
16752 16754
16753 16755 }
16754 16756 else if (count >= single_width)
16755 16757 {
16756 16758 emit_move_insn (low[0], high[1]);
16757 16759 emit_move_insn (high[0], low[0]);
16758 16760 emit_insn ((mode == DImode
16759 16761 ? gen_ashrsi3
16760 16762 : gen_ashrdi3) (high[0], high[0],
16761 16763 GEN_INT (single_width - 1)));
16762 16764 if (count > single_width)
16763 16765 emit_insn ((mode == DImode
16764 16766 ? gen_ashrsi3
16765 16767 : gen_ashrdi3) (low[0], low[0],
16766 16768 GEN_INT (count - single_width)));
16767 16769 }
16768 16770 else
16769 16771 {
16770 16772 if (!rtx_equal_p (operands[0], operands[1]))
16771 16773 emit_move_insn (operands[0], operands[1]);
16772 16774 emit_insn ((mode == DImode
16773 16775 ? gen_x86_shrd
16774 16776 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16775 16777 emit_insn ((mode == DImode
16776 16778 ? gen_ashrsi3
16777 16779 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
16778 16780 }
16779 16781 }
16780 16782 else
16781 16783 {
16782 16784 if (!rtx_equal_p (operands[0], operands[1]))
16783 16785 emit_move_insn (operands[0], operands[1]);
16784 16786
16785 16787 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16786 16788
16787 16789 emit_insn ((mode == DImode
16788 16790 ? gen_x86_shrd
16789 16791 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16790 16792 emit_insn ((mode == DImode
16791 16793 ? gen_ashrsi3
16792 16794 : gen_ashrdi3) (high[0], high[0], operands[2]));
16793 16795
16794 16796 if (TARGET_CMOVE && scratch)
16795 16797 {
16796 16798 emit_move_insn (scratch, high[0]);
16797 16799 emit_insn ((mode == DImode
16798 16800 ? gen_ashrsi3
16799 16801 : gen_ashrdi3) (scratch, scratch,
16800 16802 GEN_INT (single_width - 1)));
16801 16803 emit_insn ((mode == DImode
16802 16804 ? gen_x86_shift_adj_1
16803 16805 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16804 16806 scratch));
16805 16807 }
16806 16808 else
16807 16809 emit_insn ((mode == DImode
16808 16810 ? gen_x86_shift_adj_3
16809 16811 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2]));
16810 16812 }
16811 16813 }
16812 16814
16813 16815 void
16814 16816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
16815 16817 {
16816 16818 rtx low[2], high[2];
16817 16819 int count;
16818 16820 const int single_width = mode == DImode ? 32 : 64;
16819 16821
16820 16822 if (CONST_INT_P (operands[2]))
16821 16823 {
16822 16824 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
16823 16825 count = INTVAL (operands[2]) & (single_width * 2 - 1);
16824 16826
16825 16827 if (count >= single_width)
16826 16828 {
16827 16829 emit_move_insn (low[0], high[1]);
16828 16830 ix86_expand_clear (high[0]);
16829 16831
16830 16832 if (count > single_width)
16831 16833 emit_insn ((mode == DImode
16832 16834 ? gen_lshrsi3
16833 16835 : gen_lshrdi3) (low[0], low[0],
16834 16836 GEN_INT (count - single_width)));
16835 16837 }
16836 16838 else
16837 16839 {
16838 16840 if (!rtx_equal_p (operands[0], operands[1]))
16839 16841 emit_move_insn (operands[0], operands[1]);
16840 16842 emit_insn ((mode == DImode
16841 16843 ? gen_x86_shrd
16842 16844 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
16843 16845 emit_insn ((mode == DImode
16844 16846 ? gen_lshrsi3
16845 16847 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
16846 16848 }
16847 16849 }
16848 16850 else
16849 16851 {
16850 16852 if (!rtx_equal_p (operands[0], operands[1]))
16851 16853 emit_move_insn (operands[0], operands[1]);
16852 16854
16853 16855 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
16854 16856
16855 16857 emit_insn ((mode == DImode
16856 16858 ? gen_x86_shrd
16857 16859 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
16858 16860 emit_insn ((mode == DImode
16859 16861 ? gen_lshrsi3
16860 16862 : gen_lshrdi3) (high[0], high[0], operands[2]));
16861 16863
16862 16864 /* Heh. By reversing the arguments, we can reuse this pattern. */
16863 16865 if (TARGET_CMOVE && scratch)
16864 16866 {
16865 16867 ix86_expand_clear (scratch);
16866 16868 emit_insn ((mode == DImode
16867 16869 ? gen_x86_shift_adj_1
16868 16870 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2],
16869 16871 scratch));
16870 16872 }
16871 16873 else
16872 16874 emit_insn ((mode == DImode
16873 16875 ? gen_x86_shift_adj_2
16874 16876 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2]));
16875 16877 }
16876 16878 }
16877 16879
16878 16880 /* Predict just emitted jump instruction to be taken with probability PROB. */
16879 16881 static void
16880 16882 predict_jump (int prob)
16881 16883 {
16882 16884 rtx insn = get_last_insn ();
16883 16885 gcc_assert (JUMP_P (insn));
16884 16886 REG_NOTES (insn)
16885 16887 = gen_rtx_EXPR_LIST (REG_BR_PROB,
16886 16888 GEN_INT (prob),
16887 16889 REG_NOTES (insn));
16888 16890 }
16889 16891
16890 16892 /* Helper function for the string operations below. Test whether VARIABLE
16891 16893 is aligned to VALUE bytes; if so, jump to the label. */
16892 16894 static rtx
16893 16895 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
16894 16896 {
16895 16897 rtx label = gen_label_rtx ();
16896 16898 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
16897 16899 if (GET_MODE (variable) == DImode)
16898 16900 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
16899 16901 else
16900 16902 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
16901 16903 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
16902 16904 1, label);
16903 16905 if (epilogue)
16904 16906 predict_jump (REG_BR_PROB_BASE * 50 / 100);
16905 16907 else
16906 16908 predict_jump (REG_BR_PROB_BASE * 90 / 100);
16907 16909 return label;
16908 16910 }
16909 16911
16910 16912 /* Adjust COUNTER by the VALUE. */
16911 16913 static void
16912 16914 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
16913 16915 {
16914 16916 if (GET_MODE (countreg) == DImode)
16915 16917 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
16916 16918 else
16917 16919 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
16918 16920 }
16919 16921
16920 16922 /* Zero extend possibly SImode EXP to Pmode register. */
16921 16923 rtx
16922 16924 ix86_zero_extend_to_Pmode (rtx exp)
16923 16925 {
16924 16926 rtx r;
16925 16927 if (GET_MODE (exp) == VOIDmode)
16926 16928 return force_reg (Pmode, exp);
16927 16929 if (GET_MODE (exp) == Pmode)
16928 16930 return copy_to_mode_reg (Pmode, exp);
16929 16931 r = gen_reg_rtx (Pmode);
16930 16932 emit_insn (gen_zero_extendsidi2 (r, exp));
16931 16933 return r;
16932 16934 }
16933 16935
16934 16936 /* Divide COUNTREG by SCALE. */
16935 16937 static rtx
16936 16938 scale_counter (rtx countreg, int scale)
16937 16939 {
16938 16940 rtx sc;
16939 16941 rtx piece_size_mask;
16940 16942
16941 16943 if (scale == 1)
16942 16944 return countreg;
16943 16945 if (CONST_INT_P (countreg))
16944 16946 return GEN_INT (INTVAL (countreg) / scale);
16945 16947 gcc_assert (REG_P (countreg));
16946 16948
16947 16949 piece_size_mask = GEN_INT (scale - 1);
16948 16950 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
16949 16951 GEN_INT (exact_log2 (scale)),
16950 16952 NULL, 1, OPTAB_DIRECT);
16951 16953 return sc;
16952 16954 }
16953 16955
16954 16956 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
16955 16957 DImode for constant loop counts. */
16956 16958
16957 16959 static enum machine_mode
16958 16960 counter_mode (rtx count_exp)
16959 16961 {
16960 16962 if (GET_MODE (count_exp) != VOIDmode)
16961 16963 return GET_MODE (count_exp);
16962 16964 if (GET_CODE (count_exp) != CONST_INT)
16963 16965 return Pmode;
16964 16966 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
16965 16967 return DImode;
16966 16968 return SImode;
16967 16969 }
16968 16970
16969 16971 /* When SRCPTR is non-NULL, output a simple loop to move the memory
16970 16972 pointed to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times;
16971 16973 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output
16972 16974 the equivalent loop to set memory to VALUE (assumed to be in MODE).
16973 16975
16974 16976 The size is rounded down to a whole multiple of the chunk size moved at once.
16975 16977 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
16976 16978
16977 16979
16978 16980 static void
16979 16981 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
16980 16982 rtx destptr, rtx srcptr, rtx value,
16981 16983 rtx count, enum machine_mode mode, int unroll,
16982 16984 int expected_size)
16983 16985 {
16984 16986 rtx out_label, top_label, iter, tmp;
16985 16987 enum machine_mode iter_mode = counter_mode (count);
16986 16988 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
16987 16989 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
16988 16990 rtx size;
16989 16991 rtx x_addr;
16990 16992 rtx y_addr;
16991 16993 int i;
16992 16994
16993 16995 top_label = gen_label_rtx ();
16994 16996 out_label = gen_label_rtx ();
16995 16997 iter = gen_reg_rtx (iter_mode);
16996 16998
16997 16999 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
16998 17000 NULL, 1, OPTAB_DIRECT);
16999 17001 /* Those two should combine. */
17000 17002 if (piece_size == const1_rtx)
17001 17003 {
17002 17004 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
17003 17005 true, out_label);
17004 17006 predict_jump (REG_BR_PROB_BASE * 10 / 100);
17005 17007 }
17006 17008 emit_move_insn (iter, const0_rtx);
17007 17009
17008 17010 emit_label (top_label);
17009 17011
17010 17012 tmp = convert_modes (Pmode, iter_mode, iter, true);
17011 17013 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
17012 17014 destmem = change_address (destmem, mode, x_addr);
17013 17015
17014 17016 if (srcmem)
17015 17017 {
17016 17018 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
17017 17019 srcmem = change_address (srcmem, mode, y_addr);
17018 17020
17019 17021 /* When unrolling for chips that reorder memory reads and writes,
17020 17022 we can save registers by using a single temporary.
17021 17023 Also, using 4 temporaries is overkill in 32bit mode. */
17022 17024 if (!TARGET_64BIT && 0)
17023 17025 {
17024 17026 for (i = 0; i < unroll; i++)
17025 17027 {
17026 17028 if (i)
17027 17029 {
17028 17030 destmem =
17029 17031 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17030 17032 srcmem =
17031 17033 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17032 17034 }
17033 17035 emit_move_insn (destmem, srcmem);
17034 17036 }
17035 17037 }
17036 17038 else
17037 17039 {
17038 17040 rtx tmpreg[4];
17039 17041 gcc_assert (unroll <= 4);
17040 17042 for (i = 0; i < unroll; i++)
17041 17043 {
17042 17044 tmpreg[i] = gen_reg_rtx (mode);
17043 17045 if (i)
17044 17046 {
17045 17047 srcmem =
17046 17048 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
17047 17049 }
17048 17050 emit_move_insn (tmpreg[i], srcmem);
17049 17051 }
17050 17052 for (i = 0; i < unroll; i++)
17051 17053 {
17052 17054 if (i)
17053 17055 {
17054 17056 destmem =
17055 17057 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17056 17058 }
17057 17059 emit_move_insn (destmem, tmpreg[i]);
17058 17060 }
17059 17061 }
17060 17062 }
17061 17063 else
17062 17064 for (i = 0; i < unroll; i++)
17063 17065 {
17064 17066 if (i)
17065 17067 destmem =
17066 17068 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
17067 17069 emit_move_insn (destmem, value);
17068 17070 }
17069 17071
17070 17072 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
17071 17073 true, OPTAB_LIB_WIDEN);
17072 17074 if (tmp != iter)
17073 17075 emit_move_insn (iter, tmp);
17074 17076
17075 17077 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
17076 17078 true, top_label);
17077 17079 if (expected_size != -1)
17078 17080 {
17079 17081 expected_size /= GET_MODE_SIZE (mode) * unroll;
17080 17082 if (expected_size == 0)
17081 17083 predict_jump (0);
17082 17084 else if (expected_size > REG_BR_PROB_BASE)
17083 17085 predict_jump (REG_BR_PROB_BASE - 1);
17084 17086 else
17085 17087 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
17086 17088 }
17087 17089 else
17088 17090 predict_jump (REG_BR_PROB_BASE * 80 / 100);
17089 17091 iter = ix86_zero_extend_to_Pmode (iter);
17090 17092 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
17091 17093 true, OPTAB_LIB_WIDEN);
17092 17094 if (tmp != destptr)
17093 17095 emit_move_insn (destptr, tmp);
17094 17096 if (srcptr)
17095 17097 {
17096 17098 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
17097 17099 true, OPTAB_LIB_WIDEN);
17098 17100 if (tmp != srcptr)
17099 17101 emit_move_insn (srcptr, tmp);
17100 17102 }
17101 17103 emit_label (out_label);
17102 17104 }
17103 17105
17104 17106 /* Output "rep; mov" instruction.
17105 17107 Arguments have the same meaning as for the previous function. */
17106 17108 static void
17107 17109 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
17108 17110 rtx destptr, rtx srcptr,
17109 17111 rtx count,
17110 17112 enum machine_mode mode)
17111 17113 {
17112 17114 rtx destexp;
17113 17115 rtx srcexp;
17114 17116 rtx countreg;
17115 17117
17116 17118 /* If the size is known, it is shorter to use rep movs. */
17117 17119 if (mode == QImode && CONST_INT_P (count)
17118 17120 && !(INTVAL (count) & 3))
17119 17121 mode = SImode;
17120 17122
17121 17123 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17122 17124 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17123 17125 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
17124 17126 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
17125 17127 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17126 17128 if (mode != QImode)
17127 17129 {
17128 17130 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17129 17131 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17130 17132 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17131 17133 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
17132 17134 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17133 17135 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
17134 17136 }
17135 17137 else
17136 17138 {
17137 17139 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17138 17140 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
17139 17141 }
17140 17142 if (CONST_INT_P (count))
17141 17143 {
17142 17144 count = GEN_INT (INTVAL (count)
17143 17145 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17144 17146 destmem = shallow_copy_rtx (destmem);
17145 17147 srcmem = shallow_copy_rtx (srcmem);
17146 17148 set_mem_size (destmem, count);
17147 17149 set_mem_size (srcmem, count);
17148 17150 }
17149 17151 else
17150 17152 {
17151 17153 if (MEM_SIZE (destmem))
17152 17154 set_mem_size (destmem, NULL_RTX);
17153 17155 if (MEM_SIZE (srcmem))
17154 17156 set_mem_size (srcmem, NULL_RTX);
17155 17157 }
17156 17158 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
17157 17159 destexp, srcexp));
17158 17160 }
17159 17161
17160 17162 /* Output "rep; stos" instruction.
17161 17163 Arguments have the same meaning as for the previous function. */
17162 17164 static void
17163 17165 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
17164 17166 rtx count, enum machine_mode mode,
17165 17167 rtx orig_value)
17166 17168 {
17167 17169 rtx destexp;
17168 17170 rtx countreg;
17169 17171
17170 17172 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
17171 17173 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
17172 17174 value = force_reg (mode, gen_lowpart (mode, value));
17173 17175 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
17174 17176 if (mode != QImode)
17175 17177 {
17176 17178 destexp = gen_rtx_ASHIFT (Pmode, countreg,
17177 17179 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
17178 17180 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
17179 17181 }
17180 17182 else
17181 17183 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
17182 17184 if (orig_value == const0_rtx && CONST_INT_P (count))
17183 17185 {
17184 17186 count = GEN_INT (INTVAL (count)
17185 17187 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1));
17186 17188 destmem = shallow_copy_rtx (destmem);
17187 17189 set_mem_size (destmem, count);
17188 17190 }
17189 17191 else if (MEM_SIZE (destmem))
17190 17192 set_mem_size (destmem, NULL_RTX);
17191 17193 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
17192 17194 }
17193 17195
17194 17196 static void
17195 17197 emit_strmov (rtx destmem, rtx srcmem,
17196 17198 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
17197 17199 {
17198 17200 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
17199 17201 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
17200 17202 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17201 17203 }
17202 17204
17203 17205 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
17204 17206 static void
17205 17207 expand_movmem_epilogue (rtx destmem, rtx srcmem,
17206 17208 rtx destptr, rtx srcptr, rtx count, int max_size)
17207 17209 {
17208 17210 rtx src, dest;
17209 17211 if (CONST_INT_P (count))
17210 17212 {
17211 17213 HOST_WIDE_INT countval = INTVAL (count);
17212 17214 int offset = 0;
17213 17215
17214 17216 if ((countval & 0x10) && max_size > 16)
17215 17217 {
17216 17218 if (TARGET_64BIT)
17217 17219 {
17218 17220 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17219 17221 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
17220 17222 }
17221 17223 else
17222 17224 gcc_unreachable ();
17223 17225 offset += 16;
17224 17226 }
17225 17227 if ((countval & 0x08) && max_size > 8)
17226 17228 {
17227 17229 if (TARGET_64BIT)
17228 17230 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
17229 17231 else
17230 17232 {
17231 17233 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17232 17234 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
17233 17235 }
17234 17236 offset += 8;
17235 17237 }
17236 17238 if ((countval & 0x04) && max_size > 4)
17237 17239 {
17238 17240 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
17239 17241 offset += 4;
17240 17242 }
17241 17243 if ((countval & 0x02) && max_size > 2)
17242 17244 {
17243 17245 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
17244 17246 offset += 2;
17245 17247 }
17246 17248 if ((countval & 0x01) && max_size > 1)
17247 17249 {
17248 17250 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
17249 17251 offset += 1;
17250 17252 }
17251 17253 return;
17252 17254 }
17253 17255 if (max_size > 8)
17254 17256 {
17255 17257 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
17256 17258 count, 1, OPTAB_DIRECT);
17257 17259 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
17258 17260 count, QImode, 1, 4);
17259 17261 return;
17260 17262 }
17261 17263
17262 17264 /* When single-instruction stringops are available, we can cheaply advance
17263 17265 the dest and src pointers. Otherwise we save code size by maintaining an
17264 17266 offset (zero is readily available from the preceding rep operation) and
17265 17267 using x86 addressing modes. */
17266 17268 if (TARGET_SINGLE_STRINGOP)
17267 17269 {
17268 17270 if (max_size > 4)
17269 17271 {
17270 17272 rtx label = ix86_expand_aligntest (count, 4, true);
17271 17273 src = change_address (srcmem, SImode, srcptr);
17272 17274 dest = change_address (destmem, SImode, destptr);
17273 17275 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17274 17276 emit_label (label);
17275 17277 LABEL_NUSES (label) = 1;
17276 17278 }
17277 17279 if (max_size > 2)
17278 17280 {
17279 17281 rtx label = ix86_expand_aligntest (count, 2, true);
17280 17282 src = change_address (srcmem, HImode, srcptr);
17281 17283 dest = change_address (destmem, HImode, destptr);
17282 17284 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17283 17285 emit_label (label);
17284 17286 LABEL_NUSES (label) = 1;
17285 17287 }
17286 17288 if (max_size > 1)
17287 17289 {
17288 17290 rtx label = ix86_expand_aligntest (count, 1, true);
17289 17291 src = change_address (srcmem, QImode, srcptr);
17290 17292 dest = change_address (destmem, QImode, destptr);
17291 17293 emit_insn (gen_strmov (destptr, dest, srcptr, src));
17292 17294 emit_label (label);
17293 17295 LABEL_NUSES (label) = 1;
17294 17296 }
17295 17297 }
17296 17298 else
17297 17299 {
17298 17300 rtx offset = force_reg (Pmode, const0_rtx);
17299 17301 rtx tmp;
17300 17302
17301 17303 if (max_size > 4)
17302 17304 {
17303 17305 rtx label = ix86_expand_aligntest (count, 4, true);
17304 17306 src = change_address (srcmem, SImode, srcptr);
17305 17307 dest = change_address (destmem, SImode, destptr);
17306 17308 emit_move_insn (dest, src);
17307 17309 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
17308 17310 true, OPTAB_LIB_WIDEN);
17309 17311 if (tmp != offset)
17310 17312 emit_move_insn (offset, tmp);
17311 17313 emit_label (label);
17312 17314 LABEL_NUSES (label) = 1;
17313 17315 }
17314 17316 if (max_size > 2)
17315 17317 {
17316 17318 rtx label = ix86_expand_aligntest (count, 2, true);
17317 17319 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17318 17320 src = change_address (srcmem, HImode, tmp);
17319 17321 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17320 17322 dest = change_address (destmem, HImode, tmp);
17321 17323 emit_move_insn (dest, src);
17322 17324 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
17323 17325 true, OPTAB_LIB_WIDEN);
17324 17326 if (tmp != offset)
17325 17327 emit_move_insn (offset, tmp);
17326 17328 emit_label (label);
17327 17329 LABEL_NUSES (label) = 1;
17328 17330 }
17329 17331 if (max_size > 1)
17330 17332 {
17331 17333 rtx label = ix86_expand_aligntest (count, 1, true);
17332 17334 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
17333 17335 src = change_address (srcmem, QImode, tmp);
17334 17336 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
17335 17337 dest = change_address (destmem, QImode, tmp);
17336 17338 emit_move_insn (dest, src);
17337 17339 emit_label (label);
17338 17340 LABEL_NUSES (label) = 1;
17339 17341 }
17340 17342 }
17341 17343 }
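To make the constant-count branch of the epilogue concrete: a residual count of 13 (binary 1101) on a 64-bit target has the 0x08, 0x04 and 0x01 bits set, so it decomposes into one DImode, one SImode and one QImode move at increasing offsets, roughly as in this illustrative sketch:

    #include <string.h>

    /* Moves emitted for a constant epilogue count of 13 when max_size
       allows 8-byte chunks; dst/src stand for the pointers left behind
       by the main loop.  */
    static void
    epilogue_13_bytes (unsigned char *dst, const unsigned char *src)
    {
      memcpy (dst + 0,  src + 0,  8);   /* countval & 0x08 */
      memcpy (dst + 8,  src + 8,  4);   /* countval & 0x04 */
      memcpy (dst + 12, src + 12, 1);   /* countval & 0x01 */
    }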
17342 17344
17343 17345 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17344 17346 static void
17345 17347 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
17346 17348 rtx count, int max_size)
17347 17349 {
17348 17350 count =
17349 17351 expand_simple_binop (counter_mode (count), AND, count,
17350 17352 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
17351 17353 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
17352 17354 gen_lowpart (QImode, value), count, QImode,
17353 17355 1, max_size / 2);
17354 17356 }
17355 17357
17356 17358 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
17357 17359 static void
17358 17360 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
17359 17361 {
17360 17362 rtx dest;
17361 17363
17362 17364 if (CONST_INT_P (count))
17363 17365 {
17364 17366 HOST_WIDE_INT countval = INTVAL (count);
17365 17367 int offset = 0;
17366 17368
17367 17369 if ((countval & 0x10) && max_size > 16)
17368 17370 {
17369 17371 if (TARGET_64BIT)
17370 17372 {
17371 17373 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17372 17374 emit_insn (gen_strset (destptr, dest, value));
17373 17375 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
17374 17376 emit_insn (gen_strset (destptr, dest, value));
17375 17377 }
17376 17378 else
17377 17379 gcc_unreachable ();
17378 17380 offset += 16;
17379 17381 }
17380 17382 if ((countval & 0x08) && max_size > 8)
17381 17383 {
17382 17384 if (TARGET_64BIT)
17383 17385 {
17384 17386 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
17385 17387 emit_insn (gen_strset (destptr, dest, value));
17386 17388 }
17387 17389 else
17388 17390 {
17389 17391 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17390 17392 emit_insn (gen_strset (destptr, dest, value));
17391 17393 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
17392 17394 emit_insn (gen_strset (destptr, dest, value));
17393 17395 }
17394 17396 offset += 8;
17395 17397 }
17396 17398 if ((countval & 0x04) && max_size > 4)
17397 17399 {
17398 17400 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
17399 17401 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17400 17402 offset += 4;
17401 17403 }
17402 17404 if ((countval & 0x02) && max_size > 2)
17403 17405 {
17404 17406 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
17405 17407 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17406 17408 offset += 2;
17407 17409 }
17408 17410 if ((countval & 0x01) && max_size > 1)
17409 17411 {
17410 17412 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
17411 17413 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17412 17414 offset += 1;
17413 17415 }
17414 17416 return;
17415 17417 }
17416 17418 if (max_size > 32)
17417 17419 {
17418 17420 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
17419 17421 return;
17420 17422 }
17421 17423 if (max_size > 16)
17422 17424 {
17423 17425 rtx label = ix86_expand_aligntest (count, 16, true);
17424 17426 if (TARGET_64BIT)
17425 17427 {
17426 17428 dest = change_address (destmem, DImode, destptr);
17427 17429 emit_insn (gen_strset (destptr, dest, value));
17428 17430 emit_insn (gen_strset (destptr, dest, value));
17429 17431 }
17430 17432 else
17431 17433 {
17432 17434 dest = change_address (destmem, SImode, destptr);
17433 17435 emit_insn (gen_strset (destptr, dest, value));
17434 17436 emit_insn (gen_strset (destptr, dest, value));
17435 17437 emit_insn (gen_strset (destptr, dest, value));
17436 17438 emit_insn (gen_strset (destptr, dest, value));
17437 17439 }
17438 17440 emit_label (label);
17439 17441 LABEL_NUSES (label) = 1;
17440 17442 }
17441 17443 if (max_size > 8)
17442 17444 {
17443 17445 rtx label = ix86_expand_aligntest (count, 8, true);
17444 17446 if (TARGET_64BIT)
17445 17447 {
17446 17448 dest = change_address (destmem, DImode, destptr);
17447 17449 emit_insn (gen_strset (destptr, dest, value));
17448 17450 }
17449 17451 else
17450 17452 {
17451 17453 dest = change_address (destmem, SImode, destptr);
17452 17454 emit_insn (gen_strset (destptr, dest, value));
17453 17455 emit_insn (gen_strset (destptr, dest, value));
17454 17456 }
17455 17457 emit_label (label);
17456 17458 LABEL_NUSES (label) = 1;
17457 17459 }
17458 17460 if (max_size > 4)
17459 17461 {
17460 17462 rtx label = ix86_expand_aligntest (count, 4, true);
17461 17463 dest = change_address (destmem, SImode, destptr);
17462 17464 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
17463 17465 emit_label (label);
17464 17466 LABEL_NUSES (label) = 1;
17465 17467 }
17466 17468 if (max_size > 2)
17467 17469 {
17468 17470 rtx label = ix86_expand_aligntest (count, 2, true);
17469 17471 dest = change_address (destmem, HImode, destptr);
17470 17472 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
17471 17473 emit_label (label);
17472 17474 LABEL_NUSES (label) = 1;
17473 17475 }
17474 17476 if (max_size > 1)
17475 17477 {
17476 17478 rtx label = ix86_expand_aligntest (count, 1, true);
17477 17479 dest = change_address (destmem, QImode, destptr);
17478 17480 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
17479 17481 emit_label (label);
17480 17482 LABEL_NUSES (label) = 1;
17481 17483 }
17482 17484 }
17483 17485
17484 17486 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
17485 17487 to DESIRED_ALIGNMENT. */
17486 17488 static void
17487 17489 expand_movmem_prologue (rtx destmem, rtx srcmem,
17488 17490 rtx destptr, rtx srcptr, rtx count,
17489 17491 int align, int desired_alignment)
17490 17492 {
17491 17493 if (align <= 1 && desired_alignment > 1)
17492 17494 {
17493 17495 rtx label = ix86_expand_aligntest (destptr, 1, false);
17494 17496 srcmem = change_address (srcmem, QImode, srcptr);
17495 17497 destmem = change_address (destmem, QImode, destptr);
17496 17498 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17497 17499 ix86_adjust_counter (count, 1);
17498 17500 emit_label (label);
17499 17501 LABEL_NUSES (label) = 1;
17500 17502 }
17501 17503 if (align <= 2 && desired_alignment > 2)
17502 17504 {
17503 17505 rtx label = ix86_expand_aligntest (destptr, 2, false);
17504 17506 srcmem = change_address (srcmem, HImode, srcptr);
17505 17507 destmem = change_address (destmem, HImode, destptr);
17506 17508 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17507 17509 ix86_adjust_counter (count, 2);
17508 17510 emit_label (label);
17509 17511 LABEL_NUSES (label) = 1;
17510 17512 }
17511 17513 if (align <= 4 && desired_alignment > 4)
17512 17514 {
17513 17515 rtx label = ix86_expand_aligntest (destptr, 4, false);
17514 17516 srcmem = change_address (srcmem, SImode, srcptr);
17515 17517 destmem = change_address (destmem, SImode, destptr);
17516 17518 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
17517 17519 ix86_adjust_counter (count, 4);
17518 17520 emit_label (label);
17519 17521 LABEL_NUSES (label) = 1;
17520 17522 }
17521 17523 gcc_assert (desired_alignment <= 8);
17522 17524 }
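For a destination pointer that is, say, 3 bytes short of an 8-byte boundary, the three tests above emit one QImode and one HImode move and skip the SImode one. In plain C the prologue behaves like the sketch below (illustrative only; the real code also decrements the remaining count via ix86_adjust_counter, and the desired alignment need not be 8):

    #include <stdint.h>
    #include <string.h>

    /* Copy up to 7 bytes so that DST becomes 8-byte aligned; the caller
       has already guaranteed that at least that many bytes remain.
       Returns the number of bytes consumed.  */
    static int
    align_dst_to_8 (unsigned char *dst, const unsigned char *src)
    {
      int off = 0;
      if ((uintptr_t) (dst + off) & 1) { dst[off] = src[off]; off += 1; }
      if ((uintptr_t) (dst + off) & 2) { memcpy (dst + off, src + off, 2); off += 2; }
      if ((uintptr_t) (dst + off) & 4) { memcpy (dst + off, src + off, 4); off += 4; }
      return off;
    }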
17523 17525
17524 17526 /* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
17525 17527 ALIGN_BYTES is how many bytes need to be copied. */
17526 17528 static rtx
17527 17529 expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
17528 17530 int desired_align, int align_bytes)
17529 17531 {
17530 17532 rtx src = *srcp;
17531 17533 rtx src_size, dst_size;
17532 17534 int off = 0;
17533 17535 int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
17534 17536 if (src_align_bytes >= 0)
17535 17537 src_align_bytes = desired_align - src_align_bytes;
17536 17538 src_size = MEM_SIZE (src);
17537 17539 dst_size = MEM_SIZE (dst);
17538 17540 if (align_bytes & 1)
17539 17541 {
17540 17542 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17541 17543 src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
17542 17544 off = 1;
17543 17545 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17544 17546 }
17545 17547 if (align_bytes & 2)
17546 17548 {
17547 17549 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17548 17550 src = adjust_automodify_address_nv (src, HImode, srcreg, off);
17549 17551 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17550 17552 set_mem_align (dst, 2 * BITS_PER_UNIT);
17551 17553 if (src_align_bytes >= 0
17552 17554 && (src_align_bytes & 1) == (align_bytes & 1)
17553 17555 && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
17554 17556 set_mem_align (src, 2 * BITS_PER_UNIT);
17555 17557 off = 2;
17556 17558 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17557 17559 }
17558 17560 if (align_bytes & 4)
17559 17561 {
17560 17562 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17561 17563 src = adjust_automodify_address_nv (src, SImode, srcreg, off);
17562 17564 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17563 17565 set_mem_align (dst, 4 * BITS_PER_UNIT);
17564 17566 if (src_align_bytes >= 0)
17565 17567 {
17566 17568 unsigned int src_align = 0;
17567 17569 if ((src_align_bytes & 3) == (align_bytes & 3))
17568 17570 src_align = 4;
17569 17571 else if ((src_align_bytes & 1) == (align_bytes & 1))
17570 17572 src_align = 2;
17571 17573 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17572 17574 set_mem_align (src, src_align * BITS_PER_UNIT);
17573 17575 }
17574 17576 off = 4;
17575 17577 emit_insn (gen_strmov (destreg, dst, srcreg, src));
17576 17578 }
17577 17579 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17578 17580 src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
17579 17581 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17580 17582 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17581 17583 if (src_align_bytes >= 0)
17582 17584 {
17583 17585 unsigned int src_align = 0;
17584 17586 if ((src_align_bytes & 7) == (align_bytes & 7))
17585 17587 src_align = 8;
17586 17588 else if ((src_align_bytes & 3) == (align_bytes & 3))
17587 17589 src_align = 4;
17588 17590 else if ((src_align_bytes & 1) == (align_bytes & 1))
17589 17591 src_align = 2;
17590 17592 if (src_align > (unsigned int) desired_align)
17591 17593 src_align = desired_align;
17592 17594 if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
17593 17595 set_mem_align (src, src_align * BITS_PER_UNIT);
17594 17596 }
17595 17597 if (dst_size)
17596 17598 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17597 17599 if (src_size)
17598 17600 set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
17599 17601 *srcp = src;
17600 17602 return dst;
17601 17603 }
17602 17604
17603 17605 /* Store enough into DEST to align DEST, known to be aligned by ALIGN,
17604 17606 to DESIRED_ALIGNMENT. */
17605 17607 static void
17606 17608 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
17607 17609 int align, int desired_alignment)
17608 17610 {
17609 17611 if (align <= 1 && desired_alignment > 1)
17610 17612 {
17611 17613 rtx label = ix86_expand_aligntest (destptr, 1, false);
17612 17614 destmem = change_address (destmem, QImode, destptr);
17613 17615 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
17614 17616 ix86_adjust_counter (count, 1);
17615 17617 emit_label (label);
17616 17618 LABEL_NUSES (label) = 1;
17617 17619 }
17618 17620 if (align <= 2 && desired_alignment > 2)
17619 17621 {
17620 17622 rtx label = ix86_expand_aligntest (destptr, 2, false);
17621 17623 destmem = change_address (destmem, HImode, destptr);
17622 17624 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
17623 17625 ix86_adjust_counter (count, 2);
17624 17626 emit_label (label);
17625 17627 LABEL_NUSES (label) = 1;
17626 17628 }
17627 17629 if (align <= 4 && desired_alignment > 4)
17628 17630 {
17629 17631 rtx label = ix86_expand_aligntest (destptr, 4, false);
17630 17632 destmem = change_address (destmem, SImode, destptr);
17631 17633 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
17632 17634 ix86_adjust_counter (count, 4);
17633 17635 emit_label (label);
17634 17636 LABEL_NUSES (label) = 1;
17635 17637 }
17636 17638 gcc_assert (desired_alignment <= 8);
17637 17639 }
17638 17640
17639 17641 /* Store enough into DST to align DST, known to be aligned by ALIGN, to
17640 17642 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */
17641 17643 static rtx
17642 17644 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value,
17643 17645 int desired_align, int align_bytes)
17644 17646 {
17645 17647 int off = 0;
17646 17648 rtx dst_size = MEM_SIZE (dst);
17647 17649 if (align_bytes & 1)
17648 17650 {
17649 17651 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
17650 17652 off = 1;
17651 17653 emit_insn (gen_strset (destreg, dst,
17652 17654 gen_lowpart (QImode, value)));
17653 17655 }
17654 17656 if (align_bytes & 2)
17655 17657 {
17656 17658 dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
17657 17659 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
17658 17660 set_mem_align (dst, 2 * BITS_PER_UNIT);
17659 17661 off = 2;
17660 17662 emit_insn (gen_strset (destreg, dst,
17661 17663 gen_lowpart (HImode, value)));
17662 17664 }
17663 17665 if (align_bytes & 4)
17664 17666 {
17665 17667 dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
17666 17668 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
17667 17669 set_mem_align (dst, 4 * BITS_PER_UNIT);
17668 17670 off = 4;
17669 17671 emit_insn (gen_strset (destreg, dst,
17670 17672 gen_lowpart (SImode, value)));
17671 17673 }
17672 17674 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
17673 17675 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
17674 17676 set_mem_align (dst, desired_align * BITS_PER_UNIT);
17675 17677 if (dst_size)
17676 17678 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
17677 17679 return dst;
17678 17680 }
17679 17681
17680 17682 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
17681 17683 static enum stringop_alg
17682 17684 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
17683 17685 int *dynamic_check)
17684 17686 {
17685 17687 const struct stringop_algs * algs;
17686 17688 bool optimize_for_speed;
17687 17689 /* Algorithms using the rep prefix want at least edi and ecx;
17688 17690 additionally, memset wants eax and memcpy wants esi. Don't
17689 17691 consider such algorithms if the user has appropriated those
17690 17692 registers for their own purposes. */
17691 17693 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG]
17692 17694 || (memset
17693 17695 ? fixed_regs[AX_REG] : fixed_regs[SI_REG]));
17694 17696
17695 17697 #define ALG_USABLE_P(alg) (rep_prefix_usable \
17696 17698 || (alg != rep_prefix_1_byte \
17697 17699 && alg != rep_prefix_4_byte \
17698 17700 && alg != rep_prefix_8_byte))
17699 17701 const struct processor_costs *cost;
17700 17702
17701 17703 /* Even if the string operation call is cold, we still might spend a lot
17702 17704 of time processing large blocks. */
17703 17705 if (optimize_function_for_size_p (cfun)
17704 17706 || (optimize_insn_for_size_p ()
17705 17707 && expected_size != -1 && expected_size < 256))
17706 17708 optimize_for_speed = false;
17707 17709 else
17708 17710 optimize_for_speed = true;
17709 17711
17710 17712 cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;
17711 17713
17712 17714 *dynamic_check = -1;
17713 17715 if (memset)
17714 17716 algs = &cost->memset[TARGET_64BIT != 0];
17715 17717 else
17716 17718 algs = &cost->memcpy[TARGET_64BIT != 0];
17717 17719 if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
17718 17720 return stringop_alg;
17719 17721 /* rep; movq or rep; movl is the smallest variant. */
17720 17722 else if (!optimize_for_speed)
17721 17723 {
17722 17724 if (!count || (count & 3))
17723 17725 return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
17724 17726 else
17725 17727 return rep_prefix_usable ? rep_prefix_4_byte : loop;
17726 17728 }
17727 17729 /* Very tiny blocks are best handled via the loop; REP is expensive to
17728 17730 set up. */
17729 17731 else if (expected_size != -1 && expected_size < 4)
17730 17732 return loop_1_byte;
17731 17733 else if (expected_size != -1)
17732 17734 {
17733 17735 unsigned int i;
17734 17736 enum stringop_alg alg = libcall;
17735 17737 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17736 17738 {
17737 17739 /* We get here if the algorithms that were not libcall-based
17738 17740 were rep-prefix based and we are unable to use rep prefixes
17739 17741 based on global register usage. Break out of the loop and
17740 17742 use the heuristic below. */
17741 17743 if (algs->size[i].max == 0)
17742 17744 break;
17743 17745 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
17744 17746 {
17745 17747 enum stringop_alg candidate = algs->size[i].alg;
17746 17748
17747 17749 if (candidate != libcall && ALG_USABLE_P (candidate))
17748 17750 alg = candidate;
17749 17751 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
17750 17752 last non-libcall inline algorithm. */
17751 17753 if (TARGET_INLINE_ALL_STRINGOPS)
17752 17754 {
17753 17755 /* When the current size is best to be copied by a libcall,
17754 17756 but we are still forced to inline, run the heuristic below
17755 17757 that will pick code for medium sized blocks. */
17756 17758 if (alg != libcall)
17757 17759 return alg;
17758 17760 break;
17759 17761 }
17760 17762 else if (ALG_USABLE_P (candidate))
17761 17763 return candidate;
17762 17764 }
17763 17765 }
17764 17766 gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
17765 17767 }
17766 17768 /* When asked to inline the call anyway, try to pick a meaningful choice.
17767 17769 We look for the maximal block size that is faster to copy by hand and
17768 17770 handle blocks of at most that size, guessing that the average size will
17769 17771 be roughly half of that maximum.
17770 17772
17771 17773 If this turns out to be bad, we might simply specify the preferred
17772 17774 choice in ix86_costs. */
17773 17775 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17774 17776 && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
17775 17777 {
17776 17778 int max = -1;
17777 17779 enum stringop_alg alg;
17778 17780 int i;
17779 17781 bool any_alg_usable_p = true;
17780 17782
17781 17783 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
17782 17784 {
17783 17785 enum stringop_alg candidate = algs->size[i].alg;
17784 17786 any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);
17785 17787
17786 17788 if (candidate != libcall && candidate
17787 17789 && ALG_USABLE_P (candidate))
17788 17790 max = algs->size[i].max;
17789 17791 }
17790 17792 /* If there aren't any usable algorithms, then recursing on
17791 17793 smaller sizes isn't going to find anything. Just return the
17792 17794 simple byte-at-a-time copy loop. */
17793 17795 if (!any_alg_usable_p)
17794 17796 {
17795 17797 /* Pick something reasonable. */
17796 17798 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17797 17799 *dynamic_check = 128;
17798 17800 return loop_1_byte;
17799 17801 }
17800 17802 if (max == -1)
17801 17803 max = 4096;
17802 17804 alg = decide_alg (count, max / 2, memset, dynamic_check);
17803 17805 gcc_assert (*dynamic_check == -1);
17804 17806 gcc_assert (alg != libcall);
17805 17807 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
17806 17808 *dynamic_check = max;
17807 17809 return alg;
17808 17810 }
17809 17811 return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
17810 17812 #undef ALG_USABLE_P
17811 17813 }
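As a worked example of the table walk above: with a hypothetical cost-table entry such as the one below and expected_size == 100, the loop skips the 32-byte `loop' entry, reaches the 8192-byte entry and returns rep_prefix_4_byte, provided the rep-prefix registers have not been fixed by the user.

    /* Hypothetical table, shaped like the real memcpy/memset entries in the
       processor cost tables; the sizes and algorithms are illustrative only.  */
    static const struct stringop_algs example_memcpy_algs
      = {libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}};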
17812 17814
17813 17815 /* Decide on alignment. We know that the operand is already aligned to ALIGN
17814 17816 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
17815 17817 static int
17816 17818 decide_alignment (int align,
17817 17819 enum stringop_alg alg,
17818 17820 int expected_size)
17819 17821 {
17820 17822 int desired_align = 0;
17821 17823 switch (alg)
17822 17824 {
17823 17825 case no_stringop:
17824 17826 gcc_unreachable ();
17825 17827 case loop:
17826 17828 case unrolled_loop:
17827 17829 desired_align = GET_MODE_SIZE (Pmode);
17828 17830 break;
17829 17831 case rep_prefix_8_byte:
17830 17832 desired_align = 8;
17831 17833 break;
17832 17834 case rep_prefix_4_byte:
17833 17835 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
17834 17836 copying a whole cacheline at once. */
17835 17837 if (TARGET_PENTIUMPRO)
17836 17838 desired_align = 8;
17837 17839 else
17838 17840 desired_align = 4;
17839 17841 break;
17840 17842 case rep_prefix_1_byte:
17841 17843 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
17842 17844 copying a whole cacheline at once. */
17843 17845 if (TARGET_PENTIUMPRO)
17844 17846 desired_align = 8;
17845 17847 else
17846 17848 desired_align = 1;
17847 17849 break;
17848 17850 case loop_1_byte:
17849 17851 desired_align = 1;
17850 17852 break;
17851 17853 case libcall:
17852 17854 return 0;
17853 17855 }
17854 17856
17855 17857 if (optimize_size)
17856 17858 desired_align = 1;
17857 17859 if (desired_align < align)
17858 17860 desired_align = align;
17859 17861 if (expected_size != -1 && expected_size < 4)
17860 17862 desired_align = align;
17861 17863 return desired_align;
17862 17864 }
17863 17865
17864 17866 /* Return the smallest power of 2 greater than VAL. */
17865 17867 static int
17866 17868 smallest_pow2_greater_than (int val)
17867 17869 {
17868 17870 int ret = 1;
17869 17871 while (ret <= val)
17870 17872 ret <<= 1;
17871 17873 return ret;
17872 17874 }
17873 17875
17874 17876 /* Expand string move (memcpy) operation. Use i386 string operations when
17875 17877 profitable. expand_setmem contains similar code. The code depends upon
17876 17878 architecture, block size and alignment, but always has the same
17877 17879 overall structure:
17878 17880
17879 17881 1) Prologue guard: Conditional that jumps up to epilogues for small
17880 17882 blocks that can be handled by the epilogue alone. This is faster but
17881 17883 also needed for correctness, since the prologue assumes the block is
17882 17884 larger than the desired alignment.
17883 17885
17884 17886 Optional dynamic check for size and libcall for large
17885 17887 blocks is emitted here too, with -minline-stringops-dynamically.
17886 17888
17887 17889 2) Prologue: copy the first few bytes in order to get the destination
17888 17890 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
17889 17891 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
17890 17892 We emit either a jump tree over power-of-two sized chunks, or a byte loop.
17891 17893
17892 17894 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
17893 17895 with specified algorithm.
17894 17896
17895 17897 4) Epilogue: code copying tail of the block that is too small to be
17896 17898 handled by main body (or up to size guarded by prologue guard). */
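As a rough illustration of that structure, the code emitted for the plain `loop' algorithm with an unknown count behaves like the following C sketch (names and the word-sized chunk are illustrative; the real expansion of course works on RTL):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static void
    movmem_shape (unsigned char *dst, const unsigned char *src, size_t n)
    {
      const size_t chunk = sizeof (long);          /* SIZE_NEEDED */

      if (n < chunk)                               /* 1) prologue guard */
        goto epilogue;
      while (((uintptr_t) dst % chunk) != 0)       /* 2) alignment prologue */
        { *dst++ = *src++; n--; }                  /*    (consumes fewer than chunk bytes) */
      while (n >= chunk)                           /* 3) main body */
        { memcpy (dst, src, chunk); dst += chunk; src += chunk; n -= chunk; }
     epilogue:                                     /* 4) epilogue: n & (chunk - 1) bytes */
      while (n--)
        *dst++ = *src++;
    }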
17897 17899
17898 17900 int
17899 17901 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
17900 17902 rtx expected_align_exp, rtx expected_size_exp)
17901 17903 {
17902 17904 rtx destreg;
17903 17905 rtx srcreg;
17904 17906 rtx label = NULL;
17905 17907 rtx tmp;
17906 17908 rtx jump_around_label = NULL;
17907 17909 HOST_WIDE_INT align = 1;
17908 17910 unsigned HOST_WIDE_INT count = 0;
17909 17911 HOST_WIDE_INT expected_size = -1;
17910 17912 int size_needed = 0, epilogue_size_needed;
17911 17913 int desired_align = 0, align_bytes = 0;
17912 17914 enum stringop_alg alg;
17913 17915 int dynamic_check;
17914 17916 bool need_zero_guard = false;
17915 17917
17916 17918 if (CONST_INT_P (align_exp))
17917 17919 align = INTVAL (align_exp);
17918 17920 /* i386 can do misaligned access at a reasonably increased cost. */
17919 17921 if (CONST_INT_P (expected_align_exp)
17920 17922 && INTVAL (expected_align_exp) > align)
17921 17923 align = INTVAL (expected_align_exp);
17922 17924 /* ALIGN is the minimum of destination and source alignment, but we care here
17923 17925 just about destination alignment. */
17924 17926 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT)
17925 17927 align = MEM_ALIGN (dst) / BITS_PER_UNIT;
17926 17928
17927 17929 if (CONST_INT_P (count_exp))
17928 17930 count = expected_size = INTVAL (count_exp);
17929 17931 if (CONST_INT_P (expected_size_exp) && count == 0)
17930 17932 expected_size = INTVAL (expected_size_exp);
17931 17933
17932 17934 /* Make sure we don't need to care about overflow later on. */
17933 17935 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
17934 17936 return 0;
17935 17937
17936 17938 /* Step 0: Decide on preferred algorithm, desired alignment and
17937 17939 size of chunks to be copied by main loop. */
17938 17940
17939 17941 alg = decide_alg (count, expected_size, false, &dynamic_check);
17940 17942 desired_align = decide_alignment (align, alg, expected_size);
17941 17943
17942 17944 if (!TARGET_ALIGN_STRINGOPS)
17943 17945 align = desired_align;
17944 17946
17945 17947 if (alg == libcall)
17946 17948 return 0;
17947 17949 gcc_assert (alg != no_stringop);
17948 17950 if (!count)
17949 17951 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
17950 17952 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
17951 17953 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
17952 17954 switch (alg)
17953 17955 {
17954 17956 case libcall:
17955 17957 case no_stringop:
17956 17958 gcc_unreachable ();
17957 17959 case loop:
17958 17960 need_zero_guard = true;
17959 17961 size_needed = GET_MODE_SIZE (Pmode);
17960 17962 break;
17961 17963 case unrolled_loop:
17962 17964 need_zero_guard = true;
17963 17965 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
17964 17966 break;
17965 17967 case rep_prefix_8_byte:
17966 17968 size_needed = 8;
17967 17969 break;
17968 17970 case rep_prefix_4_byte:
17969 17971 size_needed = 4;
17970 17972 break;
17971 17973 case rep_prefix_1_byte:
17972 17974 size_needed = 1;
17973 17975 break;
17974 17976 case loop_1_byte:
17975 17977 need_zero_guard = true;
17976 17978 size_needed = 1;
17977 17979 break;
17978 17980 }
17979 17981
17980 17982 epilogue_size_needed = size_needed;
17981 17983
17982 17984 /* Step 1: Prologue guard. */
17983 17985
17984 17986 /* Alignment code needs count to be in register. */
17985 17987 if (CONST_INT_P (count_exp) && desired_align > align)
17986 17988 {
17987 17989 if (INTVAL (count_exp) > desired_align
17988 17990 && INTVAL (count_exp) > size_needed)
17989 17991 {
17990 17992 align_bytes
17991 17993 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
17992 17994 if (align_bytes <= 0)
17993 17995 align_bytes = 0;
17994 17996 else
17995 17997 align_bytes = desired_align - align_bytes;
17996 17998 }
17997 17999 if (align_bytes == 0)
17998 18000 count_exp = force_reg (counter_mode (count_exp), count_exp);
17999 18001 }
18000 18002 gcc_assert (desired_align >= 1 && align >= 1);
18001 18003
18002 18004 /* Ensure that alignment prologue won't copy past end of block. */
18003 18005 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18004 18006 {
18005 18007 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18006 18008 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18007 18009 Make sure it is a power of 2. */
18008 18010 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18009 18011
18010 18012 if (count)
18011 18013 {
18012 18014 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18013 18015 {
18014 18016 /* If main algorithm works on QImode, no epilogue is needed.
18015 18017 For small sizes just don't align anything. */
18016 18018 if (size_needed == 1)
18017 18019 desired_align = align;
18018 18020 else
18019 18021 goto epilogue;
18020 18022 }
18021 18023 }
18022 18024 else
18023 18025 {
18024 18026 label = gen_label_rtx ();
18025 18027 emit_cmp_and_jump_insns (count_exp,
18026 18028 GEN_INT (epilogue_size_needed),
18027 18029 LTU, 0, counter_mode (count_exp), 1, label);
18028 18030 if (expected_size == -1 || expected_size < epilogue_size_needed)
18029 18031 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18030 18032 else
18031 18033 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18032 18034 }
18033 18035 }
18034 18036
18035 18037 /* Emit code to decide at runtime whether a library call or inline code
18036 18038 should be used. */
18037 18039 if (dynamic_check != -1)
18038 18040 {
18039 18041 if (CONST_INT_P (count_exp))
18040 18042 {
18041 18043 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check)
18042 18044 {
18043 18045 emit_block_move_via_libcall (dst, src, count_exp, false);
18044 18046 count_exp = const0_rtx;
18045 18047 goto epilogue;
18046 18048 }
18047 18049 }
18048 18050 else
18049 18051 {
18050 18052 rtx hot_label = gen_label_rtx ();
18051 18053 jump_around_label = gen_label_rtx ();
18052 18054 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18053 18055 LEU, 0, GET_MODE (count_exp), 1, hot_label);
18054 18056 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18055 18057 emit_block_move_via_libcall (dst, src, count_exp, false);
18056 18058 emit_jump (jump_around_label);
18057 18059 emit_label (hot_label);
18058 18060 }
18059 18061 }
18060 18062
18061 18063 /* Step 2: Alignment prologue. */
18062 18064
18063 18065 if (desired_align > align)
18064 18066 {
18065 18067 if (align_bytes == 0)
18066 18068 {
18067 18069 /* Except for the first move in the epilogue, we no longer know
18068 18070 the constant offset in the aliasing info. It doesn't seem worth
18069 18071 the pain to maintain it for the first move, so throw away
18070 18072 the info early. */
18071 18073 src = change_address (src, BLKmode, srcreg);
18072 18074 dst = change_address (dst, BLKmode, destreg);
18073 18075 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
18074 18076 desired_align);
18075 18077 }
18076 18078 else
18077 18079 {
18078 18080 /* If we know how many bytes need to be stored before dst is
18079 18081 sufficiently aligned, maintain aliasing info accurately. */
18080 18082 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg,
18081 18083 desired_align, align_bytes);
18082 18084 count_exp = plus_constant (count_exp, -align_bytes);
18083 18085 count -= align_bytes;
18084 18086 }
18085 18087 if (need_zero_guard
18086 18088 && (count < (unsigned HOST_WIDE_INT) size_needed
18087 18089 || (align_bytes == 0
18088 18090 && count < ((unsigned HOST_WIDE_INT) size_needed
18089 18091 + desired_align - align))))
18090 18092 {
18091 18093 /* It is possible that we copied enough so the main loop will not
18092 18094 execute. */
18093 18095 gcc_assert (size_needed > 1);
18094 18096 if (label == NULL_RTX)
18095 18097 label = gen_label_rtx ();
18096 18098 emit_cmp_and_jump_insns (count_exp,
18097 18099 GEN_INT (size_needed),
18098 18100 LTU, 0, counter_mode (count_exp), 1, label);
18099 18101 if (expected_size == -1
18100 18102 || expected_size < (desired_align - align) / 2 + size_needed)
18101 18103 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18102 18104 else
18103 18105 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18104 18106 }
18105 18107 }
18106 18108 if (label && size_needed == 1)
18107 18109 {
18108 18110 emit_label (label);
18109 18111 LABEL_NUSES (label) = 1;
18110 18112 label = NULL;
18111 18113 epilogue_size_needed = 1;
18112 18114 }
18113 18115 else if (label == NULL_RTX)
18114 18116 epilogue_size_needed = size_needed;
18115 18117
18116 18118 /* Step 3: Main loop. */
18117 18119
18118 18120 switch (alg)
18119 18121 {
18120 18122 case libcall:
18121 18123 case no_stringop:
18122 18124 gcc_unreachable ();
18123 18125 case loop_1_byte:
18124 18126 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18125 18127 count_exp, QImode, 1, expected_size);
18126 18128 break;
18127 18129 case loop:
18128 18130 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18129 18131 count_exp, Pmode, 1, expected_size);
18130 18132 break;
18131 18133 case unrolled_loop:
18132 18134 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
18133 18135 registers for 4 temporaries anyway. */
18134 18136 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
18135 18137 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
18136 18138 expected_size);
18137 18139 break;
18138 18140 case rep_prefix_8_byte:
18139 18141 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18140 18142 DImode);
18141 18143 break;
18142 18144 case rep_prefix_4_byte:
18143 18145 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18144 18146 SImode);
18145 18147 break;
18146 18148 case rep_prefix_1_byte:
18147 18149 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
18148 18150 QImode);
18149 18151 break;
18150 18152 }
18151 18153 /* Properly adjust the offsets of the src and dest memory for aliasing. */
18152 18154 if (CONST_INT_P (count_exp))
18153 18155 {
18154 18156 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
18155 18157 (count / size_needed) * size_needed);
18156 18158 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18157 18159 (count / size_needed) * size_needed);
18158 18160 }
18159 18161 else
18160 18162 {
18161 18163 src = change_address (src, BLKmode, srcreg);
18162 18164 dst = change_address (dst, BLKmode, destreg);
18163 18165 }
18164 18166
18165 18167 /* Step 4: Epilogue to copy the remaining bytes. */
18166 18168 epilogue:
18167 18169 if (label)
18168 18170 {
18169 18171 /* When the main loop is done, COUNT_EXP might hold the original count,
18170 18172 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18171 18173 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18172 18174 bytes. Compensate if needed. */
18173 18175
18174 18176 if (size_needed < epilogue_size_needed)
18175 18177 {
18176 18178 tmp =
18177 18179 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18178 18180 GEN_INT (size_needed - 1), count_exp, 1,
18179 18181 OPTAB_DIRECT);
18180 18182 if (tmp != count_exp)
18181 18183 emit_move_insn (count_exp, tmp);
18182 18184 }
18183 18185 emit_label (label);
18184 18186 LABEL_NUSES (label) = 1;
18185 18187 }
18186 18188
18187 18189 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18188 18190 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
18189 18191 epilogue_size_needed);
18190 18192 if (jump_around_label)
18191 18193 emit_label (jump_around_label);
18192 18194 return 1;
18193 18195 }
18194 18196
18195 18197 /* Helper function for memset. For the QImode value 0xXY produce
18196 18198 0xXYXYXYXY of the width specified by MODE. This is essentially
18197 18199 a multiplication by 0x01010101, but we can do slightly better than
18198 18200 synth_mult by unwinding the sequence by hand on CPUs with a
18199 18201 slow multiply. */
18200 18202 static rtx
18201 18203 promote_duplicated_reg (enum machine_mode mode, rtx val)
18202 18204 {
18203 18205 enum machine_mode valmode = GET_MODE (val);
18204 18206 rtx tmp;
18205 18207 int nops = mode == DImode ? 3 : 2;
18206 18208
18207 18209 gcc_assert (mode == SImode || mode == DImode);
18208 18210 if (val == const0_rtx)
18209 18211 return copy_to_mode_reg (mode, const0_rtx);
18210 18212 if (CONST_INT_P (val))
18211 18213 {
18212 18214 HOST_WIDE_INT v = INTVAL (val) & 255;
18213 18215
18214 18216 v |= v << 8;
18215 18217 v |= v << 16;
18216 18218 if (mode == DImode)
18217 18219 v |= (v << 16) << 16;
18218 18220 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
18219 18221 }
18220 18222
18221 18223 if (valmode == VOIDmode)
18222 18224 valmode = QImode;
18223 18225 if (valmode != QImode)
18224 18226 val = gen_lowpart (QImode, val);
18225 18227 if (mode == QImode)
18226 18228 return val;
18227 18229 if (!TARGET_PARTIAL_REG_STALL)
18228 18230 nops--;
18229 18231 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
18230 18232 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
18231 18233 <= (ix86_cost->shift_const + ix86_cost->add) * nops
18232 18234 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
18233 18235 {
18234 18236 rtx reg = convert_modes (mode, QImode, val, true);
18235 18237 tmp = promote_duplicated_reg (mode, const1_rtx);
18236 18238 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
18237 18239 OPTAB_DIRECT);
18238 18240 }
18239 18241 else
18240 18242 {
18241 18243 rtx reg = convert_modes (mode, QImode, val, true);
18242 18244
18243 18245 if (!TARGET_PARTIAL_REG_STALL)
18244 18246 if (mode == SImode)
18245 18247 emit_insn (gen_movsi_insv_1 (reg, reg));
18246 18248 else
18247 18249 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
18248 18250 else
18249 18251 {
18250 18252 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
18251 18253 NULL, 1, OPTAB_DIRECT);
18252 18254 reg =
18253 18255 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18254 18256 }
18255 18257 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
18256 18258 NULL, 1, OPTAB_DIRECT);
18257 18259 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18258 18260 if (mode == SImode)
18259 18261 return reg;
18260 18262 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
18261 18263 NULL, 1, OPTAB_DIRECT);
18262 18264 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
18263 18265 return reg;
18264 18266 }
18265 18267 }
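For the constant case the duplication above amounts to plain byte replication; as a concrete check in host C (illustrative only):

    /* 0x2A -> 0x2A2A2A2A (SImode) or 0x2A2A2A2A2A2A2A2A (DImode).  */
    static unsigned long long
    dup_byte (unsigned char b, int dimode)
    {
      unsigned long long v = b;
      v |= v << 8;
      v |= v << 16;
      if (dimode)
        v |= v << 32;
      return v;
    }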
18266 18268
18267 18269 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
18268 18270 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
18269 18271 alignment from ALIGN to DESIRED_ALIGN. */
18270 18272 static rtx
18271 18273 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
18272 18274 {
18273 18275 rtx promoted_val;
18274 18276
18275 18277 if (TARGET_64BIT
18276 18278 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
18277 18279 promoted_val = promote_duplicated_reg (DImode, val);
18278 18280 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
18279 18281 promoted_val = promote_duplicated_reg (SImode, val);
18280 18282 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
18281 18283 promoted_val = promote_duplicated_reg (HImode, val);
18282 18284 else
18283 18285 promoted_val = val;
18284 18286
18285 18287 return promoted_val;
18286 18288 }
18287 18289
18288 18290 /* Expand string set operation (memset). Use i386 string operations when
18289 18291 profitable. See the expand_movmem comment for an explanation of the
18290 18292 individual steps performed. */
18291 18293 int
18292 18294 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
18293 18295 rtx expected_align_exp, rtx expected_size_exp)
18294 18296 {
18295 18297 rtx destreg;
18296 18298 rtx label = NULL;
18297 18299 rtx tmp;
18298 18300 rtx jump_around_label = NULL;
18299 18301 HOST_WIDE_INT align = 1;
18300 18302 unsigned HOST_WIDE_INT count = 0;
18301 18303 HOST_WIDE_INT expected_size = -1;
18302 18304 int size_needed = 0, epilogue_size_needed;
18303 18305 int desired_align = 0, align_bytes = 0;
18304 18306 enum stringop_alg alg;
18305 18307 rtx promoted_val = NULL;
18306 18308 bool force_loopy_epilogue = false;
18307 18309 int dynamic_check;
18308 18310 bool need_zero_guard = false;
18309 18311
18310 18312 if (CONST_INT_P (align_exp))
18311 18313 align = INTVAL (align_exp);
18312 18314 /* i386 can do misaligned access at a reasonably increased cost. */
18313 18315 if (CONST_INT_P (expected_align_exp)
18314 18316 && INTVAL (expected_align_exp) > align)
18315 18317 align = INTVAL (expected_align_exp);
18316 18318 if (CONST_INT_P (count_exp))
18317 18319 count = expected_size = INTVAL (count_exp);
18318 18320 if (CONST_INT_P (expected_size_exp) && count == 0)
18319 18321 expected_size = INTVAL (expected_size_exp);
18320 18322
18321 18323 /* Make sure we don't need to care about overflow later on. */
18322 18324 if (count > ((unsigned HOST_WIDE_INT) 1 << 30))
18323 18325 return 0;
18324 18326
18325 18327 /* Step 0: Decide on preferred algorithm, desired alignment and
18326 18328 size of chunks to be copied by main loop. */
18327 18329
18328 18330 alg = decide_alg (count, expected_size, true, &dynamic_check);
18329 18331 desired_align = decide_alignment (align, alg, expected_size);
18330 18332
18331 18333 if (!TARGET_ALIGN_STRINGOPS)
18332 18334 align = desired_align;
18333 18335
18334 18336 if (alg == libcall)
18335 18337 return 0;
18336 18338 gcc_assert (alg != no_stringop);
18337 18339 if (!count)
18338 18340 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
18339 18341 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
18340 18342 switch (alg)
18341 18343 {
18342 18344 case libcall:
18343 18345 case no_stringop:
18344 18346 gcc_unreachable ();
18345 18347 case loop:
18346 18348 need_zero_guard = true;
18347 18349 size_needed = GET_MODE_SIZE (Pmode);
18348 18350 break;
18349 18351 case unrolled_loop:
18350 18352 need_zero_guard = true;
18351 18353 size_needed = GET_MODE_SIZE (Pmode) * 4;
18352 18354 break;
18353 18355 case rep_prefix_8_byte:
18354 18356 size_needed = 8;
18355 18357 break;
18356 18358 case rep_prefix_4_byte:
18357 18359 size_needed = 4;
18358 18360 break;
18359 18361 case rep_prefix_1_byte:
18360 18362 size_needed = 1;
18361 18363 break;
18362 18364 case loop_1_byte:
18363 18365 need_zero_guard = true;
18364 18366 size_needed = 1;
18365 18367 break;
18366 18368 }
18367 18369 epilogue_size_needed = size_needed;
18368 18370
18369 18371 /* Step 1: Prologue guard. */
18370 18372
18371 18373 /* Alignment code needs count to be in register. */
18372 18374 if (CONST_INT_P (count_exp) && desired_align > align)
18373 18375 {
18374 18376 if (INTVAL (count_exp) > desired_align
18375 18377 && INTVAL (count_exp) > size_needed)
18376 18378 {
18377 18379 align_bytes
18378 18380 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT);
18379 18381 if (align_bytes <= 0)
18380 18382 align_bytes = 0;
18381 18383 else
18382 18384 align_bytes = desired_align - align_bytes;
18383 18385 }
18384 18386 if (align_bytes == 0)
18385 18387 {
18386 18388 enum machine_mode mode = SImode;
18387 18389 if (TARGET_64BIT && (count & ~0xffffffff))
18388 18390 mode = DImode;
18389 18391 count_exp = force_reg (mode, count_exp);
18390 18392 }
18391 18393 }
18392 18394 /* Do the cheap promotion to allow better CSE across the
18393 18395 main loop and epilogue (i.e. one load of the big constant in
18394 18396 front of all the code). */
18395 18397 if (CONST_INT_P (val_exp))
18396 18398 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18397 18399 desired_align, align);
18398 18400 /* Ensure that alignment prologue won't copy past end of block. */
18399 18401 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
18400 18402 {
18401 18403 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
18402 18404 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes.
18403 18405 Make sure it is a power of 2. */
18404 18406 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
18405 18407
18406 18408 /* To improve the performance of small blocks, we jump around the VAL
18407 18409 promoting code. This means that if the promoted VAL is not constant,
18408 18410 we might not use it in the epilogue and have to use the byte
18409 18411 loop variant. */
18410 18412 if (epilogue_size_needed > 2 && !promoted_val)
18411 18413 force_loopy_epilogue = true;
18412 18414 if (count)
18413 18415 {
18414 18416 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed)
18415 18417 {
18416 18418 /* If main algorithm works on QImode, no epilogue is needed.
18417 18419 For small sizes just don't align anything. */
18418 18420 if (size_needed == 1)
18419 18421 desired_align = align;
18420 18422 else
18421 18423 goto epilogue;
18422 18424 }
18423 18425 }
18424 18426 else
18425 18427 {
18426 18428 label = gen_label_rtx ();
18427 18429 emit_cmp_and_jump_insns (count_exp,
18428 18430 GEN_INT (epilogue_size_needed),
18429 18431 LTU, 0, counter_mode (count_exp), 1, label);
18430 18432 if (expected_size == -1 || expected_size <= epilogue_size_needed)
18431 18433 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18432 18434 else
18433 18435 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18434 18436 }
18435 18437 }
18436 18438 if (dynamic_check != -1)
18437 18439 {
18438 18440 rtx hot_label = gen_label_rtx ();
18439 18441 jump_around_label = gen_label_rtx ();
18440 18442 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
18441 18443 LEU, 0, counter_mode (count_exp), 1, hot_label);
18442 18444 predict_jump (REG_BR_PROB_BASE * 90 / 100);
18443 18445 set_storage_via_libcall (dst, count_exp, val_exp, false);
18444 18446 emit_jump (jump_around_label);
18445 18447 emit_label (hot_label);
18446 18448 }
18447 18449
18448 18450 /* Step 2: Alignment prologue. */
18449 18451
18450 18452 /* Do the expensive promotion once we branched off the small blocks. */
18451 18453 if (!promoted_val)
18452 18454 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
18453 18455 desired_align, align);
18454 18456 gcc_assert (desired_align >= 1 && align >= 1);
18455 18457
18456 18458 if (desired_align > align)
18457 18459 {
18458 18460 if (align_bytes == 0)
18459 18461 {
18460 18462 /* Except for the first move in the epilogue, we no longer know
18461 18463 the constant offset in the aliasing info. It doesn't seem worth
18462 18464 the pain to maintain it for the first move, so throw away
18463 18465 the info early. */
18464 18466 dst = change_address (dst, BLKmode, destreg);
18465 18467 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
18466 18468 desired_align);
18467 18469 }
18468 18470 else
18469 18471 {
18470 18472 /* If we know how many bytes need to be stored before dst is
18471 18473 sufficiently aligned, maintain aliasing info accurately. */
18472 18474 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val,
18473 18475 desired_align, align_bytes);
18474 18476 count_exp = plus_constant (count_exp, -align_bytes);
18475 18477 count -= align_bytes;
18476 18478 }
18477 18479 if (need_zero_guard
18478 18480 && (count < (unsigned HOST_WIDE_INT) size_needed
18479 18481 || (align_bytes == 0
18480 18482 && count < ((unsigned HOST_WIDE_INT) size_needed
18481 18483 + desired_align - align))))
18482 18484 {
18483 18485 /* It is possible that we copied enough so the main loop will not
18484 18486 execute. */
18485 18487 gcc_assert (size_needed > 1);
18486 18488 if (label == NULL_RTX)
18487 18489 label = gen_label_rtx ();
18488 18490 emit_cmp_and_jump_insns (count_exp,
18489 18491 GEN_INT (size_needed),
18490 18492 LTU, 0, counter_mode (count_exp), 1, label);
18491 18493 if (expected_size == -1
18492 18494 || expected_size < (desired_align - align) / 2 + size_needed)
18493 18495 predict_jump (REG_BR_PROB_BASE * 20 / 100);
18494 18496 else
18495 18497 predict_jump (REG_BR_PROB_BASE * 60 / 100);
18496 18498 }
18497 18499 }
18498 18500 if (label && size_needed == 1)
18499 18501 {
18500 18502 emit_label (label);
18501 18503 LABEL_NUSES (label) = 1;
18502 18504 label = NULL;
18503 18505 promoted_val = val_exp;
18504 18506 epilogue_size_needed = 1;
18505 18507 }
18506 18508 else if (label == NULL_RTX)
18507 18509 epilogue_size_needed = size_needed;
18508 18510
18509 18511 /* Step 3: Main loop. */
18510 18512
18511 18513 switch (alg)
18512 18514 {
18513 18515 case libcall:
18514 18516 case no_stringop:
18515 18517 gcc_unreachable ();
18516 18518 case loop_1_byte:
18517 18519 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18518 18520 count_exp, QImode, 1, expected_size);
18519 18521 break;
18520 18522 case loop:
18521 18523 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18522 18524 count_exp, Pmode, 1, expected_size);
18523 18525 break;
18524 18526 case unrolled_loop:
18525 18527 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
18526 18528 count_exp, Pmode, 4, expected_size);
18527 18529 break;
18528 18530 case rep_prefix_8_byte:
18529 18531 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18530 18532 DImode, val_exp);
18531 18533 break;
18532 18534 case rep_prefix_4_byte:
18533 18535 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18534 18536 SImode, val_exp);
18535 18537 break;
18536 18538 case rep_prefix_1_byte:
18537 18539 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
18538 18540 QImode, val_exp);
18539 18541 break;
18540 18542 }
18541 18543 /* Properly adjust the offset of the dest memory for aliasing. */
18542 18544 if (CONST_INT_P (count_exp))
18543 18545 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
18544 18546 (count / size_needed) * size_needed);
18545 18547 else
18546 18548 dst = change_address (dst, BLKmode, destreg);
18547 18549
18548 18550 /* Step 4: Epilogue to copy the remaining bytes. */
18549 18551
18550 18552 if (label)
18551 18553 {
18552 18554 /* When the main loop is done, COUNT_EXP might hold the original count,
18553 18555 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
18554 18556 Epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
18555 18557 bytes. Compensate if needed. */
18556 18558
18557 18559 if (size_needed < epilogue_size_needed)
18558 18560 {
18559 18561 tmp =
18560 18562 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
18561 18563 GEN_INT (size_needed - 1), count_exp, 1,
18562 18564 OPTAB_DIRECT);
18563 18565 if (tmp != count_exp)
18564 18566 emit_move_insn (count_exp, tmp);
18565 18567 }
18566 18568 emit_label (label);
18567 18569 LABEL_NUSES (label) = 1;
18568 18570 }
18569 18571 epilogue:
18570 18572 if (count_exp != const0_rtx && epilogue_size_needed > 1)
18571 18573 {
18572 18574 if (force_loopy_epilogue)
18573 18575 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
18574 18576 epilogue_size_needed);
18575 18577 else
18576 18578 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
18577 18579 epilogue_size_needed);
18578 18580 }
18579 18581 if (jump_around_label)
18580 18582 emit_label (jump_around_label);
18581 18583 return 1;
18582 18584 }
18583 18585
18584 18586 /* Expand the appropriate insns for doing strlen if not just doing
18585 18587 repnz; scasb
18586 18588
18587 18589 out = result, initialized with the start address
18588 18590 align_rtx = alignment of the address.
18589 18591 scratch = scratch register, initialized with the start address when
18590 18592 not aligned, otherwise undefined
18591 18593
18592 18594 This is just the body. It needs the initializations mentioned above and
18593 18595 some address computation at the end. These things are done in i386.md. */
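Taken together with the md part, the generated code behaves roughly like this C sketch (illustrative only; the strlen_sketch name and the byte-wise tail are invented for clarity, and the real code uses the flag-based selection emitted below instead of a final byte loop):

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    static size_t
    strlen_sketch (const char *s)
    {
      const char *p = s;

      while (((uintptr_t) p & 3) != 0)             /* 1..3 byte checks up to alignment */
        {
          if (*p == '\0')
            return (size_t) (p - s);
          p++;
        }
      for (;;)                                     /* aligned 4-byte scan */
        {
          uint32_t w;
          memcpy (&w, p, 4);
          if ((w - 0x01010101u) & ~w & 0x80808080u)   /* some byte of W is zero */
            break;
          p += 4;
        }
      while (*p != '\0')                           /* locate the exact zero byte */
        p++;
      return (size_t) (p - s);
    }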
18594 18596
18595 18597 static void
18596 18598 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
18597 18599 {
18598 18600 int align;
18599 18601 rtx tmp;
18600 18602 rtx align_2_label = NULL_RTX;
18601 18603 rtx align_3_label = NULL_RTX;
18602 18604 rtx align_4_label = gen_label_rtx ();
18603 18605 rtx end_0_label = gen_label_rtx ();
18604 18606 rtx mem;
18605 18607 rtx tmpreg = gen_reg_rtx (SImode);
18606 18608 rtx scratch = gen_reg_rtx (SImode);
18607 18609 rtx cmp;
18608 18610
18609 18611 align = 0;
18610 18612 if (CONST_INT_P (align_rtx))
18611 18613 align = INTVAL (align_rtx);
18612 18614
18613 18615 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
18614 18616
18615 18617 /* Is there a known alignment and is it less than 4? */
18616 18618 if (align < 4)
18617 18619 {
18618 18620 rtx scratch1 = gen_reg_rtx (Pmode);
18619 18621 emit_move_insn (scratch1, out);
18620 18622 /* Is there a known alignment and is it not 2? */
18621 18623 if (align != 2)
18622 18624 {
18623 18625 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
18624 18626 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
18625 18627
18626 18628 /* Leave just the 3 lower bits. */
18627 18629 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
18628 18630 NULL_RTX, 0, OPTAB_WIDEN);
18629 18631
18630 18632 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18631 18633 Pmode, 1, align_4_label);
18632 18634 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
18633 18635 Pmode, 1, align_2_label);
18634 18636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
18635 18637 Pmode, 1, align_3_label);
18636 18638 }
18637 18639 else
18638 18640 {
18639 18641 /* Since the alignment is 2, we have to check 2 or 0 bytes;
18640 18642 check whether it is aligned to 4 bytes. */
18641 18643
18642 18644 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
18643 18645 NULL_RTX, 0, OPTAB_WIDEN);
18644 18646
18645 18647 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
18646 18648 Pmode, 1, align_4_label);
18647 18649 }
18648 18650
18649 18651 mem = change_address (src, QImode, out);
18650 18652
18651 18653 /* Now compare the bytes. */
18652 18654
18653 18655 /* Compare the first n unaligned bytes one byte at a time. */
18654 18656 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
18655 18657 QImode, 1, end_0_label);
18656 18658
18657 18659 /* Increment the address. */
18658 18660 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18659 18661
18660 18662 /* Not needed with an alignment of 2 */
18661 18663 if (align != 2)
18662 18664 {
18663 18665 emit_label (align_2_label);
18664 18666
18665 18667 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18666 18668 end_0_label);
18667 18669
18668 18670 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18669 18671
18670 18672 emit_label (align_3_label);
18671 18673 }
18672 18674
18673 18675 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
18674 18676 end_0_label);
18675 18677
18676 18678 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx));
18677 18679 }
18678 18680
18679 18681 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
18680 18682 align this loop; that only bloats the code and does not make it
18681 18683 any faster. */
18682 18684 emit_label (align_4_label);
18683 18685
18684 18686 mem = change_address (src, SImode, out);
18685 18687 emit_move_insn (scratch, mem);
18686 18688 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4)));
18687 18689
18688 18690 /* This formula yields a nonzero result iff one of the bytes is zero.
18689 18691 This saves three branches inside the loop and many cycles. */
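/* A worked example of the test implemented by the next four insns (values
   chosen for illustration only): the combination computes
       tmp = (x - 0x01010101) & ~x & 0x80808080;
   which is nonzero exactly when some byte of X is zero.  For x = 0x12003456:
       x - 0x01010101 = 0x10ff3355
       ~x             = 0xedffcba9
       bitwise and    = 0x00ff0301
       & 0x80808080   = 0x00800000   (the sign bit of the zero byte).  */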
18690 18692
18691 18693 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
18692 18694 emit_insn (gen_one_cmplsi2 (scratch, scratch));
18693 18695 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
18694 18696 emit_insn (gen_andsi3 (tmpreg, tmpreg,
18695 18697 gen_int_mode (0x80808080, SImode)));
18696 18698 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
18697 18699 align_4_label);
18698 18700
18699 18701 if (TARGET_CMOVE)
18700 18702 {
18701 18703 rtx reg = gen_reg_rtx (SImode);
18702 18704 rtx reg2 = gen_reg_rtx (Pmode);
18703 18705 emit_move_insn (reg, tmpreg);
18704 18706 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
18705 18707
18706 18708 /* If zero is not in the first two bytes, move two bytes forward. */
18707 18709 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18708 18710 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18709 18711 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18710 18712 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
18711 18713 gen_rtx_IF_THEN_ELSE (SImode, tmp,
18712 18714 reg,
18713 18715 tmpreg)));
18714 18716 /* Emit lea manually to avoid clobbering of flags. */
18715 18717 emit_insn (gen_rtx_SET (SImode, reg2,
18716 18718 gen_rtx_PLUS (Pmode, out, const2_rtx)));
18717 18719
18718 18720 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18719 18721 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
18720 18722 emit_insn (gen_rtx_SET (VOIDmode, out,
18721 18723 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
18722 18724 reg2,
18723 18725 out)));
18724 18726
18725 18727 }
18726 18728 else
18727 18729 {
18728 18730 rtx end_2_label = gen_label_rtx ();
18729 18731 /* Is zero in the first two bytes? */
18730 18732
18731 18733 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
18732 18734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
18733 18735 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
18734 18736 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
18735 18737 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
18736 18738 pc_rtx);
18737 18739 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
18738 18740 JUMP_LABEL (tmp) = end_2_label;
18739 18741
18740 18742 /* Not in the first two. Move two bytes forward. */
18741 18743 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
18742 18744 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx));
18743 18745
18744 18746 emit_label (end_2_label);
18745 18747
18746 18748 }
18747 18749
18748 18750 /* Avoid branch in fixing the byte. */
18749 18751 tmpreg = gen_lowpart (QImode, tmpreg);
18750 18752 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
18751 18753 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
18752 18754 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp));
18753 18755
18754 18756 emit_label (end_0_label);
18755 18757 }
18756 18758
18757 18759 /* Expand strlen. */
18758 18760
18759 18761 int
18760 18762 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
18761 18763 {
18762 18764 rtx addr, scratch1, scratch2, scratch3, scratch4;
18763 18765
18764 18766 /* The generic case of the strlen expander is long. Avoid its
18765 18767 expansion unless TARGET_INLINE_ALL_STRINGOPS. */
18766 18768
18767 18769 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18768 18770 && !TARGET_INLINE_ALL_STRINGOPS
18769 18771 && !optimize_insn_for_size_p ()
18770 18772 && (!CONST_INT_P (align) || INTVAL (align) < 4))
18771 18773 return 0;
18772 18774
18773 18775 addr = force_reg (Pmode, XEXP (src, 0));
18774 18776 scratch1 = gen_reg_rtx (Pmode);
18775 18777
18776 18778 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
18777 18779 && !optimize_insn_for_size_p ())
18778 18780 {
18779 18781 /* Well, it seems that some optimizers do not combine a call like
18780 18782 foo(strlen(bar), strlen(bar));
18781 18783 when the move and the subtraction are done here. They do compute
18782 18784 the length just once when these instructions are emitted inside
18783 18785 output_strlen_unroll(). But since &bar[strlen(bar)] is often
18784 18786 used and this uses one fewer register for the lifetime of
18785 18787 output_strlen_unroll(), it is the better choice. */
18786 18788
18787 18789 emit_move_insn (out, addr);
18788 18790
18789 18791 ix86_expand_strlensi_unroll_1 (out, src, align);
18790 18792
18791 18793 /* strlensi_unroll_1 returns the address of the zero at the end of
18792 18794 the string, like memchr(), so compute the length by subtracting
18793 18795 the start address. */
18794 18796 emit_insn ((*ix86_gen_sub3) (out, out, addr));
18795 18797 }
18796 18798 else
18797 18799 {
18798 18800 rtx unspec;
18799 18801
18800 18802 /* Can't use this if the user has appropriated eax, ecx, or edi. */
18801 18803 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG])
18802 18804 return false;
18803 18805
18804 18806 scratch2 = gen_reg_rtx (Pmode);
18805 18807 scratch3 = gen_reg_rtx (Pmode);
18806 18808 scratch4 = force_reg (Pmode, constm1_rtx);
18807 18809
18808 18810 emit_move_insn (scratch3, addr);
18809 18811 eoschar = force_reg (QImode, eoschar);
18810 18812
18811 18813 src = replace_equiv_address_nv (src, scratch3);
18812 18814
18813 18815 /* If .md starts supporting :P, this can be done in .md. */
18814 18816 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
18815 18817 scratch4), UNSPEC_SCAS);
18816 18818 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
18817 18819 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1));
18818 18820 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx));
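      /* A sketch of the arithmetic (assuming, as the strlenqi_1 pattern
	 implies, that scratch1 receives the count register left by
	 repnz scasb): the count starts at -1 and is decremented once per
	 byte scanned, including the terminator, so it ends at -(strlen+2);
	 hence ~count + (-1) == strlen.  */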
18819 18821 }
18820 18822 return 1;
18821 18823 }
18822 18824
18823 18825 /* For a given symbol (function), construct code to compute the address of its
18824 18826 PLT entry in the large x86-64 PIC model. */
18825 18827 rtx
18826 18828 construct_plt_address (rtx symbol)
18827 18829 {
18828 18830 rtx tmp = gen_reg_rtx (Pmode);
18829 18831 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
18830 18832
18831 18833 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
18832 18834 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
18833 18835
18834 18836 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
18835 18837 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
18836 18838 return tmp;
18837 18839 }
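/* In effect this computes tmp = symbol@PLTOFF + <GOT base>, i.e. the PLT
   entry's offset from _GLOBAL_OFFSET_TABLE_ plus the register that the
   large PIC model keeps pointing at the GOT (pic_offset_table_rtx).  */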
18838 18840
18839 18841 void
18840 18842 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
18841 18843 rtx callarg2,
18842 18844 rtx pop, int sibcall)
18843 18845 {
18844 18846 rtx use = NULL, call;
18845 18847
18846 18848 if (pop == const0_rtx)
18847 18849 pop = NULL;
18848 18850 gcc_assert (!TARGET_64BIT || !pop);
18849 18851
18850 18852 if (TARGET_MACHO && !TARGET_64BIT)
18851 18853 {
18852 18854 #if TARGET_MACHO
18853 18855 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
18854 18856 fnaddr = machopic_indirect_call_target (fnaddr);
18855 18857 #endif
18856 18858 }
18857 18859 else
18858 18860 {
18859 18861 /* Static functions and indirect calls don't need the pic register. */
18860 18862 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
18861 18863 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18862 18864 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
18863 18865 use_reg (&use, pic_offset_table_rtx);
18864 18866 }
18865 18867
18866 18868 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
18867 18869 {
18868 18870 rtx al = gen_rtx_REG (QImode, AX_REG);
18869 18871 emit_move_insn (al, callarg2);
18870 18872 use_reg (&use, al);
18871 18873 }
18872 18874
18873 18875 if (ix86_cmodel == CM_LARGE_PIC
18874 18876 && GET_CODE (fnaddr) == MEM
18875 18877 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
18876 18878 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
18877 18879 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
18878 18880 else if (sibcall
18879 18881 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode)
18880 18882 : !call_insn_operand (XEXP (fnaddr, 0), Pmode))
18881 18883 {
18882 18884 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
18883 18885 fnaddr = gen_rtx_MEM (QImode, fnaddr);
18884 18886 }
18885 18887
18886 18888 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
18887 18889 if (retval)
18888 18890 call = gen_rtx_SET (VOIDmode, retval, call);
18889 18891 if (pop)
18890 18892 {
18891 18893 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
18892 18894 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
18893 18895 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
18894 18896 }
18895 18897 if (TARGET_64BIT
18896 18898 && ix86_cfun_abi () == MS_ABI
18897 18899 && (!callarg2 || INTVAL (callarg2) != -2))
18898 18900 {
18899 18901 /* We need to represent that SI and DI registers are clobbered
18900 18902 by SYSV calls. */
18901 18903 static int clobbered_registers[] = {
18902 18904 XMM6_REG, XMM7_REG, XMM8_REG,
18903 18905 XMM9_REG, XMM10_REG, XMM11_REG,
18904 18906 XMM12_REG, XMM13_REG, XMM14_REG,
18905 18907 XMM15_REG, SI_REG, DI_REG
18906 18908 };
18907 18909 unsigned int i;
18908 18910 rtx vec[ARRAY_SIZE (clobbered_registers) + 2];
18909 18911 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx),
18910 18912 UNSPEC_MS_TO_SYSV_CALL);
18911 18913
18912 18914 vec[0] = call;
18913 18915 vec[1] = unspec;
18914 18916 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++)
18915 18917 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i])
18916 18918 ? TImode : DImode,
18917 18919 gen_rtx_REG
18918 18920 (SSE_REGNO_P (clobbered_registers[i])
18919 18921 ? TImode : DImode,
18920 18922 clobbered_registers[i]));
18921 18923
18922 18924 call = gen_rtx_PARALLEL (VOIDmode,
18923 18925 gen_rtvec_v (ARRAY_SIZE (clobbered_registers)
18924 18926 + 2, vec));
18925 18927 }
18926 18928
18927 18929 call = emit_call_insn (call);
18928 18930 if (use)
18929 18931 CALL_INSN_FUNCTION_USAGE (call) = use;
18930 18932 }
18931 18933
18932 18934
18933 18935 /* Clear stack slot assignments remembered from previous functions.
18934 18936 This is called from INIT_EXPANDERS once before RTL is emitted for each
18935 18937 function. */
18936 18938
18937 18939 static struct machine_function *
18938 18940 ix86_init_machine_status (void)
18939 18941 {
18940 18942 struct machine_function *f;
18941 18943
18942 18944 f = GGC_CNEW (struct machine_function);
18943 18945 f->use_fast_prologue_epilogue_nregs = -1;
18944 18946 f->tls_descriptor_call_expanded_p = 0;
18945 18947 f->call_abi = DEFAULT_ABI;
18946 18948
18947 18949 return f;
18948 18950 }
18949 18951
18950 18952 /* Return a MEM corresponding to a stack slot with mode MODE.
18951 18953 Allocate a new slot if necessary.
18952 18954
18953 18955 The RTL for a function can have several slots available: N is
18954 18956 which slot to use. */
18955 18957
18956 18958 rtx
18957 18959 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
18958 18960 {
18959 18961 struct stack_local_entry *s;
18960 18962
18961 18963 gcc_assert (n < MAX_386_STACK_LOCALS);
18962 18964
18963 18965 /* Virtual slot is valid only before vregs are instantiated. */
18964 18966 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
18965 18967
18966 18968 for (s = ix86_stack_locals; s; s = s->next)
18967 18969 if (s->mode == mode && s->n == n)
18968 18970 return copy_rtx (s->rtl);
18969 18971
18970 18972 s = (struct stack_local_entry *)
18971 18973 ggc_alloc (sizeof (struct stack_local_entry));
18972 18974 s->n = n;
18973 18975 s->mode = mode;
18974 18976 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
18975 18977
18976 18978 s->next = ix86_stack_locals;
18977 18979 ix86_stack_locals = s;
18978 18980 return s->rtl;
18979 18981 }
18980 18982
18981 18983 /* Construct the SYMBOL_REF for the tls_get_addr function. */
18982 18984
18983 18985 static GTY(()) rtx ix86_tls_symbol;
18984 18986 rtx
18985 18987 ix86_tls_get_addr (void)
18986 18988 {
18987 18989
18988 18990 if (!ix86_tls_symbol)
18989 18991 {
18990 18992 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
18991 18993 (TARGET_ANY_GNU_TLS
18992 18994 && !TARGET_64BIT)
18993 18995 ? "___tls_get_addr"
18994 18996 : "__tls_get_addr");
18995 18997 }
18996 18998
18997 18999 return ix86_tls_symbol;
18998 19000 }
18999 19001
19000 19002 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
19001 19003
19002 19004 static GTY(()) rtx ix86_tls_module_base_symbol;
19003 19005 rtx
19004 19006 ix86_tls_module_base (void)
19005 19007 {
19006 19008
19007 19009 if (!ix86_tls_module_base_symbol)
19008 19010 {
19009 19011 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
19010 19012 "_TLS_MODULE_BASE_");
19011 19013 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
19012 19014 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
19013 19015 }
19014 19016
19015 19017 return ix86_tls_module_base_symbol;
19016 19018 }
19017 19019
19018 19020 /* Calculate the length of the memory address in the instruction
19019 19021 encoding. Does not include the one-byte modrm, opcode, or prefix. */
19020 19022
19021 19023 int
19022 19024 memory_address_length (rtx addr)
19023 19025 {
19024 19026 struct ix86_address parts;
19025 19027 rtx base, index, disp;
19026 19028 int len;
19027 19029 int ok;
19028 19030
19029 19031 if (GET_CODE (addr) == PRE_DEC
19030 19032 || GET_CODE (addr) == POST_INC
19031 19033 || GET_CODE (addr) == PRE_MODIFY
19032 19034 || GET_CODE (addr) == POST_MODIFY)
19033 19035 return 0;
19034 19036
19035 19037 ok = ix86_decompose_address (addr, &parts);
19036 19038 gcc_assert (ok);
19037 19039
19038 19040 if (parts.base && GET_CODE (parts.base) == SUBREG)
19039 19041 parts.base = SUBREG_REG (parts.base);
19040 19042 if (parts.index && GET_CODE (parts.index) == SUBREG)
19041 19043 parts.index = SUBREG_REG (parts.index);
19042 19044
19043 19045 base = parts.base;
19044 19046 index = parts.index;
19045 19047 disp = parts.disp;
19046 19048 len = 0;
19047 19049
19048 19050 /* Rule of thumb:
19049 19051 - esp as the base always wants an index,
19050 19052 - ebp as the base always wants a displacement. */
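  /* Some illustrative values as computed below (i386 operands only,
     counting bytes beyond modrm/opcode/prefix):
	(%ecx)          -> 0
	(%esp)          -> 1	(SIB byte)
	8(%ebp)         -> 1	(disp8)
	foo             -> 4	(disp32)
	8(%ebx,%esi,4)  -> 2	(SIB byte + disp8)
	foo(%ebx,%esi)  -> 5	(SIB byte + disp32)  */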
19051 19053
19052 19054 /* Register Indirect. */
19053 19055 if (base && !index && !disp)
19054 19056 {
19055 19057 /* esp (for its index) and ebp (for its displacement) need
19056 19058 the two-byte modrm form. */
19057 19059 if (addr == stack_pointer_rtx
19058 19060 || addr == arg_pointer_rtx
19059 19061 || addr == frame_pointer_rtx
19060 19062 || addr == hard_frame_pointer_rtx)
19061 19063 len = 1;
19062 19064 }
19063 19065
19064 19066 /* Direct Addressing. */
19065 19067 else if (disp && !base && !index)
19066 19068 len = 4;
19067 19069
19068 19070 else
19069 19071 {
19070 19072 /* Find the length of the displacement constant. */
19071 19073 if (disp)
19072 19074 {
19073 19075 if (base && satisfies_constraint_K (disp))
19074 19076 len = 1;
19075 19077 else
19076 19078 len = 4;
19077 19079 }
19078 19080 /* ebp always wants a displacement. */
19079 19081 else if (base == hard_frame_pointer_rtx)
19080 19082 len = 1;
19081 19083
19082 19084 /* An index requires the two-byte modrm form.... */
19083 19085 if (index
19084 19086 /* ...like esp, which always wants an index. */
19085 19087 || base == stack_pointer_rtx
19086 19088 || base == arg_pointer_rtx
19087 19089 || base == frame_pointer_rtx)
19088 19090 len += 1;
19089 19091 }
19090 19092
19091 19093 return len;
19092 19094 }
19093 19095
19094 19096 /* Compute default value for "length_immediate" attribute. When SHORTFORM
19095 19097 is set, expect that the insn has an 8-bit immediate alternative. */
19096 19098 int
19097 19099 ix86_attr_length_immediate_default (rtx insn, int shortform)
19098 19100 {
19099 19101 int len = 0;
19100 19102 int i;
19101 19103 extract_insn_cached (insn);
19102 19104 for (i = recog_data.n_operands - 1; i >= 0; --i)
19103 19105 if (CONSTANT_P (recog_data.operand[i]))
19104 19106 {
19105 19107 gcc_assert (!len);
19106 19108 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
19107 19109 len = 1;
19108 19110 else
19109 19111 {
19110 19112 switch (get_attr_mode (insn))
19111 19113 {
19112 19114 case MODE_QI:
19113 19115 len+=1;
19114 19116 break;
19115 19117 case MODE_HI:
19116 19118 len+=2;
19117 19119 break;
19118 19120 case MODE_SI:
19119 19121 len+=4;
19120 19122 break;
19121 19123 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
19122 19124 case MODE_DI:
19123 19125 len+=4;
19124 19126 break;
19125 19127 default:
19126 19128 fatal_insn ("unknown insn mode", insn);
19127 19129 }
19128 19130 }
19129 19131 }
19130 19132 return len;
19131 19133 }
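/* For instance (illustrative insns only): with SHORTFORM set, "add $3, %eax"
   counts 1 byte for its immediate, "add $300, %eax" counts 4, and a DImode
   "add $300, %rax" also counts 4, since its immediate is encoded as a
   32-bit sign-extended value.  */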
19132 19134 /* Compute default value for "length_address" attribute. */
19133 19135 int
19134 19136 ix86_attr_length_address_default (rtx insn)
19135 19137 {
19136 19138 int i;
19137 19139
19138 19140 if (get_attr_type (insn) == TYPE_LEA)
19139 19141 {
19140 19142 rtx set = PATTERN (insn);
19141 19143
19142 19144 if (GET_CODE (set) == PARALLEL)
19143 19145 set = XVECEXP (set, 0, 0);
19144 19146
19145 19147 gcc_assert (GET_CODE (set) == SET);
19146 19148
19147 19149 return memory_address_length (SET_SRC (set));
19148 19150 }
19149 19151
19150 19152 extract_insn_cached (insn);
19151 19153 for (i = recog_data.n_operands - 1; i >= 0; --i)
19152 19154 if (MEM_P (recog_data.operand[i]))
19153 19155 {
19154 19156 return memory_address_length (XEXP (recog_data.operand[i], 0));
19156 19158 }
19157 19159 return 0;
19158 19160 }
19159 19161
19160 19162 /* Compute default value for "length_vex" attribute. It includes
19161 19163 the 2- or 3-byte VEX prefix and 1 opcode byte. */
19162 19164
19163 19165 int
19164 19166 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode,
19165 19167 int has_vex_w)
19166 19168 {
19167 19169 int i;
19168 19170
19169 19171 /* Only the 0f opcode can use the 2-byte VEX prefix; the VEX W bit
19170 19172 requires the 3-byte VEX prefix. */
19171 19173 if (!has_0f_opcode || has_vex_w)
19172 19174 return 3 + 1;
19173 19175
19174 19176 /* We can always use the 2-byte VEX prefix in 32-bit mode. */
19175 19177 if (!TARGET_64BIT)
19176 19178 return 2 + 1;
19177 19179
19178 19180 extract_insn_cached (insn);
19179 19181
19180 19182 for (i = recog_data.n_operands - 1; i >= 0; --i)
19181 19183 if (REG_P (recog_data.operand[i]))
19182 19184 {
19183 19185 /* REX.W bit uses 3 byte VEX prefix. */
19184 19186 if (GET_MODE (recog_data.operand[i]) == DImode)
19185 19187 return 3 + 1;
19186 19188 }
19187 19189 else
19188 19190 {
19189 19191 /* REX.X or REX.B bits use 3 byte VEX prefix. */
19190 19192 if (MEM_P (recog_data.operand[i])
19191 19193 && x86_extended_reg_mentioned_p (recog_data.operand[i]))
19192 19194 return 3 + 1;
19193 19195 }
19194 19196
19195 19197 return 2 + 1;
19196 19198 }
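/* For instance (illustrative operands, not taken from the patterns):
   vaddps (%rax), %xmm1, %xmm2 can use the 2-byte VEX prefix, so this
   returns 2 + 1 = 3, while vaddps (%r8), %xmm1, %xmm2 mentions an extended
   register in its memory operand and therefore needs the 3-byte prefix,
   returning 3 + 1 = 4.  */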
19197 19199
19198 19200 /* Return the maximum number of instructions a cpu can issue. */
19199 19201
19200 19202 static int
19201 19203 ix86_issue_rate (void)
19202 19204 {
19203 19205 switch (ix86_tune)
19204 19206 {
19205 19207 case PROCESSOR_PENTIUM:
19206 19208 case PROCESSOR_K6:
19207 19209 return 2;
19208 19210
19209 19211 case PROCESSOR_PENTIUMPRO:
19210 19212 case PROCESSOR_PENTIUM4:
19211 19213 case PROCESSOR_ATHLON:
19212 19214 case PROCESSOR_K8:
19213 19215 case PROCESSOR_AMDFAM10:
19214 19216 case PROCESSOR_NOCONA:
19215 19217 case PROCESSOR_GENERIC32:
19216 19218 case PROCESSOR_GENERIC64:
19217 19219 return 3;
19218 19220
19219 19221 case PROCESSOR_CORE2:
19220 19222 return 4;
19221 19223
19222 19224 default:
19223 19225 return 1;
19224 19226 }
19225 19227 }
19226 19228
19227 19229 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
19228 19230 by DEP_INSN and nothing else set by DEP_INSN. */
19229 19231
19230 19232 static int
19231 19233 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19232 19234 {
19233 19235 rtx set, set2;
19234 19236
19235 19237 /* Simplify the test for uninteresting insns. */
19236 19238 if (insn_type != TYPE_SETCC
19237 19239 && insn_type != TYPE_ICMOV
19238 19240 && insn_type != TYPE_FCMOV
19239 19241 && insn_type != TYPE_IBR)
19240 19242 return 0;
19241 19243
19242 19244 if ((set = single_set (dep_insn)) != 0)
19243 19245 {
19244 19246 set = SET_DEST (set);
19245 19247 set2 = NULL_RTX;
19246 19248 }
19247 19249 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
19248 19250 && XVECLEN (PATTERN (dep_insn), 0) == 2
19249 19251 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
19250 19252 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
19251 19253 {
19252 19254 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
19253 19255 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
19254 19256 }
19255 19257 else
19256 19258 return 0;
19257 19259
19258 19260 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
19259 19261 return 0;
19260 19262
19261 19263 /* This test is true if the dependent insn reads the flags but
19262 19264 not any other potentially set register. */
19263 19265 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
19264 19266 return 0;
19265 19267
19266 19268 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
19267 19269 return 0;
19268 19270
19269 19271 return 1;
19270 19272 }
19271 19273
19272 19274 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
19273 19275 address with operands set by DEP_INSN. */
19274 19276
19275 19277 static int
19276 19278 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
19277 19279 {
19278 19280 rtx addr;
19279 19281
19280 19282 if (insn_type == TYPE_LEA
19281 19283 && TARGET_PENTIUM)
19282 19284 {
19283 19285 addr = PATTERN (insn);
19284 19286
19285 19287 if (GET_CODE (addr) == PARALLEL)
19286 19288 addr = XVECEXP (addr, 0, 0);
19287 19289
19288 19290 gcc_assert (GET_CODE (addr) == SET);
19289 19291
19290 19292 addr = SET_SRC (addr);
19291 19293 }
19292 19294 else
19293 19295 {
19294 19296 int i;
19295 19297 extract_insn_cached (insn);
19296 19298 for (i = recog_data.n_operands - 1; i >= 0; --i)
19297 19299 if (MEM_P (recog_data.operand[i]))
19298 19300 {
19299 19301 addr = XEXP (recog_data.operand[i], 0);
19300 19302 goto found;
19301 19303 }
19302 19304 return 0;
19303 19305 found:;
19304 19306 }
19305 19307
19306 19308 return modified_in_p (addr, dep_insn);
19307 19309 }
19308 19310
19309 19311 static int
19310 19312 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
19311 19313 {
19312 19314 enum attr_type insn_type, dep_insn_type;
19313 19315 enum attr_memory memory;
19314 19316 rtx set, set2;
19315 19317 int dep_insn_code_number;
19316 19318
19317 19319 /* Anti and output dependencies have zero cost on all CPUs. */
19318 19320 if (REG_NOTE_KIND (link) != 0)
19319 19321 return 0;
19320 19322
19321 19323 dep_insn_code_number = recog_memoized (dep_insn);
19322 19324
19323 19325 /* If we can't recognize the insns, we can't really do anything. */
19324 19326 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
19325 19327 return cost;
19326 19328
19327 19329 insn_type = get_attr_type (insn);
19328 19330 dep_insn_type = get_attr_type (dep_insn);
19329 19331
19330 19332 switch (ix86_tune)
19331 19333 {
19332 19334 case PROCESSOR_PENTIUM:
19333 19335 /* Address Generation Interlock adds a cycle of latency. */
19334 19336 if (ix86_agi_dependent (insn, dep_insn, insn_type))
19335 19337 cost += 1;
19336 19338
19337 19339 /* ??? Compares pair with jump/setcc. */
19338 19340 if (ix86_flags_dependent (insn, dep_insn, insn_type))
19339 19341 cost = 0;
19340 19342
19341 19343 /* Floating point stores require value to be ready one cycle earlier. */
19342 19344 if (insn_type == TYPE_FMOV
19343 19345 && get_attr_memory (insn) == MEMORY_STORE
19344 19346 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19345 19347 cost += 1;
19346 19348 break;
19347 19349
19348 19350 case PROCESSOR_PENTIUMPRO:
19349 19351 memory = get_attr_memory (insn);
19350 19352
19351 19353 /* INT->FP conversion is expensive. */
19352 19354 if (get_attr_fp_int_src (dep_insn))
19353 19355 cost += 5;
19354 19356
19355 19357 /* There is one cycle extra latency between an FP op and a store. */
19356 19358 if (insn_type == TYPE_FMOV
19357 19359 && (set = single_set (dep_insn)) != NULL_RTX
19358 19360 && (set2 = single_set (insn)) != NULL_RTX
19359 19361 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
19360 19362 && MEM_P (SET_DEST (set2)))
19361 19363 cost += 1;
19362 19364
19363 19365 /* Model the reorder buffer's ability to hide the latency of a load by
19364 19366 executing it in parallel with the previous instruction when the
19365 19367 previous instruction is not needed to compute the address. */
19366 19368 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19367 19369 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19368 19370 {
19369 19371 /* Claim that moves take one cycle, as the core can issue one load
19370 19372 at a time and the next load can start a cycle later. */
19371 19373 if (dep_insn_type == TYPE_IMOV
19372 19374 || dep_insn_type == TYPE_FMOV)
19373 19375 cost = 1;
19374 19376 else if (cost > 1)
19375 19377 cost--;
19376 19378 }
19377 19379 break;
19378 19380
19379 19381 case PROCESSOR_K6:
19380 19382 memory = get_attr_memory (insn);
19381 19383
19382 19384 /* The esp dependency is resolved before the instruction is really
19383 19385 finished. */
19384 19386 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
19385 19387 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
19386 19388 return 1;
19387 19389
19388 19390 /* INT->FP conversion is expensive. */
19389 19391 if (get_attr_fp_int_src (dep_insn))
19390 19392 cost += 5;
19391 19393
19392 19394 /* Model the reorder buffer's ability to hide the latency of a load by
19393 19395 executing it in parallel with the previous instruction when the
19394 19396 previous instruction is not needed to compute the address. */
19395 19397 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19396 19398 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19397 19399 {
19398 19400 /* Claim that moves take one cycle, as the core can issue one load
19399 19401 at a time and the next load can start a cycle later. */
19400 19402 if (dep_insn_type == TYPE_IMOV
19401 19403 || dep_insn_type == TYPE_FMOV)
19402 19404 cost = 1;
19403 19405 else if (cost > 2)
19404 19406 cost -= 2;
19405 19407 else
19406 19408 cost = 1;
19407 19409 }
19408 19410 break;
19409 19411
19410 19412 case PROCESSOR_ATHLON:
19411 19413 case PROCESSOR_K8:
19412 19414 case PROCESSOR_AMDFAM10:
19413 19415 case PROCESSOR_GENERIC32:
19414 19416 case PROCESSOR_GENERIC64:
19415 19417 memory = get_attr_memory (insn);
19416 19418
19417 19419 /* Model the reorder buffer's ability to hide the latency of a load by
19418 19420 executing it in parallel with the previous instruction when the
19419 19421 previous instruction is not needed to compute the address. */
19420 19422 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
19421 19423 && !ix86_agi_dependent (insn, dep_insn, insn_type))
19422 19424 {
19423 19425 enum attr_unit unit = get_attr_unit (insn);
19424 19426 int loadcost = 3;
19425 19427
19426 19428 /* Because of the difference between the length of integer and
19427 19429 floating unit pipeline preparation stages, the memory operands
19428 19430 for floating point are cheaper.
19429 19431
19430 19432 ??? For Athlon the difference is most probably 2. */
19431 19433 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
19432 19434 loadcost = 3;
19433 19435 else
19434 19436 loadcost = TARGET_ATHLON ? 2 : 0;
19435 19437
19436 19438 if (cost >= loadcost)
19437 19439 cost -= loadcost;
19438 19440 else
19439 19441 cost = 0;
19440 19442 }
19441 19443
19442 19444 default:
19443 19445 break;
19444 19446 }
19445 19447
19446 19448 return cost;
19447 19449 }
19448 19450
19449 19451 /* How many alternative schedules to try. This should be as wide as the
19450 19452 scheduling freedom in the DFA, but no wider. Making this value too
19451 19453 large results in extra work for the scheduler. */
19452 19454
19453 19455 static int
19454 19456 ia32_multipass_dfa_lookahead (void)
19455 19457 {
19456 19458 switch (ix86_tune)
19457 19459 {
19458 19460 case PROCESSOR_PENTIUM:
19459 19461 return 2;
19460 19462
19461 19463 case PROCESSOR_PENTIUMPRO:
19462 19464 case PROCESSOR_K6:
19463 19465 return 1;
19464 19466
19465 19467 default:
19466 19468 return 0;
19467 19469 }
19468 19470 }
19469 19471
19470 19472
19471 19473 /* Compute the alignment given to a constant that is being placed in memory.
19472 19474 EXP is the constant and ALIGN is the alignment that the object would
19473 19475 ordinarily have.
19474 19476 The value of this function is used instead of that alignment to align
19475 19477 the object. */
19476 19478
19477 19479 int
19478 19480 ix86_constant_alignment (tree exp, int align)
19479 19481 {
19480 19482 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST
19481 19483 || TREE_CODE (exp) == INTEGER_CST)
19482 19484 {
19483 19485 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
19484 19486 return 64;
19485 19487 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
19486 19488 return 128;
19487 19489 }
19488 19490 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
19489 19491 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
19490 19492 return BITS_PER_WORD;
19491 19493
19492 19494 return align;
19493 19495 }
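/* For example, a DFmode constant placed in memory is given 64-bit alignment
   even on i386, and a string constant with TREE_STRING_LENGTH of 31 or more
   is word-aligned unless optimizing for size.  */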
19494 19496
19495 19497 /* Compute the alignment for a static variable.
19496 19498 TYPE is the data type, and ALIGN is the alignment that
19497 19499 the object would ordinarily have. The value of this function is used
19498 19500 instead of that alignment to align the object. */
19499 19501
19500 19502 int
19501 19503 ix86_data_alignment (tree type, int align)
19502 19504 {
19503 19505 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
19504 19506
19505 19507 if (AGGREGATE_TYPE_P (type)
19506 19508 && TYPE_SIZE (type)
19507 19509 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19508 19510 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
19509 19511 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
19510 19512 && align < max_align)
19511 19513 align = max_align;
19512 19514
19513 19515 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
19514 19516 to a 16-byte boundary. */
19515 19517 if (TARGET_64BIT)
19516 19518 {
19517 19519 if (AGGREGATE_TYPE_P (type)
19518 19520 && TYPE_SIZE (type)
19519 19521 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19520 19522 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
19521 19523 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19522 19524 return 128;
19523 19525 }
19524 19526
19525 19527 if (TREE_CODE (type) == ARRAY_TYPE)
19526 19528 {
19527 19529 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19528 19530 return 64;
19529 19531 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19530 19532 return 128;
19531 19533 }
19532 19534 else if (TREE_CODE (type) == COMPLEX_TYPE)
19533 19535 {
19534 19536
19535 19537 if (TYPE_MODE (type) == DCmode && align < 64)
19536 19538 return 64;
19537 19539 if ((TYPE_MODE (type) == XCmode
19538 19540 || TYPE_MODE (type) == TCmode) && align < 128)
19539 19541 return 128;
19540 19542 }
19541 19543 else if ((TREE_CODE (type) == RECORD_TYPE
19542 19544 || TREE_CODE (type) == UNION_TYPE
19543 19545 || TREE_CODE (type) == QUAL_UNION_TYPE)
19544 19546 && TYPE_FIELDS (type))
19545 19547 {
19546 19548 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19547 19549 return 64;
19548 19550 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19549 19551 return 128;
19550 19552 }
19551 19553 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19552 19554 || TREE_CODE (type) == INTEGER_TYPE)
19553 19555 {
19554 19556 if (TYPE_MODE (type) == DFmode && align < 64)
19555 19557 return 64;
19556 19558 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19557 19559 return 128;
19558 19560 }
19559 19561
19560 19562 return align;
19561 19563 }
19562 19564
19563 19565 /* Compute the alignment for a local variable or a stack slot. EXP is
19564 19566 the data type or decl itself, MODE is the widest mode available and
19565 19567 ALIGN is the alignment that the object would ordinarily have. The
19566 19568 value of this macro is used instead of that alignment to align the
19567 19569 object. */
19568 19570
19569 19571 unsigned int
19570 19572 ix86_local_alignment (tree exp, enum machine_mode mode,
19571 19573 unsigned int align)
19572 19574 {
19573 19575 tree type, decl;
19574 19576
19575 19577 if (exp && DECL_P (exp))
19576 19578 {
19577 19579 type = TREE_TYPE (exp);
19578 19580 decl = exp;
19579 19581 }
19580 19582 else
19581 19583 {
19582 19584 type = exp;
19583 19585 decl = NULL;
19584 19586 }
19585 19587
19586 19588 /* Don't do dynamic stack realignment for long long objects with
19587 19589 -mpreferred-stack-boundary=2. */
19588 19590 if (!TARGET_64BIT
19589 19591 && align == 64
19590 19592 && ix86_preferred_stack_boundary < 64
19591 19593 && (mode == DImode || (type && TYPE_MODE (type) == DImode))
19592 19594 && (!type || !TYPE_USER_ALIGN (type))
19593 19595 && (!decl || !DECL_USER_ALIGN (decl)))
19594 19596 align = 32;
19595 19597
19596 19598 /* If TYPE is NULL, we are allocating a stack slot for a caller-save
19597 19599 register in MODE. We will return the larger of the XFmode and DFmode
19598 19600 alignments. */
19599 19601 if (!type)
19600 19602 {
19601 19603 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode))
19602 19604 align = GET_MODE_ALIGNMENT (DFmode);
19603 19605 return align;
19604 19606 }
19605 19607
19606 19608 /* The x86-64 ABI requires arrays larger than 16 bytes to be aligned
19607 19609 to a 16-byte boundary. */
19608 19610 if (TARGET_64BIT)
19609 19611 {
19610 19612 if (AGGREGATE_TYPE_P (type)
19611 19613 && TYPE_SIZE (type)
19612 19614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
19613 19615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
19614 19616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
19615 19617 return 128;
19616 19618 }
19617 19619 if (TREE_CODE (type) == ARRAY_TYPE)
19618 19620 {
19619 19621 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
19620 19622 return 64;
19621 19623 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
19622 19624 return 128;
19623 19625 }
19624 19626 else if (TREE_CODE (type) == COMPLEX_TYPE)
19625 19627 {
19626 19628 if (TYPE_MODE (type) == DCmode && align < 64)
19627 19629 return 64;
19628 19630 if ((TYPE_MODE (type) == XCmode
19629 19631 || TYPE_MODE (type) == TCmode) && align < 128)
19630 19632 return 128;
19631 19633 }
19632 19634 else if ((TREE_CODE (type) == RECORD_TYPE
19633 19635 || TREE_CODE (type) == UNION_TYPE
19634 19636 || TREE_CODE (type) == QUAL_UNION_TYPE)
19635 19637 && TYPE_FIELDS (type))
19636 19638 {
19637 19639 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
19638 19640 return 64;
19639 19641 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
19640 19642 return 128;
19641 19643 }
19642 19644 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
19643 19645 || TREE_CODE (type) == INTEGER_TYPE)
19644 19646 {
19645 19647
19646 19648 if (TYPE_MODE (type) == DFmode && align < 64)
19647 19649 return 64;
19648 19650 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
19649 19651 return 128;
19650 19652 }
19651 19653 return align;
19652 19654 }
19653 19655
19654 19656 /* Compute the minimum required alignment for dynamic stack realignment
19655 19657 purposes for a local variable, parameter or a stack slot. EXP is
19656 19658 the data type or decl itself, MODE is its mode and ALIGN is the
19657 19659 alignment that the object would ordinarily have. */
19658 19660
19659 19661 unsigned int
19660 19662 ix86_minimum_alignment (tree exp, enum machine_mode mode,
19661 19663 unsigned int align)
19662 19664 {
19663 19665 tree type, decl;
19664 19666
19665 19667 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64)
19666 19668 return align;
19667 19669
19668 19670 if (exp && DECL_P (exp))
19669 19671 {
19670 19672 type = TREE_TYPE (exp);
19671 19673 decl = exp;
19672 19674 }
19673 19675 else
19674 19676 {
19675 19677 type = exp;
19676 19678 decl = NULL;
19677 19679 }
19678 19680
19679 19681 /* Don't do dynamic stack realignment for long long objects with
19680 19682 -mpreferred-stack-boundary=2. */
19681 19683 if ((mode == DImode || (type && TYPE_MODE (type) == DImode))
19682 19684 && (!type || !TYPE_USER_ALIGN (type))
19683 19685 && (!decl || !DECL_USER_ALIGN (decl)))
19684 19686 return 32;
19685 19687
19686 19688 return align;
19687 19689 }
19688 19690
19689 19691 /* Emit RTL insns to initialize the variable parts of a trampoline.
19690 19692 FNADDR is an RTX for the address of the function's pure code.
19691 19693 CXT is an RTX for the static chain value for the function. */
19692 19694 void
19693 19695 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
19694 19696 {
19695 19697 if (!TARGET_64BIT)
19696 19698 {
19697 19699 /* Compute offset from the end of the jmp to the target function. */
19698 19700 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
19699 19701 plus_constant (tramp, 10),
19700 19702 NULL_RTX, 1, OPTAB_DIRECT);
19701 19703 emit_move_insn (gen_rtx_MEM (QImode, tramp),
19702 19704 gen_int_mode (0xb9, QImode));
19703 19705 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
19704 19706 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
19705 19707 gen_int_mode (0xe9, QImode));
19706 19708 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
19707 19709 }
19708 19710 else
19709 19711 {
19710 19712 int offset = 0;
19711 19713 /* Try to load the address using the shorter movl instead of movabs.
19712 19714 We may want to support movq for kernel mode, but the kernel does not use
19713 19715 trampolines at the moment. */
19714 19716 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
19715 19717 {
19716 19718 fnaddr = copy_to_mode_reg (DImode, fnaddr);
19717 19719 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19718 19720 gen_int_mode (0xbb41, HImode));
19719 19721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
19720 19722 gen_lowpart (SImode, fnaddr));
19721 19723 offset += 6;
19722 19724 }
19723 19725 else
19724 19726 {
19725 19727 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19726 19728 gen_int_mode (0xbb49, HImode));
19727 19729 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19728 19730 fnaddr);
19729 19731 offset += 10;
19730 19732 }
19731 19733 /* Load static chain using movabs to r10. */
19732 19734 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19733 19735 gen_int_mode (0xba49, HImode));
19734 19736 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
19735 19737 cxt);
19736 19738 offset += 10;
19737 19739 /* Jump to r11. */
19738 19740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
19739 19741 gen_int_mode (0xff49, HImode));
19740 19742 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
19741 19743 gen_int_mode (0xe3, QImode));
19742 19744 offset += 3;
19743 19745 gcc_assert (offset <= TRAMPOLINE_SIZE);
19744 19746 }
19745 19747
19746 19748 #ifdef ENABLE_EXECUTE_STACK
19747 19749 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
19748 19750 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
19749 19751 #endif
19750 19752 }
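/* For reference, the bytes emitted above lay out as follows (an informal
   sketch of the encoding):

   32-bit, 10 bytes:
	b9 <cxt:4>		movl   $cxt, %ecx
	e9 <disp:4>		jmp    fnaddr		(disp relative to tramp+10)

   64-bit, 19 or 23 bytes:
	41 bb <fnaddr:4>	movl   $fnaddr, %r11d	(short form)
     or 49 bb <fnaddr:8>	movabs $fnaddr, %r11
	49 ba <cxt:8>		movabs $cxt, %r10
	49 ff e3		jmp    *%r11  */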
19751 19753
19752 19754 /* Codes for all the SSE/MMX builtins. */
19753 19755 enum ix86_builtins
19754 19756 {
19755 19757 IX86_BUILTIN_ADDPS,
19756 19758 IX86_BUILTIN_ADDSS,
19757 19759 IX86_BUILTIN_DIVPS,
19758 19760 IX86_BUILTIN_DIVSS,
19759 19761 IX86_BUILTIN_MULPS,
19760 19762 IX86_BUILTIN_MULSS,
19761 19763 IX86_BUILTIN_SUBPS,
19762 19764 IX86_BUILTIN_SUBSS,
19763 19765
19764 19766 IX86_BUILTIN_CMPEQPS,
19765 19767 IX86_BUILTIN_CMPLTPS,
19766 19768 IX86_BUILTIN_CMPLEPS,
19767 19769 IX86_BUILTIN_CMPGTPS,
19768 19770 IX86_BUILTIN_CMPGEPS,
19769 19771 IX86_BUILTIN_CMPNEQPS,
19770 19772 IX86_BUILTIN_CMPNLTPS,
19771 19773 IX86_BUILTIN_CMPNLEPS,
19772 19774 IX86_BUILTIN_CMPNGTPS,
19773 19775 IX86_BUILTIN_CMPNGEPS,
19774 19776 IX86_BUILTIN_CMPORDPS,
19775 19777 IX86_BUILTIN_CMPUNORDPS,
19776 19778 IX86_BUILTIN_CMPEQSS,
19777 19779 IX86_BUILTIN_CMPLTSS,
19778 19780 IX86_BUILTIN_CMPLESS,
19779 19781 IX86_BUILTIN_CMPNEQSS,
19780 19782 IX86_BUILTIN_CMPNLTSS,
19781 19783 IX86_BUILTIN_CMPNLESS,
19782 19784 IX86_BUILTIN_CMPNGTSS,
19783 19785 IX86_BUILTIN_CMPNGESS,
19784 19786 IX86_BUILTIN_CMPORDSS,
19785 19787 IX86_BUILTIN_CMPUNORDSS,
19786 19788
19787 19789 IX86_BUILTIN_COMIEQSS,
19788 19790 IX86_BUILTIN_COMILTSS,
19789 19791 IX86_BUILTIN_COMILESS,
19790 19792 IX86_BUILTIN_COMIGTSS,
19791 19793 IX86_BUILTIN_COMIGESS,
19792 19794 IX86_BUILTIN_COMINEQSS,
19793 19795 IX86_BUILTIN_UCOMIEQSS,
19794 19796 IX86_BUILTIN_UCOMILTSS,
19795 19797 IX86_BUILTIN_UCOMILESS,
19796 19798 IX86_BUILTIN_UCOMIGTSS,
19797 19799 IX86_BUILTIN_UCOMIGESS,
19798 19800 IX86_BUILTIN_UCOMINEQSS,
19799 19801
19800 19802 IX86_BUILTIN_CVTPI2PS,
19801 19803 IX86_BUILTIN_CVTPS2PI,
19802 19804 IX86_BUILTIN_CVTSI2SS,
19803 19805 IX86_BUILTIN_CVTSI642SS,
19804 19806 IX86_BUILTIN_CVTSS2SI,
19805 19807 IX86_BUILTIN_CVTSS2SI64,
19806 19808 IX86_BUILTIN_CVTTPS2PI,
19807 19809 IX86_BUILTIN_CVTTSS2SI,
19808 19810 IX86_BUILTIN_CVTTSS2SI64,
19809 19811
19810 19812 IX86_BUILTIN_MAXPS,
19811 19813 IX86_BUILTIN_MAXSS,
19812 19814 IX86_BUILTIN_MINPS,
19813 19815 IX86_BUILTIN_MINSS,
19814 19816
19815 19817 IX86_BUILTIN_LOADUPS,
19816 19818 IX86_BUILTIN_STOREUPS,
19817 19819 IX86_BUILTIN_MOVSS,
19818 19820
19819 19821 IX86_BUILTIN_MOVHLPS,
19820 19822 IX86_BUILTIN_MOVLHPS,
19821 19823 IX86_BUILTIN_LOADHPS,
19822 19824 IX86_BUILTIN_LOADLPS,
19823 19825 IX86_BUILTIN_STOREHPS,
19824 19826 IX86_BUILTIN_STORELPS,
19825 19827
19826 19828 IX86_BUILTIN_MASKMOVQ,
19827 19829 IX86_BUILTIN_MOVMSKPS,
19828 19830 IX86_BUILTIN_PMOVMSKB,
19829 19831
19830 19832 IX86_BUILTIN_MOVNTPS,
19831 19833 IX86_BUILTIN_MOVNTQ,
19832 19834
19833 19835 IX86_BUILTIN_LOADDQU,
19834 19836 IX86_BUILTIN_STOREDQU,
19835 19837
19836 19838 IX86_BUILTIN_PACKSSWB,
19837 19839 IX86_BUILTIN_PACKSSDW,
19838 19840 IX86_BUILTIN_PACKUSWB,
19839 19841
19840 19842 IX86_BUILTIN_PADDB,
19841 19843 IX86_BUILTIN_PADDW,
19842 19844 IX86_BUILTIN_PADDD,
19843 19845 IX86_BUILTIN_PADDQ,
19844 19846 IX86_BUILTIN_PADDSB,
19845 19847 IX86_BUILTIN_PADDSW,
19846 19848 IX86_BUILTIN_PADDUSB,
19847 19849 IX86_BUILTIN_PADDUSW,
19848 19850 IX86_BUILTIN_PSUBB,
19849 19851 IX86_BUILTIN_PSUBW,
19850 19852 IX86_BUILTIN_PSUBD,
19851 19853 IX86_BUILTIN_PSUBQ,
19852 19854 IX86_BUILTIN_PSUBSB,
19853 19855 IX86_BUILTIN_PSUBSW,
19854 19856 IX86_BUILTIN_PSUBUSB,
19855 19857 IX86_BUILTIN_PSUBUSW,
19856 19858
19857 19859 IX86_BUILTIN_PAND,
19858 19860 IX86_BUILTIN_PANDN,
19859 19861 IX86_BUILTIN_POR,
19860 19862 IX86_BUILTIN_PXOR,
19861 19863
19862 19864 IX86_BUILTIN_PAVGB,
19863 19865 IX86_BUILTIN_PAVGW,
19864 19866
19865 19867 IX86_BUILTIN_PCMPEQB,
19866 19868 IX86_BUILTIN_PCMPEQW,
19867 19869 IX86_BUILTIN_PCMPEQD,
19868 19870 IX86_BUILTIN_PCMPGTB,
19869 19871 IX86_BUILTIN_PCMPGTW,
19870 19872 IX86_BUILTIN_PCMPGTD,
19871 19873
19872 19874 IX86_BUILTIN_PMADDWD,
19873 19875
19874 19876 IX86_BUILTIN_PMAXSW,
19875 19877 IX86_BUILTIN_PMAXUB,
19876 19878 IX86_BUILTIN_PMINSW,
19877 19879 IX86_BUILTIN_PMINUB,
19878 19880
19879 19881 IX86_BUILTIN_PMULHUW,
19880 19882 IX86_BUILTIN_PMULHW,
19881 19883 IX86_BUILTIN_PMULLW,
19882 19884
19883 19885 IX86_BUILTIN_PSADBW,
19884 19886 IX86_BUILTIN_PSHUFW,
19885 19887
19886 19888 IX86_BUILTIN_PSLLW,
19887 19889 IX86_BUILTIN_PSLLD,
19888 19890 IX86_BUILTIN_PSLLQ,
19889 19891 IX86_BUILTIN_PSRAW,
19890 19892 IX86_BUILTIN_PSRAD,
19891 19893 IX86_BUILTIN_PSRLW,
19892 19894 IX86_BUILTIN_PSRLD,
19893 19895 IX86_BUILTIN_PSRLQ,
19894 19896 IX86_BUILTIN_PSLLWI,
19895 19897 IX86_BUILTIN_PSLLDI,
19896 19898 IX86_BUILTIN_PSLLQI,
19897 19899 IX86_BUILTIN_PSRAWI,
19898 19900 IX86_BUILTIN_PSRADI,
19899 19901 IX86_BUILTIN_PSRLWI,
19900 19902 IX86_BUILTIN_PSRLDI,
19901 19903 IX86_BUILTIN_PSRLQI,
19902 19904
19903 19905 IX86_BUILTIN_PUNPCKHBW,
19904 19906 IX86_BUILTIN_PUNPCKHWD,
19905 19907 IX86_BUILTIN_PUNPCKHDQ,
19906 19908 IX86_BUILTIN_PUNPCKLBW,
19907 19909 IX86_BUILTIN_PUNPCKLWD,
19908 19910 IX86_BUILTIN_PUNPCKLDQ,
19909 19911
19910 19912 IX86_BUILTIN_SHUFPS,
19911 19913
19912 19914 IX86_BUILTIN_RCPPS,
19913 19915 IX86_BUILTIN_RCPSS,
19914 19916 IX86_BUILTIN_RSQRTPS,
19915 19917 IX86_BUILTIN_RSQRTPS_NR,
19916 19918 IX86_BUILTIN_RSQRTSS,
19917 19919 IX86_BUILTIN_RSQRTF,
19918 19920 IX86_BUILTIN_SQRTPS,
19919 19921 IX86_BUILTIN_SQRTPS_NR,
19920 19922 IX86_BUILTIN_SQRTSS,
19921 19923
19922 19924 IX86_BUILTIN_UNPCKHPS,
19923 19925 IX86_BUILTIN_UNPCKLPS,
19924 19926
19925 19927 IX86_BUILTIN_ANDPS,
19926 19928 IX86_BUILTIN_ANDNPS,
19927 19929 IX86_BUILTIN_ORPS,
19928 19930 IX86_BUILTIN_XORPS,
19929 19931
19930 19932 IX86_BUILTIN_EMMS,
19931 19933 IX86_BUILTIN_LDMXCSR,
19932 19934 IX86_BUILTIN_STMXCSR,
19933 19935 IX86_BUILTIN_SFENCE,
19934 19936
19935 19937 /* 3DNow! Original */
19936 19938 IX86_BUILTIN_FEMMS,
19937 19939 IX86_BUILTIN_PAVGUSB,
19938 19940 IX86_BUILTIN_PF2ID,
19939 19941 IX86_BUILTIN_PFACC,
19940 19942 IX86_BUILTIN_PFADD,
19941 19943 IX86_BUILTIN_PFCMPEQ,
19942 19944 IX86_BUILTIN_PFCMPGE,
19943 19945 IX86_BUILTIN_PFCMPGT,
19944 19946 IX86_BUILTIN_PFMAX,
19945 19947 IX86_BUILTIN_PFMIN,
19946 19948 IX86_BUILTIN_PFMUL,
19947 19949 IX86_BUILTIN_PFRCP,
19948 19950 IX86_BUILTIN_PFRCPIT1,
19949 19951 IX86_BUILTIN_PFRCPIT2,
19950 19952 IX86_BUILTIN_PFRSQIT1,
19951 19953 IX86_BUILTIN_PFRSQRT,
19952 19954 IX86_BUILTIN_PFSUB,
19953 19955 IX86_BUILTIN_PFSUBR,
19954 19956 IX86_BUILTIN_PI2FD,
19955 19957 IX86_BUILTIN_PMULHRW,
19956 19958
19957 19959 /* 3DNow! Athlon Extensions */
19958 19960 IX86_BUILTIN_PF2IW,
19959 19961 IX86_BUILTIN_PFNACC,
19960 19962 IX86_BUILTIN_PFPNACC,
19961 19963 IX86_BUILTIN_PI2FW,
19962 19964 IX86_BUILTIN_PSWAPDSI,
19963 19965 IX86_BUILTIN_PSWAPDSF,
19964 19966
19965 19967 /* SSE2 */
19966 19968 IX86_BUILTIN_ADDPD,
19967 19969 IX86_BUILTIN_ADDSD,
19968 19970 IX86_BUILTIN_DIVPD,
19969 19971 IX86_BUILTIN_DIVSD,
19970 19972 IX86_BUILTIN_MULPD,
19971 19973 IX86_BUILTIN_MULSD,
19972 19974 IX86_BUILTIN_SUBPD,
19973 19975 IX86_BUILTIN_SUBSD,
19974 19976
19975 19977 IX86_BUILTIN_CMPEQPD,
19976 19978 IX86_BUILTIN_CMPLTPD,
19977 19979 IX86_BUILTIN_CMPLEPD,
19978 19980 IX86_BUILTIN_CMPGTPD,
19979 19981 IX86_BUILTIN_CMPGEPD,
19980 19982 IX86_BUILTIN_CMPNEQPD,
19981 19983 IX86_BUILTIN_CMPNLTPD,
19982 19984 IX86_BUILTIN_CMPNLEPD,
19983 19985 IX86_BUILTIN_CMPNGTPD,
19984 19986 IX86_BUILTIN_CMPNGEPD,
19985 19987 IX86_BUILTIN_CMPORDPD,
19986 19988 IX86_BUILTIN_CMPUNORDPD,
19987 19989 IX86_BUILTIN_CMPEQSD,
19988 19990 IX86_BUILTIN_CMPLTSD,
19989 19991 IX86_BUILTIN_CMPLESD,
19990 19992 IX86_BUILTIN_CMPNEQSD,
19991 19993 IX86_BUILTIN_CMPNLTSD,
19992 19994 IX86_BUILTIN_CMPNLESD,
19993 19995 IX86_BUILTIN_CMPORDSD,
19994 19996 IX86_BUILTIN_CMPUNORDSD,
19995 19997
19996 19998 IX86_BUILTIN_COMIEQSD,
19997 19999 IX86_BUILTIN_COMILTSD,
19998 20000 IX86_BUILTIN_COMILESD,
19999 20001 IX86_BUILTIN_COMIGTSD,
20000 20002 IX86_BUILTIN_COMIGESD,
20001 20003 IX86_BUILTIN_COMINEQSD,
20002 20004 IX86_BUILTIN_UCOMIEQSD,
20003 20005 IX86_BUILTIN_UCOMILTSD,
20004 20006 IX86_BUILTIN_UCOMILESD,
20005 20007 IX86_BUILTIN_UCOMIGTSD,
20006 20008 IX86_BUILTIN_UCOMIGESD,
20007 20009 IX86_BUILTIN_UCOMINEQSD,
20008 20010
20009 20011 IX86_BUILTIN_MAXPD,
20010 20012 IX86_BUILTIN_MAXSD,
20011 20013 IX86_BUILTIN_MINPD,
20012 20014 IX86_BUILTIN_MINSD,
20013 20015
20014 20016 IX86_BUILTIN_ANDPD,
20015 20017 IX86_BUILTIN_ANDNPD,
20016 20018 IX86_BUILTIN_ORPD,
20017 20019 IX86_BUILTIN_XORPD,
20018 20020
20019 20021 IX86_BUILTIN_SQRTPD,
20020 20022 IX86_BUILTIN_SQRTSD,
20021 20023
20022 20024 IX86_BUILTIN_UNPCKHPD,
20023 20025 IX86_BUILTIN_UNPCKLPD,
20024 20026
20025 20027 IX86_BUILTIN_SHUFPD,
20026 20028
20027 20029 IX86_BUILTIN_LOADUPD,
20028 20030 IX86_BUILTIN_STOREUPD,
20029 20031 IX86_BUILTIN_MOVSD,
20030 20032
20031 20033 IX86_BUILTIN_LOADHPD,
20032 20034 IX86_BUILTIN_LOADLPD,
20033 20035
20034 20036 IX86_BUILTIN_CVTDQ2PD,
20035 20037 IX86_BUILTIN_CVTDQ2PS,
20036 20038
20037 20039 IX86_BUILTIN_CVTPD2DQ,
20038 20040 IX86_BUILTIN_CVTPD2PI,
20039 20041 IX86_BUILTIN_CVTPD2PS,
20040 20042 IX86_BUILTIN_CVTTPD2DQ,
20041 20043 IX86_BUILTIN_CVTTPD2PI,
20042 20044
20043 20045 IX86_BUILTIN_CVTPI2PD,
20044 20046 IX86_BUILTIN_CVTSI2SD,
20045 20047 IX86_BUILTIN_CVTSI642SD,
20046 20048
20047 20049 IX86_BUILTIN_CVTSD2SI,
20048 20050 IX86_BUILTIN_CVTSD2SI64,
20049 20051 IX86_BUILTIN_CVTSD2SS,
20050 20052 IX86_BUILTIN_CVTSS2SD,
20051 20053 IX86_BUILTIN_CVTTSD2SI,
20052 20054 IX86_BUILTIN_CVTTSD2SI64,
20053 20055
20054 20056 IX86_BUILTIN_CVTPS2DQ,
20055 20057 IX86_BUILTIN_CVTPS2PD,
20056 20058 IX86_BUILTIN_CVTTPS2DQ,
20057 20059
20058 20060 IX86_BUILTIN_MOVNTI,
20059 20061 IX86_BUILTIN_MOVNTPD,
20060 20062 IX86_BUILTIN_MOVNTDQ,
20061 20063
20062 20064 IX86_BUILTIN_MOVQ128,
20063 20065
20064 20066 /* SSE2 MMX */
20065 20067 IX86_BUILTIN_MASKMOVDQU,
20066 20068 IX86_BUILTIN_MOVMSKPD,
20067 20069 IX86_BUILTIN_PMOVMSKB128,
20068 20070
20069 20071 IX86_BUILTIN_PACKSSWB128,
20070 20072 IX86_BUILTIN_PACKSSDW128,
20071 20073 IX86_BUILTIN_PACKUSWB128,
20072 20074
20073 20075 IX86_BUILTIN_PADDB128,
20074 20076 IX86_BUILTIN_PADDW128,
20075 20077 IX86_BUILTIN_PADDD128,
20076 20078 IX86_BUILTIN_PADDQ128,
20077 20079 IX86_BUILTIN_PADDSB128,
20078 20080 IX86_BUILTIN_PADDSW128,
20079 20081 IX86_BUILTIN_PADDUSB128,
20080 20082 IX86_BUILTIN_PADDUSW128,
20081 20083 IX86_BUILTIN_PSUBB128,
20082 20084 IX86_BUILTIN_PSUBW128,
20083 20085 IX86_BUILTIN_PSUBD128,
20084 20086 IX86_BUILTIN_PSUBQ128,
20085 20087 IX86_BUILTIN_PSUBSB128,
20086 20088 IX86_BUILTIN_PSUBSW128,
20087 20089 IX86_BUILTIN_PSUBUSB128,
20088 20090 IX86_BUILTIN_PSUBUSW128,
20089 20091
20090 20092 IX86_BUILTIN_PAND128,
20091 20093 IX86_BUILTIN_PANDN128,
20092 20094 IX86_BUILTIN_POR128,
20093 20095 IX86_BUILTIN_PXOR128,
20094 20096
20095 20097 IX86_BUILTIN_PAVGB128,
20096 20098 IX86_BUILTIN_PAVGW128,
20097 20099
20098 20100 IX86_BUILTIN_PCMPEQB128,
20099 20101 IX86_BUILTIN_PCMPEQW128,
20100 20102 IX86_BUILTIN_PCMPEQD128,
20101 20103 IX86_BUILTIN_PCMPGTB128,
20102 20104 IX86_BUILTIN_PCMPGTW128,
20103 20105 IX86_BUILTIN_PCMPGTD128,
20104 20106
20105 20107 IX86_BUILTIN_PMADDWD128,
20106 20108
20107 20109 IX86_BUILTIN_PMAXSW128,
20108 20110 IX86_BUILTIN_PMAXUB128,
20109 20111 IX86_BUILTIN_PMINSW128,
20110 20112 IX86_BUILTIN_PMINUB128,
20111 20113
20112 20114 IX86_BUILTIN_PMULUDQ,
20113 20115 IX86_BUILTIN_PMULUDQ128,
20114 20116 IX86_BUILTIN_PMULHUW128,
20115 20117 IX86_BUILTIN_PMULHW128,
20116 20118 IX86_BUILTIN_PMULLW128,
20117 20119
20118 20120 IX86_BUILTIN_PSADBW128,
20119 20121 IX86_BUILTIN_PSHUFHW,
20120 20122 IX86_BUILTIN_PSHUFLW,
20121 20123 IX86_BUILTIN_PSHUFD,
20122 20124
20123 20125 IX86_BUILTIN_PSLLDQI128,
20124 20126 IX86_BUILTIN_PSLLWI128,
20125 20127 IX86_BUILTIN_PSLLDI128,
20126 20128 IX86_BUILTIN_PSLLQI128,
20127 20129 IX86_BUILTIN_PSRAWI128,
20128 20130 IX86_BUILTIN_PSRADI128,
20129 20131 IX86_BUILTIN_PSRLDQI128,
20130 20132 IX86_BUILTIN_PSRLWI128,
20131 20133 IX86_BUILTIN_PSRLDI128,
20132 20134 IX86_BUILTIN_PSRLQI128,
20133 20135
20134 20136 IX86_BUILTIN_PSLLDQ128,
20135 20137 IX86_BUILTIN_PSLLW128,
20136 20138 IX86_BUILTIN_PSLLD128,
20137 20139 IX86_BUILTIN_PSLLQ128,
20138 20140 IX86_BUILTIN_PSRAW128,
20139 20141 IX86_BUILTIN_PSRAD128,
20140 20142 IX86_BUILTIN_PSRLW128,
20141 20143 IX86_BUILTIN_PSRLD128,
20142 20144 IX86_BUILTIN_PSRLQ128,
20143 20145
20144 20146 IX86_BUILTIN_PUNPCKHBW128,
20145 20147 IX86_BUILTIN_PUNPCKHWD128,
20146 20148 IX86_BUILTIN_PUNPCKHDQ128,
20147 20149 IX86_BUILTIN_PUNPCKHQDQ128,
20148 20150 IX86_BUILTIN_PUNPCKLBW128,
20149 20151 IX86_BUILTIN_PUNPCKLWD128,
20150 20152 IX86_BUILTIN_PUNPCKLDQ128,
20151 20153 IX86_BUILTIN_PUNPCKLQDQ128,
20152 20154
20153 20155 IX86_BUILTIN_CLFLUSH,
20154 20156 IX86_BUILTIN_MFENCE,
20155 20157 IX86_BUILTIN_LFENCE,
20156 20158
20157 20159 /* SSE3. */
20158 20160 IX86_BUILTIN_ADDSUBPS,
20159 20161 IX86_BUILTIN_HADDPS,
20160 20162 IX86_BUILTIN_HSUBPS,
20161 20163 IX86_BUILTIN_MOVSHDUP,
20162 20164 IX86_BUILTIN_MOVSLDUP,
20163 20165 IX86_BUILTIN_ADDSUBPD,
20164 20166 IX86_BUILTIN_HADDPD,
20165 20167 IX86_BUILTIN_HSUBPD,
20166 20168 IX86_BUILTIN_LDDQU,
20167 20169
20168 20170 IX86_BUILTIN_MONITOR,
20169 20171 IX86_BUILTIN_MWAIT,
20170 20172
20171 20173 /* SSSE3. */
20172 20174 IX86_BUILTIN_PHADDW,
20173 20175 IX86_BUILTIN_PHADDD,
20174 20176 IX86_BUILTIN_PHADDSW,
20175 20177 IX86_BUILTIN_PHSUBW,
20176 20178 IX86_BUILTIN_PHSUBD,
20177 20179 IX86_BUILTIN_PHSUBSW,
20178 20180 IX86_BUILTIN_PMADDUBSW,
20179 20181 IX86_BUILTIN_PMULHRSW,
20180 20182 IX86_BUILTIN_PSHUFB,
20181 20183 IX86_BUILTIN_PSIGNB,
20182 20184 IX86_BUILTIN_PSIGNW,
20183 20185 IX86_BUILTIN_PSIGND,
20184 20186 IX86_BUILTIN_PALIGNR,
20185 20187 IX86_BUILTIN_PABSB,
20186 20188 IX86_BUILTIN_PABSW,
20187 20189 IX86_BUILTIN_PABSD,
20188 20190
20189 20191 IX86_BUILTIN_PHADDW128,
20190 20192 IX86_BUILTIN_PHADDD128,
20191 20193 IX86_BUILTIN_PHADDSW128,
20192 20194 IX86_BUILTIN_PHSUBW128,
20193 20195 IX86_BUILTIN_PHSUBD128,
20194 20196 IX86_BUILTIN_PHSUBSW128,
20195 20197 IX86_BUILTIN_PMADDUBSW128,
20196 20198 IX86_BUILTIN_PMULHRSW128,
20197 20199 IX86_BUILTIN_PSHUFB128,
20198 20200 IX86_BUILTIN_PSIGNB128,
20199 20201 IX86_BUILTIN_PSIGNW128,
20200 20202 IX86_BUILTIN_PSIGND128,
20201 20203 IX86_BUILTIN_PALIGNR128,
20202 20204 IX86_BUILTIN_PABSB128,
20203 20205 IX86_BUILTIN_PABSW128,
20204 20206 IX86_BUILTIN_PABSD128,
20205 20207
20206 20208 /* AMDFAM10 - SSE4A New Instructions. */
20207 20209 IX86_BUILTIN_MOVNTSD,
20208 20210 IX86_BUILTIN_MOVNTSS,
20209 20211 IX86_BUILTIN_EXTRQI,
20210 20212 IX86_BUILTIN_EXTRQ,
20211 20213 IX86_BUILTIN_INSERTQI,
20212 20214 IX86_BUILTIN_INSERTQ,
20213 20215
20214 20216 /* SSE4.1. */
20215 20217 IX86_BUILTIN_BLENDPD,
20216 20218 IX86_BUILTIN_BLENDPS,
20217 20219 IX86_BUILTIN_BLENDVPD,
20218 20220 IX86_BUILTIN_BLENDVPS,
20219 20221 IX86_BUILTIN_PBLENDVB128,
20220 20222 IX86_BUILTIN_PBLENDW128,
20221 20223
20222 20224 IX86_BUILTIN_DPPD,
20223 20225 IX86_BUILTIN_DPPS,
20224 20226
20225 20227 IX86_BUILTIN_INSERTPS128,
20226 20228
20227 20229 IX86_BUILTIN_MOVNTDQA,
20228 20230 IX86_BUILTIN_MPSADBW128,
20229 20231 IX86_BUILTIN_PACKUSDW128,
20230 20232 IX86_BUILTIN_PCMPEQQ,
20231 20233 IX86_BUILTIN_PHMINPOSUW128,
20232 20234
20233 20235 IX86_BUILTIN_PMAXSB128,
20234 20236 IX86_BUILTIN_PMAXSD128,
20235 20237 IX86_BUILTIN_PMAXUD128,
20236 20238 IX86_BUILTIN_PMAXUW128,
20237 20239
20238 20240 IX86_BUILTIN_PMINSB128,
20239 20241 IX86_BUILTIN_PMINSD128,
20240 20242 IX86_BUILTIN_PMINUD128,
20241 20243 IX86_BUILTIN_PMINUW128,
20242 20244
20243 20245 IX86_BUILTIN_PMOVSXBW128,
20244 20246 IX86_BUILTIN_PMOVSXBD128,
20245 20247 IX86_BUILTIN_PMOVSXBQ128,
20246 20248 IX86_BUILTIN_PMOVSXWD128,
20247 20249 IX86_BUILTIN_PMOVSXWQ128,
20248 20250 IX86_BUILTIN_PMOVSXDQ128,
20249 20251
20250 20252 IX86_BUILTIN_PMOVZXBW128,
20251 20253 IX86_BUILTIN_PMOVZXBD128,
20252 20254 IX86_BUILTIN_PMOVZXBQ128,
20253 20255 IX86_BUILTIN_PMOVZXWD128,
20254 20256 IX86_BUILTIN_PMOVZXWQ128,
20255 20257 IX86_BUILTIN_PMOVZXDQ128,
20256 20258
20257 20259 IX86_BUILTIN_PMULDQ128,
20258 20260 IX86_BUILTIN_PMULLD128,
20259 20261
20260 20262 IX86_BUILTIN_ROUNDPD,
20261 20263 IX86_BUILTIN_ROUNDPS,
20262 20264 IX86_BUILTIN_ROUNDSD,
20263 20265 IX86_BUILTIN_ROUNDSS,
20264 20266
20265 20267 IX86_BUILTIN_PTESTZ,
20266 20268 IX86_BUILTIN_PTESTC,
20267 20269 IX86_BUILTIN_PTESTNZC,
20268 20270
20269 20271 IX86_BUILTIN_VEC_INIT_V2SI,
20270 20272 IX86_BUILTIN_VEC_INIT_V4HI,
20271 20273 IX86_BUILTIN_VEC_INIT_V8QI,
20272 20274 IX86_BUILTIN_VEC_EXT_V2DF,
20273 20275 IX86_BUILTIN_VEC_EXT_V2DI,
20274 20276 IX86_BUILTIN_VEC_EXT_V4SF,
20275 20277 IX86_BUILTIN_VEC_EXT_V4SI,
20276 20278 IX86_BUILTIN_VEC_EXT_V8HI,
20277 20279 IX86_BUILTIN_VEC_EXT_V2SI,
20278 20280 IX86_BUILTIN_VEC_EXT_V4HI,
20279 20281 IX86_BUILTIN_VEC_EXT_V16QI,
20280 20282 IX86_BUILTIN_VEC_SET_V2DI,
20281 20283 IX86_BUILTIN_VEC_SET_V4SF,
20282 20284 IX86_BUILTIN_VEC_SET_V4SI,
20283 20285 IX86_BUILTIN_VEC_SET_V8HI,
20284 20286 IX86_BUILTIN_VEC_SET_V4HI,
20285 20287 IX86_BUILTIN_VEC_SET_V16QI,
20286 20288
20287 20289 IX86_BUILTIN_VEC_PACK_SFIX,
20288 20290
20289 20291 /* SSE4.2. */
20290 20292 IX86_BUILTIN_CRC32QI,
20291 20293 IX86_BUILTIN_CRC32HI,
20292 20294 IX86_BUILTIN_CRC32SI,
20293 20295 IX86_BUILTIN_CRC32DI,
20294 20296
20295 20297 IX86_BUILTIN_PCMPESTRI128,
20296 20298 IX86_BUILTIN_PCMPESTRM128,
20297 20299 IX86_BUILTIN_PCMPESTRA128,
20298 20300 IX86_BUILTIN_PCMPESTRC128,
20299 20301 IX86_BUILTIN_PCMPESTRO128,
20300 20302 IX86_BUILTIN_PCMPESTRS128,
20301 20303 IX86_BUILTIN_PCMPESTRZ128,
20302 20304 IX86_BUILTIN_PCMPISTRI128,
20303 20305 IX86_BUILTIN_PCMPISTRM128,
20304 20306 IX86_BUILTIN_PCMPISTRA128,
20305 20307 IX86_BUILTIN_PCMPISTRC128,
20306 20308 IX86_BUILTIN_PCMPISTRO128,
20307 20309 IX86_BUILTIN_PCMPISTRS128,
20308 20310 IX86_BUILTIN_PCMPISTRZ128,
20309 20311
20310 20312 IX86_BUILTIN_PCMPGTQ,
20311 20313
20312 20314 /* AES instructions */
20313 20315 IX86_BUILTIN_AESENC128,
20314 20316 IX86_BUILTIN_AESENCLAST128,
20315 20317 IX86_BUILTIN_AESDEC128,
20316 20318 IX86_BUILTIN_AESDECLAST128,
20317 20319 IX86_BUILTIN_AESIMC128,
20318 20320 IX86_BUILTIN_AESKEYGENASSIST128,
20319 20321
20320 20322 /* PCLMUL instruction */
20321 20323 IX86_BUILTIN_PCLMULQDQ128,
20322 20324
20323 20325 /* AVX */
20324 20326 IX86_BUILTIN_ADDPD256,
20325 20327 IX86_BUILTIN_ADDPS256,
20326 20328 IX86_BUILTIN_ADDSUBPD256,
20327 20329 IX86_BUILTIN_ADDSUBPS256,
20328 20330 IX86_BUILTIN_ANDPD256,
20329 20331 IX86_BUILTIN_ANDPS256,
20330 20332 IX86_BUILTIN_ANDNPD256,
20331 20333 IX86_BUILTIN_ANDNPS256,
20332 20334 IX86_BUILTIN_BLENDPD256,
20333 20335 IX86_BUILTIN_BLENDPS256,
20334 20336 IX86_BUILTIN_BLENDVPD256,
20335 20337 IX86_BUILTIN_BLENDVPS256,
20336 20338 IX86_BUILTIN_DIVPD256,
20337 20339 IX86_BUILTIN_DIVPS256,
20338 20340 IX86_BUILTIN_DPPS256,
20339 20341 IX86_BUILTIN_HADDPD256,
20340 20342 IX86_BUILTIN_HADDPS256,
20341 20343 IX86_BUILTIN_HSUBPD256,
20342 20344 IX86_BUILTIN_HSUBPS256,
20343 20345 IX86_BUILTIN_MAXPD256,
20344 20346 IX86_BUILTIN_MAXPS256,
20345 20347 IX86_BUILTIN_MINPD256,
20346 20348 IX86_BUILTIN_MINPS256,
20347 20349 IX86_BUILTIN_MULPD256,
20348 20350 IX86_BUILTIN_MULPS256,
20349 20351 IX86_BUILTIN_ORPD256,
20350 20352 IX86_BUILTIN_ORPS256,
20351 20353 IX86_BUILTIN_SHUFPD256,
20352 20354 IX86_BUILTIN_SHUFPS256,
20353 20355 IX86_BUILTIN_SUBPD256,
20354 20356 IX86_BUILTIN_SUBPS256,
20355 20357 IX86_BUILTIN_XORPD256,
20356 20358 IX86_BUILTIN_XORPS256,
20357 20359 IX86_BUILTIN_CMPSD,
20358 20360 IX86_BUILTIN_CMPSS,
20359 20361 IX86_BUILTIN_CMPPD,
20360 20362 IX86_BUILTIN_CMPPS,
20361 20363 IX86_BUILTIN_CMPPD256,
20362 20364 IX86_BUILTIN_CMPPS256,
20363 20365 IX86_BUILTIN_CVTDQ2PD256,
20364 20366 IX86_BUILTIN_CVTDQ2PS256,
20365 20367 IX86_BUILTIN_CVTPD2PS256,
20366 20368 IX86_BUILTIN_CVTPS2DQ256,
20367 20369 IX86_BUILTIN_CVTPS2PD256,
20368 20370 IX86_BUILTIN_CVTTPD2DQ256,
20369 20371 IX86_BUILTIN_CVTPD2DQ256,
20370 20372 IX86_BUILTIN_CVTTPS2DQ256,
20371 20373 IX86_BUILTIN_EXTRACTF128PD256,
20372 20374 IX86_BUILTIN_EXTRACTF128PS256,
20373 20375 IX86_BUILTIN_EXTRACTF128SI256,
20374 20376 IX86_BUILTIN_VZEROALL,
20375 20377 IX86_BUILTIN_VZEROUPPER,
20376 20378 IX86_BUILTIN_VZEROUPPER_REX64,
20377 20379 IX86_BUILTIN_VPERMILVARPD,
20378 20380 IX86_BUILTIN_VPERMILVARPS,
20379 20381 IX86_BUILTIN_VPERMILVARPD256,
20380 20382 IX86_BUILTIN_VPERMILVARPS256,
20381 20383 IX86_BUILTIN_VPERMILPD,
20382 20384 IX86_BUILTIN_VPERMILPS,
20383 20385 IX86_BUILTIN_VPERMILPD256,
20384 20386 IX86_BUILTIN_VPERMILPS256,
20385 20387 IX86_BUILTIN_VPERM2F128PD256,
20386 20388 IX86_BUILTIN_VPERM2F128PS256,
20387 20389 IX86_BUILTIN_VPERM2F128SI256,
20388 20390 IX86_BUILTIN_VBROADCASTSS,
20389 20391 IX86_BUILTIN_VBROADCASTSD256,
20390 20392 IX86_BUILTIN_VBROADCASTSS256,
20391 20393 IX86_BUILTIN_VBROADCASTPD256,
20392 20394 IX86_BUILTIN_VBROADCASTPS256,
20393 20395 IX86_BUILTIN_VINSERTF128PD256,
20394 20396 IX86_BUILTIN_VINSERTF128PS256,
20395 20397 IX86_BUILTIN_VINSERTF128SI256,
20396 20398 IX86_BUILTIN_LOADUPD256,
20397 20399 IX86_BUILTIN_LOADUPS256,
20398 20400 IX86_BUILTIN_STOREUPD256,
20399 20401 IX86_BUILTIN_STOREUPS256,
20400 20402 IX86_BUILTIN_LDDQU256,
20401 20403 IX86_BUILTIN_MOVNTDQ256,
20402 20404 IX86_BUILTIN_MOVNTPD256,
20403 20405 IX86_BUILTIN_MOVNTPS256,
20404 20406 IX86_BUILTIN_LOADDQU256,
20405 20407 IX86_BUILTIN_STOREDQU256,
20406 20408 IX86_BUILTIN_MASKLOADPD,
20407 20409 IX86_BUILTIN_MASKLOADPS,
20408 20410 IX86_BUILTIN_MASKSTOREPD,
20409 20411 IX86_BUILTIN_MASKSTOREPS,
20410 20412 IX86_BUILTIN_MASKLOADPD256,
20411 20413 IX86_BUILTIN_MASKLOADPS256,
20412 20414 IX86_BUILTIN_MASKSTOREPD256,
20413 20415 IX86_BUILTIN_MASKSTOREPS256,
20414 20416 IX86_BUILTIN_MOVSHDUP256,
20415 20417 IX86_BUILTIN_MOVSLDUP256,
20416 20418 IX86_BUILTIN_MOVDDUP256,
20417 20419
20418 20420 IX86_BUILTIN_SQRTPD256,
20419 20421 IX86_BUILTIN_SQRTPS256,
20420 20422 IX86_BUILTIN_SQRTPS_NR256,
20421 20423 IX86_BUILTIN_RSQRTPS256,
20422 20424 IX86_BUILTIN_RSQRTPS_NR256,
20423 20425
20424 20426 IX86_BUILTIN_RCPPS256,
20425 20427
20426 20428 IX86_BUILTIN_ROUNDPD256,
20427 20429 IX86_BUILTIN_ROUNDPS256,
20428 20430
20429 20431 IX86_BUILTIN_UNPCKHPD256,
20430 20432 IX86_BUILTIN_UNPCKLPD256,
20431 20433 IX86_BUILTIN_UNPCKHPS256,
20432 20434 IX86_BUILTIN_UNPCKLPS256,
20433 20435
20434 20436 IX86_BUILTIN_SI256_SI,
20435 20437 IX86_BUILTIN_PS256_PS,
20436 20438 IX86_BUILTIN_PD256_PD,
20437 20439 IX86_BUILTIN_SI_SI256,
20438 20440 IX86_BUILTIN_PS_PS256,
20439 20441 IX86_BUILTIN_PD_PD256,
20440 20442
20441 20443 IX86_BUILTIN_VTESTZPD,
20442 20444 IX86_BUILTIN_VTESTCPD,
20443 20445 IX86_BUILTIN_VTESTNZCPD,
20444 20446 IX86_BUILTIN_VTESTZPS,
20445 20447 IX86_BUILTIN_VTESTCPS,
20446 20448 IX86_BUILTIN_VTESTNZCPS,
20447 20449 IX86_BUILTIN_VTESTZPD256,
20448 20450 IX86_BUILTIN_VTESTCPD256,
20449 20451 IX86_BUILTIN_VTESTNZCPD256,
20450 20452 IX86_BUILTIN_VTESTZPS256,
20451 20453 IX86_BUILTIN_VTESTCPS256,
20452 20454 IX86_BUILTIN_VTESTNZCPS256,
20453 20455 IX86_BUILTIN_PTESTZ256,
20454 20456 IX86_BUILTIN_PTESTC256,
20455 20457 IX86_BUILTIN_PTESTNZC256,
20456 20458
20457 20459 IX86_BUILTIN_MOVMSKPD256,
20458 20460 IX86_BUILTIN_MOVMSKPS256,
20459 20461
20460 20462 /* TFmode support builtins. */
20461 20463 IX86_BUILTIN_INFQ,
20462 20464 IX86_BUILTIN_FABSQ,
20463 20465 IX86_BUILTIN_COPYSIGNQ,
20464 20466
20465 20467 /* SSE5 instructions */
20466 20468 IX86_BUILTIN_FMADDSS,
20467 20469 IX86_BUILTIN_FMADDSD,
20468 20470 IX86_BUILTIN_FMADDPS,
20469 20471 IX86_BUILTIN_FMADDPD,
20470 20472 IX86_BUILTIN_FMSUBSS,
20471 20473 IX86_BUILTIN_FMSUBSD,
20472 20474 IX86_BUILTIN_FMSUBPS,
20473 20475 IX86_BUILTIN_FMSUBPD,
20474 20476 IX86_BUILTIN_FNMADDSS,
20475 20477 IX86_BUILTIN_FNMADDSD,
20476 20478 IX86_BUILTIN_FNMADDPS,
20477 20479 IX86_BUILTIN_FNMADDPD,
20478 20480 IX86_BUILTIN_FNMSUBSS,
20479 20481 IX86_BUILTIN_FNMSUBSD,
20480 20482 IX86_BUILTIN_FNMSUBPS,
20481 20483 IX86_BUILTIN_FNMSUBPD,
20482 20484 IX86_BUILTIN_PCMOV,
20483 20485 IX86_BUILTIN_PCMOV_V2DI,
20484 20486 IX86_BUILTIN_PCMOV_V4SI,
20485 20487 IX86_BUILTIN_PCMOV_V8HI,
20486 20488 IX86_BUILTIN_PCMOV_V16QI,
20487 20489 IX86_BUILTIN_PCMOV_V4SF,
20488 20490 IX86_BUILTIN_PCMOV_V2DF,
20489 20491 IX86_BUILTIN_PPERM,
20490 20492 IX86_BUILTIN_PERMPS,
20491 20493 IX86_BUILTIN_PERMPD,
20492 20494 IX86_BUILTIN_PMACSSWW,
20493 20495 IX86_BUILTIN_PMACSWW,
20494 20496 IX86_BUILTIN_PMACSSWD,
20495 20497 IX86_BUILTIN_PMACSWD,
20496 20498 IX86_BUILTIN_PMACSSDD,
20497 20499 IX86_BUILTIN_PMACSDD,
20498 20500 IX86_BUILTIN_PMACSSDQL,
20499 20501 IX86_BUILTIN_PMACSSDQH,
20500 20502 IX86_BUILTIN_PMACSDQL,
20501 20503 IX86_BUILTIN_PMACSDQH,
20502 20504 IX86_BUILTIN_PMADCSSWD,
20503 20505 IX86_BUILTIN_PMADCSWD,
20504 20506 IX86_BUILTIN_PHADDBW,
20505 20507 IX86_BUILTIN_PHADDBD,
20506 20508 IX86_BUILTIN_PHADDBQ,
20507 20509 IX86_BUILTIN_PHADDWD,
20508 20510 IX86_BUILTIN_PHADDWQ,
20509 20511 IX86_BUILTIN_PHADDDQ,
20510 20512 IX86_BUILTIN_PHADDUBW,
20511 20513 IX86_BUILTIN_PHADDUBD,
20512 20514 IX86_BUILTIN_PHADDUBQ,
20513 20515 IX86_BUILTIN_PHADDUWD,
20514 20516 IX86_BUILTIN_PHADDUWQ,
20515 20517 IX86_BUILTIN_PHADDUDQ,
20516 20518 IX86_BUILTIN_PHSUBBW,
20517 20519 IX86_BUILTIN_PHSUBWD,
20518 20520 IX86_BUILTIN_PHSUBDQ,
20519 20521 IX86_BUILTIN_PROTB,
20520 20522 IX86_BUILTIN_PROTW,
20521 20523 IX86_BUILTIN_PROTD,
20522 20524 IX86_BUILTIN_PROTQ,
20523 20525 IX86_BUILTIN_PROTB_IMM,
20524 20526 IX86_BUILTIN_PROTW_IMM,
20525 20527 IX86_BUILTIN_PROTD_IMM,
20526 20528 IX86_BUILTIN_PROTQ_IMM,
20527 20529 IX86_BUILTIN_PSHLB,
20528 20530 IX86_BUILTIN_PSHLW,
20529 20531 IX86_BUILTIN_PSHLD,
20530 20532 IX86_BUILTIN_PSHLQ,
20531 20533 IX86_BUILTIN_PSHAB,
20532 20534 IX86_BUILTIN_PSHAW,
20533 20535 IX86_BUILTIN_PSHAD,
20534 20536 IX86_BUILTIN_PSHAQ,
20535 20537 IX86_BUILTIN_FRCZSS,
20536 20538 IX86_BUILTIN_FRCZSD,
20537 20539 IX86_BUILTIN_FRCZPS,
20538 20540 IX86_BUILTIN_FRCZPD,
20539 20541 IX86_BUILTIN_CVTPH2PS,
20540 20542 IX86_BUILTIN_CVTPS2PH,
20541 20543
20542 20544 IX86_BUILTIN_COMEQSS,
20543 20545 IX86_BUILTIN_COMNESS,
20544 20546 IX86_BUILTIN_COMLTSS,
20545 20547 IX86_BUILTIN_COMLESS,
20546 20548 IX86_BUILTIN_COMGTSS,
20547 20549 IX86_BUILTIN_COMGESS,
20548 20550 IX86_BUILTIN_COMUEQSS,
20549 20551 IX86_BUILTIN_COMUNESS,
20550 20552 IX86_BUILTIN_COMULTSS,
20551 20553 IX86_BUILTIN_COMULESS,
20552 20554 IX86_BUILTIN_COMUGTSS,
20553 20555 IX86_BUILTIN_COMUGESS,
20554 20556 IX86_BUILTIN_COMORDSS,
20555 20557 IX86_BUILTIN_COMUNORDSS,
20556 20558 IX86_BUILTIN_COMFALSESS,
20557 20559 IX86_BUILTIN_COMTRUESS,
20558 20560
20559 20561 IX86_BUILTIN_COMEQSD,
20560 20562 IX86_BUILTIN_COMNESD,
20561 20563 IX86_BUILTIN_COMLTSD,
20562 20564 IX86_BUILTIN_COMLESD,
20563 20565 IX86_BUILTIN_COMGTSD,
20564 20566 IX86_BUILTIN_COMGESD,
20565 20567 IX86_BUILTIN_COMUEQSD,
20566 20568 IX86_BUILTIN_COMUNESD,
20567 20569 IX86_BUILTIN_COMULTSD,
20568 20570 IX86_BUILTIN_COMULESD,
20569 20571 IX86_BUILTIN_COMUGTSD,
20570 20572 IX86_BUILTIN_COMUGESD,
20571 20573 IX86_BUILTIN_COMORDSD,
20572 20574 IX86_BUILTIN_COMUNORDSD,
20573 20575 IX86_BUILTIN_COMFALSESD,
20574 20576 IX86_BUILTIN_COMTRUESD,
20575 20577
20576 20578 IX86_BUILTIN_COMEQPS,
20577 20579 IX86_BUILTIN_COMNEPS,
20578 20580 IX86_BUILTIN_COMLTPS,
20579 20581 IX86_BUILTIN_COMLEPS,
20580 20582 IX86_BUILTIN_COMGTPS,
20581 20583 IX86_BUILTIN_COMGEPS,
20582 20584 IX86_BUILTIN_COMUEQPS,
20583 20585 IX86_BUILTIN_COMUNEPS,
20584 20586 IX86_BUILTIN_COMULTPS,
20585 20587 IX86_BUILTIN_COMULEPS,
20586 20588 IX86_BUILTIN_COMUGTPS,
20587 20589 IX86_BUILTIN_COMUGEPS,
20588 20590 IX86_BUILTIN_COMORDPS,
20589 20591 IX86_BUILTIN_COMUNORDPS,
20590 20592 IX86_BUILTIN_COMFALSEPS,
20591 20593 IX86_BUILTIN_COMTRUEPS,
20592 20594
20593 20595 IX86_BUILTIN_COMEQPD,
20594 20596 IX86_BUILTIN_COMNEPD,
20595 20597 IX86_BUILTIN_COMLTPD,
20596 20598 IX86_BUILTIN_COMLEPD,
20597 20599 IX86_BUILTIN_COMGTPD,
20598 20600 IX86_BUILTIN_COMGEPD,
20599 20601 IX86_BUILTIN_COMUEQPD,
20600 20602 IX86_BUILTIN_COMUNEPD,
20601 20603 IX86_BUILTIN_COMULTPD,
20602 20604 IX86_BUILTIN_COMULEPD,
20603 20605 IX86_BUILTIN_COMUGTPD,
20604 20606 IX86_BUILTIN_COMUGEPD,
20605 20607 IX86_BUILTIN_COMORDPD,
20606 20608 IX86_BUILTIN_COMUNORDPD,
20607 20609 IX86_BUILTIN_COMFALSEPD,
20608 20610 IX86_BUILTIN_COMTRUEPD,
20609 20611
20610 20612 IX86_BUILTIN_PCOMEQUB,
20611 20613 IX86_BUILTIN_PCOMNEUB,
20612 20614 IX86_BUILTIN_PCOMLTUB,
20613 20615 IX86_BUILTIN_PCOMLEUB,
20614 20616 IX86_BUILTIN_PCOMGTUB,
20615 20617 IX86_BUILTIN_PCOMGEUB,
20616 20618 IX86_BUILTIN_PCOMFALSEUB,
20617 20619 IX86_BUILTIN_PCOMTRUEUB,
20618 20620 IX86_BUILTIN_PCOMEQUW,
20619 20621 IX86_BUILTIN_PCOMNEUW,
20620 20622 IX86_BUILTIN_PCOMLTUW,
20621 20623 IX86_BUILTIN_PCOMLEUW,
20622 20624 IX86_BUILTIN_PCOMGTUW,
20623 20625 IX86_BUILTIN_PCOMGEUW,
20624 20626 IX86_BUILTIN_PCOMFALSEUW,
20625 20627 IX86_BUILTIN_PCOMTRUEUW,
20626 20628 IX86_BUILTIN_PCOMEQUD,
20627 20629 IX86_BUILTIN_PCOMNEUD,
20628 20630 IX86_BUILTIN_PCOMLTUD,
20629 20631 IX86_BUILTIN_PCOMLEUD,
20630 20632 IX86_BUILTIN_PCOMGTUD,
20631 20633 IX86_BUILTIN_PCOMGEUD,
20632 20634 IX86_BUILTIN_PCOMFALSEUD,
20633 20635 IX86_BUILTIN_PCOMTRUEUD,
20634 20636 IX86_BUILTIN_PCOMEQUQ,
20635 20637 IX86_BUILTIN_PCOMNEUQ,
20636 20638 IX86_BUILTIN_PCOMLTUQ,
20637 20639 IX86_BUILTIN_PCOMLEUQ,
20638 20640 IX86_BUILTIN_PCOMGTUQ,
20639 20641 IX86_BUILTIN_PCOMGEUQ,
20640 20642 IX86_BUILTIN_PCOMFALSEUQ,
20641 20643 IX86_BUILTIN_PCOMTRUEUQ,
20642 20644
20643 20645 IX86_BUILTIN_PCOMEQB,
20644 20646 IX86_BUILTIN_PCOMNEB,
20645 20647 IX86_BUILTIN_PCOMLTB,
20646 20648 IX86_BUILTIN_PCOMLEB,
20647 20649 IX86_BUILTIN_PCOMGTB,
20648 20650 IX86_BUILTIN_PCOMGEB,
20649 20651 IX86_BUILTIN_PCOMFALSEB,
20650 20652 IX86_BUILTIN_PCOMTRUEB,
20651 20653 IX86_BUILTIN_PCOMEQW,
20652 20654 IX86_BUILTIN_PCOMNEW,
20653 20655 IX86_BUILTIN_PCOMLTW,
20654 20656 IX86_BUILTIN_PCOMLEW,
20655 20657 IX86_BUILTIN_PCOMGTW,
20656 20658 IX86_BUILTIN_PCOMGEW,
20657 20659 IX86_BUILTIN_PCOMFALSEW,
20658 20660 IX86_BUILTIN_PCOMTRUEW,
20659 20661 IX86_BUILTIN_PCOMEQD,
20660 20662 IX86_BUILTIN_PCOMNED,
20661 20663 IX86_BUILTIN_PCOMLTD,
20662 20664 IX86_BUILTIN_PCOMLED,
20663 20665 IX86_BUILTIN_PCOMGTD,
20664 20666 IX86_BUILTIN_PCOMGED,
20665 20667 IX86_BUILTIN_PCOMFALSED,
20666 20668 IX86_BUILTIN_PCOMTRUED,
20667 20669 IX86_BUILTIN_PCOMEQQ,
20668 20670 IX86_BUILTIN_PCOMNEQ,
20669 20671 IX86_BUILTIN_PCOMLTQ,
20670 20672 IX86_BUILTIN_PCOMLEQ,
20671 20673 IX86_BUILTIN_PCOMGTQ,
20672 20674 IX86_BUILTIN_PCOMGEQ,
20673 20675 IX86_BUILTIN_PCOMFALSEQ,
20674 20676 IX86_BUILTIN_PCOMTRUEQ,
20675 20677
20676 20678 IX86_BUILTIN_MAX
20677 20679 };
20678 20680
20679 20681 /* Table for the ix86 builtin decls. */
20680 20682 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
20681 20683
20682 20684 /* Table of all of the builtin functions that are possible with different ISAs
20683 20685 but are waiting to be built until a function is declared to use that
20684 20686 ISA. */
20685 20687 struct builtin_isa GTY(())
20686 20688 {
20687 20689 tree type; /* builtin type to use in the declaration */
20688 20690 const char *name; /* function name */
20689 20691 int isa; /* isa_flags this builtin is defined for */
20690 20692 bool const_p; /* true if the declaration is constant */
20691 20693 };
20692 20694
20693 20695 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX];
20694 20696
20695 20697
20696 20698 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save MASK,
20697 20699 * the isa_flags for which the builtin is defined, in the ix86_builtins_isa
20698 20700 * array. Store the function decl in the ix86_builtins array. Return the
20699 20701 * function decl, or NULL_TREE if the builtin was not added.
20700 20702 *
20701 20703 * If the front end has a special hook for builtin functions, delay adding
20702 20704 * builtin functions that aren't in the current ISA until the ISA is changed
20703 20705 * with function-specific optimization. Doing so can save about 300K for the
20704 20706 * default compiler. When the builtin is expanded, check at that time whether
20705 20707 * it is valid.
20706 20708 *
20707 20709 * If the front end doesn't have a special hook, record all builtins, even
20708 20710 * those whose instruction set is not in the current ISA, in case the user
20709 20711 * uses function-specific options for a different ISA, so that we don't get
20710 20712 * scope errors if a builtin is added in the middle of a function scope. */
20711 20713
20712 20714 static inline tree
20713 20715 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
20714 20716 {
20715 20717 tree decl = NULL_TREE;
20716 20718
20717 20719 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT)
20718 20720 {
20719 20721 ix86_builtins_isa[(int) code].isa = mask;
20720 20722
20721 20723 if ((mask & ix86_isa_flags) != 0
20722 20724 || (lang_hooks.builtin_function
20723 20725 == lang_hooks.builtin_function_ext_scope))
20724 20726
20725 20727 {
20726 20728 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL,
20727 20729 NULL_TREE);
20728 20730 ix86_builtins[(int) code] = decl;
20729 20731 ix86_builtins_isa[(int) code].type = NULL_TREE;
20730 20732 }
20731 20733 else
20732 20734 {
20733 20735 ix86_builtins[(int) code] = NULL_TREE;
20734 20736 ix86_builtins_isa[(int) code].const_p = false;
20735 20737 ix86_builtins_isa[(int) code].type = type;
20736 20738 ix86_builtins_isa[(int) code].name = name;
20737 20739 }
20738 20740 }
20739 20741
20740 20742 return decl;
20741 20743 }
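/* A minimal usage sketch with placeholder names -- "__builtin_ia32_example"
   and IX86_BUILTIN_EXAMPLE are hypothetical; the real registrations are
   driven from the bdesc_* tables further down:

     tree ftype = build_function_type_list (double_type_node,
					     double_type_node, NULL_TREE);
     def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_example",
		  ftype, IX86_BUILTIN_EXAMPLE);

   The decl is created immediately when MASK intersects ix86_isa_flags (or
   when the front end's builtin_function hook is the ext_scope variant);
   otherwise the name and type are parked in ix86_builtins_isa[] until
   ix86_add_new_builtins sees the ISA enabled.  */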
20742 20744
20743 20745 /* Like def_builtin, but also marks the function decl "const". */
20744 20746
20745 20747 static inline tree
20746 20748 def_builtin_const (int mask, const char *name, tree type,
20747 20749 enum ix86_builtins code)
20748 20750 {
20749 20751 tree decl = def_builtin (mask, name, type, code);
20750 20752 if (decl)
20751 20753 TREE_READONLY (decl) = 1;
20752 20754 else
20753 20755 ix86_builtins_isa[(int) code].const_p = true;
20754 20756
20755 20757 return decl;
20756 20758 }
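/* Marking the decl TREE_READONLY corresponds to the "const" function
   attribute: the optimizers may assume the builtin has no side effects and
   depends only on its arguments, so duplicate calls can be CSE'd.  If the
   decl itself is deferred, only const_p is recorded here and the flag is
   applied later by ix86_add_new_builtins.  */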
20757 20759
20758 20760 /* Add any new builtin functions for a given ISA that may not have been
20759 20761 declared yet. This saves a bit of space compared to adding all of the
20760 20762 declarations to the tree up front, even when they are never used. */
20761 20763
20762 20764 static void
20763 20765 ix86_add_new_builtins (int isa)
20764 20766 {
20765 20767 int i;
20766 20768 tree decl;
20767 20769
20768 20770 for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
20769 20771 {
20770 20772 if ((ix86_builtins_isa[i].isa & isa) != 0
20771 20773 && ix86_builtins_isa[i].type != NULL_TREE)
20772 20774 {
20773 20775 decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
20774 20776 ix86_builtins_isa[i].type,
20775 20777 i, BUILT_IN_MD, NULL,
20776 20778 NULL_TREE);
20777 20779
20778 20780 ix86_builtins[i] = decl;
20779 20781 ix86_builtins_isa[i].type = NULL_TREE;
20780 20782 if (ix86_builtins_isa[i].const_p)
20781 20783 TREE_READONLY (decl) = 1;
20782 20784 }
20783 20785 }
20784 20786 }
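/* Sketch of the intended flow (hedged -- the caller lives elsewhere in this
   file): when function-specific target options enable an additional ISA bit,
   something along the lines of

     ix86_add_new_builtins (ix86_isa_flags);

   materializes every decl that def_builtin parked: entries whose recorded
   isa mask intersects the argument and whose type is still pending are
   declared at file scope, and const-ness recorded by def_builtin_const is
   applied at that point.  */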
20785 20787
20786 20788 /* Bits for builtin_description.flag. */
20787 20789
20788 20790 /* Set when we don't support the comparison natively, and should swap the
20789 20791 comparison operands in order to support it. */
20790 20792 #define BUILTIN_DESC_SWAP_OPERANDS 1
20791 20793
20792 20794 struct builtin_description
20793 20795 {
20794 20796 const unsigned int mask;
20795 20797 const enum insn_code icode;
20796 20798 const char *const name;
20797 20799 const enum ix86_builtins code;
20798 20800 const enum rtx_code comparison;
20799 20801 const int flag;
20800 20802 };
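/* Reading a table entry: each row in the bdesc_* arrays below binds one
   builtin to an insn pattern.  The first bdesc_comi entry, for instance,
   says: when OPTION_MASK_ISA_SSE is enabled, expand __builtin_ia32_comieq
   (enumerator IX86_BUILTIN_COMIEQSS) through the CODE_FOR_sse_comi pattern
   using the UNEQ comparison; the final "flag" field carries per-table extra
   data -- BUILTIN_DESC_SWAP_OPERANDS here, a CC mode for the pcmpestr and
   pcmpistr tables, or an (int)-cast function-type code for the argument
   tables.  */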
20801 20803
20802 20804 static const struct builtin_description bdesc_comi[] =
20803 20805 {
20804 20806 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
20805 20807 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
20806 20808 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
20807 20809 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
20808 20810 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
20809 20811 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
20810 20812 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
20811 20813 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
20812 20814 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
20813 20815 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
20814 20816 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
20815 20817 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
20816 20818 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
20817 20819 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
20818 20820 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
20819 20821 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
20820 20822 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
20821 20823 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
20822 20824 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
20823 20825 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
20824 20826 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
20825 20827 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
20826 20828 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
20827 20829 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
20828 20830 };
20829 20831
20830 20832 static const struct builtin_description bdesc_pcmpestr[] =
20831 20833 {
20832 20834 /* SSE4.2 */
20833 20835 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
20834 20836 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
20835 20837 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
20836 20838 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
20837 20839 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
20838 20840 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
20839 20841 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
20840 20842 };
20841 20843
20842 20844 static const struct builtin_description bdesc_pcmpistr[] =
20843 20845 {
20844 20846 /* SSE4.2 */
20845 20847 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
20846 20848 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
20847 20849 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
20848 20850 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
20849 20851 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
20850 20852 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
20851 20853 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
20852 20854 };
20853 20855
20854 20856 /* Special builtin types */
20855 20857 enum ix86_special_builtin_type
20856 20858 {
20857 20859 SPECIAL_FTYPE_UNKNOWN,
20858 20860 VOID_FTYPE_VOID,
20859 20861 V32QI_FTYPE_PCCHAR,
20860 20862 V16QI_FTYPE_PCCHAR,
20861 20863 V8SF_FTYPE_PCV4SF,
20862 20864 V8SF_FTYPE_PCFLOAT,
20863 20865 V4DF_FTYPE_PCV2DF,
20864 20866 V4DF_FTYPE_PCDOUBLE,
20865 20867 V4SF_FTYPE_PCFLOAT,
20866 20868 V2DF_FTYPE_PCDOUBLE,
20867 20869 V8SF_FTYPE_PCV8SF_V8SF,
20868 20870 V4DF_FTYPE_PCV4DF_V4DF,
20869 20871 V4SF_FTYPE_V4SF_PCV2SF,
20870 20872 V4SF_FTYPE_PCV4SF_V4SF,
20871 20873 V2DF_FTYPE_V2DF_PCDOUBLE,
20872 20874 V2DF_FTYPE_PCV2DF_V2DF,
20873 20875 V2DI_FTYPE_PV2DI,
20874 20876 VOID_FTYPE_PV2SF_V4SF,
20875 20877 VOID_FTYPE_PV4DI_V4DI,
20876 20878 VOID_FTYPE_PV2DI_V2DI,
20877 20879 VOID_FTYPE_PCHAR_V32QI,
20878 20880 VOID_FTYPE_PCHAR_V16QI,
20879 20881 VOID_FTYPE_PFLOAT_V8SF,
20880 20882 VOID_FTYPE_PFLOAT_V4SF,
20881 20883 VOID_FTYPE_PDOUBLE_V4DF,
20882 20884 VOID_FTYPE_PDOUBLE_V2DF,
20883 20885 VOID_FTYPE_PDI_DI,
20884 20886 VOID_FTYPE_PINT_INT,
20885 20887 VOID_FTYPE_PV8SF_V8SF_V8SF,
20886 20888 VOID_FTYPE_PV4DF_V4DF_V4DF,
20887 20889 VOID_FTYPE_PV4SF_V4SF_V4SF,
20888 20890 VOID_FTYPE_PV2DF_V2DF_V2DF
20889 20891 };
20890 20892
20891 20893 /* Builtin types */
20892 20894 enum ix86_builtin_type
20893 20895 {
20894 20896 FTYPE_UNKNOWN,
20895 20897 FLOAT128_FTYPE_FLOAT128,
20896 20898 FLOAT_FTYPE_FLOAT,
20897 20899 FLOAT128_FTYPE_FLOAT128_FLOAT128,
20898 20900 INT_FTYPE_V8SF_V8SF_PTEST,
20899 20901 INT_FTYPE_V4DI_V4DI_PTEST,
20900 20902 INT_FTYPE_V4DF_V4DF_PTEST,
20901 20903 INT_FTYPE_V4SF_V4SF_PTEST,
20902 20904 INT_FTYPE_V2DI_V2DI_PTEST,
20903 20905 INT_FTYPE_V2DF_V2DF_PTEST,
20904 20906 INT64_FTYPE_V4SF,
20905 20907 INT64_FTYPE_V2DF,
20906 20908 INT_FTYPE_V16QI,
20907 20909 INT_FTYPE_V8QI,
20908 20910 INT_FTYPE_V8SF,
20909 20911 INT_FTYPE_V4DF,
20910 20912 INT_FTYPE_V4SF,
20911 20913 INT_FTYPE_V2DF,
20912 20914 V16QI_FTYPE_V16QI,
20913 20915 V8SI_FTYPE_V8SF,
20914 20916 V8SI_FTYPE_V4SI,
20915 20917 V8HI_FTYPE_V8HI,
20916 20918 V8HI_FTYPE_V16QI,
20917 20919 V8QI_FTYPE_V8QI,
20918 20920 V8SF_FTYPE_V8SF,
20919 20921 V8SF_FTYPE_V8SI,
20920 20922 V8SF_FTYPE_V4SF,
20921 20923 V4SI_FTYPE_V4SI,
20922 20924 V4SI_FTYPE_V16QI,
20923 20925 V4SI_FTYPE_V8SI,
20924 20926 V4SI_FTYPE_V8HI,
20925 20927 V4SI_FTYPE_V4DF,
20926 20928 V4SI_FTYPE_V4SF,
20927 20929 V4SI_FTYPE_V2DF,
20928 20930 V4HI_FTYPE_V4HI,
20929 20931 V4DF_FTYPE_V4DF,
20930 20932 V4DF_FTYPE_V4SI,
20931 20933 V4DF_FTYPE_V4SF,
20932 20934 V4DF_FTYPE_V2DF,
20933 20935 V4SF_FTYPE_V4DF,
20934 20936 V4SF_FTYPE_V4SF,
20935 20937 V4SF_FTYPE_V4SF_VEC_MERGE,
20936 20938 V4SF_FTYPE_V8SF,
20937 20939 V4SF_FTYPE_V4SI,
20938 20940 V4SF_FTYPE_V2DF,
20939 20941 V2DI_FTYPE_V2DI,
20940 20942 V2DI_FTYPE_V16QI,
20941 20943 V2DI_FTYPE_V8HI,
20942 20944 V2DI_FTYPE_V4SI,
20943 20945 V2DF_FTYPE_V2DF,
20944 20946 V2DF_FTYPE_V2DF_VEC_MERGE,
20945 20947 V2DF_FTYPE_V4SI,
20946 20948 V2DF_FTYPE_V4DF,
20947 20949 V2DF_FTYPE_V4SF,
20948 20950 V2DF_FTYPE_V2SI,
20949 20951 V2SI_FTYPE_V2SI,
20950 20952 V2SI_FTYPE_V4SF,
20951 20953 V2SI_FTYPE_V2SF,
20952 20954 V2SI_FTYPE_V2DF,
20953 20955 V2SF_FTYPE_V2SF,
20954 20956 V2SF_FTYPE_V2SI,
20955 20957 V16QI_FTYPE_V16QI_V16QI,
20956 20958 V16QI_FTYPE_V8HI_V8HI,
20957 20959 V8QI_FTYPE_V8QI_V8QI,
20958 20960 V8QI_FTYPE_V4HI_V4HI,
20959 20961 V8HI_FTYPE_V8HI_V8HI,
20960 20962 V8HI_FTYPE_V8HI_V8HI_COUNT,
20961 20963 V8HI_FTYPE_V16QI_V16QI,
20962 20964 V8HI_FTYPE_V4SI_V4SI,
20963 20965 V8HI_FTYPE_V8HI_SI_COUNT,
20964 20966 V8SF_FTYPE_V8SF_V8SF,
20965 20967 V8SF_FTYPE_V8SF_V8SI,
20966 20968 V4SI_FTYPE_V4SI_V4SI,
20967 20969 V4SI_FTYPE_V4SI_V4SI_COUNT,
20968 20970 V4SI_FTYPE_V8HI_V8HI,
20969 20971 V4SI_FTYPE_V4SF_V4SF,
20970 20972 V4SI_FTYPE_V2DF_V2DF,
20971 20973 V4SI_FTYPE_V4SI_SI_COUNT,
20972 20974 V4HI_FTYPE_V4HI_V4HI,
20973 20975 V4HI_FTYPE_V4HI_V4HI_COUNT,
20974 20976 V4HI_FTYPE_V8QI_V8QI,
20975 20977 V4HI_FTYPE_V2SI_V2SI,
20976 20978 V4HI_FTYPE_V4HI_SI_COUNT,
20977 20979 V4DF_FTYPE_V4DF_V4DF,
20978 20980 V4DF_FTYPE_V4DF_V4DI,
20979 20981 V4SF_FTYPE_V4SF_V4SF,
20980 20982 V4SF_FTYPE_V4SF_V4SF_SWAP,
20981 20983 V4SF_FTYPE_V4SF_V4SI,
20982 20984 V4SF_FTYPE_V4SF_V2SI,
20983 20985 V4SF_FTYPE_V4SF_V2DF,
20984 20986 V4SF_FTYPE_V4SF_DI,
20985 20987 V4SF_FTYPE_V4SF_SI,
20986 20988 V2DI_FTYPE_V2DI_V2DI,
20987 20989 V2DI_FTYPE_V2DI_V2DI_COUNT,
20988 20990 V2DI_FTYPE_V16QI_V16QI,
20989 20991 V2DI_FTYPE_V4SI_V4SI,
20990 20992 V2DI_FTYPE_V2DI_V16QI,
20991 20993 V2DI_FTYPE_V2DF_V2DF,
20992 20994 V2DI_FTYPE_V2DI_SI_COUNT,
20993 20995 V2SI_FTYPE_V2SI_V2SI,
20994 20996 V2SI_FTYPE_V2SI_V2SI_COUNT,
20995 20997 V2SI_FTYPE_V4HI_V4HI,
20996 20998 V2SI_FTYPE_V2SF_V2SF,
20997 20999 V2SI_FTYPE_V2SI_SI_COUNT,
20998 21000 V2DF_FTYPE_V2DF_V2DF,
20999 21001 V2DF_FTYPE_V2DF_V2DF_SWAP,
21000 21002 V2DF_FTYPE_V2DF_V4SF,
21001 21003 V2DF_FTYPE_V2DF_V2DI,
21002 21004 V2DF_FTYPE_V2DF_DI,
21003 21005 V2DF_FTYPE_V2DF_SI,
21004 21006 V2SF_FTYPE_V2SF_V2SF,
21005 21007 V1DI_FTYPE_V1DI_V1DI,
21006 21008 V1DI_FTYPE_V1DI_V1DI_COUNT,
21007 21009 V1DI_FTYPE_V8QI_V8QI,
21008 21010 V1DI_FTYPE_V2SI_V2SI,
21009 21011 V1DI_FTYPE_V1DI_SI_COUNT,
21010 21012 UINT64_FTYPE_UINT64_UINT64,
21011 21013 UINT_FTYPE_UINT_UINT,
21012 21014 UINT_FTYPE_UINT_USHORT,
21013 21015 UINT_FTYPE_UINT_UCHAR,
21014 21016 V8HI_FTYPE_V8HI_INT,
21015 21017 V4SI_FTYPE_V4SI_INT,
21016 21018 V4HI_FTYPE_V4HI_INT,
21017 21019 V8SF_FTYPE_V8SF_INT,
21018 21020 V4SI_FTYPE_V8SI_INT,
21019 21021 V4SF_FTYPE_V8SF_INT,
21020 21022 V2DF_FTYPE_V4DF_INT,
21021 21023 V4DF_FTYPE_V4DF_INT,
21022 21024 V4SF_FTYPE_V4SF_INT,
21023 21025 V2DI_FTYPE_V2DI_INT,
21024 21026 V2DI2TI_FTYPE_V2DI_INT,
21025 21027 V2DF_FTYPE_V2DF_INT,
21026 21028 V16QI_FTYPE_V16QI_V16QI_V16QI,
21027 21029 V8SF_FTYPE_V8SF_V8SF_V8SF,
21028 21030 V4DF_FTYPE_V4DF_V4DF_V4DF,
21029 21031 V4SF_FTYPE_V4SF_V4SF_V4SF,
21030 21032 V2DF_FTYPE_V2DF_V2DF_V2DF,
21031 21033 V16QI_FTYPE_V16QI_V16QI_INT,
21032 21034 V8SI_FTYPE_V8SI_V8SI_INT,
21033 21035 V8SI_FTYPE_V8SI_V4SI_INT,
21034 21036 V8HI_FTYPE_V8HI_V8HI_INT,
21035 21037 V8SF_FTYPE_V8SF_V8SF_INT,
21036 21038 V8SF_FTYPE_V8SF_V4SF_INT,
21037 21039 V4SI_FTYPE_V4SI_V4SI_INT,
21038 21040 V4DF_FTYPE_V4DF_V4DF_INT,
21039 21041 V4DF_FTYPE_V4DF_V2DF_INT,
21040 21042 V4SF_FTYPE_V4SF_V4SF_INT,
21041 21043 V2DI_FTYPE_V2DI_V2DI_INT,
21042 21044 V2DI2TI_FTYPE_V2DI_V2DI_INT,
21043 21045 V1DI2DI_FTYPE_V1DI_V1DI_INT,
21044 21046 V2DF_FTYPE_V2DF_V2DF_INT,
21045 21047 V2DI_FTYPE_V2DI_UINT_UINT,
21046 21048 V2DI_FTYPE_V2DI_V2DI_UINT_UINT
21047 21049 };
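/* Naming convention for the two enums above: an encoding such as
   V4SF_FTYPE_V4SF_V4SF reads "returns V4SF, takes V4SF and V4SF".  The
   trailing tags are hints to the expanders: _COUNT marks a shift-count
   operand, _SWAP marks comparisons whose operands must be exchanged (see
   e.g. __builtin_ia32_cmpgtps below, expanded as LT with swapped operands),
   _VEC_MERGE marks scalar operations that merge their result back into the
   destination vector, and _PTEST marks flag-setting ptest-style tests.  */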
21048 21050
21049 21051 /* Special builtins with variable number of arguments. */
21050 21052 static const struct builtin_description bdesc_special_args[] =
21051 21053 {
21052 21054 /* MMX */
21053 21055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21054 21056
21055 21057 /* 3DNow! */
21056 21058 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID },
21057 21059
21058 21060 /* SSE */
21059 21061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21060 21062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21061 21063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21062 21064
21063 21065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21064 21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF },
21065 21067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21066 21068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF },
21067 21069
21068 21070 /* SSE or 3DNow!A */
21069 21071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21070 21072 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI },
21071 21073
21072 21074 /* SSE2 */
21073 21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21074 21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID },
21075 21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21076 21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI },
21077 21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21078 21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI },
21079 21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) VOID_FTYPE_PINT_INT },
21080 21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE },
21081 21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21082 21084
21083 21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21084 21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE },
21085 21087
21086 21088 /* SSE3 */
21087 21089 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR },
21088 21090
21089 21091 /* SSE4.1 */
21090 21092 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI },
21091 21093
21092 21094 /* SSE4A */
21093 21095 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF },
21094 21096 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF },
21095 21097
21096 21098 /* AVX */
21097 21099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID },
21098 21100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID },
21099 21101 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID },
21100 21102
21101 21103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT },
21102 21104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21103 21105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21104 21106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF },
21105 21107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF },
21106 21108
21107 21109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE },
21108 21110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT },
21109 21111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21110 21112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21111 21113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21112 21114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI },
21113 21115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR },
21114 21116
21115 21117 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI },
21116 21118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF },
21117 21119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF },
21118 21120
21119 21121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF },
21120 21122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF },
21121 21123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF },
21122 21124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF },
21123 21125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF },
21124 21126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF },
21125 21127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF },
21126 21128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF },
21127 21129 };
21128 21130
21129 21131 /* Builtins with variable number of arguments. */
21130 21132 static const struct builtin_description bdesc_args[] =
21131 21133 {
21132 21134 /* MMX */
21133 21135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21134 21136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21135 21137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21136 21138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21137 21139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21138 21140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21139 21141
21140 21142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21141 21143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21142 21144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21143 21145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21144 21146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21145 21147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21146 21148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21147 21149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21148 21150
21149 21151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21150 21152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21151 21153
21152 21154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21153 21155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21154 21156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21155 21157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21156 21158
21157 21159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21158 21160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21159 21161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21160 21162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21161 21163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21162 21164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21163 21165
21164 21166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21165 21167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21166 21168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21167 21169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21168 21170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI},
21169 21171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI},
21170 21172
21171 21173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21172 21174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI },
21173 21175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI },
21174 21176
21175 21177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI },
21176 21178
21177 21179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21178 21180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21179 21181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21180 21182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21181 21183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21182 21184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21183 21185
21184 21186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21185 21187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21186 21188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT },
21187 21189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21188 21190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21189 21191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT },
21190 21192
21191 21193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT },
21192 21194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT },
21193 21195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT },
21194 21196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT },
21195 21197
21196 21198 /* 3DNow! */
21197 21199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21198 21200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21199 21201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21200 21202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21201 21203
21202 21204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21203 21205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21204 21206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21205 21207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21206 21208 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21207 21209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF },
21208 21210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21209 21211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21210 21212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21211 21213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21212 21214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21213 21215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21214 21216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21215 21217 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21216 21218 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21217 21219
21218 21220 /* 3DNow!A */
21219 21221 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF },
21220 21222 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI },
21221 21223 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21222 21224 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF },
21223 21225 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21224 21226 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF },
21225 21227
21226 21228 /* SSE */
21227 21229 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF },
21228 21230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21229 21231 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21230 21232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21231 21233 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21232 21234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21233 21235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21234 21236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21235 21237 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21236 21238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF },
21237 21239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF },
21238 21240 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF },
21239 21241
21240 21242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21241 21243
21242 21244 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21243 21245 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21244 21246 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21245 21247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21246 21248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21247 21249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21248 21250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21249 21251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21250 21252
21251 21253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21252 21254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21253 21255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21254 21256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21255 21257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21256 21258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21257 21259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21258 21260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21259 21261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21260 21262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21261 21263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP},
21262 21264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21263 21265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF },
21264 21266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF },
21265 21267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF },
21266 21268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21267 21269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
21268 21270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
21269 21271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
21270 21272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21271 21273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
21272 21274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
21273 21275
21274 21276 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21275 21277 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21276 21278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21277 21279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21278 21280
21279 21281 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21280 21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21281 21283 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21282 21284 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21283 21285
21284 21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21285 21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21286 21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21287 21289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21288 21290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21289 21291
21290 21292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI },
21291 21293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI },
21292 21294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_DI },
21293 21295
21294 21296 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT },
21295 21297
21296 21298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21297 21299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21298 21300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE },
21299 21301
21300 21302 /* SSE MMX or 3Dnow!A */
21301 21303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21302 21304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21303 21305 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21304 21306
21305 21307 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21306 21308 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21307 21309 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21308 21310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21309 21311
21310 21312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI },
21311 21313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI },
21312 21314
21313 21315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT },
21314 21316
21315 21317 /* SSE2 */
21316 21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21317 21319
21318 21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF },
21319 21321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI },
21320 21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF },
21321 21323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI },
21322 21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI },
21323 21325
21324 21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21325 21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21326 21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF },
21327 21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF },
21328 21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF },
21329 21331
21330 21332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI },
21331 21333
21332 21334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21333 21335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF },
21334 21336 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21335 21337 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF },
21336 21338
21337 21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21338 21340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF },
21339 21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF },
21340 21342
21341 21343 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21342 21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21343 21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21344 21346 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21345 21347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21346 21348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21347 21349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21348 21350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21349 21351
21350 21352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21351 21353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21352 21354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21353 21355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21354 21356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21355 21357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21356 21358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21357 21359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21358 21360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21359 21361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21360 21362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP },
21361 21363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21362 21364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF },
21363 21365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF },
21364 21366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF },
21365 21367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21366 21368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF },
21367 21369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF },
21368 21370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF },
21369 21371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF },
21370 21372
21371 21373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21372 21374 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21373 21375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21374 21376 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21375 21377
21376 21378 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21377 21379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21378 21380 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21379 21381 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21380 21382
21381 21383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21382 21384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21383 21385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21384 21386
21385 21387 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF },
21386 21388
21387 21389 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21388 21390 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21389 21391 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21390 21392 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21391 21393 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21392 21394 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21393 21395 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21394 21396 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21395 21397
21396 21398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21397 21399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21398 21400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21399 21401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21400 21402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21401 21403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21402 21404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21403 21405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21404 21406
21405 21407 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21406 21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21407 21409
21408 21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21409 21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21410 21412 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21411 21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21412 21414
21413 21415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21414 21416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21415 21417
21416 21418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21417 21419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21418 21420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21419 21421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21420 21422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21421 21423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21422 21424
21423 21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21424 21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21425 21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21426 21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21427 21429
21428 21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21429 21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21430 21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21431 21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21432 21434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21433 21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21434 21436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21435 21437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21436 21438
21437 21439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21438 21440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21439 21441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI },
21440 21442
21441 21443 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21442 21444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI },
21443 21445
21444 21446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI },
21445 21447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21446 21448
21447 21449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI },
21448 21450
21449 21451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI },
21450 21452 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI },
21451 21453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF },
21452 21454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF },
21453 21455
21454 21456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21455 21457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21456 21458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21457 21459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21458 21460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21459 21461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21460 21462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21461 21463
21462 21464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT },
21463 21465 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21464 21466 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21465 21467 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT },
21466 21468 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21467 21469 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21468 21470 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT },
21469 21471
21470 21472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT },
21471 21473 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT },
21472 21474 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT },
21473 21475 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT },
21474 21476
21475 21477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT },
21476 21478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21477 21479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT },
21478 21480
21479 21481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE },
21480 21482
21481 21483 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 },
21482 21484 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 },
21483 21485
21484 21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21485 21487
21486 21488 /* SSE2 MMX */
21487 21489 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21488 21490 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI },
21489 21491
21490 21492 /* SSE3 */
21491 21493 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21492 21494 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF },
21493 21495
21494 21496 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21495 21497 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21496 21498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21497 21499 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21498 21500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
21499 21501 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF },
21500 21502
21501 21503 /* SSSE3 */
21502 21504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI },
21503 21505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI },
21504 21506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21505 21507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI },
21506 21508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI },
21507 21509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI },
21508 21510
21509 21511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21510 21512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21511 21513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21512 21514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21513 21515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21514 21516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21515 21517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21516 21518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21517 21519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21518 21520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21519 21521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21520 21522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21521 21523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI },
21522 21524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI },
21523 21525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21524 21526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21525 21527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21526 21528 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21527 21529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21528 21530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI },
21529 21531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21530 21532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI },
21531 21533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21532 21534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI },
21533 21535
21534 21536 /* SSSE3. */
21535 21537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT },
21536 21538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT },
21537 21539
21538 21540 /* SSE4.1 */
21539 21541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21540 21542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21541 21543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF },
21542 21544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF },
21543 21545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21544 21546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21545 21547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21546 21548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT },
21547 21549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI },
21548 21550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT },
21549 21551
21550 21552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21551 21553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21552 21554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21553 21555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21554 21556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, "__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21555 21557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21556 21558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI },
21557 21559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI },
21558 21560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI },
21559 21561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI },
21560 21562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI },
21561 21563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI },
21562 21564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI },
21563 21565
21564 21566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI },
21565 21567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21566 21568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21567 21569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21568 21570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21569 21571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21570 21572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI },
21571 21573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21572 21574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21573 21575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI },
21574 21576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI },
21575 21577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI },
21576 21578
21577 21579 /* SSE4.1 and SSE5 */
21578 21580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21579 21581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21580 21582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21581 21583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21582 21584
21583 21585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21584 21586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21585 21587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST },
21586 21588
21587 21589 /* SSE4.2 */
21588 21590 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21589 21591 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR },
21590 21592 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT },
21591 21593 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT },
21592 21594 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 },
21593 21595
21594 21596 /* SSE4A */
21595 21597 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT },
21596 21598 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI },
21597 21599 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT },
21598 21600 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21599 21601
21600 21602 /* AES */
21601 21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT },
21602 21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI },
21603 21605
21604 21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21605 21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21606 21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21607 21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI },
21608 21610
21609 21611 /* PCLMUL */
21610 21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT },
21611 21613
21612 21614 /* AVX */
21613 21615 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21614 21616 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21615 21617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21616 21618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21617 21619 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21618 21620 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21619 21621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21620 21622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21621 21623 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21622 21624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21623 21625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21624 21626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21625 21627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21626 21628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21627 21629 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21628 21630 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21629 21631 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21630 21632 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21631 21633 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21632 21634 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21633 21635 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21634 21636 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21635 21637 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21636 21638 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21637 21639 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21638 21640 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21639 21641
21640 21642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI },
21641 21643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI },
21642 21644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI },
21643 21645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, "__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI },
21644 21646
21645 21647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21646 21648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21647 21649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF },
21648 21650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF },
21649 21651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21650 21652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21651 21653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21652 21654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21653 21655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21654 21656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT },
21655 21657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT },
21656 21658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21657 21659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21658 21660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT },
21659 21661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT },
21660 21662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT },
21661 21663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI },
21662 21664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI },
21663 21665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF },
21664 21666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21665 21667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF },
21666 21668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21667 21669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF },
21668 21670 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF },
21669 21671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT },
21670 21672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT },
21671 21673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT },
21672 21674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT },
21673 21675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT },
21674 21676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21675 21677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21676 21678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT },
21677 21679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT },
21678 21680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT },
21679 21681
21680 21682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21681 21683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21682 21684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21683 21685
21684 21686 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF },
21685 21687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21686 21688 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21687 21689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21688 21690 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21689 21691
21690 21692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF },
21691 21693
21692 21694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT },
21693 21695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT },
21694 21696
21695 21697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21696 21698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, "__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF },
21697 21699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21698 21700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF },
21699 21701
21700 21702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI },
21701 21703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF },
21702 21704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF },
21703 21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI },
21704 21706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF },
21705 21707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF },
21706 21708
21707 21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21708 21710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21709 21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST },
21710 21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21711 21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21712 21714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST },
21713 21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21714 21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21715 21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST },
21716 21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21717 21719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21718 21720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST },
21719 21721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21720 21722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21721 21723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST },
21722 21724
21723 21725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) INT_FTYPE_V4DF },
21724 21726 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
21725 21727 };
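/* [Editorial note -- annotation, not part of the patch.]  A hedged reading of
   the table above: the fifth field of each builtin_description row is an rtx
   comparison code, and for the ptest/vtest rows it appears to be what later
   selects which flag a single pattern tests -- EQ for the "testz" (ZF) form,
   LTU for the "testc" (CF) form, GTU for the "testnzc" form -- while UNKNOWN
   marks rows that carry no comparison at all.  */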
21726 21728
21727 21729 /* SSE5 */
21728 21730 enum multi_arg_type {
21729 21731 MULTI_ARG_UNKNOWN,
21730 21732 MULTI_ARG_3_SF,
21731 21733 MULTI_ARG_3_DF,
21732 21734 MULTI_ARG_3_DI,
21733 21735 MULTI_ARG_3_SI,
21734 21736 MULTI_ARG_3_SI_DI,
21735 21737 MULTI_ARG_3_HI,
21736 21738 MULTI_ARG_3_HI_SI,
21737 21739 MULTI_ARG_3_QI,
21738 21740 MULTI_ARG_3_PERMPS,
21739 21741 MULTI_ARG_3_PERMPD,
21740 21742 MULTI_ARG_2_SF,
21741 21743 MULTI_ARG_2_DF,
21742 21744 MULTI_ARG_2_DI,
21743 21745 MULTI_ARG_2_SI,
21744 21746 MULTI_ARG_2_HI,
21745 21747 MULTI_ARG_2_QI,
21746 21748 MULTI_ARG_2_DI_IMM,
21747 21749 MULTI_ARG_2_SI_IMM,
21748 21750 MULTI_ARG_2_HI_IMM,
21749 21751 MULTI_ARG_2_QI_IMM,
21750 21752 MULTI_ARG_2_SF_CMP,
21751 21753 MULTI_ARG_2_DF_CMP,
21752 21754 MULTI_ARG_2_DI_CMP,
21753 21755 MULTI_ARG_2_SI_CMP,
21754 21756 MULTI_ARG_2_HI_CMP,
21755 21757 MULTI_ARG_2_QI_CMP,
21756 21758 MULTI_ARG_2_DI_TF,
21757 21759 MULTI_ARG_2_SI_TF,
21758 21760 MULTI_ARG_2_HI_TF,
21759 21761 MULTI_ARG_2_QI_TF,
21760 21762 MULTI_ARG_2_SF_TF,
21761 21763 MULTI_ARG_2_DF_TF,
21762 21764 MULTI_ARG_1_SF,
21763 21765 MULTI_ARG_1_DF,
21764 21766 MULTI_ARG_1_DI,
21765 21767 MULTI_ARG_1_SI,
21766 21768 MULTI_ARG_1_HI,
21767 21769 MULTI_ARG_1_QI,
21768 21770 MULTI_ARG_1_SI_DI,
21769 21771 MULTI_ARG_1_HI_DI,
21770 21772 MULTI_ARG_1_HI_SI,
21771 21773 MULTI_ARG_1_QI_DI,
21772 21774 MULTI_ARG_1_QI_SI,
21773 21775 MULTI_ARG_1_QI_HI,
21774 21776 MULTI_ARG_1_PH2PS,
21775 21777 MULTI_ARG_1_PS2PH
21776 21778 };
21777 21779
21778 21780 static const struct builtin_description bdesc_multi_arg[] =
21779 21781 {
21780 21782 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
21781 21783 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
21782 21784 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
21783 21785 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
21784 21786 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
21785 21787 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
21786 21788 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
21787 21789 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
21788 21790 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
21789 21791 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
21790 21792 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
21791 21793 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
21792 21794 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
21793 21795 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
21794 21796 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
21795 21797 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD, 0, (int)MULTI_ARG_3_DF },
21796 21798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI },
21797 21799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI },
21798 21800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI },
21799 21801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI },
21800 21802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI },
21801 21803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF },
21802 21804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF },
21803 21805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI },
21804 21806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS },
21805 21807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD },
21806 21808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI },
21807 21809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI },
21808 21810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21809 21811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI },
21810 21812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI },
21811 21813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI },
21812 21814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21813 21815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21814 21816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI },
21815 21817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI },
21816 21818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI },
21817 21819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI },
21818 21820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI },
21819 21821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI },
21820 21822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI },
21821 21823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI },
21822 21824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, (int)MULTI_ARG_2_DI_IMM },
21823 21825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM },
21824 21826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM },
21825 21827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM },
21826 21828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI },
21827 21829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI },
21828 21830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI },
21829 21831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI },
21830 21832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI },
21831 21833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI },
21832 21834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI },
21833 21835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI },
21834 21836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF },
21835 21837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF },
21836 21838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF },
21837 21839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF },
21838 21840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS },
21839 21841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH },
21840 21842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI },
21841 21843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI },
21842 21844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI },
21843 21845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI },
21844 21846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI },
21845 21847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI },
21846 21848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI },
21847 21849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI },
21848 21850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI },
21849 21851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI },
21850 21852 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI },
21851 21853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI },
21852 21854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI },
21853 21855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI },
21854 21856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI },
21855 21857
21856 21858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP },
21857 21859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21858 21860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP },
21859 21861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP },
21860 21862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP },
21861 21863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP },
21862 21864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP },
21863 21865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21864 21866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21865 21867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21866 21868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21867 21869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21868 21870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21869 21871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21870 21872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21871 21873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21872 21874
21873 21875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP },
21874 21876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21875 21877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP },
21876 21878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP },
21877 21879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP },
21878 21880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP },
21879 21881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP },
21880 21882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21881 21883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21882 21884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21883 21885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21884 21886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21885 21887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21886 21888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21887 21889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21888 21890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21889 21891
21890 21892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP },
21891 21893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21892 21894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP },
21893 21895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP },
21894 21896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP },
21895 21897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP },
21896 21898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP },
21897 21899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP },
21898 21900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21899 21901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP },
21900 21902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP },
21901 21903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP },
21902 21904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP },
21903 21905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP },
21904 21906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP },
21905 21907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP },
21906 21908
21907 21909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP },
21908 21910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21909 21911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP },
21910 21912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP },
21911 21913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP },
21912 21914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP },
21913 21915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP },
21914 21916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP },
21915 21917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21916 21918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP },
21917 21919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP },
21918 21920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP },
21919 21921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP },
21920 21922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP },
21921 21923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP },
21922 21924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP },
21923 21925
21924 21926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP },
21925 21927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21926 21928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP },
21927 21929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP },
21928 21930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP },
21929 21931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP },
21930 21932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP },
21931 21933
21932 21934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP },
21933 21935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21934 21936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP },
21935 21937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP },
21936 21938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP },
21937 21939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP },
21938 21940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP },
21939 21941
21940 21942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP },
21941 21943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21942 21944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP },
21943 21945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP },
21944 21946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP },
21945 21947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP },
21946 21948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP },
21947 21949
21948 21950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21949 21951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21950 21952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP },
21951 21953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP },
21952 21954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP },
21953 21955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP },
21954 21956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP },
21955 21957
21956 21958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP },
21957 21959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21958 21960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP },
21959 21961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP },
21960 21962 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP },
21961 21963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP },
21962 21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP },
21963 21965
21964 21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP },
21965 21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21966 21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP },
21967 21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP },
21968 21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP },
21969 21971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP },
21970 21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP },
21971 21973
21972 21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP },
21973 21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21974 21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP },
21975 21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP },
21976 21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP },
21977 21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP },
21978 21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP },
21979 21981
21980 21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP },
21981 21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21982 21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP },
21983 21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP },
21984 21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP },
21985 21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP },
21986 21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP },
21987 21989
21988 21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF },
21989 21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF },
21990 21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF },
21991 21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF },
21992 21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF },
21993 21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF },
21994 21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF },
21995 21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF },
21996 21998
21997 21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
21998 22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
21999 22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22000 22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22001 22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF },
22002 22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF },
22003 22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF },
22004 22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF },
22005 22007
22006 22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22007 22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22008 22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22009 22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22010 22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF },
22011 22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF },
22012 22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF },
22013 22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF },
22014 22016 };
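/* [Editorial note -- illustrative sketch, not part of the patch.]  Each row of
   bdesc_multi_arg ties an OPTION_MASK_ISA_* guard and an insn pattern to a
   __builtin_ia32_* name, and its MULTI_ARG_* flag is what later picks the
   prototype and operand count for that builtin.  The standalone C model below
   shows only this table-driven idea; the model_* names are invented for
   illustration and are not GCC's types.  */

#include <stdio.h>

enum model_arg_kind { MODEL_ARG_3_SF, MODEL_ARG_2_DI, MODEL_ARG_1_SF };

struct model_builtin_desc
{
  const char *name;          /* __builtin_ia32_* spelling.           */
  enum model_arg_kind kind;  /* stands in for the MULTI_ARG_* flag.  */
};

static const struct model_builtin_desc model_table[] =
{
  { "__builtin_ia32_fmaddss", MODEL_ARG_3_SF },
  { "__builtin_ia32_protq",   MODEL_ARG_2_DI },
  { "__builtin_ia32_frczps",  MODEL_ARG_1_SF },
};

/* Derive the operand count from the kind, the way the real expander
   derives its operand count from the MULTI_ARG_* flag.  */
static int
model_nargs (enum model_arg_kind kind)
{
  switch (kind)
    {
    case MODEL_ARG_3_SF: return 3;
    case MODEL_ARG_2_DI: return 2;
    case MODEL_ARG_1_SF: return 1;
    }
  return 0;
}

int
main (void)
{
  size_t i;

  for (i = 0; i < sizeof model_table / sizeof model_table[0]; i++)
    printf ("%s takes %d operand(s)\n",
            model_table[i].name, model_nargs (model_table[i].kind));
  return 0;
}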
22015 22017
22016 22018 /* Set up all the MMX/SSE builtins, even builtins for instructions that are not
22017 22019 in the current target ISA to allow the user to compile particular modules
22018 22020 with different target specific options that differ from the command line
22019 22021 options. */
22020 22022 static void
22021 22023 ix86_init_mmx_sse_builtins (void)
22022 22024 {
22023 22025 const struct builtin_description * d;
22024 22026 size_t i;
22025 22027
22026 22028 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
22027 22029 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
22028 22030 tree V1DI_type_node
22029 22031 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode);
22030 22032 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
22031 22033 tree V2DI_type_node
22032 22034 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
22033 22035 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
22034 22036 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
22035 22037 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
22036 22038 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
22037 22039 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
22038 22040 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
22039 22041
22040 22042 tree pchar_type_node = build_pointer_type (char_type_node);
22041 22043 tree pcchar_type_node
22042 22044 = build_pointer_type (build_type_variant (char_type_node, 1, 0));
22043 22045 tree pfloat_type_node = build_pointer_type (float_type_node);
22044 22046 tree pcfloat_type_node
22045 22047 = build_pointer_type (build_type_variant (float_type_node, 1, 0));
22046 22048 tree pv2sf_type_node = build_pointer_type (V2SF_type_node);
22047 22049 tree pcv2sf_type_node
22048 22050 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0));
22049 22051 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
22050 22052 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
22051 22053
22052 22054 /* Comparisons. */
22053 22055 tree int_ftype_v4sf_v4sf
22054 22056 = build_function_type_list (integer_type_node,
22055 22057 V4SF_type_node, V4SF_type_node, NULL_TREE);
22056 22058 tree v4si_ftype_v4sf_v4sf
22057 22059 = build_function_type_list (V4SI_type_node,
22058 22060 V4SF_type_node, V4SF_type_node, NULL_TREE);
22059 22061 /* MMX/SSE/integer conversions. */
22060 22062 tree int_ftype_v4sf
22061 22063 = build_function_type_list (integer_type_node,
22062 22064 V4SF_type_node, NULL_TREE);
22063 22065 tree int64_ftype_v4sf
22064 22066 = build_function_type_list (long_long_integer_type_node,
22065 22067 V4SF_type_node, NULL_TREE);
22066 22068 tree int_ftype_v8qi
22067 22069 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
22068 22070 tree v4sf_ftype_v4sf_int
22069 22071 = build_function_type_list (V4SF_type_node,
22070 22072 V4SF_type_node, integer_type_node, NULL_TREE);
22071 22073 tree v4sf_ftype_v4sf_int64
22072 22074 = build_function_type_list (V4SF_type_node,
22073 22075 V4SF_type_node, long_long_integer_type_node,
22074 22076 NULL_TREE);
22075 22077 tree v4sf_ftype_v4sf_v2si
22076 22078 = build_function_type_list (V4SF_type_node,
22077 22079 V4SF_type_node, V2SI_type_node, NULL_TREE);
22078 22080
22079 22081 /* Miscellaneous. */
22080 22082 tree v8qi_ftype_v4hi_v4hi
22081 22083 = build_function_type_list (V8QI_type_node,
22082 22084 V4HI_type_node, V4HI_type_node, NULL_TREE);
22083 22085 tree v4hi_ftype_v2si_v2si
22084 22086 = build_function_type_list (V4HI_type_node,
22085 22087 V2SI_type_node, V2SI_type_node, NULL_TREE);
22086 22088 tree v4sf_ftype_v4sf_v4sf_int
22087 22089 = build_function_type_list (V4SF_type_node,
22088 22090 V4SF_type_node, V4SF_type_node,
22089 22091 integer_type_node, NULL_TREE);
22090 22092 tree v2si_ftype_v4hi_v4hi
22091 22093 = build_function_type_list (V2SI_type_node,
22092 22094 V4HI_type_node, V4HI_type_node, NULL_TREE);
22093 22095 tree v4hi_ftype_v4hi_int
22094 22096 = build_function_type_list (V4HI_type_node,
22095 22097 V4HI_type_node, integer_type_node, NULL_TREE);
22096 22098 tree v2si_ftype_v2si_int
22097 22099 = build_function_type_list (V2SI_type_node,
22098 22100 V2SI_type_node, integer_type_node, NULL_TREE);
22099 22101 tree v1di_ftype_v1di_int
22100 22102 = build_function_type_list (V1DI_type_node,
22101 22103 V1DI_type_node, integer_type_node, NULL_TREE);
22102 22104
22103 22105 tree void_ftype_void
22104 22106 = build_function_type (void_type_node, void_list_node);
22105 22107 tree void_ftype_unsigned
22106 22108 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
22107 22109 tree void_ftype_unsigned_unsigned
22108 22110 = build_function_type_list (void_type_node, unsigned_type_node,
22109 22111 unsigned_type_node, NULL_TREE);
22110 22112 tree void_ftype_pcvoid_unsigned_unsigned
22111 22113 = build_function_type_list (void_type_node, const_ptr_type_node,
22112 22114 unsigned_type_node, unsigned_type_node,
22113 22115 NULL_TREE);
22114 22116 tree unsigned_ftype_void
22115 22117 = build_function_type (unsigned_type_node, void_list_node);
22116 22118 tree v2si_ftype_v4sf
22117 22119 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
22118 22120 /* Loads/stores. */
22119 22121 tree void_ftype_v8qi_v8qi_pchar
22120 22122 = build_function_type_list (void_type_node,
22121 22123 V8QI_type_node, V8QI_type_node,
22122 22124 pchar_type_node, NULL_TREE);
22123 22125 tree v4sf_ftype_pcfloat
22124 22126 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
22125 22127 tree v4sf_ftype_v4sf_pcv2sf
22126 22128 = build_function_type_list (V4SF_type_node,
22127 22129 V4SF_type_node, pcv2sf_type_node, NULL_TREE);
22128 22130 tree void_ftype_pv2sf_v4sf
22129 22131 = build_function_type_list (void_type_node,
22130 22132 pv2sf_type_node, V4SF_type_node, NULL_TREE);
22131 22133 tree void_ftype_pfloat_v4sf
22132 22134 = build_function_type_list (void_type_node,
22133 22135 pfloat_type_node, V4SF_type_node, NULL_TREE);
22134 22136 tree void_ftype_pdi_di
22135 22137 = build_function_type_list (void_type_node,
22136 22138 pdi_type_node, long_long_unsigned_type_node,
22137 22139 NULL_TREE);
22138 22140 tree void_ftype_pv2di_v2di
22139 22141 = build_function_type_list (void_type_node,
22140 22142 pv2di_type_node, V2DI_type_node, NULL_TREE);
22141 22143 /* Normal vector unops. */
22142 22144 tree v4sf_ftype_v4sf
22143 22145 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
22144 22146 tree v16qi_ftype_v16qi
22145 22147 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
22146 22148 tree v8hi_ftype_v8hi
22147 22149 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
22148 22150 tree v4si_ftype_v4si
22149 22151 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
22150 22152 tree v8qi_ftype_v8qi
22151 22153 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
22152 22154 tree v4hi_ftype_v4hi
22153 22155 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
22154 22156
22155 22157 /* Normal vector binops. */
22156 22158 tree v4sf_ftype_v4sf_v4sf
22157 22159 = build_function_type_list (V4SF_type_node,
22158 22160 V4SF_type_node, V4SF_type_node, NULL_TREE);
22159 22161 tree v8qi_ftype_v8qi_v8qi
22160 22162 = build_function_type_list (V8QI_type_node,
22161 22163 V8QI_type_node, V8QI_type_node, NULL_TREE);
22162 22164 tree v4hi_ftype_v4hi_v4hi
22163 22165 = build_function_type_list (V4HI_type_node,
22164 22166 V4HI_type_node, V4HI_type_node, NULL_TREE);
22165 22167 tree v2si_ftype_v2si_v2si
22166 22168 = build_function_type_list (V2SI_type_node,
22167 22169 V2SI_type_node, V2SI_type_node, NULL_TREE);
22168 22170 tree v1di_ftype_v1di_v1di
22169 22171 = build_function_type_list (V1DI_type_node,
22170 22172 V1DI_type_node, V1DI_type_node, NULL_TREE);
22171 22173 tree v1di_ftype_v1di_v1di_int
22172 22174 = build_function_type_list (V1DI_type_node,
22173 22175 V1DI_type_node, V1DI_type_node,
22174 22176 integer_type_node, NULL_TREE);
22175 22177 tree v2si_ftype_v2sf
22176 22178 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
22177 22179 tree v2sf_ftype_v2si
22178 22180 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
22179 22181 tree v2si_ftype_v2si
22180 22182 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
22181 22183 tree v2sf_ftype_v2sf
22182 22184 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
22183 22185 tree v2sf_ftype_v2sf_v2sf
22184 22186 = build_function_type_list (V2SF_type_node,
22185 22187 V2SF_type_node, V2SF_type_node, NULL_TREE);
22186 22188 tree v2si_ftype_v2sf_v2sf
22187 22189 = build_function_type_list (V2SI_type_node,
22188 22190 V2SF_type_node, V2SF_type_node, NULL_TREE);
22189 22191 tree pint_type_node = build_pointer_type (integer_type_node);
22190 22192 tree pdouble_type_node = build_pointer_type (double_type_node);
22191 22193 tree pcdouble_type_node = build_pointer_type (
22192 22194 build_type_variant (double_type_node, 1, 0));
22193 22195 tree int_ftype_v2df_v2df
22194 22196 = build_function_type_list (integer_type_node,
22195 22197 V2DF_type_node, V2DF_type_node, NULL_TREE);
22196 22198
22197 22199 tree void_ftype_pcvoid
22198 22200 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
22199 22201 tree v4sf_ftype_v4si
22200 22202 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
22201 22203 tree v4si_ftype_v4sf
22202 22204 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
22203 22205 tree v2df_ftype_v4si
22204 22206 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
22205 22207 tree v4si_ftype_v2df
22206 22208 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
22207 22209 tree v4si_ftype_v2df_v2df
22208 22210 = build_function_type_list (V4SI_type_node,
22209 22211 V2DF_type_node, V2DF_type_node, NULL_TREE);
22210 22212 tree v2si_ftype_v2df
22211 22213 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
22212 22214 tree v4sf_ftype_v2df
22213 22215 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
22214 22216 tree v2df_ftype_v2si
22215 22217 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
22216 22218 tree v2df_ftype_v4sf
22217 22219 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
22218 22220 tree int_ftype_v2df
22219 22221 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
22220 22222 tree int64_ftype_v2df
22221 22223 = build_function_type_list (long_long_integer_type_node,
22222 22224 V2DF_type_node, NULL_TREE);
22223 22225 tree v2df_ftype_v2df_int
22224 22226 = build_function_type_list (V2DF_type_node,
22225 22227 V2DF_type_node, integer_type_node, NULL_TREE);
22226 22228 tree v2df_ftype_v2df_int64
22227 22229 = build_function_type_list (V2DF_type_node,
22228 22230 V2DF_type_node, long_long_integer_type_node,
22229 22231 NULL_TREE);
22230 22232 tree v4sf_ftype_v4sf_v2df
22231 22233 = build_function_type_list (V4SF_type_node,
22232 22234 V4SF_type_node, V2DF_type_node, NULL_TREE);
22233 22235 tree v2df_ftype_v2df_v4sf
22234 22236 = build_function_type_list (V2DF_type_node,
22235 22237 V2DF_type_node, V4SF_type_node, NULL_TREE);
22236 22238 tree v2df_ftype_v2df_v2df_int
22237 22239 = build_function_type_list (V2DF_type_node,
22238 22240 V2DF_type_node, V2DF_type_node,
22239 22241 integer_type_node,
22240 22242 NULL_TREE);
22241 22243 tree v2df_ftype_v2df_pcdouble
22242 22244 = build_function_type_list (V2DF_type_node,
22243 22245 V2DF_type_node, pcdouble_type_node, NULL_TREE);
22244 22246 tree void_ftype_pdouble_v2df
22245 22247 = build_function_type_list (void_type_node,
22246 22248 pdouble_type_node, V2DF_type_node, NULL_TREE);
22247 22249 tree void_ftype_pint_int
22248 22250 = build_function_type_list (void_type_node,
22249 22251 pint_type_node, integer_type_node, NULL_TREE);
22250 22252 tree void_ftype_v16qi_v16qi_pchar
22251 22253 = build_function_type_list (void_type_node,
22252 22254 V16QI_type_node, V16QI_type_node,
22253 22255 pchar_type_node, NULL_TREE);
22254 22256 tree v2df_ftype_pcdouble
22255 22257 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
22256 22258 tree v2df_ftype_v2df_v2df
22257 22259 = build_function_type_list (V2DF_type_node,
22258 22260 V2DF_type_node, V2DF_type_node, NULL_TREE);
22259 22261 tree v16qi_ftype_v16qi_v16qi
22260 22262 = build_function_type_list (V16QI_type_node,
22261 22263 V16QI_type_node, V16QI_type_node, NULL_TREE);
22262 22264 tree v8hi_ftype_v8hi_v8hi
22263 22265 = build_function_type_list (V8HI_type_node,
22264 22266 V8HI_type_node, V8HI_type_node, NULL_TREE);
22265 22267 tree v4si_ftype_v4si_v4si
22266 22268 = build_function_type_list (V4SI_type_node,
22267 22269 V4SI_type_node, V4SI_type_node, NULL_TREE);
22268 22270 tree v2di_ftype_v2di_v2di
22269 22271 = build_function_type_list (V2DI_type_node,
22270 22272 V2DI_type_node, V2DI_type_node, NULL_TREE);
22271 22273 tree v2di_ftype_v2df_v2df
22272 22274 = build_function_type_list (V2DI_type_node,
22273 22275 V2DF_type_node, V2DF_type_node, NULL_TREE);
22274 22276 tree v2df_ftype_v2df
22275 22277 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
22276 22278 tree v2di_ftype_v2di_int
22277 22279 = build_function_type_list (V2DI_type_node,
22278 22280 V2DI_type_node, integer_type_node, NULL_TREE);
22279 22281 tree v2di_ftype_v2di_v2di_int
22280 22282 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22281 22283 V2DI_type_node, integer_type_node, NULL_TREE);
22282 22284 tree v4si_ftype_v4si_int
22283 22285 = build_function_type_list (V4SI_type_node,
22284 22286 V4SI_type_node, integer_type_node, NULL_TREE);
22285 22287 tree v8hi_ftype_v8hi_int
22286 22288 = build_function_type_list (V8HI_type_node,
22287 22289 V8HI_type_node, integer_type_node, NULL_TREE);
22288 22290 tree v4si_ftype_v8hi_v8hi
22289 22291 = build_function_type_list (V4SI_type_node,
22290 22292 V8HI_type_node, V8HI_type_node, NULL_TREE);
22291 22293 tree v1di_ftype_v8qi_v8qi
22292 22294 = build_function_type_list (V1DI_type_node,
22293 22295 V8QI_type_node, V8QI_type_node, NULL_TREE);
22294 22296 tree v1di_ftype_v2si_v2si
22295 22297 = build_function_type_list (V1DI_type_node,
22296 22298 V2SI_type_node, V2SI_type_node, NULL_TREE);
22297 22299 tree v2di_ftype_v16qi_v16qi
22298 22300 = build_function_type_list (V2DI_type_node,
22299 22301 V16QI_type_node, V16QI_type_node, NULL_TREE);
22300 22302 tree v2di_ftype_v4si_v4si
22301 22303 = build_function_type_list (V2DI_type_node,
22302 22304 V4SI_type_node, V4SI_type_node, NULL_TREE);
22303 22305 tree int_ftype_v16qi
22304 22306 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
22305 22307 tree v16qi_ftype_pcchar
22306 22308 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
22307 22309 tree void_ftype_pchar_v16qi
22308 22310 = build_function_type_list (void_type_node,
22309 22311 pchar_type_node, V16QI_type_node, NULL_TREE);
22310 22312
22311 22313 tree v2di_ftype_v2di_unsigned_unsigned
22312 22314 = build_function_type_list (V2DI_type_node, V2DI_type_node,
22313 22315 unsigned_type_node, unsigned_type_node,
22314 22316 NULL_TREE);
22315 22317 tree v2di_ftype_v2di_v2di_unsigned_unsigned
22316 22318 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
22317 22319 unsigned_type_node, unsigned_type_node,
22318 22320 NULL_TREE);
22319 22321 tree v2di_ftype_v2di_v16qi
22320 22322 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
22321 22323 NULL_TREE);
22322 22324 tree v2df_ftype_v2df_v2df_v2df
22323 22325 = build_function_type_list (V2DF_type_node,
22324 22326 V2DF_type_node, V2DF_type_node,
22325 22327 V2DF_type_node, NULL_TREE);
22326 22328 tree v4sf_ftype_v4sf_v4sf_v4sf
22327 22329 = build_function_type_list (V4SF_type_node,
22328 22330 V4SF_type_node, V4SF_type_node,
22329 22331 V4SF_type_node, NULL_TREE);
22330 22332 tree v8hi_ftype_v16qi
22331 22333 = build_function_type_list (V8HI_type_node, V16QI_type_node,
22332 22334 NULL_TREE);
22333 22335 tree v4si_ftype_v16qi
22334 22336 = build_function_type_list (V4SI_type_node, V16QI_type_node,
22335 22337 NULL_TREE);
22336 22338 tree v2di_ftype_v16qi
22337 22339 = build_function_type_list (V2DI_type_node, V16QI_type_node,
22338 22340 NULL_TREE);
22339 22341 tree v4si_ftype_v8hi
22340 22342 = build_function_type_list (V4SI_type_node, V8HI_type_node,
22341 22343 NULL_TREE);
22342 22344 tree v2di_ftype_v8hi
22343 22345 = build_function_type_list (V2DI_type_node, V8HI_type_node,
22344 22346 NULL_TREE);
22345 22347 tree v2di_ftype_v4si
22346 22348 = build_function_type_list (V2DI_type_node, V4SI_type_node,
22347 22349 NULL_TREE);
22348 22350 tree v2di_ftype_pv2di
22349 22351 = build_function_type_list (V2DI_type_node, pv2di_type_node,
22350 22352 NULL_TREE);
22351 22353 tree v16qi_ftype_v16qi_v16qi_int
22352 22354 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22353 22355 V16QI_type_node, integer_type_node,
22354 22356 NULL_TREE);
22355 22357 tree v16qi_ftype_v16qi_v16qi_v16qi
22356 22358 = build_function_type_list (V16QI_type_node, V16QI_type_node,
22357 22359 V16QI_type_node, V16QI_type_node,
22358 22360 NULL_TREE);
22359 22361 tree v8hi_ftype_v8hi_v8hi_int
22360 22362 = build_function_type_list (V8HI_type_node, V8HI_type_node,
22361 22363 V8HI_type_node, integer_type_node,
22362 22364 NULL_TREE);
22363 22365 tree v4si_ftype_v4si_v4si_int
22364 22366 = build_function_type_list (V4SI_type_node, V4SI_type_node,
22365 22367 V4SI_type_node, integer_type_node,
22366 22368 NULL_TREE);
22367 22369 tree int_ftype_v2di_v2di
22368 22370 = build_function_type_list (integer_type_node,
22369 22371 V2DI_type_node, V2DI_type_node,
22370 22372 NULL_TREE);
22371 22373 tree int_ftype_v16qi_int_v16qi_int_int
22372 22374 = build_function_type_list (integer_type_node,
22373 22375 V16QI_type_node,
22374 22376 integer_type_node,
22375 22377 V16QI_type_node,
22376 22378 integer_type_node,
22377 22379 integer_type_node,
22378 22380 NULL_TREE);
22379 22381 tree v16qi_ftype_v16qi_int_v16qi_int_int
22380 22382 = build_function_type_list (V16QI_type_node,
22381 22383 V16QI_type_node,
22382 22384 integer_type_node,
22383 22385 V16QI_type_node,
22384 22386 integer_type_node,
22385 22387 integer_type_node,
22386 22388 NULL_TREE);
22387 22389 tree int_ftype_v16qi_v16qi_int
22388 22390 = build_function_type_list (integer_type_node,
22389 22391 V16QI_type_node,
22390 22392 V16QI_type_node,
22391 22393 integer_type_node,
22392 22394 NULL_TREE);
22393 22395
22394 22396 /* SSE5 instructions */
22395 22397 tree v2di_ftype_v2di_v2di_v2di
22396 22398 = build_function_type_list (V2DI_type_node,
22397 22399 V2DI_type_node,
22398 22400 V2DI_type_node,
22399 22401 V2DI_type_node,
22400 22402 NULL_TREE);
22401 22403
22402 22404 tree v4si_ftype_v4si_v4si_v4si
22403 22405 = build_function_type_list (V4SI_type_node,
22404 22406 V4SI_type_node,
22405 22407 V4SI_type_node,
22406 22408 V4SI_type_node,
22407 22409 NULL_TREE);
22408 22410
22409 22411 tree v4si_ftype_v4si_v4si_v2di
22410 22412 = build_function_type_list (V4SI_type_node,
22411 22413 V4SI_type_node,
22412 22414 V4SI_type_node,
22413 22415 V2DI_type_node,
22414 22416 NULL_TREE);
22415 22417
22416 22418 tree v8hi_ftype_v8hi_v8hi_v8hi
22417 22419 = build_function_type_list (V8HI_type_node,
22418 22420 V8HI_type_node,
22419 22421 V8HI_type_node,
22420 22422 V8HI_type_node,
22421 22423 NULL_TREE);
22422 22424
22423 22425 tree v8hi_ftype_v8hi_v8hi_v4si
22424 22426 = build_function_type_list (V8HI_type_node,
22425 22427 V8HI_type_node,
22426 22428 V8HI_type_node,
22427 22429 V4SI_type_node,
22428 22430 NULL_TREE);
22429 22431
22430 22432 tree v2df_ftype_v2df_v2df_v16qi
22431 22433 = build_function_type_list (V2DF_type_node,
22432 22434 V2DF_type_node,
22433 22435 V2DF_type_node,
22434 22436 V16QI_type_node,
22435 22437 NULL_TREE);
22436 22438
22437 22439 tree v4sf_ftype_v4sf_v4sf_v16qi
22438 22440 = build_function_type_list (V4SF_type_node,
22439 22441 V4SF_type_node,
22440 22442 V4SF_type_node,
22441 22443 V16QI_type_node,
22442 22444 NULL_TREE);
22443 22445
22444 22446 tree v2di_ftype_v2di_si
22445 22447 = build_function_type_list (V2DI_type_node,
22446 22448 V2DI_type_node,
22447 22449 integer_type_node,
22448 22450 NULL_TREE);
22449 22451
22450 22452 tree v4si_ftype_v4si_si
22451 22453 = build_function_type_list (V4SI_type_node,
22452 22454 V4SI_type_node,
22453 22455 integer_type_node,
22454 22456 NULL_TREE);
22455 22457
22456 22458 tree v8hi_ftype_v8hi_si
22457 22459 = build_function_type_list (V8HI_type_node,
22458 22460 V8HI_type_node,
22459 22461 integer_type_node,
22460 22462 NULL_TREE);
22461 22463
22462 22464 tree v16qi_ftype_v16qi_si
22463 22465 = build_function_type_list (V16QI_type_node,
22464 22466 V16QI_type_node,
22465 22467 integer_type_node,
22466 22468 NULL_TREE);
22467 22469 tree v4sf_ftype_v4hi
22468 22470 = build_function_type_list (V4SF_type_node,
22469 22471 V4HI_type_node,
22470 22472 NULL_TREE);
22471 22473
22472 22474 tree v4hi_ftype_v4sf
22473 22475 = build_function_type_list (V4HI_type_node,
22474 22476 V4SF_type_node,
22475 22477 NULL_TREE);
22476 22478
22477 22479 tree v2di_ftype_v2di
22478 22480 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
22479 22481
22480 22482 tree v16qi_ftype_v8hi_v8hi
22481 22483 = build_function_type_list (V16QI_type_node,
22482 22484 V8HI_type_node, V8HI_type_node,
22483 22485 NULL_TREE);
22484 22486 tree v8hi_ftype_v4si_v4si
22485 22487 = build_function_type_list (V8HI_type_node,
22486 22488 V4SI_type_node, V4SI_type_node,
22487 22489 NULL_TREE);
22488 22490 tree v8hi_ftype_v16qi_v16qi
22489 22491 = build_function_type_list (V8HI_type_node,
22490 22492 V16QI_type_node, V16QI_type_node,
22491 22493 NULL_TREE);
22492 22494 tree v4hi_ftype_v8qi_v8qi
22493 22495 = build_function_type_list (V4HI_type_node,
22494 22496 V8QI_type_node, V8QI_type_node,
22495 22497 NULL_TREE);
22496 22498 tree unsigned_ftype_unsigned_uchar
22497 22499 = build_function_type_list (unsigned_type_node,
22498 22500 unsigned_type_node,
22499 22501 unsigned_char_type_node,
22500 22502 NULL_TREE);
22501 22503 tree unsigned_ftype_unsigned_ushort
22502 22504 = build_function_type_list (unsigned_type_node,
22503 22505 unsigned_type_node,
22504 22506 short_unsigned_type_node,
22505 22507 NULL_TREE);
22506 22508 tree unsigned_ftype_unsigned_unsigned
22507 22509 = build_function_type_list (unsigned_type_node,
22508 22510 unsigned_type_node,
22509 22511 unsigned_type_node,
22510 22512 NULL_TREE);
22511 22513 tree uint64_ftype_uint64_uint64
22512 22514 = build_function_type_list (long_long_unsigned_type_node,
22513 22515 long_long_unsigned_type_node,
22514 22516 long_long_unsigned_type_node,
22515 22517 NULL_TREE);
22516 22518 tree float_ftype_float
22517 22519 = build_function_type_list (float_type_node,
22518 22520 float_type_node,
22519 22521 NULL_TREE);
22520 22522
22521 22523 /* AVX builtins */
22522 22524 tree V32QI_type_node = build_vector_type_for_mode (char_type_node,
22523 22525 V32QImode);
22524 22526 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node,
22525 22527 V8SImode);
22526 22528 tree V8SF_type_node = build_vector_type_for_mode (float_type_node,
22527 22529 V8SFmode);
22528 22530 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node,
22529 22531 V4DImode);
22530 22532 tree V4DF_type_node = build_vector_type_for_mode (double_type_node,
22531 22533 V4DFmode);
22532 22534 tree v8sf_ftype_v8sf
22533 22535 = build_function_type_list (V8SF_type_node,
22534 22536 V8SF_type_node,
22535 22537 NULL_TREE);
22536 22538 tree v8si_ftype_v8sf
22537 22539 = build_function_type_list (V8SI_type_node,
22538 22540 V8SF_type_node,
22539 22541 NULL_TREE);
22540 22542 tree v8sf_ftype_v8si
22541 22543 = build_function_type_list (V8SF_type_node,
22542 22544 V8SI_type_node,
22543 22545 NULL_TREE);
22544 22546 tree v4si_ftype_v4df
22545 22547 = build_function_type_list (V4SI_type_node,
22546 22548 V4DF_type_node,
22547 22549 NULL_TREE);
22548 22550 tree v4df_ftype_v4df
22549 22551 = build_function_type_list (V4DF_type_node,
22550 22552 V4DF_type_node,
22551 22553 NULL_TREE);
22552 22554 tree v4df_ftype_v4si
22553 22555 = build_function_type_list (V4DF_type_node,
22554 22556 V4SI_type_node,
22555 22557 NULL_TREE);
22556 22558 tree v4df_ftype_v4sf
22557 22559 = build_function_type_list (V4DF_type_node,
22558 22560 V4SF_type_node,
22559 22561 NULL_TREE);
22560 22562 tree v4sf_ftype_v4df
22561 22563 = build_function_type_list (V4SF_type_node,
22562 22564 V4DF_type_node,
22563 22565 NULL_TREE);
22564 22566 tree v8sf_ftype_v8sf_v8sf
22565 22567 = build_function_type_list (V8SF_type_node,
22566 22568 V8SF_type_node, V8SF_type_node,
22567 22569 NULL_TREE);
22568 22570 tree v4df_ftype_v4df_v4df
22569 22571 = build_function_type_list (V4DF_type_node,
22570 22572 V4DF_type_node, V4DF_type_node,
22571 22573 NULL_TREE);
22572 22574 tree v8sf_ftype_v8sf_int
22573 22575 = build_function_type_list (V8SF_type_node,
22574 22576 V8SF_type_node, integer_type_node,
22575 22577 NULL_TREE);
22576 22578 tree v4si_ftype_v8si_int
22577 22579 = build_function_type_list (V4SI_type_node,
22578 22580 V8SI_type_node, integer_type_node,
22579 22581 NULL_TREE);
22580 22582 tree v4df_ftype_v4df_int
22581 22583 = build_function_type_list (V4DF_type_node,
22582 22584 V4DF_type_node, integer_type_node,
22583 22585 NULL_TREE);
22584 22586 tree v4sf_ftype_v8sf_int
22585 22587 = build_function_type_list (V4SF_type_node,
22586 22588 V8SF_type_node, integer_type_node,
22587 22589 NULL_TREE);
22588 22590 tree v2df_ftype_v4df_int
22589 22591 = build_function_type_list (V2DF_type_node,
22590 22592 V4DF_type_node, integer_type_node,
22591 22593 NULL_TREE);
22592 22594 tree v8sf_ftype_v8sf_v8sf_int
22593 22595 = build_function_type_list (V8SF_type_node,
22594 22596 V8SF_type_node, V8SF_type_node,
22595 22597 integer_type_node,
22596 22598 NULL_TREE);
22597 22599 tree v8sf_ftype_v8sf_v8sf_v8sf
22598 22600 = build_function_type_list (V8SF_type_node,
22599 22601 V8SF_type_node, V8SF_type_node,
22600 22602 V8SF_type_node,
22601 22603 NULL_TREE);
22602 22604 tree v4df_ftype_v4df_v4df_v4df
22603 22605 = build_function_type_list (V4DF_type_node,
22604 22606 V4DF_type_node, V4DF_type_node,
22605 22607 V4DF_type_node,
22606 22608 NULL_TREE);
22607 22609 tree v8si_ftype_v8si_v8si_int
22608 22610 = build_function_type_list (V8SI_type_node,
22609 22611 V8SI_type_node, V8SI_type_node,
22610 22612 integer_type_node,
22611 22613 NULL_TREE);
22612 22614 tree v4df_ftype_v4df_v4df_int
22613 22615 = build_function_type_list (V4DF_type_node,
22614 22616 V4DF_type_node, V4DF_type_node,
22615 22617 integer_type_node,
22616 22618 NULL_TREE);
22617 22619 tree v8sf_ftype_pcfloat
22618 22620 = build_function_type_list (V8SF_type_node,
22619 22621 pcfloat_type_node,
22620 22622 NULL_TREE);
22621 22623 tree v4df_ftype_pcdouble
22622 22624 = build_function_type_list (V4DF_type_node,
22623 22625 pcdouble_type_node,
22624 22626 NULL_TREE);
22625 22627 tree pcv4sf_type_node
22626 22628 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0));
22627 22629 tree pcv2df_type_node
22628 22630 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0));
22629 22631 tree v8sf_ftype_pcv4sf
22630 22632 = build_function_type_list (V8SF_type_node,
22631 22633 pcv4sf_type_node,
22632 22634 NULL_TREE);
22633 22635 tree v4df_ftype_pcv2df
22634 22636 = build_function_type_list (V4DF_type_node,
22635 22637 pcv2df_type_node,
22636 22638 NULL_TREE);
22637 22639 tree v32qi_ftype_pcchar
22638 22640 = build_function_type_list (V32QI_type_node,
22639 22641 pcchar_type_node,
22640 22642 NULL_TREE);
22641 22643 tree void_ftype_pchar_v32qi
22642 22644 = build_function_type_list (void_type_node,
22643 22645 pchar_type_node, V32QI_type_node,
22644 22646 NULL_TREE);
22645 22647 tree v8si_ftype_v8si_v4si_int
22646 22648 = build_function_type_list (V8SI_type_node,
22647 22649 V8SI_type_node, V4SI_type_node,
22648 22650 integer_type_node,
22649 22651 NULL_TREE);
22650 22652 tree pv4di_type_node = build_pointer_type (V4DI_type_node);
22651 22653 tree void_ftype_pv4di_v4di
22652 22654 = build_function_type_list (void_type_node,
22653 22655 pv4di_type_node, V4DI_type_node,
22654 22656 NULL_TREE);
22655 22657 tree v8sf_ftype_v8sf_v4sf_int
22656 22658 = build_function_type_list (V8SF_type_node,
22657 22659 V8SF_type_node, V4SF_type_node,
22658 22660 integer_type_node,
22659 22661 NULL_TREE);
22660 22662 tree v4df_ftype_v4df_v2df_int
22661 22663 = build_function_type_list (V4DF_type_node,
22662 22664 V4DF_type_node, V2DF_type_node,
22663 22665 integer_type_node,
22664 22666 NULL_TREE);
22665 22667 tree void_ftype_pfloat_v8sf
22666 22668 = build_function_type_list (void_type_node,
22667 22669 pfloat_type_node, V8SF_type_node,
22668 22670 NULL_TREE);
22669 22671 tree void_ftype_pdouble_v4df
22670 22672 = build_function_type_list (void_type_node,
22671 22673 pdouble_type_node, V4DF_type_node,
22672 22674 NULL_TREE);
22673 22675 tree pv8sf_type_node = build_pointer_type (V8SF_type_node);
22674 22676 tree pv4sf_type_node = build_pointer_type (V4SF_type_node);
22675 22677 tree pv4df_type_node = build_pointer_type (V4DF_type_node);
22676 22678 tree pv2df_type_node = build_pointer_type (V2DF_type_node);
22677 22679 tree pcv8sf_type_node
22678 22680 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0));
22679 22681 tree pcv4df_type_node
22680 22682 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0));
22681 22683 tree v8sf_ftype_pcv8sf_v8sf
22682 22684 = build_function_type_list (V8SF_type_node,
22683 22685 pcv8sf_type_node, V8SF_type_node,
22684 22686 NULL_TREE);
22685 22687 tree v4df_ftype_pcv4df_v4df
22686 22688 = build_function_type_list (V4DF_type_node,
22687 22689 pcv4df_type_node, V4DF_type_node,
22688 22690 NULL_TREE);
22689 22691 tree v4sf_ftype_pcv4sf_v4sf
22690 22692 = build_function_type_list (V4SF_type_node,
22691 22693 pcv4sf_type_node, V4SF_type_node,
22692 22694 NULL_TREE);
22693 22695 tree v2df_ftype_pcv2df_v2df
22694 22696 = build_function_type_list (V2DF_type_node,
22695 22697 pcv2df_type_node, V2DF_type_node,
22696 22698 NULL_TREE);
22697 22699 tree void_ftype_pv8sf_v8sf_v8sf
22698 22700 = build_function_type_list (void_type_node,
22699 22701 pv8sf_type_node, V8SF_type_node,
22700 22702 V8SF_type_node,
22701 22703 NULL_TREE);
22702 22704 tree void_ftype_pv4df_v4df_v4df
22703 22705 = build_function_type_list (void_type_node,
22704 22706 pv4df_type_node, V4DF_type_node,
22705 22707 V4DF_type_node,
22706 22708 NULL_TREE);
22707 22709 tree void_ftype_pv4sf_v4sf_v4sf
22708 22710 = build_function_type_list (void_type_node,
22709 22711 pv4sf_type_node, V4SF_type_node,
22710 22712 V4SF_type_node,
22711 22713 NULL_TREE);
22712 22714 tree void_ftype_pv2df_v2df_v2df
22713 22715 = build_function_type_list (void_type_node,
22714 22716 pv2df_type_node, V2DF_type_node,
22715 22717 V2DF_type_node,
22716 22718 NULL_TREE);
22717 22719 tree v4df_ftype_v2df
22718 22720 = build_function_type_list (V4DF_type_node,
22719 22721 V2DF_type_node,
22720 22722 NULL_TREE);
22721 22723 tree v8sf_ftype_v4sf
22722 22724 = build_function_type_list (V8SF_type_node,
22723 22725 V4SF_type_node,
22724 22726 NULL_TREE);
22725 22727 tree v8si_ftype_v4si
22726 22728 = build_function_type_list (V8SI_type_node,
22727 22729 V4SI_type_node,
22728 22730 NULL_TREE);
22729 22731 tree v2df_ftype_v4df
22730 22732 = build_function_type_list (V2DF_type_node,
22731 22733 V4DF_type_node,
22732 22734 NULL_TREE);
22733 22735 tree v4sf_ftype_v8sf
22734 22736 = build_function_type_list (V4SF_type_node,
22735 22737 V8SF_type_node,
22736 22738 NULL_TREE);
22737 22739 tree v4si_ftype_v8si
22738 22740 = build_function_type_list (V4SI_type_node,
22739 22741 V8SI_type_node,
22740 22742 NULL_TREE);
22741 22743 tree int_ftype_v4df
22742 22744 = build_function_type_list (integer_type_node,
22743 22745 V4DF_type_node,
22744 22746 NULL_TREE);
22745 22747 tree int_ftype_v8sf
22746 22748 = build_function_type_list (integer_type_node,
22747 22749 V8SF_type_node,
22748 22750 NULL_TREE);
22749 22751 tree int_ftype_v8sf_v8sf
22750 22752 = build_function_type_list (integer_type_node,
22751 22753 V8SF_type_node, V8SF_type_node,
22752 22754 NULL_TREE);
22753 22755 tree int_ftype_v4di_v4di
22754 22756 = build_function_type_list (integer_type_node,
22755 22757 V4DI_type_node, V4DI_type_node,
22756 22758 NULL_TREE);
22757 22759 tree int_ftype_v4df_v4df
22758 22760 = build_function_type_list (integer_type_node,
22759 22761 V4DF_type_node, V4DF_type_node,
22760 22762 NULL_TREE);
22761 22763 tree v8sf_ftype_v8sf_v8si
22762 22764 = build_function_type_list (V8SF_type_node,
22763 22765 V8SF_type_node, V8SI_type_node,
22764 22766 NULL_TREE);
22765 22767 tree v4df_ftype_v4df_v4di
22766 22768 = build_function_type_list (V4DF_type_node,
22767 22769 V4DF_type_node, V4DI_type_node,
22768 22770 NULL_TREE);
22769 22771 tree v4sf_ftype_v4sf_v4si
22770 22772 = build_function_type_list (V4SF_type_node,
22771 22773 V4SF_type_node, V4SI_type_node, NULL_TREE);
22772 22774 tree v2df_ftype_v2df_v2di
22773 22775 = build_function_type_list (V2DF_type_node,
22774 22776 V2DF_type_node, V2DI_type_node, NULL_TREE);
22775 22777
22776 22778 tree ftype;
22777 22779
22778 22780 /* Add all special builtins with variable number of operands. */
22779 22781 for (i = 0, d = bdesc_special_args;
22780 22782 i < ARRAY_SIZE (bdesc_special_args);
22781 22783 i++, d++)
22782 22784 {
22783 22785 tree type;
22784 22786
22785 22787 if (d->name == 0)
22786 22788 continue;
22787 22789
22788 22790 switch ((enum ix86_special_builtin_type) d->flag)
22789 22791 {
22790 22792 case VOID_FTYPE_VOID:
22791 22793 type = void_ftype_void;
22792 22794 break;
22793 22795 case V32QI_FTYPE_PCCHAR:
22794 22796 type = v32qi_ftype_pcchar;
22795 22797 break;
22796 22798 case V16QI_FTYPE_PCCHAR:
22797 22799 type = v16qi_ftype_pcchar;
22798 22800 break;
22799 22801 case V8SF_FTYPE_PCV4SF:
22800 22802 type = v8sf_ftype_pcv4sf;
22801 22803 break;
22802 22804 case V8SF_FTYPE_PCFLOAT:
22803 22805 type = v8sf_ftype_pcfloat;
22804 22806 break;
22805 22807 case V4DF_FTYPE_PCV2DF:
22806 22808 type = v4df_ftype_pcv2df;
22807 22809 break;
22808 22810 case V4DF_FTYPE_PCDOUBLE:
22809 22811 type = v4df_ftype_pcdouble;
22810 22812 break;
22811 22813 case V4SF_FTYPE_PCFLOAT:
22812 22814 type = v4sf_ftype_pcfloat;
22813 22815 break;
22814 22816 case V2DI_FTYPE_PV2DI:
22815 22817 type = v2di_ftype_pv2di;
22816 22818 break;
22817 22819 case V2DF_FTYPE_PCDOUBLE:
22818 22820 type = v2df_ftype_pcdouble;
22819 22821 break;
22820 22822 case V8SF_FTYPE_PCV8SF_V8SF:
22821 22823 type = v8sf_ftype_pcv8sf_v8sf;
22822 22824 break;
22823 22825 case V4DF_FTYPE_PCV4DF_V4DF:
22824 22826 type = v4df_ftype_pcv4df_v4df;
22825 22827 break;
22826 22828 case V4SF_FTYPE_V4SF_PCV2SF:
22827 22829 type = v4sf_ftype_v4sf_pcv2sf;
22828 22830 break;
22829 22831 case V4SF_FTYPE_PCV4SF_V4SF:
22830 22832 type = v4sf_ftype_pcv4sf_v4sf;
22831 22833 break;
22832 22834 case V2DF_FTYPE_V2DF_PCDOUBLE:
22833 22835 type = v2df_ftype_v2df_pcdouble;
22834 22836 break;
22835 22837 case V2DF_FTYPE_PCV2DF_V2DF:
22836 22838 type = v2df_ftype_pcv2df_v2df;
22837 22839 break;
22838 22840 case VOID_FTYPE_PV2SF_V4SF:
22839 22841 type = void_ftype_pv2sf_v4sf;
22840 22842 break;
22841 22843 case VOID_FTYPE_PV4DI_V4DI:
22842 22844 type = void_ftype_pv4di_v4di;
22843 22845 break;
22844 22846 case VOID_FTYPE_PV2DI_V2DI:
22845 22847 type = void_ftype_pv2di_v2di;
22846 22848 break;
22847 22849 case VOID_FTYPE_PCHAR_V32QI:
22848 22850 type = void_ftype_pchar_v32qi;
22849 22851 break;
22850 22852 case VOID_FTYPE_PCHAR_V16QI:
22851 22853 type = void_ftype_pchar_v16qi;
22852 22854 break;
22853 22855 case VOID_FTYPE_PFLOAT_V8SF:
22854 22856 type = void_ftype_pfloat_v8sf;
22855 22857 break;
22856 22858 case VOID_FTYPE_PFLOAT_V4SF:
22857 22859 type = void_ftype_pfloat_v4sf;
22858 22860 break;
22859 22861 case VOID_FTYPE_PDOUBLE_V4DF:
22860 22862 type = void_ftype_pdouble_v4df;
22861 22863 break;
22862 22864 case VOID_FTYPE_PDOUBLE_V2DF:
22863 22865 type = void_ftype_pdouble_v2df;
22864 22866 break;
22865 22867 case VOID_FTYPE_PDI_DI:
22866 22868 type = void_ftype_pdi_di;
22867 22869 break;
22868 22870 case VOID_FTYPE_PINT_INT:
22869 22871 type = void_ftype_pint_int;
22870 22872 break;
22871 22873 case VOID_FTYPE_PV8SF_V8SF_V8SF:
22872 22874 type = void_ftype_pv8sf_v8sf_v8sf;
22873 22875 break;
22874 22876 case VOID_FTYPE_PV4DF_V4DF_V4DF:
22875 22877 type = void_ftype_pv4df_v4df_v4df;
22876 22878 break;
22877 22879 case VOID_FTYPE_PV4SF_V4SF_V4SF:
22878 22880 type = void_ftype_pv4sf_v4sf_v4sf;
22879 22881 break;
22880 22882 case VOID_FTYPE_PV2DF_V2DF_V2DF:
22881 22883 type = void_ftype_pv2df_v2df_v2df;
22882 22884 break;
22883 22885 default:
22884 22886 gcc_unreachable ();
22885 22887 }
22886 22888
22887 22889 def_builtin (d->mask, d->name, type, d->code);
22888 22890 }
22889 22891
22890 22892 /* Add all builtins with variable number of operands. */
22891 22893 for (i = 0, d = bdesc_args;
22892 22894 i < ARRAY_SIZE (bdesc_args);
22893 22895 i++, d++)
22894 22896 {
22895 22897 tree type;
22896 22898
22897 22899 if (d->name == 0)
22898 22900 continue;
22899 22901
22900 22902 switch ((enum ix86_builtin_type) d->flag)
22901 22903 {
22902 22904 case FLOAT_FTYPE_FLOAT:
22903 22905 type = float_ftype_float;
22904 22906 break;
22905 22907 case INT_FTYPE_V8SF_V8SF_PTEST:
22906 22908 type = int_ftype_v8sf_v8sf;
22907 22909 break;
22908 22910 case INT_FTYPE_V4DI_V4DI_PTEST:
22909 22911 type = int_ftype_v4di_v4di;
22910 22912 break;
22911 22913 case INT_FTYPE_V4DF_V4DF_PTEST:
22912 22914 type = int_ftype_v4df_v4df;
22913 22915 break;
22914 22916 case INT_FTYPE_V4SF_V4SF_PTEST:
22915 22917 type = int_ftype_v4sf_v4sf;
22916 22918 break;
22917 22919 case INT_FTYPE_V2DI_V2DI_PTEST:
22918 22920 type = int_ftype_v2di_v2di;
22919 22921 break;
22920 22922 case INT_FTYPE_V2DF_V2DF_PTEST:
22921 22923 type = int_ftype_v2df_v2df;
22922 22924 break;
22923 22925 case INT64_FTYPE_V4SF:
22924 22926 type = int64_ftype_v4sf;
22925 22927 break;
22926 22928 case INT64_FTYPE_V2DF:
22927 22929 type = int64_ftype_v2df;
22928 22930 break;
22929 22931 case INT_FTYPE_V16QI:
22930 22932 type = int_ftype_v16qi;
22931 22933 break;
22932 22934 case INT_FTYPE_V8QI:
22933 22935 type = int_ftype_v8qi;
22934 22936 break;
22935 22937 case INT_FTYPE_V8SF:
22936 22938 type = int_ftype_v8sf;
22937 22939 break;
22938 22940 case INT_FTYPE_V4DF:
22939 22941 type = int_ftype_v4df;
22940 22942 break;
22941 22943 case INT_FTYPE_V4SF:
22942 22944 type = int_ftype_v4sf;
22943 22945 break;
22944 22946 case INT_FTYPE_V2DF:
22945 22947 type = int_ftype_v2df;
22946 22948 break;
22947 22949 case V16QI_FTYPE_V16QI:
22948 22950 type = v16qi_ftype_v16qi;
22949 22951 break;
22950 22952 case V8SI_FTYPE_V8SF:
22951 22953 type = v8si_ftype_v8sf;
22952 22954 break;
22953 22955 case V8SI_FTYPE_V4SI:
22954 22956 type = v8si_ftype_v4si;
22955 22957 break;
22956 22958 case V8HI_FTYPE_V8HI:
22957 22959 type = v8hi_ftype_v8hi;
22958 22960 break;
22959 22961 case V8HI_FTYPE_V16QI:
22960 22962 type = v8hi_ftype_v16qi;
22961 22963 break;
22962 22964 case V8QI_FTYPE_V8QI:
22963 22965 type = v8qi_ftype_v8qi;
22964 22966 break;
22965 22967 case V8SF_FTYPE_V8SF:
22966 22968 type = v8sf_ftype_v8sf;
22967 22969 break;
22968 22970 case V8SF_FTYPE_V8SI:
22969 22971 type = v8sf_ftype_v8si;
22970 22972 break;
22971 22973 case V8SF_FTYPE_V4SF:
22972 22974 type = v8sf_ftype_v4sf;
22973 22975 break;
22974 22976 case V4SI_FTYPE_V4DF:
22975 22977 type = v4si_ftype_v4df;
22976 22978 break;
22977 22979 case V4SI_FTYPE_V4SI:
22978 22980 type = v4si_ftype_v4si;
22979 22981 break;
22980 22982 case V4SI_FTYPE_V16QI:
22981 22983 type = v4si_ftype_v16qi;
22982 22984 break;
22983 22985 case V4SI_FTYPE_V8SI:
22984 22986 type = v4si_ftype_v8si;
22985 22987 break;
22986 22988 case V4SI_FTYPE_V8HI:
22987 22989 type = v4si_ftype_v8hi;
22988 22990 break;
22989 22991 case V4SI_FTYPE_V4SF:
22990 22992 type = v4si_ftype_v4sf;
22991 22993 break;
22992 22994 case V4SI_FTYPE_V2DF:
22993 22995 type = v4si_ftype_v2df;
22994 22996 break;
22995 22997 case V4HI_FTYPE_V4HI:
22996 22998 type = v4hi_ftype_v4hi;
22997 22999 break;
22998 23000 case V4DF_FTYPE_V4DF:
22999 23001 type = v4df_ftype_v4df;
23000 23002 break;
23001 23003 case V4DF_FTYPE_V4SI:
23002 23004 type = v4df_ftype_v4si;
23003 23005 break;
23004 23006 case V4DF_FTYPE_V4SF:
23005 23007 type = v4df_ftype_v4sf;
23006 23008 break;
23007 23009 case V4DF_FTYPE_V2DF:
23008 23010 type = v4df_ftype_v2df;
23009 23011 break;
23010 23012 case V4SF_FTYPE_V4SF:
23011 23013 case V4SF_FTYPE_V4SF_VEC_MERGE:
23012 23014 type = v4sf_ftype_v4sf;
23013 23015 break;
23014 23016 case V4SF_FTYPE_V8SF:
23015 23017 type = v4sf_ftype_v8sf;
23016 23018 break;
23017 23019 case V4SF_FTYPE_V4SI:
23018 23020 type = v4sf_ftype_v4si;
23019 23021 break;
23020 23022 case V4SF_FTYPE_V4DF:
23021 23023 type = v4sf_ftype_v4df;
23022 23024 break;
23023 23025 case V4SF_FTYPE_V2DF:
23024 23026 type = v4sf_ftype_v2df;
23025 23027 break;
23026 23028 case V2DI_FTYPE_V2DI:
23027 23029 type = v2di_ftype_v2di;
23028 23030 break;
23029 23031 case V2DI_FTYPE_V16QI:
23030 23032 type = v2di_ftype_v16qi;
23031 23033 break;
23032 23034 case V2DI_FTYPE_V8HI:
23033 23035 type = v2di_ftype_v8hi;
23034 23036 break;
23035 23037 case V2DI_FTYPE_V4SI:
23036 23038 type = v2di_ftype_v4si;
23037 23039 break;
23038 23040 case V2SI_FTYPE_V2SI:
23039 23041 type = v2si_ftype_v2si;
23040 23042 break;
23041 23043 case V2SI_FTYPE_V4SF:
23042 23044 type = v2si_ftype_v4sf;
23043 23045 break;
23044 23046 case V2SI_FTYPE_V2DF:
23045 23047 type = v2si_ftype_v2df;
23046 23048 break;
23047 23049 case V2SI_FTYPE_V2SF:
23048 23050 type = v2si_ftype_v2sf;
23049 23051 break;
23050 23052 case V2DF_FTYPE_V4DF:
23051 23053 type = v2df_ftype_v4df;
23052 23054 break;
23053 23055 case V2DF_FTYPE_V4SF:
23054 23056 type = v2df_ftype_v4sf;
23055 23057 break;
23056 23058 case V2DF_FTYPE_V2DF:
23057 23059 case V2DF_FTYPE_V2DF_VEC_MERGE:
23058 23060 type = v2df_ftype_v2df;
23059 23061 break;
23060 23062 case V2DF_FTYPE_V2SI:
23061 23063 type = v2df_ftype_v2si;
23062 23064 break;
23063 23065 case V2DF_FTYPE_V4SI:
23064 23066 type = v2df_ftype_v4si;
23065 23067 break;
23066 23068 case V2SF_FTYPE_V2SF:
23067 23069 type = v2sf_ftype_v2sf;
23068 23070 break;
23069 23071 case V2SF_FTYPE_V2SI:
23070 23072 type = v2sf_ftype_v2si;
23071 23073 break;
23072 23074 case V16QI_FTYPE_V16QI_V16QI:
23073 23075 type = v16qi_ftype_v16qi_v16qi;
23074 23076 break;
23075 23077 case V16QI_FTYPE_V8HI_V8HI:
23076 23078 type = v16qi_ftype_v8hi_v8hi;
23077 23079 break;
23078 23080 case V8QI_FTYPE_V8QI_V8QI:
23079 23081 type = v8qi_ftype_v8qi_v8qi;
23080 23082 break;
23081 23083 case V8QI_FTYPE_V4HI_V4HI:
23082 23084 type = v8qi_ftype_v4hi_v4hi;
23083 23085 break;
23084 23086 case V8HI_FTYPE_V8HI_V8HI:
23085 23087 case V8HI_FTYPE_V8HI_V8HI_COUNT:
23086 23088 type = v8hi_ftype_v8hi_v8hi;
23087 23089 break;
23088 23090 case V8HI_FTYPE_V16QI_V16QI:
23089 23091 type = v8hi_ftype_v16qi_v16qi;
23090 23092 break;
23091 23093 case V8HI_FTYPE_V4SI_V4SI:
23092 23094 type = v8hi_ftype_v4si_v4si;
23093 23095 break;
23094 23096 case V8HI_FTYPE_V8HI_SI_COUNT:
23095 23097 type = v8hi_ftype_v8hi_int;
23096 23098 break;
23097 23099 case V8SF_FTYPE_V8SF_V8SF:
23098 23100 type = v8sf_ftype_v8sf_v8sf;
23099 23101 break;
23100 23102 case V8SF_FTYPE_V8SF_V8SI:
23101 23103 type = v8sf_ftype_v8sf_v8si;
23102 23104 break;
23103 23105 case V4SI_FTYPE_V4SI_V4SI:
23104 23106 case V4SI_FTYPE_V4SI_V4SI_COUNT:
23105 23107 type = v4si_ftype_v4si_v4si;
23106 23108 break;
23107 23109 case V4SI_FTYPE_V8HI_V8HI:
23108 23110 type = v4si_ftype_v8hi_v8hi;
23109 23111 break;
23110 23112 case V4SI_FTYPE_V4SF_V4SF:
23111 23113 type = v4si_ftype_v4sf_v4sf;
23112 23114 break;
23113 23115 case V4SI_FTYPE_V2DF_V2DF:
23114 23116 type = v4si_ftype_v2df_v2df;
23115 23117 break;
23116 23118 case V4SI_FTYPE_V4SI_SI_COUNT:
23117 23119 type = v4si_ftype_v4si_int;
23118 23120 break;
23119 23121 case V4HI_FTYPE_V4HI_V4HI:
23120 23122 case V4HI_FTYPE_V4HI_V4HI_COUNT:
23121 23123 type = v4hi_ftype_v4hi_v4hi;
23122 23124 break;
23123 23125 case V4HI_FTYPE_V8QI_V8QI:
23124 23126 type = v4hi_ftype_v8qi_v8qi;
23125 23127 break;
23126 23128 case V4HI_FTYPE_V2SI_V2SI:
23127 23129 type = v4hi_ftype_v2si_v2si;
23128 23130 break;
23129 23131 case V4HI_FTYPE_V4HI_SI_COUNT:
23130 23132 type = v4hi_ftype_v4hi_int;
23131 23133 break;
23132 23134 case V4DF_FTYPE_V4DF_V4DF:
23133 23135 type = v4df_ftype_v4df_v4df;
23134 23136 break;
23135 23137 case V4DF_FTYPE_V4DF_V4DI:
23136 23138 type = v4df_ftype_v4df_v4di;
23137 23139 break;
23138 23140 case V4SF_FTYPE_V4SF_V4SF:
23139 23141 case V4SF_FTYPE_V4SF_V4SF_SWAP:
23140 23142 type = v4sf_ftype_v4sf_v4sf;
23141 23143 break;
23142 23144 case V4SF_FTYPE_V4SF_V4SI:
23143 23145 type = v4sf_ftype_v4sf_v4si;
23144 23146 break;
23145 23147 case V4SF_FTYPE_V4SF_V2SI:
23146 23148 type = v4sf_ftype_v4sf_v2si;
23147 23149 break;
23148 23150 case V4SF_FTYPE_V4SF_V2DF:
23149 23151 type = v4sf_ftype_v4sf_v2df;
23150 23152 break;
23151 23153 case V4SF_FTYPE_V4SF_DI:
23152 23154 type = v4sf_ftype_v4sf_int64;
23153 23155 break;
23154 23156 case V4SF_FTYPE_V4SF_SI:
23155 23157 type = v4sf_ftype_v4sf_int;
23156 23158 break;
23157 23159 case V2DI_FTYPE_V2DI_V2DI:
23158 23160 case V2DI_FTYPE_V2DI_V2DI_COUNT:
23159 23161 type = v2di_ftype_v2di_v2di;
23160 23162 break;
23161 23163 case V2DI_FTYPE_V16QI_V16QI:
23162 23164 type = v2di_ftype_v16qi_v16qi;
23163 23165 break;
23164 23166 case V2DI_FTYPE_V4SI_V4SI:
23165 23167 type = v2di_ftype_v4si_v4si;
23166 23168 break;
23167 23169 case V2DI_FTYPE_V2DI_V16QI:
23168 23170 type = v2di_ftype_v2di_v16qi;
23169 23171 break;
23170 23172 case V2DI_FTYPE_V2DF_V2DF:
23171 23173 type = v2di_ftype_v2df_v2df;
23172 23174 break;
23173 23175 case V2DI_FTYPE_V2DI_SI_COUNT:
23174 23176 type = v2di_ftype_v2di_int;
23175 23177 break;
23176 23178 case V2SI_FTYPE_V2SI_V2SI:
23177 23179 case V2SI_FTYPE_V2SI_V2SI_COUNT:
23178 23180 type = v2si_ftype_v2si_v2si;
23179 23181 break;
23180 23182 case V2SI_FTYPE_V4HI_V4HI:
23181 23183 type = v2si_ftype_v4hi_v4hi;
23182 23184 break;
23183 23185 case V2SI_FTYPE_V2SF_V2SF:
23184 23186 type = v2si_ftype_v2sf_v2sf;
23185 23187 break;
23186 23188 case V2SI_FTYPE_V2SI_SI_COUNT:
23187 23189 type = v2si_ftype_v2si_int;
23188 23190 break;
23189 23191 case V2DF_FTYPE_V2DF_V2DF:
23190 23192 case V2DF_FTYPE_V2DF_V2DF_SWAP:
23191 23193 type = v2df_ftype_v2df_v2df;
23192 23194 break;
23193 23195 case V2DF_FTYPE_V2DF_V4SF:
23194 23196 type = v2df_ftype_v2df_v4sf;
23195 23197 break;
23196 23198 case V2DF_FTYPE_V2DF_V2DI:
23197 23199 type = v2df_ftype_v2df_v2di;
23198 23200 break;
23199 23201 case V2DF_FTYPE_V2DF_DI:
23200 23202 type = v2df_ftype_v2df_int64;
23201 23203 break;
23202 23204 case V2DF_FTYPE_V2DF_SI:
23203 23205 type = v2df_ftype_v2df_int;
23204 23206 break;
23205 23207 case V2SF_FTYPE_V2SF_V2SF:
23206 23208 type = v2sf_ftype_v2sf_v2sf;
23207 23209 break;
23208 23210 case V1DI_FTYPE_V1DI_V1DI:
23209 23211 case V1DI_FTYPE_V1DI_V1DI_COUNT:
23210 23212 type = v1di_ftype_v1di_v1di;
23211 23213 break;
23212 23214 case V1DI_FTYPE_V8QI_V8QI:
23213 23215 type = v1di_ftype_v8qi_v8qi;
23214 23216 break;
23215 23217 case V1DI_FTYPE_V2SI_V2SI:
23216 23218 type = v1di_ftype_v2si_v2si;
23217 23219 break;
23218 23220 case V1DI_FTYPE_V1DI_SI_COUNT:
23219 23221 type = v1di_ftype_v1di_int;
23220 23222 break;
23221 23223 case UINT64_FTYPE_UINT64_UINT64:
23222 23224 type = uint64_ftype_uint64_uint64;
23223 23225 break;
23224 23226 case UINT_FTYPE_UINT_UINT:
23225 23227 type = unsigned_ftype_unsigned_unsigned;
23226 23228 break;
23227 23229 case UINT_FTYPE_UINT_USHORT:
23228 23230 type = unsigned_ftype_unsigned_ushort;
23229 23231 break;
23230 23232 case UINT_FTYPE_UINT_UCHAR:
23231 23233 type = unsigned_ftype_unsigned_uchar;
23232 23234 break;
23233 23235 case V8HI_FTYPE_V8HI_INT:
23234 23236 type = v8hi_ftype_v8hi_int;
23235 23237 break;
23236 23238 case V8SF_FTYPE_V8SF_INT:
23237 23239 type = v8sf_ftype_v8sf_int;
23238 23240 break;
23239 23241 case V4SI_FTYPE_V4SI_INT:
23240 23242 type = v4si_ftype_v4si_int;
23241 23243 break;
23242 23244 case V4SI_FTYPE_V8SI_INT:
23243 23245 type = v4si_ftype_v8si_int;
23244 23246 break;
23245 23247 case V4HI_FTYPE_V4HI_INT:
23246 23248 type = v4hi_ftype_v4hi_int;
23247 23249 break;
23248 23250 case V4DF_FTYPE_V4DF_INT:
23249 23251 type = v4df_ftype_v4df_int;
23250 23252 break;
23251 23253 case V4SF_FTYPE_V4SF_INT:
23252 23254 type = v4sf_ftype_v4sf_int;
23253 23255 break;
23254 23256 case V4SF_FTYPE_V8SF_INT:
23255 23257 type = v4sf_ftype_v8sf_int;
23256 23258 break;
23257 23259 case V2DI_FTYPE_V2DI_INT:
23258 23260 case V2DI2TI_FTYPE_V2DI_INT:
23259 23261 type = v2di_ftype_v2di_int;
23260 23262 break;
23261 23263 case V2DF_FTYPE_V2DF_INT:
23262 23264 type = v2df_ftype_v2df_int;
23263 23265 break;
23264 23266 case V2DF_FTYPE_V4DF_INT:
23265 23267 type = v2df_ftype_v4df_int;
23266 23268 break;
23267 23269 case V16QI_FTYPE_V16QI_V16QI_V16QI:
23268 23270 type = v16qi_ftype_v16qi_v16qi_v16qi;
23269 23271 break;
23270 23272 case V8SF_FTYPE_V8SF_V8SF_V8SF:
23271 23273 type = v8sf_ftype_v8sf_v8sf_v8sf;
23272 23274 break;
23273 23275 case V4DF_FTYPE_V4DF_V4DF_V4DF:
23274 23276 type = v4df_ftype_v4df_v4df_v4df;
23275 23277 break;
23276 23278 case V4SF_FTYPE_V4SF_V4SF_V4SF:
23277 23279 type = v4sf_ftype_v4sf_v4sf_v4sf;
23278 23280 break;
23279 23281 case V2DF_FTYPE_V2DF_V2DF_V2DF:
23280 23282 type = v2df_ftype_v2df_v2df_v2df;
23281 23283 break;
23282 23284 case V16QI_FTYPE_V16QI_V16QI_INT:
23283 23285 type = v16qi_ftype_v16qi_v16qi_int;
23284 23286 break;
23285 23287 case V8SI_FTYPE_V8SI_V8SI_INT:
23286 23288 type = v8si_ftype_v8si_v8si_int;
23287 23289 break;
23288 23290 case V8SI_FTYPE_V8SI_V4SI_INT:
23289 23291 type = v8si_ftype_v8si_v4si_int;
23290 23292 break;
23291 23293 case V8HI_FTYPE_V8HI_V8HI_INT:
23292 23294 type = v8hi_ftype_v8hi_v8hi_int;
23293 23295 break;
23294 23296 case V8SF_FTYPE_V8SF_V8SF_INT:
23295 23297 type = v8sf_ftype_v8sf_v8sf_int;
23296 23298 break;
23297 23299 case V8SF_FTYPE_V8SF_V4SF_INT:
23298 23300 type = v8sf_ftype_v8sf_v4sf_int;
23299 23301 break;
23300 23302 case V4SI_FTYPE_V4SI_V4SI_INT:
23301 23303 type = v4si_ftype_v4si_v4si_int;
23302 23304 break;
23303 23305 case V4DF_FTYPE_V4DF_V4DF_INT:
23304 23306 type = v4df_ftype_v4df_v4df_int;
23305 23307 break;
23306 23308 case V4DF_FTYPE_V4DF_V2DF_INT:
23307 23309 type = v4df_ftype_v4df_v2df_int;
23308 23310 break;
23309 23311 case V4SF_FTYPE_V4SF_V4SF_INT:
23310 23312 type = v4sf_ftype_v4sf_v4sf_int;
23311 23313 break;
23312 23314 case V2DI_FTYPE_V2DI_V2DI_INT:
23313 23315 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
23314 23316 type = v2di_ftype_v2di_v2di_int;
23315 23317 break;
23316 23318 case V2DF_FTYPE_V2DF_V2DF_INT:
23317 23319 type = v2df_ftype_v2df_v2df_int;
23318 23320 break;
23319 23321 case V2DI_FTYPE_V2DI_UINT_UINT:
23320 23322 type = v2di_ftype_v2di_unsigned_unsigned;
23321 23323 break;
23322 23324 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
23323 23325 type = v2di_ftype_v2di_v2di_unsigned_unsigned;
23324 23326 break;
23325 23327 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
23326 23328 type = v1di_ftype_v1di_v1di_int;
23327 23329 break;
23328 23330 default:
23329 23331 gcc_unreachable ();
23330 23332 }
23331 23333
23332 23334 def_builtin_const (d->mask, d->name, type, d->code);
23333 23335 }
23334 23336
23335 23337 /* pcmpestr[im] insns. */
23336 23338 for (i = 0, d = bdesc_pcmpestr;
23337 23339 i < ARRAY_SIZE (bdesc_pcmpestr);
23338 23340 i++, d++)
23339 23341 {
23340 23342 if (d->code == IX86_BUILTIN_PCMPESTRM128)
23341 23343 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
23342 23344 else
23343 23345 ftype = int_ftype_v16qi_int_v16qi_int_int;
23344 23346 def_builtin_const (d->mask, d->name, ftype, d->code);
23345 23347 }
23346 23348
23347 23349 /* pcmpistr[im] insns. */
23348 23350 for (i = 0, d = bdesc_pcmpistr;
23349 23351 i < ARRAY_SIZE (bdesc_pcmpistr);
23350 23352 i++, d++)
23351 23353 {
23352 23354 if (d->code == IX86_BUILTIN_PCMPISTRM128)
23353 23355 ftype = v16qi_ftype_v16qi_v16qi_int;
23354 23356 else
23355 23357 ftype = int_ftype_v16qi_v16qi_int;
23356 23358 def_builtin_const (d->mask, d->name, ftype, d->code);
23357 23359 }
23358 23360
23359 23361 /* comi/ucomi insns. */
23360 23362 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
23361 23363 if (d->mask == OPTION_MASK_ISA_SSE2)
23362 23364 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
23363 23365 else
23364 23366 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
23365 23367
23366 23368 /* SSE */
23367 23369 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
23368 23370 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
23369 23371
23370 23372 /* SSE or 3DNow!A */
23371 23373 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
23372 23374
23373 23375 /* SSE2 */
23374 23376 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
23375 23377
23376 23378 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
23377 23379 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
23378 23380
23379 23381 /* SSE3. */
23380 23382 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
23381 23383 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
23382 23384
23383 23385 /* AES */
23384 23386 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
23385 23387 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
23386 23388 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
23387 23389 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
23388 23390 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
23389 23391 def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);
23390 23392
23391 23393 /* PCLMUL */
23392 23394 def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);
23393 23395
23394 23396 /* AVX */
23395 23397 def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
23396 23398 TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);
23397 23399
23398 23400 /* Access to the vec_init patterns. */
23399 23401 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
23400 23402 integer_type_node, NULL_TREE);
23401 23403 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
23402 23404
23403 23405 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
23404 23406 short_integer_type_node,
23405 23407 short_integer_type_node,
23406 23408 short_integer_type_node, NULL_TREE);
23407 23409 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
23408 23410
23409 23411 ftype = build_function_type_list (V8QI_type_node, char_type_node,
23410 23412 char_type_node, char_type_node,
23411 23413 char_type_node, char_type_node,
23412 23414 char_type_node, char_type_node,
23413 23415 char_type_node, NULL_TREE);
23414 23416 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
23415 23417
23416 23418 /* Access to the vec_extract patterns. */
23417 23419 ftype = build_function_type_list (double_type_node, V2DF_type_node,
23418 23420 integer_type_node, NULL_TREE);
23419 23421 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
23420 23422
23421 23423 ftype = build_function_type_list (long_long_integer_type_node,
23422 23424 V2DI_type_node, integer_type_node,
23423 23425 NULL_TREE);
23424 23426 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
23425 23427
23426 23428 ftype = build_function_type_list (float_type_node, V4SF_type_node,
23427 23429 integer_type_node, NULL_TREE);
23428 23430 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
23429 23431
23430 23432 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
23431 23433 integer_type_node, NULL_TREE);
23432 23434 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
23433 23435
23434 23436 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
23435 23437 integer_type_node, NULL_TREE);
23436 23438 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
23437 23439
23438 23440 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
23439 23441 integer_type_node, NULL_TREE);
23440 23442 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
23441 23443
23442 23444 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
23443 23445 integer_type_node, NULL_TREE);
23444 23446 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
23445 23447
23446 23448 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
23447 23449 integer_type_node, NULL_TREE);
23448 23450 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
23449 23451
23450 23452 /* Access to the vec_set patterns. */
23451 23453 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
23452 23454 intDI_type_node,
23453 23455 integer_type_node, NULL_TREE);
23454 23456 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
23455 23457
23456 23458 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
23457 23459 float_type_node,
23458 23460 integer_type_node, NULL_TREE);
23459 23461 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
23460 23462
23461 23463 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
23462 23464 intSI_type_node,
23463 23465 integer_type_node, NULL_TREE);
23464 23466 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
23465 23467
23466 23468 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
23467 23469 intHI_type_node,
23468 23470 integer_type_node, NULL_TREE);
23469 23471 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
23470 23472
23471 23473 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
23472 23474 intHI_type_node,
23473 23475 integer_type_node, NULL_TREE);
23474 23476 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
23475 23477
23476 23478 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
23477 23479 intQI_type_node,
23478 23480 integer_type_node, NULL_TREE);
23479 23481 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
23480 23482
23481 23483 /* Add SSE5 multi-arg argument instructions */
23482 23484 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
23483 23485 {
23484 23486 tree mtype = NULL_TREE;
23485 23487
23486 23488 if (d->name == 0)
23487 23489 continue;
23488 23490
23489 23491 switch ((enum multi_arg_type)d->flag)
23490 23492 {
23491 23493 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break;
23492 23494 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break;
23493 23495 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break;
23494 23496 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break;
23495 23497 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break;
23496 23498 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break;
23497 23499 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break;
23498 23500 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break;
23499 23501 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break;
23500 23502 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break;
23501 23503 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break;
23502 23504 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break;
23503 23505 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break;
23504 23506 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break;
23505 23507 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break;
23506 23508 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break;
23507 23509 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break;
23508 23510 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break;
23509 23511 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break;
23510 23512 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break;
23511 23513 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break;
23512 23514 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break;
23513 23515 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break;
23514 23516 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break;
23515 23517 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break;
23516 23518 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break;
23517 23519 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break;
23518 23520 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break;
23519 23521 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break;
23520 23522 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break;
23521 23523 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break;
23522 23524 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break;
23523 23525 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break;
23524 23526 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break;
23525 23527 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break;
23526 23528 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break;
23527 23529 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break;
23528 23530 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break;
23529 23531 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break;
23530 23532 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break;
23531 23533 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break;
23532 23534 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break;
23533 23535 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break;
23534 23536 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break;
23535 23537 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break;
23536 23538 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break;
23537 23539 case MULTI_ARG_UNKNOWN:
23538 23540 default:
23539 23541 gcc_unreachable ();
23540 23542 }
23541 23543
23542 23544 if (mtype)
23543 23545 def_builtin_const (d->mask, d->name, mtype, d->code);
23544 23546 }
23545 23547 }
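/* Illustrative sketch, not part of i386.c or of this patch: one way a
   builtin registered above surfaces in user code.
   __builtin_ia32_vec_ext_v4sf is defined earlier in this function with
   the signature (V4SF, int) -> float, so with a GCC vector typedef
   (the "v4sf" and "second_lane" names below are made up for this
   example) it extracts one lane of an SSE vector when compiled with
   -msse.  */

typedef float v4sf __attribute__ ((vector_size (16)));

static float
second_lane (v4sf v)
{
  /* The element index must be a compile-time constant.  */
  return __builtin_ia32_vec_ext_v4sf (v, 1);
}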
23546 23548
23547 23549 /* Internal method for ix86_init_builtins. */
23548 23550
23549 23551 static void
23550 23552 ix86_init_builtins_va_builtins_abi (void)
23551 23553 {
23552 23554 tree ms_va_ref, sysv_va_ref;
23553 23555 tree fnvoid_va_end_ms, fnvoid_va_end_sysv;
23554 23556 tree fnvoid_va_start_ms, fnvoid_va_start_sysv;
23555 23557 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv;
23556 23558 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE;
23557 23559
23558 23560 if (!TARGET_64BIT)
23559 23561 return;
23560 23562 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE);
23561 23563 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE);
23562 23564 ms_va_ref = build_reference_type (ms_va_list_type_node);
23563 23565 sysv_va_ref =
23564 23566 build_pointer_type (TREE_TYPE (sysv_va_list_type_node));
23565 23567
23566 23568 fnvoid_va_end_ms =
23567 23569 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23568 23570 fnvoid_va_start_ms =
23569 23571 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE);
23570 23572 fnvoid_va_end_sysv =
23571 23573 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE);
23572 23574 fnvoid_va_start_sysv =
23573 23575 build_varargs_function_type_list (void_type_node, sysv_va_ref,
23574 23576 NULL_TREE);
23575 23577 fnvoid_va_copy_ms =
23576 23578 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node,
23577 23579 NULL_TREE);
23578 23580 fnvoid_va_copy_sysv =
23579 23581 build_function_type_list (void_type_node, sysv_va_ref,
23580 23582 sysv_va_ref, NULL_TREE);
23581 23583
23582 23584 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms,
23583 23585 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms);
23584 23586 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms,
23585 23587 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms);
23586 23588 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms,
23587 23589 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms);
23588 23590 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv,
23589 23591 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23590 23592 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv,
23591 23593 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23592 23594 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv,
23593 23595 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv);
23594 23596 }
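/* Illustrative sketch, not part of i386.c or of this patch: the six
   builtins registered above let 64-bit code handle varargs under the
   Microsoft calling convention explicitly.  The function name "sum_ms"
   is made up for this example; the builtins and the ms_abi attribute
   are the ones set up in ix86_init_builtins_va_builtins_abi.  */

static int __attribute__ ((ms_abi))
sum_ms (int n, ...)
{
  __builtin_ms_va_list ap;
  int i, s = 0;

  __builtin_ms_va_start (ap, n);
  for (i = 0; i < n; i++)
    s += __builtin_va_arg (ap, int);
  __builtin_ms_va_end (ap);
  return s;
}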
23595 23597
23596 23598 static void
23597 23599 ix86_init_builtins (void)
23598 23600 {
23599 23601 tree float128_type_node = make_node (REAL_TYPE);
23600 23602 tree ftype, decl;
23601 23603
23602 23604 /* The __float80 type. */
23603 23605 if (TYPE_MODE (long_double_type_node) == XFmode)
23604 23606 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
23605 23607 "__float80");
23606 23608 else
23607 23609 {
23608 23610 /* The __float80 type. */
23609 23611 tree float80_type_node = make_node (REAL_TYPE);
23610 23612
23611 23613 TYPE_PRECISION (float80_type_node) = 80;
23612 23614 layout_type (float80_type_node);
23613 23615 (*lang_hooks.types.register_builtin_type) (float80_type_node,
23614 23616 "__float80");
23615 23617 }
23616 23618
23617 23619 /* The __float128 type. */
23618 23620 TYPE_PRECISION (float128_type_node) = 128;
23619 23621 layout_type (float128_type_node);
23620 23622 (*lang_hooks.types.register_builtin_type) (float128_type_node,
23621 23623 "__float128");
23622 23624
23623 23625 /* TFmode support builtins. */
23624 23626 ftype = build_function_type (float128_type_node, void_list_node);
23625 23627 decl = add_builtin_function ("__builtin_infq", ftype,
23626 23628 IX86_BUILTIN_INFQ, BUILT_IN_MD,
23627 23629 NULL, NULL_TREE);
23628 23630 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl;
23629 23631
23630 23632 /* We will expand them to normal call if SSE2 isn't available since
23631 23633 they are used by libgcc. */
23632 23634 ftype = build_function_type_list (float128_type_node,
23633 23635 float128_type_node,
23634 23636 NULL_TREE);
23635 23637 decl = add_builtin_function ("__builtin_fabsq", ftype,
23636 23638 IX86_BUILTIN_FABSQ, BUILT_IN_MD,
23637 23639 "__fabstf2", NULL_TREE);
23638 23640 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl;
23639 23641 TREE_READONLY (decl) = 1;
23640 23642
23641 23643 ftype = build_function_type_list (float128_type_node,
23642 23644 float128_type_node,
23643 23645 float128_type_node,
23644 23646 NULL_TREE);
23645 23647 decl = add_builtin_function ("__builtin_copysignq", ftype,
23646 23648 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD,
23647 23649 "__copysigntf3", NULL_TREE);
23648 23650 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl;
23649 23651 TREE_READONLY (decl) = 1;
23650 23652
23651 23653 ix86_init_mmx_sse_builtins ();
23652 23654 if (TARGET_64BIT)
23653 23655 ix86_init_builtins_va_builtins_abi ();
23654 23656 }
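/* Illustrative sketch, not part of i386.c or of this patch: the TFmode
   builtins created above back the __float128 support; per the comment
   above they expand to the libgcc routines __fabstf2/__copysigntf3
   when SSE2 is unavailable.  The function name is made up for this
   example.  */

static __float128
magnitude_with_sign (__float128 x, __float128 sign_source)
{
  /* |x| carrying the sign of sign_source.  */
  return __builtin_copysignq (__builtin_fabsq (x), sign_source);
}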
23655 23657
23656 23658 /* Errors in the source file can cause expand_expr to return const0_rtx
23657 23659 where we expect a vector. To avoid crashing, use one of the vector
23658 23660 clear instructions. */
23659 23661 static rtx
23660 23662 safe_vector_operand (rtx x, enum machine_mode mode)
23661 23663 {
23662 23664 if (x == const0_rtx)
23663 23665 x = CONST0_RTX (mode);
23664 23666 return x;
23665 23667 }
23666 23668
23667 23669 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
23668 23670
23669 23671 static rtx
23670 23672 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
23671 23673 {
23672 23674 rtx pat;
23673 23675 tree arg0 = CALL_EXPR_ARG (exp, 0);
23674 23676 tree arg1 = CALL_EXPR_ARG (exp, 1);
23675 23677 rtx op0 = expand_normal (arg0);
23676 23678 rtx op1 = expand_normal (arg1);
23677 23679 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23678 23680 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23679 23681 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
23680 23682
23681 23683 if (VECTOR_MODE_P (mode0))
23682 23684 op0 = safe_vector_operand (op0, mode0);
23683 23685 if (VECTOR_MODE_P (mode1))
23684 23686 op1 = safe_vector_operand (op1, mode1);
23685 23687
23686 23688 if (optimize || !target
23687 23689 || GET_MODE (target) != tmode
23688 23690 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23689 23691 target = gen_reg_rtx (tmode);
23690 23692
23691 23693 if (GET_MODE (op1) == SImode && mode1 == TImode)
23692 23694 {
23693 23695 rtx x = gen_reg_rtx (V4SImode);
23694 23696 emit_insn (gen_sse2_loadd (x, op1));
23695 23697 op1 = gen_lowpart (TImode, x);
23696 23698 }
23697 23699
23698 23700 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
23699 23701 op0 = copy_to_mode_reg (mode0, op0);
23700 23702 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
23701 23703 op1 = copy_to_mode_reg (mode1, op1);
23702 23704
23703 23705 pat = GEN_FCN (icode) (target, op0, op1);
23704 23706 if (! pat)
23705 23707 return 0;
23706 23708
23707 23709 emit_insn (pat);
23708 23710
23709 23711 return target;
23710 23712 }
23711 23713
23712 23714 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */
23713 23715
23714 23716 static rtx
23715 23717 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target,
23716 23718 enum multi_arg_type m_type,
23717 23719 enum insn_code sub_code)
23718 23720 {
23719 23721 rtx pat;
23720 23722 int i;
23721 23723 int nargs;
23722 23724 bool comparison_p = false;
23723 23725 bool tf_p = false;
23724 23726 bool last_arg_constant = false;
23725 23727 int num_memory = 0;
23726 23728 struct {
23727 23729 rtx op;
23728 23730 enum machine_mode mode;
23729 23731 } args[4];
23730 23732
23731 23733 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23732 23734
23733 23735 switch (m_type)
23734 23736 {
23735 23737 case MULTI_ARG_3_SF:
23736 23738 case MULTI_ARG_3_DF:
23737 23739 case MULTI_ARG_3_DI:
23738 23740 case MULTI_ARG_3_SI:
23739 23741 case MULTI_ARG_3_SI_DI:
23740 23742 case MULTI_ARG_3_HI:
23741 23743 case MULTI_ARG_3_HI_SI:
23742 23744 case MULTI_ARG_3_QI:
23743 23745 case MULTI_ARG_3_PERMPS:
23744 23746 case MULTI_ARG_3_PERMPD:
23745 23747 nargs = 3;
23746 23748 break;
23747 23749
23748 23750 case MULTI_ARG_2_SF:
23749 23751 case MULTI_ARG_2_DF:
23750 23752 case MULTI_ARG_2_DI:
23751 23753 case MULTI_ARG_2_SI:
23752 23754 case MULTI_ARG_2_HI:
23753 23755 case MULTI_ARG_2_QI:
23754 23756 nargs = 2;
23755 23757 break;
23756 23758
23757 23759 case MULTI_ARG_2_DI_IMM:
23758 23760 case MULTI_ARG_2_SI_IMM:
23759 23761 case MULTI_ARG_2_HI_IMM:
23760 23762 case MULTI_ARG_2_QI_IMM:
23761 23763 nargs = 2;
23762 23764 last_arg_constant = true;
23763 23765 break;
23764 23766
23765 23767 case MULTI_ARG_1_SF:
23766 23768 case MULTI_ARG_1_DF:
23767 23769 case MULTI_ARG_1_DI:
23768 23770 case MULTI_ARG_1_SI:
23769 23771 case MULTI_ARG_1_HI:
23770 23772 case MULTI_ARG_1_QI:
23771 23773 case MULTI_ARG_1_SI_DI:
23772 23774 case MULTI_ARG_1_HI_DI:
23773 23775 case MULTI_ARG_1_HI_SI:
23774 23776 case MULTI_ARG_1_QI_DI:
23775 23777 case MULTI_ARG_1_QI_SI:
23776 23778 case MULTI_ARG_1_QI_HI:
23777 23779 case MULTI_ARG_1_PH2PS:
23778 23780 case MULTI_ARG_1_PS2PH:
23779 23781 nargs = 1;
23780 23782 break;
23781 23783
23782 23784 case MULTI_ARG_2_SF_CMP:
23783 23785 case MULTI_ARG_2_DF_CMP:
23784 23786 case MULTI_ARG_2_DI_CMP:
23785 23787 case MULTI_ARG_2_SI_CMP:
23786 23788 case MULTI_ARG_2_HI_CMP:
23787 23789 case MULTI_ARG_2_QI_CMP:
23788 23790 nargs = 2;
23789 23791 comparison_p = true;
23790 23792 break;
23791 23793
23792 23794 case MULTI_ARG_2_SF_TF:
23793 23795 case MULTI_ARG_2_DF_TF:
23794 23796 case MULTI_ARG_2_DI_TF:
23795 23797 case MULTI_ARG_2_SI_TF:
23796 23798 case MULTI_ARG_2_HI_TF:
23797 23799 case MULTI_ARG_2_QI_TF:
23798 23800 nargs = 2;
23799 23801 tf_p = true;
23800 23802 break;
23801 23803
23802 23804 case MULTI_ARG_UNKNOWN:
23803 23805 default:
23804 23806 gcc_unreachable ();
23805 23807 }
23806 23808
23807 23809 if (optimize || !target
23808 23810 || GET_MODE (target) != tmode
23809 23811 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23810 23812 target = gen_reg_rtx (tmode);
23811 23813
23812 23814 gcc_assert (nargs <= 4);
23813 23815
23814 23816 for (i = 0; i < nargs; i++)
23815 23817 {
23816 23818 tree arg = CALL_EXPR_ARG (exp, i);
23817 23819 rtx op = expand_normal (arg);
23818 23820 int adjust = (comparison_p) ? 1 : 0;
23819 23821 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode;
23820 23822
23821 23823 if (last_arg_constant && i == nargs-1)
23822 23824 {
23823 23825 if (GET_CODE (op) != CONST_INT)
23824 23826 {
23825 23827 error ("last argument must be an immediate");
23826 23828 return gen_reg_rtx (tmode);
23827 23829 }
23828 23830 }
23829 23831 else
23830 23832 {
23831 23833 if (VECTOR_MODE_P (mode))
23832 23834 op = safe_vector_operand (op, mode);
23833 23835
23834 23836 /* If we aren't optimizing, only allow one memory operand to be
23835 23837 generated. */
23836 23838 if (memory_operand (op, mode))
23837 23839 num_memory++;
23838 23840
23839 23841 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode);
23840 23842
23841 23843 if (optimize
23842 23844 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode)
23843 23845 || num_memory > 1)
23844 23846 op = force_reg (mode, op);
23845 23847 }
23846 23848
23847 23849 args[i].op = op;
23848 23850 args[i].mode = mode;
23849 23851 }
23850 23852
23851 23853 switch (nargs)
23852 23854 {
23853 23855 case 1:
23854 23856 pat = GEN_FCN (icode) (target, args[0].op);
23855 23857 break;
23856 23858
23857 23859 case 2:
23858 23860 if (tf_p)
23859 23861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op,
23860 23862 GEN_INT ((int)sub_code));
23861 23863 else if (! comparison_p)
23862 23864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
23863 23865 else
23864 23866 {
23865 23867 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target),
23866 23868 args[0].op,
23867 23869 args[1].op);
23868 23870
23869 23871 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op);
23870 23872 }
23871 23873 break;
23872 23874
23873 23875 case 3:
23874 23876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op);
23875 23877 break;
23876 23878
23877 23879 default:
23878 23880 gcc_unreachable ();
23879 23881 }
23880 23882
23881 23883 if (! pat)
23882 23884 return 0;
23883 23885
23884 23886 emit_insn (pat);
23885 23887 return target;
23886 23888 }
23887 23889
23888 23890 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop
23889 23891 insns with vec_merge. */
23890 23892
23891 23893 static rtx
23892 23894 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp,
23893 23895 rtx target)
23894 23896 {
23895 23897 rtx pat;
23896 23898 tree arg0 = CALL_EXPR_ARG (exp, 0);
23897 23899 rtx op1, op0 = expand_normal (arg0);
23898 23900 enum machine_mode tmode = insn_data[icode].operand[0].mode;
23899 23901 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
23900 23902
23901 23903 if (optimize || !target
23902 23904 || GET_MODE (target) != tmode
23903 23905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
23904 23906 target = gen_reg_rtx (tmode);
23905 23907
23906 23908 if (VECTOR_MODE_P (mode0))
23907 23909 op0 = safe_vector_operand (op0, mode0);
23908 23910
23909 23911 if ((optimize && !register_operand (op0, mode0))
23910 23912 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
23911 23913 op0 = copy_to_mode_reg (mode0, op0);
23912 23914
23913 23915 op1 = op0;
23914 23916 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
23915 23917 op1 = copy_to_mode_reg (mode0, op1);
23916 23918
23917 23919 pat = GEN_FCN (icode) (target, op0, op1);
23918 23920 if (! pat)
23919 23921 return 0;
23920 23922 emit_insn (pat);
23921 23923 return target;
23922 23924 }
23923 23925
23924 23926 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
23925 23927
23926 23928 static rtx
23927 23929 ix86_expand_sse_compare (const struct builtin_description *d,
23928 23930 tree exp, rtx target, bool swap)
23929 23931 {
23930 23932 rtx pat;
23931 23933 tree arg0 = CALL_EXPR_ARG (exp, 0);
23932 23934 tree arg1 = CALL_EXPR_ARG (exp, 1);
23933 23935 rtx op0 = expand_normal (arg0);
23934 23936 rtx op1 = expand_normal (arg1);
23935 23937 rtx op2;
23936 23938 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
23937 23939 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
23938 23940 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
23939 23941 enum rtx_code comparison = d->comparison;
23940 23942
23941 23943 if (VECTOR_MODE_P (mode0))
23942 23944 op0 = safe_vector_operand (op0, mode0);
23943 23945 if (VECTOR_MODE_P (mode1))
23944 23946 op1 = safe_vector_operand (op1, mode1);
23945 23947
23946 23948 /* Swap operands if we have a comparison that isn't available in
23947 23949 hardware. */
23948 23950 if (swap)
23949 23951 {
23950 23952 rtx tmp = gen_reg_rtx (mode1);
23951 23953 emit_move_insn (tmp, op1);
23952 23954 op1 = op0;
23953 23955 op0 = tmp;
23954 23956 }
23955 23957
23956 23958 if (optimize || !target
23957 23959 || GET_MODE (target) != tmode
23958 23960 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
23959 23961 target = gen_reg_rtx (tmode);
23960 23962
23961 23963 if ((optimize && !register_operand (op0, mode0))
23962 23964 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
23963 23965 op0 = copy_to_mode_reg (mode0, op0);
23964 23966 if ((optimize && !register_operand (op1, mode1))
23965 23967 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
23966 23968 op1 = copy_to_mode_reg (mode1, op1);
23967 23969
23968 23970 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
23969 23971 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
23970 23972 if (! pat)
23971 23973 return 0;
23972 23974 emit_insn (pat);
23973 23975 return target;
23974 23976 }
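For reference, a minimal sketch (assuming <xmmintrin.h> and -msse) of the kind of intrinsic the SWAP variants above serve:

    #include <xmmintrin.h>

    /* There is no CMPGTPS encoding; greater-than is emitted as CMPLTPS with
       the operands swapped, which is what the swap path above arranges.  */
    __m128 greater_mask (__m128 a, __m128 b)
    {
      return _mm_cmpgt_ps (a, b);
    }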
23975 23977
23976 23978 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
23977 23979
23978 23980 static rtx
23979 23981 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
23980 23982 rtx target)
23981 23983 {
23982 23984 rtx pat;
23983 23985 tree arg0 = CALL_EXPR_ARG (exp, 0);
23984 23986 tree arg1 = CALL_EXPR_ARG (exp, 1);
23985 23987 rtx op0 = expand_normal (arg0);
23986 23988 rtx op1 = expand_normal (arg1);
23987 23989 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
23988 23990 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
23989 23991 enum rtx_code comparison = d->comparison;
23990 23992
23991 23993 if (VECTOR_MODE_P (mode0))
23992 23994 op0 = safe_vector_operand (op0, mode0);
23993 23995 if (VECTOR_MODE_P (mode1))
23994 23996 op1 = safe_vector_operand (op1, mode1);
23995 23997
23996 23998 /* Swap operands if we have a comparison that isn't available in
23997 23999 hardware. */
23998 24000 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
23999 24001 {
24000 24002 rtx tmp = op1;
24001 24003 op1 = op0;
24002 24004 op0 = tmp;
24003 24005 }
24004 24006
24005 24007 target = gen_reg_rtx (SImode);
24006 24008 emit_move_insn (target, const0_rtx);
24007 24009 target = gen_rtx_SUBREG (QImode, target, 0);
24008 24010
24009 24011 if ((optimize && !register_operand (op0, mode0))
24010 24012 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24011 24013 op0 = copy_to_mode_reg (mode0, op0);
24012 24014 if ((optimize && !register_operand (op1, mode1))
24013 24015 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24014 24016 op1 = copy_to_mode_reg (mode1, op1);
24015 24017
24016 24018 pat = GEN_FCN (d->icode) (op0, op1);
24017 24019 if (! pat)
24018 24020 return 0;
24019 24021 emit_insn (pat);
24020 24022 emit_insn (gen_rtx_SET (VOIDmode,
24021 24023 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24022 24024 gen_rtx_fmt_ee (comparison, QImode,
24023 24025 SET_DEST (pat),
24024 24026 const0_rtx)));
24025 24027
24026 24028 return SUBREG_REG (target);
24027 24029 }
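A minimal user-level sketch of what this expander handles, assuming <emmintrin.h>; the STRICT_LOW_PART sequence above is what turns the flags result into the int return value:

    #include <emmintrin.h>

    /* COMISD followed by a SETcc on the flags register.  */
    int doubles_equal (__m128d a, __m128d b)
    {
      return _mm_comieq_sd (a, b);
    }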
24028 24030
24029 24031 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
24030 24032
24031 24033 static rtx
24032 24034 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
24033 24035 rtx target)
24034 24036 {
24035 24037 rtx pat;
24036 24038 tree arg0 = CALL_EXPR_ARG (exp, 0);
24037 24039 tree arg1 = CALL_EXPR_ARG (exp, 1);
24038 24040 rtx op0 = expand_normal (arg0);
24039 24041 rtx op1 = expand_normal (arg1);
24040 24042 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
24041 24043 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
24042 24044 enum rtx_code comparison = d->comparison;
24043 24045
24044 24046 if (VECTOR_MODE_P (mode0))
24045 24047 op0 = safe_vector_operand (op0, mode0);
24046 24048 if (VECTOR_MODE_P (mode1))
24047 24049 op1 = safe_vector_operand (op1, mode1);
24048 24050
24049 24051 target = gen_reg_rtx (SImode);
24050 24052 emit_move_insn (target, const0_rtx);
24051 24053 target = gen_rtx_SUBREG (QImode, target, 0);
24052 24054
24053 24055 if ((optimize && !register_operand (op0, mode0))
24054 24056 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
24055 24057 op0 = copy_to_mode_reg (mode0, op0);
24056 24058 if ((optimize && !register_operand (op1, mode1))
24057 24059 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
24058 24060 op1 = copy_to_mode_reg (mode1, op1);
24059 24061
24060 24062 pat = GEN_FCN (d->icode) (op0, op1);
24061 24063 if (! pat)
24062 24064 return 0;
24063 24065 emit_insn (pat);
24064 24066 emit_insn (gen_rtx_SET (VOIDmode,
24065 24067 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24066 24068 gen_rtx_fmt_ee (comparison, QImode,
24067 24069 SET_DEST (pat),
24068 24070 const0_rtx)));
24069 24071
24070 24072 return SUBREG_REG (target);
24071 24073 }
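Likewise a sketch of a PTEST user, assuming SSE4.1 and <smmintrin.h>; the expander above converts the flag selected by d->comparison into a 0/1 integer:

    #include <smmintrin.h>

    /* PTEST sets ZF when (x & x) == 0; the result is returned as 0 or 1.  */
    int all_bits_clear (__m128i x)
    {
      return _mm_testz_si128 (x, x);
    }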
24072 24074
24073 24075 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
24074 24076
24075 24077 static rtx
24076 24078 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
24077 24079 tree exp, rtx target)
24078 24080 {
24079 24081 rtx pat;
24080 24082 tree arg0 = CALL_EXPR_ARG (exp, 0);
24081 24083 tree arg1 = CALL_EXPR_ARG (exp, 1);
24082 24084 tree arg2 = CALL_EXPR_ARG (exp, 2);
24083 24085 tree arg3 = CALL_EXPR_ARG (exp, 3);
24084 24086 tree arg4 = CALL_EXPR_ARG (exp, 4);
24085 24087 rtx scratch0, scratch1;
24086 24088 rtx op0 = expand_normal (arg0);
24087 24089 rtx op1 = expand_normal (arg1);
24088 24090 rtx op2 = expand_normal (arg2);
24089 24091 rtx op3 = expand_normal (arg3);
24090 24092 rtx op4 = expand_normal (arg4);
24091 24093 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
24092 24094
24093 24095 tmode0 = insn_data[d->icode].operand[0].mode;
24094 24096 tmode1 = insn_data[d->icode].operand[1].mode;
24095 24097 modev2 = insn_data[d->icode].operand[2].mode;
24096 24098 modei3 = insn_data[d->icode].operand[3].mode;
24097 24099 modev4 = insn_data[d->icode].operand[4].mode;
24098 24100 modei5 = insn_data[d->icode].operand[5].mode;
24099 24101 modeimm = insn_data[d->icode].operand[6].mode;
24100 24102
24101 24103 if (VECTOR_MODE_P (modev2))
24102 24104 op0 = safe_vector_operand (op0, modev2);
24103 24105 if (VECTOR_MODE_P (modev4))
24104 24106 op2 = safe_vector_operand (op2, modev4);
24105 24107
24106 24108 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24107 24109 op0 = copy_to_mode_reg (modev2, op0);
24108 24110 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
24109 24111 op1 = copy_to_mode_reg (modei3, op1);
24110 24112 if ((optimize && !register_operand (op2, modev4))
24111 24113 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
24112 24114 op2 = copy_to_mode_reg (modev4, op2);
24113 24115 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
24114 24116 op3 = copy_to_mode_reg (modei5, op3);
24115 24117
24116 24118 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
24117 24119 {
24118 24120 error ("the fifth argument must be an 8-bit immediate");
24119 24121 return const0_rtx;
24120 24122 }
24121 24123
24122 24124 if (d->code == IX86_BUILTIN_PCMPESTRI128)
24123 24125 {
24124 24126 if (optimize || !target
24125 24127 || GET_MODE (target) != tmode0
24126 24128 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24127 24129 target = gen_reg_rtx (tmode0);
24128 24130
24129 24131 scratch1 = gen_reg_rtx (tmode1);
24130 24132
24131 24133 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
24132 24134 }
24133 24135 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
24134 24136 {
24135 24137 if (optimize || !target
24136 24138 || GET_MODE (target) != tmode1
24137 24139 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24138 24140 target = gen_reg_rtx (tmode1);
24139 24141
24140 24142 scratch0 = gen_reg_rtx (tmode0);
24141 24143
24142 24144 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
24143 24145 }
24144 24146 else
24145 24147 {
24146 24148 gcc_assert (d->flag);
24147 24149
24148 24150 scratch0 = gen_reg_rtx (tmode0);
24149 24151 scratch1 = gen_reg_rtx (tmode1);
24150 24152
24151 24153 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
24152 24154 }
24153 24155
24154 24156 if (! pat)
24155 24157 return 0;
24156 24158
24157 24159 emit_insn (pat);
24158 24160
24159 24161 if (d->flag)
24160 24162 {
24161 24163 target = gen_reg_rtx (SImode);
24162 24164 emit_move_insn (target, const0_rtx);
24163 24165 target = gen_rtx_SUBREG (QImode, target, 0);
24164 24166
24165 24167 emit_insn
24166 24168 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24167 24169 gen_rtx_fmt_ee (EQ, QImode,
24168 24170 gen_rtx_REG ((enum machine_mode) d->flag,
24169 24171 FLAGS_REG),
24170 24172 const0_rtx)));
24171 24173 return SUBREG_REG (target);
24172 24174 }
24173 24175 else
24174 24176 return target;
24175 24177 }
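For reference, a sketch of the explicit-length string-compare intrinsics routed here, assuming SSE4.2 and <nmmintrin.h>; PCMPESTRI returns the index (the target operand above), PCMPESTRM returns the mask, and the flag-only variants return a single flag bit:

    #include <nmmintrin.h>

    /* Index of the first position at which the LA-byte string in A and the
       LB-byte string in B have equal bytes, or 16 when there is no match.  */
    int first_equal_byte (__m128i a, int la, __m128i b, int lb)
    {
      return _mm_cmpestri (a, la, b, lb,
                           _SIDD_UBYTE_OPS | _SIDD_CMP_EQUAL_EACH);
    }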
24176 24178
24177 24179
24178 24180 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
24179 24181
24180 24182 static rtx
24181 24183 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
24182 24184 tree exp, rtx target)
24183 24185 {
24184 24186 rtx pat;
24185 24187 tree arg0 = CALL_EXPR_ARG (exp, 0);
24186 24188 tree arg1 = CALL_EXPR_ARG (exp, 1);
24187 24189 tree arg2 = CALL_EXPR_ARG (exp, 2);
24188 24190 rtx scratch0, scratch1;
24189 24191 rtx op0 = expand_normal (arg0);
24190 24192 rtx op1 = expand_normal (arg1);
24191 24193 rtx op2 = expand_normal (arg2);
24192 24194 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
24193 24195
24194 24196 tmode0 = insn_data[d->icode].operand[0].mode;
24195 24197 tmode1 = insn_data[d->icode].operand[1].mode;
24196 24198 modev2 = insn_data[d->icode].operand[2].mode;
24197 24199 modev3 = insn_data[d->icode].operand[3].mode;
24198 24200 modeimm = insn_data[d->icode].operand[4].mode;
24199 24201
24200 24202 if (VECTOR_MODE_P (modev2))
24201 24203 op0 = safe_vector_operand (op0, modev2);
24202 24204 if (VECTOR_MODE_P (modev3))
24203 24205 op1 = safe_vector_operand (op1, modev3);
24204 24206
24205 24207 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
24206 24208 op0 = copy_to_mode_reg (modev2, op0);
24207 24209 if ((optimize && !register_operand (op1, modev3))
24208 24210 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
24209 24211 op1 = copy_to_mode_reg (modev3, op1);
24210 24212
24211 24213 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
24212 24214 {
24213 24215 error ("the third argument must be an 8-bit immediate");
24214 24216 return const0_rtx;
24215 24217 }
24216 24218
24217 24219 if (d->code == IX86_BUILTIN_PCMPISTRI128)
24218 24220 {
24219 24221 if (optimize || !target
24220 24222 || GET_MODE (target) != tmode0
24221 24223 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
24222 24224 target = gen_reg_rtx (tmode0);
24223 24225
24224 24226 scratch1 = gen_reg_rtx (tmode1);
24225 24227
24226 24228 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
24227 24229 }
24228 24230 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
24229 24231 {
24230 24232 if (optimize || !target
24231 24233 || GET_MODE (target) != tmode1
24232 24234 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
24233 24235 target = gen_reg_rtx (tmode1);
24234 24236
24235 24237 scratch0 = gen_reg_rtx (tmode0);
24236 24238
24237 24239 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
24238 24240 }
24239 24241 else
24240 24242 {
24241 24243 gcc_assert (d->flag);
24242 24244
24243 24245 scratch0 = gen_reg_rtx (tmode0);
24244 24246 scratch1 = gen_reg_rtx (tmode1);
24245 24247
24246 24248 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
24247 24249 }
24248 24250
24249 24251 if (! pat)
24250 24252 return 0;
24251 24253
24252 24254 emit_insn (pat);
24253 24255
24254 24256 if (d->flag)
24255 24257 {
24256 24258 target = gen_reg_rtx (SImode);
24257 24259 emit_move_insn (target, const0_rtx);
24258 24260 target = gen_rtx_SUBREG (QImode, target, 0);
24259 24261
24260 24262 emit_insn
24261 24263 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
24262 24264 gen_rtx_fmt_ee (EQ, QImode,
24263 24265 gen_rtx_REG ((enum machine_mode) d->flag,
24264 24266 FLAGS_REG),
24265 24267 const0_rtx)));
24266 24268 return SUBREG_REG (target);
24267 24269 }
24268 24270 else
24269 24271 return target;
24270 24272 }
24271 24273
24272 24274 /* Subroutine of ix86_expand_builtin to take care of insns with
24273 24275 variable number of operands. */
24274 24276
24275 24277 static rtx
24276 24278 ix86_expand_args_builtin (const struct builtin_description *d,
24277 24279 tree exp, rtx target)
24278 24280 {
24279 24281 rtx pat, real_target;
24280 24282 unsigned int i, nargs;
24281 24283 unsigned int nargs_constant = 0;
24282 24284 int num_memory = 0;
24283 24285 struct
24284 24286 {
24285 24287 rtx op;
24286 24288 enum machine_mode mode;
24287 24289 } args[4];
24288 24290 bool last_arg_count = false;
24289 24291 enum insn_code icode = d->icode;
24290 24292 const struct insn_data *insn_p = &insn_data[icode];
24291 24293 enum machine_mode tmode = insn_p->operand[0].mode;
24292 24294 enum machine_mode rmode = VOIDmode;
24293 24295 bool swap = false;
24294 24296 enum rtx_code comparison = d->comparison;
24295 24297
24296 24298 switch ((enum ix86_builtin_type) d->flag)
24297 24299 {
24298 24300 case INT_FTYPE_V8SF_V8SF_PTEST:
24299 24301 case INT_FTYPE_V4DI_V4DI_PTEST:
24300 24302 case INT_FTYPE_V4DF_V4DF_PTEST:
24301 24303 case INT_FTYPE_V4SF_V4SF_PTEST:
24302 24304 case INT_FTYPE_V2DI_V2DI_PTEST:
24303 24305 case INT_FTYPE_V2DF_V2DF_PTEST:
24304 24306 return ix86_expand_sse_ptest (d, exp, target);
24305 24307 case FLOAT128_FTYPE_FLOAT128:
24306 24308 case FLOAT_FTYPE_FLOAT:
24307 24309 case INT64_FTYPE_V4SF:
24308 24310 case INT64_FTYPE_V2DF:
24309 24311 case INT_FTYPE_V16QI:
24310 24312 case INT_FTYPE_V8QI:
24311 24313 case INT_FTYPE_V8SF:
24312 24314 case INT_FTYPE_V4DF:
24313 24315 case INT_FTYPE_V4SF:
24314 24316 case INT_FTYPE_V2DF:
24315 24317 case V16QI_FTYPE_V16QI:
24316 24318 case V8SI_FTYPE_V8SF:
24317 24319 case V8SI_FTYPE_V4SI:
24318 24320 case V8HI_FTYPE_V8HI:
24319 24321 case V8HI_FTYPE_V16QI:
24320 24322 case V8QI_FTYPE_V8QI:
24321 24323 case V8SF_FTYPE_V8SF:
24322 24324 case V8SF_FTYPE_V8SI:
24323 24325 case V8SF_FTYPE_V4SF:
24324 24326 case V4SI_FTYPE_V4SI:
24325 24327 case V4SI_FTYPE_V16QI:
24326 24328 case V4SI_FTYPE_V4SF:
24327 24329 case V4SI_FTYPE_V8SI:
24328 24330 case V4SI_FTYPE_V8HI:
24329 24331 case V4SI_FTYPE_V4DF:
24330 24332 case V4SI_FTYPE_V2DF:
24331 24333 case V4HI_FTYPE_V4HI:
24332 24334 case V4DF_FTYPE_V4DF:
24333 24335 case V4DF_FTYPE_V4SI:
24334 24336 case V4DF_FTYPE_V4SF:
24335 24337 case V4DF_FTYPE_V2DF:
24336 24338 case V4SF_FTYPE_V4SF:
24337 24339 case V4SF_FTYPE_V4SI:
24338 24340 case V4SF_FTYPE_V8SF:
24339 24341 case V4SF_FTYPE_V4DF:
24340 24342 case V4SF_FTYPE_V2DF:
24341 24343 case V2DI_FTYPE_V2DI:
24342 24344 case V2DI_FTYPE_V16QI:
24343 24345 case V2DI_FTYPE_V8HI:
24344 24346 case V2DI_FTYPE_V4SI:
24345 24347 case V2DF_FTYPE_V2DF:
24346 24348 case V2DF_FTYPE_V4SI:
24347 24349 case V2DF_FTYPE_V4DF:
24348 24350 case V2DF_FTYPE_V4SF:
24349 24351 case V2DF_FTYPE_V2SI:
24350 24352 case V2SI_FTYPE_V2SI:
24351 24353 case V2SI_FTYPE_V4SF:
24352 24354 case V2SI_FTYPE_V2SF:
24353 24355 case V2SI_FTYPE_V2DF:
24354 24356 case V2SF_FTYPE_V2SF:
24355 24357 case V2SF_FTYPE_V2SI:
24356 24358 nargs = 1;
24357 24359 break;
24358 24360 case V4SF_FTYPE_V4SF_VEC_MERGE:
24359 24361 case V2DF_FTYPE_V2DF_VEC_MERGE:
24360 24362 return ix86_expand_unop_vec_merge_builtin (icode, exp, target);
24361 24363 case FLOAT128_FTYPE_FLOAT128_FLOAT128:
24362 24364 case V16QI_FTYPE_V16QI_V16QI:
24363 24365 case V16QI_FTYPE_V8HI_V8HI:
24364 24366 case V8QI_FTYPE_V8QI_V8QI:
24365 24367 case V8QI_FTYPE_V4HI_V4HI:
24366 24368 case V8HI_FTYPE_V8HI_V8HI:
24367 24369 case V8HI_FTYPE_V16QI_V16QI:
24368 24370 case V8HI_FTYPE_V4SI_V4SI:
24369 24371 case V8SF_FTYPE_V8SF_V8SF:
24370 24372 case V8SF_FTYPE_V8SF_V8SI:
24371 24373 case V4SI_FTYPE_V4SI_V4SI:
24372 24374 case V4SI_FTYPE_V8HI_V8HI:
24373 24375 case V4SI_FTYPE_V4SF_V4SF:
24374 24376 case V4SI_FTYPE_V2DF_V2DF:
24375 24377 case V4HI_FTYPE_V4HI_V4HI:
24376 24378 case V4HI_FTYPE_V8QI_V8QI:
24377 24379 case V4HI_FTYPE_V2SI_V2SI:
24378 24380 case V4DF_FTYPE_V4DF_V4DF:
24379 24381 case V4DF_FTYPE_V4DF_V4DI:
24380 24382 case V4SF_FTYPE_V4SF_V4SF:
24381 24383 case V4SF_FTYPE_V4SF_V4SI:
24382 24384 case V4SF_FTYPE_V4SF_V2SI:
24383 24385 case V4SF_FTYPE_V4SF_V2DF:
24384 24386 case V4SF_FTYPE_V4SF_DI:
24385 24387 case V4SF_FTYPE_V4SF_SI:
24386 24388 case V2DI_FTYPE_V2DI_V2DI:
24387 24389 case V2DI_FTYPE_V16QI_V16QI:
24388 24390 case V2DI_FTYPE_V4SI_V4SI:
24389 24391 case V2DI_FTYPE_V2DI_V16QI:
24390 24392 case V2DI_FTYPE_V2DF_V2DF:
24391 24393 case V2SI_FTYPE_V2SI_V2SI:
24392 24394 case V2SI_FTYPE_V4HI_V4HI:
24393 24395 case V2SI_FTYPE_V2SF_V2SF:
24394 24396 case V2DF_FTYPE_V2DF_V2DF:
24395 24397 case V2DF_FTYPE_V2DF_V4SF:
24396 24398 case V2DF_FTYPE_V2DF_V2DI:
24397 24399 case V2DF_FTYPE_V2DF_DI:
24398 24400 case V2DF_FTYPE_V2DF_SI:
24399 24401 case V2SF_FTYPE_V2SF_V2SF:
24400 24402 case V1DI_FTYPE_V1DI_V1DI:
24401 24403 case V1DI_FTYPE_V8QI_V8QI:
24402 24404 case V1DI_FTYPE_V2SI_V2SI:
24403 24405 if (comparison == UNKNOWN)
24404 24406 return ix86_expand_binop_builtin (icode, exp, target);
24405 24407 nargs = 2;
24406 24408 break;
24407 24409 case V4SF_FTYPE_V4SF_V4SF_SWAP:
24408 24410 case V2DF_FTYPE_V2DF_V2DF_SWAP:
24409 24411 gcc_assert (comparison != UNKNOWN);
24410 24412 nargs = 2;
24411 24413 swap = true;
24412 24414 break;
24413 24415 case V8HI_FTYPE_V8HI_V8HI_COUNT:
24414 24416 case V8HI_FTYPE_V8HI_SI_COUNT:
24415 24417 case V4SI_FTYPE_V4SI_V4SI_COUNT:
24416 24418 case V4SI_FTYPE_V4SI_SI_COUNT:
24417 24419 case V4HI_FTYPE_V4HI_V4HI_COUNT:
24418 24420 case V4HI_FTYPE_V4HI_SI_COUNT:
24419 24421 case V2DI_FTYPE_V2DI_V2DI_COUNT:
24420 24422 case V2DI_FTYPE_V2DI_SI_COUNT:
24421 24423 case V2SI_FTYPE_V2SI_V2SI_COUNT:
24422 24424 case V2SI_FTYPE_V2SI_SI_COUNT:
24423 24425 case V1DI_FTYPE_V1DI_V1DI_COUNT:
24424 24426 case V1DI_FTYPE_V1DI_SI_COUNT:
24425 24427 nargs = 2;
24426 24428 last_arg_count = true;
24427 24429 break;
24428 24430 case UINT64_FTYPE_UINT64_UINT64:
24429 24431 case UINT_FTYPE_UINT_UINT:
24430 24432 case UINT_FTYPE_UINT_USHORT:
24431 24433 case UINT_FTYPE_UINT_UCHAR:
24432 24434 nargs = 2;
24433 24435 break;
24434 24436 case V2DI2TI_FTYPE_V2DI_INT:
24435 24437 nargs = 2;
24436 24438 rmode = V2DImode;
24437 24439 nargs_constant = 1;
24438 24440 break;
24439 24441 case V8HI_FTYPE_V8HI_INT:
24440 24442 case V8SF_FTYPE_V8SF_INT:
24441 24443 case V4SI_FTYPE_V4SI_INT:
24442 24444 case V4SI_FTYPE_V8SI_INT:
24443 24445 case V4HI_FTYPE_V4HI_INT:
24444 24446 case V4DF_FTYPE_V4DF_INT:
24445 24447 case V4SF_FTYPE_V4SF_INT:
24446 24448 case V4SF_FTYPE_V8SF_INT:
24447 24449 case V2DI_FTYPE_V2DI_INT:
24448 24450 case V2DF_FTYPE_V2DF_INT:
24449 24451 case V2DF_FTYPE_V4DF_INT:
24450 24452 nargs = 2;
24451 24453 nargs_constant = 1;
24452 24454 break;
24453 24455 case V16QI_FTYPE_V16QI_V16QI_V16QI:
24454 24456 case V8SF_FTYPE_V8SF_V8SF_V8SF:
24455 24457 case V4DF_FTYPE_V4DF_V4DF_V4DF:
24456 24458 case V4SF_FTYPE_V4SF_V4SF_V4SF:
24457 24459 case V2DF_FTYPE_V2DF_V2DF_V2DF:
24458 24460 nargs = 3;
24459 24461 break;
24460 24462 case V16QI_FTYPE_V16QI_V16QI_INT:
24461 24463 case V8HI_FTYPE_V8HI_V8HI_INT:
24462 24464 case V8SI_FTYPE_V8SI_V8SI_INT:
24463 24465 case V8SI_FTYPE_V8SI_V4SI_INT:
24464 24466 case V8SF_FTYPE_V8SF_V8SF_INT:
24465 24467 case V8SF_FTYPE_V8SF_V4SF_INT:
24466 24468 case V4SI_FTYPE_V4SI_V4SI_INT:
24467 24469 case V4DF_FTYPE_V4DF_V4DF_INT:
24468 24470 case V4DF_FTYPE_V4DF_V2DF_INT:
24469 24471 case V4SF_FTYPE_V4SF_V4SF_INT:
24470 24472 case V2DI_FTYPE_V2DI_V2DI_INT:
24471 24473 case V2DF_FTYPE_V2DF_V2DF_INT:
24472 24474 nargs = 3;
24473 24475 nargs_constant = 1;
24474 24476 break;
24475 24477 case V2DI2TI_FTYPE_V2DI_V2DI_INT:
24476 24478 nargs = 3;
24477 24479 rmode = V2DImode;
24478 24480 nargs_constant = 1;
24479 24481 break;
24480 24482 case V1DI2DI_FTYPE_V1DI_V1DI_INT:
24481 24483 nargs = 3;
24482 24484 rmode = DImode;
24483 24485 nargs_constant = 1;
24484 24486 break;
24485 24487 case V2DI_FTYPE_V2DI_UINT_UINT:
24486 24488 nargs = 3;
24487 24489 nargs_constant = 2;
24488 24490 break;
24489 24491 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT:
24490 24492 nargs = 4;
24491 24493 nargs_constant = 2;
24492 24494 break;
24493 24495 default:
24494 24496 gcc_unreachable ();
24495 24497 }
24496 24498
24497 24499 gcc_assert (nargs <= ARRAY_SIZE (args));
24498 24500
24499 24501 if (comparison != UNKNOWN)
24500 24502 {
24501 24503 gcc_assert (nargs == 2);
24502 24504 return ix86_expand_sse_compare (d, exp, target, swap);
24503 24505 }
24504 24506
24505 24507 if (rmode == VOIDmode || rmode == tmode)
24506 24508 {
24507 24509 if (optimize
24508 24510 || target == 0
24509 24511 || GET_MODE (target) != tmode
24510 24512 || ! (*insn_p->operand[0].predicate) (target, tmode))
24511 24513 target = gen_reg_rtx (tmode);
24512 24514 real_target = target;
24513 24515 }
24514 24516 else
24515 24517 {
24516 24518 target = gen_reg_rtx (rmode);
24517 24519 real_target = simplify_gen_subreg (tmode, target, rmode, 0);
24518 24520 }
24519 24521
24520 24522 for (i = 0; i < nargs; i++)
24521 24523 {
24522 24524 tree arg = CALL_EXPR_ARG (exp, i);
24523 24525 rtx op = expand_normal (arg);
24524 24526 enum machine_mode mode = insn_p->operand[i + 1].mode;
24525 24527 bool match = (*insn_p->operand[i + 1].predicate) (op, mode);
24526 24528
24527 24529 if (last_arg_count && (i + 1) == nargs)
24528 24530 {
24529 24531 /* SIMD shift insns take either an 8-bit immediate or a
24530 24532 register as the count, but the builtin functions take an int as
24531 24533 the count. If the count doesn't match, we put it in a register. */
24532 24534 if (!match)
24533 24535 {
24534 24536 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0);
24535 24537 if (!(*insn_p->operand[i + 1].predicate) (op, mode))
24536 24538 op = copy_to_reg (op);
24537 24539 }
24538 24540 }
24539 24541 else if ((nargs - i) <= nargs_constant)
24540 24542 {
24541 24543 if (!match)
24542 24544 switch (icode)
24543 24545 {
24544 24546 case CODE_FOR_sse4_1_roundpd:
24545 24547 case CODE_FOR_sse4_1_roundps:
24546 24548 case CODE_FOR_sse4_1_roundsd:
24547 24549 case CODE_FOR_sse4_1_roundss:
24548 24550 case CODE_FOR_sse4_1_blendps:
24549 24551 case CODE_FOR_avx_blendpd256:
24550 24552 case CODE_FOR_avx_vpermilv4df:
24551 24553 case CODE_FOR_avx_roundpd256:
24552 24554 case CODE_FOR_avx_roundps256:
24553 24555 error ("the last argument must be a 4-bit immediate");
24554 24556 return const0_rtx;
24555 24557
24556 24558 case CODE_FOR_sse4_1_blendpd:
24557 24559 case CODE_FOR_avx_vpermilv2df:
24558 24560 error ("the last argument must be a 2-bit immediate");
24559 24561 return const0_rtx;
24560 24562
24561 24563 case CODE_FOR_avx_vextractf128v4df:
24562 24564 case CODE_FOR_avx_vextractf128v8sf:
24563 24565 case CODE_FOR_avx_vextractf128v8si:
24564 24566 case CODE_FOR_avx_vinsertf128v4df:
24565 24567 case CODE_FOR_avx_vinsertf128v8sf:
24566 24568 case CODE_FOR_avx_vinsertf128v8si:
24567 24569 error ("the last argument must be a 1-bit immediate");
24568 24570 return const0_rtx;
24569 24571
24570 24572 case CODE_FOR_avx_cmpsdv2df3:
24571 24573 case CODE_FOR_avx_cmpssv4sf3:
24572 24574 case CODE_FOR_avx_cmppdv2df3:
24573 24575 case CODE_FOR_avx_cmppsv4sf3:
24574 24576 case CODE_FOR_avx_cmppdv4df3:
24575 24577 case CODE_FOR_avx_cmppsv8sf3:
24576 24578 error ("the last argument must be a 5-bit immediate");
24577 24579 return const0_rtx;
24578 24580
24579 24581 default:
24580 24582 switch (nargs_constant)
24581 24583 {
24582 24584 case 2:
24583 24585 if ((nargs - i) == nargs_constant)
24584 24586 {
24585 24587 error ("the next to last argument must be an 8-bit immediate");
24586 24588 break;
24587 24589 }
24588 24590 case 1:
24589 24591 error ("the last argument must be an 8-bit immediate");
24590 24592 break;
24591 24593 default:
24592 24594 gcc_unreachable ();
24593 24595 }
24594 24596 return const0_rtx;
24595 24597 }
24596 24598 }
24597 24599 else
24598 24600 {
24599 24601 if (VECTOR_MODE_P (mode))
24600 24602 op = safe_vector_operand (op, mode);
24601 24603
24602 24604 /* If we aren't optimizing, only allow one memory operand to
24603 24605 be generated. */
24604 24606 if (memory_operand (op, mode))
24605 24607 num_memory++;
24606 24608
24607 24609 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode)
24608 24610 {
24609 24611 if (optimize || !match || num_memory > 1)
24610 24612 op = copy_to_mode_reg (mode, op);
24611 24613 }
24612 24614 else
24613 24615 {
24614 24616 op = copy_to_reg (op);
24615 24617 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0);
24616 24618 }
24617 24619 }
24618 24620
24619 24621 args[i].op = op;
24620 24622 args[i].mode = mode;
24621 24623 }
24622 24624
24623 24625 switch (nargs)
24624 24626 {
24625 24627 case 1:
24626 24628 pat = GEN_FCN (icode) (real_target, args[0].op);
24627 24629 break;
24628 24630 case 2:
24629 24631 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op);
24630 24632 break;
24631 24633 case 3:
24632 24634 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24633 24635 args[2].op);
24634 24636 break;
24635 24637 case 4:
24636 24638 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op,
24637 24639 args[2].op, args[3].op);
24638 24640 break;
24639 24641 default:
24640 24642 gcc_unreachable ();
24641 24643 }
24642 24644
24643 24645 if (! pat)
24644 24646 return 0;
24645 24647
24646 24648 emit_insn (pat);
24647 24649 return target;
24648 24650 }
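The icode-specific diagnostics above fire when a builtin whose last operand is encoded as an immediate receives a non-constant. A sketch of the constant-only contract they enforce, assuming SSE4.1 and <smmintrin.h>:

    #include <smmintrin.h>

    /* The rounding-mode operand must be a compile-time constant: a literal
       is fine, but passing a variable instead would hit the
       CODE_FOR_sse4_1_roundpd case above and report
       "the last argument must be a 4-bit immediate".  */
    __m128d round_to_nearest (__m128d x)
    {
      return _mm_round_pd (x, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
    }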
24649 24651
24650 24652 /* Subroutine of ix86_expand_builtin to take care of special insns
24651 24653 with variable number of operands. */
24652 24654
24653 24655 static rtx
24654 24656 ix86_expand_special_args_builtin (const struct builtin_description *d,
24655 24657 tree exp, rtx target)
24656 24658 {
24657 24659 tree arg;
24658 24660 rtx pat, op;
24659 24661 unsigned int i, nargs, arg_adjust, memory;
24660 24662 struct
24661 24663 {
24662 24664 rtx op;
24663 24665 enum machine_mode mode;
24664 24666 } args[2];
24665 24667 enum insn_code icode = d->icode;
24666 24668 bool last_arg_constant = false;
24667 24669 const struct insn_data *insn_p = &insn_data[icode];
24668 24670 enum machine_mode tmode = insn_p->operand[0].mode;
24669 24671 enum { load, store } klass;
24670 24672
24671 24673 switch ((enum ix86_special_builtin_type) d->flag)
24672 24674 {
24673 24675 case VOID_FTYPE_VOID:
24674 24676 emit_insn (GEN_FCN (icode) (target));
24675 24677 return 0;
24676 24678 case V2DI_FTYPE_PV2DI:
24677 24679 case V32QI_FTYPE_PCCHAR:
24678 24680 case V16QI_FTYPE_PCCHAR:
24679 24681 case V8SF_FTYPE_PCV4SF:
24680 24682 case V8SF_FTYPE_PCFLOAT:
24681 24683 case V4SF_FTYPE_PCFLOAT:
24682 24684 case V4DF_FTYPE_PCV2DF:
24683 24685 case V4DF_FTYPE_PCDOUBLE:
24684 24686 case V2DF_FTYPE_PCDOUBLE:
24685 24687 nargs = 1;
24686 24688 klass = load;
24687 24689 memory = 0;
24688 24690 break;
24689 24691 case VOID_FTYPE_PV2SF_V4SF:
24690 24692 case VOID_FTYPE_PV4DI_V4DI:
24691 24693 case VOID_FTYPE_PV2DI_V2DI:
24692 24694 case VOID_FTYPE_PCHAR_V32QI:
24693 24695 case VOID_FTYPE_PCHAR_V16QI:
24694 24696 case VOID_FTYPE_PFLOAT_V8SF:
24695 24697 case VOID_FTYPE_PFLOAT_V4SF:
24696 24698 case VOID_FTYPE_PDOUBLE_V4DF:
24697 24699 case VOID_FTYPE_PDOUBLE_V2DF:
24698 24700 case VOID_FTYPE_PDI_DI:
24699 24701 case VOID_FTYPE_PINT_INT:
24700 24702 nargs = 1;
24701 24703 klass = store;
24702 24704 /* Reserve memory operand for target. */
24703 24705 memory = ARRAY_SIZE (args);
24704 24706 break;
24705 24707 case V4SF_FTYPE_V4SF_PCV2SF:
24706 24708 case V2DF_FTYPE_V2DF_PCDOUBLE:
24707 24709 nargs = 2;
24708 24710 klass = load;
24709 24711 memory = 1;
24710 24712 break;
24711 24713 case V8SF_FTYPE_PCV8SF_V8SF:
24712 24714 case V4DF_FTYPE_PCV4DF_V4DF:
24713 24715 case V4SF_FTYPE_PCV4SF_V4SF:
24714 24716 case V2DF_FTYPE_PCV2DF_V2DF:
24715 24717 nargs = 2;
24716 24718 klass = load;
24717 24719 memory = 0;
24718 24720 break;
24719 24721 case VOID_FTYPE_PV8SF_V8SF_V8SF:
24720 24722 case VOID_FTYPE_PV4DF_V4DF_V4DF:
24721 24723 case VOID_FTYPE_PV4SF_V4SF_V4SF:
24722 24724 case VOID_FTYPE_PV2DF_V2DF_V2DF:
24723 24725 nargs = 2;
24724 24726 klass = store;
24725 24727 /* Reserve memory operand for target. */
24726 24728 memory = ARRAY_SIZE (args);
24727 24729 break;
24728 24730 default:
24729 24731 gcc_unreachable ();
24730 24732 }
24731 24733
24732 24734 gcc_assert (nargs <= ARRAY_SIZE (args));
24733 24735
24734 24736 if (klass == store)
24735 24737 {
24736 24738 arg = CALL_EXPR_ARG (exp, 0);
24737 24739 op = expand_normal (arg);
24738 24740 gcc_assert (target == 0);
24739 24741 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op));
24740 24742 arg_adjust = 1;
24741 24743 }
24742 24744 else
24743 24745 {
24744 24746 arg_adjust = 0;
24745 24747 if (optimize
24746 24748 || target == 0
24747 24749 || GET_MODE (target) != tmode
24748 24750 || ! (*insn_p->operand[0].predicate) (target, tmode))
24749 24751 target = gen_reg_rtx (tmode);
24750 24752 }
24751 24753
24752 24754 for (i = 0; i < nargs; i++)
24753 24755 {
24754 24756 enum machine_mode mode = insn_p->operand[i + 1].mode;
24755 24757 bool match;
24756 24758
24757 24759 arg = CALL_EXPR_ARG (exp, i + arg_adjust);
24758 24760 op = expand_normal (arg);
24759 24761 match = (*insn_p->operand[i + 1].predicate) (op, mode);
24760 24762
24761 24763 if (last_arg_constant && (i + 1) == nargs)
24762 24764 {
24763 24765 if (!match)
24764 24766 switch (icode)
24765 24767 {
24766 24768 default:
24767 24769 error ("the last argument must be an 8-bit immediate");
24768 24770 return const0_rtx;
24769 24771 }
24770 24772 }
24771 24773 else
24772 24774 {
24773 24775 if (i == memory)
24774 24776 {
24775 24777 /* This must be the memory operand. */
24776 24778 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op));
24777 24779 gcc_assert (GET_MODE (op) == mode
24778 24780 || GET_MODE (op) == VOIDmode);
24779 24781 }
24780 24782 else
24781 24783 {
24782 24784 /* This must be the register operand. */
24783 24785 if (VECTOR_MODE_P (mode))
24784 24786 op = safe_vector_operand (op, mode);
24785 24787
24786 24788 gcc_assert (GET_MODE (op) == mode
24787 24789 || GET_MODE (op) == VOIDmode);
24788 24790 op = copy_to_mode_reg (mode, op);
24789 24791 }
24790 24792 }
24791 24793
24792 24794 args[i].op = op;
24793 24795 args[i].mode = mode;
24794 24796 }
24795 24797
24796 24798 switch (nargs)
24797 24799 {
24798 24800 case 1:
24799 24801 pat = GEN_FCN (icode) (target, args[0].op);
24800 24802 break;
24801 24803 case 2:
24802 24804 pat = GEN_FCN (icode) (target, args[0].op, args[1].op);
24803 24805 break;
24804 24806 default:
24805 24807 gcc_unreachable ();
24806 24808 }
24807 24809
24808 24810 if (! pat)
24809 24811 return 0;
24810 24812 emit_insn (pat);
24811 24813 return klass == store ? 0 : target;
24812 24814 }
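A sketch of the two klasses this helper distinguishes, assuming <xmmintrin.h> and that these intrinsics route through __builtin_ia32_loadups/__builtin_ia32_storeups:

    #include <xmmintrin.h>

    __m128 load_unaligned (const float *p)       /* V4SF_FTYPE_PCFLOAT, klass == load  */
    {
      return _mm_loadu_ps (p);
    }

    void store_unaligned (float *p, __m128 v)    /* VOID_FTYPE_PFLOAT_V4SF, klass == store */
    {
      _mm_storeu_ps (p, v);
    }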
24813 24815
24814 24816 /* Return the integer constant in ARG. Constrain it to be in the range
24815 24817 of the subparts of VEC_TYPE; issue an error if not. */
24816 24818
24817 24819 static int
24818 24820 get_element_number (tree vec_type, tree arg)
24819 24821 {
24820 24822 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
24821 24823
24822 24824 if (!host_integerp (arg, 1)
24823 24825 || (elt = tree_low_cst (arg, 1), elt > max))
24824 24826 {
24825 24827 error ("selector must be an integer constant in the range 0..%wi", max);
24826 24828 return 0;
24827 24829 }
24828 24830
24829 24831 return elt;
24830 24832 }
24831 24833
24832 24834 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24833 24835 ix86_expand_vector_init. We DO have language-level syntax for this, in
24834 24836 the form of (type){ init-list }. Except that since we can't place emms
24835 24837 instructions from inside the compiler, we can't allow the use of MMX
24836 24838 registers unless the user explicitly asks for it. So we do *not* define
24837 24839 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
24838 24840 we have builtins invoked by mmintrin.h that give us license to emit
24839 24841 these sorts of instructions. */
24840 24842
24841 24843 static rtx
24842 24844 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
24843 24845 {
24844 24846 enum machine_mode tmode = TYPE_MODE (type);
24845 24847 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
24846 24848 int i, n_elt = GET_MODE_NUNITS (tmode);
24847 24849 rtvec v = rtvec_alloc (n_elt);
24848 24850
24849 24851 gcc_assert (VECTOR_MODE_P (tmode));
24850 24852 gcc_assert (call_expr_nargs (exp) == n_elt);
24851 24853
24852 24854 for (i = 0; i < n_elt; ++i)
24853 24855 {
24854 24856 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
24855 24857 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
24856 24858 }
24857 24859
24858 24860 if (!target || !register_operand (target, tmode))
24859 24861 target = gen_reg_rtx (tmode);
24860 24862
24861 24863 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
24862 24864 return target;
24863 24865 }
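As the comment above explains, MMX element construction is only reachable through the mmintrin.h builtins. A sketch of the entry point, assuming <mmintrin.h> and -mmmx:

    #include <mmintrin.h>

    /* Routed through IX86_BUILTIN_VEC_INIT_V4HI; the caller is still
       responsible for issuing _mm_empty () before any later x87 code.  */
    __m64 pack_halfwords (short a, short b, short c, short d)
    {
      return _mm_set_pi16 (d, c, b, a);
    }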
24864 24866
24865 24867 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24866 24868 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
24867 24869 had a language-level syntax for referencing vector elements. */
24868 24870
24869 24871 static rtx
24870 24872 ix86_expand_vec_ext_builtin (tree exp, rtx target)
24871 24873 {
24872 24874 enum machine_mode tmode, mode0;
24873 24875 tree arg0, arg1;
24874 24876 int elt;
24875 24877 rtx op0;
24876 24878
24877 24879 arg0 = CALL_EXPR_ARG (exp, 0);
24878 24880 arg1 = CALL_EXPR_ARG (exp, 1);
24879 24881
24880 24882 op0 = expand_normal (arg0);
24881 24883 elt = get_element_number (TREE_TYPE (arg0), arg1);
24882 24884
24883 24885 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24884 24886 mode0 = TYPE_MODE (TREE_TYPE (arg0));
24885 24887 gcc_assert (VECTOR_MODE_P (mode0));
24886 24888
24887 24889 op0 = force_reg (mode0, op0);
24888 24890
24889 24891 if (optimize || !target || !register_operand (target, tmode))
24890 24892 target = gen_reg_rtx (tmode);
24891 24893
24892 24894 ix86_expand_vector_extract (true, target, op0, elt);
24893 24895
24894 24896 return target;
24895 24897 }
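A sketch of a wrapped extract, assuming SSE2 and <emmintrin.h>:

    #include <emmintrin.h>

    /* The selector must be an integer constant in the range 0..7;
       get_element_number above enforces this.  */
    int third_halfword (__m128i x)
    {
      return _mm_extract_epi16 (x, 2);
    }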
24896 24898
24897 24899 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
24898 24900 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
24899 24901 a language-level syntax for referencing vector elements. */
24900 24902
24901 24903 static rtx
24902 24904 ix86_expand_vec_set_builtin (tree exp)
24903 24905 {
24904 24906 enum machine_mode tmode, mode1;
24905 24907 tree arg0, arg1, arg2;
24906 24908 int elt;
24907 24909 rtx op0, op1, target;
24908 24910
24909 24911 arg0 = CALL_EXPR_ARG (exp, 0);
24910 24912 arg1 = CALL_EXPR_ARG (exp, 1);
24911 24913 arg2 = CALL_EXPR_ARG (exp, 2);
24912 24914
24913 24915 tmode = TYPE_MODE (TREE_TYPE (arg0));
24914 24916 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
24915 24917 gcc_assert (VECTOR_MODE_P (tmode));
24916 24918
24917 24919 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
24918 24920 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
24919 24921 elt = get_element_number (TREE_TYPE (arg0), arg2);
24920 24922
24921 24923 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
24922 24924 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
24923 24925
24924 24926 op0 = force_reg (tmode, op0);
24925 24927 op1 = force_reg (mode1, op1);
24926 24928
24927 24929 /* OP0 is the source of these builtin functions and shouldn't be
24928 24930 modified. Create a copy, use it and return it as target. */
24929 24931 target = gen_reg_rtx (tmode);
24930 24932 emit_move_insn (target, op0);
24931 24933 ix86_expand_vector_set (true, target, op1, elt);
24932 24934
24933 24935 return target;
24934 24936 }
24935 24937
24936 24938 /* Expand an expression EXP that calls a built-in function,
24937 24939 with result going to TARGET if that's convenient
24938 24940 (and in mode MODE if that's convenient).
24939 24941 SUBTARGET may be used as the target for computing one of EXP's operands.
24940 24942 IGNORE is nonzero if the value is to be ignored. */
24941 24943
24942 24944 static rtx
24943 24945 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
24944 24946 enum machine_mode mode ATTRIBUTE_UNUSED,
24945 24947 int ignore ATTRIBUTE_UNUSED)
24946 24948 {
24947 24949 const struct builtin_description *d;
24948 24950 size_t i;
24949 24951 enum insn_code icode;
24950 24952 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
24951 24953 tree arg0, arg1, arg2;
24952 24954 rtx op0, op1, op2, pat;
24953 24955 enum machine_mode mode0, mode1, mode2;
24954 24956 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
24955 24957
24956 24958 /* Determine whether the builtin function is available under the current ISA.
24957 24959 Originally the builtin was not created if it wasn't applicable to the
24958 24960 current ISA based on the command line switches. With function specific
24959 24961 options, we need to check in the context of the function making the call
24960 24962 whether it is supported. */
24961 24963 if (ix86_builtins_isa[fcode].isa
24962 24964 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags))
24963 24965 {
24964 24966 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL,
24965 24967 NULL, NULL, false);
24966 24968
24967 24969 if (!opts)
24968 24970 error ("%qE needs unknown isa option", fndecl);
24969 24971 else
24970 24972 {
24971 24973 gcc_assert (opts != NULL);
24972 24974 error ("%qE needs isa option %s", fndecl, opts);
24973 24975 free (opts);
24974 24976 }
24975 24977 return const0_rtx;
24976 24978 }
24977 24979
24978 24980 switch (fcode)
24979 24981 {
24980 24982 case IX86_BUILTIN_MASKMOVQ:
24981 24983 case IX86_BUILTIN_MASKMOVDQU:
24982 24984 icode = (fcode == IX86_BUILTIN_MASKMOVQ
24983 24985 ? CODE_FOR_mmx_maskmovq
24984 24986 : CODE_FOR_sse2_maskmovdqu);
24985 24987 /* Note the arg order is different from the operand order. */
24986 24988 arg1 = CALL_EXPR_ARG (exp, 0);
24987 24989 arg2 = CALL_EXPR_ARG (exp, 1);
24988 24990 arg0 = CALL_EXPR_ARG (exp, 2);
24989 24991 op0 = expand_normal (arg0);
24990 24992 op1 = expand_normal (arg1);
24991 24993 op2 = expand_normal (arg2);
24992 24994 mode0 = insn_data[icode].operand[0].mode;
24993 24995 mode1 = insn_data[icode].operand[1].mode;
24994 24996 mode2 = insn_data[icode].operand[2].mode;
24995 24997
24996 24998 op0 = force_reg (Pmode, op0);
24997 24999 op0 = gen_rtx_MEM (mode1, op0);
24998 25000
24999 25001 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
25000 25002 op0 = copy_to_mode_reg (mode0, op0);
25001 25003 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
25002 25004 op1 = copy_to_mode_reg (mode1, op1);
25003 25005 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
25004 25006 op2 = copy_to_mode_reg (mode2, op2);
25005 25007 pat = GEN_FCN (icode) (op0, op1, op2);
25006 25008 if (! pat)
25007 25009 return 0;
25008 25010 emit_insn (pat);
25009 25011 return 0;
25010 25012
25011 25013 case IX86_BUILTIN_LDMXCSR:
25012 25014 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
25013 25015 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25014 25016 emit_move_insn (target, op0);
25015 25017 emit_insn (gen_sse_ldmxcsr (target));
25016 25018 return 0;
25017 25019
25018 25020 case IX86_BUILTIN_STMXCSR:
25019 25021 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
25020 25022 emit_insn (gen_sse_stmxcsr (target));
25021 25023 return copy_to_mode_reg (SImode, target);
25022 25024
25023 25025 case IX86_BUILTIN_CLFLUSH:
25024 25026 arg0 = CALL_EXPR_ARG (exp, 0);
25025 25027 op0 = expand_normal (arg0);
25026 25028 icode = CODE_FOR_sse2_clflush;
25027 25029 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
25028 25030 op0 = copy_to_mode_reg (Pmode, op0);
25029 25031
25030 25032 emit_insn (gen_sse2_clflush (op0));
25031 25033 return 0;
25032 25034
25033 25035 case IX86_BUILTIN_MONITOR:
25034 25036 arg0 = CALL_EXPR_ARG (exp, 0);
25035 25037 arg1 = CALL_EXPR_ARG (exp, 1);
25036 25038 arg2 = CALL_EXPR_ARG (exp, 2);
25037 25039 op0 = expand_normal (arg0);
25038 25040 op1 = expand_normal (arg1);
25039 25041 op2 = expand_normal (arg2);
25040 25042 if (!REG_P (op0))
25041 25043 op0 = copy_to_mode_reg (Pmode, op0);
25042 25044 if (!REG_P (op1))
25043 25045 op1 = copy_to_mode_reg (SImode, op1);
25044 25046 if (!REG_P (op2))
25045 25047 op2 = copy_to_mode_reg (SImode, op2);
25046 25048 emit_insn ((*ix86_gen_monitor) (op0, op1, op2));
25047 25049 return 0;
25048 25050
25049 25051 case IX86_BUILTIN_MWAIT:
25050 25052 arg0 = CALL_EXPR_ARG (exp, 0);
25051 25053 arg1 = CALL_EXPR_ARG (exp, 1);
25052 25054 op0 = expand_normal (arg0);
25053 25055 op1 = expand_normal (arg1);
25054 25056 if (!REG_P (op0))
25055 25057 op0 = copy_to_mode_reg (SImode, op0);
25056 25058 if (!REG_P (op1))
25057 25059 op1 = copy_to_mode_reg (SImode, op1);
25058 25060 emit_insn (gen_sse3_mwait (op0, op1));
25059 25061 return 0;
25060 25062
25061 25063 case IX86_BUILTIN_VEC_INIT_V2SI:
25062 25064 case IX86_BUILTIN_VEC_INIT_V4HI:
25063 25065 case IX86_BUILTIN_VEC_INIT_V8QI:
25064 25066 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
25065 25067
25066 25068 case IX86_BUILTIN_VEC_EXT_V2DF:
25067 25069 case IX86_BUILTIN_VEC_EXT_V2DI:
25068 25070 case IX86_BUILTIN_VEC_EXT_V4SF:
25069 25071 case IX86_BUILTIN_VEC_EXT_V4SI:
25070 25072 case IX86_BUILTIN_VEC_EXT_V8HI:
25071 25073 case IX86_BUILTIN_VEC_EXT_V2SI:
25072 25074 case IX86_BUILTIN_VEC_EXT_V4HI:
25073 25075 case IX86_BUILTIN_VEC_EXT_V16QI:
25074 25076 return ix86_expand_vec_ext_builtin (exp, target);
25075 25077
25076 25078 case IX86_BUILTIN_VEC_SET_V2DI:
25077 25079 case IX86_BUILTIN_VEC_SET_V4SF:
25078 25080 case IX86_BUILTIN_VEC_SET_V4SI:
25079 25081 case IX86_BUILTIN_VEC_SET_V8HI:
25080 25082 case IX86_BUILTIN_VEC_SET_V4HI:
25081 25083 case IX86_BUILTIN_VEC_SET_V16QI:
25082 25084 return ix86_expand_vec_set_builtin (exp);
25083 25085
25084 25086 case IX86_BUILTIN_INFQ:
25085 25087 {
25086 25088 REAL_VALUE_TYPE inf;
25087 25089 rtx tmp;
25088 25090
25089 25091 real_inf (&inf);
25090 25092 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
25091 25093
25092 25094 tmp = validize_mem (force_const_mem (mode, tmp));
25093 25095
25094 25096 if (target == 0)
25095 25097 target = gen_reg_rtx (mode);
25096 25098
25097 25099 emit_move_insn (target, tmp);
25098 25100 return target;
25099 25101 }
25100 25102
25101 25103 default:
25102 25104 break;
25103 25105 }
25104 25106
25105 25107 for (i = 0, d = bdesc_special_args;
25106 25108 i < ARRAY_SIZE (bdesc_special_args);
25107 25109 i++, d++)
25108 25110 if (d->code == fcode)
25109 25111 return ix86_expand_special_args_builtin (d, exp, target);
25110 25112
25111 25113 for (i = 0, d = bdesc_args;
25112 25114 i < ARRAY_SIZE (bdesc_args);
25113 25115 i++, d++)
25114 25116 if (d->code == fcode)
25115 25117 switch (fcode)
25116 25118 {
25117 25119 case IX86_BUILTIN_FABSQ:
25118 25120 case IX86_BUILTIN_COPYSIGNQ:
25119 25121 if (!TARGET_SSE2)
25120 25122 /* Emit a normal call if SSE2 isn't available. */
25121 25123 return expand_call (exp, target, ignore);
25122 25124 default:
25123 25125 return ix86_expand_args_builtin (d, exp, target);
25124 25126 }
25125 25127
25126 25128 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
25127 25129 if (d->code == fcode)
25128 25130 return ix86_expand_sse_comi (d, exp, target);
25129 25131
25130 25132 for (i = 0, d = bdesc_pcmpestr;
25131 25133 i < ARRAY_SIZE (bdesc_pcmpestr);
25132 25134 i++, d++)
25133 25135 if (d->code == fcode)
25134 25136 return ix86_expand_sse_pcmpestr (d, exp, target);
25135 25137
25136 25138 for (i = 0, d = bdesc_pcmpistr;
25137 25139 i < ARRAY_SIZE (bdesc_pcmpistr);
25138 25140 i++, d++)
25139 25141 if (d->code == fcode)
25140 25142 return ix86_expand_sse_pcmpistr (d, exp, target);
25141 25143
25142 25144 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++)
25143 25145 if (d->code == fcode)
25144 25146 return ix86_expand_multi_arg_builtin (d->icode, exp, target,
25145 25147 (enum multi_arg_type)d->flag,
25146 25148 d->comparison);
25147 25149
25148 25150 gcc_unreachable ();
25149 25151 }
25150 25152
25151 25153 /* Returns a function decl for a vectorized version of the builtin function
25152 25154 with builtin function code FN and the result vector type TYPE, or NULL_TREE
25153 25155 if it is not available. */
25154 25156
25155 25157 static tree
25156 25158 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
25157 25159 tree type_in)
25158 25160 {
25159 25161 enum machine_mode in_mode, out_mode;
25160 25162 int in_n, out_n;
25161 25163
25162 25164 if (TREE_CODE (type_out) != VECTOR_TYPE
25163 25165 || TREE_CODE (type_in) != VECTOR_TYPE)
25164 25166 return NULL_TREE;
25165 25167
25166 25168 out_mode = TYPE_MODE (TREE_TYPE (type_out));
25167 25169 out_n = TYPE_VECTOR_SUBPARTS (type_out);
25168 25170 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25169 25171 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25170 25172
25171 25173 switch (fn)
25172 25174 {
25173 25175 case BUILT_IN_SQRT:
25174 25176 if (out_mode == DFmode && out_n == 2
25175 25177 && in_mode == DFmode && in_n == 2)
25176 25178 return ix86_builtins[IX86_BUILTIN_SQRTPD];
25177 25179 break;
25178 25180
25179 25181 case BUILT_IN_SQRTF:
25180 25182 if (out_mode == SFmode && out_n == 4
25181 25183 && in_mode == SFmode && in_n == 4)
25182 25184 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR];
25183 25185 break;
25184 25186
25185 25187 case BUILT_IN_LRINT:
25186 25188 if (out_mode == SImode && out_n == 4
25187 25189 && in_mode == DFmode && in_n == 2)
25188 25190 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
25189 25191 break;
25190 25192
25191 25193 case BUILT_IN_LRINTF:
25192 25194 if (out_mode == SImode && out_n == 4
25193 25195 && in_mode == SFmode && in_n == 4)
25194 25196 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
25195 25197 break;
25196 25198
25197 25199 default:
25198 25200 ;
25199 25201 }
25200 25202
25201 25203 /* Dispatch to a handler for a vectorization library. */
25202 25204 if (ix86_veclib_handler)
25203 25205 return (*ix86_veclib_handler)(fn, type_out, type_in);
25204 25206
25205 25207 return NULL_TREE;
25206 25208 }
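In practice this hook is what lets the vectorizer use SQRTPD/SQRTPS for scalar sqrt calls. A sketch of a loop that benefits, assuming something like -O3 -msse2 -fno-math-errno:

    /* With vectorization enabled, each pair of iterations becomes a single
       SQRTPD once the hook returns IX86_BUILTIN_SQRTPD for BUILT_IN_SQRT.  */
    void vec_sqrt (double *out, const double *in, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        out[i] = __builtin_sqrt (in[i]);
    }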
25207 25209
25208 25210 /* Handler for an SVML-style interface to
25209 25211 a library with vectorized intrinsics. */
25210 25212
25211 25213 static tree
25212 25214 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in)
25213 25215 {
25214 25216 char name[20];
25215 25217 tree fntype, new_fndecl, args;
25216 25218 unsigned arity;
25217 25219 const char *bname;
25218 25220 enum machine_mode el_mode, in_mode;
25219 25221 int n, in_n;
25220 25222
25221 25223 /* The SVML library is suitable for unsafe math only. */
25222 25224 if (!flag_unsafe_math_optimizations)
25223 25225 return NULL_TREE;
25224 25226
25225 25227 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25226 25228 n = TYPE_VECTOR_SUBPARTS (type_out);
25227 25229 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25228 25230 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25229 25231 if (el_mode != in_mode
25230 25232 || n != in_n)
25231 25233 return NULL_TREE;
25232 25234
25233 25235 switch (fn)
25234 25236 {
25235 25237 case BUILT_IN_EXP:
25236 25238 case BUILT_IN_LOG:
25237 25239 case BUILT_IN_LOG10:
25238 25240 case BUILT_IN_POW:
25239 25241 case BUILT_IN_TANH:
25240 25242 case BUILT_IN_TAN:
25241 25243 case BUILT_IN_ATAN:
25242 25244 case BUILT_IN_ATAN2:
25243 25245 case BUILT_IN_ATANH:
25244 25246 case BUILT_IN_CBRT:
25245 25247 case BUILT_IN_SINH:
25246 25248 case BUILT_IN_SIN:
25247 25249 case BUILT_IN_ASINH:
25248 25250 case BUILT_IN_ASIN:
25249 25251 case BUILT_IN_COSH:
25250 25252 case BUILT_IN_COS:
25251 25253 case BUILT_IN_ACOSH:
25252 25254 case BUILT_IN_ACOS:
25253 25255 if (el_mode != DFmode || n != 2)
25254 25256 return NULL_TREE;
25255 25257 break;
25256 25258
25257 25259 case BUILT_IN_EXPF:
25258 25260 case BUILT_IN_LOGF:
25259 25261 case BUILT_IN_LOG10F:
25260 25262 case BUILT_IN_POWF:
25261 25263 case BUILT_IN_TANHF:
25262 25264 case BUILT_IN_TANF:
25263 25265 case BUILT_IN_ATANF:
25264 25266 case BUILT_IN_ATAN2F:
25265 25267 case BUILT_IN_ATANHF:
25266 25268 case BUILT_IN_CBRTF:
25267 25269 case BUILT_IN_SINHF:
25268 25270 case BUILT_IN_SINF:
25269 25271 case BUILT_IN_ASINHF:
25270 25272 case BUILT_IN_ASINF:
25271 25273 case BUILT_IN_COSHF:
25272 25274 case BUILT_IN_COSF:
25273 25275 case BUILT_IN_ACOSHF:
25274 25276 case BUILT_IN_ACOSF:
25275 25277 if (el_mode != SFmode || n != 4)
25276 25278 return NULL_TREE;
25277 25279 break;
25278 25280
25279 25281 default:
25280 25282 return NULL_TREE;
25281 25283 }
25282 25284
25283 25285 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25284 25286
25285 25287 if (fn == BUILT_IN_LOGF)
25286 25288 strcpy (name, "vmlsLn4");
25287 25289 else if (fn == BUILT_IN_LOG)
25288 25290 strcpy (name, "vmldLn2");
25289 25291 else if (n == 4)
25290 25292 {
25291 25293 sprintf (name, "vmls%s", bname+10);
25292 25294 name[strlen (name)-1] = '4';
25293 25295 }
25294 25296 else
25295 25297 sprintf (name, "vmld%s2", bname+10);
25296 25298
25297 25299 /* Convert to uppercase. */
25298 25300 name[4] &= ~0x20;
25299 25301
25300 25302 arity = 0;
25301 25303 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25302 25304 args = TREE_CHAIN (args))
25303 25305 arity++;
25304 25306
25305 25307 if (arity == 1)
25306 25308 fntype = build_function_type_list (type_out, type_in, NULL);
25307 25309 else
25308 25310 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25309 25311
25310 25312 /* Build a function declaration for the vectorized function. */
25311 25313 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25312 25314 TREE_PUBLIC (new_fndecl) = 1;
25313 25315 DECL_EXTERNAL (new_fndecl) = 1;
25314 25316 DECL_IS_NOVOPS (new_fndecl) = 1;
25315 25317 TREE_READONLY (new_fndecl) = 1;
25316 25318
25317 25319 return new_fndecl;
25318 25320 }
25319 25321
25320 25322 /* Handler for an ACML-style interface to
25321 25323 a library with vectorized intrinsics. */
25322 25324
25323 25325 static tree
25324 25326 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
25325 25327 {
25326 25328 char name[20] = "__vr.._";
25327 25329 tree fntype, new_fndecl, args;
25328 25330 unsigned arity;
25329 25331 const char *bname;
25330 25332 enum machine_mode el_mode, in_mode;
25331 25333 int n, in_n;
25332 25334
25333 25335 /* The ACML is 64-bit only and suitable for unsafe math only, as
25334 25336 it does not correctly support parts of IEEE with the required
25335 25337 precision such as denormals. */
25336 25338 if (!TARGET_64BIT
25337 25339 || !flag_unsafe_math_optimizations)
25338 25340 return NULL_TREE;
25339 25341
25340 25342 el_mode = TYPE_MODE (TREE_TYPE (type_out));
25341 25343 n = TYPE_VECTOR_SUBPARTS (type_out);
25342 25344 in_mode = TYPE_MODE (TREE_TYPE (type_in));
25343 25345 in_n = TYPE_VECTOR_SUBPARTS (type_in);
25344 25346 if (el_mode != in_mode
25345 25347 || n != in_n)
25346 25348 return NULL_TREE;
25347 25349
25348 25350 switch (fn)
25349 25351 {
25350 25352 case BUILT_IN_SIN:
25351 25353 case BUILT_IN_COS:
25352 25354 case BUILT_IN_EXP:
25353 25355 case BUILT_IN_LOG:
25354 25356 case BUILT_IN_LOG2:
25355 25357 case BUILT_IN_LOG10:
25356 25358 name[4] = 'd';
25357 25359 name[5] = '2';
25358 25360 if (el_mode != DFmode
25359 25361 || n != 2)
25360 25362 return NULL_TREE;
25361 25363 break;
25362 25364
25363 25365 case BUILT_IN_SINF:
25364 25366 case BUILT_IN_COSF:
25365 25367 case BUILT_IN_EXPF:
25366 25368 case BUILT_IN_POWF:
25367 25369 case BUILT_IN_LOGF:
25368 25370 case BUILT_IN_LOG2F:
25369 25371 case BUILT_IN_LOG10F:
25370 25372 name[4] = 's';
25371 25373 name[5] = '4';
25372 25374 if (el_mode != SFmode
25373 25375 || n != 4)
25374 25376 return NULL_TREE;
25375 25377 break;
25376 25378
25377 25379 default:
25378 25380 return NULL_TREE;
25379 25381 }
25380 25382
25381 25383 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
25382 25384 sprintf (name + 7, "%s", bname+10);
25383 25385
25384 25386 arity = 0;
25385 25387 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
25386 25388 args = TREE_CHAIN (args))
25387 25389 arity++;
25388 25390
25389 25391 if (arity == 1)
25390 25392 fntype = build_function_type_list (type_out, type_in, NULL);
25391 25393 else
25392 25394 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
25393 25395
25394 25396 /* Build a function declaration for the vectorized function. */
25395 25397 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
25396 25398 TREE_PUBLIC (new_fndecl) = 1;
25397 25399 DECL_EXTERNAL (new_fndecl) = 1;
25398 25400 DECL_IS_NOVOPS (new_fndecl) = 1;
25399 25401 TREE_READONLY (new_fndecl) = 1;
25400 25402
25401 25403 return new_fndecl;
25402 25404 }
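Both handlers only rename and re-type the scalar builtin (from the sprintf logic above, sinf becomes vmlsSin4 for SVML and __vrs4_sinf for ACML); the library itself is chosen on the command line. A hedged sketch of a loop the handlers apply to:

    #include <math.h>

    /* Under something like -O3 -ffast-math -mveclibabi=svml this loop is
       vectorized into calls to vmlsSin4; under -mveclibabi=acml -m64 the
       call would be __vrs4_sinf instead.  */
    void vec_sin (float *out, const float *in, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        out[i] = sinf (in[i]);
    }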
25403 25405
25404 25406
25405 25407 /* Returns a decl of a function that implements conversion of an integer vector
25406 25408 into a floating-point vector, or vice-versa. TYPE is the type of the integer
25407 25409 side of the conversion.
25408 25410 Return NULL_TREE if it is not available. */
25409 25411
25410 25412 static tree
25411 25413 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
25412 25414 {
25413 25415 if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE
25414 25416 /* There are only conversions from/to signed integers. */
25415 25417 || TYPE_UNSIGNED (TREE_TYPE (type)))
25416 25418 return NULL_TREE;
25417 25419
25418 25420 switch (code)
25419 25421 {
25420 25422 case FLOAT_EXPR:
25421 25423 switch (TYPE_MODE (type))
25422 25424 {
25423 25425 case V4SImode:
25424 25426 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
25425 25427 default:
25426 25428 return NULL_TREE;
25427 25429 }
25428 25430
25429 25431 case FIX_TRUNC_EXPR:
25430 25432 switch (TYPE_MODE (type))
25431 25433 {
25432 25434 case V4SImode:
25433 25435 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
25434 25436 default:
25435 25437 return NULL_TREE;
25436 25438 }
25437 25439 default:
25438 25440 return NULL_TREE;
25439 25441
25440 25442 }
25441 25443 }
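A sketch of what the FLOAT_EXPR case enables, assuming -O3 -msse2:

    /* With SSE2 and vectorization enabled the int -> float conversion below
       is performed four elements at a time with CVTDQ2PS.  */
    void widen_to_float (float *out, const int *in, int n)
    {
      int i;
      for (i = 0; i < n; i++)
        out[i] = (float) in[i];
    }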
25442 25444
25443 25445 /* Returns a decl for a target-specific builtin that implements the
25444 25446 reciprocal of the function, or NULL_TREE if not available. */
25445 25447
25446 25448 static tree
25447 25449 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
25448 25450 bool sqrt ATTRIBUTE_UNUSED)
25449 25451 {
25450 25452 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p ()
25451 25453 && flag_finite_math_only && !flag_trapping_math
25452 25454 && flag_unsafe_math_optimizations))
25453 25455 return NULL_TREE;
25454 25456
25455 25457 if (md_fn)
25456 25458 /* Machine dependent builtins. */
25457 25459 switch (fn)
25458 25460 {
25459 25461 /* Vectorized version of sqrt to rsqrt conversion. */
25460 25462 case IX86_BUILTIN_SQRTPS_NR:
25461 25463 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR];
25462 25464
25463 25465 default:
25464 25466 return NULL_TREE;
25465 25467 }
25466 25468 else
25467 25469 /* Normal builtins. */
25468 25470 switch (fn)
25469 25471 {
25470 25472 /* Sqrt to rsqrt conversion. */
25471 25473 case BUILT_IN_SQRTF:
25472 25474 return ix86_builtins[IX86_BUILTIN_RSQRTF];
25473 25475
25474 25476 default:
25475 25477 return NULL_TREE;
25476 25478 }
25477 25479 }
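A sketch of the rewrite this hook gates, assuming flags along the lines of -O2 -mrecip -ffast-math with SSE math:

    #include <math.h>

    /* With the hook active, the division by sqrtf is rewritten to RSQRTSS
       plus a Newton-Raphson refinement step instead of a hardware square
       root and divide.  */
    float inv_sqrt (float x)
    {
      return 1.0f / sqrtf (x);
    }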
25478 25480
25479 25481 /* Store OPERAND to memory after reload is completed. This means
25480 25482 that we can't easily use assign_stack_local. */
25481 25483 rtx
25482 25484 ix86_force_to_memory (enum machine_mode mode, rtx operand)
25483 25485 {
25484 25486 rtx result;
25485 25487
25486 25488 gcc_assert (reload_completed);
25487 25489 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE)
25488 25490 {
25489 25491 result = gen_rtx_MEM (mode,
25490 25492 gen_rtx_PLUS (Pmode,
25491 25493 stack_pointer_rtx,
25492 25494 GEN_INT (-RED_ZONE_SIZE)));
25493 25495 emit_move_insn (result, operand);
25494 25496 }
25495 25497 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT)
25496 25498 {
25497 25499 switch (mode)
25498 25500 {
25499 25501 case HImode:
25500 25502 case SImode:
25501 25503 operand = gen_lowpart (DImode, operand);
25502 25504 /* FALLTHRU */
25503 25505 case DImode:
25504 25506 emit_insn (
25505 25507 gen_rtx_SET (VOIDmode,
25506 25508 gen_rtx_MEM (DImode,
25507 25509 gen_rtx_PRE_DEC (DImode,
25508 25510 stack_pointer_rtx)),
25509 25511 operand));
25510 25512 break;
25511 25513 default:
25512 25514 gcc_unreachable ();
25513 25515 }
25514 25516 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25515 25517 }
25516 25518 else
25517 25519 {
25518 25520 switch (mode)
25519 25521 {
25520 25522 case DImode:
25521 25523 {
25522 25524 rtx operands[2];
25523 25525 split_di (&operand, 1, operands, operands + 1);
25524 25526 emit_insn (
25525 25527 gen_rtx_SET (VOIDmode,
25526 25528 gen_rtx_MEM (SImode,
25527 25529 gen_rtx_PRE_DEC (Pmode,
25528 25530 stack_pointer_rtx)),
25529 25531 operands[1]));
25530 25532 emit_insn (
25531 25533 gen_rtx_SET (VOIDmode,
25532 25534 gen_rtx_MEM (SImode,
25533 25535 gen_rtx_PRE_DEC (Pmode,
25534 25536 stack_pointer_rtx)),
25535 25537 operands[0]));
25536 25538 }
25537 25539 break;
25538 25540 case HImode:
25539 25541 /* Store HImodes as SImodes. */
25540 25542 operand = gen_lowpart (SImode, operand);
25541 25543 /* FALLTHRU */
25542 25544 case SImode:
25543 25545 emit_insn (
25544 25546 gen_rtx_SET (VOIDmode,
25545 25547 gen_rtx_MEM (GET_MODE (operand),
25546 25548 gen_rtx_PRE_DEC (SImode,
25547 25549 stack_pointer_rtx)),
25548 25550 operand));
25549 25551 break;
25550 25552 default:
25551 25553 gcc_unreachable ();
25552 25554 }
25553 25555 result = gen_rtx_MEM (mode, stack_pointer_rtx);
25554 25556 }
25555 25557 return result;
25556 25558 }
25557 25559
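/* For illustration: on a 32-bit target without a red zone, forcing a DImode
   operand to memory with the code above emits two pre-decrement SImode
   stores (high word first, then low word), so the value ends up in
   little-endian order at the new stack pointer, and the returned MEM
   addresses that slot.  ix86_free_from_memory below releases it again.  */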
25558 25560 /* Free operand from the memory. */
25559 25561 void
25560 25562 ix86_free_from_memory (enum machine_mode mode)
25561 25563 {
25562 25564 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI)
25563 25565 {
25564 25566 int size;
25565 25567
25566 25568 if (mode == DImode || TARGET_64BIT)
25567 25569 size = 8;
25568 25570 else
25569 25571 size = 4;
25570 25572 /* Use LEA to deallocate stack space. In peephole2 it will be converted
25571 25573      to a pop or add instruction if registers are available.  */
25572 25574 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
25573 25575 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
25574 25576 GEN_INT (size))));
25575 25577 }
25576 25578 }
25577 25579
25578 25580 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
25579 25581 QImode must go into class Q_REGS.
25580 25582 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
25581 25583 movdf to do mem-to-mem moves through integer regs. */
25582 25584 enum reg_class
25583 25585 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
25584 25586 {
25585 25587 enum machine_mode mode = GET_MODE (x);
25586 25588
25587 25589 /* We're only allowed to return a subclass of CLASS. Many of the
25588 25590 following checks fail for NO_REGS, so eliminate that early. */
25589 25591 if (regclass == NO_REGS)
25590 25592 return NO_REGS;
25591 25593
25592 25594 /* All classes can load zeros. */
25593 25595 if (x == CONST0_RTX (mode))
25594 25596 return regclass;
25595 25597
25596 25598 /* Force constants into memory if we are loading a (nonzero) constant into
25597 25599 an MMX or SSE register. This is because there are no MMX/SSE instructions
25598 25600 to load from a constant. */
25599 25601 if (CONSTANT_P (x)
25600 25602 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
25601 25603 return NO_REGS;
25602 25604
25603 25605 /* Prefer SSE regs only, if we can use them for math. */
25604 25606 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
25605 25607 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
25606 25608
25607 25609 /* Floating-point constants need more complex checks. */
25608 25610 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
25609 25611 {
25610 25612 /* General regs can load everything. */
25611 25613 if (reg_class_subset_p (regclass, GENERAL_REGS))
25612 25614 return regclass;
25613 25615
25614 25616 /* Floats can load 0 and 1 plus some others. Note that we eliminated
25615 25617 zero above. We only want to wind up preferring 80387 registers if
25616 25618 we plan on doing computation with them. */
25617 25619 if (TARGET_80387
25618 25620 && standard_80387_constant_p (x))
25619 25621 {
25620 25622 /* Limit class to non-sse. */
25621 25623 if (regclass == FLOAT_SSE_REGS)
25622 25624 return FLOAT_REGS;
25623 25625 if (regclass == FP_TOP_SSE_REGS)
25624 25626 return FP_TOP_REG;
25625 25627 if (regclass == FP_SECOND_SSE_REGS)
25626 25628 return FP_SECOND_REG;
25627 25629 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
25628 25630 return regclass;
25629 25631 }
25630 25632
25631 25633 return NO_REGS;
25632 25634 }
25633 25635
25634 25636 /* Generally when we see PLUS here, it's the function invariant
25635 25637      (plus soft-fp const_int), which can only be computed into general
25636 25638 regs. */
25637 25639 if (GET_CODE (x) == PLUS)
25638 25640 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
25639 25641
25640 25642 /* QImode constants are easy to load, but non-constant QImode data
25641 25643 must go into Q_REGS. */
25642 25644 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
25643 25645 {
25644 25646 if (reg_class_subset_p (regclass, Q_REGS))
25645 25647 return regclass;
25646 25648 if (reg_class_subset_p (Q_REGS, regclass))
25647 25649 return Q_REGS;
25648 25650 return NO_REGS;
25649 25651 }
25650 25652
25651 25653 return regclass;
25652 25654 }
25653 25655
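/* Example of the preferences above: reloading the floating constant 1.0
   into an SSE class yields NO_REGS, forcing the constant into the constant
   pool, while with TARGET_80387 a FLOAT_REGS reload keeps it, since
   standard_80387_constant_p recognizes 1.0 (loadable with fld1).  */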
25654 25656 /* Discourage putting floating-point values in SSE registers unless
25655 25657 SSE math is being used, and likewise for the 387 registers. */
25656 25658 enum reg_class
25657 25659 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
25658 25660 {
25659 25661 enum machine_mode mode = GET_MODE (x);
25660 25662
25661 25663 /* Restrict the output reload class to the register bank that we are doing
25662 25664 math on. If we would like not to return a subset of CLASS, reject this
25663 25665 alternative: if reload cannot do this, it will still use its choice. */
25664 25666 mode = GET_MODE (x);
25665 25667 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
25666 25668 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
25667 25669
25668 25670 if (X87_FLOAT_MODE_P (mode))
25669 25671 {
25670 25672 if (regclass == FP_TOP_SSE_REGS)
25671 25673 return FP_TOP_REG;
25672 25674 else if (regclass == FP_SECOND_SSE_REGS)
25673 25675 return FP_SECOND_REG;
25674 25676 else
25675 25677 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
25676 25678 }
25677 25679
25678 25680 return regclass;
25679 25681 }
25680 25682
25681 25683 static enum reg_class
25682 25684 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
25683 25685 enum machine_mode mode,
25684 25686 secondary_reload_info *sri ATTRIBUTE_UNUSED)
25685 25687 {
25686 25688 /* QImode spills from non-QI registers require
25687 25689 intermediate register on 32bit targets. */
25688 25690 if (!in_p && mode == QImode && !TARGET_64BIT
25689 25691 && (rclass == GENERAL_REGS
25690 25692 || rclass == LEGACY_REGS
25691 25693 || rclass == INDEX_REGS))
25692 25694 {
25693 25695 int regno;
25694 25696
25695 25697 if (REG_P (x))
25696 25698 regno = REGNO (x);
25697 25699 else
25698 25700 regno = -1;
25699 25701
25700 25702 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
25701 25703 regno = true_regnum (x);
25702 25704
25703 25705 /* Return Q_REGS if the operand is in memory. */
25704 25706 if (regno == -1)
25705 25707 return Q_REGS;
25706 25708 }
25707 25709
25708 25710 return NO_REGS;
25709 25711 }
25710 25712
25711 25713 /* If we are copying between general and FP registers, we need a memory
25712 25714 location. The same is true for SSE and MMX registers.
25713 25715
25714 25716 To optimize register_move_cost performance, allow inline variant.
25715 25717
25716 25718    The macro can't work reliably when one of the CLASSES is a class containing
25717 25719 registers from multiple units (SSE, MMX, integer). We avoid this by never
25718 25720 combining those units in single alternative in the machine description.
25719 25721 Ensure that this constraint holds to avoid unexpected surprises.
25720 25722
25721 25723 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
25722 25724 enforce these sanity checks. */
25723 25725
25724 25726 static inline int
25725 25727 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25726 25728 enum machine_mode mode, int strict)
25727 25729 {
25728 25730 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
25729 25731 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
25730 25732 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
25731 25733 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
25732 25734 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
25733 25735 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
25734 25736 {
25735 25737 gcc_assert (!strict);
25736 25738 return true;
25737 25739 }
25738 25740
25739 25741 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
25740 25742 return true;
25741 25743
25742 25744 /* ??? This is a lie. We do have moves between mmx/general, and for
25743 25745 mmx/sse2. But by saying we need secondary memory we discourage the
25744 25746 register allocator from using the mmx registers unless needed. */
25745 25747 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
25746 25748 return true;
25747 25749
25748 25750 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25749 25751 {
25750 25752 /* SSE1 doesn't have any direct moves from other classes. */
25751 25753 if (!TARGET_SSE2)
25752 25754 return true;
25753 25755
25754 25756 /* If the target says that inter-unit moves are more expensive
25755 25757 than moving through memory, then don't generate them. */
25756 25758 if (!TARGET_INTER_UNIT_MOVES)
25757 25759 return true;
25758 25760
25759 25761 /* Between SSE and general, we have moves no larger than word size. */
25760 25762 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
25761 25763 return true;
25762 25764 }
25763 25765
25764 25766 return false;
25765 25767 }
25766 25768
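/* Example: on a 32-bit target, a DImode move between SSE_REGS and
   GENERAL_REGS is reported as needing secondary memory, because
   GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD there, so the value goes
   through a stack slot instead of a direct inter-unit move.  */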
25767 25769 int
25768 25770 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
25769 25771 enum machine_mode mode, int strict)
25770 25772 {
25771 25773 return inline_secondary_memory_needed (class1, class2, mode, strict);
25772 25774 }
25773 25775
25774 25776 /* Return true if the registers in CLASS cannot represent the change from
25775 25777 modes FROM to TO. */
25776 25778
25777 25779 bool
25778 25780 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
25779 25781 enum reg_class regclass)
25780 25782 {
25781 25783 if (from == to)
25782 25784 return false;
25783 25785
25784 25786 /* x87 registers can't do subreg at all, as all values are reformatted
25785 25787 to extended precision. */
25786 25788 if (MAYBE_FLOAT_CLASS_P (regclass))
25787 25789 return true;
25788 25790
25789 25791 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
25790 25792 {
25791 25793 /* Vector registers do not support QI or HImode loads. If we don't
25792 25794 disallow a change to these modes, reload will assume it's ok to
25793 25795 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
25794 25796 the vec_dupv4hi pattern. */
25795 25797 if (GET_MODE_SIZE (from) < 4)
25796 25798 return true;
25797 25799
25798 25800 /* Vector registers do not support subreg with nonzero offsets, which
25799 25801 are otherwise valid for integer registers. Since we can't see
25800 25802 whether we have a nonzero offset from here, prohibit all
25801 25803 nonparadoxical subregs changing size. */
25802 25804 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
25803 25805 return true;
25804 25806 }
25805 25807
25806 25808 return false;
25807 25809 }
25808 25810
25809 25811 /* Return the cost of moving data of mode M between a
25810 25812 register and memory. A value of 2 is the default; this cost is
25811 25813 relative to those in `REGISTER_MOVE_COST'.
25812 25814
25813 25815    This function is used extensively by register_move_cost, which is used to
25814 25816    build tables at startup.  Make it inline in this case.
25815 25817    When IN is 2, return the maximum of the in and out move costs.
25816 25818
25817 25819 If moving between registers and memory is more expensive than
25818 25820 between two registers, you should define this macro to express the
25819 25821 relative cost.
25820 25822
25821 25823    Also model the increased cost of moving QImode registers in non-Q_REGS
25822 25824    classes.
25823 25825 */
25824 25826 static inline int
25825 25827 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
25826 25828 int in)
25827 25829 {
25828 25830 int cost;
25829 25831 if (FLOAT_CLASS_P (regclass))
25830 25832 {
25831 25833 int index;
25832 25834 switch (mode)
25833 25835 {
25834 25836 case SFmode:
25835 25837 index = 0;
25836 25838 break;
25837 25839 case DFmode:
25838 25840 index = 1;
25839 25841 break;
25840 25842 case XFmode:
25841 25843 index = 2;
25842 25844 break;
25843 25845 default:
25844 25846 return 100;
25845 25847 }
25846 25848 if (in == 2)
25847 25849 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
25848 25850 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
25849 25851 }
25850 25852 if (SSE_CLASS_P (regclass))
25851 25853 {
25852 25854 int index;
25853 25855 switch (GET_MODE_SIZE (mode))
25854 25856 {
25855 25857 case 4:
25856 25858 index = 0;
25857 25859 break;
25858 25860 case 8:
25859 25861 index = 1;
25860 25862 break;
25861 25863 case 16:
25862 25864 index = 2;
25863 25865 break;
25864 25866 default:
25865 25867 return 100;
25866 25868 }
25867 25869 if (in == 2)
25868 25870 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
25869 25871 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
25870 25872 }
25871 25873 if (MMX_CLASS_P (regclass))
25872 25874 {
25873 25875 int index;
25874 25876 switch (GET_MODE_SIZE (mode))
25875 25877 {
25876 25878 case 4:
25877 25879 index = 0;
25878 25880 break;
25879 25881 case 8:
25880 25882 index = 1;
25881 25883 break;
25882 25884 default:
25883 25885 return 100;
25884 25886 }
25885 25887       if (in == 2)
25886 25888 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
25887 25889 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
25888 25890 }
25889 25891 switch (GET_MODE_SIZE (mode))
25890 25892 {
25891 25893 case 1:
25892 25894 if (Q_CLASS_P (regclass) || TARGET_64BIT)
25893 25895 {
25894 25896 if (!in)
25895 25897 return ix86_cost->int_store[0];
25896 25898 if (TARGET_PARTIAL_REG_DEPENDENCY
25897 25899 && optimize_function_for_speed_p (cfun))
25898 25900 cost = ix86_cost->movzbl_load;
25899 25901 else
25900 25902 cost = ix86_cost->int_load[0];
25901 25903 if (in == 2)
25902 25904 return MAX (cost, ix86_cost->int_store[0]);
25903 25905 return cost;
25904 25906 }
25905 25907 else
25906 25908 {
25907 25909 if (in == 2)
25908 25910 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
25909 25911 if (in)
25910 25912 return ix86_cost->movzbl_load;
25911 25913 else
25912 25914 return ix86_cost->int_store[0] + 4;
25913 25915 }
25914 25916 break;
25915 25917 case 2:
25916 25918 if (in == 2)
25917 25919 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
25918 25920 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
25919 25921 default:
25920 25922 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
25921 25923 if (mode == TFmode)
25922 25924 mode = XFmode;
25923 25925 if (in == 2)
25924 25926 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
25925 25927 else if (in)
25926 25928 cost = ix86_cost->int_load[2];
25927 25929 else
25928 25930 cost = ix86_cost->int_store[2];
25929 25931 return (cost * (((int) GET_MODE_SIZE (mode)
25930 25932 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
25931 25933 }
25932 25934 }
25933 25935
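/* Worked example (cost numbers are tuning-dependent): with IN == 2 a DFmode
   value in FLOAT_REGS costs MAX (fp_load[1], fp_store[1]), and a 12-byte
   XFmode value kept in general registers on a 32-bit target falls into the
   default case and is charged (12 + 3) / 4 = 3 word moves.  */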
25934 25936 int
25935 25937 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
25936 25938 {
25937 25939 return inline_memory_move_cost (mode, regclass, in);
25938 25940 }
25939 25941
25940 25942
25941 25943 /* Return the cost of moving data from a register in class CLASS1 to
25942 25944 one in class CLASS2.
25943 25945
25944 25946 It is not required that the cost always equal 2 when FROM is the same as TO;
25945 25947 on some machines it is expensive to move between registers if they are not
25946 25948 general registers. */
25947 25949
25948 25950 int
25949 25951 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
25950 25952 enum reg_class class2)
25951 25953 {
25952 25954 /* In case we require secondary memory, compute cost of the store followed
25953 25955 by load. In order to avoid bad register allocation choices, we need
25954 25956 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
25955 25957
25956 25958 if (inline_secondary_memory_needed (class1, class2, mode, 0))
25957 25959 {
25958 25960 int cost = 1;
25959 25961
25960 25962 cost += inline_memory_move_cost (mode, class1, 2);
25961 25963 cost += inline_memory_move_cost (mode, class2, 2);
25962 25964
25963 25965 /* In case of copying from general_purpose_register we may emit multiple
25964 25966 stores followed by single load causing memory size mismatch stall.
25965 25967 Count this as arbitrarily high cost of 20. */
25966 25968 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
25967 25969 cost += 20;
25968 25970
25969 25971 /* In the case of FP/MMX moves, the registers actually overlap, and we
25970 25972 have to switch modes in order to treat them differently. */
25971 25973 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
25972 25974 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
25973 25975 cost += 20;
25974 25976
25975 25977 return cost;
25976 25978 }
25977 25979
25978 25980 /* Moves between SSE/MMX and integer unit are expensive. */
25979 25981 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
25980 25982 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
25981 25983
25982 25984 /* ??? By keeping returned value relatively high, we limit the number
25983 25985 of moves between integer and MMX/SSE registers for all targets.
25984 25986 Additionally, high value prevents problem with x86_modes_tieable_p(),
25985 25987 where integer modes in MMX/SSE registers are not tieable
25986 25988 because of missing QImode and HImode moves to, from or between
25987 25989 MMX/SSE registers. */
25988 25990 return MAX (8, ix86_cost->mmxsse_to_integer);
25989 25991
25990 25992 if (MAYBE_FLOAT_CLASS_P (class1))
25991 25993 return ix86_cost->fp_move;
25992 25994 if (MAYBE_SSE_CLASS_P (class1))
25993 25995 return ix86_cost->sse_move;
25994 25996 if (MAYBE_MMX_CLASS_P (class1))
25995 25997 return ix86_cost->mmx_move;
25996 25998 return 2;
25997 25999 }
25998 26000
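/* Worked example: copying DImode from GENERAL_REGS to SSE_REGS on a 32-bit
   target needs secondary memory (see above), so the cost is 1 plus the
   two-way memory-move cost of each class, plus the extra 20 charged because
   the two general registers are stored separately and reloaded by a single
   8-byte load (CLASS_MAX_NREGS 2 > 1).  */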
25999 26001 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
26000 26002
26001 26003 bool
26002 26004 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
26003 26005 {
26004 26006 /* Flags and only flags can only hold CCmode values. */
26005 26007 if (CC_REGNO_P (regno))
26006 26008 return GET_MODE_CLASS (mode) == MODE_CC;
26007 26009 if (GET_MODE_CLASS (mode) == MODE_CC
26008 26010 || GET_MODE_CLASS (mode) == MODE_RANDOM
26009 26011 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
26010 26012 return 0;
26011 26013 if (FP_REGNO_P (regno))
26012 26014 return VALID_FP_MODE_P (mode);
26013 26015 if (SSE_REGNO_P (regno))
26014 26016 {
26015 26017 /* We implement the move patterns for all vector modes into and
26016 26018 out of SSE registers, even when no operation instructions
26017 26019 are available. OImode move is available only when AVX is
26018 26020 enabled. */
26019 26021 return ((TARGET_AVX && mode == OImode)
26020 26022 || VALID_AVX256_REG_MODE (mode)
26021 26023 || VALID_SSE_REG_MODE (mode)
26022 26024 || VALID_SSE2_REG_MODE (mode)
26023 26025 || VALID_MMX_REG_MODE (mode)
26024 26026 || VALID_MMX_REG_MODE_3DNOW (mode));
26025 26027 }
26026 26028 if (MMX_REGNO_P (regno))
26027 26029 {
26028 26030 /* We implement the move patterns for 3DNOW modes even in MMX mode,
26029 26031 so if the register is available at all, then we can move data of
26030 26032 the given mode into or out of it. */
26031 26033 return (VALID_MMX_REG_MODE (mode)
26032 26034 || VALID_MMX_REG_MODE_3DNOW (mode));
26033 26035 }
26034 26036
26035 26037 if (mode == QImode)
26036 26038 {
26037 26039 /* Take care for QImode values - they can be in non-QI regs,
26038 26040 but then they do cause partial register stalls. */
26039 26041 if (regno <= BX_REG || TARGET_64BIT)
26040 26042 return 1;
26041 26043 if (!TARGET_PARTIAL_REG_STALL)
26042 26044 return 1;
26043 26045 return reload_in_progress || reload_completed;
26044 26046 }
26045 26047 /* We handle both integer and floats in the general purpose registers. */
26046 26048 else if (VALID_INT_MODE_P (mode))
26047 26049 return 1;
26048 26050 else if (VALID_FP_MODE_P (mode))
26049 26051 return 1;
26050 26052 else if (VALID_DFP_MODE_P (mode))
26051 26053 return 1;
26052 26054 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
26053 26055 on to use that value in smaller contexts, this can easily force a
26054 26056 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
26055 26057 supporting DImode, allow it. */
26056 26058 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
26057 26059 return 1;
26058 26060
26059 26061 return 0;
26060 26062 }
26061 26063
26062 26064 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
26063 26065 tieable integer mode. */
26064 26066
26065 26067 static bool
26066 26068 ix86_tieable_integer_mode_p (enum machine_mode mode)
26067 26069 {
26068 26070 switch (mode)
26069 26071 {
26070 26072 case HImode:
26071 26073 case SImode:
26072 26074 return true;
26073 26075
26074 26076 case QImode:
26075 26077 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
26076 26078
26077 26079 case DImode:
26078 26080 return TARGET_64BIT;
26079 26081
26080 26082 default:
26081 26083 return false;
26082 26084 }
26083 26085 }
26084 26086
26085 26087 /* Return true if MODE1 is accessible in a register that can hold MODE2
26086 26088 without copying. That is, all register classes that can hold MODE2
26087 26089 can also hold MODE1. */
26088 26090
26089 26091 bool
26090 26092 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
26091 26093 {
26092 26094 if (mode1 == mode2)
26093 26095 return true;
26094 26096
26095 26097 if (ix86_tieable_integer_mode_p (mode1)
26096 26098 && ix86_tieable_integer_mode_p (mode2))
26097 26099 return true;
26098 26100
26099 26101 /* MODE2 being XFmode implies fp stack or general regs, which means we
26100 26102 can tie any smaller floating point modes to it. Note that we do not
26101 26103 tie this with TFmode. */
26102 26104 if (mode2 == XFmode)
26103 26105 return mode1 == SFmode || mode1 == DFmode;
26104 26106
26105 26107 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
26106 26108 that we can tie it with SFmode. */
26107 26109 if (mode2 == DFmode)
26108 26110 return mode1 == SFmode;
26109 26111
26110 26112 /* If MODE2 is only appropriate for an SSE register, then tie with
26111 26113 any other mode acceptable to SSE registers. */
26112 26114 if (GET_MODE_SIZE (mode2) == 16
26113 26115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
26114 26116 return (GET_MODE_SIZE (mode1) == 16
26115 26117 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
26116 26118
26117 26119 /* If MODE2 is appropriate for an MMX register, then tie
26118 26120 with any other mode acceptable to MMX registers. */
26119 26121 if (GET_MODE_SIZE (mode2) == 8
26120 26122 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
26121 26123 return (GET_MODE_SIZE (mode1) == 8
26122 26124 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
26123 26125
26124 26126 return false;
26125 26127 }
26126 26128
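/* Examples: HImode and SImode always tie; DImode ties only on 64-bit
   targets; SFmode may live in a register holding DFmode or XFmode; and two
   16-byte vector modes tie whenever both are valid for SSE registers.  */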
26127 26129 /* Compute a (partial) cost for rtx X. Return true if the complete
26128 26130 cost has been computed, and false if subexpressions should be
26129 26131 scanned. In either case, *TOTAL contains the cost result. */
26130 26132
26131 26133 static bool
26132 26134 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed)
26133 26135 {
26134 26136 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
26135 26137 enum machine_mode mode = GET_MODE (x);
26136 26138 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost;
26137 26139
26138 26140 switch (code)
26139 26141 {
26140 26142 case CONST_INT:
26141 26143 case CONST:
26142 26144 case LABEL_REF:
26143 26145 case SYMBOL_REF:
26144 26146 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
26145 26147 *total = 3;
26146 26148 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
26147 26149 *total = 2;
26148 26150 else if (flag_pic && SYMBOLIC_CONST (x)
26149 26151 && (!TARGET_64BIT
26150 26152 		   || (GET_CODE (x) != LABEL_REF
26151 26153 && (GET_CODE (x) != SYMBOL_REF
26152 26154 || !SYMBOL_REF_LOCAL_P (x)))))
26153 26155 *total = 1;
26154 26156 else
26155 26157 *total = 0;
26156 26158 return true;
26157 26159
26158 26160 case CONST_DOUBLE:
26159 26161 if (mode == VOIDmode)
26160 26162 *total = 0;
26161 26163 else
26162 26164 switch (standard_80387_constant_p (x))
26163 26165 {
26164 26166 case 1: /* 0.0 */
26165 26167 *total = 1;
26166 26168 break;
26167 26169 default: /* Other constants */
26168 26170 *total = 2;
26169 26171 break;
26170 26172 case 0:
26171 26173 case -1:
26172 26174 /* Start with (MEM (SYMBOL_REF)), since that's where
26173 26175 it'll probably end up. Add a penalty for size. */
26174 26176 *total = (COSTS_N_INSNS (1)
26175 26177 + (flag_pic != 0 && !TARGET_64BIT)
26176 26178 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
26177 26179 break;
26178 26180 }
26179 26181 return true;
26180 26182
26181 26183 case ZERO_EXTEND:
26182 26184       /* The zero extension is often completely free on x86_64, so make
26183 26185 it as cheap as possible. */
26184 26186 if (TARGET_64BIT && mode == DImode
26185 26187 && GET_MODE (XEXP (x, 0)) == SImode)
26186 26188 *total = 1;
26187 26189 else if (TARGET_ZERO_EXTEND_WITH_AND)
26188 26190 *total = cost->add;
26189 26191 else
26190 26192 *total = cost->movzx;
26191 26193 return false;
26192 26194
26193 26195 case SIGN_EXTEND:
26194 26196 *total = cost->movsx;
26195 26197 return false;
26196 26198
26197 26199 case ASHIFT:
26198 26200 if (CONST_INT_P (XEXP (x, 1))
26199 26201 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
26200 26202 {
26201 26203 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26202 26204 if (value == 1)
26203 26205 {
26204 26206 *total = cost->add;
26205 26207 return false;
26206 26208 }
26207 26209 if ((value == 2 || value == 3)
26208 26210 && cost->lea <= cost->shift_const)
26209 26211 {
26210 26212 *total = cost->lea;
26211 26213 return false;
26212 26214 }
26213 26215 }
26214 26216 /* FALLTHRU */
26215 26217
26216 26218 case ROTATE:
26217 26219 case ASHIFTRT:
26218 26220 case LSHIFTRT:
26219 26221 case ROTATERT:
26220 26222 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
26221 26223 {
26222 26224 if (CONST_INT_P (XEXP (x, 1)))
26223 26225 {
26224 26226 if (INTVAL (XEXP (x, 1)) > 32)
26225 26227 *total = cost->shift_const + COSTS_N_INSNS (2);
26226 26228 else
26227 26229 *total = cost->shift_const * 2;
26228 26230 }
26229 26231 else
26230 26232 {
26231 26233 if (GET_CODE (XEXP (x, 1)) == AND)
26232 26234 *total = cost->shift_var * 2;
26233 26235 else
26234 26236 *total = cost->shift_var * 6 + COSTS_N_INSNS (2);
26235 26237 }
26236 26238 }
26237 26239 else
26238 26240 {
26239 26241 if (CONST_INT_P (XEXP (x, 1)))
26240 26242 *total = cost->shift_const;
26241 26243 else
26242 26244 *total = cost->shift_var;
26243 26245 }
26244 26246 return false;
26245 26247
26246 26248 case MULT:
26247 26249 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26248 26250 {
26249 26251 /* ??? SSE scalar cost should be used here. */
26250 26252 *total = cost->fmul;
26251 26253 return false;
26252 26254 }
26253 26255 else if (X87_FLOAT_MODE_P (mode))
26254 26256 {
26255 26257 *total = cost->fmul;
26256 26258 return false;
26257 26259 }
26258 26260 else if (FLOAT_MODE_P (mode))
26259 26261 {
26260 26262 /* ??? SSE vector cost should be used here. */
26261 26263 *total = cost->fmul;
26262 26264 return false;
26263 26265 }
26264 26266 else
26265 26267 {
26266 26268 rtx op0 = XEXP (x, 0);
26267 26269 rtx op1 = XEXP (x, 1);
26268 26270 int nbits;
26269 26271 if (CONST_INT_P (XEXP (x, 1)))
26270 26272 {
26271 26273 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
26272 26274 for (nbits = 0; value != 0; value &= value - 1)
26273 26275 nbits++;
26274 26276 }
26275 26277 else
26276 26278 /* This is arbitrary. */
26277 26279 nbits = 7;
26278 26280
26279 26281 /* Compute costs correctly for widening multiplication. */
26280 26282 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
26281 26283 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
26282 26284 == GET_MODE_SIZE (mode))
26283 26285 {
26284 26286 int is_mulwiden = 0;
26285 26287 enum machine_mode inner_mode = GET_MODE (op0);
26286 26288
26287 26289 if (GET_CODE (op0) == GET_CODE (op1))
26288 26290 is_mulwiden = 1, op1 = XEXP (op1, 0);
26289 26291 else if (CONST_INT_P (op1))
26290 26292 {
26291 26293 if (GET_CODE (op0) == SIGN_EXTEND)
26292 26294 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
26293 26295 == INTVAL (op1);
26294 26296 else
26295 26297 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
26296 26298 }
26297 26299
26298 26300 if (is_mulwiden)
26299 26301 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
26300 26302 }
26301 26303
26302 26304 *total = (cost->mult_init[MODE_INDEX (mode)]
26303 26305 + nbits * cost->mult_bit
26304 26306 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed));
26305 26307
26306 26308 return true;
26307 26309 }
26308 26310
26309 26311 case DIV:
26310 26312 case UDIV:
26311 26313 case MOD:
26312 26314 case UMOD:
26313 26315 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26314 26316 /* ??? SSE cost should be used here. */
26315 26317 *total = cost->fdiv;
26316 26318 else if (X87_FLOAT_MODE_P (mode))
26317 26319 *total = cost->fdiv;
26318 26320 else if (FLOAT_MODE_P (mode))
26319 26321 /* ??? SSE vector cost should be used here. */
26320 26322 *total = cost->fdiv;
26321 26323 else
26322 26324 *total = cost->divide[MODE_INDEX (mode)];
26323 26325 return false;
26324 26326
26325 26327 case PLUS:
26326 26328 if (GET_MODE_CLASS (mode) == MODE_INT
26327 26329 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
26328 26330 {
26329 26331 if (GET_CODE (XEXP (x, 0)) == PLUS
26330 26332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
26331 26333 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
26332 26334 && CONSTANT_P (XEXP (x, 1)))
26333 26335 {
26334 26336 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
26335 26337 if (val == 2 || val == 4 || val == 8)
26336 26338 {
26337 26339 *total = cost->lea;
26338 26340 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26339 26341 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
26340 26342 outer_code, speed);
26341 26343 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26342 26344 return true;
26343 26345 }
26344 26346 }
26345 26347 else if (GET_CODE (XEXP (x, 0)) == MULT
26346 26348 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
26347 26349 {
26348 26350 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
26349 26351 if (val == 2 || val == 4 || val == 8)
26350 26352 {
26351 26353 *total = cost->lea;
26352 26354 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26353 26355 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26354 26356 return true;
26355 26357 }
26356 26358 }
26357 26359 else if (GET_CODE (XEXP (x, 0)) == PLUS)
26358 26360 {
26359 26361 *total = cost->lea;
26360 26362 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed);
26361 26363 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed);
26362 26364 *total += rtx_cost (XEXP (x, 1), outer_code, speed);
26363 26365 return true;
26364 26366 }
26365 26367 }
26366 26368 /* FALLTHRU */
26367 26369
26368 26370 case MINUS:
26369 26371 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26370 26372 {
26371 26373 /* ??? SSE cost should be used here. */
26372 26374 *total = cost->fadd;
26373 26375 return false;
26374 26376 }
26375 26377 else if (X87_FLOAT_MODE_P (mode))
26376 26378 {
26377 26379 *total = cost->fadd;
26378 26380 return false;
26379 26381 }
26380 26382 else if (FLOAT_MODE_P (mode))
26381 26383 {
26382 26384 /* ??? SSE vector cost should be used here. */
26383 26385 *total = cost->fadd;
26384 26386 return false;
26385 26387 }
26386 26388 /* FALLTHRU */
26387 26389
26388 26390 case AND:
26389 26391 case IOR:
26390 26392 case XOR:
26391 26393 if (!TARGET_64BIT && mode == DImode)
26392 26394 {
26393 26395 *total = (cost->add * 2
26394 26396 + (rtx_cost (XEXP (x, 0), outer_code, speed)
26395 26397 << (GET_MODE (XEXP (x, 0)) != DImode))
26396 26398 + (rtx_cost (XEXP (x, 1), outer_code, speed)
26397 26399 << (GET_MODE (XEXP (x, 1)) != DImode)));
26398 26400 return true;
26399 26401 }
26400 26402 /* FALLTHRU */
26401 26403
26402 26404 case NEG:
26403 26405 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26404 26406 {
26405 26407 /* ??? SSE cost should be used here. */
26406 26408 *total = cost->fchs;
26407 26409 return false;
26408 26410 }
26409 26411 else if (X87_FLOAT_MODE_P (mode))
26410 26412 {
26411 26413 *total = cost->fchs;
26412 26414 return false;
26413 26415 }
26414 26416 else if (FLOAT_MODE_P (mode))
26415 26417 {
26416 26418 /* ??? SSE vector cost should be used here. */
26417 26419 *total = cost->fchs;
26418 26420 return false;
26419 26421 }
26420 26422 /* FALLTHRU */
26421 26423
26422 26424 case NOT:
26423 26425 if (!TARGET_64BIT && mode == DImode)
26424 26426 *total = cost->add * 2;
26425 26427 else
26426 26428 *total = cost->add;
26427 26429 return false;
26428 26430
26429 26431 case COMPARE:
26430 26432 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
26431 26433 && XEXP (XEXP (x, 0), 1) == const1_rtx
26432 26434 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
26433 26435 && XEXP (x, 1) == const0_rtx)
26434 26436 {
26435 26437 /* This kind of construct is implemented using test[bwl].
26436 26438 Treat it as if we had an AND. */
26437 26439 *total = (cost->add
26438 26440 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed)
26439 26441 + rtx_cost (const1_rtx, outer_code, speed));
26440 26442 return true;
26441 26443 }
26442 26444 return false;
26443 26445
26444 26446 case FLOAT_EXTEND:
26445 26447 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
26446 26448 *total = 0;
26447 26449 return false;
26448 26450
26449 26451 case ABS:
26450 26452 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26451 26453 /* ??? SSE cost should be used here. */
26452 26454 *total = cost->fabs;
26453 26455 else if (X87_FLOAT_MODE_P (mode))
26454 26456 *total = cost->fabs;
26455 26457 else if (FLOAT_MODE_P (mode))
26456 26458 /* ??? SSE vector cost should be used here. */
26457 26459 *total = cost->fabs;
26458 26460 return false;
26459 26461
26460 26462 case SQRT:
26461 26463 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
26462 26464 /* ??? SSE cost should be used here. */
26463 26465 *total = cost->fsqrt;
26464 26466 else if (X87_FLOAT_MODE_P (mode))
26465 26467 *total = cost->fsqrt;
26466 26468 else if (FLOAT_MODE_P (mode))
26467 26469 /* ??? SSE vector cost should be used here. */
26468 26470 *total = cost->fsqrt;
26469 26471 return false;
26470 26472
26471 26473 case UNSPEC:
26472 26474 if (XINT (x, 1) == UNSPEC_TP)
26473 26475 *total = 0;
26474 26476 return false;
26475 26477
26476 26478 default:
26477 26479 return false;
26478 26480 }
26479 26481 }
26480 26482
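/* Example of the shift costing above: a left shift by 1 is priced like an
   add, and shifts by 2 or 3 are priced as an lea when the tuning's lea cost
   is no higher than its constant-shift cost, since such shifts can be done
   with a scaled-index lea.  */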
26481 26483 #if TARGET_MACHO
26482 26484
26483 26485 static int current_machopic_label_num;
26484 26486
26485 26487 /* Given a symbol name and its associated stub, write out the
26486 26488 definition of the stub. */
26487 26489
26488 26490 void
26489 26491 machopic_output_stub (FILE *file, const char *symb, const char *stub)
26490 26492 {
26491 26493 unsigned int length;
26492 26494 char *binder_name, *symbol_name, lazy_ptr_name[32];
26493 26495 int label = ++current_machopic_label_num;
26494 26496
26495 26497 /* For 64-bit we shouldn't get here. */
26496 26498 gcc_assert (!TARGET_64BIT);
26497 26499
26498 26500 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
26499 26501 symb = (*targetm.strip_name_encoding) (symb);
26500 26502
26501 26503 length = strlen (stub);
26502 26504 binder_name = XALLOCAVEC (char, length + 32);
26503 26505 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
26504 26506
26505 26507 length = strlen (symb);
26506 26508 symbol_name = XALLOCAVEC (char, length + 32);
26507 26509 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
26508 26510
26509 26511 sprintf (lazy_ptr_name, "L%d$lz", label);
26510 26512
26511 26513 if (MACHOPIC_PURE)
26512 26514 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
26513 26515 else
26514 26516 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
26515 26517
26516 26518 fprintf (file, "%s:\n", stub);
26517 26519 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26518 26520
26519 26521 if (MACHOPIC_PURE)
26520 26522 {
26521 26523 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
26522 26524 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
26523 26525 fprintf (file, "\tjmp\t*%%edx\n");
26524 26526 }
26525 26527 else
26526 26528 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
26527 26529
26528 26530 fprintf (file, "%s:\n", binder_name);
26529 26531
26530 26532 if (MACHOPIC_PURE)
26531 26533 {
26532 26534 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
26533 26535 fprintf (file, "\tpushl\t%%eax\n");
26534 26536 }
26535 26537 else
26536 26538 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
26537 26539
26538 26540 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
26539 26541
26540 26542 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
26541 26543 fprintf (file, "%s:\n", lazy_ptr_name);
26542 26544 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
26543 26545 fprintf (file, "\t.long %s\n", binder_name);
26544 26546 }
26545 26547
26546 26548 void
26547 26549 darwin_x86_file_end (void)
26548 26550 {
26549 26551 darwin_file_end ();
26550 26552 ix86_file_end ();
26551 26553 }
26552 26554 #endif /* TARGET_MACHO */
26553 26555
26554 26556 /* Order the registers for register allocator. */
26555 26557
26556 26558 void
26557 26559 x86_order_regs_for_local_alloc (void)
26558 26560 {
26559 26561 int pos = 0;
26560 26562 int i;
26561 26563
26562 26564 /* First allocate the local general purpose registers. */
26563 26565 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26564 26566 if (GENERAL_REGNO_P (i) && call_used_regs[i])
26565 26567 reg_alloc_order [pos++] = i;
26566 26568
26567 26569 /* Global general purpose registers. */
26568 26570 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
26569 26571 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
26570 26572 reg_alloc_order [pos++] = i;
26571 26573
26572 26574 /* x87 registers come first in case we are doing FP math
26573 26575 using them. */
26574 26576 if (!TARGET_SSE_MATH)
26575 26577 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26576 26578 reg_alloc_order [pos++] = i;
26577 26579
26578 26580 /* SSE registers. */
26579 26581 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
26580 26582 reg_alloc_order [pos++] = i;
26581 26583 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
26582 26584 reg_alloc_order [pos++] = i;
26583 26585
26584 26586 /* x87 registers. */
26585 26587 if (TARGET_SSE_MATH)
26586 26588 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
26587 26589 reg_alloc_order [pos++] = i;
26588 26590
26589 26591 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
26590 26592 reg_alloc_order [pos++] = i;
26591 26593
26592 26594   /* Initialize the rest of the array as we do not allocate some registers
26593 26595 at all. */
26594 26596 while (pos < FIRST_PSEUDO_REGISTER)
26595 26597 reg_alloc_order [pos++] = 0;
26596 26598 }
26597 26599
26598 26600 /* Handle a "ms_abi" or "sysv" attribute; arguments as in
26599 26601 struct attribute_spec.handler. */
26600 26602 static tree
26601 26603 ix86_handle_abi_attribute (tree *node, tree name,
26602 26604 tree args ATTRIBUTE_UNUSED,
26603 26605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26604 26606 {
26605 26607 if (TREE_CODE (*node) != FUNCTION_TYPE
26606 26608 && TREE_CODE (*node) != METHOD_TYPE
26607 26609 && TREE_CODE (*node) != FIELD_DECL
26608 26610 && TREE_CODE (*node) != TYPE_DECL)
26609 26611 {
26610 26612 warning (OPT_Wattributes, "%qs attribute only applies to functions",
26611 26613 IDENTIFIER_POINTER (name));
26612 26614 *no_add_attrs = true;
26613 26615 return NULL_TREE;
26614 26616 }
26615 26617 if (!TARGET_64BIT)
26616 26618 {
26617 26619 warning (OPT_Wattributes, "%qs attribute only available for 64-bit",
26618 26620 IDENTIFIER_POINTER (name));
26619 26621 *no_add_attrs = true;
26620 26622 return NULL_TREE;
26621 26623 }
26622 26624
26623 26625   /* The two ABI attributes are mutually exclusive; diagnose mixing them.  */
26624 26626 if (is_attribute_p ("ms_abi", name))
26625 26627 {
26626 26628 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node)))
26627 26629 {
26628 26630 error ("ms_abi and sysv_abi attributes are not compatible");
26629 26631 }
26630 26632
26631 26633 return NULL_TREE;
26632 26634 }
26633 26635 else if (is_attribute_p ("sysv_abi", name))
26634 26636 {
26635 26637 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node)))
26636 26638 {
26637 26639 error ("ms_abi and sysv_abi attributes are not compatible");
26638 26640 }
26639 26641
26640 26642 return NULL_TREE;
26641 26643 }
26642 26644
26643 26645 return NULL_TREE;
26644 26646 }
26645 26647
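/* Illustrative use (hypothetical declaration, not part of this file):

     void __attribute__ ((ms_abi)) wincall (int a, double b);

   On a 64-bit target this selects the Microsoft calling convention for the
   function type; the handler above rejects the attribute on 32-bit targets
   and diagnoses combining ms_abi with sysv_abi.  */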
26646 26648 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
26647 26649 struct attribute_spec.handler. */
26648 26650 static tree
26649 26651 ix86_handle_struct_attribute (tree *node, tree name,
26650 26652 tree args ATTRIBUTE_UNUSED,
26651 26653 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
26652 26654 {
26653 26655 tree *type = NULL;
26654 26656 if (DECL_P (*node))
26655 26657 {
26656 26658 if (TREE_CODE (*node) == TYPE_DECL)
26657 26659 type = &TREE_TYPE (*node);
26658 26660 }
26659 26661 else
26660 26662 type = node;
26661 26663
26662 26664 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
26663 26665 || TREE_CODE (*type) == UNION_TYPE)))
26664 26666 {
26665 26667 warning (OPT_Wattributes, "%qs attribute ignored",
26666 26668 IDENTIFIER_POINTER (name));
26667 26669 *no_add_attrs = true;
26668 26670 }
26669 26671
26670 26672 else if ((is_attribute_p ("ms_struct", name)
26671 26673 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
26672 26674 || ((is_attribute_p ("gcc_struct", name)
26673 26675 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
26674 26676 {
26675 26677 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
26676 26678 IDENTIFIER_POINTER (name));
26677 26679 *no_add_attrs = true;
26678 26680 }
26679 26681
26680 26682 return NULL_TREE;
26681 26683 }
26682 26684
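/* Illustrative use (hypothetical type, not part of this file):

     struct __attribute__ ((ms_struct)) s { char c; int x : 7; };

   ms_struct and gcc_struct select the bit-field layout rules tested by
   ix86_ms_bitfield_layout_p below; the handler accepts them only on struct
   and union types and ignores conflicting combinations.  */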
26683 26685 static bool
26684 26686 ix86_ms_bitfield_layout_p (const_tree record_type)
26685 26687 {
26686 26688 return (TARGET_MS_BITFIELD_LAYOUT &&
26687 26689 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
26688 26690 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
26689 26691 }
26690 26692
26691 26693 /* Returns an expression indicating where the this parameter is
26692 26694 located on entry to the FUNCTION. */
26693 26695
26694 26696 static rtx
26695 26697 x86_this_parameter (tree function)
26696 26698 {
26697 26699 tree type = TREE_TYPE (function);
26698 26700 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
26699 26701 int nregs;
26700 26702
26701 26703 if (TARGET_64BIT)
26702 26704 {
26703 26705 const int *parm_regs;
26704 26706
26705 26707 if (ix86_function_type_abi (type) == MS_ABI)
26706 26708 parm_regs = x86_64_ms_abi_int_parameter_registers;
26707 26709 else
26708 26710 parm_regs = x86_64_int_parameter_registers;
26709 26711 return gen_rtx_REG (DImode, parm_regs[aggr]);
26710 26712 }
26711 26713
26712 26714 nregs = ix86_function_regparm (type, function);
26713 26715
26714 26716 if (nregs > 0 && !stdarg_p (type))
26715 26717 {
26716 26718 int regno;
26717 26719
26718 26720 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
26719 26721 regno = aggr ? DX_REG : CX_REG;
26720 26722 else
26721 26723 {
26722 26724 regno = AX_REG;
26723 26725 if (aggr)
26724 26726 {
26725 26727 regno = DX_REG;
26726 26728 if (nregs == 1)
26727 26729 return gen_rtx_MEM (SImode,
26728 26730 plus_constant (stack_pointer_rtx, 4));
26729 26731 }
26730 26732 }
26731 26733 return gen_rtx_REG (SImode, regno);
26732 26734 }
26733 26735
26734 26736 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
26735 26737 }
26736 26738
26737 26739 /* Determine whether x86_output_mi_thunk can succeed. */
26738 26740
26739 26741 static bool
26740 26742 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
26741 26743 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
26742 26744 HOST_WIDE_INT vcall_offset, const_tree function)
26743 26745 {
26744 26746 /* 64-bit can handle anything. */
26745 26747 if (TARGET_64BIT)
26746 26748 return true;
26747 26749
26748 26750 /* For 32-bit, everything's fine if we have one free register. */
26749 26751 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
26750 26752 return true;
26751 26753
26752 26754 /* Need a free register for vcall_offset. */
26753 26755 if (vcall_offset)
26754 26756 return false;
26755 26757
26756 26758 /* Need a free register for GOT references. */
26757 26759 if (flag_pic && !(*targetm.binds_local_p) (function))
26758 26760 return false;
26759 26761
26760 26762 /* Otherwise ok. */
26761 26763 return true;
26762 26764 }
26763 26765
26764 26766 /* Output the assembler code for a thunk function. THUNK_DECL is the
26765 26767 declaration for the thunk function itself, FUNCTION is the decl for
26766 26768 the target function. DELTA is an immediate constant offset to be
26767 26769 added to THIS. If VCALL_OFFSET is nonzero, the word at
26768 26770 *(*this + vcall_offset) should be added to THIS. */
26769 26771
26770 26772 static void
26771 26773 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
26772 26774 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
26773 26775 HOST_WIDE_INT vcall_offset, tree function)
26774 26776 {
26775 26777 rtx xops[3];
26776 26778 rtx this_param = x86_this_parameter (function);
26777 26779 rtx this_reg, tmp;
26778 26780
26779 26781 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
26780 26782 pull it in now and let DELTA benefit. */
26781 26783 if (REG_P (this_param))
26782 26784 this_reg = this_param;
26783 26785 else if (vcall_offset)
26784 26786 {
26785 26787 /* Put the this parameter into %eax. */
26786 26788 xops[0] = this_param;
26787 26789 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG);
26788 26790 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26789 26791 }
26790 26792 else
26791 26793 this_reg = NULL_RTX;
26792 26794
26793 26795 /* Adjust the this parameter by a fixed constant. */
26794 26796 if (delta)
26795 26797 {
26796 26798 xops[0] = GEN_INT (delta);
26797 26799 xops[1] = this_reg ? this_reg : this_param;
26798 26800 if (TARGET_64BIT)
26799 26801 {
26800 26802 if (!x86_64_general_operand (xops[0], DImode))
26801 26803 {
26802 26804 tmp = gen_rtx_REG (DImode, R10_REG);
26803 26805 xops[1] = tmp;
26804 26806 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
26805 26807 xops[0] = tmp;
26806 26808 xops[1] = this_param;
26807 26809 }
26808 26810 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
26809 26811 }
26810 26812 else
26811 26813 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
26812 26814 }
26813 26815
26814 26816 /* Adjust the this parameter by a value stored in the vtable. */
26815 26817 if (vcall_offset)
26816 26818 {
26817 26819 if (TARGET_64BIT)
26818 26820 tmp = gen_rtx_REG (DImode, R10_REG);
26819 26821 else
26820 26822 {
26821 26823 int tmp_regno = CX_REG;
26822 26824 if (lookup_attribute ("fastcall",
26823 26825 TYPE_ATTRIBUTES (TREE_TYPE (function))))
26824 26826 tmp_regno = AX_REG;
26825 26827 tmp = gen_rtx_REG (SImode, tmp_regno);
26826 26828 }
26827 26829
26828 26830 xops[0] = gen_rtx_MEM (Pmode, this_reg);
26829 26831 xops[1] = tmp;
26830 26832 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26831 26833
26832 26834 /* Adjust the this parameter. */
26833 26835 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
26834 26836 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
26835 26837 {
26836 26838 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
26837 26839 xops[0] = GEN_INT (vcall_offset);
26838 26840 xops[1] = tmp2;
26839 26841 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
26840 26842 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
26841 26843 }
26842 26844 xops[1] = this_reg;
26843 26845 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops);
26844 26846 }
26845 26847
26846 26848 /* If necessary, drop THIS back to its stack slot. */
26847 26849 if (this_reg && this_reg != this_param)
26848 26850 {
26849 26851 xops[0] = this_reg;
26850 26852 xops[1] = this_param;
26851 26853 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops);
26852 26854 }
26853 26855
26854 26856 xops[0] = XEXP (DECL_RTL (function), 0);
26855 26857 if (TARGET_64BIT)
26856 26858 {
26857 26859 if (!flag_pic || (*targetm.binds_local_p) (function))
26858 26860 output_asm_insn ("jmp\t%P0", xops);
26859 26861 /* All thunks should be in the same object as their target,
26860 26862 and thus binds_local_p should be true. */
26861 26863 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI)
26862 26864 gcc_unreachable ();
26863 26865 else
26864 26866 {
26865 26867 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
26866 26868 tmp = gen_rtx_CONST (Pmode, tmp);
26867 26869 tmp = gen_rtx_MEM (QImode, tmp);
26868 26870 xops[0] = tmp;
26869 26871 output_asm_insn ("jmp\t%A0", xops);
26870 26872 }
26871 26873 }
26872 26874 else
26873 26875 {
26874 26876 if (!flag_pic || (*targetm.binds_local_p) (function))
26875 26877 output_asm_insn ("jmp\t%P0", xops);
26876 26878 else
26877 26879 #if TARGET_MACHO
26878 26880 if (TARGET_MACHO)
26879 26881 {
26880 26882 rtx sym_ref = XEXP (DECL_RTL (function), 0);
26881 26883 tmp = (gen_rtx_SYMBOL_REF
26882 26884 (Pmode,
26883 26885 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
26884 26886 tmp = gen_rtx_MEM (QImode, tmp);
26885 26887 xops[0] = tmp;
26886 26888 output_asm_insn ("jmp\t%0", xops);
26887 26889 }
26888 26890 else
26889 26891 #endif /* TARGET_MACHO */
26890 26892 {
26891 26893 tmp = gen_rtx_REG (SImode, CX_REG);
26892 26894 output_set_got (tmp, NULL_RTX);
26893 26895
26894 26896 xops[1] = tmp;
26895 26897 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
26896 26898 output_asm_insn ("jmp\t{*}%1", xops);
26897 26899 }
26898 26900 }
26899 26901 }
26900 26902
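/* For illustration, a 32-bit non-PIC thunk whose this pointer lives on the
   stack, with a small DELTA and no VCALL_OFFSET, comes out roughly as
   (sketch only, names hypothetical):

       addl    $8, 4(%esp)
       jmp     target_function

   i.e. the this pointer is adjusted in place and control tail-jumps to the
   real method.  */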
26901 26903 static void
26902 26904 x86_file_start (void)
26903 26905 {
26904 26906 default_file_start ();
26905 26907 #if TARGET_MACHO
26906 26908 darwin_file_start ();
26907 26909 #endif
26908 26910 if (X86_FILE_START_VERSION_DIRECTIVE)
26909 26911 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
26910 26912 if (X86_FILE_START_FLTUSED)
26911 26913 fputs ("\t.global\t__fltused\n", asm_out_file);
26912 26914 if (ix86_asm_dialect == ASM_INTEL)
26913 26915 fputs ("\t.intel_syntax noprefix\n", asm_out_file);
26914 26916 }
26915 26917
26916 26918 int
26917 26919 x86_field_alignment (tree field, int computed)
26918 26920 {
26919 26921 enum machine_mode mode;
26920 26922 tree type = TREE_TYPE (field);
26921 26923
26922 26924 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
26923 26925 return computed;
26924 26926 mode = TYPE_MODE (strip_array_types (type));
26925 26927 if (mode == DFmode || mode == DCmode
26926 26928 || GET_MODE_CLASS (mode) == MODE_INT
26927 26929 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
26928 26930 return MIN (32, computed);
26929 26931 return computed;
26930 26932 }
26931 26933
26932 26934 /* Output assembler code to FILE to increment profiler label # LABELNO
26933 26935 for profiling a function entry. */
26934 26936 void
26935 26937 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
26936 26938 {
26937 26939 if (TARGET_64BIT)
26938 26940 {
26939 26941 #ifndef NO_PROFILE_COUNTERS
26940 26942 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
26941 26943 #endif
26942 26944
26943 26945 if (DEFAULT_ABI == SYSV_ABI && flag_pic)
26944 26946 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
26945 26947 else
26946 26948 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26947 26949 }
26948 26950 else if (flag_pic)
26949 26951 {
26950 26952 #ifndef NO_PROFILE_COUNTERS
26951 26953 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
26952 26954 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
26953 26955 #endif
26954 26956 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
26955 26957 }
26956 26958 else
26957 26959 {
26958 26960 #ifndef NO_PROFILE_COUNTERS
26959 26961 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
26960 26962 PROFILE_COUNT_REGISTER);
26961 26963 #endif
26962 26964 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
26963 26965 }
26964 26966 }
26965 26967
26966 26968 /* We don't have exact information about the insn sizes, but we may assume
26967 26969 quite safely that we are informed about all 1 byte insns and memory
26968 26970 address sizes. This is enough to eliminate unnecessary padding in
26969 26971 99% of cases. */
26970 26972
26971 26973 static int
26972 26974 min_insn_size (rtx insn)
26973 26975 {
26974 26976 int l = 0;
26975 26977
26976 26978 if (!INSN_P (insn) || !active_insn_p (insn))
26977 26979 return 0;
26978 26980
26979 26981 /* Discard alignments we've emit and jump instructions. */
26980 26982 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
26981 26983 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
26982 26984 return 0;
26983 26985 if (JUMP_P (insn)
26984 26986 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
26985 26987 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
26986 26988 return 0;
26987 26989
26988 26990 /* Important case - calls are always 5 bytes.
26989 26991 	 It is common to have many calls in a row.  */
26990 26992 if (CALL_P (insn)
26991 26993 && symbolic_reference_mentioned_p (PATTERN (insn))
26992 26994 && !SIBLING_CALL_P (insn))
26993 26995 return 5;
26994 26996 if (get_attr_length (insn) <= 1)
26995 26997 return 1;
26996 26998
26997 26999 /* For normal instructions we may rely on the sizes of addresses
26998 27000      and the presence of a symbol to require 4 bytes of encoding.  This is not
26999 27001 This is not the case for jumps where references are PC relative. */
27000 27002 if (!JUMP_P (insn))
27001 27003 {
27002 27004 l = get_attr_length_address (insn);
27003 27005 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
27004 27006 l = 4;
27005 27007 }
27006 27008 if (l)
27007 27009 return 1+l;
27008 27010 else
27009 27011 return 2;
27010 27012 }
27011 27013
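/* Examples of the estimate above: a direct call to a symbol counts as
   exactly 5 bytes; a one-byte instruction as 1; a non-jump instruction is
   otherwise charged 1 byte plus the length of its address (raised to 4 when
   it mentions a symbol); everything remaining defaults to 2 bytes.  */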
27012 27014 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
27013 27015 window. */
27014 27016
27015 27017 static void
27016 27018 ix86_avoid_jump_misspredicts (void)
27017 27019 {
27018 27020 rtx insn, start = get_insns ();
27019 27021 int nbytes = 0, njumps = 0;
27020 27022 int isjump = 0;
27021 27023
27022 27024 /* Look for all minimal intervals of instructions containing 4 jumps.
27023 27025 The intervals are bounded by START and INSN. NBYTES is the total
27024 27026 size of instructions in the interval including INSN and not including
27025 27027      START.  When NBYTES is smaller than 16 bytes, it is possible
27026 27028 that the end of START and INSN ends up in the same 16byte page.
27027 27029
27028 27030 The smallest offset in the page INSN can start is the case where START
27029 27031 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
27030 27032 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
27031 27033 */
27032 27034 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
27033 27035 {
27034 27036
27035 27037 nbytes += min_insn_size (insn);
27036 27038 if (dump_file)
27037 27039 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
27038 27040 INSN_UID (insn), min_insn_size (insn));
27039 27041 if ((JUMP_P (insn)
27040 27042 && GET_CODE (PATTERN (insn)) != ADDR_VEC
27041 27043 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
27042 27044 || CALL_P (insn))
27043 27045 njumps++;
27044 27046 else
27045 27047 continue;
27046 27048
27047 27049 while (njumps > 3)
27048 27050 {
27049 27051 start = NEXT_INSN (start);
27050 27052 if ((JUMP_P (start)
27051 27053 && GET_CODE (PATTERN (start)) != ADDR_VEC
27052 27054 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
27053 27055 || CALL_P (start))
27054 27056 njumps--, isjump = 1;
27055 27057 else
27056 27058 isjump = 0;
27057 27059 nbytes -= min_insn_size (start);
27058 27060 }
27059 27061 gcc_assert (njumps >= 0);
27060 27062 if (dump_file)
27061 27063 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
27062 27064 INSN_UID (start), INSN_UID (insn), nbytes);
27063 27065
27064 27066 if (njumps == 3 && isjump && nbytes < 16)
27065 27067 {
27066 27068 int padsize = 15 - nbytes + min_insn_size (insn);
27067 27069
27068 27070 if (dump_file)
27069 27071 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
27070 27072 INSN_UID (insn), padsize);
27071 27073 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
27072 27074 }
27073 27075 }
27074 27076 }
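
/* Editor's sketch, not part of the patch: the interval scan above, restated
   over plain arrays.  size[] and is_jump[] are hypothetical per-insn inputs
   standing in for min_insn_size and the JUMP_P/CALL_P test; the point is
   that whenever a window of fewer than 16 bytes comes to hold a 4th jump,
   padding is emitted so that the 4th jump lands in the next 16-byte block.  */

static int
count_pad_points (const int *size, const int *is_jump, int n)
{
  int start = 0, nbytes = 0, njumps = 0, isjump = 0, pads = 0;
  int i;

  for (i = 0; i < n; i++)
    {
      nbytes += size[i];
      if (!is_jump[i])
        continue;
      njumps++;
      /* Shrink the window from the left until it holds at most 3 jumps.  */
      while (njumps > 3)
        {
          isjump = is_jump[start];
          njumps -= isjump;
          nbytes -= size[start];
          start++;
        }
      /* A 4th jump would fit into the same 16-byte block: pad before it.
         The real pass emits an align of 15 - nbytes + size[i] here.  */
      if (njumps == 3 && isjump && nbytes < 16)
        pads++;
    }
  return pads;
}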
27075 27077
27076 27078 /* AMD Athlon works faster
27077 27079 when RET is not the destination of a conditional jump or directly preceded
27078 27080 by another jump instruction. We avoid the penalty by inserting a NOP just
27079 27081 before the RET instruction in such cases. */
27080 27082 static void
27081 27083 ix86_pad_returns (void)
27082 27084 {
27083 27085 edge e;
27084 27086 edge_iterator ei;
27085 27087
27086 27088 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
27087 27089 {
27088 27090 basic_block bb = e->src;
27089 27091 rtx ret = BB_END (bb);
27090 27092 rtx prev;
27091 27093 bool replace = false;
27092 27094
27093 27095 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
27094 27096 || optimize_bb_for_size_p (bb))
27095 27097 continue;
27096 27098 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
27097 27099 if (active_insn_p (prev) || LABEL_P (prev))
27098 27100 break;
27099 27101 if (prev && LABEL_P (prev))
27100 27102 {
27101 27103 edge e;
27102 27104 edge_iterator ei;
27103 27105
27104 27106 FOR_EACH_EDGE (e, ei, bb->preds)
27105 27107 if (EDGE_FREQUENCY (e) && e->src->index >= 0
27106 27108 && !(e->flags & EDGE_FALLTHRU))
27107 27109 replace = true;
27108 27110 }
27109 27111 if (!replace)
27110 27112 {
27111 27113 prev = prev_active_insn (ret);
27112 27114 if (prev
27113 27115 && ((JUMP_P (prev) && any_condjump_p (prev))
27114 27116 || CALL_P (prev)))
27115 27117 replace = true;
27116 27118 /* Empty functions get a branch mispredict even when the jump destination
27117 27119 is not visible to us. */
27118 27120 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
27119 27121 replace = true;
27120 27122 }
27121 27123 if (replace)
27122 27124 {
27123 27125 emit_insn_before (gen_return_internal_long (), ret);
27124 27126 delete_insn (ret);
27125 27127 }
27126 27128 }
27127 27129 }
27128 27130
27129 27131 /* Implement machine specific optimizations. We implement padding of returns
27130 27132 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
27131 27133 static void
27132 27134 ix86_reorg (void)
27133 27135 {
27134 27136 if (TARGET_PAD_RETURNS && optimize
27135 27137 && optimize_function_for_speed_p (cfun))
27136 27138 ix86_pad_returns ();
27137 27139 if (TARGET_FOUR_JUMP_LIMIT && optimize
27138 27140 && optimize_function_for_speed_p (cfun))
27139 27141 ix86_avoid_jump_misspredicts ();
27140 27142 }
27141 27143
27142 27144 /* Return nonzero when a QImode register that must be represented via a REX prefix
27143 27145 is used. */
27144 27146 bool
27145 27147 x86_extended_QIreg_mentioned_p (rtx insn)
27146 27148 {
27147 27149 int i;
27148 27150 extract_insn_cached (insn);
27149 27151 for (i = 0; i < recog_data.n_operands; i++)
27150 27152 if (REG_P (recog_data.operand[i])
27151 27153 && REGNO (recog_data.operand[i]) > BX_REG)
27152 27154 return true;
27153 27155 return false;
27154 27156 }
27155 27157
27156 27158 /* Return nonzero when P points to a register encoded via a REX prefix.
27157 27159 Called via for_each_rtx. */
27158 27160 static int
27159 27161 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
27160 27162 {
27161 27163 unsigned int regno;
27162 27164 if (!REG_P (*p))
27163 27165 return 0;
27164 27166 regno = REGNO (*p);
27165 27167 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
27166 27168 }
27167 27169
27168 27170 /* Return true when INSN mentions register that must be encoded using REX
27169 27171 prefix. */
27170 27172 bool
27171 27173 x86_extended_reg_mentioned_p (rtx insn)
27172 27174 {
27173 27175 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn,
27174 27176 extended_reg_mentioned_1, NULL);
27175 27177 }
27176 27178
27177 27179 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
27178 27180 optabs would emit if we didn't have TFmode patterns. */
27179 27181
27180 27182 void
27181 27183 x86_emit_floatuns (rtx operands[2])
27182 27184 {
27183 27185 rtx neglab, donelab, i0, i1, f0, in, out;
27184 27186 enum machine_mode mode, inmode;
27185 27187
27186 27188 inmode = GET_MODE (operands[1]);
27187 27189 gcc_assert (inmode == SImode || inmode == DImode);
27188 27190
27189 27191 out = operands[0];
27190 27192 in = force_reg (inmode, operands[1]);
27191 27193 mode = GET_MODE (out);
27192 27194 neglab = gen_label_rtx ();
27193 27195 donelab = gen_label_rtx ();
27194 27196 f0 = gen_reg_rtx (mode);
27195 27197
27196 27198 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
27197 27199
27198 27200 expand_float (out, in, 0);
27199 27201
27200 27202 emit_jump_insn (gen_jump (donelab));
27201 27203 emit_barrier ();
27202 27204
27203 27205 emit_label (neglab);
27204 27206
27205 27207 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
27206 27208 1, OPTAB_DIRECT);
27207 27209 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
27208 27210 1, OPTAB_DIRECT);
27209 27211 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
27210 27212
27211 27213 expand_float (f0, i0, 0);
27212 27214
27213 27215 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
27214 27216
27215 27217 emit_label (donelab);
27216 27218 }
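
/* Editor's sketch, not part of the patch: the unsigned-to-float sequence
   emitted above, written as scalar C.  When the sign bit is clear a plain
   signed conversion is used; otherwise the input is halved as
   (in >> 1) | (in & 1) -- keeping the low bit so rounding is unchanged --
   converted as a signed value, and the result is doubled.  */

static double
floatuns_sketch (unsigned long long in)
{
  unsigned long long i0;
  double f0;

  if ((long long) in >= 0)
    return (double) (long long) in;   /* fast path: value fits a signed type */

  i0 = (in >> 1) | (in & 1);          /* i0 = (in >> 1) | (in & 1)           */
  f0 = (double) (long long) i0;       /* f0 = (FP) i0                        */
  return f0 + f0;                     /* out = f0 + f0                       */
}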
27217 27219
27218 27220 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27219 27221 with all elements equal to VAR. Return true if successful. */
27220 27222
27221 27223 static bool
27222 27224 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
27223 27225 rtx target, rtx val)
27224 27226 {
27225 27227 enum machine_mode hmode, smode, wsmode, wvmode;
27226 27228 rtx x;
27227 27229
27228 27230 switch (mode)
27229 27231 {
27230 27232 case V2SImode:
27231 27233 case V2SFmode:
27232 27234 if (!mmx_ok)
27233 27235 return false;
27234 27236 /* FALLTHRU */
27235 27237
27236 27238 case V2DFmode:
27237 27239 case V2DImode:
27238 27240 case V4SFmode:
27239 27241 case V4SImode:
27240 27242 val = force_reg (GET_MODE_INNER (mode), val);
27241 27243 x = gen_rtx_VEC_DUPLICATE (mode, val);
27242 27244 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27243 27245 return true;
27244 27246
27245 27247 case V4HImode:
27246 27248 if (!mmx_ok)
27247 27249 return false;
27248 27250 if (TARGET_SSE || TARGET_3DNOW_A)
27249 27251 {
27250 27252 val = gen_lowpart (SImode, val);
27251 27253 x = gen_rtx_TRUNCATE (HImode, val);
27252 27254 x = gen_rtx_VEC_DUPLICATE (mode, x);
27253 27255 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27254 27256 return true;
27255 27257 }
27256 27258 else
27257 27259 {
27258 27260 smode = HImode;
27259 27261 wsmode = SImode;
27260 27262 wvmode = V2SImode;
27261 27263 goto widen;
27262 27264 }
27263 27265
27264 27266 case V8QImode:
27265 27267 if (!mmx_ok)
27266 27268 return false;
27267 27269 smode = QImode;
27268 27270 wsmode = HImode;
27269 27271 wvmode = V4HImode;
27270 27272 goto widen;
27271 27273 case V8HImode:
27272 27274 if (TARGET_SSE2)
27273 27275 {
27274 27276 rtx tmp1, tmp2;
27275 27277 /* Extend HImode to SImode using a paradoxical SUBREG. */
27276 27278 tmp1 = gen_reg_rtx (SImode);
27277 27279 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27278 27280 /* Insert the SImode value as low element of V4SImode vector. */
27279 27281 tmp2 = gen_reg_rtx (V4SImode);
27280 27282 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27281 27283 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27282 27284 CONST0_RTX (V4SImode),
27283 27285 const1_rtx);
27284 27286 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27285 27287 /* Cast the V4SImode vector back to a V8HImode vector. */
27286 27288 tmp1 = gen_reg_rtx (V8HImode);
27287 27289 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
27288 27290 /* Duplicate the low short through the whole low SImode word. */
27289 27291 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
27290 27292 /* Cast the V8HImode vector back to a V4SImode vector. */
27291 27293 tmp2 = gen_reg_rtx (V4SImode);
27292 27294 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27293 27295 /* Replicate the low element of the V4SImode vector. */
27294 27296 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27295 27297 /* Cast the V4SImode vector back to V8HImode, and store in target. */
27296 27298 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
27297 27299 return true;
27298 27300 }
27299 27301 smode = HImode;
27300 27302 wsmode = SImode;
27301 27303 wvmode = V4SImode;
27302 27304 goto widen;
27303 27305 case V16QImode:
27304 27306 if (TARGET_SSE2)
27305 27307 {
27306 27308 rtx tmp1, tmp2;
27307 27309 /* Extend QImode to SImode using a paradoxical SUBREG. */
27308 27310 tmp1 = gen_reg_rtx (SImode);
27309 27311 emit_move_insn (tmp1, gen_lowpart (SImode, val));
27310 27312 /* Insert the SImode value as low element of V4SImode vector. */
27311 27313 tmp2 = gen_reg_rtx (V4SImode);
27312 27314 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
27313 27315 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
27314 27316 CONST0_RTX (V4SImode),
27315 27317 const1_rtx);
27316 27318 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
27317 27319 /* Cast the V4SImode vector back to a V16QImode vector. */
27318 27320 tmp1 = gen_reg_rtx (V16QImode);
27319 27321 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
27320 27322 /* Duplicate the low byte through the whole low SImode word. */
27321 27323 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27322 27324 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
27323 27325 /* Cast the V16QImode vector back to a V4SImode vector. */
27324 27326 tmp2 = gen_reg_rtx (V4SImode);
27325 27327 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
27326 27328 /* Replicate the low element of the V4SImode vector. */
27327 27329 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
27328 27330 /* Cast the V4SImode vector back to V16QImode, and store in target. */
27329 27331 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
27330 27332 return true;
27331 27333 }
27332 27334 smode = QImode;
27333 27335 wsmode = HImode;
27334 27336 wvmode = V8HImode;
27335 27337 goto widen;
27336 27338 widen:
27337 27339 /* Replicate the value once into the next wider mode and recurse. */
27338 27340 val = convert_modes (wsmode, smode, val, true);
27339 27341 x = expand_simple_binop (wsmode, ASHIFT, val,
27340 27342 GEN_INT (GET_MODE_BITSIZE (smode)),
27341 27343 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27342 27344 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
27343 27345
27344 27346 x = gen_reg_rtx (wvmode);
27345 27347 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
27346 27348 gcc_unreachable ();
27347 27349 emit_move_insn (target, gen_lowpart (mode, x));
27348 27350 return true;
27349 27351
27350 27352 case V4DFmode:
27351 27353 hmode = V2DFmode;
27352 27354 goto half;
27353 27355 case V4DImode:
27354 27356 hmode = V2DImode;
27355 27357 goto half;
27356 27358 case V8SFmode:
27357 27359 hmode = V4SFmode;
27358 27360 goto half;
27359 27361 case V8SImode:
27360 27362 hmode = V4SImode;
27361 27363 goto half;
27362 27364 case V16HImode:
27363 27365 hmode = V8HImode;
27364 27366 goto half;
27365 27367 case V32QImode:
27366 27368 hmode = V16QImode;
27367 27369 goto half;
27368 27370 half:
27369 27371 {
27370 27372 rtx tmp = gen_reg_rtx (hmode);
27371 27373 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val);
27372 27374 emit_insn (gen_rtx_SET (VOIDmode, target,
27373 27375 gen_rtx_VEC_CONCAT (mode, tmp, tmp)));
27374 27376 }
27375 27377 return true;
27376 27378
27377 27379 default:
27378 27380 return false;
27379 27381 }
27380 27382 }
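
/* Editor's sketch, not part of the patch: the "widen" path above expressed
   as plain integer arithmetic.  Each step ORs the value with a copy of
   itself shifted left by one element, doubling the number of copies, and
   the recursion then broadcasts the widened scalar in the wider vector
   mode.  For a byte broadcast into a 32-bit word:  */

static unsigned int
broadcast_byte_sketch (unsigned char b)
{
  unsigned short h = (unsigned short) (b | (b << 8));   /* 2 copies */
  return (unsigned int) h | ((unsigned int) h << 16);   /* 4 copies */
}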
27381 27383
27382 27384 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27383 27385 whose ONE_VAR element is VAR, and other elements are zero. Return true
27384 27386 if successful. */
27385 27387
27386 27388 static bool
27387 27389 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
27388 27390 rtx target, rtx var, int one_var)
27389 27391 {
27390 27392 enum machine_mode vsimode;
27391 27393 rtx new_target;
27392 27394 rtx x, tmp;
27393 27395 bool use_vector_set = false;
27394 27396
27395 27397 switch (mode)
27396 27398 {
27397 27399 case V2DImode:
27398 27400 /* For SSE4.1, we normally use vector set. But if the second
27399 27401 element is zero and inter-unit moves are OK, we use movq
27400 27402 instead. */
27401 27403 use_vector_set = (TARGET_64BIT
27402 27404 && TARGET_SSE4_1
27403 27405 && !(TARGET_INTER_UNIT_MOVES
27404 27406 && one_var == 0));
27405 27407 break;
27406 27408 case V16QImode:
27407 27409 case V4SImode:
27408 27410 case V4SFmode:
27409 27411 use_vector_set = TARGET_SSE4_1;
27410 27412 break;
27411 27413 case V8HImode:
27412 27414 use_vector_set = TARGET_SSE2;
27413 27415 break;
27414 27416 case V4HImode:
27415 27417 use_vector_set = TARGET_SSE || TARGET_3DNOW_A;
27416 27418 break;
27417 27419 case V32QImode:
27418 27420 case V16HImode:
27419 27421 case V8SImode:
27420 27422 case V8SFmode:
27421 27423 case V4DFmode:
27422 27424 use_vector_set = TARGET_AVX;
27423 27425 break;
27424 27426 case V4DImode:
27425 27427 /* Use ix86_expand_vector_set in 64bit mode only. */
27426 27428 use_vector_set = TARGET_AVX && TARGET_64BIT;
27427 27429 break;
27428 27430 default:
27429 27431 break;
27430 27432 }
27431 27433
27432 27434 if (use_vector_set)
27433 27435 {
27434 27436 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode)));
27435 27437 var = force_reg (GET_MODE_INNER (mode), var);
27436 27438 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27437 27439 return true;
27438 27440 }
27439 27441
27440 27442 switch (mode)
27441 27443 {
27442 27444 case V2SFmode:
27443 27445 case V2SImode:
27444 27446 if (!mmx_ok)
27445 27447 return false;
27446 27448 /* FALLTHRU */
27447 27449
27448 27450 case V2DFmode:
27449 27451 case V2DImode:
27450 27452 if (one_var != 0)
27451 27453 return false;
27452 27454 var = force_reg (GET_MODE_INNER (mode), var);
27453 27455 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
27454 27456 emit_insn (gen_rtx_SET (VOIDmode, target, x));
27455 27457 return true;
27456 27458
27457 27459 case V4SFmode:
27458 27460 case V4SImode:
27459 27461 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
27460 27462 new_target = gen_reg_rtx (mode);
27461 27463 else
27462 27464 new_target = target;
27463 27465 var = force_reg (GET_MODE_INNER (mode), var);
27464 27466 x = gen_rtx_VEC_DUPLICATE (mode, var);
27465 27467 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
27466 27468 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
27467 27469 if (one_var != 0)
27468 27470 {
27469 27471 /* We need to shuffle the value to the correct position, so
27470 27472 create a new pseudo to store the intermediate result. */
27471 27473
27472 27474 /* With SSE2, we can use the integer shuffle insns. */
27473 27475 if (mode != V4SFmode && TARGET_SSE2)
27474 27476 {
27475 27477 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
27476 27478 GEN_INT (1),
27477 27479 GEN_INT (one_var == 1 ? 0 : 1),
27478 27480 GEN_INT (one_var == 2 ? 0 : 1),
27479 27481 GEN_INT (one_var == 3 ? 0 : 1)));
27480 27482 if (target != new_target)
27481 27483 emit_move_insn (target, new_target);
27482 27484 return true;
27483 27485 }
27484 27486
27485 27487 /* Otherwise convert the intermediate result to V4SFmode and
27486 27488 use the SSE1 shuffle instructions. */
27487 27489 if (mode != V4SFmode)
27488 27490 {
27489 27491 tmp = gen_reg_rtx (V4SFmode);
27490 27492 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
27491 27493 }
27492 27494 else
27493 27495 tmp = new_target;
27494 27496
27495 27497 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp,
27496 27498 GEN_INT (1),
27497 27499 GEN_INT (one_var == 1 ? 0 : 1),
27498 27500 GEN_INT (one_var == 2 ? 0+4 : 1+4),
27499 27501 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
27500 27502
27501 27503 if (mode != V4SFmode)
27502 27504 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
27503 27505 else if (tmp != target)
27504 27506 emit_move_insn (target, tmp);
27505 27507 }
27506 27508 else if (target != new_target)
27507 27509 emit_move_insn (target, new_target);
27508 27510 return true;
27509 27511
27510 27512 case V8HImode:
27511 27513 case V16QImode:
27512 27514 vsimode = V4SImode;
27513 27515 goto widen;
27514 27516 case V4HImode:
27515 27517 case V8QImode:
27516 27518 if (!mmx_ok)
27517 27519 return false;
27518 27520 vsimode = V2SImode;
27519 27521 goto widen;
27520 27522 widen:
27521 27523 if (one_var != 0)
27522 27524 return false;
27523 27525
27524 27526 /* Zero extend the variable element to SImode and recurse. */
27525 27527 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
27526 27528
27527 27529 x = gen_reg_rtx (vsimode);
27528 27530 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
27529 27531 var, one_var))
27530 27532 gcc_unreachable ();
27531 27533
27532 27534 emit_move_insn (target, gen_lowpart (mode, x));
27533 27535 return true;
27534 27536
27535 27537 default:
27536 27538 return false;
27537 27539 }
27538 27540 }
27539 27541
27540 27542 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
27541 27543 consisting of the values in VALS. It is known that all elements
27542 27544 except ONE_VAR are constants. Return true if successful. */
27543 27545
27544 27546 static bool
27545 27547 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
27546 27548 rtx target, rtx vals, int one_var)
27547 27549 {
27548 27550 rtx var = XVECEXP (vals, 0, one_var);
27549 27551 enum machine_mode wmode;
27550 27552 rtx const_vec, x;
27551 27553
27552 27554 const_vec = copy_rtx (vals);
27553 27555 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
27554 27556 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
27555 27557
27556 27558 switch (mode)
27557 27559 {
27558 27560 case V2DFmode:
27559 27561 case V2DImode:
27560 27562 case V2SFmode:
27561 27563 case V2SImode:
27562 27564 /* For the two element vectors, it's just as easy to use
27563 27565 the general case. */
27564 27566 return false;
27565 27567
27566 27568 case V4DImode:
27567 27569 /* Use ix86_expand_vector_set in 64bit mode only. */
27568 27570 if (!TARGET_64BIT)
27569 27571 return false;
27570 27572 case V4DFmode:
27571 27573 case V8SFmode:
27572 27574 case V8SImode:
27573 27575 case V16HImode:
27574 27576 case V32QImode:
27575 27577 case V4SFmode:
27576 27578 case V4SImode:
27577 27579 case V8HImode:
27578 27580 case V4HImode:
27579 27581 break;
27580 27582
27581 27583 case V16QImode:
27582 27584 if (TARGET_SSE4_1)
27583 27585 break;
27584 27586 wmode = V8HImode;
27585 27587 goto widen;
27586 27588 case V8QImode:
27587 27589 wmode = V4HImode;
27588 27590 goto widen;
27589 27591 widen:
27590 27592 /* There's no way to set one QImode entry easily. Combine
27591 27593 the variable value with its adjacent constant value, and
27592 27594 promote to an HImode set. */
27593 27595 x = XVECEXP (vals, 0, one_var ^ 1);
27594 27596 if (one_var & 1)
27595 27597 {
27596 27598 var = convert_modes (HImode, QImode, var, true);
27597 27599 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
27598 27600 NULL_RTX, 1, OPTAB_LIB_WIDEN);
27599 27601 x = GEN_INT (INTVAL (x) & 0xff);
27600 27602 }
27601 27603 else
27602 27604 {
27603 27605 var = convert_modes (HImode, QImode, var, true);
27604 27606 x = gen_int_mode (INTVAL (x) << 8, HImode);
27605 27607 }
27606 27608 if (x != const0_rtx)
27607 27609 var = expand_simple_binop (HImode, IOR, var, x, var,
27608 27610 1, OPTAB_LIB_WIDEN);
27609 27611
27610 27612 x = gen_reg_rtx (wmode);
27611 27613 emit_move_insn (x, gen_lowpart (wmode, const_vec));
27612 27614 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
27613 27615
27614 27616 emit_move_insn (target, gen_lowpart (mode, x));
27615 27617 return true;
27616 27618
27617 27619 default:
27618 27620 return false;
27619 27621 }
27620 27622
27621 27623 emit_move_insn (target, const_vec);
27622 27624 ix86_expand_vector_set (mmx_ok, target, var, one_var);
27623 27625 return true;
27624 27626 }
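
/* Editor's sketch, not part of the patch: the QImode "widen" case above.
   The variable byte and its constant neighbour are packed into one
   halfword so that a single HImode vector set suffices; the low bit of
   one_var decides which half the variable byte occupies (element order is
   little-endian on x86, so the odd element is the high byte).  */

static unsigned short
pack_byte_pair_sketch (unsigned char var, unsigned char neighbour, int one_var)
{
  if (one_var & 1)
    return (unsigned short) ((var << 8) | neighbour);   /* variable byte is the odd element  */
  else
    return (unsigned short) ((neighbour << 8) | var);   /* variable byte is the even element */
}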
27625 27627
27626 27628 /* A subroutine of ix86_expand_vector_init_general. Use vector
27627 27629 concatenate to handle the most general case: all values variable,
27628 27630 and none identical. */
27629 27631
27630 27632 static void
27631 27633 ix86_expand_vector_init_concat (enum machine_mode mode,
27632 27634 rtx target, rtx *ops, int n)
27633 27635 {
27634 27636 enum machine_mode cmode, hmode = VOIDmode;
27635 27637 rtx first[8], second[4];
27636 27638 rtvec v;
27637 27639 int i, j;
27638 27640
27639 27641 switch (n)
27640 27642 {
27641 27643 case 2:
27642 27644 switch (mode)
27643 27645 {
27644 27646 case V8SImode:
27645 27647 cmode = V4SImode;
27646 27648 break;
27647 27649 case V8SFmode:
27648 27650 cmode = V4SFmode;
27649 27651 break;
27650 27652 case V4DImode:
27651 27653 cmode = V2DImode;
27652 27654 break;
27653 27655 case V4DFmode:
27654 27656 cmode = V2DFmode;
27655 27657 break;
27656 27658 case V4SImode:
27657 27659 cmode = V2SImode;
27658 27660 break;
27659 27661 case V4SFmode:
27660 27662 cmode = V2SFmode;
27661 27663 break;
27662 27664 case V2DImode:
27663 27665 cmode = DImode;
27664 27666 break;
27665 27667 case V2SImode:
27666 27668 cmode = SImode;
27667 27669 break;
27668 27670 case V2DFmode:
27669 27671 cmode = DFmode;
27670 27672 break;
27671 27673 case V2SFmode:
27672 27674 cmode = SFmode;
27673 27675 break;
27674 27676 default:
27675 27677 gcc_unreachable ();
27676 27678 }
27677 27679
27678 27680 if (!register_operand (ops[1], cmode))
27679 27681 ops[1] = force_reg (cmode, ops[1]);
27680 27682 if (!register_operand (ops[0], cmode))
27681 27683 ops[0] = force_reg (cmode, ops[0]);
27682 27684 emit_insn (gen_rtx_SET (VOIDmode, target,
27683 27685 gen_rtx_VEC_CONCAT (mode, ops[0],
27684 27686 ops[1])));
27685 27687 break;
27686 27688
27687 27689 case 4:
27688 27690 switch (mode)
27689 27691 {
27690 27692 case V4DImode:
27691 27693 cmode = V2DImode;
27692 27694 break;
27693 27695 case V4DFmode:
27694 27696 cmode = V2DFmode;
27695 27697 break;
27696 27698 case V4SImode:
27697 27699 cmode = V2SImode;
27698 27700 break;
27699 27701 case V4SFmode:
27700 27702 cmode = V2SFmode;
27701 27703 break;
27702 27704 default:
27703 27705 gcc_unreachable ();
27704 27706 }
27705 27707 goto half;
27706 27708
27707 27709 case 8:
27708 27710 switch (mode)
27709 27711 {
27710 27712 case V8SImode:
27711 27713 cmode = V2SImode;
27712 27714 hmode = V4SImode;
27713 27715 break;
27714 27716 case V8SFmode:
27715 27717 cmode = V2SFmode;
27716 27718 hmode = V4SFmode;
27717 27719 break;
27718 27720 default:
27719 27721 gcc_unreachable ();
27720 27722 }
27721 27723 goto half;
27722 27724
27723 27725 half:
27724 27726 /* FIXME: We process inputs backward to help RA. PR 36222. */
27725 27727 i = n - 1;
27726 27728 j = (n >> 1) - 1;
27727 27729 for (; i > 0; i -= 2, j--)
27728 27730 {
27729 27731 first[j] = gen_reg_rtx (cmode);
27730 27732 v = gen_rtvec (2, ops[i - 1], ops[i]);
27731 27733 ix86_expand_vector_init (false, first[j],
27732 27734 gen_rtx_PARALLEL (cmode, v));
27733 27735 }
27734 27736
27735 27737 n >>= 1;
27736 27738 if (n > 2)
27737 27739 {
27738 27740 gcc_assert (hmode != VOIDmode);
27739 27741 for (i = j = 0; i < n; i += 2, j++)
27740 27742 {
27741 27743 second[j] = gen_reg_rtx (hmode);
27742 27744 ix86_expand_vector_init_concat (hmode, second [j],
27743 27745 &first [i], 2);
27744 27746 }
27745 27747 n >>= 1;
27746 27748 ix86_expand_vector_init_concat (mode, target, second, n);
27747 27749 }
27748 27750 else
27749 27751 ix86_expand_vector_init_concat (mode, target, first, n);
27750 27752 break;
27751 27753
27752 27754 default:
27753 27755 gcc_unreachable ();
27754 27756 }
27755 27757 }
27756 27758
27757 27759 /* A subroutine of ix86_expand_vector_init_general. Use vector
27758 27760 interleave to handle the most general case: all values variable,
27759 27761 and none identical. */
27760 27762
27761 27763 static void
27762 27764 ix86_expand_vector_init_interleave (enum machine_mode mode,
27763 27765 rtx target, rtx *ops, int n)
27764 27766 {
27765 27767 enum machine_mode first_imode, second_imode, third_imode, inner_mode;
27766 27768 int i, j;
27767 27769 rtx op0, op1;
27768 27770 rtx (*gen_load_even) (rtx, rtx, rtx);
27769 27771 rtx (*gen_interleave_first_low) (rtx, rtx, rtx);
27770 27772 rtx (*gen_interleave_second_low) (rtx, rtx, rtx);
27771 27773
27772 27774 switch (mode)
27773 27775 {
27774 27776 case V8HImode:
27775 27777 gen_load_even = gen_vec_setv8hi;
27776 27778 gen_interleave_first_low = gen_vec_interleave_lowv4si;
27777 27779 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27778 27780 inner_mode = HImode;
27779 27781 first_imode = V4SImode;
27780 27782 second_imode = V2DImode;
27781 27783 third_imode = VOIDmode;
27782 27784 break;
27783 27785 case V16QImode:
27784 27786 gen_load_even = gen_vec_setv16qi;
27785 27787 gen_interleave_first_low = gen_vec_interleave_lowv8hi;
27786 27788 gen_interleave_second_low = gen_vec_interleave_lowv4si;
27787 27789 inner_mode = QImode;
27788 27790 first_imode = V8HImode;
27789 27791 second_imode = V4SImode;
27790 27792 third_imode = V2DImode;
27791 27793 break;
27792 27794 default:
27793 27795 gcc_unreachable ();
27794 27796 }
27795 27797
27796 27798 for (i = 0; i < n; i++)
27797 27799 {
27798 27800 /* Extend the odd element to SImode using a paradoxical SUBREG. */
27799 27801 op0 = gen_reg_rtx (SImode);
27800 27802 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i]));
27801 27803
27802 27804 /* Insert the SImode value as low element of V4SImode vector. */
27803 27805 op1 = gen_reg_rtx (V4SImode);
27804 27806 op0 = gen_rtx_VEC_MERGE (V4SImode,
27805 27807 gen_rtx_VEC_DUPLICATE (V4SImode,
27806 27808 op0),
27807 27809 CONST0_RTX (V4SImode),
27808 27810 const1_rtx);
27809 27811 emit_insn (gen_rtx_SET (VOIDmode, op1, op0));
27810 27812
27811 27813 /* Cast the V4SImode vector back to a vector in the original mode. */
27812 27814 op0 = gen_reg_rtx (mode);
27813 27815 emit_move_insn (op0, gen_lowpart (mode, op1));
27814 27816
27815 27817 /* Load even elements into the second position. */
27816 27818 emit_insn ((*gen_load_even) (op0,
27817 27819 force_reg (inner_mode,
27818 27820 ops [i + i + 1]),
27819 27821 const1_rtx));
27820 27822
27821 27823 /* Cast vector to FIRST_IMODE vector. */
27822 27824 ops[i] = gen_reg_rtx (first_imode);
27823 27825 emit_move_insn (ops[i], gen_lowpart (first_imode, op0));
27824 27826 }
27825 27827
27826 27828 /* Interleave low FIRST_IMODE vectors. */
27827 27829 for (i = j = 0; i < n; i += 2, j++)
27828 27830 {
27829 27831 op0 = gen_reg_rtx (first_imode);
27830 27832 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1]));
27831 27833
27832 27834 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */
27833 27835 ops[j] = gen_reg_rtx (second_imode);
27834 27836 emit_move_insn (ops[j], gen_lowpart (second_imode, op0));
27835 27837 }
27836 27838
27837 27839 /* Interleave low SECOND_IMODE vectors. */
27838 27840 switch (second_imode)
27839 27841 {
27840 27842 case V4SImode:
27841 27843 for (i = j = 0; i < n / 2; i += 2, j++)
27842 27844 {
27843 27845 op0 = gen_reg_rtx (second_imode);
27844 27846 emit_insn ((*gen_interleave_second_low) (op0, ops[i],
27845 27847 ops[i + 1]));
27846 27848
27847 27849 /* Cast the SECOND_IMODE vector to the THIRD_IMODE
27848 27850 vector. */
27849 27851 ops[j] = gen_reg_rtx (third_imode);
27850 27852 emit_move_insn (ops[j], gen_lowpart (third_imode, op0));
27851 27853 }
27852 27854 second_imode = V2DImode;
27853 27855 gen_interleave_second_low = gen_vec_interleave_lowv2di;
27854 27856 /* FALLTHRU */
27855 27857
27856 27858 case V2DImode:
27857 27859 op0 = gen_reg_rtx (second_imode);
27858 27860 emit_insn ((*gen_interleave_second_low) (op0, ops[0],
27859 27861 ops[1]));
27860 27862
27861 27863 /* Cast the SECOND_IMODE vector back to a vector in the original
27862 27864 mode. */
27863 27865 emit_insn (gen_rtx_SET (VOIDmode, target,
27864 27866 gen_lowpart (mode, op0)));
27865 27867 break;
27866 27868
27867 27869 default:
27868 27870 gcc_unreachable ();
27869 27871 }
27870 27872 }
27871 27873
27872 27874 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
27873 27875 all values variable, and none identical. */
27874 27876
27875 27877 static void
27876 27878 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
27877 27879 rtx target, rtx vals)
27878 27880 {
27879 27881 rtx ops[32], op0, op1;
27880 27882 enum machine_mode half_mode = VOIDmode;
27881 27883 int n, i;
27882 27884
27883 27885 switch (mode)
27884 27886 {
27885 27887 case V2SFmode:
27886 27888 case V2SImode:
27887 27889 if (!mmx_ok && !TARGET_SSE)
27888 27890 break;
27889 27891 /* FALLTHRU */
27890 27892
27891 27893 case V8SFmode:
27892 27894 case V8SImode:
27893 27895 case V4DFmode:
27894 27896 case V4DImode:
27895 27897 case V4SFmode:
27896 27898 case V4SImode:
27897 27899 case V2DFmode:
27898 27900 case V2DImode:
27899 27901 n = GET_MODE_NUNITS (mode);
27900 27902 for (i = 0; i < n; i++)
27901 27903 ops[i] = XVECEXP (vals, 0, i);
27902 27904 ix86_expand_vector_init_concat (mode, target, ops, n);
27903 27905 return;
27904 27906
27905 27907 case V32QImode:
27906 27908 half_mode = V16QImode;
27907 27909 goto half;
27908 27910
27909 27911 case V16HImode:
27910 27912 half_mode = V8HImode;
27911 27913 goto half;
27912 27914
27913 27915 half:
27914 27916 n = GET_MODE_NUNITS (mode);
27915 27917 for (i = 0; i < n; i++)
27916 27918 ops[i] = XVECEXP (vals, 0, i);
27917 27919 op0 = gen_reg_rtx (half_mode);
27918 27920 op1 = gen_reg_rtx (half_mode);
27919 27921 ix86_expand_vector_init_interleave (half_mode, op0, ops,
27920 27922 n >> 2);
27921 27923 ix86_expand_vector_init_interleave (half_mode, op1,
27922 27924 &ops [n >> 1], n >> 2);
27923 27925 emit_insn (gen_rtx_SET (VOIDmode, target,
27924 27926 gen_rtx_VEC_CONCAT (mode, op0, op1)));
27925 27927 return;
27926 27928
27927 27929 case V16QImode:
27928 27930 if (!TARGET_SSE4_1)
27929 27931 break;
27930 27932 /* FALLTHRU */
27931 27933
27932 27934 case V8HImode:
27933 27935 if (!TARGET_SSE2)
27934 27936 break;
27935 27937
27936 27938 /* Don't use ix86_expand_vector_init_interleave if we can't
27937 27939 move from GPR to SSE register directly. */
27938 27940 if (!TARGET_INTER_UNIT_MOVES)
27939 27941 break;
27940 27942
27941 27943 n = GET_MODE_NUNITS (mode);
27942 27944 for (i = 0; i < n; i++)
27943 27945 ops[i] = XVECEXP (vals, 0, i);
27944 27946 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1);
27945 27947 return;
27946 27948
27947 27949 case V4HImode:
27948 27950 case V8QImode:
27949 27951 break;
27950 27952
27951 27953 default:
27952 27954 gcc_unreachable ();
27953 27955 }
27954 27956
27955 27957 {
27956 27958 int i, j, n_elts, n_words, n_elt_per_word;
27957 27959 enum machine_mode inner_mode;
27958 27960 rtx words[4], shift;
27959 27961
27960 27962 inner_mode = GET_MODE_INNER (mode);
27961 27963 n_elts = GET_MODE_NUNITS (mode);
27962 27964 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
27963 27965 n_elt_per_word = n_elts / n_words;
27964 27966 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
27965 27967
27966 27968 for (i = 0; i < n_words; ++i)
27967 27969 {
27968 27970 rtx word = NULL_RTX;
27969 27971
27970 27972 for (j = 0; j < n_elt_per_word; ++j)
27971 27973 {
27972 27974 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
27973 27975 elt = convert_modes (word_mode, inner_mode, elt, true);
27974 27976
27975 27977 if (j == 0)
27976 27978 word = elt;
27977 27979 else
27978 27980 {
27979 27981 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
27980 27982 word, 1, OPTAB_LIB_WIDEN);
27981 27983 word = expand_simple_binop (word_mode, IOR, word, elt,
27982 27984 word, 1, OPTAB_LIB_WIDEN);
27983 27985 }
27984 27986 }
27985 27987
27986 27988 words[i] = word;
27987 27989 }
27988 27990
27989 27991 if (n_words == 1)
27990 27992 emit_move_insn (target, gen_lowpart (mode, words[0]));
27991 27993 else if (n_words == 2)
27992 27994 {
27993 27995 rtx tmp = gen_reg_rtx (mode);
27994 27996 emit_clobber (tmp);
27995 27997 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
27996 27998 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
27997 27999 emit_move_insn (target, tmp);
27998 28000 }
27999 28001 else if (n_words == 4)
28000 28002 {
28001 28003 rtx tmp = gen_reg_rtx (V4SImode);
28002 28004 gcc_assert (word_mode == SImode);
28003 28005 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
28004 28006 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
28005 28007 emit_move_insn (target, gen_lowpart (mode, tmp));
28006 28008 }
28007 28009 else
28008 28010 gcc_unreachable ();
28009 28011 }
28010 28012 }
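
/* Editor's sketch, not part of the patch: the word-packing fallback above
   for a vector of bytes, with word_mode taken as a 32-bit SImode word.
   The inner loop visits each word's elements from the top down, shifting
   the accumulated word left by one element per step, so element 0 of the
   group ends up in the low bits -- the little-endian in-memory layout.  */

static unsigned int
pack_word_sketch (const unsigned char *elt, int n_elt_per_word)
{
  unsigned int word = 0;
  int j;

  for (j = 0; j < n_elt_per_word; j++)
    {
      unsigned int e = elt[n_elt_per_word - 1 - j];   /* highest remaining element */
      word = (j == 0) ? e : ((word << 8) | e);
    }
  return word;
}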
28011 28013
28012 28014 /* Initialize vector TARGET via VALS. Suppress the use of MMX
28013 28015 instructions unless MMX_OK is true. */
28014 28016
28015 28017 void
28016 28018 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
28017 28019 {
28018 28020 enum machine_mode mode = GET_MODE (target);
28019 28021 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28020 28022 int n_elts = GET_MODE_NUNITS (mode);
28021 28023 int n_var = 0, one_var = -1;
28022 28024 bool all_same = true, all_const_zero = true;
28023 28025 int i;
28024 28026 rtx x;
28025 28027
28026 28028 for (i = 0; i < n_elts; ++i)
28027 28029 {
28028 28030 x = XVECEXP (vals, 0, i);
28029 28031 if (!(CONST_INT_P (x)
28030 28032 || GET_CODE (x) == CONST_DOUBLE
28031 28033 || GET_CODE (x) == CONST_FIXED))
28032 28034 n_var++, one_var = i;
28033 28035 else if (x != CONST0_RTX (inner_mode))
28034 28036 all_const_zero = false;
28035 28037 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
28036 28038 all_same = false;
28037 28039 }
28038 28040
28039 28041 /* Constants are best loaded from the constant pool. */
28040 28042 if (n_var == 0)
28041 28043 {
28042 28044 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
28043 28045 return;
28044 28046 }
28045 28047
28046 28048 /* If all values are identical, broadcast the value. */
28047 28049 if (all_same
28048 28050 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
28049 28051 XVECEXP (vals, 0, 0)))
28050 28052 return;
28051 28053
28052 28054 /* Values where only one field is non-constant are best loaded from
28053 28055 the pool and overwritten via move later. */
28054 28056 if (n_var == 1)
28055 28057 {
28056 28058 if (all_const_zero
28057 28059 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
28058 28060 XVECEXP (vals, 0, one_var),
28059 28061 one_var))
28060 28062 return;
28061 28063
28062 28064 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
28063 28065 return;
28064 28066 }
28065 28067
28066 28068 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
28067 28069 }
28068 28070
28069 28071 void
28070 28072 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
28071 28073 {
28072 28074 enum machine_mode mode = GET_MODE (target);
28073 28075 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28074 28076 enum machine_mode half_mode;
28075 28077 bool use_vec_merge = false;
28076 28078 rtx tmp;
28077 28079 static rtx (*gen_extract[6][2]) (rtx, rtx)
28078 28080 = {
28079 28081 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi },
28080 28082 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi },
28081 28083 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si },
28082 28084 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di },
28083 28085 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf },
28084 28086 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df }
28085 28087 };
28086 28088 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx)
28087 28089 = {
28088 28090 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi },
28089 28091 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi },
28090 28092 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si },
28091 28093 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di },
28092 28094 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf },
28093 28095 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df }
28094 28096 };
28095 28097 int i, j, n;
28096 28098
28097 28099 switch (mode)
28098 28100 {
28099 28101 case V2SFmode:
28100 28102 case V2SImode:
28101 28103 if (mmx_ok)
28102 28104 {
28103 28105 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
28104 28106 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
28105 28107 if (elt == 0)
28106 28108 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
28107 28109 else
28108 28110 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
28109 28111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28110 28112 return;
28111 28113 }
28112 28114 break;
28113 28115
28114 28116 case V2DImode:
28115 28117 use_vec_merge = TARGET_SSE4_1;
28116 28118 if (use_vec_merge)
28117 28119 break;
28118 28120
28119 28121 case V2DFmode:
28120 28122 {
28121 28123 rtx op0, op1;
28122 28124
28123 28125 /* For the two element vectors, we implement a VEC_CONCAT with
28124 28126 the extraction of the other element. */
28125 28127
28126 28128 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
28127 28129 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
28128 28130
28129 28131 if (elt == 0)
28130 28132 op0 = val, op1 = tmp;
28131 28133 else
28132 28134 op0 = tmp, op1 = val;
28133 28135
28134 28136 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
28135 28137 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28136 28138 }
28137 28139 return;
28138 28140
28139 28141 case V4SFmode:
28140 28142 use_vec_merge = TARGET_SSE4_1;
28141 28143 if (use_vec_merge)
28142 28144 break;
28143 28145
28144 28146 switch (elt)
28145 28147 {
28146 28148 case 0:
28147 28149 use_vec_merge = true;
28148 28150 break;
28149 28151
28150 28152 case 1:
28151 28153 /* tmp = target = A B C D */
28152 28154 tmp = copy_to_reg (target);
28153 28155 /* target = A A B B */
28154 28156 emit_insn (gen_sse_unpcklps (target, target, target));
28155 28157 /* target = X A B B */
28156 28158 ix86_expand_vector_set (false, target, val, 0);
28157 28159 /* target = A X C D */
28158 28160 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28159 28161 GEN_INT (1), GEN_INT (0),
28160 28162 GEN_INT (2+4), GEN_INT (3+4)));
28161 28163 return;
28162 28164
28163 28165 case 2:
28164 28166 /* tmp = target = A B C D */
28165 28167 tmp = copy_to_reg (target);
28166 28168 /* tmp = X B C D */
28167 28169 ix86_expand_vector_set (false, tmp, val, 0);
28168 28170 /* target = A B X D */
28169 28171 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28170 28172 GEN_INT (0), GEN_INT (1),
28171 28173 GEN_INT (0+4), GEN_INT (3+4)));
28172 28174 return;
28173 28175
28174 28176 case 3:
28175 28177 /* tmp = target = A B C D */
28176 28178 tmp = copy_to_reg (target);
28177 28179 /* tmp = X B C D */
28178 28180 ix86_expand_vector_set (false, tmp, val, 0);
28179 28181 /* target = A B X D */
28180 28182 emit_insn (gen_sse_shufps_v4sf (target, target, tmp,
28181 28183 GEN_INT (0), GEN_INT (1),
28182 28184 GEN_INT (2+4), GEN_INT (0+4)));
28183 28185 return;
28184 28186
28185 28187 default:
28186 28188 gcc_unreachable ();
28187 28189 }
28188 28190 break;
28189 28191
28190 28192 case V4SImode:
28191 28193 use_vec_merge = TARGET_SSE4_1;
28192 28194 if (use_vec_merge)
28193 28195 break;
28194 28196
28195 28197 /* Element 0 handled by vec_merge below. */
28196 28198 if (elt == 0)
28197 28199 {
28198 28200 use_vec_merge = true;
28199 28201 break;
28200 28202 }
28201 28203
28202 28204 if (TARGET_SSE2)
28203 28205 {
28204 28206 /* With SSE2, use integer shuffles to swap element 0 and ELT,
28205 28207 store into element 0, then shuffle them back. */
28206 28208
28207 28209 rtx order[4];
28208 28210
28209 28211 order[0] = GEN_INT (elt);
28210 28212 order[1] = const1_rtx;
28211 28213 order[2] = const2_rtx;
28212 28214 order[3] = GEN_INT (3);
28213 28215 order[elt] = const0_rtx;
28214 28216
28215 28217 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28216 28218 order[1], order[2], order[3]));
28217 28219
28218 28220 ix86_expand_vector_set (false, target, val, 0);
28219 28221
28220 28222 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
28221 28223 order[1], order[2], order[3]));
28222 28224 }
28223 28225 else
28224 28226 {
28225 28227 /* For SSE1, we have to reuse the V4SF code. */
28226 28228 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
28227 28229 gen_lowpart (SFmode, val), elt);
28228 28230 }
28229 28231 return;
28230 28232
28231 28233 case V8HImode:
28232 28234 use_vec_merge = TARGET_SSE2;
28233 28235 break;
28234 28236 case V4HImode:
28235 28237 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28236 28238 break;
28237 28239
28238 28240 case V16QImode:
28239 28241 use_vec_merge = TARGET_SSE4_1;
28240 28242 break;
28241 28243
28242 28244 case V8QImode:
28243 28245 break;
28244 28246
28245 28247 case V32QImode:
28246 28248 half_mode = V16QImode;
28247 28249 j = 0;
28248 28250 n = 16;
28249 28251 goto half;
28250 28252
28251 28253 case V16HImode:
28252 28254 half_mode = V8HImode;
28253 28255 j = 1;
28254 28256 n = 8;
28255 28257 goto half;
28256 28258
28257 28259 case V8SImode:
28258 28260 half_mode = V4SImode;
28259 28261 j = 2;
28260 28262 n = 4;
28261 28263 goto half;
28262 28264
28263 28265 case V4DImode:
28264 28266 half_mode = V2DImode;
28265 28267 j = 3;
28266 28268 n = 2;
28267 28269 goto half;
28268 28270
28269 28271 case V8SFmode:
28270 28272 half_mode = V4SFmode;
28271 28273 j = 4;
28272 28274 n = 4;
28273 28275 goto half;
28274 28276
28275 28277 case V4DFmode:
28276 28278 half_mode = V2DFmode;
28277 28279 j = 5;
28278 28280 n = 2;
28279 28281 goto half;
28280 28282
28281 28283 half:
28282 28284 /* Compute offset. */
28283 28285 i = elt / n;
28284 28286 elt %= n;
28285 28287
28286 28288 gcc_assert (i <= 1);
28287 28289
28288 28290 /* Extract the half. */
28289 28291 tmp = gen_reg_rtx (half_mode);
28290 28292 emit_insn ((*gen_extract[j][i]) (tmp, target));
28291 28293
28292 28294 /* Put val in tmp at elt. */
28293 28295 ix86_expand_vector_set (false, tmp, val, elt);
28294 28296
28295 28297 /* Put it back. */
28296 28298 emit_insn ((*gen_insert[j][i]) (target, target, tmp));
28297 28299 return;
28298 28300
28299 28301 default:
28300 28302 break;
28301 28303 }
28302 28304
28303 28305 if (use_vec_merge)
28304 28306 {
28305 28307 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
28306 28308 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
28307 28309 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28308 28310 }
28309 28311 else
28310 28312 {
28311 28313 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28312 28314
28313 28315 emit_move_insn (mem, target);
28314 28316
28315 28317 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28316 28318 emit_move_insn (tmp, val);
28317 28319
28318 28320 emit_move_insn (target, mem);
28319 28321 }
28320 28322 }
28321 28323
28322 28324 void
28323 28325 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
28324 28326 {
28325 28327 enum machine_mode mode = GET_MODE (vec);
28326 28328 enum machine_mode inner_mode = GET_MODE_INNER (mode);
28327 28329 bool use_vec_extr = false;
28328 28330 rtx tmp;
28329 28331
28330 28332 switch (mode)
28331 28333 {
28332 28334 case V2SImode:
28333 28335 case V2SFmode:
28334 28336 if (!mmx_ok)
28335 28337 break;
28336 28338 /* FALLTHRU */
28337 28339
28338 28340 case V2DFmode:
28339 28341 case V2DImode:
28340 28342 use_vec_extr = true;
28341 28343 break;
28342 28344
28343 28345 case V4SFmode:
28344 28346 use_vec_extr = TARGET_SSE4_1;
28345 28347 if (use_vec_extr)
28346 28348 break;
28347 28349
28348 28350 switch (elt)
28349 28351 {
28350 28352 case 0:
28351 28353 tmp = vec;
28352 28354 break;
28353 28355
28354 28356 case 1:
28355 28357 case 3:
28356 28358 tmp = gen_reg_rtx (mode);
28357 28359 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec,
28358 28360 GEN_INT (elt), GEN_INT (elt),
28359 28361 GEN_INT (elt+4), GEN_INT (elt+4)));
28360 28362 break;
28361 28363
28362 28364 case 2:
28363 28365 tmp = gen_reg_rtx (mode);
28364 28366 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
28365 28367 break;
28366 28368
28367 28369 default:
28368 28370 gcc_unreachable ();
28369 28371 }
28370 28372 vec = tmp;
28371 28373 use_vec_extr = true;
28372 28374 elt = 0;
28373 28375 break;
28374 28376
28375 28377 case V4SImode:
28376 28378 use_vec_extr = TARGET_SSE4_1;
28377 28379 if (use_vec_extr)
28378 28380 break;
28379 28381
28380 28382 if (TARGET_SSE2)
28381 28383 {
28382 28384 switch (elt)
28383 28385 {
28384 28386 case 0:
28385 28387 tmp = vec;
28386 28388 break;
28387 28389
28388 28390 case 1:
28389 28391 case 3:
28390 28392 tmp = gen_reg_rtx (mode);
28391 28393 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
28392 28394 GEN_INT (elt), GEN_INT (elt),
28393 28395 GEN_INT (elt), GEN_INT (elt)));
28394 28396 break;
28395 28397
28396 28398 case 2:
28397 28399 tmp = gen_reg_rtx (mode);
28398 28400 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
28399 28401 break;
28400 28402
28401 28403 default:
28402 28404 gcc_unreachable ();
28403 28405 }
28404 28406 vec = tmp;
28405 28407 use_vec_extr = true;
28406 28408 elt = 0;
28407 28409 }
28408 28410 else
28409 28411 {
28410 28412 /* For SSE1, we have to reuse the V4SF code. */
28411 28413 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
28412 28414 gen_lowpart (V4SFmode, vec), elt);
28413 28415 return;
28414 28416 }
28415 28417 break;
28416 28418
28417 28419 case V8HImode:
28418 28420 use_vec_extr = TARGET_SSE2;
28419 28421 break;
28420 28422 case V4HImode:
28421 28423 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
28422 28424 break;
28423 28425
28424 28426 case V16QImode:
28425 28427 use_vec_extr = TARGET_SSE4_1;
28426 28428 break;
28427 28429
28428 28430 case V8QImode:
28429 28431 /* ??? Could extract the appropriate HImode element and shift. */
28430 28432 default:
28431 28433 break;
28432 28434 }
28433 28435
28434 28436 if (use_vec_extr)
28435 28437 {
28436 28438 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
28437 28439 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
28438 28440
28439 28441 /* Let the rtl optimizers know about the zero extension performed. */
28440 28442 if (inner_mode == QImode || inner_mode == HImode)
28441 28443 {
28442 28444 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
28443 28445 target = gen_lowpart (SImode, target);
28444 28446 }
28445 28447
28446 28448 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
28447 28449 }
28448 28450 else
28449 28451 {
28450 28452 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
28451 28453
28452 28454 emit_move_insn (mem, vec);
28453 28455
28454 28456 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
28455 28457 emit_move_insn (target, tmp);
28456 28458 }
28457 28459 }
28458 28460
28459 28461 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
28460 28462 pattern to reduce; DEST is the destination; IN is the input vector. */
28461 28463
28462 28464 void
28463 28465 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
28464 28466 {
28465 28467 rtx tmp1, tmp2, tmp3;
28466 28468
28467 28469 tmp1 = gen_reg_rtx (V4SFmode);
28468 28470 tmp2 = gen_reg_rtx (V4SFmode);
28469 28471 tmp3 = gen_reg_rtx (V4SFmode);
28470 28472
28471 28473 emit_insn (gen_sse_movhlps (tmp1, in, in));
28472 28474 emit_insn (fn (tmp2, tmp1, in));
28473 28475
28474 28476 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2,
28475 28477 GEN_INT (1), GEN_INT (1),
28476 28478 GEN_INT (1+4), GEN_INT (1+4)));
28477 28479 emit_insn (fn (dest, tmp2, tmp3));
28478 28480 }
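
/* Editor's sketch, not part of the patch: what the SSE1 reduction above
   computes for element 0 of DEST when IN is [a, b, c, d] and FN is a
   commutative operation such as min, max or plus.  movhlps pairs elements
   0/2 and 1/3, the shufps lines the 1/3 result up with the 0/2 result,
   and the final FN combines the two partial results.  */

static float
reduc_v4sf_sketch (float (*op) (float, float),
                   float a, float b, float c, float d)
{
  float ac = op (a, c);   /* element 0 after fn (tmp2, tmp1, in) */
  float bd = op (b, d);   /* element 1 after fn (tmp2, tmp1, in) */
  return op (ac, bd);     /* element 0 of dest                   */
}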
28479 28481
28480 28482 /* Target hook for scalar_mode_supported_p. */
28481 28483 static bool
28482 28484 ix86_scalar_mode_supported_p (enum machine_mode mode)
28483 28485 {
28484 28486 if (DECIMAL_FLOAT_MODE_P (mode))
28485 28487 return true;
28486 28488 else if (mode == TFmode)
28487 28489 return true;
28488 28490 else
28489 28491 return default_scalar_mode_supported_p (mode);
28490 28492 }
28491 28493
28492 28494 /* Implements target hook vector_mode_supported_p. */
28493 28495 static bool
28494 28496 ix86_vector_mode_supported_p (enum machine_mode mode)
28495 28497 {
28496 28498 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
28497 28499 return true;
28498 28500 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
28499 28501 return true;
28500 28502 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode))
28501 28503 return true;
28502 28504 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
28503 28505 return true;
28504 28506 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
28505 28507 return true;
28506 28508 return false;
28507 28509 }
28508 28510
28509 28511 /* Target hook for c_mode_for_suffix. */
28510 28512 static enum machine_mode
28511 28513 ix86_c_mode_for_suffix (char suffix)
28512 28514 {
28513 28515 if (suffix == 'q')
28514 28516 return TFmode;
28515 28517 if (suffix == 'w')
28516 28518 return XFmode;
28517 28519
28518 28520 return VOIDmode;
28519 28521 }
28520 28522
28521 28523 /* Worker function for TARGET_MD_ASM_CLOBBERS.
28522 28524
28523 28525 We do this in the new i386 backend to maintain source compatibility
28524 28526 with the old cc0-based compiler. */
28525 28527
28526 28528 static tree
28527 28529 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
28528 28530 tree inputs ATTRIBUTE_UNUSED,
28529 28531 tree clobbers)
28530 28532 {
28531 28533 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
28532 28534 clobbers);
28533 28535 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
28534 28536 clobbers);
28535 28537 return clobbers;
28536 28538 }
28537 28539
28538 28540 /* Implements target vector targetm.asm.encode_section_info. This
28539 28541 is not used by netware. */
28540 28542
28541 28543 static void ATTRIBUTE_UNUSED
28542 28544 ix86_encode_section_info (tree decl, rtx rtl, int first)
28543 28545 {
28544 28546 default_encode_section_info (decl, rtl, first);
28545 28547
28546 28548 if (TREE_CODE (decl) == VAR_DECL
28547 28549 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
28548 28550 && ix86_in_large_data_p (decl))
28549 28551 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
28550 28552 }
28551 28553
28552 28554 /* Worker function for REVERSE_CONDITION. */
28553 28555
28554 28556 enum rtx_code
28555 28557 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
28556 28558 {
28557 28559 return (mode != CCFPmode && mode != CCFPUmode
28558 28560 ? reverse_condition (code)
28559 28561 : reverse_condition_maybe_unordered (code));
28560 28562 }
28561 28563
28562 28564 /* Output code to perform an x87 FP register move, from OPERANDS[1]
28563 28565 to OPERANDS[0]. */
28564 28566
28565 28567 const char *
28566 28568 output_387_reg_move (rtx insn, rtx *operands)
28567 28569 {
28568 28570 if (REG_P (operands[0]))
28569 28571 {
28570 28572 if (REG_P (operands[1])
28571 28573 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28572 28574 {
28573 28575 if (REGNO (operands[0]) == FIRST_STACK_REG)
28574 28576 return output_387_ffreep (operands, 0);
28575 28577 return "fstp\t%y0";
28576 28578 }
28577 28579 if (STACK_TOP_P (operands[0]))
28578 28580 return "fld%z1\t%y1";
28579 28581 return "fst\t%y0";
28580 28582 }
28581 28583 else if (MEM_P (operands[0]))
28582 28584 {
28583 28585 gcc_assert (REG_P (operands[1]));
28584 28586 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
28585 28587 return "fstp%z0\t%y0";
28586 28588 else
28587 28589 {
28588 28590 /* There is no non-popping store to memory for XFmode.
28589 28591 So if we need one, follow the store with a load. */
28590 28592 if (GET_MODE (operands[0]) == XFmode)
28591 28593 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
28592 28594 else
28593 28595 return "fst%z0\t%y0";
28594 28596 }
28595 28597 }
28596 28598 else
28597 28599 gcc_unreachable();
28598 28600 }
28599 28601
28600 28602 /* Output code to perform a conditional jump to LABEL, if C2 flag in
28601 28603 FP status register is set. */
28602 28604
28603 28605 void
28604 28606 ix86_emit_fp_unordered_jump (rtx label)
28605 28607 {
28606 28608 rtx reg = gen_reg_rtx (HImode);
28607 28609 rtx temp;
28608 28610
28609 28611 emit_insn (gen_x86_fnstsw_1 (reg));
28610 28612
28611 28613 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))
28612 28614 {
28613 28615 emit_insn (gen_x86_sahf_1 (reg));
28614 28616
28615 28617 temp = gen_rtx_REG (CCmode, FLAGS_REG);
28616 28618 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
28617 28619 }
28618 28620 else
28619 28621 {
28620 28622 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
28621 28623
28622 28624 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
28623 28625 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
28624 28626 }
28625 28627
28626 28628 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
28627 28629 gen_rtx_LABEL_REF (VOIDmode, label),
28628 28630 pc_rtx);
28629 28631 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
28630 28632
28631 28633 emit_jump_insn (temp);
28632 28634 predict_jump (REG_BR_PROB_BASE * 10 / 100);
28633 28635 }
28634 28636
28635 28637 /* Output code to perform a log1p XFmode calculation. */
28636 28638
28637 28639 void ix86_emit_i387_log1p (rtx op0, rtx op1)
28638 28640 {
28639 28641 rtx label1 = gen_label_rtx ();
28640 28642 rtx label2 = gen_label_rtx ();
28641 28643
28642 28644 rtx tmp = gen_reg_rtx (XFmode);
28643 28645 rtx tmp2 = gen_reg_rtx (XFmode);
28644 28646
28645 28647 emit_insn (gen_absxf2 (tmp, op1));
28646 28648 emit_insn (gen_cmpxf (tmp,
28647 28649 CONST_DOUBLE_FROM_REAL_VALUE (
28648 28650 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
28649 28651 XFmode)));
28650 28652 emit_jump_insn (gen_bge (label1));
28651 28653
28652 28654 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28653 28655 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
28654 28656 emit_jump (label2);
28655 28657
28656 28658 emit_label (label1);
28657 28659 emit_move_insn (tmp, CONST1_RTX (XFmode));
28658 28660 emit_insn (gen_addxf3 (tmp, op1, tmp));
28659 28661 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
28660 28662 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
28661 28663
28662 28664 emit_label (label2);
28663 28665 }
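
/* Editor's sketch, not part of the patch: the branch structure of the
   log1p expansion above, in scalar C.  The magic constant is 1 - sqrt(2)/2;
   fyl2xp1 is only specified for |x| below that bound, which is why small
   arguments take the fyl2xp1 path (1 + x is never formed, so there is no
   cancellation) while larger ones form 1 + x and use fldln2/fyl2x.  */

#include <math.h>

static double
log1p_sketch (double x)
{
  if (fabs (x) < 0.29289321881345247561810596348408353)  /* 1 - sqrt(2)/2 */
    return log1p (x);      /* fyl2xp1 path: precise for small |x|          */
  else
    return log (1.0 + x);  /* fldln2; fyl2x path: form 1 + x explicitly    */
}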
28664 28666
28665 28667 /* Output code to perform a Newton-Raphson approximation of a single precision
28666 28668 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
28667 28669
28668 28670 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
28669 28671 {
28670 28672 rtx x0, x1, e0, e1, two;
28671 28673
28672 28674 x0 = gen_reg_rtx (mode);
28673 28675 e0 = gen_reg_rtx (mode);
28674 28676 e1 = gen_reg_rtx (mode);
28675 28677 x1 = gen_reg_rtx (mode);
28676 28678
28677 28679 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
28678 28680
28679 28681 if (VECTOR_MODE_P (mode))
28680 28682 two = ix86_build_const_vector (SFmode, true, two);
28681 28683
28682 28684 two = force_reg (mode, two);
28683 28685
28684 28686 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
28685 28687
28686 28688 /* x0 = rcp(b) estimate */
28687 28689 emit_insn (gen_rtx_SET (VOIDmode, x0,
28688 28690 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
28689 28691 UNSPEC_RCP)));
28690 28692 /* e0 = x0 * b */
28691 28693 emit_insn (gen_rtx_SET (VOIDmode, e0,
28692 28694 gen_rtx_MULT (mode, x0, b)));
28693 28695 /* e1 = 2. - e0 */
28694 28696 emit_insn (gen_rtx_SET (VOIDmode, e1,
28695 28697 gen_rtx_MINUS (mode, two, e0)));
28696 28698 /* x1 = x0 * e1 */
28697 28699 emit_insn (gen_rtx_SET (VOIDmode, x1,
28698 28700 gen_rtx_MULT (mode, x0, e1)));
28699 28701 /* res = a * x1 */
28700 28702 emit_insn (gen_rtx_SET (VOIDmode, res,
28701 28703 gen_rtx_MULT (mode, a, x1)));
28702 28704 }
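
/* Editor's sketch, not part of the patch: the reciprocal refinement
   emitted above, in scalar form.  rcp_estimate is a hypothetical stand-in
   for the RCPSS/RCPPS hardware estimate (roughly 12 bits of precision);
   a single Newton-Raphson step x1 = x0 * (2 - b * x0) roughly doubles the
   number of correct bits before the final multiply by a.  */

static float
swdiv_sketch (float a, float b, float (*rcp_estimate) (float))
{
  float x0 = rcp_estimate (b);   /* x0 = rcp(b) estimate */
  float e0 = x0 * b;             /* e0 = x0 * b          */
  float e1 = 2.0f - e0;          /* e1 = 2. - e0         */
  float x1 = x0 * e1;            /* x1 = x0 * e1         */
  return a * x1;                 /* res = a * x1         */
}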
28703 28705
28704 28706 /* Output code to perform a Newton-Raphson approximation of a
28705 28707 single precision floating point [reciprocal] square root. */
28706 28708
28707 28709 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
28708 28710 bool recip)
28709 28711 {
28710 28712 rtx x0, e0, e1, e2, e3, mthree, mhalf;
28711 28713 REAL_VALUE_TYPE r;
28712 28714
28713 28715 x0 = gen_reg_rtx (mode);
28714 28716 e0 = gen_reg_rtx (mode);
28715 28717 e1 = gen_reg_rtx (mode);
28716 28718 e2 = gen_reg_rtx (mode);
28717 28719 e3 = gen_reg_rtx (mode);
28718 28720
28719 28721 real_from_integer (&r, VOIDmode, -3, -1, 0);
28720 28722 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28721 28723
28722 28724 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL);
28723 28725 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode);
28724 28726
28725 28727 if (VECTOR_MODE_P (mode))
28726 28728 {
28727 28729 mthree = ix86_build_const_vector (SFmode, true, mthree);
28728 28730 mhalf = ix86_build_const_vector (SFmode, true, mhalf);
28729 28731 }
28730 28732
28731 28733 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0)
28732 28734 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */
28733 28735
28734 28736 /* x0 = rsqrt(a) estimate */
28735 28737 emit_insn (gen_rtx_SET (VOIDmode, x0,
28736 28738 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
28737 28739 UNSPEC_RSQRT)));
28738 28740
28739 28741 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). */
28740 28742 if (!recip)
28741 28743 {
28742 28744 rtx zero, mask;
28743 28745
28744 28746 zero = gen_reg_rtx (mode);
28745 28747 mask = gen_reg_rtx (mode);
28746 28748
28747 28749 zero = force_reg (mode, CONST0_RTX(mode));
28748 28750 emit_insn (gen_rtx_SET (VOIDmode, mask,
28749 28751 gen_rtx_NE (mode, zero, a)));
28750 28752
28751 28753 emit_insn (gen_rtx_SET (VOIDmode, x0,
28752 28754 gen_rtx_AND (mode, x0, mask)));
28753 28755 }
28754 28756
28755 28757 /* e0 = x0 * a */
28756 28758 emit_insn (gen_rtx_SET (VOIDmode, e0,
28757 28759 gen_rtx_MULT (mode, x0, a)));
28758 28760 /* e1 = e0 * x0 */
28759 28761 emit_insn (gen_rtx_SET (VOIDmode, e1,
28760 28762 gen_rtx_MULT (mode, e0, x0)));
28761 28763
28762 28764 /* e2 = e1 - 3. */
28763 28765 mthree = force_reg (mode, mthree);
28764 28766 emit_insn (gen_rtx_SET (VOIDmode, e2,
28765 28767 gen_rtx_PLUS (mode, e1, mthree)));
28766 28768
28767 28769 mhalf = force_reg (mode, mhalf);
28768 28770 if (recip)
28769 28771 /* e3 = -.5 * x0 */
28770 28772 emit_insn (gen_rtx_SET (VOIDmode, e3,
28771 28773 gen_rtx_MULT (mode, x0, mhalf)));
28772 28774 else
28773 28775 /* e3 = -.5 * e0 */
28774 28776 emit_insn (gen_rtx_SET (VOIDmode, e3,
28775 28777 gen_rtx_MULT (mode, e0, mhalf)));
28776 28778 /* ret = e2 * e3 */
28777 28779 emit_insn (gen_rtx_SET (VOIDmode, res,
28778 28780 gen_rtx_MULT (mode, e2, e3)));
28779 28781 }
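
/* Illustrative scalar sketch of the sequence emitted above: one
   Newton-Raphson step on a hardware reciprocal square root estimate.
   "swsqrt_sketch" and the "rsqrt_estimate" callback (standing in for
   rsqrtss/rsqrtps) are assumptions of this sketch; the zero filtering
   done above for the sqrt case is omitted for brevity.  */
static float
swsqrt_sketch (float a, int recip, float (*rsqrt_estimate) (float))
{
  float x0 = rsqrt_estimate (a);          /* x0 ~ 1/sqrt(a)            */
  float e0 = x0 * a;                      /* e0 = a * x0               */
  float e1 = e0 * x0;                     /* e1 = a * x0 * x0          */
  float e2 = e1 - 3.0f;                   /* e2 = a * x0 * x0 - 3.0    */
  float e3 = (recip ? x0 : e0) * -0.5f;   /* -.5 * x0  or  -.5 * a*x0  */
  return e2 * e3;                         /* rsqrt(a) or sqrt(a)       */
}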
28780 28782
28781 28783 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
28782 28784
28783 28785 static void ATTRIBUTE_UNUSED
28784 28786 i386_solaris_elf_named_section (const char *name, unsigned int flags,
28785 28787 tree decl)
28786 28788 {
28787 28789 /* With Binutils 2.15, the "@unwind" marker must be specified on
28788 28790 every occurrence of the ".eh_frame" section, not just the first
28789 28791 one. */
28790 28792 if (TARGET_64BIT
28791 28793 && strcmp (name, ".eh_frame") == 0)
28792 28794 {
28793 28795 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
28794 28796 flags & SECTION_WRITE ? "aw" : "a");
28795 28797 return;
28796 28798 }
28797 28799 default_elf_asm_named_section (name, flags, decl);
28798 28800 }
28799 28801
28800 28802 /* Return the mangling of TYPE if it is an extended fundamental type. */
28801 28803
28802 28804 static const char *
28803 28805 ix86_mangle_type (const_tree type)
28804 28806 {
28805 28807 type = TYPE_MAIN_VARIANT (type);
28806 28808
28807 28809 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28808 28810 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28809 28811 return NULL;
28810 28812
28811 28813 switch (TYPE_MODE (type))
28812 28814 {
28813 28815 case TFmode:
28814 28816 /* __float128 is "g". */
28815 28817 return "g";
28816 28818 case XFmode:
28817 28819 /* "long double" or __float80 is "e". */
28818 28820 return "e";
28819 28821 default:
28820 28822 return NULL;
28821 28823 }
28822 28824 }
28823 28825
28824 28826 /* For 32-bit code we can save PIC register setup by using
28825 28827 __stack_chk_fail_local hidden function instead of calling
28826 28828 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
28827 28829 register, so it is better to call __stack_chk_fail directly. */
28828 28830
28829 28831 static tree
28830 28832 ix86_stack_protect_fail (void)
28831 28833 {
28832 28834 return TARGET_64BIT
28833 28835 ? default_external_stack_protect_fail ()
28834 28836 : default_hidden_stack_protect_fail ();
28835 28837 }
28836 28838
28837 28839 /* Select a format to encode pointers in exception handling data. CODE
28838 28840 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
28839 28841 true if the symbol may be affected by dynamic relocations.
28840 28842
28841 28843 ??? All x86 object file formats are capable of representing this.
28842 28844 After all, the relocation needed is the same as for the call insn.
28843 28845 Whether or not a particular assembler allows us to enter such, I
28844 28846 guess we'll have to see. */
28845 28847 int
28846 28848 asm_preferred_eh_data_format (int code, int global)
28847 28849 {
28848 28850 if (flag_pic)
28849 28851 {
28850 28852 int type = DW_EH_PE_sdata8;
28851 28853 if (!TARGET_64BIT
28852 28854 || ix86_cmodel == CM_SMALL_PIC
28853 28855 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
28854 28856 type = DW_EH_PE_sdata4;
28855 28857 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
28856 28858 }
28857 28859 if (ix86_cmodel == CM_SMALL
28858 28860 || (ix86_cmodel == CM_MEDIUM && code))
28859 28861 return DW_EH_PE_udata4;
28860 28862 return DW_EH_PE_absptr;
28861 28863 }
28862 28864
28863 28865 /* Expand copysign from SIGN to the positive value ABS_VALUE
28864 28866 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
28865 28867 the sign-bit. */
28866 28868 static void
28867 28869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
28868 28870 {
28869 28871 enum machine_mode mode = GET_MODE (sign);
28870 28872 rtx sgn = gen_reg_rtx (mode);
28871 28873 if (mask == NULL_RTX)
28872 28874 {
28873 28875 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
28874 28876 if (!VECTOR_MODE_P (mode))
28875 28877 {
28876 28878 /* We need to generate a scalar mode mask in this case. */
28877 28879 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28878 28880 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28879 28881 mask = gen_reg_rtx (mode);
28880 28882 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28881 28883 }
28882 28884 }
28883 28885 else
28884 28886 mask = gen_rtx_NOT (mode, mask);
28885 28887 emit_insn (gen_rtx_SET (VOIDmode, sgn,
28886 28888 gen_rtx_AND (mode, mask, sign)));
28887 28889 emit_insn (gen_rtx_SET (VOIDmode, result,
28888 28890 gen_rtx_IOR (mode, abs_value, sgn)));
28889 28891 }
28890 28892
28891 28893 /* Expand fabs (OP0) and return a new rtx that holds the result. The
28892 28894 mask for masking out the sign-bit is stored in *SMASK, if that is
28893 28895 non-null. */
28894 28896 static rtx
28895 28897 ix86_expand_sse_fabs (rtx op0, rtx *smask)
28896 28898 {
28897 28899 enum machine_mode mode = GET_MODE (op0);
28898 28900 rtx xa, mask;
28899 28901
28900 28902 xa = gen_reg_rtx (mode);
28901 28903 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
28902 28904 if (!VECTOR_MODE_P (mode))
28903 28905 {
28904 28906 /* We need to generate a scalar mode mask in this case. */
28905 28907 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
28906 28908 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
28907 28909 mask = gen_reg_rtx (mode);
28908 28910 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
28909 28911 }
28910 28912 emit_insn (gen_rtx_SET (VOIDmode, xa,
28911 28913 gen_rtx_AND (mode, op0, mask)));
28912 28914
28913 28915 if (smask)
28914 28916 *smask = mask;
28915 28917
28916 28918 return xa;
28917 28919 }
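
/* Illustrative scalar sketch of the bit operations the two helpers above
   emit: fabs clears the IEEE sign bit with an AND mask, and copysign ORs
   the sign bit of SIGN back into the absolute value.  "copysign_sketch"
   is a hypothetical name, and a 32-bit unsigned int is assumed.  */
static float
copysign_sketch (float abs_value, float sign)
{
  const unsigned int signmask = 0x80000000u;
  unsigned int a, s;
  __builtin_memcpy (&a, &abs_value, sizeof a);
  __builtin_memcpy (&s, &sign, sizeof s);
  a &= ~signmask;                 /* fabs: mask out the sign bit       */
  a |= s & signmask;              /* copysign: OR in the sign of SIGN  */
  __builtin_memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}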
28918 28920
28919 28921 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
28920 28922 swapping the operands if SWAP_OPERANDS is true. The expanded
28921 28923 code is a forward jump to a newly created label in case the
28922 28924 comparison is true. The generated label rtx is returned. */
28923 28925 static rtx
28924 28926 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
28925 28927 bool swap_operands)
28926 28928 {
28927 28929 rtx label, tmp;
28928 28930
28929 28931 if (swap_operands)
28930 28932 {
28931 28933 tmp = op0;
28932 28934 op0 = op1;
28933 28935 op1 = tmp;
28934 28936 }
28935 28937
28936 28938 label = gen_label_rtx ();
28937 28939 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
28938 28940 emit_insn (gen_rtx_SET (VOIDmode, tmp,
28939 28941 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
28940 28942 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
28941 28943 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
28942 28944 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
28943 28945 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
28944 28946 JUMP_LABEL (tmp) = label;
28945 28947
28946 28948 return label;
28947 28949 }
28948 28950
28949 28951 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
28950 28952 using comparison code CODE. Operands are swapped for the comparison if
28951 28953 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
28952 28954 static rtx
28953 28955 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
28954 28956 bool swap_operands)
28955 28957 {
28956 28958 enum machine_mode mode = GET_MODE (op0);
28957 28959 rtx mask = gen_reg_rtx (mode);
28958 28960
28959 28961 if (swap_operands)
28960 28962 {
28961 28963 rtx tmp = op0;
28962 28964 op0 = op1;
28963 28965 op1 = tmp;
28964 28966 }
28965 28967
28966 28968 if (mode == DFmode)
28967 28969 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
28968 28970 gen_rtx_fmt_ee (code, mode, op0, op1)));
28969 28971 else
28970 28972 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
28971 28973 gen_rtx_fmt_ee (code, mode, op0, op1)));
28972 28974
28973 28975 return mask;
28974 28976 }
28975 28977
28976 28978 /* Generate and return a rtx of mode MODE for 2**n where n is the number
28977 28979 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
28978 28980 static rtx
28979 28981 ix86_gen_TWO52 (enum machine_mode mode)
28980 28982 {
28981 28983 REAL_VALUE_TYPE TWO52r;
28982 28984 rtx TWO52;
28983 28985
28984 28986 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
28985 28987 TWO52 = const_double_from_real_value (TWO52r, mode);
28986 28988 TWO52 = force_reg (mode, TWO52);
28987 28989
28988 28990 return TWO52;
28989 28991 }
28990 28992
28991 28993 /* Expand SSE sequence for computing lround from OP1 storing
28992 28994 into OP0. */
28993 28995 void
28994 28996 ix86_expand_lround (rtx op0, rtx op1)
28995 28997 {
28996 28998 /* C code for the stuff we're doing below:
28997 28999 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
28998 29000 return (long)tmp;
28999 29001 */
29000 29002 enum machine_mode mode = GET_MODE (op1);
29001 29003 const struct real_format *fmt;
29002 29004 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29003 29005 rtx adj;
29004 29006
29005 29007 /* load nextafter (0.5, 0.0) */
29006 29008 fmt = REAL_MODE_FORMAT (mode);
29007 29009 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29008 29010 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29009 29011
29010 29012 /* adj = copysign (0.5, op1) */
29011 29013 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
29012 29014 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
29013 29015
29014 29016 /* adj = op1 + adj */
29015 29017 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
29016 29018
29017 29019 /* op0 = (imode)adj */
29018 29020 expand_fix (op0, adj, 0);
29019 29021 }
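
/* Illustrative scalar sketch of the sequence above.  The adjustment is
   nextafter (0.5, 0.0) rather than 0.5 so that values just below one half
   (where adding exactly 0.5 would round up to 1.0 in the working
   precision) still truncate to 0.  "lround_sketch" is a hypothetical name
   used only for this sketch.  */
static long
lround_sketch (double x)
{
  double adj = __builtin_nextafter (0.5, 0.0);  /* largest double < 0.5  */
  adj = __builtin_copysign (adj, x);            /* match the sign of x   */
  return (long) (x + adj);                      /* truncating conversion */
}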
29020 29022
29021 29023 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1
29022 29024 storing into OPERAND0. */
29023 29025 void
29024 29026 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
29025 29027 {
29026 29028 /* C code for the stuff we're doing below (for do_floor):
29027 29029 xi = (long)op1;
29028 29030 xi -= (double)xi > op1 ? 1 : 0;
29029 29031 return xi;
29030 29032 */
29031 29033 enum machine_mode fmode = GET_MODE (op1);
29032 29034 enum machine_mode imode = GET_MODE (op0);
29033 29035 rtx ireg, freg, label, tmp;
29034 29036
29035 29037 /* ireg = (long)op1 */
29036 29038 ireg = gen_reg_rtx (imode);
29037 29039 expand_fix (ireg, op1, 0);
29038 29040
29039 29041 /* freg = (double)ireg */
29040 29042 freg = gen_reg_rtx (fmode);
29041 29043 expand_float (freg, ireg, 0);
29042 29044
29043 29045 /* ireg = (freg > op1) ? ireg - 1 : ireg */
29044 29046 label = ix86_expand_sse_compare_and_jump (UNLE,
29045 29047 freg, op1, !do_floor);
29046 29048 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
29047 29049 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
29048 29050 emit_move_insn (ireg, tmp);
29049 29051
29050 29052 emit_label (label);
29051 29053 LABEL_NUSES (label) = 1;
29052 29054
29053 29055 emit_move_insn (op0, ireg);
29054 29056 }
29055 29057
29056 29058 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
29057 29059 result in OPERAND0. */
29058 29060 void
29059 29061 ix86_expand_rint (rtx operand0, rtx operand1)
29060 29062 {
29061 29063 /* C code for the stuff we're doing below:
29062 29064 xa = fabs (operand1);
29063 29065 if (!isless (xa, 2**52))
29064 29066 return operand1;
29065 29067 xa = xa + 2**52 - 2**52;
29066 29068 return copysign (xa, operand1);
29067 29069 */
29068 29070 enum machine_mode mode = GET_MODE (operand0);
29069 29071 rtx res, xa, label, TWO52, mask;
29070 29072
29071 29073 res = gen_reg_rtx (mode);
29072 29074 emit_move_insn (res, operand1);
29073 29075
29074 29076 /* xa = abs (operand1) */
29075 29077 xa = ix86_expand_sse_fabs (res, &mask);
29076 29078
29077 29079 /* if (!isless (xa, TWO52)) goto label; */
29078 29080 TWO52 = ix86_gen_TWO52 (mode);
29079 29081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29080 29082
29081 29083 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29082 29084 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29083 29085
29084 29086 ix86_sse_copysign_to_positive (res, xa, res, mask);
29085 29087
29086 29088 emit_label (label);
29087 29089 LABEL_NUSES (label) = 1;
29088 29090
29089 29091 emit_move_insn (operand0, res);
29090 29092 }
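
/* Illustrative scalar sketch of the 2**52 trick used above: every double
   with magnitude >= 2**52 is already an integer, so for smaller inputs
   adding and then subtracting 2**52 rounds to an integer in the current
   rounding mode, and the sign is restored afterwards so -0.0 survives.
   The sketch assumes double-precision arithmetic with no excess precision
   or reassociation.  */
static double
rint_sketch (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = __builtin_fabs (x);
  if (!(xa < two52))                         /* !isless (xa, 2**52) */
    return x;                                /* already integral, or NaN */
  xa = (xa + two52) - two52;                 /* round to integer */
  return __builtin_copysign (xa, x);         /* restore the sign */
}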
29091 29093
29092 29094 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29093 29095 into OPERAND0. */
29094 29096 void
29095 29097 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
29096 29098 {
29097 29099 /* C code for the stuff we expand below.
29098 29100 double xa = fabs (x), x2;
29099 29101 if (!isless (xa, TWO52))
29100 29102 return x;
29101 29103 xa = xa + TWO52 - TWO52;
29102 29104 x2 = copysign (xa, x);
29103 29105 Compensate. Floor:
29104 29106 if (x2 > x)
29105 29107 x2 -= 1;
29106 29108 Compensate. Ceil:
29107 29109 if (x2 < x)
29108 29110 x2 -= -1;
29109 29111 return x2;
29110 29112 */
29111 29113 enum machine_mode mode = GET_MODE (operand0);
29112 29114 rtx xa, TWO52, tmp, label, one, res, mask;
29113 29115
29114 29116 TWO52 = ix86_gen_TWO52 (mode);
29115 29117
29116 29118 /* Temporary for holding the result, initialized to the input
29117 29119 operand to ease control flow. */
29118 29120 res = gen_reg_rtx (mode);
29119 29121 emit_move_insn (res, operand1);
29120 29122
29121 29123 /* xa = abs (operand1) */
29122 29124 xa = ix86_expand_sse_fabs (res, &mask);
29123 29125
29124 29126 /* if (!isless (xa, TWO52)) goto label; */
29125 29127 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29126 29128
29127 29129 /* xa = xa + TWO52 - TWO52; */
29128 29130 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29129 29131 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
29130 29132
29131 29133 /* xa = copysign (xa, operand1) */
29132 29134 ix86_sse_copysign_to_positive (xa, xa, res, mask);
29133 29135
29134 29136 /* generate 1.0 or -1.0 */
29135 29137 one = force_reg (mode,
29136 29138 const_double_from_real_value (do_floor
29137 29139 ? dconst1 : dconstm1, mode));
29138 29140
29139 29141 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29140 29142 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29141 29143 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29142 29144 gen_rtx_AND (mode, one, tmp)));
29143 29145 /* We always need to subtract here to preserve signed zero. */
29144 29146 tmp = expand_simple_binop (mode, MINUS,
29145 29147 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29146 29148 emit_move_insn (res, tmp);
29147 29149
29148 29150 emit_label (label);
29149 29151 LABEL_NUSES (label) = 1;
29150 29152
29151 29153 emit_move_insn (operand0, res);
29152 29154 }
29153 29155
29154 29156 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
29155 29157 into OPERAND0. */
29156 29158 void
29157 29159 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
29158 29160 {
29159 29161 /* C code for the stuff we expand below.
29160 29162 double xa = fabs (x), x2;
29161 29163 if (!isless (xa, TWO52))
29162 29164 return x;
29163 29165 x2 = (double)(long)x;
29164 29166 Compensate. Floor:
29165 29167 if (x2 > x)
29166 29168 x2 -= 1;
29167 29169 Compensate. Ceil:
29168 29170 if (x2 < x)
29169 29171 x2 += 1;
29170 29172 if (HONOR_SIGNED_ZEROS (mode))
29171 29173 return copysign (x2, x);
29172 29174 return x2;
29173 29175 */
29174 29176 enum machine_mode mode = GET_MODE (operand0);
29175 29177 rtx xa, xi, TWO52, tmp, label, one, res, mask;
29176 29178
29177 29179 TWO52 = ix86_gen_TWO52 (mode);
29178 29180
29179 29181 /* Temporary for holding the result, initialized to the input
29180 29182 operand to ease control flow. */
29181 29183 res = gen_reg_rtx (mode);
29182 29184 emit_move_insn (res, operand1);
29183 29185
29184 29186 /* xa = abs (operand1) */
29185 29187 xa = ix86_expand_sse_fabs (res, &mask);
29186 29188
29187 29189 /* if (!isless (xa, TWO52)) goto label; */
29188 29190 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29189 29191
29190 29192 /* xa = (double)(long)x */
29191 29193 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29192 29194 expand_fix (xi, res, 0);
29193 29195 expand_float (xa, xi, 0);
29194 29196
29195 29197 /* generate 1.0 */
29196 29198 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29197 29199
29198 29200 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
29199 29201 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
29200 29202 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29201 29203 gen_rtx_AND (mode, one, tmp)));
29202 29204 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
29203 29205 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29204 29206 emit_move_insn (res, tmp);
29205 29207
29206 29208 if (HONOR_SIGNED_ZEROS (mode))
29207 29209 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29208 29210
29209 29211 emit_label (label);
29210 29212 LABEL_NUSES (label) = 1;
29211 29213
29212 29214 emit_move_insn (operand0, res);
29213 29215 }
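
/* Illustrative scalar sketch of the compensation used by the two
   floor/ceil expanders above: truncate toward zero, then step by one when
   the truncated value landed on the wrong side of the input.  The emitted
   code performs the conditional step branchlessly by ANDing 1.0 (or -1.0)
   with a comparison mask.  "floorceil_sketch" is a hypothetical name.  */
static double
floorceil_sketch (double x, int do_floor)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double x2;
  if (!(__builtin_fabs (x) < two52))
    return x;                                /* already integral, or NaN */
  x2 = (double) (long long) x;               /* truncate toward zero */
  if (do_floor)
    return x2 > x ? x2 - 1.0 : x2;           /* floor: step down */
  return x2 < x ? x2 + 1.0 : x2;             /* ceil: step up */
}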
29214 29216
29215 29217 /* Expand SSE sequence for computing round from OPERAND1 storing
29216 29218 into OPERAND0. Sequence that works without relying on DImode truncation
29217 29219 via cvttsd2siq that is only available on 64bit targets. */
29218 29220 void
29219 29221 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
29220 29222 {
29221 29223 /* C code for the stuff we expand below.
29222 29224 double xa = fabs (x), xa2, x2;
29223 29225 if (!isless (xa, TWO52))
29224 29226 return x;
29225 29227 Using the absolute value and copying back sign makes
29226 29228 -0.0 -> -0.0 correct.
29227 29229 xa2 = xa + TWO52 - TWO52;
29228 29230 Compensate.
29229 29231 dxa = xa2 - xa;
29230 29232 if (dxa <= -0.5)
29231 29233 xa2 += 1;
29232 29234 else if (dxa > 0.5)
29233 29235 xa2 -= 1;
29234 29236 x2 = copysign (xa2, x);
29235 29237 return x2;
29236 29238 */
29237 29239 enum machine_mode mode = GET_MODE (operand0);
29238 29240 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
29239 29241
29240 29242 TWO52 = ix86_gen_TWO52 (mode);
29241 29243
29242 29244 /* Temporary for holding the result, initialized to the input
29243 29245 operand to ease control flow. */
29244 29246 res = gen_reg_rtx (mode);
29245 29247 emit_move_insn (res, operand1);
29246 29248
29247 29249 /* xa = abs (operand1) */
29248 29250 xa = ix86_expand_sse_fabs (res, &mask);
29249 29251
29250 29252 /* if (!isless (xa, TWO52)) goto label; */
29251 29253 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29252 29254
29253 29255 /* xa2 = xa + TWO52 - TWO52; */
29254 29256 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29255 29257 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
29256 29258
29257 29259 /* dxa = xa2 - xa; */
29258 29260 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
29259 29261
29260 29262 /* generate 0.5, 1.0 and -0.5 */
29261 29263 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
29262 29264 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
29263 29265 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
29264 29266 0, OPTAB_DIRECT);
29265 29267
29266 29268 /* Compensate. */
29267 29269 tmp = gen_reg_rtx (mode);
29268 29270 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
29269 29271 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
29270 29272 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29271 29273 gen_rtx_AND (mode, one, tmp)));
29272 29274 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29273 29275 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
29274 29276 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
29275 29277 emit_insn (gen_rtx_SET (VOIDmode, tmp,
29276 29278 gen_rtx_AND (mode, one, tmp)));
29277 29279 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
29278 29280
29279 29281 /* res = copysign (xa2, operand1) */
29280 29282 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
29281 29283
29282 29284 emit_label (label);
29283 29285 LABEL_NUSES (label) = 1;
29284 29286
29285 29287 emit_move_insn (operand0, res);
29286 29288 }
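
/* Illustrative scalar sketch of the compensation above: the 2**52 trick
   rounds the magnitude to the nearest (even) integer, so the result is
   nudged by one whenever it ended up half a unit or more away from the
   input, which yields round-half-away-from-zero once the sign is copied
   back.  "round_sketch" is a hypothetical name.  */
static double
round_sketch (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = __builtin_fabs (x), xa2, dxa;
  if (!(xa < two52))
    return x;
  xa2 = (xa + two52) - two52;                /* nearest-even integer */
  dxa = xa2 - xa;
  if (dxa <= -0.5)
    xa2 += 1.0;                              /* rounded down across a half */
  else if (dxa > 0.5)
    xa2 -= 1.0;                              /* rounded up by more than a half */
  return __builtin_copysign (xa2, x);
}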
29287 29289
29288 29290 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29289 29291 into OPERAND0. */
29290 29292 void
29291 29293 ix86_expand_trunc (rtx operand0, rtx operand1)
29292 29294 {
29293 29295 /* C code for SSE variant we expand below.
29294 29296 double xa = fabs (x), x2;
29295 29297 if (!isless (xa, TWO52))
29296 29298 return x;
29297 29299 x2 = (double)(long)x;
29298 29300 if (HONOR_SIGNED_ZEROS (mode))
29299 29301 return copysign (x2, x);
29300 29302 return x2;
29301 29303 */
29302 29304 enum machine_mode mode = GET_MODE (operand0);
29303 29305 rtx xa, xi, TWO52, label, res, mask;
29304 29306
29305 29307 TWO52 = ix86_gen_TWO52 (mode);
29306 29308
29307 29309 /* Temporary for holding the result, initialized to the input
29308 29310 operand to ease control flow. */
29309 29311 res = gen_reg_rtx (mode);
29310 29312 emit_move_insn (res, operand1);
29311 29313
29312 29314 /* xa = abs (operand1) */
29313 29315 xa = ix86_expand_sse_fabs (res, &mask);
29314 29316
29315 29317 /* if (!isless (xa, TWO52)) goto label; */
29316 29318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29317 29319
29318 29320 /* x = (double)(long)x */
29319 29321 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29320 29322 expand_fix (xi, res, 0);
29321 29323 expand_float (res, xi, 0);
29322 29324
29323 29325 if (HONOR_SIGNED_ZEROS (mode))
29324 29326 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
29325 29327
29326 29328 emit_label (label);
29327 29329 LABEL_NUSES (label) = 1;
29328 29330
29329 29331 emit_move_insn (operand0, res);
29330 29332 }
29331 29333
29332 29334 /* Expand SSE sequence for computing trunc from OPERAND1 storing
29333 29335 into OPERAND0. */
29334 29336 void
29335 29337 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
29336 29338 {
29337 29339 enum machine_mode mode = GET_MODE (operand0);
29338 29340 rtx xa, mask, TWO52, label, one, res, smask, tmp;
29339 29341
29340 29342 /* C code for SSE variant we expand below.
29341 29343 double xa = fabs (x), x2;
29342 29344 if (!isless (xa, TWO52))
29343 29345 return x;
29344 29346 xa2 = xa + TWO52 - TWO52;
29345 29347 Compensate:
29346 29348 if (xa2 > xa)
29347 29349 xa2 -= 1.0;
29348 29350 x2 = copysign (xa2, x);
29349 29351 return x2;
29350 29352 */
29351 29353
29352 29354 TWO52 = ix86_gen_TWO52 (mode);
29353 29355
29354 29356 /* Temporary for holding the result, initialized to the input
29355 29357 operand to ease control flow. */
29356 29358 res = gen_reg_rtx (mode);
29357 29359 emit_move_insn (res, operand1);
29358 29360
29359 29361 /* xa = abs (operand1) */
29360 29362 xa = ix86_expand_sse_fabs (res, &smask);
29361 29363
29362 29364 /* if (!isless (xa, TWO52)) goto label; */
29363 29365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29364 29366
29365 29367 /* res = xa + TWO52 - TWO52; */
29366 29368 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
29367 29369 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
29368 29370 emit_move_insn (res, tmp);
29369 29371
29370 29372 /* generate 1.0 */
29371 29373 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
29372 29374
29373 29375 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
29374 29376 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
29375 29377 emit_insn (gen_rtx_SET (VOIDmode, mask,
29376 29378 gen_rtx_AND (mode, mask, one)));
29377 29379 tmp = expand_simple_binop (mode, MINUS,
29378 29380 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
29379 29381 emit_move_insn (res, tmp);
29380 29382
29381 29383 /* res = copysign (res, operand1) */
29382 29384 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
29383 29385
29384 29386 emit_label (label);
29385 29387 LABEL_NUSES (label) = 1;
29386 29388
29387 29389 emit_move_insn (operand0, res);
29388 29390 }
29389 29391
29390 29392 /* Expand SSE sequence for computing round from OPERAND1 storing
29391 29393 into OPERAND0. */
29392 29394 void
29393 29395 ix86_expand_round (rtx operand0, rtx operand1)
29394 29396 {
29395 29397 /* C code for the stuff we're doing below:
29396 29398 double xa = fabs (x);
29397 29399 if (!isless (xa, TWO52))
29398 29400 return x;
29399 29401 xa = (double)(long)(xa + nextafter (0.5, 0.0));
29400 29402 return copysign (xa, x);
29401 29403 */
29402 29404 enum machine_mode mode = GET_MODE (operand0);
29403 29405 rtx res, TWO52, xa, label, xi, half, mask;
29404 29406 const struct real_format *fmt;
29405 29407 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
29406 29408
29407 29409 /* Temporary for holding the result, initialized to the input
29408 29410 operand to ease control flow. */
29409 29411 res = gen_reg_rtx (mode);
29410 29412 emit_move_insn (res, operand1);
29411 29413
29412 29414 TWO52 = ix86_gen_TWO52 (mode);
29413 29415 xa = ix86_expand_sse_fabs (res, &mask);
29414 29416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
29415 29417
29416 29418 /* load nextafter (0.5, 0.0) */
29417 29419 fmt = REAL_MODE_FORMAT (mode);
29418 29420 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
29419 29421 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
29420 29422
29421 29423 /* xa = xa + 0.5 */
29422 29424 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
29423 29425 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
29424 29426
29425 29427 /* xa = (double)(int64_t)xa */
29426 29428 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
29427 29429 expand_fix (xi, xa, 0);
29428 29430 expand_float (xa, xi, 0);
29429 29431
29430 29432 /* res = copysign (xa, operand1) */
29431 29433 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
29432 29434
29433 29435 emit_label (label);
29434 29436 LABEL_NUSES (label) = 1;
29435 29437
29436 29438 emit_move_insn (operand0, res);
29437 29439 }
29438 29440
29439 29441
29440 29442 /* Check whether an SSE5 instruction is valid.
29441 29443 OPERANDS is the array of operands.
29442 29444 NUM is the number of operands.
29443 29445 USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29444 29446 NUM_MEMORY is the maximum number of memory operands to accept.
29445 29447 When COMMUTATIVE is set, operands 1 and 2 can be swapped. */
29446 29448
29447 29449 bool
29448 29450 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29449 29451 bool uses_oc0, int num_memory, bool commutative)
29450 29452 {
29451 29453 int mem_mask;
29452 29454 int mem_count;
29453 29455 int i;
29454 29456
29455 29457 /* Count the number of memory arguments */
29456 29458 mem_mask = 0;
29457 29459 mem_count = 0;
29458 29460 for (i = 0; i < num; i++)
29459 29461 {
29460 29462 enum machine_mode mode = GET_MODE (operands[i]);
29461 29463 if (register_operand (operands[i], mode))
29462 29464 ;
29463 29465
29464 29466 else if (memory_operand (operands[i], mode))
29465 29467 {
29466 29468 mem_mask |= (1 << i);
29467 29469 mem_count++;
29468 29470 }
29469 29471
29470 29472 else
29471 29473 {
29472 29474 rtx pattern = PATTERN (insn);
29473 29475
29474 29476 /* allow 0 for pcmov */
29475 29477 if (GET_CODE (pattern) != SET
29476 29478 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29477 29479 || i < 2
29478 29480 || operands[i] != CONST0_RTX (mode))
29479 29481 return false;
29480 29482 }
29481 29483 }
29482 29484
29483 29485 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29484 29486 a memory operation. */
29485 29487 if (num_memory < 0)
29486 29488 {
29487 29489 num_memory = -num_memory;
29488 29490 if ((mem_mask & (1 << (num-1))) != 0)
29489 29491 {
29490 29492 mem_mask &= ~(1 << (num-1));
29491 29493 mem_count--;
29492 29494 }
29493 29495 }
29494 29496
29495 29497 /* If there were no memory operations, allow the insn */
29496 29498 if (mem_mask == 0)
29497 29499 return true;
29498 29500
29499 29501 /* Do not allow the destination register to be a memory operand. */
29500 29502 else if (mem_mask & (1 << 0))
29501 29503 return false;
29502 29504
29503 29505 /* If there are too many memory operands, disallow the instruction. While
29504 29506 the hardware only allows one memory reference, before register allocation
29505 29507 we sometimes accept two memory operands for certain insns so that
29506 29508 code like the following can be optimized:
29507 29509
29508 29510 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29509 29511
29510 29512 or similar cases that are vectorized into using the fmaddss
29511 29513 instruction. */
29512 29514 else if (mem_count > num_memory)
29513 29515 return false;
29514 29516
29515 29517 /* Don't allow more than one memory operation if not optimizing. */
29516 29518 else if (mem_count > 1 && !optimize)
29517 29519 return false;
29518 29520
29519 29521 else if (num == 4 && mem_count == 1)
29520 29522 {
29521 29523 /* formats (destination is the first argument), example fmaddss:
29522 29524 xmm1, xmm1, xmm2, xmm3/mem
29523 29525 xmm1, xmm1, xmm2/mem, xmm3
29524 29526 xmm1, xmm2, xmm3/mem, xmm1
29525 29527 xmm1, xmm2/mem, xmm3, xmm1 */
29526 29528 if (uses_oc0)
29527 29529 return ((mem_mask == (1 << 1))
29528 29530 || (mem_mask == (1 << 2))
29529 29531 || (mem_mask == (1 << 3)));
29530 29532
29531 29533 /* format, example pmacsdd:
29532 29534 xmm1, xmm2, xmm3/mem, xmm1 */
29533 29535 if (commutative)
29534 29536 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29535 29537 else
29536 29538 return (mem_mask == (1 << 2));
29537 29539 }
29538 29540
29539 29541 else if (num == 4 && num_memory == 2)
29540 29542 {
29541 29543 /* If there are two memory operations, we can load one of the memory ops
29542 29544 into the destination register. This is for optimizing the
29543 29545 multiply/add ops, where the combiner has arranged for both the multiply
29544 29546 and the add insns to have a memory operand. We have to be careful
29545 29547 that the destination doesn't overlap with the inputs. */
29546 29548 rtx op0 = operands[0];
29547 29549
29548 29550 if (reg_mentioned_p (op0, operands[1])
29549 29551 || reg_mentioned_p (op0, operands[2])
29550 29552 || reg_mentioned_p (op0, operands[3]))
29551 29553 return false;
29552 29554
29553 29555 /* formats (destination is the first argument), example fmaddss:
29554 29556 xmm1, xmm1, xmm2, xmm3/mem
29555 29557 xmm1, xmm1, xmm2/mem, xmm3
29556 29558 xmm1, xmm2, xmm3/mem, xmm1
29557 29559 xmm1, xmm2/mem, xmm3, xmm1
29558 29560
29559 29561 For the oc0 case, we will load either operands[1] or operands[3] into
29560 29562 operands[0], so any combination of 2 memory operands is ok. */
29561 29563 if (uses_oc0)
29562 29564 return true;
29563 29565
29564 29566 /* format, example pmacsdd:
29565 29567 xmm1, xmm2, xmm3/mem, xmm1
29566 29568
29567 29569 For the integer multiply/add instructions be more restrictive and
29568 29570 require operands[2] and operands[3] to be the memory operands. */
29569 29571 if (commutative)
29570 29572 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29571 29573 else
29572 29574 return (mem_mask == ((1 << 2) | (1 << 3)));
29573 29575 }
29574 29576
29575 29577 else if (num == 3 && num_memory == 1)
29576 29578 {
29577 29579 /* formats, example protb:
29578 29580 xmm1, xmm2, xmm3/mem
29579 29581 xmm1, xmm2/mem, xmm3 */
29580 29582 if (uses_oc0)
29581 29583 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29582 29584
29583 29585 /* format, example comeq:
29584 29586 xmm1, xmm2, xmm3/mem */
29585 29587 else
29586 29588 return (mem_mask == (1 << 2));
29587 29589 }
29588 29590
29589 29591 else
29590 29592 gcc_unreachable ();
29591 29593
29592 29594 return false;
29593 29595 }
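
/* Illustrative sketch of the mem_mask encoding tested above: bit I of the
   mask is set when operands[I] is a memory reference, so each accepted
   operand/memory shape reduces to an equality test on the mask.
   "mem_mask_sketch" and the "is_mem" array are assumptions of this
   sketch.  */
static int
mem_mask_sketch (const int *is_mem, int num)
{
  int mask = 0, i;
  for (i = 0; i < num; i++)
    if (is_mem[i])
      mask |= 1 << i;
  return mask;            /* e.g. xmm1, xmm2, xmm3/mem  ->  1 << 2 */
}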
29594 29596
29595 29597
29596 29598 /* Fix up an SSE5 instruction that has 2 memory input references into a form the
29597 29599 hardware will allow by using the destination register to load one of the
29598 29600 memory operations. Presently this is used by the multiply/add routines to
29599 29601 allow 2 memory references. */
29600 29602
29601 29603 void
29602 29604 ix86_expand_sse5_multiple_memory (rtx operands[],
29603 29605 int num,
29604 29606 enum machine_mode mode)
29605 29607 {
29606 29608 rtx op0 = operands[0];
29607 29609 if (num != 4
29608 29610 || memory_operand (op0, mode)
29609 29611 || reg_mentioned_p (op0, operands[1])
29610 29612 || reg_mentioned_p (op0, operands[2])
29611 29613 || reg_mentioned_p (op0, operands[3]))
29612 29614 gcc_unreachable ();
29613 29615
29614 29616 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29615 29617 the destination register. */
29616 29618 if (memory_operand (operands[1], mode))
29617 29619 {
29618 29620 emit_move_insn (op0, operands[1]);
29619 29621 operands[1] = op0;
29620 29622 }
29621 29623 else if (memory_operand (operands[3], mode))
29622 29624 {
29623 29625 emit_move_insn (op0, operands[3]);
29624 29626 operands[3] = op0;
29625 29627 }
29626 29628 else
29627 29629 gcc_unreachable ();
29628 29630
29629 29631 return;
29630 29632 }
29631 29633
29632 29634
29633 29635 /* Table of valid machine attributes. */
29634 29636 static const struct attribute_spec ix86_attribute_table[] =
29635 29637 {
29636 29638 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29637 29639 /* Stdcall attribute says callee is responsible for popping arguments
29638 29640 if they are not variable. */
29639 29641 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29640 29642 /* Fastcall attribute says callee is responsible for popping arguments
29641 29643 if they are not variable. */
29642 29644 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29643 29645 /* Cdecl attribute says the callee is a normal C declaration */
29644 29646 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29645 29647 /* Regparm attribute specifies how many integer arguments are to be
29646 29648 passed in registers. */
29647 29649 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29648 29650 /* Sseregparm attribute says we are using x86_64 calling conventions
29649 29651 for FP arguments. */
29650 29652 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29651 29653 /* force_align_arg_pointer says this function realigns the stack at entry. */
29652 29654 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29653 29655 false, true, true, ix86_handle_cconv_attribute },
29654 29656 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29655 29657 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29656 29658 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29657 29659 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29658 29660 #endif
29659 29661 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29660 29662 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29661 29663 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29662 29664 SUBTARGET_ATTRIBUTE_TABLE,
29663 29665 #endif
29664 29666 /* ms_abi and sysv_abi calling convention function attributes. */
29665 29667 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29666 29668 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29667 29669 /* End element. */
29668 29670 { NULL, 0, 0, false, false, false, NULL }
29669 29671 };
29670 29672
29671 29673 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29672 29674 static int
29673 29675 x86_builtin_vectorization_cost (bool runtime_test)
29674 29676 {
29675 29677 /* If the branch of the runtime test is taken - i.e., the vectorized
29676 29678 version is skipped - this incurs a misprediction cost (because the
29677 29679 vectorized version is expected to be the fall-through). So we subtract
29678 29680 the latency of a mispredicted branch from the costs that are incurred
29679 29681 when the vectorized version is executed.
29680 29682
29681 29683 TODO: The values in individual target tables have to be tuned or new
29682 29684 fields may be needed. E.g., on K8, the default branch path is the
29683 29685 not-taken path. If the taken path is predicted correctly, the minimum
29684 29686 penalty of going down the taken-path is 1 cycle. If the taken-path is
29685 29687 not predicted correctly, then the minimum penalty is 10 cycles. */
29686 29688
29687 29689 if (runtime_test)
29688 29690 {
29689 29691 return (-(ix86_cost->cond_taken_branch_cost));
29690 29692 }
29691 29693 else
29692 29694 return 0;
29693 29695 }
29694 29696
29695 29697 /* This function returns the calling abi specific va_list type node.
29696 29698 It returns the FNDECL specific va_list type. */
29697 29699
29698 29700 tree
29699 29701 ix86_fn_abi_va_list (tree fndecl)
29700 29702 {
29701 29703 int abi;
29702 29704
29703 29705 if (!TARGET_64BIT)
29704 29706 return va_list_type_node;
29705 29707 gcc_assert (fndecl != NULL_TREE);
29706 29708 abi = ix86_function_abi ((const_tree) fndecl);
29707 29709
29708 29710 if (abi == MS_ABI)
29709 29711 return ms_va_list_type_node;
29710 29712 else
29711 29713 return sysv_va_list_type_node;
29712 29714 }
29713 29715
29714 29716 /* Returns the canonical va_list type specified by TYPE. If there
29715 29717 is no valid TYPE provided, it returns NULL_TREE. */
29716 29718
29717 29719 tree
29718 29720 ix86_canonical_va_list_type (tree type)
29719 29721 {
29720 29722 tree wtype, htype;
29721 29723
29722 29724 /* Resolve references and pointers to va_list type. */
29723 29725 if (INDIRECT_REF_P (type))
29724 29726 type = TREE_TYPE (type);
29725 29727 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE(type)))
29726 29728 type = TREE_TYPE (type);
29727 29729
29728 29730 if (TARGET_64BIT)
29729 29731 {
29730 29732 wtype = va_list_type_node;
29731 29733 gcc_assert (wtype != NULL_TREE);
29732 29734 htype = type;
29733 29735 if (TREE_CODE (wtype) == ARRAY_TYPE)
29734 29736 {
29735 29737 /* If va_list is an array type, the argument may have decayed
29736 29738 to a pointer type, e.g. by being passed to another function.
29737 29739 In that case, unwrap both types so that we can compare the
29738 29740 underlying records. */
29739 29741 if (TREE_CODE (htype) == ARRAY_TYPE
29740 29742 || POINTER_TYPE_P (htype))
29741 29743 {
29742 29744 wtype = TREE_TYPE (wtype);
29743 29745 htype = TREE_TYPE (htype);
29744 29746 }
29745 29747 }
29746 29748 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29747 29749 return va_list_type_node;
29748 29750 wtype = sysv_va_list_type_node;
29749 29751 gcc_assert (wtype != NULL_TREE);
29750 29752 htype = type;
29751 29753 if (TREE_CODE (wtype) == ARRAY_TYPE)
29752 29754 {
29753 29755 /* If va_list is an array type, the argument may have decayed
29754 29756 to a pointer type, e.g. by being passed to another function.
29755 29757 In that case, unwrap both types so that we can compare the
29756 29758 underlying records. */
29757 29759 if (TREE_CODE (htype) == ARRAY_TYPE
29758 29760 || POINTER_TYPE_P (htype))
29759 29761 {
29760 29762 wtype = TREE_TYPE (wtype);
29761 29763 htype = TREE_TYPE (htype);
29762 29764 }
29763 29765 }
29764 29766 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29765 29767 return sysv_va_list_type_node;
29766 29768 wtype = ms_va_list_type_node;
29767 29769 gcc_assert (wtype != NULL_TREE);
29768 29770 htype = type;
29769 29771 if (TREE_CODE (wtype) == ARRAY_TYPE)
29770 29772 {
29771 29773 /* If va_list is an array type, the argument may have decayed
29772 29774 to a pointer type, e.g. by being passed to another function.
29773 29775 In that case, unwrap both types so that we can compare the
29774 29776 underlying records. */
29775 29777 if (TREE_CODE (htype) == ARRAY_TYPE
29776 29778 || POINTER_TYPE_P (htype))
29777 29779 {
29778 29780 wtype = TREE_TYPE (wtype);
29779 29781 htype = TREE_TYPE (htype);
29780 29782 }
29781 29783 }
29782 29784 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype))
29783 29785 return ms_va_list_type_node;
29784 29786 return NULL_TREE;
29785 29787 }
29786 29788 return std_canonical_va_list_type (type);
29787 29789 }
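
/* Illustrative example of the decay case unwrapped three times above: the
   64-bit SysV __builtin_va_list is a one-element array of a record type,
   so once a va_list has been passed to another function it is seen as a
   pointer to that record, and both types must be unwrapped before their
   main variants can be compared.  "consume" and "produce" are
   hypothetical names used only for this example.  */
#include <stdarg.h>

static void
consume (va_list ap)          /* parameter type has decayed to a pointer */
{
  (void) ap;
}

static void
produce (int first, ...)
{
  va_list ap;                 /* declared with the array type */
  va_start (ap, first);
  consume (ap);               /* decays to a pointer at the call */
  va_end (ap);
}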
29788 29790
29789 29791 /* Iterate through the target-specific builtin types for va_list.
29790 29792 IDX denotes the iterator, *PTREE is set to the result type of
29791 29793 the va_list builtin, and *PNAME to its internal type.
29792 29794 Returns zero if there is no element for this index, otherwise
29793 29795 IDX should be increased upon the next call.
29794 29796 Note, do not iterate a base builtin's name like __builtin_va_list.
29795 29797 Used from c_common_nodes_and_builtins. */
29796 29798
29797 29799 int
29798 29800 ix86_enum_va_list (int idx, const char **pname, tree *ptree)
29799 29801 {
29800 29802 if (!TARGET_64BIT)
29801 29803 return 0;
29802 29804 switch (idx) {
29803 29805 case 0:
29804 29806 *ptree = ms_va_list_type_node;
29805 29807 *pname = "__builtin_ms_va_list";
29806 29808 break;
29807 29809 case 1:
29808 29810 *ptree = sysv_va_list_type_node;
29809 29811 *pname = "__builtin_sysv_va_list";
29810 29812 break;
29811 29813 default:
29812 29814 return 0;
29813 29815 }
29814 29816 return 1;
29815 29817 }
29816 29818
29817 29819 /* Initialize the GCC target structure. */
29818 29820 #undef TARGET_RETURN_IN_MEMORY
29819 29821 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory
29820 29822
29821 29823 #undef TARGET_ATTRIBUTE_TABLE
29822 29824 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
29823 29825 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29824 29826 # undef TARGET_MERGE_DECL_ATTRIBUTES
29825 29827 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
29826 29828 #endif
29827 29829
29828 29830 #undef TARGET_COMP_TYPE_ATTRIBUTES
29829 29831 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
29830 29832
29831 29833 #undef TARGET_INIT_BUILTINS
29832 29834 #define TARGET_INIT_BUILTINS ix86_init_builtins
29833 29835 #undef TARGET_EXPAND_BUILTIN
29834 29836 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
29835 29837
29836 29838 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
29837 29839 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
29838 29840 ix86_builtin_vectorized_function
29839 29841
29840 29842 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
29841 29843 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
29842 29844
29843 29845 #undef TARGET_BUILTIN_RECIPROCAL
29844 29846 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
29845 29847
29846 29848 #undef TARGET_ASM_FUNCTION_EPILOGUE
29847 29849 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
29848 29850
29849 29851 #undef TARGET_ENCODE_SECTION_INFO
29850 29852 #ifndef SUBTARGET_ENCODE_SECTION_INFO
29851 29853 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
29852 29854 #else
29853 29855 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
29854 29856 #endif
29855 29857
29856 29858 #undef TARGET_ASM_OPEN_PAREN
29857 29859 #define TARGET_ASM_OPEN_PAREN ""
29858 29860 #undef TARGET_ASM_CLOSE_PAREN
29859 29861 #define TARGET_ASM_CLOSE_PAREN ""
29860 29862
29861 29863 #undef TARGET_ASM_ALIGNED_HI_OP
29862 29864 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
29863 29865 #undef TARGET_ASM_ALIGNED_SI_OP
29864 29866 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
29865 29867 #ifdef ASM_QUAD
29866 29868 #undef TARGET_ASM_ALIGNED_DI_OP
29867 29869 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
29868 29870 #endif
29869 29871
29870 29872 #undef TARGET_ASM_UNALIGNED_HI_OP
29871 29873 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
29872 29874 #undef TARGET_ASM_UNALIGNED_SI_OP
29873 29875 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
29874 29876 #undef TARGET_ASM_UNALIGNED_DI_OP
29875 29877 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
29876 29878
29877 29879 #undef TARGET_SCHED_ADJUST_COST
29878 29880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
29879 29881 #undef TARGET_SCHED_ISSUE_RATE
29880 29882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
29881 29883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
29882 29884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
29883 29885 ia32_multipass_dfa_lookahead
29884 29886
29885 29887 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
29886 29888 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
29887 29889
29888 29890 #ifdef HAVE_AS_TLS
29889 29891 #undef TARGET_HAVE_TLS
29890 29892 #define TARGET_HAVE_TLS true
29891 29893 #endif
29892 29894 #undef TARGET_CANNOT_FORCE_CONST_MEM
29893 29895 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
29894 29896 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
29895 29897 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
29896 29898
29897 29899 #undef TARGET_DELEGITIMIZE_ADDRESS
29898 29900 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
29899 29901
29900 29902 #undef TARGET_MS_BITFIELD_LAYOUT_P
29901 29903 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
29902 29904
29903 29905 #if TARGET_MACHO
29904 29906 #undef TARGET_BINDS_LOCAL_P
29905 29907 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
29906 29908 #endif
29907 29909 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29908 29910 #undef TARGET_BINDS_LOCAL_P
29909 29911 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
29910 29912 #endif
29911 29913
29912 29914 #undef TARGET_ASM_OUTPUT_MI_THUNK
29913 29915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
29914 29916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
29915 29917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
29916 29918
29917 29919 #undef TARGET_ASM_FILE_START
29918 29920 #define TARGET_ASM_FILE_START x86_file_start
29919 29921
29920 29922 #undef TARGET_DEFAULT_TARGET_FLAGS
29921 29923 #define TARGET_DEFAULT_TARGET_FLAGS \
29922 29924 (TARGET_DEFAULT \
29923 29925 | TARGET_SUBTARGET_DEFAULT \
29924 29926 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
29925 29927
29926 29928 #undef TARGET_HANDLE_OPTION
29927 29929 #define TARGET_HANDLE_OPTION ix86_handle_option
29928 29930
29929 29931 #undef TARGET_RTX_COSTS
29930 29932 #define TARGET_RTX_COSTS ix86_rtx_costs
29931 29933 #undef TARGET_ADDRESS_COST
29932 29934 #define TARGET_ADDRESS_COST ix86_address_cost
29933 29935
29934 29936 #undef TARGET_FIXED_CONDITION_CODE_REGS
29935 29937 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
29936 29938 #undef TARGET_CC_MODES_COMPATIBLE
29937 29939 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
29938 29940
29939 29941 #undef TARGET_MACHINE_DEPENDENT_REORG
29940 29942 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
29941 29943
29942 29944 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
29943 29945 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value
29944 29946
29945 29947 #undef TARGET_BUILD_BUILTIN_VA_LIST
29946 29948 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
29947 29949
29948 29950 #undef TARGET_FN_ABI_VA_LIST
29949 29951 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list
29950 29952
29951 29953 #undef TARGET_CANONICAL_VA_LIST_TYPE
29952 29954 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type
29953 29955
29954 29956 #undef TARGET_EXPAND_BUILTIN_VA_START
29955 29957 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start
29956 29958
29957 29959 #undef TARGET_MD_ASM_CLOBBERS
29958 29960 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
29959 29961
29960 29962 #undef TARGET_PROMOTE_PROTOTYPES
29961 29963 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
29962 29964 #undef TARGET_STRUCT_VALUE_RTX
29963 29965 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
29964 29966 #undef TARGET_SETUP_INCOMING_VARARGS
29965 29967 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
29966 29968 #undef TARGET_MUST_PASS_IN_STACK
29967 29969 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
29968 29970 #undef TARGET_PASS_BY_REFERENCE
29969 29971 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
29970 29972 #undef TARGET_INTERNAL_ARG_POINTER
29971 29973 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
29972 29974 #undef TARGET_UPDATE_STACK_BOUNDARY
29973 29975 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary
29974 29976 #undef TARGET_GET_DRAP_RTX
29975 29977 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx
29976 29978 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
29977 29979 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
29978 29980 #undef TARGET_STRICT_ARGUMENT_NAMING
29979 29981 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
29980 29982
29981 29983 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
29982 29984 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
29983 29985
29984 29986 #undef TARGET_SCALAR_MODE_SUPPORTED_P
29985 29987 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
29986 29988
29987 29989 #undef TARGET_VECTOR_MODE_SUPPORTED_P
29988 29990 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
29989 29991
29990 29992 #undef TARGET_C_MODE_FOR_SUFFIX
29991 29993 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
29992 29994
29993 29995 #ifdef HAVE_AS_TLS
29994 29996 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
29995 29997 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
29996 29998 #endif
29997 29999
29998 30000 #ifdef SUBTARGET_INSERT_ATTRIBUTES
29999 30001 #undef TARGET_INSERT_ATTRIBUTES
30000 30002 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
30001 30003 #endif
30002 30004
30003 30005 #undef TARGET_MANGLE_TYPE
30004 30006 #define TARGET_MANGLE_TYPE ix86_mangle_type
30005 30007
30006 30008 #undef TARGET_STACK_PROTECT_FAIL
30007 30009 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
30008 30010
30009 30011 #undef TARGET_FUNCTION_VALUE
30010 30012 #define TARGET_FUNCTION_VALUE ix86_function_value
30011 30013
30012 30014 #undef TARGET_SECONDARY_RELOAD
30013 30015 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload
30014 30016
30015 30017 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
30016 30018 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost
30017 30019
30018 30020 #undef TARGET_SET_CURRENT_FUNCTION
30019 30021 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function
30020 30022
30021 30023 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
30022 30024 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p
30023 30025
30024 30026 #undef TARGET_OPTION_SAVE
30025 30027 #define TARGET_OPTION_SAVE ix86_function_specific_save
30026 30028
30027 30029 #undef TARGET_OPTION_RESTORE
30028 30030 #define TARGET_OPTION_RESTORE ix86_function_specific_restore
30029 30031
30030 30032 #undef TARGET_OPTION_PRINT
30031 30033 #define TARGET_OPTION_PRINT ix86_function_specific_print
30032 30034
30033 30035 #undef TARGET_OPTION_CAN_INLINE_P
30034 30036 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p
30035 30037
30036 30038 #undef TARGET_EXPAND_TO_RTL_HOOK
30037 30039 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi
30038 30040
30039 30041 struct gcc_target targetm = TARGET_INITIALIZER;
30040 30042
30041 30043 #include "gt-i386.h"
25541 lines elided