/* Subroutines used for code generation on IA-32.
   Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-codes.h"
#include "insn-attr.h"
#include "flags.h"
#include "c-common.h"
#include "except.h"
#include "function.h"
#include "recog.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "basic-block.h"
#include "ggc.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "cgraph.h"
#include "gimple.h"
#include "dwarf2.h"
#include "df.h"
#include "tm-constrs.h"
#include "params.h"
#include "cselib.h"

static int x86_builtin_vectorization_cost (bool);
static rtx legitimize_dllimport_symbol (rtx, bool);

#ifndef CHECK_STACK_LIMIT
#define CHECK_STACK_LIMIT (-1)
#endif

/* Return index of given mode in mult and division cost tables.  */
#define MODE_INDEX(mode)		\
  ((mode) == QImode ? 0			\
   : (mode) == HImode ? 1		\
   : (mode) == SImode ? 2		\
   : (mode) == DImode ? 3		\
   : 4)

/* Processor costs (relative to an add) */
/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes.  */
#define COSTS_N_BYTES(N) ((N) * 2)

#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
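/* Editorial note on reading the cost tables below (added commentary, not
   part of the original cost data):

   COSTS_N_BYTES puts byte sizes on the same scale COSTS_N_INSNS uses for
   instruction counts: with COSTS_N_INSNS (N) == (N) * 4 and a 2-byte add,
   COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so an add again scores as
   one unit when tuning for size.

   The two stringop_algs initializers near the end of every table describe
   the memcpy and memset expansion strategies.  Assuming the stringop_algs
   layout declared in i386.h, each one reads: first the algorithm used when
   the block size is unknown at compile time, then {max_size, algorithm}
   pairs tried in order for known sizes, with max_size == -1 standing for
   "all larger sizes".  Judging from generic32_cost and generic64_cost
   below, the first of the two initializers is used for 32-bit code and the
   second for 64-bit code; DUMMY_STRINGOP_ALGS fills the slot that can never
   be used on the given CPU.  */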
const
struct processor_costs ix86_size_cost = {  /* costs for tuning for size */
  COSTS_N_BYTES (2),  /* cost of an add instruction */
  COSTS_N_BYTES (3),  /* cost of a lea instruction */
  COSTS_N_BYTES (2),  /* variable shift costs */
  COSTS_N_BYTES (3),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_BYTES (3), COSTS_N_BYTES (3), COSTS_N_BYTES (3),
   COSTS_N_BYTES (3), COSTS_N_BYTES (5)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_BYTES (3), COSTS_N_BYTES (3), COSTS_N_BYTES (3),
   COSTS_N_BYTES (3), COSTS_N_BYTES (5)},
  COSTS_N_BYTES (3),  /* cost of movsx */
  COSTS_N_BYTES (3),  /* cost of movzx */
  0,  /* "large" insn */
  2,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {2, 2, 2},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 2},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {2, 2, 2},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  3,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers in SImode and DImode */
  {3, 3},  /* cost of storing MMX registers in SImode and DImode */
  3,  /* cost of moving SSE register */
  {3, 3, 3},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {3, 3, 3},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_BYTES (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_BYTES (2),  /* cost of FMUL instruction. */
  COSTS_N_BYTES (2),  /* cost of FDIV instruction. */
  COSTS_N_BYTES (2),  /* cost of FABS instruction. */
  COSTS_N_BYTES (2),  /* cost of FCHS instruction. */
  COSTS_N_BYTES (2),  /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  1,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  1,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Processor costs (relative to an add) */
static const
struct processor_costs i386_cost = {  /* 386 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (6), COSTS_N_INSNS (6), COSTS_N_INSNS (6),
   COSTS_N_INSNS (6), COSTS_N_INSNS (6)},
  COSTS_N_INSNS (1),  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (23), COSTS_N_INSNS (23), COSTS_N_INSNS (23),
   COSTS_N_INSNS (23), COSTS_N_INSNS (23)},
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  0,  /* size of l1 cache */
  0,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (23),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (27),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (88),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (22),  /* cost of FABS instruction. */
  COSTS_N_INSNS (24),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (122),  /* cost of FSQRT instruction. */
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs i486_cost = {  /* 486 specific costs */
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (3),  /* variable shift costs */
  COSTS_N_INSNS (2),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (12), COSTS_N_INSNS (12), COSTS_N_INSNS (12),
   COSTS_N_INSNS (12), COSTS_N_INSNS (12)},
  1,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (40), COSTS_N_INSNS (40), COSTS_N_INSNS (40),
   COSTS_N_INSNS (40), COSTS_N_INSNS (40)},
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  15,  /* "large" insn */
  3,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {8, 8, 8},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {8, 8, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 8},  /* cost of loading MMX registers in SImode and DImode */
  {4, 8},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  4,  /* size of l1 cache.  486 has 8kB cache shared for code and data,
	 so 4kB is not really precise.  */
  4,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (16),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (73),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),  /* cost of FABS instruction. */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (83),  /* cost of FSQRT instruction. */
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs pentium_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (11), COSTS_N_INSNS (11), COSTS_N_INSNS (11),
   COSTS_N_INSNS (11), COSTS_N_INSNS (11)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (25), COSTS_N_INSNS (25), COSTS_N_INSNS (25),
   COSTS_N_INSNS (25), COSTS_N_INSNS (25)},
  COSTS_N_INSNS (3),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  6,  /* cost for loading QImode using movzbl */
  {2, 4, 2},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 4, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  8,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 8, 16},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 8, 16},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  8,  /* size of l2 cache */
  0,  /* size of prefetch block */
  0,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (3),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (39),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (70),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs pentiumpro_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (4), COSTS_N_INSNS (4), COSTS_N_INSNS (4),
   COSTS_N_INSNS (4), COSTS_N_INSNS (4)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (17), COSTS_N_INSNS (17), COSTS_N_INSNS (17),
   COSTS_N_INSNS (17), COSTS_N_INSNS (17)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 2, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  256,  /* size of l2 cache */
  32,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
     (we ensure the alignment).  For small blocks an inline loop is still a
     noticeable win; for bigger blocks either rep movsl or rep movsb is the
     way to go.  Rep movsb apparently has a more expensive startup time in
     the CPU, but after 4K the difference is down in the noise.  */
  {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{rep_prefix_4_byte, {{1024, unrolled_loop},
			{8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs geode_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (2),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (7),
   COSTS_N_INSNS (7), COSTS_N_INSNS (7)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (15), COSTS_N_INSNS (23), COSTS_N_INSNS (39),
   COSTS_N_INSNS (39), COSTS_N_INSNS (39)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  1,  /* cost for loading QImode using movzbl */
  {1, 1, 1},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {1, 1, 1},  /* cost of storing integer registers */
  1,  /* cost of reg,reg fld/fst */
  {1, 1, 1},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 6, 6},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  1,  /* cost of moving MMX register */
  {1, 1},  /* cost of loading MMX registers in SImode and DImode */
  {1, 1},  /* cost of storing MMX registers in SImode and DImode */
  1,  /* cost of moving SSE register */
  {1, 1, 1},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {1, 1, 1},  /* cost of storing SSE registers in SImode, DImode and TImode */
  1,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  128,  /* size of l2 cache. */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (11),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (47),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (54),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs k6_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3), COSTS_N_INSNS (3),
   COSTS_N_INSNS (3), COSTS_N_INSNS (3)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (18), COSTS_N_INSNS (18),
   COSTS_N_INSNS (18), COSTS_N_INSNS (18)},
  COSTS_N_INSNS (2),  /* cost of movsx */
  COSTS_N_INSNS (2),  /* cost of movzx */
  8,  /* "large" insn */
  4,  /* MOVE_RATIO */
  3,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {2, 2, 8},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers in SImode, DImode and TImode */
  6,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  32,  /* size of l2 cache.  Some models have integrated l2 cache, but
	  optimizing for k6 is not important enough to worry about that.  */
  32,  /* size of prefetch block */
  1,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (2),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (2),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (56),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (56),  /* cost of FSQRT instruction. */
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs athlon_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (5), COSTS_N_INSNS (5), COSTS_N_INSNS (5),
   COSTS_N_INSNS (5), COSTS_N_INSNS (5)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (26), COSTS_N_INSNS (42),
   COSTS_N_INSNS (74), COSTS_N_INSNS (74)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {4, 4},  /* cost of loading MMX registers in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 6},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  5,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (24),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  /* For some reason, Athlon deals better with REP prefix (relative to loops)
     compared to K8.  Alignment becomes important after 8 bytes for memcpy and
     128 bytes for memset.  */
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs k8_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
   COSTS_N_INSNS (4), COSTS_N_INSNS (5)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (26), COSTS_N_INSNS (42),
   COSTS_N_INSNS (74), COSTS_N_INSNS (74)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 3, 6},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  /* K8 has an optimized REP instruction for medium-sized blocks, but for
     very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat the inline expansion
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  5,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  3,  /* vec_unalign_load_cost. */
  3,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  2,  /* cond_not_taken_branch_cost. */
};

struct processor_costs amdfam10_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (2),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
   COSTS_N_INSNS (4), COSTS_N_INSNS (5)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (19), COSTS_N_INSNS (35), COSTS_N_INSNS (51),
   COSTS_N_INSNS (83), COSTS_N_INSNS (83)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  9,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {3, 4, 3},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {3, 4, 3},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {4, 4, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {3, 3},  /* cost of loading MMX registers in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {4, 4, 3},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 5},  /* cost of storing SSE registers in SImode, DImode and TImode */
  3,  /* MMX or SSE register to integer */
     /* On K8:
	  MOVD reg64, xmmreg  Double  FSTORE 4
	  MOVD reg32, xmmreg  Double  FSTORE 4
	On AMDFAM10:
	  MOVD reg64, xmmreg  Double  FADD 3   1/1 1/1
	  MOVD reg32, xmmreg  Double  FADD 3   1/1 1/1  */
  64,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  /* New AMD processors never drop prefetches; if they cannot be performed
     immediately, they are queued.  We set the number of simultaneous
     prefetches to a large constant to reflect this (it is probably not a
     good idea to leave the number of prefetches entirely unlimited, as
     their execution also takes some time).  */
  100,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (4),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (4),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (19),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (35),  /* cost of FSQRT instruction. */
  /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
     for very small blocks it is better to use a loop.  For large blocks, a
     libcall can do nontemporal accesses and beat the inline expansion
     considerably.  */
  {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {24, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  4,  /* scalar_stmt_cost. */
  2,  /* scalar load_cost. */
  2,  /* scalar_store_cost. */
  6,  /* vec_stmt_cost. */
  0,  /* vec_to_scalar_cost. */
  2,  /* scalar_to_vec_cost. */
  2,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  2,  /* vec_store_cost. */
  2,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs pentium4_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (3),  /* cost of a lea instruction */
  COSTS_N_INSNS (4),  /* variable shift costs */
  COSTS_N_INSNS (4),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (15), COSTS_N_INSNS (15), COSTS_N_INSNS (15),
   COSTS_N_INSNS (15), COSTS_N_INSNS (15)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (56), COSTS_N_INSNS (56), COSTS_N_INSNS (56),
   COSTS_N_INSNS (56), COSTS_N_INSNS (56)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  6,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {4, 5, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {2, 3, 2},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {2, 2, 6},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 6},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {2, 2},  /* cost of loading MMX registers in SImode and DImode */
  {2, 2},  /* cost of storing MMX registers in SImode and DImode */
  12,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {2, 2, 8},  /* cost of storing SSE registers in SImode, DImode and TImode */
  10,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  2,  /* Branch cost */
  COSTS_N_INSNS (5),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (7),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (43),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (2),  /* cost of FABS instruction. */
  COSTS_N_INSNS (2),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (43),  /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

static const
struct processor_costs nocona_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1),  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (10), COSTS_N_INSNS (10), COSTS_N_INSNS (10),
   COSTS_N_INSNS (10), COSTS_N_INSNS (10)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (66), COSTS_N_INSNS (66), COSTS_N_INSNS (66),
   COSTS_N_INSNS (66), COSTS_N_INSNS (66)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  16,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  3,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  6,  /* cost of moving MMX register */
  {12, 12},  /* cost of loading MMX registers in SImode and DImode */
  {12, 12},  /* cost of storing MMX registers in SImode and DImode */
  6,  /* cost of moving SSE register */
  {12, 12, 12},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {12, 12, 12},  /* cost of storing SSE registers in SImode, DImode and TImode */
  8,  /* MMX or SSE register to integer */
  8,  /* size of l1 cache. */
  1024,  /* size of l2 cache. */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  1,  /* Branch cost */
  COSTS_N_INSNS (6),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (40),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (3),  /* cost of FABS instruction. */
  COSTS_N_INSNS (3),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (44),  /* cost of FSQRT instruction. */
  {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
	      {100000, unrolled_loop}, {-1, libcall}}}},
  {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
	      {-1, libcall}}},
   {libcall, {{24, loop}, {64, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};
static const
struct processor_costs core2_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (3), COSTS_N_INSNS (3),
   COSTS_N_INSNS (3), COSTS_N_INSNS (3)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (22), COSTS_N_INSNS (22), COSTS_N_INSNS (22),
   COSTS_N_INSNS (22), COSTS_N_INSNS (22)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  16,  /* MOVE_RATIO */
  2,  /* cost for loading QImode using movzbl */
  {6, 6, 6},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  2,  /* cost of reg,reg fld/fst */
  {6, 6, 6},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {4, 4, 4},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {6, 6},  /* cost of loading MMX registers in SImode and DImode */
  {4, 4},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {6, 6, 6},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {4, 4, 4},  /* cost of storing SSE registers in SImode, DImode and TImode */
  2,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  2048,  /* size of l2 cache. */
  128,  /* size of prefetch block */
  8,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (3),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (5),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (32),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (1),  /* cost of FABS instruction. */
  COSTS_N_INSNS (1),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (58),  /* cost of FSQRT instruction. */
  {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
   {libcall, {{32, loop}, {64, rep_prefix_4_byte},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {{libcall, {{8, loop}, {15, unrolled_loop},
	      {2048, rep_prefix_4_byte}, {-1, libcall}}},
   {libcall, {{24, loop}, {32, unrolled_loop},
	      {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Generic64 should produce code tuned for Nocona and K8.  */
static const
struct processor_costs generic64_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  /* On all chips taken into consideration, lea is 2 cycles or more.  With
     this cost, however, our current implementation of synth_mult results in
     the use of unnecessary temporary registers, causing regressions on
     several SPECfp benchmarks.  */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
   COSTS_N_INSNS (4), COSTS_N_INSNS (2)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (26), COSTS_N_INSNS (42),
   COSTS_N_INSNS (74), COSTS_N_INSNS (74)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  512,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
     value is increased to the perhaps more appropriate value of 5.  */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),  /* cost of FABS instruction. */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction. */
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  {DUMMY_STRINGOP_ALGS,
   {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

/* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona
   and K8.  */
static const
struct processor_costs generic32_cost = {
  COSTS_N_INSNS (1),  /* cost of an add instruction */
  COSTS_N_INSNS (1) + 1,  /* cost of a lea instruction */
  COSTS_N_INSNS (1),  /* variable shift costs */
  COSTS_N_INSNS (1),  /* constant shift costs */
  /* cost of starting multiply for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (3), COSTS_N_INSNS (4), COSTS_N_INSNS (3),
   COSTS_N_INSNS (4), COSTS_N_INSNS (2)},
  0,  /* cost of multiply per each bit set */
  /* cost of a divide/mod for QI, HI, SI, DI, other */
  {COSTS_N_INSNS (18), COSTS_N_INSNS (26), COSTS_N_INSNS (42),
   COSTS_N_INSNS (74), COSTS_N_INSNS (74)},
  COSTS_N_INSNS (1),  /* cost of movsx */
  COSTS_N_INSNS (1),  /* cost of movzx */
  8,  /* "large" insn */
  17,  /* MOVE_RATIO */
  4,  /* cost for loading QImode using movzbl */
  {4, 4, 4},  /* cost of loading integer registers in QImode, HImode and
		 SImode.  Relative to reg-reg move (2).  */
  {4, 4, 4},  /* cost of storing integer registers */
  4,  /* cost of reg,reg fld/fst */
  {12, 12, 12},  /* cost of loading fp registers in SFmode, DFmode and XFmode */
  {6, 6, 8},  /* cost of storing fp registers in SFmode, DFmode and XFmode */
  2,  /* cost of moving MMX register */
  {8, 8},  /* cost of loading MMX registers in SImode and DImode */
  {8, 8},  /* cost of storing MMX registers in SImode and DImode */
  2,  /* cost of moving SSE register */
  {8, 8, 8},  /* cost of loading SSE registers in SImode, DImode and TImode */
  {8, 8, 8},  /* cost of storing SSE registers in SImode, DImode and TImode */
  5,  /* MMX or SSE register to integer */
  32,  /* size of l1 cache. */
  256,  /* size of l2 cache. */
  64,  /* size of prefetch block */
  6,  /* number of parallel prefetches */
  3,  /* Branch cost */
  COSTS_N_INSNS (8),  /* cost of FADD and FSUB insns. */
  COSTS_N_INSNS (8),  /* cost of FMUL instruction. */
  COSTS_N_INSNS (20),  /* cost of FDIV instruction. */
  COSTS_N_INSNS (8),  /* cost of FABS instruction. */
  COSTS_N_INSNS (8),  /* cost of FCHS instruction. */
  COSTS_N_INSNS (40),  /* cost of FSQRT instruction. */
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
   DUMMY_STRINGOP_ALGS},
  1,  /* scalar_stmt_cost. */
  1,  /* scalar load_cost. */
  1,  /* scalar_store_cost. */
  1,  /* vec_stmt_cost. */
  1,  /* vec_to_scalar_cost. */
  1,  /* scalar_to_vec_cost. */
  1,  /* vec_align_load_cost. */
  2,  /* vec_unalign_load_cost. */
  1,  /* vec_store_cost. */
  3,  /* cond_taken_branch_cost. */
  1,  /* cond_not_taken_branch_cost. */
};

const struct processor_costs *ix86_cost = &pentium_cost;
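/* Editorial note (added commentary): ix86_cost defaults to pentium_cost here
   and is expected to be repointed at the table matching the -mtune= selection
   by the option-handling code, which lies outside this excerpt.  A
   hypothetical query of the active SImode multiply start-up cost would then
   look something like

     unsigned cost = ix86_cost->mult_init[MODE_INDEX (SImode)];

   where mult_init is assumed to be the multiply-array field name in the
   processor_costs declaration in i386.h.  */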
/* Processor feature/optimization bitmasks.  */
#define m_386 (1<<PROCESSOR_I386)
#define m_486 (1<<PROCESSOR_I486)
#define m_PENT (1<<PROCESSOR_PENTIUM)
#define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
#define m_PENT4 (1<<PROCESSOR_PENTIUM4)
#define m_NOCONA (1<<PROCESSOR_NOCONA)
#define m_CORE2 (1<<PROCESSOR_CORE2)

#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
#define m_K6_GEODE (m_K6 | m_GEODE)
#define m_K8 (1<<PROCESSOR_K8)
#define m_ATHLON (1<<PROCESSOR_ATHLON)
#define m_ATHLON_K8 (m_K8 | m_ATHLON)
#define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
#define m_AMD_MULTIPLE (m_K8 | m_ATHLON | m_AMDFAM10)

#define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
#define m_GENERIC64 (1<<PROCESSOR_GENERIC64)

/* Generic instruction choice should be a common subset of supported CPUs
   (PPro/PENT4/NOCONA/CORE2/Athlon/K8).  */
#define m_GENERIC (m_GENERIC32 | m_GENERIC64)

/* Feature tests against the various tunings.  */
unsigned char ix86_tune_features[X86_TUNE_LAST];

/* Feature tests against the various tunings used to create ix86_tune_features
   based on the processor mask.  */
static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
  /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
     negatively, so enabling it for Generic64 seems like a good code-size
     tradeoff.  We can't enable it for 32-bit generic because it does not
     work well with PPro-based chips.  */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_CORE2 | m_GENERIC64,

  /* X86_TUNE_PUSH_MEMORY */
  m_386 | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ZERO_EXTEND_WITH_AND */
  m_486 | m_PENT,

  /* X86_TUNE_UNROLL_STRLEN */
  m_486 | m_PENT | m_PPRO | m_AMD_MULTIPLE | m_K6 | m_CORE2 | m_GENERIC,

  /* X86_TUNE_DEEP_BRANCH_PREDICTION */
  m_PPRO | m_K6_GEODE | m_AMD_MULTIPLE | m_PENT4 | m_GENERIC,

  /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
     on simulation results, but after P4 was made no performance benefit was
     observed with branch hints.  They also increase code size.  As a result,
     icc never generates branch hints.  */
  0,

  /* X86_TUNE_DOUBLE_WITH_ADD */
  ~m_386,

  /* X86_TUNE_USE_SAHF */
  m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
  | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
     partial dependencies.  */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA
  | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,

  /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
     register stalls on the Generic32 compilation setting as well.  However,
     in the current implementation partial register stalls are not eliminated
     very well - they can be introduced via subregs synthesized by combine
     and can happen in caller/callee saving sequences.  Because this option
     pays back little on PPro-based chips and conflicts with the partial
     register dependencies used by Athlon/P4-based chips, it is better to
     leave it off for generic32 for now.  */
  m_PPRO,

  /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
  m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_HIMODE_FIOP */
  m_386 | m_486 | m_K6_GEODE,

  /* X86_TUNE_USE_SIMODE_FIOP */
  ~(m_PPRO | m_AMD_MULTIPLE | m_PENT | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_MOV0 */
  m_K6,

  /* X86_TUNE_USE_CLTD */
  ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),

  /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx.  */
  m_PENT4,

  /* X86_TUNE_SPLIT_LONG_MOVES */
  m_PPRO,

  /* X86_TUNE_READ_MODIFY_WRITE */
  ~m_PENT,

  /* X86_TUNE_READ_MODIFY */
  ~(m_PENT | m_PPRO),

  /* X86_TUNE_PROMOTE_QIMODE */
  m_K6_GEODE | m_PENT | m_386 | m_486 | m_AMD_MULTIPLE | m_CORE2
  | m_GENERIC /* | m_PENT4 ? */,

  /* X86_TUNE_FAST_PREFIX */
  ~(m_PENT | m_486 | m_386),

  /* X86_TUNE_SINGLE_STRINGOP */
  m_386 | m_PENT4 | m_NOCONA,

  /* X86_TUNE_QIMODE_MATH */
  ~0,

  /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
     register stalls.  Just like X86_TUNE_PARTIAL_REG_STALL this option
     might be considered for Generic32 if our scheme for avoiding partial
     stalls were more effective.  */
  ~m_PPRO,

  /* X86_TUNE_PROMOTE_QI_REGS */
  0,

  /* X86_TUNE_PROMOTE_HI_REGS */
  m_PPRO,

  /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop.  */
  m_AMD_MULTIPLE | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_ADD_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_K6_GEODE | m_386
  | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_4 */
  m_AMD_MULTIPLE | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SUB_ESP_8 */
  m_AMD_MULTIPLE | m_PPRO | m_386 | m_486
  | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
     for DFmode copies */
  ~(m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
    | m_GENERIC | m_GEODE),

  /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
     conflict here between PPro/Pentium4-based chips that treat 128-bit
     SSE registers as single units and K8-based chips that divide SSE
     registers into two 64-bit halves.  This knob promotes all store
     destinations to 128 bits to allow register renaming on 128-bit SSE
     units, but usually results in one extra micro-op on 64-bit SSE units.
     Experimental results show that disabling this option on P4 brings over
     a 20% SPECfp regression, while enabling it on K8 brings roughly a 2.4%
     regression that can be partly masked by careful scheduling of moves.  */
  m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,

  /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
  m_AMDFAM10,

  /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
     are resolved on SSE register parts instead of whole registers, so we may
     maintain just the lower part of scalar values in the proper format,
     leaving the upper part undefined.  */
  m_ATHLON_K8,
  /* X86_TUNE_SSE_TYPELESS_STORES */
  m_AMD_MULTIPLE,

  /* X86_TUNE_SSE_LOAD0_BY_PXOR */
  m_PPRO | m_PENT4 | m_NOCONA,

  /* X86_TUNE_MEMORY_MISMATCH_STALL */
  m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_PROLOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EPILOGUE_USING_MOVE */
  m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SHIFT1 */
  ~m_486,

  /* X86_TUNE_USE_FFREEP */
  m_AMD_MULTIPLE,

  /* X86_TUNE_INTER_UNIT_MOVES */
  ~(m_AMD_MULTIPLE | m_GENERIC),

  /* X86_TUNE_INTER_UNIT_CONVERSIONS */
  ~(m_AMDFAM10),

  /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
     than 4 branch instructions in a 16-byte window.  */
  m_PPRO | m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,

  /* X86_TUNE_SCHEDULE */
  m_PPRO | m_AMD_MULTIPLE | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_BT */
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,

  /* X86_TUNE_USE_INCDEC */
  ~(m_PENT4 | m_NOCONA | m_GENERIC),

  /* X86_TUNE_PAD_RETURNS */
  m_AMD_MULTIPLE | m_CORE2 | m_GENERIC,

  /* X86_TUNE_EXT_80387_CONSTANTS */
  m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
  | m_GENERIC,

  /* X86_TUNE_SHORTEN_X87_SSE */
  ~m_K8,

  /* X86_TUNE_AVOID_VECTOR_DECODE */
  m_K8 | m_GENERIC64,

  /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for
     HImode and SImode multiply, but 386 and 486 do HImode multiply
     faster.  */
  ~(m_386 | m_486),

  /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is a
     vector path on AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path on
     AMD machines.  */
  m_K8 | m_GENERIC64 | m_AMDFAM10,

  /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
     than via a MOV.  */
  m_PENT,

  /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
     but one byte longer.  */
  m_PENT,

  /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
     operand that cannot be represented using a modRM byte.  The XOR
     replacement is long decoded, so this split helps here as well.  */
  m_K6,

  /* X86_TUNE_USE_VECTOR_FP_CONVERTS: Prefer vector packed SSE conversion
     from FP to FP.  */
  m_AMDFAM10 | m_GENERIC,

  /* X86_TUNE_USE_VECTOR_CONVERTS: Prefer vector packed SSE conversion
     from integer to FP.  */
  m_AMDFAM10,

  /* X86_TUNE_FUSE_CMP_AND_BRANCH: Fuse a compare or test instruction
     with a subsequent conditional jump instruction into a single
     compare-and-branch uop.  */
  m_CORE2,
};
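/* Editorial note (added commentary): a sketch of how the masks above are
   consumed.  The option-override code (outside this excerpt) tests every
   entry against a one-bit mask for the CPU selected by -mtune, roughly:

     unsigned int ix86_tune_mask = 1u << ix86_tune;
     for (i = 0; i < X86_TUNE_LAST; i++)
       ix86_tune_features[i]
	 = !!(initial_ix86_tune_features[i] & ix86_tune_mask);

   so a knob such as X86_TUNE_USE_LEAVE ends up enabled exactly when the m_*
   bit of the CPU being tuned for appears in its initializer above.  */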
*/ 1465 ~(m_386 | m_486), 1466 1467 /* X86_ARCH_XADD: Exchange and add was added for 80486. */ 1468 ~m_386, 1469 1470 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */ 1471 ~m_386, 1472 }; 1473 1474 static const unsigned int x86_accumulate_outgoing_args 1475 = m_AMD_MULTIPLE | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC; 1476 1477 static const unsigned int x86_arch_always_fancy_math_387 1478 = m_PENT | m_PPRO | m_AMD_MULTIPLE | m_PENT4 1479 | m_NOCONA | m_CORE2 | m_GENERIC; 1480 1481 static enum stringop_alg stringop_alg = no_stringop; 1482 1483 /* In case the average insn count for single function invocation is 1484 lower than this constant, emit fast (but longer) prologue and 1485 epilogue code. */ 1486 #define FAST_PROLOGUE_INSN_COUNT 20 1487 1488 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */ 1489 static const char *const qi_reg_name[] = QI_REGISTER_NAMES; 1490 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES; 1491 static const char *const hi_reg_name[] = HI_REGISTER_NAMES; 1492 1493 /* Array of the smallest class containing reg number REGNO, indexed by 1494 REGNO. Used by REGNO_REG_CLASS in i386.h. */ 1495 1496 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] = 1497 { 1498 /* ax, dx, cx, bx */ 1499 AREG, DREG, CREG, BREG, 1500 /* si, di, bp, sp */ 1501 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS, 1502 /* FP registers */ 1503 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS, 1504 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, 1505 /* arg pointer */ 1506 NON_Q_REGS, 1507 /* flags, fpsr, fpcr, frame */ 1508 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS, 1509 /* SSE registers */ 1510 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1511 SSE_REGS, SSE_REGS, 1512 /* MMX registers */ 1513 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, 1514 MMX_REGS, MMX_REGS, 1515 /* REX registers */ 1516 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1517 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, 1518 /* SSE REX registers */ 1519 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, 1520 SSE_REGS, SSE_REGS, 1521 }; 1522 1523 /* The "default" register map used in 32bit mode. */ 1524 1525 int const dbx_register_map[FIRST_PSEUDO_REGISTER] = 1526 { 1527 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */ 1528 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */ 1529 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1530 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */ 1531 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */ 1532 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1533 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1534 }; 1535 1536 /* The "default" register map used in 64bit mode. */ 1537 1538 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] = 1539 { 1540 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */ 1541 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */ 1542 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1543 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */ 1544 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */ 1545 8,9,10,11,12,13,14,15, /* extended integer registers */ 1546 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */ 1547 }; 1548 1549 /* Define the register numbers to be used in Dwarf debugging information. 
1550 The SVR4 reference port C compiler uses the following register numbers 1551 in its Dwarf output code: 1552 0 for %eax (gcc regno = 0) 1553 1 for %ecx (gcc regno = 2) 1554 2 for %edx (gcc regno = 1) 1555 3 for %ebx (gcc regno = 3) 1556 4 for %esp (gcc regno = 7) 1557 5 for %ebp (gcc regno = 6) 1558 6 for %esi (gcc regno = 4) 1559 7 for %edi (gcc regno = 5) 1560 The following three DWARF register numbers are never generated by 1561 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4 1562 believes these numbers have these meanings. 1563 8 for %eip (no gcc equivalent) 1564 9 for %eflags (gcc regno = 17) 1565 10 for %trapno (no gcc equivalent) 1566 It is not at all clear how we should number the FP stack registers 1567 for the x86 architecture. If the version of SDB on x86/svr4 were 1568 a bit less brain dead with respect to floating-point then we would 1569 have a precedent to follow with respect to DWARF register numbers 1570 for x86 FP registers, but the SDB on x86/svr4 is so completely 1571 broken with respect to FP registers that it is hardly worth thinking 1572 of it as something to strive for compatibility with. 1573 The version of x86/svr4 SDB I have at the moment does (partially) 1574 seem to believe that DWARF register number 11 is associated with 1575 the x86 register %st(0), but that's about all. Higher DWARF 1576 register numbers don't seem to be associated with anything in 1577 particular, and even for DWARF regno 11, SDB only seems to under- 1578 stand that it should say that a variable lives in %st(0) (when 1579 asked via an `=' command) if we said it was in DWARF regno 11, 1580 but SDB still prints garbage when asked for the value of the 1581 variable in question (via a `/' command). 1582 (Also note that the labels SDB prints for various FP stack regs 1583 when doing an `x' command are all wrong.) 1584 Note that these problems generally don't affect the native SVR4 1585 C compiler because it doesn't allow the use of -O with -g and 1586 because when it is *not* optimizing, it allocates a memory 1587 location for each floating-point variable, and the memory 1588 location is what gets described in the DWARF AT_location 1589 attribute for the variable in question. 1590 Regardless of the severe mental illness of the x86/svr4 SDB, we 1591 do something sensible here and we use the following DWARF 1592 register numbers. Note that these are all stack-top-relative 1593 numbers. 1594 11 for %st(0) (gcc regno = 8) 1595 12 for %st(1) (gcc regno = 9) 1596 13 for %st(2) (gcc regno = 10) 1597 14 for %st(3) (gcc regno = 11) 1598 15 for %st(4) (gcc regno = 12) 1599 16 for %st(5) (gcc regno = 13) 1600 17 for %st(6) (gcc regno = 14) 1601 18 for %st(7) (gcc regno = 15) 1602 */ 1603 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] = 1604 { 1605 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */ 1606 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */ 1607 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */ 1608 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */ 1609 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */ 1610 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */ 1611 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */ 1612 }; 1613 1614 /* Test and compare insns in i386.md store the information needed to 1615 generate branch and scc insns here. */ 1616 1617 rtx ix86_compare_op0 = NULL_RTX; 1618 rtx ix86_compare_op1 = NULL_RTX; 1619 rtx ix86_compare_emitted = NULL_RTX; 1620 1621 /* Define parameter passing and return registers. 
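   The SysV AMD64 ABI passes the first six integer arguments in %rdi, %rsi, %rdx, %rcx,
   %r8 and %r9; the Microsoft x64 ABI passes the first four in %rcx, %rdx, %r8 and %r9.
   For a call f (a, b, c) that means a/%rdi, b/%rsi, c/%rdx under SysV but a/%rcx, b/%rdx,
   c/%r8 under the MS ABI.  The arrays below simply spell out those orders as hard
   register numbers.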
*/ 1622 1623 static int const x86_64_int_parameter_registers[6] = 1624 { 1625 DI_REG, SI_REG, DX_REG, CX_REG, R8_REG, R9_REG 1626 }; 1627 1628 static int const x86_64_ms_abi_int_parameter_registers[4] = 1629 { 1630 CX_REG, DX_REG, R8_REG, R9_REG 1631 }; 1632 1633 static int const x86_64_int_return_registers[4] = 1634 { 1635 AX_REG, DX_REG, DI_REG, SI_REG 1636 }; 1637 1638 /* Define the structure for the machine field in struct function. */ 1639 1640 struct stack_local_entry GTY(()) 1641 { 1642 unsigned short mode; 1643 unsigned short n; 1644 rtx rtl; 1645 struct stack_local_entry *next; 1646 }; 1647 1648 /* Structure describing stack frame layout. 1649 Stack grows downward: 1650 1651 [arguments] 1652 <- ARG_POINTER 1653 saved pc 1654 1655 saved frame pointer if frame_pointer_needed 1656 <- HARD_FRAME_POINTER 1657 [-msave-args] 1658 1659 [padding0] 1660 1661 [saved regs] 1662 1663 [padding05] 1664 1665 [saved SSE regs] 1666 1667 [padding1] \ 1668 ) 1669 [va_arg registers] ( 1670 > to_allocate <- FRAME_POINTER 1671 [frame] ( 1672 ) 1673 [padding2] / 1674 */ 1675 struct ix86_frame 1676 { 1677 int nmsave_args; 1678 int padding0; 1679 int nsseregs; 1680 int padding05; 1681 int nregs; 1682 int padding1; 1683 int va_arg_size; 1684 HOST_WIDE_INT frame; 1685 int padding2; 1686 int outgoing_arguments_size; 1687 int red_zone_size; 1688 1689 HOST_WIDE_INT to_allocate; 1690 /* The offsets relative to ARG_POINTER. */ 1691 HOST_WIDE_INT frame_pointer_offset; 1692 HOST_WIDE_INT hard_frame_pointer_offset; 1693 HOST_WIDE_INT stack_pointer_offset; 1694 1695 /* When save_regs_using_mov is set, emit prologue using 1696 move instead of push instructions. */ 1697 bool save_regs_using_mov; 1698 }; 1699 1700 /* Code model option. */ 1701 enum cmodel ix86_cmodel; 1702 /* Asm dialect. */ 1703 enum asm_dialect ix86_asm_dialect = ASM_ATT; 1704 /* TLS dialects. */ 1705 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU; 1706 1707 /* Which unit we are generating floating point math for. */ 1708 enum fpmath_unit ix86_fpmath; 1709 1710 /* Which cpu are we scheduling for. */ 1711 enum attr_cpu ix86_schedule; 1712 1713 /* Which cpu are we optimizing for. */ 1714 enum processor_type ix86_tune; 1715 1716 /* Which instruction set architecture to use. */ 1717 enum processor_type ix86_arch; 1718 1719 /* true if sse prefetch instruction is not NOOP. */ 1720 int x86_prefetch_sse; 1721 1722 /* ix86_regparm_string as a number */ 1723 static int ix86_regparm; 1724 1725 /* -mstackrealign option */ 1726 extern int ix86_force_align_arg_pointer; 1727 static const char ix86_force_align_arg_pointer_string[] 1728 = "force_align_arg_pointer"; 1729 1730 static rtx (*ix86_gen_leave) (void); 1731 static rtx (*ix86_gen_pop1) (rtx); 1732 static rtx (*ix86_gen_add3) (rtx, rtx, rtx); 1733 static rtx (*ix86_gen_sub3) (rtx, rtx, rtx); 1734 static rtx (*ix86_gen_sub3_carry) (rtx, rtx, rtx, rtx); 1735 static rtx (*ix86_gen_one_cmpl2) (rtx, rtx); 1736 static rtx (*ix86_gen_monitor) (rtx, rtx, rtx); 1737 static rtx (*ix86_gen_andsp) (rtx, rtx, rtx); 1738 1739 /* Preferred alignment for stack boundary in bits. */ 1740 unsigned int ix86_preferred_stack_boundary; 1741 1742 /* Alignment for incoming stack boundary in bits specified at 1743 command line. */ 1744 static unsigned int ix86_user_incoming_stack_boundary; 1745 1746 /* Default alignment for incoming stack boundary in bits. */ 1747 static unsigned int ix86_default_incoming_stack_boundary; 1748 1749 /* Alignment for incoming stack boundary in bits. 
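   Note that this is a bit count, not an exponent: the handling of
   -mincoming-stack-boundary=N in override_options below stores (1 << N) * BITS_PER_UNIT
   here, so e.g. N = 4 yields 128 bits, the 16-byte alignment required by the x86-64 psABI.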
*/ 1750 unsigned int ix86_incoming_stack_boundary; 1751 1752 /* Values 1-5: see jump.c */ 1753 int ix86_branch_cost; 1754 1755 /* Calling abi specific va_list type nodes. */ 1756 static GTY(()) tree sysv_va_list_type_node; 1757 static GTY(()) tree ms_va_list_type_node; 1758 1759 /* Variables which are this size or smaller are put in the data/bss 1760 or ldata/lbss sections. */ 1761 1762 int ix86_section_threshold = 65536; 1763 1764 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */ 1765 char internal_label_prefix[16]; 1766 int internal_label_prefix_len; 1767 1768 /* Fence to use after loop using movnt. */ 1769 tree x86_mfence; 1770 1771 static int ix86_nsaved_args (void); 1772 1773 /* Register class used for passing given 64bit part of the argument. 1774 These represent classes as documented by the PS ABI, with the exception 1775 of SSESF, SSEDF classes, that are basically SSE class, just gcc will 1776 use SF or DFmode move instead of DImode to avoid reformatting penalties. 1777 1778 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves 1779 whenever possible (upper half does contain padding). */ 1780 enum x86_64_reg_class 1781 { 1782 X86_64_NO_CLASS, 1783 X86_64_INTEGER_CLASS, 1784 X86_64_INTEGERSI_CLASS, 1785 X86_64_SSE_CLASS, 1786 X86_64_SSESF_CLASS, 1787 X86_64_SSEDF_CLASS, 1788 X86_64_SSEUP_CLASS, 1789 X86_64_X87_CLASS, 1790 X86_64_X87UP_CLASS, 1791 X86_64_COMPLEX_X87_CLASS, 1792 X86_64_MEMORY_CLASS 1793 }; 1794 1795 #define MAX_CLASSES 4 1796 1797 /* Table of constants used by fldpi, fldln2, etc.... */ 1798 static REAL_VALUE_TYPE ext_80387_constants_table [5]; 1799 static bool ext_80387_constants_init = 0; 1800 1801 1802 static struct machine_function * ix86_init_machine_status (void); 1803 static rtx ix86_function_value (const_tree, const_tree, bool); 1804 static int ix86_function_regparm (const_tree, const_tree); 1805 static void ix86_compute_frame_layout (struct ix86_frame *); 1806 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode, 1807 rtx, rtx, int); 1808 static void ix86_add_new_builtins (int); 1809 1810 enum ix86_function_specific_strings 1811 { 1812 IX86_FUNCTION_SPECIFIC_ARCH, 1813 IX86_FUNCTION_SPECIFIC_TUNE, 1814 IX86_FUNCTION_SPECIFIC_FPMATH, 1815 IX86_FUNCTION_SPECIFIC_MAX 1816 }; 1817 1818 static char *ix86_target_string (int, int, const char *, const char *, 1819 const char *, bool); 1820 static void ix86_debug_options (void) ATTRIBUTE_UNUSED; 1821 static void ix86_function_specific_save (struct cl_target_option *); 1822 static void ix86_function_specific_restore (struct cl_target_option *); 1823 static void ix86_function_specific_print (FILE *, int, 1824 struct cl_target_option *); 1825 static bool ix86_valid_target_attribute_p (tree, tree, tree, int); 1826 static bool ix86_valid_target_attribute_inner_p (tree, char *[]); 1827 static bool ix86_can_inline_p (tree, tree); 1828 static void ix86_set_current_function (tree); 1829 1830 static void pro_epilogue_adjust_stack (rtx, rtx, rtx, int); 1831 1832 1833 /* The svr4 ABI for the i386 says that records and unions are returned 1834 in memory. */ 1835 #ifndef DEFAULT_PCC_STRUCT_RETURN 1836 #define DEFAULT_PCC_STRUCT_RETURN 1 1837 #endif 1838 1839 /* Whether -mtune= or -march= were specified */ 1840 static int ix86_tune_defaulted; 1841 static int ix86_arch_specified; 1842 1843 /* Bit flags that specify the ISA we are compiling for. 
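   Each bit is one of the OPTION_MASK_ISA_* masks defined below, and the TARGET_* ISA
   predicates simply test those bits.  A minimal sketch of the pattern (the real predicate
   definitions live in the generated options header, so this is illustrative only):

     bool have_sse2 = (ix86_isa_flags & OPTION_MASK_ISA_SSE2) != 0;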
*/ 1844 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT; 1845 1846 /* A mask of ix86_isa_flags that includes bit X if X 1847 was set or cleared on the command line. */ 1848 static int ix86_isa_flags_explicit; 1849 1850 /* Define a set of ISAs which are available when a given ISA is 1851 enabled. MMX and SSE ISAs are handled separately. */ 1852 1853 #define OPTION_MASK_ISA_MMX_SET OPTION_MASK_ISA_MMX 1854 #define OPTION_MASK_ISA_3DNOW_SET \ 1855 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_MMX_SET) 1856 1857 #define OPTION_MASK_ISA_SSE_SET OPTION_MASK_ISA_SSE 1858 #define OPTION_MASK_ISA_SSE2_SET \ 1859 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE_SET) 1860 #define OPTION_MASK_ISA_SSE3_SET \ 1861 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE2_SET) 1862 #define OPTION_MASK_ISA_SSSE3_SET \ 1863 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE3_SET) 1864 #define OPTION_MASK_ISA_SSE4_1_SET \ 1865 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSSE3_SET) 1866 #define OPTION_MASK_ISA_SSE4_2_SET \ 1867 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_1_SET) 1868 #define OPTION_MASK_ISA_AVX_SET \ 1869 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_SSE4_2_SET) 1870 #define OPTION_MASK_ISA_FMA_SET \ 1871 (OPTION_MASK_ISA_FMA | OPTION_MASK_ISA_AVX_SET) 1872 1873 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same 1874 as -msse4.2. */ 1875 #define OPTION_MASK_ISA_SSE4_SET OPTION_MASK_ISA_SSE4_2_SET 1876 1877 #define OPTION_MASK_ISA_SSE4A_SET \ 1878 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE3_SET) 1879 #define OPTION_MASK_ISA_SSE5_SET \ 1880 (OPTION_MASK_ISA_SSE5 | OPTION_MASK_ISA_SSE4A_SET) 1881 1882 /* AES and PCLMUL need SSE2 because they use xmm registers */ 1883 #define OPTION_MASK_ISA_AES_SET \ 1884 (OPTION_MASK_ISA_AES | OPTION_MASK_ISA_SSE2_SET) 1885 #define OPTION_MASK_ISA_PCLMUL_SET \ 1886 (OPTION_MASK_ISA_PCLMUL | OPTION_MASK_ISA_SSE2_SET) 1887 1888 #define OPTION_MASK_ISA_ABM_SET \ 1889 (OPTION_MASK_ISA_ABM | OPTION_MASK_ISA_POPCNT) 1890 #define OPTION_MASK_ISA_POPCNT_SET OPTION_MASK_ISA_POPCNT 1891 #define OPTION_MASK_ISA_CX16_SET OPTION_MASK_ISA_CX16 1892 #define OPTION_MASK_ISA_SAHF_SET OPTION_MASK_ISA_SAHF 1893 1894 /* Define a set of ISAs which aren't available when a given ISA is 1895 disabled. MMX and SSE ISAs are handled separately. */ 1896 1897 #define OPTION_MASK_ISA_MMX_UNSET \ 1898 (OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_3DNOW_UNSET) 1899 #define OPTION_MASK_ISA_3DNOW_UNSET \ 1900 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_A_UNSET) 1901 #define OPTION_MASK_ISA_3DNOW_A_UNSET OPTION_MASK_ISA_3DNOW_A 1902 1903 #define OPTION_MASK_ISA_SSE_UNSET \ 1904 (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2_UNSET) 1905 #define OPTION_MASK_ISA_SSE2_UNSET \ 1906 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE3_UNSET) 1907 #define OPTION_MASK_ISA_SSE3_UNSET \ 1908 (OPTION_MASK_ISA_SSE3 \ 1909 | OPTION_MASK_ISA_SSSE3_UNSET \ 1910 | OPTION_MASK_ISA_SSE4A_UNSET ) 1911 #define OPTION_MASK_ISA_SSSE3_UNSET \ 1912 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSE4_1_UNSET) 1913 #define OPTION_MASK_ISA_SSE4_1_UNSET \ 1914 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2_UNSET) 1915 #define OPTION_MASK_ISA_SSE4_2_UNSET \ 1916 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_AVX_UNSET ) 1917 #define OPTION_MASK_ISA_AVX_UNSET \ 1918 (OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_FMA_UNSET) 1919 #define OPTION_MASK_ISA_FMA_UNSET OPTION_MASK_ISA_FMA 1920 1921 /* SSE4 includes both SSE4.1 and SSE4.2. -mno-sse4 should the same 1922 as -mno-sse4.1. 
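   The *_UNSET masks chain in the opposite direction from the *_SET masks: turning an ISA
   off also turns off everything built on top of it.  Expanding these definitions,
   -mno-sse3 clears SSE3 together with SSSE3, SSE4.1, SSE4.2, AVX, FMA, SSE4A and SSE5,
   while -mno-sse4 only drops SSE4.1 and the ISAs layered on it (SSE4.2, AVX, FMA),
   exactly as if -mno-sse4.1 had been given.  The option handler below applies them as

     ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;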
*/ 1923 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET 1924 1925 #define OPTION_MASK_ISA_SSE4A_UNSET \ 1926 (OPTION_MASK_ISA_SSE4A | OPTION_MASK_ISA_SSE5_UNSET) 1927 #define OPTION_MASK_ISA_SSE5_UNSET OPTION_MASK_ISA_SSE5 1928 #define OPTION_MASK_ISA_AES_UNSET OPTION_MASK_ISA_AES 1929 #define OPTION_MASK_ISA_PCLMUL_UNSET OPTION_MASK_ISA_PCLMUL 1930 #define OPTION_MASK_ISA_ABM_UNSET OPTION_MASK_ISA_ABM 1931 #define OPTION_MASK_ISA_POPCNT_UNSET OPTION_MASK_ISA_POPCNT 1932 #define OPTION_MASK_ISA_CX16_UNSET OPTION_MASK_ISA_CX16 1933 #define OPTION_MASK_ISA_SAHF_UNSET OPTION_MASK_ISA_SAHF 1934 1935 /* Vectorization library interface and handlers. */ 1936 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL; 1937 static tree ix86_veclibabi_svml (enum built_in_function, tree, tree); 1938 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree); 1939 1940 /* Processor target table, indexed by processor number */ 1941 struct ptt 1942 { 1943 const struct processor_costs *cost; /* Processor costs */ 1944 const int align_loop; /* Default alignments. */ 1945 const int align_loop_max_skip; 1946 const int align_jump; 1947 const int align_jump_max_skip; 1948 const int align_func; 1949 }; 1950 1951 static const struct ptt processor_target_table[PROCESSOR_max] = 1952 { 1953 {&i386_cost, 4, 3, 4, 3, 4}, 1954 {&i486_cost, 16, 15, 16, 15, 16}, 1955 {&pentium_cost, 16, 7, 16, 7, 16}, 1956 {&pentiumpro_cost, 16, 15, 16, 10, 16}, 1957 {&geode_cost, 0, 0, 0, 0, 0}, 1958 {&k6_cost, 32, 7, 32, 7, 32}, 1959 {&athlon_cost, 16, 7, 16, 7, 16}, 1960 {&pentium4_cost, 0, 0, 0, 0, 0}, 1961 {&k8_cost, 16, 7, 16, 7, 16}, 1962 {&nocona_cost, 0, 0, 0, 0, 0}, 1963 {&core2_cost, 16, 10, 16, 10, 16}, 1964 {&generic32_cost, 16, 7, 16, 7, 16}, 1965 {&generic64_cost, 16, 10, 16, 10, 16}, 1966 {&amdfam10_cost, 32, 24, 32, 7, 32} 1967 }; 1968 1969 static const char *const cpu_names[TARGET_CPU_DEFAULT_max] = 1970 { 1971 "generic", 1972 "i386", 1973 "i486", 1974 "pentium", 1975 "pentium-mmx", 1976 "pentiumpro", 1977 "pentium2", 1978 "pentium3", 1979 "pentium4", 1980 "pentium-m", 1981 "prescott", 1982 "nocona", 1983 "core2", 1984 "geode", 1985 "k6", 1986 "k6-2", 1987 "k6-3", 1988 "athlon", 1989 "athlon-4", 1990 "k8", 1991 "amdfam10" 1992 }; 1993 1994 /* Implement TARGET_HANDLE_OPTION. 
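   Every ISA option follows the same shape: the positive form ORs the corresponding *_SET
   mask into both ix86_isa_flags and ix86_isa_flags_explicit, while the negative form
   clears the *_UNSET mask from ix86_isa_flags and records that same mask as explicit, so
   that override_options later knows which bits the user chose and must leave alone.
   Schematically, for a hypothetical -mfoo (the FOO masks are placeholders only):

     case OPT_mfoo:
       if (value)
         {
           ix86_isa_flags |= OPTION_MASK_ISA_FOO_SET;
           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_SET;
         }
       else
         {
           ix86_isa_flags &= ~OPTION_MASK_ISA_FOO_UNSET;
           ix86_isa_flags_explicit |= OPTION_MASK_ISA_FOO_UNSET;
         }
       return true;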
*/ 1995 1996 static bool 1997 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value) 1998 { 1999 switch (code) 2000 { 2001 case OPT_mmmx: 2002 if (value) 2003 { 2004 ix86_isa_flags |= OPTION_MASK_ISA_MMX_SET; 2005 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_SET; 2006 } 2007 else 2008 { 2009 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET; 2010 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET; 2011 } 2012 return true; 2013 2014 case OPT_m3dnow: 2015 if (value) 2016 { 2017 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_SET; 2018 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_SET; 2019 } 2020 else 2021 { 2022 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET; 2023 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET; 2024 } 2025 return true; 2026 2027 case OPT_m3dnowa: 2028 return false; 2029 2030 case OPT_msse: 2031 if (value) 2032 { 2033 ix86_isa_flags |= OPTION_MASK_ISA_SSE_SET; 2034 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_SET; 2035 } 2036 else 2037 { 2038 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET; 2039 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET; 2040 } 2041 return true; 2042 2043 case OPT_msse2: 2044 if (value) 2045 { 2046 ix86_isa_flags |= OPTION_MASK_ISA_SSE2_SET; 2047 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_SET; 2048 } 2049 else 2050 { 2051 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET; 2052 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET; 2053 } 2054 return true; 2055 2056 case OPT_msse3: 2057 if (value) 2058 { 2059 ix86_isa_flags |= OPTION_MASK_ISA_SSE3_SET; 2060 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_SET; 2061 } 2062 else 2063 { 2064 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET; 2065 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET; 2066 } 2067 return true; 2068 2069 case OPT_mssse3: 2070 if (value) 2071 { 2072 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3_SET; 2073 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_SET; 2074 } 2075 else 2076 { 2077 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET; 2078 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET; 2079 } 2080 return true; 2081 2082 case OPT_msse4_1: 2083 if (value) 2084 { 2085 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1_SET; 2086 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_SET; 2087 } 2088 else 2089 { 2090 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET; 2091 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET; 2092 } 2093 return true; 2094 2095 case OPT_msse4_2: 2096 if (value) 2097 { 2098 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2_SET; 2099 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_SET; 2100 } 2101 else 2102 { 2103 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET; 2104 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET; 2105 } 2106 return true; 2107 2108 case OPT_mavx: 2109 if (value) 2110 { 2111 ix86_isa_flags |= OPTION_MASK_ISA_AVX_SET; 2112 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_SET; 2113 } 2114 else 2115 { 2116 ix86_isa_flags &= ~OPTION_MASK_ISA_AVX_UNSET; 2117 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AVX_UNSET; 2118 } 2119 return true; 2120 2121 case OPT_mfma: 2122 if (value) 2123 { 2124 ix86_isa_flags |= OPTION_MASK_ISA_FMA_SET; 2125 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_SET; 2126 } 2127 else 2128 { 2129 ix86_isa_flags &= ~OPTION_MASK_ISA_FMA_UNSET; 2130 ix86_isa_flags_explicit |= OPTION_MASK_ISA_FMA_UNSET; 2131 } 2132 return true; 2133 2134 case OPT_msse4: 2135 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_SET; 2136 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_SET; 2137 return true; 2138 2139 case OPT_mno_sse4: 2140 
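      /* -msse4 and -mno-sse4 are asymmetric: enabling acts like -msse4.2
	 (OPTION_MASK_ISA_SSE4_SET above) while disabling acts like -mno-sse4.1
	 (OPTION_MASK_ISA_SSE4_UNSET), so the negative form is handled as its
	 own case instead of sharing the usual value check.  */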
ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET; 2141 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET; 2142 return true; 2143 2144 case OPT_msse4a: 2145 if (value) 2146 { 2147 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A_SET; 2148 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_SET; 2149 } 2150 else 2151 { 2152 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET; 2153 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET; 2154 } 2155 return true; 2156 2157 case OPT_msse5: 2158 if (value) 2159 { 2160 ix86_isa_flags |= OPTION_MASK_ISA_SSE5_SET; 2161 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_SET; 2162 } 2163 else 2164 { 2165 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE5_UNSET; 2166 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE5_UNSET; 2167 } 2168 return true; 2169 2170 case OPT_mabm: 2171 if (value) 2172 { 2173 ix86_isa_flags |= OPTION_MASK_ISA_ABM_SET; 2174 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_SET; 2175 } 2176 else 2177 { 2178 ix86_isa_flags &= ~OPTION_MASK_ISA_ABM_UNSET; 2179 ix86_isa_flags_explicit |= OPTION_MASK_ISA_ABM_UNSET; 2180 } 2181 return true; 2182 2183 case OPT_mpopcnt: 2184 if (value) 2185 { 2186 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT_SET; 2187 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_SET; 2188 } 2189 else 2190 { 2191 ix86_isa_flags &= ~OPTION_MASK_ISA_POPCNT_UNSET; 2192 ix86_isa_flags_explicit |= OPTION_MASK_ISA_POPCNT_UNSET; 2193 } 2194 return true; 2195 2196 case OPT_msahf: 2197 if (value) 2198 { 2199 ix86_isa_flags |= OPTION_MASK_ISA_SAHF_SET; 2200 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_SET; 2201 } 2202 else 2203 { 2204 ix86_isa_flags &= ~OPTION_MASK_ISA_SAHF_UNSET; 2205 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SAHF_UNSET; 2206 } 2207 return true; 2208 2209 case OPT_mcx16: 2210 if (value) 2211 { 2212 ix86_isa_flags |= OPTION_MASK_ISA_CX16_SET; 2213 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_SET; 2214 } 2215 else 2216 { 2217 ix86_isa_flags &= ~OPTION_MASK_ISA_CX16_UNSET; 2218 ix86_isa_flags_explicit |= OPTION_MASK_ISA_CX16_UNSET; 2219 } 2220 return true; 2221 2222 case OPT_maes: 2223 if (value) 2224 { 2225 ix86_isa_flags |= OPTION_MASK_ISA_AES_SET; 2226 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_SET; 2227 } 2228 else 2229 { 2230 ix86_isa_flags &= ~OPTION_MASK_ISA_AES_UNSET; 2231 ix86_isa_flags_explicit |= OPTION_MASK_ISA_AES_UNSET; 2232 } 2233 return true; 2234 2235 case OPT_mpclmul: 2236 if (value) 2237 { 2238 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL_SET; 2239 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_SET; 2240 } 2241 else 2242 { 2243 ix86_isa_flags &= ~OPTION_MASK_ISA_PCLMUL_UNSET; 2244 ix86_isa_flags_explicit |= OPTION_MASK_ISA_PCLMUL_UNSET; 2245 } 2246 return true; 2247 2248 default: 2249 return true; 2250 } 2251 } 2252 2253 /* Return a string the documents the current -m options. The caller is 2254 responsible for freeing the string. */ 2255 2256 static char * 2257 ix86_target_string (int isa, int flags, const char *arch, const char *tune, 2258 const char *fpmath, bool add_nl_p) 2259 { 2260 struct ix86_target_opts 2261 { 2262 const char *option; /* option string */ 2263 int mask; /* isa mask options */ 2264 }; 2265 2266 /* This table is ordered so that options like -msse5 or -msse4.2 that imply 2267 preceding options while match those first. 
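   (In other words, the more inclusive option is listed before the options it implies, so
   when several related ISA bits are set the printed string shows the most specific one
   first.)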
*/ 2268 static struct ix86_target_opts isa_opts[] = 2269 { 2270 { "-m64", OPTION_MASK_ISA_64BIT }, 2271 { "-msse5", OPTION_MASK_ISA_SSE5 }, 2272 { "-msse4a", OPTION_MASK_ISA_SSE4A }, 2273 { "-msse4.2", OPTION_MASK_ISA_SSE4_2 }, 2274 { "-msse4.1", OPTION_MASK_ISA_SSE4_1 }, 2275 { "-mssse3", OPTION_MASK_ISA_SSSE3 }, 2276 { "-msse3", OPTION_MASK_ISA_SSE3 }, 2277 { "-msse2", OPTION_MASK_ISA_SSE2 }, 2278 { "-msse", OPTION_MASK_ISA_SSE }, 2279 { "-m3dnow", OPTION_MASK_ISA_3DNOW }, 2280 { "-m3dnowa", OPTION_MASK_ISA_3DNOW_A }, 2281 { "-mmmx", OPTION_MASK_ISA_MMX }, 2282 { "-mabm", OPTION_MASK_ISA_ABM }, 2283 { "-mpopcnt", OPTION_MASK_ISA_POPCNT }, 2284 { "-maes", OPTION_MASK_ISA_AES }, 2285 { "-mpclmul", OPTION_MASK_ISA_PCLMUL }, 2286 }; 2287 2288 /* Flag options. */ 2289 static struct ix86_target_opts flag_opts[] = 2290 { 2291 { "-m128bit-long-double", MASK_128BIT_LONG_DOUBLE }, 2292 { "-m80387", MASK_80387 }, 2293 { "-maccumulate-outgoing-args", MASK_ACCUMULATE_OUTGOING_ARGS }, 2294 { "-malign-double", MASK_ALIGN_DOUBLE }, 2295 { "-mcld", MASK_CLD }, 2296 { "-mfp-ret-in-387", MASK_FLOAT_RETURNS }, 2297 { "-mieee-fp", MASK_IEEE_FP }, 2298 { "-minline-all-stringops", MASK_INLINE_ALL_STRINGOPS }, 2299 { "-minline-stringops-dynamically", MASK_INLINE_STRINGOPS_DYNAMICALLY }, 2300 { "-mms-bitfields", MASK_MS_BITFIELD_LAYOUT }, 2301 { "-mno-align-stringops", MASK_NO_ALIGN_STRINGOPS }, 2302 { "-mno-fancy-math-387", MASK_NO_FANCY_MATH_387 }, 2303 { "-mno-fused-madd", MASK_NO_FUSED_MADD }, 2304 { "-mno-push-args", MASK_NO_PUSH_ARGS }, 2305 { "-mno-red-zone", MASK_NO_RED_ZONE }, 2306 { "-momit-leaf-frame-pointer", MASK_OMIT_LEAF_FRAME_POINTER }, 2307 { "-mrecip", MASK_RECIP }, 2308 { "-mrtd", MASK_RTD }, 2309 { "-msseregparm", MASK_SSEREGPARM }, 2310 { "-mstack-arg-probe", MASK_STACK_PROBE }, 2311 { "-mtls-direct-seg-refs", MASK_TLS_DIRECT_SEG_REFS }, 2312 }; 2313 2314 const char *opts[ARRAY_SIZE (isa_opts) + ARRAY_SIZE (flag_opts) + 6][2]; 2315 2316 char isa_other[40]; 2317 char target_other[40]; 2318 unsigned num = 0; 2319 unsigned i, j; 2320 char *ret; 2321 char *ptr; 2322 size_t len; 2323 size_t line_len; 2324 size_t sep_len; 2325 2326 memset (opts, '\0', sizeof (opts)); 2327 2328 /* Add -march= option. */ 2329 if (arch) 2330 { 2331 opts[num][0] = "-march="; 2332 opts[num++][1] = arch; 2333 } 2334 2335 /* Add -mtune= option. */ 2336 if (tune) 2337 { 2338 opts[num][0] = "-mtune="; 2339 opts[num++][1] = tune; 2340 } 2341 2342 /* Pick out the options in isa options. */ 2343 for (i = 0; i < ARRAY_SIZE (isa_opts); i++) 2344 { 2345 if ((isa & isa_opts[i].mask) != 0) 2346 { 2347 opts[num++][0] = isa_opts[i].option; 2348 isa &= ~ isa_opts[i].mask; 2349 } 2350 } 2351 2352 if (isa && add_nl_p) 2353 { 2354 opts[num++][0] = isa_other; 2355 sprintf (isa_other, "(other isa: 0x%x)", isa); 2356 } 2357 2358 /* Add flag options. */ 2359 for (i = 0; i < ARRAY_SIZE (flag_opts); i++) 2360 { 2361 if ((flags & flag_opts[i].mask) != 0) 2362 { 2363 opts[num++][0] = flag_opts[i].option; 2364 flags &= ~ flag_opts[i].mask; 2365 } 2366 } 2367 2368 if (flags && add_nl_p) 2369 { 2370 opts[num++][0] = target_other; 2371 sprintf (target_other, "(other flags: 0x%x)", isa); 2372 } 2373 2374 /* Add -fpmath= option. */ 2375 if (fpmath) 2376 { 2377 opts[num][0] = "-mfpmath="; 2378 opts[num++][1] = fpmath; 2379 } 2380 2381 /* Any options? */ 2382 if (num == 0) 2383 return NULL; 2384 2385 gcc_assert (num < ARRAY_SIZE (opts)); 2386 2387 /* Size the string. */ 2388 len = 0; 2389 sep_len = (add_nl_p) ? 
3 : 1; 2390 for (i = 0; i < num; i++) 2391 { 2392 len += sep_len; 2393 for (j = 0; j < 2; j++) 2394 if (opts[i][j]) 2395 len += strlen (opts[i][j]); 2396 } 2397 2398 /* Build the string. */ 2399 ret = ptr = (char *) xmalloc (len); 2400 line_len = 0; 2401 2402 for (i = 0; i < num; i++) 2403 { 2404 size_t len2[2]; 2405 2406 for (j = 0; j < 2; j++) 2407 len2[j] = (opts[i][j]) ? strlen (opts[i][j]) : 0; 2408 2409 if (i != 0) 2410 { 2411 *ptr++ = ' '; 2412 line_len++; 2413 2414 if (add_nl_p && line_len + len2[0] + len2[1] > 70) 2415 { 2416 *ptr++ = '\\'; 2417 *ptr++ = '\n'; 2418 line_len = 0; 2419 } 2420 } 2421 2422 for (j = 0; j < 2; j++) 2423 if (opts[i][j]) 2424 { 2425 memcpy (ptr, opts[i][j], len2[j]); 2426 ptr += len2[j]; 2427 line_len += len2[j]; 2428 } 2429 } 2430 2431 *ptr = '\0'; 2432 gcc_assert (ret + len >= ptr); 2433 2434 return ret; 2435 } 2436 2437 /* Function that is callable from the debugger to print the current 2438 options. */ 2439 void 2440 ix86_debug_options (void) 2441 { 2442 char *opts = ix86_target_string (ix86_isa_flags, target_flags, 2443 ix86_arch_string, ix86_tune_string, 2444 ix86_fpmath_string, true); 2445 2446 if (opts) 2447 { 2448 fprintf (stderr, "%s\n\n", opts); 2449 free (opts); 2450 } 2451 else 2452 fprintf (stderr, "<no options>\n\n"); 2453 2454 return; 2455 } 2456 2457 /* Sometimes certain combinations of command options do not make 2458 sense on a particular target machine. You can define a macro 2459 `OVERRIDE_OPTIONS' to take account of this. This macro, if 2460 defined, is executed once just after all the command options have 2461 been parsed. 2462 2463 Don't use this macro to turn on various extra optimizations for 2464 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */ 2465 2466 void 2467 override_options (bool main_args_p) 2468 { 2469 int i; 2470 unsigned int ix86_arch_mask, ix86_tune_mask; 2471 const char *prefix; 2472 const char *suffix; 2473 const char *sw; 2474 2475 /* Comes from final.c -- no real reason to change it. */ 2476 #define MAX_CODE_ALIGN 16 2477 2478 enum pta_flags 2479 { 2480 PTA_SSE = 1 << 0, 2481 PTA_SSE2 = 1 << 1, 2482 PTA_SSE3 = 1 << 2, 2483 PTA_MMX = 1 << 3, 2484 PTA_PREFETCH_SSE = 1 << 4, 2485 PTA_3DNOW = 1 << 5, 2486 PTA_3DNOW_A = 1 << 6, 2487 PTA_64BIT = 1 << 7, 2488 PTA_SSSE3 = 1 << 8, 2489 PTA_CX16 = 1 << 9, 2490 PTA_POPCNT = 1 << 10, 2491 PTA_ABM = 1 << 11, 2492 PTA_SSE4A = 1 << 12, 2493 PTA_NO_SAHF = 1 << 13, 2494 PTA_SSE4_1 = 1 << 14, 2495 PTA_SSE4_2 = 1 << 15, 2496 PTA_SSE5 = 1 << 16, 2497 PTA_AES = 1 << 17, 2498 PTA_PCLMUL = 1 << 18, 2499 PTA_AVX = 1 << 19, 2500 PTA_FMA = 1 << 20 2501 }; 2502 2503 static struct pta 2504 { 2505 const char *const name; /* processor name or nickname. 
*/ 2506 const enum processor_type processor; 2507 const enum attr_cpu schedule; 2508 const unsigned /*enum pta_flags*/ flags; 2509 } 2510 const processor_alias_table[] = 2511 { 2512 {"i386", PROCESSOR_I386, CPU_NONE, 0}, 2513 {"i486", PROCESSOR_I486, CPU_NONE, 0}, 2514 {"i586", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, 2515 {"pentium", PROCESSOR_PENTIUM, CPU_PENTIUM, 0}, 2516 {"pentium-mmx", PROCESSOR_PENTIUM, CPU_PENTIUM, PTA_MMX}, 2517 {"winchip-c6", PROCESSOR_I486, CPU_NONE, PTA_MMX}, 2518 {"winchip2", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, 2519 {"c3", PROCESSOR_I486, CPU_NONE, PTA_MMX | PTA_3DNOW}, 2520 {"c3-2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX | PTA_SSE}, 2521 {"i686", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, 2522 {"pentiumpro", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 0}, 2523 {"pentium2", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, PTA_MMX}, 2524 {"pentium3", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2525 PTA_MMX | PTA_SSE}, 2526 {"pentium3m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2527 PTA_MMX | PTA_SSE}, 2528 {"pentium-m", PROCESSOR_PENTIUMPRO, CPU_PENTIUMPRO, 2529 PTA_MMX | PTA_SSE | PTA_SSE2}, 2530 {"pentium4", PROCESSOR_PENTIUM4, CPU_NONE, 2531 PTA_MMX |PTA_SSE | PTA_SSE2}, 2532 {"pentium4m", PROCESSOR_PENTIUM4, CPU_NONE, 2533 PTA_MMX | PTA_SSE | PTA_SSE2}, 2534 {"prescott", PROCESSOR_NOCONA, CPU_NONE, 2535 PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3}, 2536 {"nocona", PROCESSOR_NOCONA, CPU_NONE, 2537 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 2538 | PTA_CX16 | PTA_NO_SAHF}, 2539 {"core2", PROCESSOR_CORE2, CPU_CORE2, 2540 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3 2541 | PTA_SSSE3 | PTA_CX16}, 2542 {"geode", PROCESSOR_GEODE, CPU_GEODE, 2543 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A |PTA_PREFETCH_SSE}, 2544 {"k6", PROCESSOR_K6, CPU_K6, PTA_MMX}, 2545 {"k6-2", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, 2546 {"k6-3", PROCESSOR_K6, CPU_K6, PTA_MMX | PTA_3DNOW}, 2547 {"athlon", PROCESSOR_ATHLON, CPU_ATHLON, 2548 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, 2549 {"athlon-tbird", PROCESSOR_ATHLON, CPU_ATHLON, 2550 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE}, 2551 {"athlon-4", PROCESSOR_ATHLON, CPU_ATHLON, 2552 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2553 {"athlon-xp", PROCESSOR_ATHLON, CPU_ATHLON, 2554 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2555 {"athlon-mp", PROCESSOR_ATHLON, CPU_ATHLON, 2556 PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE}, 2557 {"x86-64", PROCESSOR_K8, CPU_K8, 2558 PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF}, 2559 {"k8", PROCESSOR_K8, CPU_K8, 2560 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2561 | PTA_SSE2 | PTA_NO_SAHF}, 2562 {"k8-sse3", PROCESSOR_K8, CPU_K8, 2563 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2564 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2565 {"opteron", PROCESSOR_K8, CPU_K8, 2566 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2567 | PTA_SSE2 | PTA_NO_SAHF}, 2568 {"opteron-sse3", PROCESSOR_K8, CPU_K8, 2569 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2570 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2571 {"athlon64", PROCESSOR_K8, CPU_K8, 2572 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2573 | PTA_SSE2 | PTA_NO_SAHF}, 2574 {"athlon64-sse3", PROCESSOR_K8, CPU_K8, 2575 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2576 | PTA_SSE2 | PTA_SSE3 | PTA_NO_SAHF}, 2577 {"athlon-fx", PROCESSOR_K8, CPU_K8, 2578 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2579 | PTA_SSE2 | PTA_NO_SAHF}, 2580 {"amdfam10", PROCESSOR_AMDFAM10, CPU_AMDFAM10, 2581 
PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2582 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, 2583 {"barcelona", PROCESSOR_AMDFAM10, CPU_AMDFAM10, 2584 PTA_64BIT | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_SSE 2585 | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM}, 2586 {"generic32", PROCESSOR_GENERIC32, CPU_PENTIUMPRO, 2587 0 /* flags are only used for -march switch. */ }, 2588 {"generic64", PROCESSOR_GENERIC64, CPU_GENERIC64, 2589 PTA_64BIT /* flags are only used for -march switch. */ }, 2590 }; 2591 2592 int const pta_size = ARRAY_SIZE (processor_alias_table); 2593 2594 /* Set up prefix/suffix so the error messages refer to either the command 2595 line argument, or the attribute(target). */ 2596 if (main_args_p) 2597 { 2598 prefix = "-m"; 2599 suffix = ""; 2600 sw = "switch"; 2601 } 2602 else 2603 { 2604 prefix = "option(\""; 2605 suffix = "\")"; 2606 sw = "attribute"; 2607 } 2608 2609 #ifdef SUBTARGET_OVERRIDE_OPTIONS 2610 SUBTARGET_OVERRIDE_OPTIONS; 2611 #endif 2612 2613 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS 2614 SUBSUBTARGET_OVERRIDE_OPTIONS; 2615 #endif 2616 2617 /* -fPIC is the default for x86_64. */ 2618 if (TARGET_MACHO && TARGET_64BIT) 2619 flag_pic = 2; 2620 2621 /* Set the default values for switches whose default depends on TARGET_64BIT 2622 in case they weren't overwritten by command line options. */ 2623 if (TARGET_64BIT) 2624 { 2625 /* Mach-O doesn't support omitting the frame pointer for now. */ 2626 if (flag_omit_frame_pointer == 2) 2627 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1); 2628 if (flag_asynchronous_unwind_tables == 2) 2629 flag_asynchronous_unwind_tables = 1; 2630 if (flag_pcc_struct_return == 2) 2631 flag_pcc_struct_return = 0; 2632 } 2633 else 2634 { 2635 if (flag_omit_frame_pointer == 2) 2636 flag_omit_frame_pointer = 0; 2637 if (flag_asynchronous_unwind_tables == 2) 2638 flag_asynchronous_unwind_tables = 0; 2639 if (flag_pcc_struct_return == 2) 2640 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN; 2641 } 2642 2643 /* Need to check -mtune=generic first. */ 2644 if (ix86_tune_string) 2645 { 2646 if (!strcmp (ix86_tune_string, "generic") 2647 || !strcmp (ix86_tune_string, "i686") 2648 /* As special support for cross compilers we read -mtune=native 2649 as -mtune=generic. With native compilers we won't see the 2650 -mtune=native, as it was changed by the driver. */ 2651 || !strcmp (ix86_tune_string, "native")) 2652 { 2653 if (TARGET_64BIT) 2654 ix86_tune_string = "generic64"; 2655 else 2656 ix86_tune_string = "generic32"; 2657 } 2658 /* If this call is for setting the option attribute, allow the 2659 generic32/generic64 that was previously set. */ 2660 else if (!main_args_p 2661 && (!strcmp (ix86_tune_string, "generic32") 2662 || !strcmp (ix86_tune_string, "generic64"))) 2663 ; 2664 else if (!strncmp (ix86_tune_string, "generic", 7)) 2665 error ("bad value (%s) for %stune=%s %s", 2666 ix86_tune_string, prefix, suffix, sw); 2667 } 2668 else 2669 { 2670 if (ix86_arch_string) 2671 ix86_tune_string = ix86_arch_string; 2672 if (!ix86_tune_string) 2673 { 2674 ix86_tune_string = cpu_names[TARGET_CPU_DEFAULT]; 2675 ix86_tune_defaulted = 1; 2676 } 2677 2678 /* ix86_tune_string is set to ix86_arch_string or defaulted. We 2679 need to use a sensible tune option. 
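   For example, -march=k8 with no -mtune leaves ix86_tune_string as "k8", while with
   neither option given it falls back to the configured TARGET_CPU_DEFAULT entry of
   cpu_names; the catch-all names handled just below ("generic", "x86-64", "i686") are
   then rewritten to generic32 or generic64 according to TARGET_64BIT.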
*/ 2680 if (!strcmp (ix86_tune_string, "generic") 2681 || !strcmp (ix86_tune_string, "x86-64") 2682 || !strcmp (ix86_tune_string, "i686")) 2683 { 2684 if (TARGET_64BIT) 2685 ix86_tune_string = "generic64"; 2686 else 2687 ix86_tune_string = "generic32"; 2688 } 2689 } 2690 if (ix86_stringop_string) 2691 { 2692 if (!strcmp (ix86_stringop_string, "rep_byte")) 2693 stringop_alg = rep_prefix_1_byte; 2694 else if (!strcmp (ix86_stringop_string, "libcall")) 2695 stringop_alg = libcall; 2696 else if (!strcmp (ix86_stringop_string, "rep_4byte")) 2697 stringop_alg = rep_prefix_4_byte; 2698 else if (!strcmp (ix86_stringop_string, "rep_8byte") 2699 && TARGET_64BIT) 2700 /* rep; movq isn't available in 32-bit code. */ 2701 stringop_alg = rep_prefix_8_byte; 2702 else if (!strcmp (ix86_stringop_string, "byte_loop")) 2703 stringop_alg = loop_1_byte; 2704 else if (!strcmp (ix86_stringop_string, "loop")) 2705 stringop_alg = loop; 2706 else if (!strcmp (ix86_stringop_string, "unrolled_loop")) 2707 stringop_alg = unrolled_loop; 2708 else 2709 error ("bad value (%s) for %sstringop-strategy=%s %s", 2710 ix86_stringop_string, prefix, suffix, sw); 2711 } 2712 if (!strcmp (ix86_tune_string, "x86-64")) 2713 warning (OPT_Wdeprecated, "%stune=x86-64%s is deprecated. Use " 2714 "%stune=k8%s or %stune=generic%s instead as appropriate.", 2715 prefix, suffix, prefix, suffix, prefix, suffix); 2716 2717 if (!ix86_arch_string) 2718 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386"; 2719 else 2720 ix86_arch_specified = 1; 2721 2722 if (!strcmp (ix86_arch_string, "generic")) 2723 error ("generic CPU can be used only for %stune=%s %s", 2724 prefix, suffix, sw); 2725 if (!strncmp (ix86_arch_string, "generic", 7)) 2726 error ("bad value (%s) for %sarch=%s %s", 2727 ix86_arch_string, prefix, suffix, sw); 2728 2729 if (ix86_cmodel_string != 0) 2730 { 2731 if (!strcmp (ix86_cmodel_string, "small")) 2732 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 2733 else if (!strcmp (ix86_cmodel_string, "medium")) 2734 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM; 2735 else if (!strcmp (ix86_cmodel_string, "large")) 2736 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE; 2737 else if (flag_pic) 2738 error ("code model %s does not support PIC mode", ix86_cmodel_string); 2739 else if (!strcmp (ix86_cmodel_string, "32")) 2740 ix86_cmodel = CM_32; 2741 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic) 2742 ix86_cmodel = CM_KERNEL; 2743 else 2744 error ("bad value (%s) for %scmodel=%s %s", 2745 ix86_cmodel_string, prefix, suffix, sw); 2746 } 2747 else 2748 { 2749 /* For TARGET_64BIT and MS_ABI, force pic on, in order to enable the 2750 use of rip-relative addressing. This eliminates fixups that 2751 would otherwise be needed if this object is to be placed in a 2752 DLL, and is essentially just as efficient as direct addressing. */ 2753 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) 2754 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1; 2755 else if (TARGET_64BIT) 2756 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL; 2757 else 2758 ix86_cmodel = CM_32; 2759 } 2760 if (ix86_asm_string != 0) 2761 { 2762 if (! TARGET_MACHO 2763 && !strcmp (ix86_asm_string, "intel")) 2764 ix86_asm_dialect = ASM_INTEL; 2765 else if (!strcmp (ix86_asm_string, "att")) 2766 ix86_asm_dialect = ASM_ATT; 2767 else 2768 error ("bad value (%s) for %sasm=%s %s", 2769 ix86_asm_string, prefix, suffix, sw); 2770 } 2771 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32)) 2772 error ("code model %qs not supported in the %s bit mode", 2773 ix86_cmodel_string, TARGET_64BIT ? 
"64" : "32"); 2774 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0)) 2775 sorry ("%i-bit mode not compiled in", 2776 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32); 2777 2778 for (i = 0; i < pta_size; i++) 2779 if (! strcmp (ix86_arch_string, processor_alias_table[i].name)) 2780 { 2781 ix86_schedule = processor_alias_table[i].schedule; 2782 ix86_arch = processor_alias_table[i].processor; 2783 /* Default cpu tuning to the architecture. */ 2784 ix86_tune = ix86_arch; 2785 2786 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2787 error ("CPU you selected does not support x86-64 " 2788 "instruction set"); 2789 2790 if (processor_alias_table[i].flags & PTA_MMX 2791 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX)) 2792 ix86_isa_flags |= OPTION_MASK_ISA_MMX; 2793 if (processor_alias_table[i].flags & PTA_3DNOW 2794 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW)) 2795 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW; 2796 if (processor_alias_table[i].flags & PTA_3DNOW_A 2797 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A)) 2798 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A; 2799 if (processor_alias_table[i].flags & PTA_SSE 2800 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE)) 2801 ix86_isa_flags |= OPTION_MASK_ISA_SSE; 2802 if (processor_alias_table[i].flags & PTA_SSE2 2803 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2)) 2804 ix86_isa_flags |= OPTION_MASK_ISA_SSE2; 2805 if (processor_alias_table[i].flags & PTA_SSE3 2806 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3)) 2807 ix86_isa_flags |= OPTION_MASK_ISA_SSE3; 2808 if (processor_alias_table[i].flags & PTA_SSSE3 2809 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3)) 2810 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3; 2811 if (processor_alias_table[i].flags & PTA_SSE4_1 2812 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1)) 2813 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1; 2814 if (processor_alias_table[i].flags & PTA_SSE4_2 2815 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2)) 2816 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2; 2817 if (processor_alias_table[i].flags & PTA_AVX 2818 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AVX)) 2819 ix86_isa_flags |= OPTION_MASK_ISA_AVX; 2820 if (processor_alias_table[i].flags & PTA_FMA 2821 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_FMA)) 2822 ix86_isa_flags |= OPTION_MASK_ISA_FMA; 2823 if (processor_alias_table[i].flags & PTA_SSE4A 2824 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A)) 2825 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A; 2826 if (processor_alias_table[i].flags & PTA_SSE5 2827 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE5)) 2828 ix86_isa_flags |= OPTION_MASK_ISA_SSE5; 2829 if (processor_alias_table[i].flags & PTA_ABM 2830 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_ABM)) 2831 ix86_isa_flags |= OPTION_MASK_ISA_ABM; 2832 if (processor_alias_table[i].flags & PTA_CX16 2833 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_CX16)) 2834 ix86_isa_flags |= OPTION_MASK_ISA_CX16; 2835 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM) 2836 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_POPCNT)) 2837 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT; 2838 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)) 2839 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SAHF)) 2840 ix86_isa_flags |= OPTION_MASK_ISA_SAHF; 2841 if (processor_alias_table[i].flags & PTA_AES 2842 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_AES)) 2843 ix86_isa_flags |= OPTION_MASK_ISA_AES; 2844 if (processor_alias_table[i].flags & 
PTA_PCLMUL 2845 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_PCLMUL)) 2846 ix86_isa_flags |= OPTION_MASK_ISA_PCLMUL; 2847 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)) 2848 x86_prefetch_sse = true; 2849 2850 break; 2851 } 2852 2853 if (i == pta_size) 2854 error ("bad value (%s) for %sarch=%s %s", 2855 ix86_arch_string, prefix, suffix, sw); 2856 2857 ix86_arch_mask = 1u << ix86_arch; 2858 for (i = 0; i < X86_ARCH_LAST; ++i) 2859 ix86_arch_features[i] = !!(initial_ix86_arch_features[i] & ix86_arch_mask); 2860 2861 for (i = 0; i < pta_size; i++) 2862 if (! strcmp (ix86_tune_string, processor_alias_table[i].name)) 2863 { 2864 ix86_schedule = processor_alias_table[i].schedule; 2865 ix86_tune = processor_alias_table[i].processor; 2866 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT)) 2867 { 2868 if (ix86_tune_defaulted) 2869 { 2870 ix86_tune_string = "x86-64"; 2871 for (i = 0; i < pta_size; i++) 2872 if (! strcmp (ix86_tune_string, 2873 processor_alias_table[i].name)) 2874 break; 2875 ix86_schedule = processor_alias_table[i].schedule; 2876 ix86_tune = processor_alias_table[i].processor; 2877 } 2878 else 2879 error ("CPU you selected does not support x86-64 " 2880 "instruction set"); 2881 } 2882 2883 /* Intel CPUs have always interpreted SSE prefetch instructions as 2884 NOPs; so, we can enable SSE prefetch instructions even when 2885 -mtune (rather than -march) points us to a processor that has them. 2886 However, the VIA C3 gives a SIGILL, so we only do that for i686 and 2887 higher processors. */ 2888 if (TARGET_CMOVE 2889 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))) 2890 x86_prefetch_sse = true; 2891 break; 2892 } 2893 if (i == pta_size) 2894 error ("bad value (%s) for %stune=%s %s", 2895 ix86_tune_string, prefix, suffix, sw); 2896 2897 ix86_tune_mask = 1u << ix86_tune; 2898 for (i = 0; i < X86_TUNE_LAST; ++i) 2899 ix86_tune_features[i] = !!(initial_ix86_tune_features[i] & ix86_tune_mask); 2900 2901 if (optimize_size) 2902 ix86_cost = &ix86_size_cost; 2903 else 2904 ix86_cost = processor_target_table[ix86_tune].cost; 2905 2906 /* Arrange to set up i386_stack_locals for all functions. */ 2907 init_machine_status = ix86_init_machine_status; 2908 2909 /* Validate -mregparm= value. */ 2910 if (ix86_regparm_string) 2911 { 2912 if (TARGET_64BIT) 2913 warning (0, "%sregparm%s is ignored in 64-bit mode", prefix, suffix); 2914 i = atoi (ix86_regparm_string); 2915 if (i < 0 || i > REGPARM_MAX) 2916 error ("%sregparm=%d%s is not between 0 and %d", 2917 prefix, i, suffix, REGPARM_MAX); 2918 else 2919 ix86_regparm = i; 2920 } 2921 if (TARGET_64BIT) 2922 ix86_regparm = REGPARM_MAX; 2923 2924 /* If the user has provided any of the -malign-* options, 2925 warn and use that value only if -falign-* is not set. 2926 Remove this code in GCC 3.2 or later. 
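   Note that the -malign-* value is an exponent rather than a byte count: as the code
   below shows, -malign-loops=4 is honoured only while -falign-loops is unset and then
   becomes align_loops = 1 << 4, i.e. the same as -falign-loops=16.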
*/ 2927 if (ix86_align_loops_string) 2928 { 2929 warning (0, "%salign-loops%s is obsolete, use -falign-loops%s", 2930 prefix, suffix, suffix); 2931 if (align_loops == 0) 2932 { 2933 i = atoi (ix86_align_loops_string); 2934 if (i < 0 || i > MAX_CODE_ALIGN) 2935 error ("%salign-loops=%d%s is not between 0 and %d", 2936 prefix, i, suffix, MAX_CODE_ALIGN); 2937 else 2938 align_loops = 1 << i; 2939 } 2940 } 2941
2942 if (ix86_align_jumps_string) 2943 { 2944 warning (0, "%salign-jumps%s is obsolete, use -falign-jumps%s", 2945 prefix, suffix, suffix); 2946 if (align_jumps == 0) 2947 { 2948 i = atoi (ix86_align_jumps_string); 2949 if (i < 0 || i > MAX_CODE_ALIGN) 2950 error ("%salign-jumps=%d%s is not between 0 and %d", 2951 prefix, i, suffix, MAX_CODE_ALIGN); 2952 else 2953 align_jumps = 1 << i; 2954 } 2955 } 2956
2957 if (ix86_align_funcs_string) 2958 { 2959 warning (0, "%salign-functions%s is obsolete, use -falign-functions%s", 2960 prefix, suffix, suffix); 2961 if (align_functions == 0) 2962 { 2963 i = atoi (ix86_align_funcs_string); 2964 if (i < 0 || i > MAX_CODE_ALIGN) 2965 error ("%salign-functions=%d%s is not between 0 and %d", 2966 prefix, i, suffix, MAX_CODE_ALIGN); 2967 else 2968 align_functions = 1 << i; 2969 } 2970 } 2971
2972 /* Default align_* from the processor table. */ 2973 if (align_loops == 0) 2974 { 2975 align_loops = processor_target_table[ix86_tune].align_loop; 2976 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip; 2977 } 2978 if (align_jumps == 0) 2979 { 2980 align_jumps = processor_target_table[ix86_tune].align_jump; 2981 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip; 2982 } 2983 if (align_functions == 0) 2984 { 2985 align_functions = processor_target_table[ix86_tune].align_func; 2986 } 2987
2988 /* Validate -mbranch-cost= value, or provide default. */ 2989 ix86_branch_cost = ix86_cost->branch_cost; 2990 if (ix86_branch_cost_string) 2991 { 2992 i = atoi (ix86_branch_cost_string); 2993 if (i < 0 || i > 5) 2994 error ("%sbranch-cost=%d%s is not between 0 and 5", prefix, i, suffix); 2995 else 2996 ix86_branch_cost = i; 2997 } 2998 if (ix86_section_threshold_string) 2999 { 3000 i = atoi (ix86_section_threshold_string); 3001 if (i < 0) 3002 error ("%slarge-data-threshold=%d%s is negative", prefix, i, suffix); 3003 else 3004 ix86_section_threshold = i; 3005 } 3006
3007 if (ix86_tls_dialect_string) 3008 { 3009 if (strcmp (ix86_tls_dialect_string, "gnu") == 0) 3010 ix86_tls_dialect = TLS_DIALECT_GNU; 3011 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0) 3012 ix86_tls_dialect = TLS_DIALECT_GNU2; 3013 else if (strcmp (ix86_tls_dialect_string, "sun") == 0) 3014 ix86_tls_dialect = TLS_DIALECT_SUN; 3015 else 3016 error ("bad value (%s) for %stls-dialect=%s %s", 3017 ix86_tls_dialect_string, prefix, suffix, sw); 3018 } 3019
3020 if (ix87_precision_string) 3021 { 3022 i = atoi (ix87_precision_string); 3023 if (i != 32 && i != 64 && i != 80) 3024 error ("pc%d is not a valid precision setting (32, 64 or 80)", i); 3025 } 3026
3027 if (TARGET_64BIT) 3028 { 3029 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit; 3030 3031 /* Enable by default the SSE and MMX builtins. Do allow the user to 3032 explicitly disable any of these. In particular, disabling SSE and 3033 MMX for kernel code is extremely useful.
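   The & ~ix86_isa_flags_explicit below is what lets an explicit option win: if the user
   passed -mno-sse2, ix86_handle_option has already recorded OPTION_MASK_ISA_SSE2 in
   ix86_isa_flags_explicit, so the default OR cannot switch SSE2 back on.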
*/ 3034 if (!ix86_arch_specified) 3035 ix86_isa_flags 3036 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX 3037 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit); 3038 3039 if (TARGET_RTD) 3040 warning (0, "%srtd%s is ignored in 64bit mode", prefix, suffix); 3041 } 3042 else 3043 { 3044 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit; 3045 3046 if (!ix86_arch_specified) 3047 ix86_isa_flags 3048 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit; 3049 3050 /* i386 ABI does not specify red zone. It still makes sense to use it 3051 when programmer takes care to stack from being destroyed. */ 3052 if (!(target_flags_explicit & MASK_NO_RED_ZONE)) 3053 target_flags |= MASK_NO_RED_ZONE; 3054 } 3055 3056 /* Keep nonleaf frame pointers. */ 3057 if (flag_omit_frame_pointer) 3058 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER; 3059 else if (TARGET_OMIT_LEAF_FRAME_POINTER) 3060 flag_omit_frame_pointer = 1; 3061 3062 /* If we're doing fast math, we don't care about comparison order 3063 wrt NaNs. This lets us use a shorter comparison sequence. */ 3064 if (flag_finite_math_only) 3065 target_flags &= ~MASK_IEEE_FP; 3066 3067 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387, 3068 since the insns won't need emulation. */ 3069 if (x86_arch_always_fancy_math_387 & ix86_arch_mask) 3070 target_flags &= ~MASK_NO_FANCY_MATH_387; 3071 3072 /* Likewise, if the target doesn't have a 387, or we've specified 3073 software floating point, don't use 387 inline intrinsics. */ 3074 if (!TARGET_80387) 3075 target_flags |= MASK_NO_FANCY_MATH_387; 3076 3077 /* Turn on MMX builtins for -msse. */ 3078 if (TARGET_SSE) 3079 { 3080 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit; 3081 x86_prefetch_sse = true; 3082 } 3083 3084 /* Turn on popcnt instruction for -msse4.2 or -mabm. */ 3085 if (TARGET_SSE4_2 || TARGET_ABM) 3086 ix86_isa_flags |= OPTION_MASK_ISA_POPCNT & ~ix86_isa_flags_explicit; 3087 3088 if (!TARGET_64BIT && TARGET_SAVE_ARGS) 3089 error ("-msave-args makes no sense in the 32-bit mode"); 3090 3091 /* Validate -mpreferred-stack-boundary= value or default it to 3092 PREFERRED_STACK_BOUNDARY_DEFAULT. */ 3093 ix86_preferred_stack_boundary = PREFERRED_STACK_BOUNDARY_DEFAULT; 3094 if (ix86_preferred_stack_boundary_string) 3095 { 3096 i = atoi (ix86_preferred_stack_boundary_string); 3097 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 3098 error ("%spreferred-stack-boundary=%d%s is not between %d and 12", 3099 prefix, i, suffix, TARGET_64BIT ? 4 : 2); 3100 else 3101 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT; 3102 } 3103 3104 /* Set the default value for -mstackrealign. */ 3105 if (ix86_force_align_arg_pointer == -1) 3106 ix86_force_align_arg_pointer = STACK_REALIGN_DEFAULT; 3107 3108 /* Validate -mincoming-stack-boundary= value or default it to 3109 MIN_STACK_BOUNDARY/PREFERRED_STACK_BOUNDARY. */ 3110 if (ix86_force_align_arg_pointer) 3111 ix86_default_incoming_stack_boundary = MIN_STACK_BOUNDARY; 3112 else 3113 ix86_default_incoming_stack_boundary = PREFERRED_STACK_BOUNDARY; 3114 ix86_incoming_stack_boundary = ix86_default_incoming_stack_boundary; 3115 if (ix86_incoming_stack_boundary_string) 3116 { 3117 i = atoi (ix86_incoming_stack_boundary_string); 3118 if (i < (TARGET_64BIT ? 4 : 2) || i > 12) 3119 error ("-mincoming-stack-boundary=%d is not between %d and 12", 3120 i, TARGET_64BIT ? 
4 : 2); 3121 else 3122 { 3123 ix86_user_incoming_stack_boundary = (1 << i) * BITS_PER_UNIT; 3124 ix86_incoming_stack_boundary 3125 = ix86_user_incoming_stack_boundary; 3126 } 3127 } 3128 3129 /* Accept -msseregparm only if at least SSE support is enabled. */ 3130 if (TARGET_SSEREGPARM 3131 && ! TARGET_SSE) 3132 error ("%ssseregparm%s used without SSE enabled", prefix, suffix); 3133 3134 ix86_fpmath = TARGET_FPMATH_DEFAULT; 3135 if (ix86_fpmath_string != 0) 3136 { 3137 if (! strcmp (ix86_fpmath_string, "387")) 3138 ix86_fpmath = FPMATH_387; 3139 else if (! strcmp (ix86_fpmath_string, "sse")) 3140 { 3141 if (!TARGET_SSE) 3142 { 3143 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 3144 ix86_fpmath = FPMATH_387; 3145 } 3146 else 3147 ix86_fpmath = FPMATH_SSE; 3148 } 3149 else if (! strcmp (ix86_fpmath_string, "387,sse") 3150 || ! strcmp (ix86_fpmath_string, "387+sse") 3151 || ! strcmp (ix86_fpmath_string, "sse,387") 3152 || ! strcmp (ix86_fpmath_string, "sse+387") 3153 || ! strcmp (ix86_fpmath_string, "both")) 3154 { 3155 if (!TARGET_SSE) 3156 { 3157 warning (0, "SSE instruction set disabled, using 387 arithmetics"); 3158 ix86_fpmath = FPMATH_387; 3159 } 3160 else if (!TARGET_80387) 3161 { 3162 warning (0, "387 instruction set disabled, using SSE arithmetics"); 3163 ix86_fpmath = FPMATH_SSE; 3164 } 3165 else 3166 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387); 3167 } 3168 else 3169 error ("bad value (%s) for %sfpmath=%s %s", 3170 ix86_fpmath_string, prefix, suffix, sw); 3171 } 3172 3173 /* If the i387 is disabled, then do not return values in it. */ 3174 if (!TARGET_80387) 3175 target_flags &= ~MASK_FLOAT_RETURNS; 3176 3177 /* Use external vectorized library in vectorizing intrinsics. */ 3178 if (ix86_veclibabi_string) 3179 { 3180 if (strcmp (ix86_veclibabi_string, "svml") == 0) 3181 ix86_veclib_handler = ix86_veclibabi_svml; 3182 else if (strcmp (ix86_veclibabi_string, "acml") == 0) 3183 ix86_veclib_handler = ix86_veclibabi_acml; 3184 else 3185 error ("unknown vectorization library ABI type (%s) for " 3186 "%sveclibabi=%s %s", ix86_veclibabi_string, 3187 prefix, suffix, sw); 3188 } 3189 3190 if ((x86_accumulate_outgoing_args & ix86_tune_mask) 3191 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3192 && !optimize_size) 3193 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3194 3195 /* ??? Unwind info is not correct around the CFG unless either a frame 3196 pointer is present or M_A_O_A is set. Fixing this requires rewriting 3197 unwind info generation to be aware of the CFG and propagating states 3198 around edges. */ 3199 if ((flag_unwind_tables || flag_asynchronous_unwind_tables 3200 || flag_exceptions || flag_non_call_exceptions) 3201 && flag_omit_frame_pointer 3202 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 3203 { 3204 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3205 warning (0, "unwind tables currently require either a frame pointer " 3206 "or %saccumulate-outgoing-args%s for correctness", 3207 prefix, suffix); 3208 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3209 } 3210 3211 /* If stack probes are required, the space used for large function 3212 arguments on the stack must also be probed, so enable 3213 -maccumulate-outgoing-args so this happens in the prologue. 
*/ 3214 if (TARGET_STACK_PROBE 3215 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS)) 3216 { 3217 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS) 3218 warning (0, "stack probing requires %saccumulate-outgoing-args%s " 3219 "for correctness", prefix, suffix); 3220 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS; 3221 } 3222 3223 /* For sane SSE instruction set generation we need fcomi instruction. 3224 It is safe to enable all CMOVE instructions. */ 3225 if (TARGET_SSE) 3226 TARGET_CMOVE = 1; 3227 3228 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */ 3229 { 3230 char *p; 3231 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0); 3232 p = strchr (internal_label_prefix, 'X'); 3233 internal_label_prefix_len = p - internal_label_prefix; 3234 *p = '\0'; 3235 } 3236 3237 /* When scheduling description is not available, disable scheduler pass 3238 so it won't slow down the compilation and make x87 code slower. */ 3239 if (!TARGET_SCHEDULE) 3240 flag_schedule_insns_after_reload = flag_schedule_insns = 0; 3241 3242 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES)) 3243 set_param_value ("simultaneous-prefetches", 3244 ix86_cost->simultaneous_prefetches); 3245 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE)) 3246 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block); 3247 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE)) 3248 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size); 3249 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE)) 3250 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size); 3251 3252 /* If using typedef char *va_list, signal that __builtin_va_start (&ap, 0) 3253 can be optimized to ap = __builtin_next_arg (0). */ 3254 if (!TARGET_64BIT) 3255 targetm.expand_builtin_va_start = NULL; 3256 3257 if (TARGET_64BIT) 3258 { 3259 ix86_gen_leave = gen_leave_rex64; 3260 ix86_gen_pop1 = gen_popdi1; 3261 ix86_gen_add3 = gen_adddi3; 3262 ix86_gen_sub3 = gen_subdi3; 3263 ix86_gen_sub3_carry = gen_subdi3_carry_rex64; 3264 ix86_gen_one_cmpl2 = gen_one_cmpldi2; 3265 ix86_gen_monitor = gen_sse3_monitor64; 3266 ix86_gen_andsp = gen_anddi3; 3267 } 3268 else 3269 { 3270 ix86_gen_leave = gen_leave; 3271 ix86_gen_pop1 = gen_popsi1; 3272 ix86_gen_add3 = gen_addsi3; 3273 ix86_gen_sub3 = gen_subsi3; 3274 ix86_gen_sub3_carry = gen_subsi3_carry; 3275 ix86_gen_one_cmpl2 = gen_one_cmplsi2; 3276 ix86_gen_monitor = gen_sse3_monitor; 3277 ix86_gen_andsp = gen_andsi3; 3278 } 3279 3280 #ifdef USE_IX86_CLD 3281 /* Use -mcld by default for 32-bit code if configured with --enable-cld. */ 3282 if (!TARGET_64BIT) 3283 target_flags |= MASK_CLD & ~target_flags_explicit; 3284 #endif 3285 3286 /* Save the initial options in case the user does function specific options */ 3287 if (main_args_p) 3288 target_option_default_node = target_option_current_node 3289 = build_target_option_node (); 3290 } 3291 3292 /* Update register usage after having seen the compiler flags. */ 3293 3294 void 3295 ix86_conditional_register_usage (void) 3296 { 3297 int i; 3298 unsigned int j; 3299 3300 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3301 { 3302 if (fixed_regs[i] > 1) 3303 fixed_regs[i] = (fixed_regs[i] == (TARGET_64BIT ? 3 : 2)); 3304 if (call_used_regs[i] > 1) 3305 call_used_regs[i] = (call_used_regs[i] == (TARGET_64BIT ? 3 : 2)); 3306 } 3307 3308 /* The PIC register, if it exists, is fixed. */ 3309 j = PIC_OFFSET_TABLE_REGNUM; 3310 if (j != INVALID_REGNUM) 3311 fixed_regs[j] = call_used_regs[j] = 1; 3312 3313 /* The MS_ABI changes the set of call-used registers. 
*/ 3314 if (TARGET_64BIT && ix86_cfun_abi () == MS_ABI) 3315 { 3316 call_used_regs[SI_REG] = 0; 3317 call_used_regs[DI_REG] = 0; 3318 call_used_regs[XMM6_REG] = 0; 3319 call_used_regs[XMM7_REG] = 0; 3320 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 3321 call_used_regs[i] = 0; 3322 } 3323 3324 /* The default setting of CLOBBERED_REGS is for 32-bit; add in the 3325 other call-clobbered regs for 64-bit. */ 3326 if (TARGET_64BIT) 3327 { 3328 CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]); 3329 3330 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3331 if (TEST_HARD_REG_BIT (reg_class_contents[(int)GENERAL_REGS], i) 3332 && call_used_regs[i]) 3333 SET_HARD_REG_BIT (reg_class_contents[(int)CLOBBERED_REGS], i); 3334 } 3335 3336 /* If MMX is disabled, squash the registers. */ 3337 if (! TARGET_MMX) 3338 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3339 if (TEST_HARD_REG_BIT (reg_class_contents[(int)MMX_REGS], i)) 3340 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3341 3342 /* If SSE is disabled, squash the registers. */ 3343 if (! TARGET_SSE) 3344 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3345 if (TEST_HARD_REG_BIT (reg_class_contents[(int)SSE_REGS], i)) 3346 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3347 3348 /* If the FPU is disabled, squash the registers. */ 3349 if (! (TARGET_80387 || TARGET_FLOAT_RETURNS_IN_80387)) 3350 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 3351 if (TEST_HARD_REG_BIT (reg_class_contents[(int)FLOAT_REGS], i)) 3352 fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = ""; 3353 3354 /* If 32-bit, squash the 64-bit registers. */ 3355 if (! TARGET_64BIT) 3356 { 3357 for (i = FIRST_REX_INT_REG; i <= LAST_REX_INT_REG; i++) 3358 reg_names[i] = ""; 3359 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 3360 reg_names[i] = ""; 3361 } 3362 } 3363 3364 3365 /* Save the current options */ 3366 3367 static void 3368 ix86_function_specific_save (struct cl_target_option *ptr) 3369 { 3370 gcc_assert (IN_RANGE (ix86_arch, 0, 255)); 3371 gcc_assert (IN_RANGE (ix86_schedule, 0, 255)); 3372 gcc_assert (IN_RANGE (ix86_tune, 0, 255)); 3373 gcc_assert (IN_RANGE (ix86_fpmath, 0, 255)); 3374 gcc_assert (IN_RANGE (ix86_branch_cost, 0, 255)); 3375 3376 ptr->arch = ix86_arch; 3377 ptr->schedule = ix86_schedule; 3378 ptr->tune = ix86_tune; 3379 ptr->fpmath = ix86_fpmath; 3380 ptr->branch_cost = ix86_branch_cost; 3381 ptr->tune_defaulted = ix86_tune_defaulted; 3382 ptr->arch_specified = ix86_arch_specified; 3383 ptr->ix86_isa_flags_explicit = ix86_isa_flags_explicit; 3384 ptr->target_flags_explicit = target_flags_explicit; 3385 } 3386 3387 /* Restore the current options */ 3388 3389 static void 3390 ix86_function_specific_restore (struct cl_target_option *ptr) 3391 { 3392 enum processor_type old_tune = ix86_tune; 3393 enum processor_type old_arch = ix86_arch; 3394 unsigned int ix86_arch_mask, ix86_tune_mask; 3395 int i; 3396 3397 ix86_arch = ptr->arch; 3398 ix86_schedule = ptr->schedule; 3399 ix86_tune = ptr->tune; 3400 ix86_fpmath = ptr->fpmath; 3401 ix86_branch_cost = ptr->branch_cost; 3402 ix86_tune_defaulted = ptr->tune_defaulted; 3403 ix86_arch_specified = ptr->arch_specified; 3404 ix86_isa_flags_explicit = ptr->ix86_isa_flags_explicit; 3405 target_flags_explicit = ptr->target_flags_explicit; 3406 3407 /* Recreate the arch feature tests if the arch changed */ 3408 if (old_arch != ix86_arch) 3409 { 3410 ix86_arch_mask = 1u << ix86_arch; 3411 for (i = 0; i < X86_ARCH_LAST; ++i) 3412 ix86_arch_features[i] 3413 = !!(initial_ix86_arch_features[i] 
& ix86_arch_mask); 3414 } 3415 3416 /* Recreate the tune optimization tests */ 3417 if (old_tune != ix86_tune) 3418 { 3419 ix86_tune_mask = 1u << ix86_tune; 3420 for (i = 0; i < X86_TUNE_LAST; ++i) 3421 ix86_tune_features[i] 3422 = !!(initial_ix86_tune_features[i] & ix86_tune_mask); 3423 } 3424 } 3425 3426 /* Print the current options */ 3427 3428 static void 3429 ix86_function_specific_print (FILE *file, int indent, 3430 struct cl_target_option *ptr) 3431 { 3432 char *target_string 3433 = ix86_target_string (ptr->ix86_isa_flags, ptr->target_flags, 3434 NULL, NULL, NULL, false); 3435 3436 fprintf (file, "%*sarch = %d (%s)\n", 3437 indent, "", 3438 ptr->arch, 3439 ((ptr->arch < TARGET_CPU_DEFAULT_max) 3440 ? cpu_names[ptr->arch] 3441 : "<unknown>")); 3442 3443 fprintf (file, "%*stune = %d (%s)\n", 3444 indent, "", 3445 ptr->tune, 3446 ((ptr->tune < TARGET_CPU_DEFAULT_max) 3447 ? cpu_names[ptr->tune] 3448 : "<unknown>")); 3449 3450 fprintf (file, "%*sfpmath = %d%s%s\n", indent, "", ptr->fpmath, 3451 (ptr->fpmath & FPMATH_387) ? ", 387" : "", 3452 (ptr->fpmath & FPMATH_SSE) ? ", sse" : ""); 3453 fprintf (file, "%*sbranch_cost = %d\n", indent, "", ptr->branch_cost); 3454 3455 if (target_string) 3456 { 3457 fprintf (file, "%*s%s\n", indent, "", target_string); 3458 free (target_string); 3459 } 3460 } 3461 3462 3463 /* Inner function to process the attribute((target(...))), take an argument and 3464 set the current options from the argument. If we have a list, recursively go 3465 over the list. */ 3466 3467 static bool 3468 ix86_valid_target_attribute_inner_p (tree args, char *p_strings[]) 3469 { 3470 char *next_optstr; 3471 bool ret = true; 3472 3473 #define IX86_ATTR_ISA(S,O) { S, sizeof (S)-1, ix86_opt_isa, O, 0 } 3474 #define IX86_ATTR_STR(S,O) { S, sizeof (S)-1, ix86_opt_str, O, 0 } 3475 #define IX86_ATTR_YES(S,O,M) { S, sizeof (S)-1, ix86_opt_yes, O, M } 3476 #define IX86_ATTR_NO(S,O,M) { S, sizeof (S)-1, ix86_opt_no, O, M } 3477 3478 enum ix86_opt_type 3479 { 3480 ix86_opt_unknown, 3481 ix86_opt_yes, 3482 ix86_opt_no, 3483 ix86_opt_str, 3484 ix86_opt_isa 3485 }; 3486 3487 static const struct 3488 { 3489 const char *string; 3490 size_t len; 3491 enum ix86_opt_type type; 3492 int opt; 3493 int mask; 3494 } attrs[] = { 3495 /* isa options */ 3496 IX86_ATTR_ISA ("3dnow", OPT_m3dnow), 3497 IX86_ATTR_ISA ("abm", OPT_mabm), 3498 IX86_ATTR_ISA ("aes", OPT_maes), 3499 IX86_ATTR_ISA ("avx", OPT_mavx), 3500 IX86_ATTR_ISA ("mmx", OPT_mmmx), 3501 IX86_ATTR_ISA ("pclmul", OPT_mpclmul), 3502 IX86_ATTR_ISA ("popcnt", OPT_mpopcnt), 3503 IX86_ATTR_ISA ("sse", OPT_msse), 3504 IX86_ATTR_ISA ("sse2", OPT_msse2), 3505 IX86_ATTR_ISA ("sse3", OPT_msse3), 3506 IX86_ATTR_ISA ("sse4", OPT_msse4), 3507 IX86_ATTR_ISA ("sse4.1", OPT_msse4_1), 3508 IX86_ATTR_ISA ("sse4.2", OPT_msse4_2), 3509 IX86_ATTR_ISA ("sse4a", OPT_msse4a), 3510 IX86_ATTR_ISA ("sse5", OPT_msse5), 3511 IX86_ATTR_ISA ("ssse3", OPT_mssse3), 3512 3513 /* string options */ 3514 IX86_ATTR_STR ("arch=", IX86_FUNCTION_SPECIFIC_ARCH), 3515 IX86_ATTR_STR ("fpmath=", IX86_FUNCTION_SPECIFIC_FPMATH), 3516 IX86_ATTR_STR ("tune=", IX86_FUNCTION_SPECIFIC_TUNE), 3517 3518 /* flag options */ 3519 IX86_ATTR_YES ("cld", 3520 OPT_mcld, 3521 MASK_CLD), 3522 3523 IX86_ATTR_NO ("fancy-math-387", 3524 OPT_mfancy_math_387, 3525 MASK_NO_FANCY_MATH_387), 3526 3527 IX86_ATTR_NO ("fused-madd", 3528 OPT_mfused_madd, 3529 MASK_NO_FUSED_MADD), 3530 3531 IX86_ATTR_YES ("ieee-fp", 3532 OPT_mieee_fp, 3533 MASK_IEEE_FP), 3534 3535 IX86_ATTR_YES ("inline-all-stringops", 3536 
OPT_minline_all_stringops, 3537 MASK_INLINE_ALL_STRINGOPS), 3538 3539 IX86_ATTR_YES ("inline-stringops-dynamically", 3540 OPT_minline_stringops_dynamically, 3541 MASK_INLINE_STRINGOPS_DYNAMICALLY), 3542 3543 IX86_ATTR_NO ("align-stringops", 3544 OPT_mno_align_stringops, 3545 MASK_NO_ALIGN_STRINGOPS), 3546 3547 IX86_ATTR_YES ("recip", 3548 OPT_mrecip, 3549 MASK_RECIP), 3550 3551 }; 3552 3553 /* If this is a list, recurse to get the options. */ 3554 if (TREE_CODE (args) == TREE_LIST) 3555 { 3556 bool ret = true; 3557 3558 for (; args; args = TREE_CHAIN (args)) 3559 if (TREE_VALUE (args) 3560 && !ix86_valid_target_attribute_inner_p (TREE_VALUE (args), p_strings)) 3561 ret = false; 3562 3563 return ret; 3564 } 3565 3566 else if (TREE_CODE (args) != STRING_CST) 3567 gcc_unreachable (); 3568 3569 /* Handle multiple arguments separated by commas. */ 3570 next_optstr = ASTRDUP (TREE_STRING_POINTER (args)); 3571 3572 while (next_optstr && *next_optstr != '\0') 3573 { 3574 char *p = next_optstr; 3575 char *orig_p = p; 3576 char *comma = strchr (next_optstr, ','); 3577 const char *opt_string; 3578 size_t len, opt_len; 3579 int opt; 3580 bool opt_set_p; 3581 char ch; 3582 unsigned i; 3583 enum ix86_opt_type type = ix86_opt_unknown; 3584 int mask = 0; 3585 3586 if (comma) 3587 { 3588 *comma = '\0'; 3589 len = comma - next_optstr; 3590 next_optstr = comma + 1; 3591 } 3592 else 3593 { 3594 len = strlen (p); 3595 next_optstr = NULL; 3596 } 3597 3598 /* Recognize no-xxx. */ 3599 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-') 3600 { 3601 opt_set_p = false; 3602 p += 3; 3603 len -= 3; 3604 } 3605 else 3606 opt_set_p = true; 3607 3608 /* Find the option. */ 3609 ch = *p; 3610 opt = N_OPTS; 3611 for (i = 0; i < ARRAY_SIZE (attrs); i++) 3612 { 3613 type = attrs[i].type; 3614 opt_len = attrs[i].len; 3615 if (ch == attrs[i].string[0] 3616 && ((type != ix86_opt_str) ? len == opt_len : len > opt_len) 3617 && memcmp (p, attrs[i].string, opt_len) == 0) 3618 { 3619 opt = attrs[i].opt; 3620 mask = attrs[i].mask; 3621 opt_string = attrs[i].string; 3622 break; 3623 } 3624 } 3625 3626 /* Process the option. */ 3627 if (opt == N_OPTS) 3628 { 3629 error ("attribute(target(\"%s\")) is unknown", orig_p); 3630 ret = false; 3631 } 3632 3633 else if (type == ix86_opt_isa) 3634 ix86_handle_option (opt, p, opt_set_p); 3635 3636 else if (type == ix86_opt_yes || type == ix86_opt_no) 3637 { 3638 if (type == ix86_opt_no) 3639 opt_set_p = !opt_set_p; 3640 3641 if (opt_set_p) 3642 target_flags |= mask; 3643 else 3644 target_flags &= ~mask; 3645 } 3646 3647 else if (type == ix86_opt_str) 3648 { 3649 if (p_strings[opt]) 3650 { 3651 error ("option(\"%s\") was already specified", opt_string); 3652 ret = false; 3653 } 3654 else 3655 p_strings[opt] = xstrdup (p + opt_len); 3656 } 3657 3658 else 3659 gcc_unreachable (); 3660 } 3661 3662 return ret; 3663 } 3664 3665 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */ 3666 3667 tree 3668 ix86_valid_target_attribute_tree (tree args) 3669 { 3670 const char *orig_arch_string = ix86_arch_string; 3671 const char *orig_tune_string = ix86_tune_string; 3672 const char *orig_fpmath_string = ix86_fpmath_string; 3673 int orig_tune_defaulted = ix86_tune_defaulted; 3674 int orig_arch_specified = ix86_arch_specified; 3675 char *option_strings[IX86_FUNCTION_SPECIFIC_MAX] = { NULL, NULL, NULL }; 3676 tree t = NULL_TREE; 3677 int i; 3678 struct cl_target_option *def 3679 = TREE_TARGET_OPTION (target_option_default_node); 3680 3681 /* Process each of the options on the chain. 
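   As an illustrative example (option spellings are taken from the
   attrs[] table above, and the arch value is only checked later by
   override_options), a declaration such as

     int foo (int) __attribute__ ((target ("arch=core2,sse4.2")));

   reaches this point with ARGS being a STRING_CST, or a TREE_LIST of
   STRING_CSTs when several strings are given; the parser above splits
   each string at commas and updates the ISA flags, the target flags
   and the arch=/tune=/fpmath= strings accordingly.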
*/ 3682 if (! ix86_valid_target_attribute_inner_p (args, option_strings)) 3683 return NULL_TREE; 3684 3685 /* If the changed options are different from the default, rerun override_options, 3686 and then save the options away. The string options are are attribute options, 3687 and will be undone when we copy the save structure. */ 3688 if (ix86_isa_flags != def->ix86_isa_flags 3689 || target_flags != def->target_flags 3690 || option_strings[IX86_FUNCTION_SPECIFIC_ARCH] 3691 || option_strings[IX86_FUNCTION_SPECIFIC_TUNE] 3692 || option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) 3693 { 3694 /* If we are using the default tune= or arch=, undo the string assigned, 3695 and use the default. */ 3696 if (option_strings[IX86_FUNCTION_SPECIFIC_ARCH]) 3697 ix86_arch_string = option_strings[IX86_FUNCTION_SPECIFIC_ARCH]; 3698 else if (!orig_arch_specified) 3699 ix86_arch_string = NULL; 3700 3701 if (option_strings[IX86_FUNCTION_SPECIFIC_TUNE]) 3702 ix86_tune_string = option_strings[IX86_FUNCTION_SPECIFIC_TUNE]; 3703 else if (orig_tune_defaulted) 3704 ix86_tune_string = NULL; 3705 3706 /* If fpmath= is not set, and we now have sse2 on 32-bit, use it. */ 3707 if (option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]) 3708 ix86_fpmath_string = option_strings[IX86_FUNCTION_SPECIFIC_FPMATH]; 3709 else if (!TARGET_64BIT && TARGET_SSE) 3710 ix86_fpmath_string = "sse,387"; 3711 3712 /* Do any overrides, such as arch=xxx, or tune=xxx support. */ 3713 override_options (false); 3714 3715 /* Add any builtin functions with the new isa if any. */ 3716 ix86_add_new_builtins (ix86_isa_flags); 3717 3718 /* Save the current options unless we are validating options for 3719 #pragma. */ 3720 t = build_target_option_node (); 3721 3722 ix86_arch_string = orig_arch_string; 3723 ix86_tune_string = orig_tune_string; 3724 ix86_fpmath_string = orig_fpmath_string; 3725 3726 /* Free up memory allocated to hold the strings */ 3727 for (i = 0; i < IX86_FUNCTION_SPECIFIC_MAX; i++) 3728 if (option_strings[i]) 3729 free (option_strings[i]); 3730 } 3731 3732 return t; 3733 } 3734 3735 /* Hook to validate attribute((target("string"))). */ 3736 3737 static bool 3738 ix86_valid_target_attribute_p (tree fndecl, 3739 tree ARG_UNUSED (name), 3740 tree args, 3741 int ARG_UNUSED (flags)) 3742 { 3743 struct cl_target_option cur_target; 3744 bool ret = true; 3745 tree old_optimize = build_optimization_node (); 3746 tree new_target, new_optimize; 3747 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl); 3748 3749 /* If the function changed the optimization levels as well as setting target 3750 options, start with the optimizations specified. */ 3751 if (func_optimize && func_optimize != old_optimize) 3752 cl_optimization_restore (TREE_OPTIMIZATION (func_optimize)); 3753 3754 /* The target attributes may also change some optimization flags, so update 3755 the optimization options if necessary. 
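   The current target options are therefore saved below before the new
   target node is built and restored again afterwards, so validating
   one function's attribute does not leak its settings into the rest
   of the compilation.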
*/ 3756 cl_target_option_save (&cur_target); 3757 new_target = ix86_valid_target_attribute_tree (args); 3758 new_optimize = build_optimization_node (); 3759 3760 if (!new_target) 3761 ret = false; 3762 3763 else if (fndecl) 3764 { 3765 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target; 3766 3767 if (old_optimize != new_optimize) 3768 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize; 3769 } 3770 3771 cl_target_option_restore (&cur_target); 3772 3773 if (old_optimize != new_optimize) 3774 cl_optimization_restore (TREE_OPTIMIZATION (old_optimize)); 3775 3776 return ret; 3777 } 3778 3779 3780 /* Hook to determine if one function can safely inline another. */ 3781 3782 static bool 3783 ix86_can_inline_p (tree caller, tree callee) 3784 { 3785 bool ret = false; 3786 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller); 3787 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee); 3788 3789 /* If callee has no option attributes, then it is ok to inline. */ 3790 if (!callee_tree) 3791 ret = true; 3792 3793 /* If caller has no option attributes, but callee does then it is not ok to 3794 inline. */ 3795 else if (!caller_tree) 3796 ret = false; 3797 3798 else 3799 { 3800 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree); 3801 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree); 3802 3803 /* Callee's isa options should a subset of the caller's, i.e. a SSE5 function 3804 can inline a SSE2 function but a SSE2 function can't inline a SSE5 3805 function. */ 3806 if ((caller_opts->ix86_isa_flags & callee_opts->ix86_isa_flags) 3807 != callee_opts->ix86_isa_flags) 3808 ret = false; 3809 3810 /* See if we have the same non-isa options. */ 3811 else if (caller_opts->target_flags != callee_opts->target_flags) 3812 ret = false; 3813 3814 /* See if arch, tune, etc. are the same. */ 3815 else if (caller_opts->arch != callee_opts->arch) 3816 ret = false; 3817 3818 else if (caller_opts->tune != callee_opts->tune) 3819 ret = false; 3820 3821 else if (caller_opts->fpmath != callee_opts->fpmath) 3822 ret = false; 3823 3824 else if (caller_opts->branch_cost != callee_opts->branch_cost) 3825 ret = false; 3826 3827 else 3828 ret = true; 3829 } 3830 3831 return ret; 3832 } 3833 3834 3835 /* Remember the last target of ix86_set_current_function. */ 3836 static GTY(()) tree ix86_previous_fndecl; 3837 3838 /* Establish appropriate back-end context for processing the function 3839 FNDECL. The argument might be NULL to indicate processing at top 3840 level, outside of any function scope. */ 3841 static void 3842 ix86_set_current_function (tree fndecl) 3843 { 3844 /* Only change the context if the function changes. This hook is called 3845 several times in the course of compiling a function, and we don't want to 3846 slow things down too much or call target_reinit when it isn't safe. */ 3847 if (fndecl && fndecl != ix86_previous_fndecl) 3848 { 3849 tree old_tree = (ix86_previous_fndecl 3850 ? DECL_FUNCTION_SPECIFIC_TARGET (ix86_previous_fndecl) 3851 : NULL_TREE); 3852 3853 tree new_tree = (fndecl 3854 ? 
DECL_FUNCTION_SPECIFIC_TARGET (fndecl) 3855 : NULL_TREE); 3856 3857 ix86_previous_fndecl = fndecl; 3858 if (old_tree == new_tree) 3859 ; 3860 3861 else if (new_tree) 3862 { 3863 cl_target_option_restore (TREE_TARGET_OPTION (new_tree)); 3864 target_reinit (); 3865 } 3866 3867 else if (old_tree) 3868 { 3869 struct cl_target_option *def 3870 = TREE_TARGET_OPTION (target_option_current_node); 3871 3872 cl_target_option_restore (def); 3873 target_reinit (); 3874 } 3875 } 3876 } 3877 3878 3879 /* Return true if this goes in large data/bss. */ 3880 3881 static bool 3882 ix86_in_large_data_p (tree exp) 3883 { 3884 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC) 3885 return false; 3886 3887 /* Functions are never large data. */ 3888 if (TREE_CODE (exp) == FUNCTION_DECL) 3889 return false; 3890 3891 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp)) 3892 { 3893 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp)); 3894 if (strcmp (section, ".ldata") == 0 3895 || strcmp (section, ".lbss") == 0) 3896 return true; 3897 return false; 3898 } 3899 else 3900 { 3901 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp)); 3902 3903 /* If this is an incomplete type with size 0, then we can't put it 3904 in data because it might be too big when completed. */ 3905 if (!size || size > ix86_section_threshold) 3906 return true; 3907 } 3908 3909 return false; 3910 } 3911 3912 /* Switch to the appropriate section for output of DECL. 3913 DECL is either a `VAR_DECL' node or a constant of some sort. 3914 RELOC indicates whether forming the initial value of DECL requires 3915 link-time relocations. */ 3916 3917 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT) 3918 ATTRIBUTE_UNUSED; 3919 3920 static section * 3921 x86_64_elf_select_section (tree decl, int reloc, 3922 unsigned HOST_WIDE_INT align) 3923 { 3924 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 3925 && ix86_in_large_data_p (decl)) 3926 { 3927 const char *sname = NULL; 3928 unsigned int flags = SECTION_WRITE; 3929 switch (categorize_decl_for_section (decl, reloc)) 3930 { 3931 case SECCAT_DATA: 3932 sname = ".ldata"; 3933 break; 3934 case SECCAT_DATA_REL: 3935 sname = ".ldata.rel"; 3936 break; 3937 case SECCAT_DATA_REL_LOCAL: 3938 sname = ".ldata.rel.local"; 3939 break; 3940 case SECCAT_DATA_REL_RO: 3941 sname = ".ldata.rel.ro"; 3942 break; 3943 case SECCAT_DATA_REL_RO_LOCAL: 3944 sname = ".ldata.rel.ro.local"; 3945 break; 3946 case SECCAT_BSS: 3947 sname = ".lbss"; 3948 flags |= SECTION_BSS; 3949 break; 3950 case SECCAT_RODATA: 3951 case SECCAT_RODATA_MERGE_STR: 3952 case SECCAT_RODATA_MERGE_STR_INIT: 3953 case SECCAT_RODATA_MERGE_CONST: 3954 sname = ".lrodata"; 3955 flags = 0; 3956 break; 3957 case SECCAT_SRODATA: 3958 case SECCAT_SDATA: 3959 case SECCAT_SBSS: 3960 gcc_unreachable (); 3961 case SECCAT_TEXT: 3962 case SECCAT_TDATA: 3963 case SECCAT_TBSS: 3964 /* We don't split these for medium model. Place them into 3965 default sections and hope for best. */ 3966 break; 3967 case SECCAT_EMUTLS_VAR: 3968 case SECCAT_EMUTLS_TMPL: 3969 gcc_unreachable (); 3970 } 3971 if (sname) 3972 { 3973 /* We might get called with string constants, but get_named_section 3974 doesn't like them as they are not DECLs. Also, we need to set 3975 flags in that case. 
*/ 3976 if (!DECL_P (decl)) 3977 return get_section (sname, flags, NULL); 3978 return get_named_section (decl, sname, reloc); 3979 } 3980 } 3981 return default_elf_select_section (decl, reloc, align); 3982 } 3983 3984 /* Build up a unique section name, expressed as a 3985 STRING_CST node, and assign it to DECL_SECTION_NAME (decl). 3986 RELOC indicates whether the initial value of EXP requires 3987 link-time relocations. */ 3988 3989 static void ATTRIBUTE_UNUSED 3990 x86_64_elf_unique_section (tree decl, int reloc) 3991 { 3992 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 3993 && ix86_in_large_data_p (decl)) 3994 { 3995 const char *prefix = NULL; 3996 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */ 3997 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP; 3998 3999 switch (categorize_decl_for_section (decl, reloc)) 4000 { 4001 case SECCAT_DATA: 4002 case SECCAT_DATA_REL: 4003 case SECCAT_DATA_REL_LOCAL: 4004 case SECCAT_DATA_REL_RO: 4005 case SECCAT_DATA_REL_RO_LOCAL: 4006 prefix = one_only ? ".ld" : ".ldata"; 4007 break; 4008 case SECCAT_BSS: 4009 prefix = one_only ? ".lb" : ".lbss"; 4010 break; 4011 case SECCAT_RODATA: 4012 case SECCAT_RODATA_MERGE_STR: 4013 case SECCAT_RODATA_MERGE_STR_INIT: 4014 case SECCAT_RODATA_MERGE_CONST: 4015 prefix = one_only ? ".lr" : ".lrodata"; 4016 break; 4017 case SECCAT_SRODATA: 4018 case SECCAT_SDATA: 4019 case SECCAT_SBSS: 4020 gcc_unreachable (); 4021 case SECCAT_TEXT: 4022 case SECCAT_TDATA: 4023 case SECCAT_TBSS: 4024 /* We don't split these for medium model. Place them into 4025 default sections and hope for best. */ 4026 break; 4027 case SECCAT_EMUTLS_VAR: 4028 prefix = targetm.emutls.var_section; 4029 break; 4030 case SECCAT_EMUTLS_TMPL: 4031 prefix = targetm.emutls.tmpl_section; 4032 break; 4033 } 4034 if (prefix) 4035 { 4036 const char *name, *linkonce; 4037 char *string; 4038 4039 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 4040 name = targetm.strip_name_encoding (name); 4041 4042 /* If we're using one_only, then there needs to be a .gnu.linkonce 4043 prefix to the section name. */ 4044 linkonce = one_only ? ".gnu.linkonce" : ""; 4045 4046 string = ACONCAT ((linkonce, prefix, ".", name, NULL)); 4047 4048 DECL_SECTION_NAME (decl) = build_string (strlen (string), string); 4049 return; 4050 } 4051 } 4052 default_unique_section (decl, reloc); 4053 } 4054 4055 #ifdef COMMON_ASM_OP 4056 /* This says how to output assembler code to declare an 4057 uninitialized external linkage data object. 4058 4059 For medium model x86-64 we need to use .largecomm opcode for 4060 large objects. */ 4061 void 4062 x86_elf_aligned_common (FILE *file, 4063 const char *name, unsigned HOST_WIDE_INT size, 4064 int align) 4065 { 4066 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 4067 && size > (unsigned int)ix86_section_threshold) 4068 fprintf (file, ".largecomm\t"); 4069 else 4070 fprintf (file, "%s", COMMON_ASM_OP); 4071 assemble_name (file, name); 4072 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n", 4073 size, align / BITS_PER_UNIT); 4074 } 4075 #endif 4076 4077 /* Utility function for targets to use in implementing 4078 ASM_OUTPUT_ALIGNED_BSS. 
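   For example (illustrative, assuming the default -mlarge-data-threshold),
   under -mcmodel=medium an uninitialized object such as

     static char big_buffer[1 << 20];

   is larger than ix86_section_threshold and is therefore placed in the
   .lbss section, while smaller objects keep using the ordinary bss
   section.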
*/ 4079 4080 void 4081 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED, 4082 const char *name, unsigned HOST_WIDE_INT size, 4083 int align) 4084 { 4085 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC) 4086 && size > (unsigned int)ix86_section_threshold) 4087 switch_to_section (get_named_section (decl, ".lbss", 0)); 4088 else 4089 switch_to_section (bss_section); 4090 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT)); 4091 #ifdef ASM_DECLARE_OBJECT_NAME 4092 last_assemble_variable_decl = decl; 4093 ASM_DECLARE_OBJECT_NAME (file, name, decl); 4094 #else 4095 /* Standard thing is just output label for the object. */ 4096 ASM_OUTPUT_LABEL (file, name); 4097 #endif /* ASM_DECLARE_OBJECT_NAME */ 4098 ASM_OUTPUT_SKIP (file, size ? size : 1); 4099 } 4100 4101 void 4102 optimization_options (int level, int size ATTRIBUTE_UNUSED) 4103 { 4104 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to 4105 make the problem with not enough registers even worse. */ 4106 #ifdef INSN_SCHEDULING 4107 if (level > 1) 4108 flag_schedule_insns = 0; 4109 #endif 4110 4111 if (TARGET_MACHO) 4112 /* The Darwin libraries never set errno, so we might as well 4113 avoid calling them when that's the only reason we would. */ 4114 flag_errno_math = 0; 4115 4116 /* The default values of these switches depend on the TARGET_64BIT 4117 that is not known at this moment. Mark these values with 2 and 4118 let user the to override these. In case there is no command line option 4119 specifying them, we will set the defaults in override_options. */ 4120 if (optimize >= 1) 4121 flag_omit_frame_pointer = 2; 4122 flag_pcc_struct_return = 2; 4123 flag_asynchronous_unwind_tables = 2; 4124 flag_vect_cost_model = 1; 4125 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS 4126 SUBTARGET_OPTIMIZATION_OPTIONS; 4127 #endif 4128 } 4129 4130 /* Decide whether we can make a sibling call to a function. DECL is the 4131 declaration of the function being targeted by the call and EXP is the 4132 CALL_EXPR representing the call. */ 4133 4134 static bool 4135 ix86_function_ok_for_sibcall (tree decl, tree exp) 4136 { 4137 tree type, decl_or_type; 4138 rtx a, b; 4139 4140 /* If we are generating position-independent code, we cannot sibcall 4141 optimize any indirect call, or a direct call to a global function, 4142 as the PLT requires %ebx be live. */ 4143 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl))) 4144 return false; 4145 4146 /* If we need to align the outgoing stack, then sibcalling would 4147 unalign the stack, which may break the called function. */ 4148 if (ix86_incoming_stack_boundary < PREFERRED_STACK_BOUNDARY) 4149 return false; 4150 4151 if (decl) 4152 { 4153 decl_or_type = decl; 4154 type = TREE_TYPE (decl); 4155 } 4156 else 4157 { 4158 /* We're looking at the CALL_EXPR, we need the type of the function. */ 4159 type = CALL_EXPR_FN (exp); /* pointer expression */ 4160 type = TREE_TYPE (type); /* pointer type */ 4161 type = TREE_TYPE (type); /* function type */ 4162 decl_or_type = type; 4163 } 4164 4165 /* Check that the return value locations are the same. Like 4166 if we are returning floats on the 80387 register stack, we cannot 4167 make a sibcall from a function that doesn't return a float to a 4168 function that does or, conversely, from a function that does return 4169 a float to a function that doesn't; the necessary stack adjustment 4170 would not be executed. This is also the place we notice 4171 differences in the return value ABI. 
Note that it is ok for one 4172 of the functions to have void return type as long as the return 4173 value of the other is passed in a register. */ 4174 a = ix86_function_value (TREE_TYPE (exp), decl_or_type, false); 4175 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)), 4176 cfun->decl, false); 4177 if (STACK_REG_P (a) || STACK_REG_P (b)) 4178 { 4179 if (!rtx_equal_p (a, b)) 4180 return false; 4181 } 4182 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl)))) 4183 ; 4184 else if (!rtx_equal_p (a, b)) 4185 return false; 4186 4187 if (TARGET_64BIT) 4188 { 4189 /* The SYSV ABI has more call-clobbered registers; 4190 disallow sibcalls from MS to SYSV. */ 4191 if (cfun->machine->call_abi == MS_ABI 4192 && ix86_function_type_abi (type) == SYSV_ABI) 4193 return false; 4194 } 4195 else 4196 { 4197 /* If this call is indirect, we'll need to be able to use a 4198 call-clobbered register for the address of the target function. 4199 Make sure that all such registers are not used for passing 4200 parameters. Note that DLLIMPORT functions are indirect. */ 4201 if (!decl 4202 || (TARGET_DLLIMPORT_DECL_ATTRIBUTES && DECL_DLLIMPORT_P (decl))) 4203 { 4204 if (ix86_function_regparm (type, NULL) >= 3) 4205 { 4206 /* ??? Need to count the actual number of registers to be used, 4207 not the possible number of registers. Fix later. */ 4208 return false; 4209 } 4210 } 4211 } 4212 4213 /* Otherwise okay. That also includes certain types of indirect calls. */ 4214 return true; 4215 } 4216 4217 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm" 4218 calling convention attributes; 4219 arguments as in struct attribute_spec.handler. */ 4220 4221 static tree 4222 ix86_handle_cconv_attribute (tree *node, tree name, 4223 tree args, 4224 int flags ATTRIBUTE_UNUSED, 4225 bool *no_add_attrs) 4226 { 4227 if (TREE_CODE (*node) != FUNCTION_TYPE 4228 && TREE_CODE (*node) != METHOD_TYPE 4229 && TREE_CODE (*node) != FIELD_DECL 4230 && TREE_CODE (*node) != TYPE_DECL) 4231 { 4232 warning (OPT_Wattributes, "%qs attribute only applies to functions", 4233 IDENTIFIER_POINTER (name)); 4234 *no_add_attrs = true; 4235 return NULL_TREE; 4236 } 4237 4238 /* Can combine regparm with all attributes but fastcall. */ 4239 if (is_attribute_p ("regparm", name)) 4240 { 4241 tree cst; 4242 4243 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4244 { 4245 error ("fastcall and regparm attributes are not compatible"); 4246 } 4247 4248 cst = TREE_VALUE (args); 4249 if (TREE_CODE (cst) != INTEGER_CST) 4250 { 4251 warning (OPT_Wattributes, 4252 "%qs attribute requires an integer constant argument", 4253 IDENTIFIER_POINTER (name)); 4254 *no_add_attrs = true; 4255 } 4256 else if (compare_tree_int (cst, REGPARM_MAX) > 0) 4257 { 4258 warning (OPT_Wattributes, "argument to %qs attribute larger than %d", 4259 IDENTIFIER_POINTER (name), REGPARM_MAX); 4260 *no_add_attrs = true; 4261 } 4262 4263 return NULL_TREE; 4264 } 4265 4266 if (TARGET_64BIT) 4267 { 4268 /* Do not warn when emulating the MS ABI. */ 4269 if (TREE_CODE (*node) != FUNCTION_TYPE || ix86_function_type_abi (*node)!=MS_ABI) 4270 warning (OPT_Wattributes, "%qs attribute ignored", 4271 IDENTIFIER_POINTER (name)); 4272 *no_add_attrs = true; 4273 return NULL_TREE; 4274 } 4275 4276 /* Can combine fastcall with stdcall (redundant) and sseregparm. 
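   For example (illustrative),

     void __attribute__ ((fastcall, sseregparm)) f (float x);

   is accepted, whereas combining fastcall with cdecl or regparm is
   rejected with a hard error below.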
*/ 4277 if (is_attribute_p ("fastcall", name)) 4278 { 4279 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) 4280 { 4281 error ("fastcall and cdecl attributes are not compatible"); 4282 } 4283 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) 4284 { 4285 error ("fastcall and stdcall attributes are not compatible"); 4286 } 4287 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node))) 4288 { 4289 error ("fastcall and regparm attributes are not compatible"); 4290 } 4291 } 4292 4293 /* Can combine stdcall with fastcall (redundant), regparm and 4294 sseregparm. */ 4295 else if (is_attribute_p ("stdcall", name)) 4296 { 4297 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node))) 4298 { 4299 error ("stdcall and cdecl attributes are not compatible"); 4300 } 4301 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4302 { 4303 error ("stdcall and fastcall attributes are not compatible"); 4304 } 4305 } 4306 4307 /* Can combine cdecl with regparm and sseregparm. */ 4308 else if (is_attribute_p ("cdecl", name)) 4309 { 4310 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node))) 4311 { 4312 error ("stdcall and cdecl attributes are not compatible"); 4313 } 4314 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node))) 4315 { 4316 error ("fastcall and cdecl attributes are not compatible"); 4317 } 4318 } 4319 4320 /* Can combine sseregparm with all attributes. */ 4321 4322 return NULL_TREE; 4323 } 4324 4325 /* Return 0 if the attributes for two types are incompatible, 1 if they 4326 are compatible, and 2 if they are nearly compatible (which causes a 4327 warning to be generated). */ 4328 4329 static int 4330 ix86_comp_type_attributes (const_tree type1, const_tree type2) 4331 { 4332 /* Check for mismatch of non-default calling convention. */ 4333 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall"; 4334 4335 if (TREE_CODE (type1) != FUNCTION_TYPE 4336 && TREE_CODE (type1) != METHOD_TYPE) 4337 return 1; 4338 4339 /* Check for mismatched fastcall/regparm types. */ 4340 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1)) 4341 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2))) 4342 || (ix86_function_regparm (type1, NULL) 4343 != ix86_function_regparm (type2, NULL))) 4344 return 0; 4345 4346 /* Check for mismatched sseregparm types. */ 4347 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1)) 4348 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2))) 4349 return 0; 4350 4351 /* Check for mismatched return types (cdecl vs stdcall). */ 4352 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1)) 4353 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2))) 4354 return 0; 4355 4356 return 1; 4357 } 4358 4359 /* Return the regparm value for a function with the indicated TYPE and DECL. 4360 DECL may be NULL when calling function indirectly 4361 or considering a libcall. */ 4362 4363 static int 4364 ix86_function_regparm (const_tree type, const_tree decl) 4365 { 4366 tree attr; 4367 int regparm; 4368 4369 static bool error_issued; 4370 4371 if (TARGET_64BIT) 4372 return (ix86_function_type_abi (type) == SYSV_ABI 4373 ? X86_64_REGPARM_MAX : X64_REGPARM_MAX); 4374 4375 regparm = ix86_regparm; 4376 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type)); 4377 if (attr) 4378 { 4379 regparm 4380 = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr))); 4381 4382 if (decl && TREE_CODE (decl) == FUNCTION_DECL) 4383 { 4384 /* We can't use regparm(3) for nested functions because 4385 these pass static chain pointer in %ecx register. 
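   With regparm(3) the third integer argument would itself be passed
   in %ecx and clobber the static chain, hence the error below.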
*/ 4386 if (!error_issued && regparm == 3 4387 && decl_function_context (decl) 4388 && !DECL_NO_STATIC_CHAIN (decl)) 4389 { 4390 error ("nested functions are limited to 2 register parameters"); 4391 error_issued = true; 4392 return 0; 4393 } 4394 } 4395 4396 return regparm; 4397 } 4398 4399 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 4400 return 2; 4401 4402 /* Use register calling convention for local functions when possible. */ 4403 if (decl 4404 && TREE_CODE (decl) == FUNCTION_DECL 4405 && optimize 4406 && (TARGET_64BIT || !flag_strict_calling_conventions) 4407 && !profile_flag) 4408 { 4409 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ 4410 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); 4411 if (i && i->local) 4412 { 4413 int local_regparm, globals = 0, regno; 4414 struct function *f; 4415 4416 /* Make sure no regparm register is taken by a 4417 fixed register variable. */ 4418 for (local_regparm = 0; local_regparm < REGPARM_MAX; local_regparm++) 4419 if (fixed_regs[local_regparm]) 4420 break; 4421 4422 /* We can't use regparm(3) for nested functions as these use 4423 static chain pointer in third argument. */ 4424 if (local_regparm == 3 4425 && decl_function_context (decl) 4426 && !DECL_NO_STATIC_CHAIN (decl)) 4427 local_regparm = 2; 4428 4429 /* If the function realigns its stackpointer, the prologue will 4430 clobber %ecx. If we've already generated code for the callee, 4431 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to 4432 scanning the attributes for the self-realigning property. */ 4433 f = DECL_STRUCT_FUNCTION (decl); 4434 /* Since current internal arg pointer won't conflict with 4435 parameter passing regs, so no need to change stack 4436 realignment and adjust regparm number. 4437 4438 Each fixed register usage increases register pressure, 4439 so less registers should be used for argument passing. 4440 This functionality can be overriden by an explicit 4441 regparm value. */ 4442 for (regno = 0; regno <= DI_REG; regno++) 4443 if (fixed_regs[regno]) 4444 globals++; 4445 4446 local_regparm 4447 = globals < local_regparm ? local_regparm - globals : 0; 4448 4449 if (local_regparm > regparm) 4450 regparm = local_regparm; 4451 } 4452 } 4453 4454 return regparm; 4455 } 4456 4457 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and 4458 DFmode (2) arguments in SSE registers for a function with the 4459 indicated TYPE and DECL. DECL may be NULL when calling function 4460 indirectly or considering a libcall. Otherwise return 0. */ 4461 4462 static int 4463 ix86_function_sseregparm (const_tree type, const_tree decl, bool warn) 4464 { 4465 gcc_assert (!TARGET_64BIT); 4466 4467 /* Use SSE registers to pass SFmode and DFmode arguments if requested 4468 by the sseregparm attribute. */ 4469 if (TARGET_SSEREGPARM 4470 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type)))) 4471 { 4472 if (!TARGET_SSE) 4473 { 4474 if (warn) 4475 { 4476 if (decl) 4477 error ("Calling %qD with attribute sseregparm without " 4478 "SSE/SSE2 enabled", decl); 4479 else 4480 error ("Calling %qT with attribute sseregparm without " 4481 "SSE/SSE2 enabled", type); 4482 } 4483 return 0; 4484 } 4485 4486 return 2; 4487 } 4488 4489 /* For local functions, pass up to SSE_REGPARM_MAX SFmode 4490 (and DFmode for SSE2) arguments in SSE registers. 
*/ 4491 if (decl && TARGET_SSE_MATH && optimize && !profile_flag && 4492 (TARGET_64BIT || !flag_strict_calling_conventions)) 4493 { 4494 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */ 4495 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl)); 4496 if (i && i->local) 4497 return TARGET_SSE2 ? 2 : 1; 4498 } 4499 4500 return 0; 4501 } 4502 4503 /* Return true if EAX is live at the start of the function. Used by 4504 ix86_expand_prologue to determine if we need special help before 4505 calling allocate_stack_worker. */ 4506 4507 static bool 4508 ix86_eax_live_at_start_p (void) 4509 { 4510 /* Cheat. Don't bother working forward from ix86_function_regparm 4511 to the function type to whether an actual argument is located in 4512 eax. Instead just look at cfg info, which is still close enough 4513 to correct at this point. This gives false positives for broken 4514 functions that might use uninitialized data that happens to be 4515 allocated in eax, but who cares? */ 4516 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0); 4517 } 4518 4519 /* Value is the number of bytes of arguments automatically 4520 popped when returning from a subroutine call. 4521 FUNDECL is the declaration node of the function (as a tree), 4522 FUNTYPE is the data type of the function (as a tree), 4523 or for a library call it is an identifier node for the subroutine name. 4524 SIZE is the number of bytes of arguments passed on the stack. 4525 4526 On the 80386, the RTD insn may be used to pop them if the number 4527 of args is fixed, but if the number is variable then the caller 4528 must pop them all. RTD can't be used for library calls now 4529 because the library is compiled with the Unix compiler. 4530 Use of RTD is a selectable option, since it is incompatible with 4531 standard Unix calling sequences. If the option is not selected, 4532 the caller must always pop the args. 4533 4534 The attribute stdcall is equivalent to RTD on a per module basis. */ 4535 4536 int 4537 ix86_return_pops_args (tree fundecl, tree funtype, int size) 4538 { 4539 int rtd; 4540 4541 /* None of the 64-bit ABIs pop arguments. */ 4542 if (TARGET_64BIT) 4543 return 0; 4544 4545 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE); 4546 4547 /* Cdecl functions override -mrtd, and never pop the stack. */ 4548 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) 4549 { 4550 /* Stdcall and fastcall functions will pop the stack if not 4551 variable args. */ 4552 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype)) 4553 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype))) 4554 rtd = 1; 4555 4556 if (rtd && ! stdarg_p (funtype)) 4557 return size; 4558 } 4559 4560 /* Lose any fake structure return argument if it is passed on the stack. */ 4561 if (aggregate_value_p (TREE_TYPE (funtype), fundecl) 4562 && !KEEP_AGGREGATE_RETURN_POINTER) 4563 { 4564 int nregs = ix86_function_regparm (funtype, fundecl); 4565 if (nregs == 0) 4566 return GET_MODE_SIZE (Pmode); 4567 } 4568 4569 return 0; 4570 } 4571 4572 /* Argument support functions. */ 4573 4574 /* Return true when register may be used to pass function parameters. 
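   Roughly (the authoritative sets are the REGPARM limits and parameter
   register tables used below): for 32-bit these are %eax, %edx and
   %ecx plus the low MMX and SSE argument registers when those ISAs
   are enabled; for 64-bit they are the SYSV integer argument
   registers %rdi, %rsi, %rdx, %rcx, %r8 and %r9 (or %rcx, %rdx, %r8
   and %r9 for the MS ABI), %rax as the hidden varargs argument under
   the SYSV ABI, and the SSE argument registers.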
*/ 4575 bool 4576 ix86_function_arg_regno_p (int regno) 4577 { 4578 int i; 4579 const int *parm_regs; 4580 4581 if (!TARGET_64BIT) 4582 { 4583 if (TARGET_MACHO) 4584 return (regno < REGPARM_MAX 4585 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno])); 4586 else 4587 return (regno < REGPARM_MAX 4588 || (TARGET_MMX && MMX_REGNO_P (regno) 4589 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX)) 4590 || (TARGET_SSE && SSE_REGNO_P (regno) 4591 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))); 4592 } 4593 4594 if (TARGET_MACHO) 4595 { 4596 if (SSE_REGNO_P (regno) && TARGET_SSE) 4597 return true; 4598 } 4599 else 4600 { 4601 if (TARGET_SSE && SSE_REGNO_P (regno) 4602 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)) 4603 return true; 4604 } 4605 4606 /* TODO: The function should depend on current function ABI but 4607 builtins.c would need updating then. Therefore we use the 4608 default ABI. */ 4609 4610 /* RAX is used as hidden argument to va_arg functions. */ 4611 if (DEFAULT_ABI == SYSV_ABI && regno == AX_REG) 4612 return true; 4613 4614 if (DEFAULT_ABI == MS_ABI) 4615 parm_regs = x86_64_ms_abi_int_parameter_registers; 4616 else 4617 parm_regs = x86_64_int_parameter_registers; 4618 for (i = 0; i < (DEFAULT_ABI == MS_ABI ? X64_REGPARM_MAX 4619 : X86_64_REGPARM_MAX); i++) 4620 if (regno == parm_regs[i]) 4621 return true; 4622 return false; 4623 } 4624 4625 /* Return if we do not know how to pass TYPE solely in registers. */ 4626 4627 static bool 4628 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type) 4629 { 4630 if (must_pass_in_stack_var_size_or_pad (mode, type)) 4631 return true; 4632 4633 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out! 4634 The layout_type routine is crafty and tries to trick us into passing 4635 currently unsupported vector types on the stack by using TImode. */ 4636 return (!TARGET_64BIT && mode == TImode 4637 && type && TREE_CODE (type) != VECTOR_TYPE); 4638 } 4639 4640 /* It returns the size, in bytes, of the area reserved for arguments passed 4641 in registers for the function represented by fndecl dependent to the used 4642 abi format. */ 4643 int 4644 ix86_reg_parm_stack_space (const_tree fndecl) 4645 { 4646 int call_abi = SYSV_ABI; 4647 if (fndecl != NULL_TREE && TREE_CODE (fndecl) == FUNCTION_DECL) 4648 call_abi = ix86_function_abi (fndecl); 4649 else 4650 call_abi = ix86_function_type_abi (fndecl); 4651 if (call_abi == MS_ABI) 4652 return 32; 4653 return 0; 4654 } 4655 4656 /* Returns value SYSV_ABI, MS_ABI dependent on fntype, specifying the 4657 call abi used. */ 4658 int 4659 ix86_function_type_abi (const_tree fntype) 4660 { 4661 if (TARGET_64BIT && fntype != NULL) 4662 { 4663 int abi; 4664 if (DEFAULT_ABI == SYSV_ABI) 4665 abi = lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (fntype)) ? MS_ABI : SYSV_ABI; 4666 else 4667 abi = lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (fntype)) ? SYSV_ABI : MS_ABI; 4668 4669 return abi; 4670 } 4671 return DEFAULT_ABI; 4672 } 4673 4674 int 4675 ix86_function_abi (const_tree fndecl) 4676 { 4677 if (! fndecl) 4678 return DEFAULT_ABI; 4679 return ix86_function_type_abi (TREE_TYPE (fndecl)); 4680 } 4681 4682 /* Returns value SYSV_ABI, MS_ABI dependent on cfun, specifying the 4683 call abi used. */ 4684 int 4685 ix86_cfun_abi (void) 4686 { 4687 if (! cfun || ! TARGET_64BIT) 4688 return DEFAULT_ABI; 4689 return cfun->machine->call_abi; 4690 } 4691 4692 /* regclass.c */ 4693 extern void init_regs (void); 4694 4695 /* Implementation of call abi switching target hook. 
Specific to FNDECL 4696 the specific call register sets are set. See also CONDITIONAL_REGISTER_USAGE 4697 for more details. */ 4698 void 4699 ix86_call_abi_override (const_tree fndecl) 4700 { 4701 if (fndecl == NULL_TREE) 4702 cfun->machine->call_abi = DEFAULT_ABI; 4703 else 4704 cfun->machine->call_abi = ix86_function_type_abi (TREE_TYPE (fndecl)); 4705 } 4706 4707 /* MS and SYSV ABI have different set of call used registers. Avoid expensive 4708 re-initialization of init_regs each time we switch function context since 4709 this is needed only during RTL expansion. */ 4710 static void 4711 ix86_maybe_switch_abi (void) 4712 { 4713 if (TARGET_64BIT && 4714 call_used_regs[SI_REG] == (cfun->machine->call_abi == MS_ABI)) 4715 reinit_regs (); 4716 } 4717 4718 /* Initialize a variable CUM of type CUMULATIVE_ARGS 4719 for a call to a function whose data type is FNTYPE. 4720 For a library call, FNTYPE is 0. */ 4721 4722 void 4723 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */ 4724 tree fntype, /* tree ptr for function decl */ 4725 rtx libname, /* SYMBOL_REF of library name or 0 */ 4726 tree fndecl) 4727 { 4728 struct cgraph_local_info *i = fndecl ? cgraph_local_info (fndecl) : NULL; 4729 memset (cum, 0, sizeof (*cum)); 4730 4731 if (fndecl) 4732 cum->call_abi = ix86_function_abi (fndecl); 4733 else 4734 cum->call_abi = ix86_function_type_abi (fntype); 4735 /* Set up the number of registers to use for passing arguments. */ 4736 4737 if (cum->call_abi == MS_ABI && !ACCUMULATE_OUTGOING_ARGS) 4738 sorry ("ms_abi attribute requires -maccumulate-outgoing-args " 4739 "or subtarget optimization implying it"); 4740 cum->nregs = ix86_regparm; 4741 if (TARGET_64BIT) 4742 { 4743 if (cum->call_abi != DEFAULT_ABI) 4744 cum->nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX 4745 : X64_REGPARM_MAX; 4746 } 4747 if (TARGET_SSE) 4748 { 4749 cum->sse_nregs = SSE_REGPARM_MAX; 4750 if (TARGET_64BIT) 4751 { 4752 if (cum->call_abi != DEFAULT_ABI) 4753 cum->sse_nregs = DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX 4754 : X64_SSE_REGPARM_MAX; 4755 } 4756 } 4757 if (TARGET_MMX) 4758 cum->mmx_nregs = MMX_REGPARM_MAX; 4759 cum->warn_avx = true; 4760 cum->warn_sse = true; 4761 cum->warn_mmx = true; 4762 4763 /* Because type might mismatch in between caller and callee, we need to 4764 use actual type of function for local calls. 4765 FIXME: cgraph_analyze can be told to actually record if function uses 4766 va_start so for local functions maybe_vaarg can be made aggressive 4767 helping K&R code. 4768 FIXME: once typesytem is fixed, we won't need this code anymore. */ 4769 if (i && i->local) 4770 fntype = TREE_TYPE (fndecl); 4771 cum->maybe_vaarg = (fntype 4772 ? (!prototype_p (fntype) || stdarg_p (fntype)) 4773 : !libname); 4774 4775 if (!TARGET_64BIT) 4776 { 4777 /* If there are variable arguments, then we won't pass anything 4778 in registers in 32-bit mode. */ 4779 if (stdarg_p (fntype)) 4780 { 4781 cum->nregs = 0; 4782 cum->sse_nregs = 0; 4783 cum->mmx_nregs = 0; 4784 cum->warn_avx = 0; 4785 cum->warn_sse = 0; 4786 cum->warn_mmx = 0; 4787 return; 4788 } 4789 4790 /* Use ecx and edx registers if function has fastcall attribute, 4791 else look for regparm information. */ 4792 if (fntype) 4793 { 4794 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype))) 4795 { 4796 cum->nregs = 2; 4797 cum->fastcall = 1; 4798 } 4799 else 4800 cum->nregs = ix86_function_regparm (fntype, fndecl); 4801 } 4802 4803 /* Set up the number of SSE registers used for passing SFmode 4804 and DFmode arguments. 
Warn for mismatching ABI. */ 4805 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl, true); 4806 } 4807 } 4808 4809 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE. 4810 But in the case of vector types, it is some vector mode. 4811 4812 When we have only some of our vector isa extensions enabled, then there 4813 are some modes for which vector_mode_supported_p is false. For these 4814 modes, the generic vector support in gcc will choose some non-vector mode 4815 in order to implement the type. By computing the natural mode, we'll 4816 select the proper ABI location for the operand and not depend on whatever 4817 the middle-end decides to do with these vector types. 4818 4819 The midde-end can't deal with the vector types > 16 bytes. In this 4820 case, we return the original mode and warn ABI change if CUM isn't 4821 NULL. */ 4822 4823 static enum machine_mode 4824 type_natural_mode (const_tree type, CUMULATIVE_ARGS *cum) 4825 { 4826 enum machine_mode mode = TYPE_MODE (type); 4827 4828 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode)) 4829 { 4830 HOST_WIDE_INT size = int_size_in_bytes (type); 4831 if ((size == 8 || size == 16 || size == 32) 4832 /* ??? Generic code allows us to create width 1 vectors. Ignore. */ 4833 && TYPE_VECTOR_SUBPARTS (type) > 1) 4834 { 4835 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type)); 4836 4837 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE) 4838 mode = MIN_MODE_VECTOR_FLOAT; 4839 else 4840 mode = MIN_MODE_VECTOR_INT; 4841 4842 /* Get the mode which has this inner mode and number of units. */ 4843 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode)) 4844 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type) 4845 && GET_MODE_INNER (mode) == innermode) 4846 { 4847 if (size == 32 && !TARGET_AVX) 4848 { 4849 static bool warnedavx; 4850 4851 if (cum 4852 && !warnedavx 4853 && cum->warn_avx) 4854 { 4855 warnedavx = true; 4856 warning (0, "AVX vector argument without AVX " 4857 "enabled changes the ABI"); 4858 } 4859 return TYPE_MODE (type); 4860 } 4861 else 4862 return mode; 4863 } 4864 4865 gcc_unreachable (); 4866 } 4867 } 4868 4869 return mode; 4870 } 4871 4872 /* We want to pass a value in REGNO whose "natural" mode is MODE. However, 4873 this may not agree with the mode that the type system has chosen for the 4874 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can 4875 go ahead and use it. Otherwise we have to build a PARALLEL instead. */ 4876 4877 static rtx 4878 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode, 4879 unsigned int regno) 4880 { 4881 rtx tmp; 4882 4883 if (orig_mode != BLKmode) 4884 tmp = gen_rtx_REG (orig_mode, regno); 4885 else 4886 { 4887 tmp = gen_rtx_REG (mode, regno); 4888 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx); 4889 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp)); 4890 } 4891 4892 return tmp; 4893 } 4894 4895 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal 4896 of this code is to classify each 8bytes of incoming argument by the register 4897 class and assign registers accordingly. */ 4898 4899 /* Return the union class of CLASS1 and CLASS2. 4900 See the x86-64 PS ABI for details. */ 4901 4902 static enum x86_64_reg_class 4903 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2) 4904 { 4905 /* Rule #1: If both classes are equal, this is the resulting class. 
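   As a worked example of how the merging rules are used (illustrative):
   for struct s { int i; float f; } the two halves of its single
   eightbyte classify as X86_64_INTEGERSI_CLASS (from i) and
   X86_64_SSESF_CLASS (from f); rule #4 below merges them to
   X86_64_INTEGERSI_CLASS, so the whole struct is passed in a
   general-purpose register.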
*/ 4906 if (class1 == class2) 4907 return class1; 4908 4909 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is 4910 the other class. */ 4911 if (class1 == X86_64_NO_CLASS) 4912 return class2; 4913 if (class2 == X86_64_NO_CLASS) 4914 return class1; 4915 4916 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */ 4917 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS) 4918 return X86_64_MEMORY_CLASS; 4919 4920 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */ 4921 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS) 4922 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS)) 4923 return X86_64_INTEGERSI_CLASS; 4924 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS 4925 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS) 4926 return X86_64_INTEGER_CLASS; 4927 4928 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class, 4929 MEMORY is used. */ 4930 if (class1 == X86_64_X87_CLASS 4931 || class1 == X86_64_X87UP_CLASS 4932 || class1 == X86_64_COMPLEX_X87_CLASS 4933 || class2 == X86_64_X87_CLASS 4934 || class2 == X86_64_X87UP_CLASS 4935 || class2 == X86_64_COMPLEX_X87_CLASS) 4936 return X86_64_MEMORY_CLASS; 4937 4938 /* Rule #6: Otherwise class SSE is used. */ 4939 return X86_64_SSE_CLASS; 4940 } 4941 4942 /* Classify the argument of type TYPE and mode MODE. 4943 CLASSES will be filled by the register class used to pass each word 4944 of the operand. The number of words is returned. In case the parameter 4945 should be passed in memory, 0 is returned. As a special case for zero 4946 sized containers, classes[0] will be NO_CLASS and 1 is returned. 4947 4948 BIT_OFFSET is used internally for handling records and specifies offset 4949 of the offset in bits modulo 256 to avoid overflow cases. 4950 4951 See the x86-64 PS ABI for details. 4952 */ 4953 4954 static int 4955 classify_argument (enum machine_mode mode, const_tree type, 4956 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset) 4957 { 4958 HOST_WIDE_INT bytes = 4959 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 4960 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 4961 4962 /* Variable sized entities are always passed/returned in memory. */ 4963 if (bytes < 0) 4964 return 0; 4965 4966 if (mode != VOIDmode 4967 && targetm.calls.must_pass_in_stack (mode, type)) 4968 return 0; 4969 4970 if (type && AGGREGATE_TYPE_P (type)) 4971 { 4972 int i; 4973 tree field; 4974 enum x86_64_reg_class subclasses[MAX_CLASSES]; 4975 4976 /* On x86-64 we pass structures larger than 32 bytes on the stack. */ 4977 if (bytes > 32) 4978 return 0; 4979 4980 for (i = 0; i < words; i++) 4981 classes[i] = X86_64_NO_CLASS; 4982 4983 /* Zero sized arrays or structures are NO_CLASS. We return 0 to 4984 signalize memory class, so handle it as special case. */ 4985 if (!words) 4986 { 4987 classes[0] = X86_64_NO_CLASS; 4988 return 1; 4989 } 4990 4991 /* Classify each field of record and merge classes. */ 4992 switch (TREE_CODE (type)) 4993 { 4994 case RECORD_TYPE: 4995 /* And now merge the fields of structure. */ 4996 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 4997 { 4998 if (TREE_CODE (field) == FIELD_DECL) 4999 { 5000 int num; 5001 5002 if (TREE_TYPE (field) == error_mark_node) 5003 continue; 5004 5005 /* Bitfields are always classified as integer. 
Handle them 5006 early, since later code would consider them to be 5007 misaligned integers. */ 5008 if (DECL_BIT_FIELD (field)) 5009 { 5010 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 5011 i < ((int_bit_position (field) + (bit_offset % 64)) 5012 + tree_low_cst (DECL_SIZE (field), 0) 5013 + 63) / 8 / 8; i++) 5014 classes[i] = 5015 merge_classes (X86_64_INTEGER_CLASS, 5016 classes[i]); 5017 } 5018 else 5019 { 5020 type = TREE_TYPE (field); 5021 5022 /* Flexible array member is ignored. */ 5023 if (TYPE_MODE (type) == BLKmode 5024 && TREE_CODE (type) == ARRAY_TYPE 5025 && TYPE_SIZE (type) == NULL_TREE 5026 && TYPE_DOMAIN (type) != NULL_TREE 5027 && (TYPE_MAX_VALUE (TYPE_DOMAIN (type)) 5028 == NULL_TREE)) 5029 { 5030 static bool warned; 5031 5032 if (!warned && warn_psabi) 5033 { 5034 warned = true; 5035 inform (input_location, 5036 "The ABI of passing struct with" 5037 " a flexible array member has" 5038 " changed in GCC 4.4"); 5039 } 5040 continue; 5041 } 5042 num = classify_argument (TYPE_MODE (type), type, 5043 subclasses, 5044 (int_bit_position (field) 5045 + bit_offset) % 256); 5046 if (!num) 5047 return 0; 5048 for (i = 0; i < num; i++) 5049 { 5050 int pos = 5051 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8; 5052 classes[i + pos] = 5053 merge_classes (subclasses[i], classes[i + pos]); 5054 } 5055 } 5056 } 5057 } 5058 break; 5059 5060 case ARRAY_TYPE: 5061 /* Arrays are handled as small records. */ 5062 { 5063 int num; 5064 num = classify_argument (TYPE_MODE (TREE_TYPE (type)), 5065 TREE_TYPE (type), subclasses, bit_offset); 5066 if (!num) 5067 return 0; 5068 5069 /* The partial classes are now full classes. */ 5070 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4) 5071 subclasses[0] = X86_64_SSE_CLASS; 5072 if (subclasses[0] == X86_64_INTEGERSI_CLASS 5073 && !((bit_offset % 64) == 0 && bytes == 4)) 5074 subclasses[0] = X86_64_INTEGER_CLASS; 5075 5076 for (i = 0; i < words; i++) 5077 classes[i] = subclasses[i % num]; 5078 5079 break; 5080 } 5081 case UNION_TYPE: 5082 case QUAL_UNION_TYPE: 5083 /* Unions are similar to RECORD_TYPE but offset is always 0. 5084 */ 5085 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 5086 { 5087 if (TREE_CODE (field) == FIELD_DECL) 5088 { 5089 int num; 5090 5091 if (TREE_TYPE (field) == error_mark_node) 5092 continue; 5093 5094 num = classify_argument (TYPE_MODE (TREE_TYPE (field)), 5095 TREE_TYPE (field), subclasses, 5096 bit_offset); 5097 if (!num) 5098 return 0; 5099 for (i = 0; i < num; i++) 5100 classes[i] = merge_classes (subclasses[i], classes[i]); 5101 } 5102 } 5103 break; 5104 5105 default: 5106 gcc_unreachable (); 5107 } 5108 5109 if (words > 2) 5110 { 5111 /* When size > 16 bytes, if the first one isn't 5112 X86_64_SSE_CLASS or any other ones aren't 5113 X86_64_SSEUP_CLASS, everything should be passed in 5114 memory. */ 5115 if (classes[0] != X86_64_SSE_CLASS) 5116 return 0; 5117 5118 for (i = 1; i < words; i++) 5119 if (classes[i] != X86_64_SSEUP_CLASS) 5120 return 0; 5121 } 5122 5123 /* Final merger cleanup. */ 5124 for (i = 0; i < words; i++) 5125 { 5126 /* If one class is MEMORY, everything should be passed in 5127 memory. */ 5128 if (classes[i] == X86_64_MEMORY_CLASS) 5129 return 0; 5130 5131 /* The X86_64_SSEUP_CLASS should be always preceded by 5132 X86_64_SSE_CLASS or X86_64_SSEUP_CLASS. */ 5133 if (classes[i] == X86_64_SSEUP_CLASS 5134 && classes[i - 1] != X86_64_SSE_CLASS 5135 && classes[i - 1] != X86_64_SSEUP_CLASS) 5136 { 5137 /* The first one should never be X86_64_SSEUP_CLASS. 
*/ 5138 gcc_assert (i != 0); 5139 classes[i] = X86_64_SSE_CLASS; 5140 } 5141 5142 /* If X86_64_X87UP_CLASS isn't preceded by X86_64_X87_CLASS, 5143 everything should be passed in memory. */ 5144 if (classes[i] == X86_64_X87UP_CLASS 5145 && (classes[i - 1] != X86_64_X87_CLASS)) 5146 { 5147 static bool warned; 5148 5149 /* The first one should never be X86_64_X87UP_CLASS. */ 5150 gcc_assert (i != 0); 5151 if (!warned && warn_psabi) 5152 { 5153 warned = true; 5154 inform (input_location, 5155 "The ABI of passing union with long double" 5156 " has changed in GCC 4.4"); 5157 } 5158 return 0; 5159 } 5160 } 5161 return words; 5162 } 5163 5164 /* Compute alignment needed. We align all types to natural boundaries with 5165 exception of XFmode that is aligned to 64bits. */ 5166 if (mode != VOIDmode && mode != BLKmode) 5167 { 5168 int mode_alignment = GET_MODE_BITSIZE (mode); 5169 5170 if (mode == XFmode) 5171 mode_alignment = 128; 5172 else if (mode == XCmode) 5173 mode_alignment = 256; 5174 if (COMPLEX_MODE_P (mode)) 5175 mode_alignment /= 2; 5176 /* Misaligned fields are always returned in memory. */ 5177 if (bit_offset % mode_alignment) 5178 return 0; 5179 } 5180 5181 /* for V1xx modes, just use the base mode */ 5182 if (VECTOR_MODE_P (mode) && mode != V1DImode 5183 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes) 5184 mode = GET_MODE_INNER (mode); 5185 5186 /* Classification of atomic types. */ 5187 switch (mode) 5188 { 5189 case SDmode: 5190 case DDmode: 5191 classes[0] = X86_64_SSE_CLASS; 5192 return 1; 5193 case TDmode: 5194 classes[0] = X86_64_SSE_CLASS; 5195 classes[1] = X86_64_SSEUP_CLASS; 5196 return 2; 5197 case DImode: 5198 case SImode: 5199 case HImode: 5200 case QImode: 5201 case CSImode: 5202 case CHImode: 5203 case CQImode: 5204 { 5205 int size = (bit_offset % 64)+ (int) GET_MODE_BITSIZE (mode); 5206 5207 if (size <= 32) 5208 { 5209 classes[0] = X86_64_INTEGERSI_CLASS; 5210 return 1; 5211 } 5212 else if (size <= 64) 5213 { 5214 classes[0] = X86_64_INTEGER_CLASS; 5215 return 1; 5216 } 5217 else if (size <= 64+32) 5218 { 5219 classes[0] = X86_64_INTEGER_CLASS; 5220 classes[1] = X86_64_INTEGERSI_CLASS; 5221 return 2; 5222 } 5223 else if (size <= 64+64) 5224 { 5225 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 5226 return 2; 5227 } 5228 else 5229 gcc_unreachable (); 5230 } 5231 case CDImode: 5232 case TImode: 5233 classes[0] = classes[1] = X86_64_INTEGER_CLASS; 5234 return 2; 5235 case COImode: 5236 case OImode: 5237 /* OImode shouldn't be used directly. 
*/ 5238 gcc_unreachable (); 5239 case CTImode: 5240 return 0; 5241 case SFmode: 5242 if (!(bit_offset % 64)) 5243 classes[0] = X86_64_SSESF_CLASS; 5244 else 5245 classes[0] = X86_64_SSE_CLASS; 5246 return 1; 5247 case DFmode: 5248 classes[0] = X86_64_SSEDF_CLASS; 5249 return 1; 5250 case XFmode: 5251 classes[0] = X86_64_X87_CLASS; 5252 classes[1] = X86_64_X87UP_CLASS; 5253 return 2; 5254 case TFmode: 5255 classes[0] = X86_64_SSE_CLASS; 5256 classes[1] = X86_64_SSEUP_CLASS; 5257 return 2; 5258 case SCmode: 5259 classes[0] = X86_64_SSE_CLASS; 5260 if (!(bit_offset % 64)) 5261 return 1; 5262 else 5263 { 5264 static bool warned; 5265 5266 if (!warned && warn_psabi) 5267 { 5268 warned = true; 5269 inform (input_location, 5270 "The ABI of passing structure with complex float" 5271 " member has changed in GCC 4.4"); 5272 } 5273 classes[1] = X86_64_SSESF_CLASS; 5274 return 2; 5275 } 5276 case DCmode: 5277 classes[0] = X86_64_SSEDF_CLASS; 5278 classes[1] = X86_64_SSEDF_CLASS; 5279 return 2; 5280 case XCmode: 5281 classes[0] = X86_64_COMPLEX_X87_CLASS; 5282 return 1; 5283 case TCmode: 5284 /* This modes is larger than 16 bytes. */ 5285 return 0; 5286 case V8SFmode: 5287 case V8SImode: 5288 case V32QImode: 5289 case V16HImode: 5290 case V4DFmode: 5291 case V4DImode: 5292 classes[0] = X86_64_SSE_CLASS; 5293 classes[1] = X86_64_SSEUP_CLASS; 5294 classes[2] = X86_64_SSEUP_CLASS; 5295 classes[3] = X86_64_SSEUP_CLASS; 5296 return 4; 5297 case V4SFmode: 5298 case V4SImode: 5299 case V16QImode: 5300 case V8HImode: 5301 case V2DFmode: 5302 case V2DImode: 5303 classes[0] = X86_64_SSE_CLASS; 5304 classes[1] = X86_64_SSEUP_CLASS; 5305 return 2; 5306 case V1DImode: 5307 case V2SFmode: 5308 case V2SImode: 5309 case V4HImode: 5310 case V8QImode: 5311 classes[0] = X86_64_SSE_CLASS; 5312 return 1; 5313 case BLKmode: 5314 case VOIDmode: 5315 return 0; 5316 default: 5317 gcc_assert (VECTOR_MODE_P (mode)); 5318 5319 if (bytes > 16) 5320 return 0; 5321 5322 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT); 5323 5324 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32) 5325 classes[0] = X86_64_INTEGERSI_CLASS; 5326 else 5327 classes[0] = X86_64_INTEGER_CLASS; 5328 classes[1] = X86_64_INTEGER_CLASS; 5329 return 1 + (bytes > 8); 5330 } 5331 } 5332 5333 /* Examine the argument and return set number of register required in each 5334 class. Return 0 iff parameter should be passed in memory. */ 5335 static int 5336 examine_argument (enum machine_mode mode, const_tree type, int in_return, 5337 int *int_nregs, int *sse_nregs) 5338 { 5339 enum x86_64_reg_class regclass[MAX_CLASSES]; 5340 int n = classify_argument (mode, type, regclass, 0); 5341 5342 *int_nregs = 0; 5343 *sse_nregs = 0; 5344 if (!n) 5345 return 0; 5346 for (n--; n >= 0; n--) 5347 switch (regclass[n]) 5348 { 5349 case X86_64_INTEGER_CLASS: 5350 case X86_64_INTEGERSI_CLASS: 5351 (*int_nregs)++; 5352 break; 5353 case X86_64_SSE_CLASS: 5354 case X86_64_SSESF_CLASS: 5355 case X86_64_SSEDF_CLASS: 5356 (*sse_nregs)++; 5357 break; 5358 case X86_64_NO_CLASS: 5359 case X86_64_SSEUP_CLASS: 5360 break; 5361 case X86_64_X87_CLASS: 5362 case X86_64_X87UP_CLASS: 5363 if (!in_return) 5364 return 0; 5365 break; 5366 case X86_64_COMPLEX_X87_CLASS: 5367 return in_return ? 2 : 0; 5368 case X86_64_MEMORY_CLASS: 5369 gcc_unreachable (); 5370 } 5371 return 1; 5372 } 5373 5374 /* Construct container for the argument used by GCC interface. See 5375 FUNCTION_ARG for the detailed description. 
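   A rough illustration (the type is hypothetical and the RTL only sketched):
   for
       struct s { long l; double d; };
   classification yields INTEGER followed by SSEDF, and the loop at the end
   of this function builds roughly
       (parallel [(expr_list (reg:DI rdi) (const_int 0))
                  (expr_list (reg:DF xmm0) (const_int 8))])
   i.e. the first eightbyte is passed in a GPR and the second in an SSE
   register.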
*/ 5376 5377 static rtx 5378 construct_container (enum machine_mode mode, enum machine_mode orig_mode, 5379 const_tree type, int in_return, int nintregs, int nsseregs, 5380 const int *intreg, int sse_regno) 5381 { 5382 /* The following variables hold the static issued_error state. */ 5383 static bool issued_sse_arg_error; 5384 static bool issued_sse_ret_error; 5385 static bool issued_x87_ret_error; 5386 5387 enum machine_mode tmpmode; 5388 int bytes = 5389 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode); 5390 enum x86_64_reg_class regclass[MAX_CLASSES]; 5391 int n; 5392 int i; 5393 int nexps = 0; 5394 int needed_sseregs, needed_intregs; 5395 rtx exp[MAX_CLASSES]; 5396 rtx ret; 5397 5398 n = classify_argument (mode, type, regclass, 0); 5399 if (!n) 5400 return NULL; 5401 if (!examine_argument (mode, type, in_return, &needed_intregs, 5402 &needed_sseregs)) 5403 return NULL; 5404 if (needed_intregs > nintregs || needed_sseregs > nsseregs) 5405 return NULL; 5406 5407 /* We allowed the user to turn off SSE for kernel mode. Don't crash if 5408 some less clueful developer tries to use floating-point anyway. */ 5409 if (needed_sseregs && !TARGET_SSE) 5410 { 5411 if (in_return) 5412 { 5413 if (!issued_sse_ret_error) 5414 { 5415 error ("SSE register return with SSE disabled"); 5416 issued_sse_ret_error = true; 5417 } 5418 } 5419 else if (!issued_sse_arg_error) 5420 { 5421 error ("SSE register argument with SSE disabled"); 5422 issued_sse_arg_error = true; 5423 } 5424 return NULL; 5425 } 5426 5427 /* Likewise, error if the ABI requires us to return values in the 5428 x87 registers and the user specified -mno-80387. */ 5429 if (!TARGET_80387 && in_return) 5430 for (i = 0; i < n; i++) 5431 if (regclass[i] == X86_64_X87_CLASS 5432 || regclass[i] == X86_64_X87UP_CLASS 5433 || regclass[i] == X86_64_COMPLEX_X87_CLASS) 5434 { 5435 if (!issued_x87_ret_error) 5436 { 5437 error ("x87 register return with x87 disabled"); 5438 issued_x87_ret_error = true; 5439 } 5440 return NULL; 5441 } 5442 5443 /* First construct simple cases. Avoid SCmode, since we want to use 5444 single register to pass this type. */ 5445 if (n == 1 && mode != SCmode) 5446 switch (regclass[0]) 5447 { 5448 case X86_64_INTEGER_CLASS: 5449 case X86_64_INTEGERSI_CLASS: 5450 return gen_rtx_REG (mode, intreg[0]); 5451 case X86_64_SSE_CLASS: 5452 case X86_64_SSESF_CLASS: 5453 case X86_64_SSEDF_CLASS: 5454 if (mode != BLKmode) 5455 return gen_reg_or_parallel (mode, orig_mode, 5456 SSE_REGNO (sse_regno)); 5457 break; 5458 case X86_64_X87_CLASS: 5459 case X86_64_COMPLEX_X87_CLASS: 5460 return gen_rtx_REG (mode, FIRST_STACK_REG); 5461 case X86_64_NO_CLASS: 5462 /* Zero sized array, struct or class. 
*/ 5463 return NULL; 5464 default: 5465 gcc_unreachable (); 5466 } 5467 if (n == 2 && regclass[0] == X86_64_SSE_CLASS 5468 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode) 5469 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 5470 if (n == 4 5471 && regclass[0] == X86_64_SSE_CLASS 5472 && regclass[1] == X86_64_SSEUP_CLASS 5473 && regclass[2] == X86_64_SSEUP_CLASS 5474 && regclass[3] == X86_64_SSEUP_CLASS 5475 && mode != BLKmode) 5476 return gen_rtx_REG (mode, SSE_REGNO (sse_regno)); 5477 5478 if (n == 2 5479 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS) 5480 return gen_rtx_REG (XFmode, FIRST_STACK_REG); 5481 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS 5482 && regclass[1] == X86_64_INTEGER_CLASS 5483 && (mode == CDImode || mode == TImode || mode == TFmode) 5484 && intreg[0] + 1 == intreg[1]) 5485 return gen_rtx_REG (mode, intreg[0]); 5486 5487 /* Otherwise figure out the entries of the PARALLEL. */ 5488 for (i = 0; i < n; i++) 5489 { 5490 int pos; 5491 5492 switch (regclass[i]) 5493 { 5494 case X86_64_NO_CLASS: 5495 break; 5496 case X86_64_INTEGER_CLASS: 5497 case X86_64_INTEGERSI_CLASS: 5498 /* Merge TImodes on aligned occasions here too. */ 5499 if (i * 8 + 8 > bytes) 5500 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0); 5501 else if (regclass[i] == X86_64_INTEGERSI_CLASS) 5502 tmpmode = SImode; 5503 else 5504 tmpmode = DImode; 5505 /* We've requested 24 bytes we don't have mode for. Use DImode. */ 5506 if (tmpmode == BLKmode) 5507 tmpmode = DImode; 5508 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5509 gen_rtx_REG (tmpmode, *intreg), 5510 GEN_INT (i*8)); 5511 intreg++; 5512 break; 5513 case X86_64_SSESF_CLASS: 5514 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5515 gen_rtx_REG (SFmode, 5516 SSE_REGNO (sse_regno)), 5517 GEN_INT (i*8)); 5518 sse_regno++; 5519 break; 5520 case X86_64_SSEDF_CLASS: 5521 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5522 gen_rtx_REG (DFmode, 5523 SSE_REGNO (sse_regno)), 5524 GEN_INT (i*8)); 5525 sse_regno++; 5526 break; 5527 case X86_64_SSE_CLASS: 5528 pos = i; 5529 switch (n) 5530 { 5531 case 1: 5532 tmpmode = DImode; 5533 break; 5534 case 2: 5535 if (i == 0 && regclass[1] == X86_64_SSEUP_CLASS) 5536 { 5537 tmpmode = TImode; 5538 i++; 5539 } 5540 else 5541 tmpmode = DImode; 5542 break; 5543 case 4: 5544 gcc_assert (i == 0 5545 && regclass[1] == X86_64_SSEUP_CLASS 5546 && regclass[2] == X86_64_SSEUP_CLASS 5547 && regclass[3] == X86_64_SSEUP_CLASS); 5548 tmpmode = OImode; 5549 i += 3; 5550 break; 5551 default: 5552 gcc_unreachable (); 5553 } 5554 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode, 5555 gen_rtx_REG (tmpmode, 5556 SSE_REGNO (sse_regno)), 5557 GEN_INT (pos*8)); 5558 sse_regno++; 5559 break; 5560 default: 5561 gcc_unreachable (); 5562 } 5563 } 5564 5565 /* Empty aligned struct, union or class. */ 5566 if (nexps == 0) 5567 return NULL; 5568 5569 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps)); 5570 for (i = 0; i < nexps; i++) 5571 XVECEXP (ret, 0, i) = exp [i]; 5572 return ret; 5573 } 5574 5575 /* Update the data in CUM to advance over an argument of mode MODE 5576 and data type TYPE. (TYPE is null for libcalls where that information 5577 may not be available.) 
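   A worked example (hypothetical ia32 call, shown only to make the register
   bookkeeping concrete): with -mregparm=3, a leading int argument consumes
   one word, so nregs drops from 3 to 2 and regno steps from EAX to EDX; a
   following long long takes the remaining two words and exhausts the pool,
   so any later integer arguments are pushed on the stack.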
*/ 5578 5579 static void 5580 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5581 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words) 5582 { 5583 switch (mode) 5584 { 5585 default: 5586 break; 5587 5588 case BLKmode: 5589 if (bytes < 0) 5590 break; 5591 /* FALLTHRU */ 5592 5593 case DImode: 5594 case SImode: 5595 case HImode: 5596 case QImode: 5597 cum->words += words; 5598 cum->nregs -= words; 5599 cum->regno += words; 5600 5601 if (cum->nregs <= 0) 5602 { 5603 cum->nregs = 0; 5604 cum->regno = 0; 5605 } 5606 break; 5607 5608 case OImode: 5609 /* OImode shouldn't be used directly. */ 5610 gcc_unreachable (); 5611 5612 case DFmode: 5613 if (cum->float_in_sse < 2) 5614 break; 5615 case SFmode: 5616 if (cum->float_in_sse < 1) 5617 break; 5618 /* FALLTHRU */ 5619 5620 case V8SFmode: 5621 case V8SImode: 5622 case V32QImode: 5623 case V16HImode: 5624 case V4DFmode: 5625 case V4DImode: 5626 case TImode: 5627 case V16QImode: 5628 case V8HImode: 5629 case V4SImode: 5630 case V2DImode: 5631 case V4SFmode: 5632 case V2DFmode: 5633 if (!type || !AGGREGATE_TYPE_P (type)) 5634 { 5635 cum->sse_words += words; 5636 cum->sse_nregs -= 1; 5637 cum->sse_regno += 1; 5638 if (cum->sse_nregs <= 0) 5639 { 5640 cum->sse_nregs = 0; 5641 cum->sse_regno = 0; 5642 } 5643 } 5644 break; 5645 5646 case V8QImode: 5647 case V4HImode: 5648 case V2SImode: 5649 case V2SFmode: 5650 case V1DImode: 5651 if (!type || !AGGREGATE_TYPE_P (type)) 5652 { 5653 cum->mmx_words += words; 5654 cum->mmx_nregs -= 1; 5655 cum->mmx_regno += 1; 5656 if (cum->mmx_nregs <= 0) 5657 { 5658 cum->mmx_nregs = 0; 5659 cum->mmx_regno = 0; 5660 } 5661 } 5662 break; 5663 } 5664 } 5665 5666 static void 5667 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5668 tree type, HOST_WIDE_INT words, int named) 5669 { 5670 int int_nregs, sse_nregs; 5671 5672 /* Unnamed 256bit vector mode parameters are passed on stack. */ 5673 if (!named && VALID_AVX256_REG_MODE (mode)) 5674 return; 5675 5676 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs)) 5677 cum->words += words; 5678 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs) 5679 { 5680 cum->nregs -= int_nregs; 5681 cum->sse_nregs -= sse_nregs; 5682 cum->regno += int_nregs; 5683 cum->sse_regno += sse_nregs; 5684 } 5685 else 5686 cum->words += words; 5687 } 5688 5689 static void 5690 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes, 5691 HOST_WIDE_INT words) 5692 { 5693 /* Otherwise, this should be passed indirect. */ 5694 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8); 5695 5696 cum->words += words; 5697 if (cum->nregs > 0) 5698 { 5699 cum->nregs -= 1; 5700 cum->regno += 1; 5701 } 5702 } 5703 5704 void 5705 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5706 tree type, int named) 5707 { 5708 HOST_WIDE_INT bytes, words; 5709 5710 if (mode == BLKmode) 5711 bytes = int_size_in_bytes (type); 5712 else 5713 bytes = GET_MODE_SIZE (mode); 5714 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5715 5716 if (type) 5717 mode = type_natural_mode (type, NULL); 5718 5719 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5720 function_arg_advance_ms_64 (cum, bytes, words); 5721 else if (TARGET_64BIT) 5722 function_arg_advance_64 (cum, mode, type, words, named); 5723 else 5724 function_arg_advance_32 (cum, mode, type, bytes, words); 5725 } 5726 5727 /* Define where to put the arguments to a function. 
5728 Value is zero to push the argument on the stack, 5729 or a hard register in which to store the argument. 5730 5731 MODE is the argument's machine mode. 5732 TYPE is the data type of the argument (as a tree). 5733 This is null for libcalls where that information may 5734 not be available. 5735 CUM is a variable of type CUMULATIVE_ARGS which gives info about 5736 the preceding args and about the function being called. 5737 NAMED is nonzero if this argument is a named parameter 5738 (otherwise it is an extra parameter matching an ellipsis). */ 5739 5740 static rtx 5741 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5742 enum machine_mode orig_mode, tree type, 5743 HOST_WIDE_INT bytes, HOST_WIDE_INT words) 5744 { 5745 static bool warnedsse, warnedmmx; 5746 5747 /* Avoid the AL settings for the Unix64 ABI. */ 5748 if (mode == VOIDmode) 5749 return constm1_rtx; 5750 5751 switch (mode) 5752 { 5753 default: 5754 break; 5755 5756 case BLKmode: 5757 if (bytes < 0) 5758 break; 5759 /* FALLTHRU */ 5760 case DImode: 5761 case SImode: 5762 case HImode: 5763 case QImode: 5764 if (words <= cum->nregs) 5765 { 5766 int regno = cum->regno; 5767 5768 /* Fastcall allocates the first two DWORD (SImode) or 5769 smaller arguments to ECX and EDX if it isn't an 5770 aggregate type . */ 5771 if (cum->fastcall) 5772 { 5773 if (mode == BLKmode 5774 || mode == DImode 5775 || (type && AGGREGATE_TYPE_P (type))) 5776 break; 5777 5778 /* ECX not EAX is the first allocated register. */ 5779 if (regno == AX_REG) 5780 regno = CX_REG; 5781 } 5782 return gen_rtx_REG (mode, regno); 5783 } 5784 break; 5785 5786 case DFmode: 5787 if (cum->float_in_sse < 2) 5788 break; 5789 case SFmode: 5790 if (cum->float_in_sse < 1) 5791 break; 5792 /* FALLTHRU */ 5793 case TImode: 5794 /* In 32bit, we pass TImode in xmm registers. */ 5795 case V16QImode: 5796 case V8HImode: 5797 case V4SImode: 5798 case V2DImode: 5799 case V4SFmode: 5800 case V2DFmode: 5801 if (!type || !AGGREGATE_TYPE_P (type)) 5802 { 5803 if (!TARGET_SSE && !warnedsse && cum->warn_sse) 5804 { 5805 warnedsse = true; 5806 warning (0, "SSE vector argument without SSE enabled " 5807 "changes the ABI"); 5808 } 5809 if (cum->sse_nregs) 5810 return gen_reg_or_parallel (mode, orig_mode, 5811 cum->sse_regno + FIRST_SSE_REG); 5812 } 5813 break; 5814 5815 case OImode: 5816 /* OImode shouldn't be used directly. */ 5817 gcc_unreachable (); 5818 5819 case V8SFmode: 5820 case V8SImode: 5821 case V32QImode: 5822 case V16HImode: 5823 case V4DFmode: 5824 case V4DImode: 5825 if (!type || !AGGREGATE_TYPE_P (type)) 5826 { 5827 if (cum->sse_nregs) 5828 return gen_reg_or_parallel (mode, orig_mode, 5829 cum->sse_regno + FIRST_SSE_REG); 5830 } 5831 break; 5832 5833 case V8QImode: 5834 case V4HImode: 5835 case V2SImode: 5836 case V2SFmode: 5837 case V1DImode: 5838 if (!type || !AGGREGATE_TYPE_P (type)) 5839 { 5840 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx) 5841 { 5842 warnedmmx = true; 5843 warning (0, "MMX vector argument without MMX enabled " 5844 "changes the ABI"); 5845 } 5846 if (cum->mmx_nregs) 5847 return gen_reg_or_parallel (mode, orig_mode, 5848 cum->mmx_regno + FIRST_MMX_REG); 5849 } 5850 break; 5851 } 5852 5853 return NULL_RTX; 5854 } 5855 5856 static rtx 5857 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5858 enum machine_mode orig_mode, tree type, int named) 5859 { 5860 /* Handle a hidden AL argument containing number of registers 5861 for varargs x86-64 functions. */ 5862 if (mode == VOIDmode) 5863 return GEN_INT (cum->maybe_vaarg 5864 ? 
(cum->sse_nregs < 0 5865 ? (cum->call_abi == DEFAULT_ABI 5866 ? SSE_REGPARM_MAX 5867 : (DEFAULT_ABI != SYSV_ABI ? X86_64_SSE_REGPARM_MAX 5868 : X64_SSE_REGPARM_MAX)) 5869 : cum->sse_regno) 5870 : -1); 5871 5872 switch (mode) 5873 { 5874 default: 5875 break; 5876 5877 case V8SFmode: 5878 case V8SImode: 5879 case V32QImode: 5880 case V16HImode: 5881 case V4DFmode: 5882 case V4DImode: 5883 /* Unnamed 256bit vector mode parameters are passed on stack. */ 5884 if (!named) 5885 return NULL; 5886 break; 5887 } 5888 5889 return construct_container (mode, orig_mode, type, 0, cum->nregs, 5890 cum->sse_nregs, 5891 &x86_64_int_parameter_registers [cum->regno], 5892 cum->sse_regno); 5893 } 5894 5895 static rtx 5896 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode, 5897 enum machine_mode orig_mode, int named, 5898 HOST_WIDE_INT bytes) 5899 { 5900 unsigned int regno; 5901 5902 /* We need to add clobber for MS_ABI->SYSV ABI calls in expand_call. 5903 We use value of -2 to specify that current function call is MSABI. */ 5904 if (mode == VOIDmode) 5905 return GEN_INT (-2); 5906 5907 /* If we've run out of registers, it goes on the stack. */ 5908 if (cum->nregs == 0) 5909 return NULL_RTX; 5910 5911 regno = x86_64_ms_abi_int_parameter_registers[cum->regno]; 5912 5913 /* Only floating point modes are passed in anything but integer regs. */ 5914 if (TARGET_SSE && (mode == SFmode || mode == DFmode)) 5915 { 5916 if (named) 5917 regno = cum->regno + FIRST_SSE_REG; 5918 else 5919 { 5920 rtx t1, t2; 5921 5922 /* Unnamed floating parameters are passed in both the 5923 SSE and integer registers. */ 5924 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG); 5925 t2 = gen_rtx_REG (mode, regno); 5926 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx); 5927 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx); 5928 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2)); 5929 } 5930 } 5931 /* Handle aggregated types passed in register. */ 5932 if (orig_mode == BLKmode) 5933 { 5934 if (bytes > 0 && bytes <= 8) 5935 mode = (bytes > 4 ? DImode : SImode); 5936 if (mode == BLKmode) 5937 mode = DImode; 5938 } 5939 5940 return gen_reg_or_parallel (mode, orig_mode, regno); 5941 } 5942 5943 rtx 5944 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode, 5945 tree type, int named) 5946 { 5947 enum machine_mode mode = omode; 5948 HOST_WIDE_INT bytes, words; 5949 5950 if (mode == BLKmode) 5951 bytes = int_size_in_bytes (type); 5952 else 5953 bytes = GET_MODE_SIZE (mode); 5954 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 5955 5956 /* To simplify the code below, represent vector types with a vector mode 5957 even if MMX/SSE are not active. */ 5958 if (type && TREE_CODE (type) == VECTOR_TYPE) 5959 mode = type_natural_mode (type, cum); 5960 5961 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5962 return function_arg_ms_64 (cum, mode, omode, named, bytes); 5963 else if (TARGET_64BIT) 5964 return function_arg_64 (cum, mode, omode, type, named); 5965 else 5966 return function_arg_32 (cum, mode, omode, type, bytes, words); 5967 } 5968 5969 /* A C expression that indicates when an argument must be passed by 5970 reference. If nonzero for an argument, a copy of that argument is 5971 made in memory and a pointer to the argument is passed instead of 5972 the argument itself. The pointer is passed in whatever way is 5973 appropriate for passing a pointer to that type. 
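   Concrete cases under the Windows x64 convention (illustrative, not
   exhaustive): a 12-byte struct or a __m128 argument is replaced by a
   pointer to a caller-made copy, while a 1, 2, 4 or 8 byte aggregate is
   passed directly; on the SysV side only variable-sized types (where
   int_size_in_bytes returns -1) take the by-reference path here.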
*/ 5974 5975 static bool 5976 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED, 5977 enum machine_mode mode ATTRIBUTE_UNUSED, 5978 const_tree type, bool named ATTRIBUTE_UNUSED) 5979 { 5980 /* See Windows x64 Software Convention. */ 5981 if (TARGET_64BIT && (cum ? cum->call_abi : DEFAULT_ABI) == MS_ABI) 5982 { 5983 int msize = (int) GET_MODE_SIZE (mode); 5984 if (type) 5985 { 5986 /* Arrays are passed by reference. */ 5987 if (TREE_CODE (type) == ARRAY_TYPE) 5988 return true; 5989 5990 if (AGGREGATE_TYPE_P (type)) 5991 { 5992 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits 5993 are passed by reference. */ 5994 msize = int_size_in_bytes (type); 5995 } 5996 } 5997 5998 /* __m128 is passed by reference. */ 5999 switch (msize) { 6000 case 1: case 2: case 4: case 8: 6001 break; 6002 default: 6003 return true; 6004 } 6005 } 6006 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1) 6007 return 1; 6008 6009 return 0; 6010 } 6011 6012 /* Return true when TYPE should be 128bit aligned for 32bit argument passing 6013 ABI. */ 6014 static bool 6015 contains_aligned_value_p (tree type) 6016 { 6017 enum machine_mode mode = TYPE_MODE (type); 6018 if (((TARGET_SSE && SSE_REG_MODE_P (mode)) 6019 || mode == TDmode 6020 || mode == TFmode 6021 || mode == TCmode) 6022 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128)) 6023 return true; 6024 if (TYPE_ALIGN (type) < 128) 6025 return false; 6026 6027 if (AGGREGATE_TYPE_P (type)) 6028 { 6029 /* Walk the aggregates recursively. */ 6030 switch (TREE_CODE (type)) 6031 { 6032 case RECORD_TYPE: 6033 case UNION_TYPE: 6034 case QUAL_UNION_TYPE: 6035 { 6036 tree field; 6037 6038 /* Walk all the structure fields. */ 6039 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field)) 6040 { 6041 if (TREE_CODE (field) == FIELD_DECL 6042 && contains_aligned_value_p (TREE_TYPE (field))) 6043 return true; 6044 } 6045 break; 6046 } 6047 6048 case ARRAY_TYPE: 6049 /* Just for use if some languages passes arrays by value. */ 6050 if (contains_aligned_value_p (TREE_TYPE (type))) 6051 return true; 6052 break; 6053 6054 default: 6055 gcc_unreachable (); 6056 } 6057 } 6058 return false; 6059 } 6060 6061 /* Gives the alignment boundary, in bits, of an argument with the 6062 specified mode and type. */ 6063 6064 int 6065 ix86_function_arg_boundary (enum machine_mode mode, tree type) 6066 { 6067 int align; 6068 if (type) 6069 { 6070 /* Since canonical type is used for call, we convert it to 6071 canonical type if needed. */ 6072 if (!TYPE_STRUCTURAL_EQUALITY_P (type)) 6073 type = TYPE_CANONICAL (type); 6074 align = TYPE_ALIGN (type); 6075 } 6076 else 6077 align = GET_MODE_ALIGNMENT (mode); 6078 if (align < PARM_BOUNDARY) 6079 align = PARM_BOUNDARY; 6080 /* In 32bit, only _Decimal128 and __float128 are aligned to their 6081 natural boundaries. */ 6082 if (!TARGET_64BIT && mode != TDmode && mode != TFmode) 6083 { 6084 /* i386 ABI defines all arguments to be 4 byte aligned. We have to 6085 make an exception for SSE modes since these require 128bit 6086 alignment. 6087 6088 The handling here differs from field_alignment. ICC aligns MMX 6089 arguments to 4 byte boundaries, while structure fields are aligned 6090 to 8 byte boundaries. 
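   For instance (hypothetical parameters, not from this file): a plain double
   argument ends up at the 4-byte PARM_BOUNDARY on ia32, whereas a __m128
   argument, or a struct containing one, keeps its 128-bit alignment thanks
   to the SSE_REG_MODE_P and contains_aligned_value_p checks below.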
*/ 6091 if (!type) 6092 { 6093 if (!(TARGET_SSE && SSE_REG_MODE_P (mode))) 6094 align = PARM_BOUNDARY; 6095 } 6096 else 6097 { 6098 if (!contains_aligned_value_p (type)) 6099 align = PARM_BOUNDARY; 6100 } 6101 } 6102 if (align > BIGGEST_ALIGNMENT) 6103 align = BIGGEST_ALIGNMENT; 6104 return align; 6105 } 6106 6107 /* Return true if N is a possible register number of function value. */ 6108 6109 bool 6110 ix86_function_value_regno_p (int regno) 6111 { 6112 switch (regno) 6113 { 6114 case 0: 6115 return true; 6116 6117 case FIRST_FLOAT_REG: 6118 /* TODO: The function should depend on current function ABI but 6119 builtins.c would need updating then. Therefore we use the 6120 default ABI. */ 6121 if (TARGET_64BIT && DEFAULT_ABI == MS_ABI) 6122 return false; 6123 return TARGET_FLOAT_RETURNS_IN_80387; 6124 6125 case FIRST_SSE_REG: 6126 return TARGET_SSE; 6127 6128 case FIRST_MMX_REG: 6129 if (TARGET_MACHO || TARGET_64BIT) 6130 return false; 6131 return TARGET_MMX; 6132 } 6133 6134 return false; 6135 } 6136 6137 /* Define how to find the value returned by a function. 6138 VALTYPE is the data type of the value (as a tree). 6139 If the precise function being called is known, FUNC is its FUNCTION_DECL; 6140 otherwise, FUNC is 0. */ 6141 6142 static rtx 6143 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode, 6144 const_tree fntype, const_tree fn) 6145 { 6146 unsigned int regno; 6147 6148 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where 6149 we normally prevent this case when mmx is not available. However 6150 some ABIs may require the result to be returned like DImode. */ 6151 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 6152 regno = TARGET_MMX ? FIRST_MMX_REG : 0; 6153 6154 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where 6155 we prevent this case when sse is not available. However some ABIs 6156 may require the result to be returned like integer TImode. */ 6157 else if (mode == TImode 6158 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 6159 regno = TARGET_SSE ? FIRST_SSE_REG : 0; 6160 6161 /* 32-byte vector modes in %ymm0. */ 6162 else if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 32) 6163 regno = TARGET_AVX ? FIRST_SSE_REG : 0; 6164 6165 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */ 6166 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387) 6167 regno = FIRST_FLOAT_REG; 6168 else 6169 /* Most things go in %eax. */ 6170 regno = AX_REG; 6171 6172 /* Override FP return register with %xmm0 for local functions when 6173 SSE math is enabled or for functions with sseregparm attribute. */ 6174 if ((fn || fntype) && (mode == SFmode || mode == DFmode)) 6175 { 6176 int sse_level = ix86_function_sseregparm (fntype, fn, false); 6177 if ((sse_level >= 1 && mode == SFmode) 6178 || (sse_level == 2 && mode == DFmode)) 6179 regno = FIRST_SSE_REG; 6180 } 6181 6182 /* OImode shouldn't be used directly. */ 6183 gcc_assert (mode != OImode); 6184 6185 return gen_rtx_REG (orig_mode, regno); 6186 } 6187 6188 static rtx 6189 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode, 6190 const_tree valtype) 6191 { 6192 rtx ret; 6193 6194 /* Handle libcalls, which don't provide a type node. 
*/ 6195 if (valtype == NULL) 6196 { 6197 switch (mode) 6198 { 6199 case SFmode: 6200 case SCmode: 6201 case DFmode: 6202 case DCmode: 6203 case TFmode: 6204 case SDmode: 6205 case DDmode: 6206 case TDmode: 6207 return gen_rtx_REG (mode, FIRST_SSE_REG); 6208 case XFmode: 6209 case XCmode: 6210 return gen_rtx_REG (mode, FIRST_FLOAT_REG); 6211 case TCmode: 6212 return NULL; 6213 default: 6214 return gen_rtx_REG (mode, AX_REG); 6215 } 6216 } 6217 6218 ret = construct_container (mode, orig_mode, valtype, 1, 6219 X86_64_REGPARM_MAX, X86_64_SSE_REGPARM_MAX, 6220 x86_64_int_return_registers, 0); 6221 6222 /* For zero sized structures, construct_container returns NULL, but we 6223 need to keep rest of compiler happy by returning meaningful value. */ 6224 if (!ret) 6225 ret = gen_rtx_REG (orig_mode, AX_REG); 6226 6227 return ret; 6228 } 6229 6230 static rtx 6231 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode) 6232 { 6233 unsigned int regno = AX_REG; 6234 6235 if (TARGET_SSE) 6236 { 6237 switch (GET_MODE_SIZE (mode)) 6238 { 6239 case 16: 6240 if((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) 6241 && !COMPLEX_MODE_P (mode)) 6242 regno = FIRST_SSE_REG; 6243 break; 6244 case 8: 6245 case 4: 6246 if (mode == SFmode || mode == DFmode) 6247 regno = FIRST_SSE_REG; 6248 break; 6249 default: 6250 break; 6251 } 6252 } 6253 return gen_rtx_REG (orig_mode, regno); 6254 } 6255 6256 static rtx 6257 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl, 6258 enum machine_mode orig_mode, enum machine_mode mode) 6259 { 6260 const_tree fn, fntype; 6261 6262 fn = NULL_TREE; 6263 if (fntype_or_decl && DECL_P (fntype_or_decl)) 6264 fn = fntype_or_decl; 6265 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl; 6266 6267 if (TARGET_64BIT && ix86_function_type_abi (fntype) == MS_ABI) 6268 return function_value_ms_64 (orig_mode, mode); 6269 else if (TARGET_64BIT) 6270 return function_value_64 (orig_mode, mode, valtype); 6271 else 6272 return function_value_32 (orig_mode, mode, fntype, fn); 6273 } 6274 6275 static rtx 6276 ix86_function_value (const_tree valtype, const_tree fntype_or_decl, 6277 bool outgoing ATTRIBUTE_UNUSED) 6278 { 6279 enum machine_mode mode, orig_mode; 6280 6281 orig_mode = TYPE_MODE (valtype); 6282 mode = type_natural_mode (valtype, NULL); 6283 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode); 6284 } 6285 6286 rtx 6287 ix86_libcall_value (enum machine_mode mode) 6288 { 6289 return ix86_function_value_1 (NULL, NULL, mode, mode); 6290 } 6291 6292 /* Return true iff type is returned in memory. */ 6293 6294 static int ATTRIBUTE_UNUSED 6295 return_in_memory_32 (const_tree type, enum machine_mode mode) 6296 { 6297 HOST_WIDE_INT size; 6298 6299 if (mode == BLKmode) 6300 return 1; 6301 6302 size = int_size_in_bytes (type); 6303 6304 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8) 6305 return 0; 6306 6307 if (VECTOR_MODE_P (mode) || mode == TImode) 6308 { 6309 /* User-created vectors small enough to fit in EAX. */ 6310 if (size < 8) 6311 return 0; 6312 6313 /* MMX/3dNow values are returned in MM0, 6314 except when it doesn't exits. */ 6315 if (size == 8) 6316 return (TARGET_MMX ? 0 : 1); 6317 6318 /* SSE values are returned in XMM0, except when it doesn't exist. */ 6319 if (size == 16) 6320 return (TARGET_SSE ? 0 : 1); 6321 6322 /* AVX values are returned in YMM0, except when it doesn't exist. */ 6323 if (size == 32) 6324 return TARGET_AVX ? 
0 : 1; 6325 } 6326 6327 if (mode == XFmode) 6328 return 0; 6329 6330 if (size > 12) 6331 return 1; 6332 6333 /* OImode shouldn't be used directly. */ 6334 gcc_assert (mode != OImode); 6335 6336 return 0; 6337 } 6338 6339 static int ATTRIBUTE_UNUSED 6340 return_in_memory_64 (const_tree type, enum machine_mode mode) 6341 { 6342 int needed_intregs, needed_sseregs; 6343 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs); 6344 } 6345 6346 static int ATTRIBUTE_UNUSED 6347 return_in_memory_ms_64 (const_tree type, enum machine_mode mode) 6348 { 6349 HOST_WIDE_INT size = int_size_in_bytes (type); 6350 6351 /* __m128 is returned in xmm0. */ 6352 if ((SCALAR_INT_MODE_P (mode) || VECTOR_MODE_P (mode)) 6353 && !COMPLEX_MODE_P (mode) && (GET_MODE_SIZE (mode) == 16 || size == 16)) 6354 return 0; 6355 6356 /* Otherwise, the size must be exactly in [1248]. */ 6357 return (size != 1 && size != 2 && size != 4 && size != 8); 6358 } 6359 6360 static bool 6361 ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 6362 { 6363 #ifdef SUBTARGET_RETURN_IN_MEMORY 6364 return SUBTARGET_RETURN_IN_MEMORY (type, fntype); 6365 #else 6366 const enum machine_mode mode = type_natural_mode (type, NULL); 6367 6368 if (TARGET_64BIT) 6369 { 6370 if (ix86_function_type_abi (fntype) == MS_ABI) 6371 return return_in_memory_ms_64 (type, mode); 6372 else 6373 return return_in_memory_64 (type, mode); 6374 } 6375 else 6376 return return_in_memory_32 (type, mode); 6377 #endif 6378 } 6379 6380 /* Return false iff TYPE is returned in memory. This version is used 6381 on Solaris 10. It is similar to the generic ix86_return_in_memory, 6382 but differs notably in that when MMX is available, 8-byte vectors 6383 are returned in memory, rather than in MMX registers. */ 6384 6385 bool 6386 ix86_sol10_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED) 6387 { 6388 int size; 6389 enum machine_mode mode = type_natural_mode (type, NULL); 6390 6391 if (TARGET_64BIT) 6392 return return_in_memory_64 (type, mode); 6393 6394 if (mode == BLKmode) 6395 return 1; 6396 6397 size = int_size_in_bytes (type); 6398 6399 if (VECTOR_MODE_P (mode)) 6400 { 6401 /* Return in memory only if MMX registers *are* available. This 6402 seems backwards, but it is consistent with the existing 6403 Solaris x86 ABI. */ 6404 if (size == 8) 6405 return TARGET_MMX; 6406 if (size == 16) 6407 return !TARGET_SSE; 6408 } 6409 else if (mode == TImode) 6410 return !TARGET_SSE; 6411 else if (mode == XFmode) 6412 return 0; 6413 6414 return size > 12; 6415 } 6416 6417 /* When returning SSE vector types, we have a choice of either 6418 (1) being abi incompatible with a -march switch, or 6419 (2) generating an error. 6420 Given no good solution, I think the safest thing is one warning. 6421 The user won't be able to use -Werror, but.... 6422 6423 Choose the STRUCT_VALUE_RTX hook because that's (at present) only 6424 called in response to actually generating a caller or callee that 6425 uses such a type. As opposed to TARGET_RETURN_IN_MEMORY, which is called 6426 via aggregate_value_p for general type probing from tree-ssa. */ 6427 6428 static rtx 6429 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED) 6430 { 6431 static bool warnedsse, warnedmmx; 6432 6433 if (!TARGET_64BIT && type) 6434 { 6435 /* Look at the return type of the function, not the function type. 
*/ 6436 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type)); 6437 6438 if (!TARGET_SSE && !warnedsse) 6439 { 6440 if (mode == TImode 6441 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16)) 6442 { 6443 warnedsse = true; 6444 warning (0, "SSE vector return without SSE enabled " 6445 "changes the ABI"); 6446 } 6447 } 6448 6449 if (!TARGET_MMX && !warnedmmx) 6450 { 6451 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8) 6452 { 6453 warnedmmx = true; 6454 warning (0, "MMX vector return without MMX enabled " 6455 "changes the ABI"); 6456 } 6457 } 6458 } 6459 6460 return NULL; 6461 } 6462 6463 6464 /* Create the va_list data type. */ 6465 6466 /* Returns the calling convention specific va_list date type. 6467 The argument ABI can be DEFAULT_ABI, MS_ABI, or SYSV_ABI. */ 6468 6469 static tree 6470 ix86_build_builtin_va_list_abi (enum calling_abi abi) 6471 { 6472 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl; 6473 6474 /* For i386 we use plain pointer to argument area. */ 6475 if (!TARGET_64BIT || abi == MS_ABI) 6476 return build_pointer_type (char_type_node); 6477 6478 record = (*lang_hooks.types.make_type) (RECORD_TYPE); 6479 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record); 6480 6481 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"), 6482 unsigned_type_node); 6483 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"), 6484 unsigned_type_node); 6485 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"), 6486 ptr_type_node); 6487 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"), 6488 ptr_type_node); 6489 6490 va_list_gpr_counter_field = f_gpr; 6491 va_list_fpr_counter_field = f_fpr; 6492 6493 DECL_FIELD_CONTEXT (f_gpr) = record; 6494 DECL_FIELD_CONTEXT (f_fpr) = record; 6495 DECL_FIELD_CONTEXT (f_ovf) = record; 6496 DECL_FIELD_CONTEXT (f_sav) = record; 6497 6498 TREE_CHAIN (record) = type_decl; 6499 TYPE_NAME (record) = type_decl; 6500 TYPE_FIELDS (record) = f_gpr; 6501 TREE_CHAIN (f_gpr) = f_fpr; 6502 TREE_CHAIN (f_fpr) = f_ovf; 6503 TREE_CHAIN (f_ovf) = f_sav; 6504 6505 layout_type (record); 6506 6507 /* The correct type is an array type of one element. */ 6508 return build_array_type (record, build_index_type (size_zero_node)); 6509 } 6510 6511 /* Setup the builtin va_list data type and for 64-bit the additional 6512 calling convention specific va_list data types. */ 6513 6514 static tree 6515 ix86_build_builtin_va_list (void) 6516 { 6517 tree ret = ix86_build_builtin_va_list_abi (DEFAULT_ABI); 6518 6519 /* Initialize abi specific va_list builtin types. */ 6520 if (TARGET_64BIT) 6521 { 6522 tree t; 6523 if (DEFAULT_ABI == MS_ABI) 6524 { 6525 t = ix86_build_builtin_va_list_abi (SYSV_ABI); 6526 if (TREE_CODE (t) != RECORD_TYPE) 6527 t = build_variant_type_copy (t); 6528 sysv_va_list_type_node = t; 6529 } 6530 else 6531 { 6532 t = ret; 6533 if (TREE_CODE (t) != RECORD_TYPE) 6534 t = build_variant_type_copy (t); 6535 sysv_va_list_type_node = t; 6536 } 6537 if (DEFAULT_ABI != MS_ABI) 6538 { 6539 t = ix86_build_builtin_va_list_abi (MS_ABI); 6540 if (TREE_CODE (t) != RECORD_TYPE) 6541 t = build_variant_type_copy (t); 6542 ms_va_list_type_node = t; 6543 } 6544 else 6545 { 6546 t = ret; 6547 if (TREE_CODE (t) != RECORD_TYPE) 6548 t = build_variant_type_copy (t); 6549 ms_va_list_type_node = t; 6550 } 6551 } 6552 6553 return ret; 6554 } 6555 6556 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. 
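   An illustrative sketch of the register save area this worker fills in
   (offsets assume the SysV limits used here, X86_64_REGPARM_MAX == 6 and
   X86_64_SSE_REGPARM_MAX == 8): bytes 0..47 hold rdi, rsi, rdx, rcx, r8 and
   r9 in 8-byte slots, and bytes 48..175 hold xmm0-xmm7 in 16-byte slots;
   ix86_va_start later points gp_offset and fp_offset into this block.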
*/ 6557 6558 static void 6559 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum) 6560 { 6561 rtx save_area, mem; 6562 rtx label; 6563 rtx label_ref; 6564 rtx tmp_reg; 6565 rtx nsse_reg; 6566 alias_set_type set; 6567 int i; 6568 int regparm = ix86_regparm; 6569 6570 if (cum->call_abi != DEFAULT_ABI) 6571 regparm = DEFAULT_ABI != SYSV_ABI ? X86_64_REGPARM_MAX : X64_REGPARM_MAX; 6572 6573 /* GPR size of varargs save area. */ 6574 if (cfun->va_list_gpr_size) 6575 ix86_varargs_gpr_size = X86_64_REGPARM_MAX * UNITS_PER_WORD; 6576 else 6577 ix86_varargs_gpr_size = 0; 6578 6579 /* FPR size of varargs save area. We don't need it if we don't pass 6580 anything in SSE registers. */ 6581 if (cum->sse_nregs && cfun->va_list_fpr_size) 6582 ix86_varargs_fpr_size = X86_64_SSE_REGPARM_MAX * 16; 6583 else 6584 ix86_varargs_fpr_size = 0; 6585 6586 if (! ix86_varargs_gpr_size && ! ix86_varargs_fpr_size) 6587 return; 6588 6589 save_area = frame_pointer_rtx; 6590 set = get_varargs_alias_set (); 6591 6592 for (i = cum->regno; 6593 i < regparm 6594 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD; 6595 i++) 6596 { 6597 mem = gen_rtx_MEM (Pmode, 6598 plus_constant (save_area, i * UNITS_PER_WORD)); 6599 MEM_NOTRAP_P (mem) = 1; 6600 set_mem_alias_set (mem, set); 6601 emit_move_insn (mem, gen_rtx_REG (Pmode, 6602 x86_64_int_parameter_registers[i])); 6603 } 6604 6605 if (ix86_varargs_fpr_size) 6606 { 6607 /* Stack must be aligned to 16byte for FP register save area. */ 6608 if (crtl->stack_alignment_needed < 128) 6609 crtl->stack_alignment_needed = 128; 6610 6611 /* Now emit code to save SSE registers. The AX parameter contains number 6612 of SSE parameter registers used to call this function. We use 6613 sse_prologue_save insn template that produces computed jump across 6614 SSE saves. We need some preparation work to get this working. */ 6615 6616 label = gen_label_rtx (); 6617 label_ref = gen_rtx_LABEL_REF (Pmode, label); 6618 6619 /* Compute address to jump to : 6620 label - eax*4 + nnamed_sse_arguments*4 Or 6621 label - eax*5 + nnamed_sse_arguments*5 for AVX. */ 6622 tmp_reg = gen_reg_rtx (Pmode); 6623 nsse_reg = gen_reg_rtx (Pmode); 6624 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, AX_REG))); 6625 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6626 gen_rtx_MULT (Pmode, nsse_reg, 6627 GEN_INT (4)))); 6628 6629 /* vmovaps is one byte longer than movaps. */ 6630 if (TARGET_AVX) 6631 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6632 gen_rtx_PLUS (Pmode, tmp_reg, 6633 nsse_reg))); 6634 6635 if (cum->sse_regno) 6636 emit_move_insn 6637 (nsse_reg, 6638 gen_rtx_CONST (DImode, 6639 gen_rtx_PLUS (DImode, 6640 label_ref, 6641 GEN_INT (cum->sse_regno 6642 * (TARGET_AVX ? 5 : 4))))); 6643 else 6644 emit_move_insn (nsse_reg, label_ref); 6645 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg)); 6646 6647 /* Compute address of memory block we save into. We always use pointer 6648 pointing 127 bytes after first byte to store - this is needed to keep 6649 instruction size limited by 4 bytes (5 bytes for AVX) with one 6650 byte displacement. */ 6651 tmp_reg = gen_reg_rtx (Pmode); 6652 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg, 6653 plus_constant (save_area, 6654 ix86_varargs_gpr_size + 127))); 6655 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127)); 6656 MEM_NOTRAP_P (mem) = 1; 6657 set_mem_alias_set (mem, set); 6658 set_mem_align (mem, BITS_PER_WORD); 6659 6660 /* And finally do the dirty job! 
*/ 6661 emit_insn (gen_sse_prologue_save (mem, nsse_reg, 6662 GEN_INT (cum->sse_regno), label)); 6663 } 6664 } 6665 6666 static void 6667 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum) 6668 { 6669 alias_set_type set = get_varargs_alias_set (); 6670 int i; 6671 6672 for (i = cum->regno; i < X64_REGPARM_MAX; i++) 6673 { 6674 rtx reg, mem; 6675 6676 mem = gen_rtx_MEM (Pmode, 6677 plus_constant (virtual_incoming_args_rtx, 6678 i * UNITS_PER_WORD)); 6679 MEM_NOTRAP_P (mem) = 1; 6680 set_mem_alias_set (mem, set); 6681 6682 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]); 6683 emit_move_insn (mem, reg); 6684 } 6685 } 6686 6687 static void 6688 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode, 6689 tree type, int *pretend_size ATTRIBUTE_UNUSED, 6690 int no_rtl) 6691 { 6692 CUMULATIVE_ARGS next_cum; 6693 tree fntype; 6694 6695 /* This argument doesn't appear to be used anymore. Which is good, 6696 because the old code here didn't suppress rtl generation. */ 6697 gcc_assert (!no_rtl); 6698 6699 if (!TARGET_64BIT) 6700 return; 6701 6702 fntype = TREE_TYPE (current_function_decl); 6703 6704 /* For varargs, we do not want to skip the dummy va_dcl argument. 6705 For stdargs, we do want to skip the last named argument. */ 6706 next_cum = *cum; 6707 if (stdarg_p (fntype)) 6708 function_arg_advance (&next_cum, mode, type, 1); 6709 6710 if (cum->call_abi == MS_ABI) 6711 setup_incoming_varargs_ms_64 (&next_cum); 6712 else 6713 setup_incoming_varargs_64 (&next_cum); 6714 } 6715 6716 /* Checks if TYPE is of kind va_list char *. */ 6717 6718 static bool 6719 is_va_list_char_pointer (tree type) 6720 { 6721 tree canonic; 6722 6723 /* For 32-bit it is always true. */ 6724 if (!TARGET_64BIT) 6725 return true; 6726 canonic = ix86_canonical_va_list_type (type); 6727 return (canonic == ms_va_list_type_node 6728 || (DEFAULT_ABI == MS_ABI && canonic == va_list_type_node)); 6729 } 6730 6731 /* Implement va_start. */ 6732 6733 static void 6734 ix86_va_start (tree valist, rtx nextarg) 6735 { 6736 HOST_WIDE_INT words, n_gpr, n_fpr; 6737 tree f_gpr, f_fpr, f_ovf, f_sav; 6738 tree gpr, fpr, ovf, sav, t; 6739 tree type; 6740 6741 /* Only 64bit target needs something special. */ 6742 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) 6743 { 6744 std_expand_builtin_va_start (valist, nextarg); 6745 return; 6746 } 6747 6748 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); 6749 f_fpr = TREE_CHAIN (f_gpr); 6750 f_ovf = TREE_CHAIN (f_fpr); 6751 f_sav = TREE_CHAIN (f_ovf); 6752 6753 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist); 6754 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE); 6755 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 6756 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 6757 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 6758 6759 /* Count number of gp and fp argument registers used. 
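   For example (hypothetical prototype, shown only to make the arithmetic
   concrete): for int f (int a, ...) the single named argument uses one GPR
   and no SSE register, so the assignments below set gp_offset to 1*8 = 8 and
   fp_offset to 0*16 + 8*X86_64_REGPARM_MAX = 48, i.e. va_arg starts scanning
   the save area just past the slots consumed by the named parameter.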
*/ 6760 words = crtl->args.info.words; 6761 n_gpr = crtl->args.info.regno; 6762 n_fpr = crtl->args.info.sse_regno; 6763 6764 if (cfun->va_list_gpr_size) 6765 { 6766 type = TREE_TYPE (gpr); 6767 t = build2 (MODIFY_EXPR, type, 6768 gpr, build_int_cst (type, n_gpr * 8)); 6769 TREE_SIDE_EFFECTS (t) = 1; 6770 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6771 } 6772 6773 if (TARGET_SSE && cfun->va_list_fpr_size) 6774 { 6775 type = TREE_TYPE (fpr); 6776 t = build2 (MODIFY_EXPR, type, fpr, 6777 build_int_cst (type, n_fpr * 16 + 8*X86_64_REGPARM_MAX)); 6778 TREE_SIDE_EFFECTS (t) = 1; 6779 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6780 } 6781 6782 /* Find the overflow area. */ 6783 type = TREE_TYPE (ovf); 6784 t = make_tree (type, crtl->args.internal_arg_pointer); 6785 if (words != 0) 6786 t = build2 (POINTER_PLUS_EXPR, type, t, 6787 size_int (words * UNITS_PER_WORD)); 6788 t = build2 (MODIFY_EXPR, type, ovf, t); 6789 TREE_SIDE_EFFECTS (t) = 1; 6790 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6791 6792 if (ix86_varargs_gpr_size || ix86_varargs_fpr_size) 6793 { 6794 /* Find the register save area. 6795 Prologue of the function save it right above stack frame. */ 6796 type = TREE_TYPE (sav); 6797 t = make_tree (type, frame_pointer_rtx); 6798 if (!ix86_varargs_gpr_size) 6799 t = build2 (POINTER_PLUS_EXPR, type, t, 6800 size_int (-8 * X86_64_REGPARM_MAX)); 6801 t = build2 (MODIFY_EXPR, type, sav, t); 6802 TREE_SIDE_EFFECTS (t) = 1; 6803 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL); 6804 } 6805 } 6806 6807 /* Implement va_arg. */ 6808 6809 static tree 6810 ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p, 6811 gimple_seq *post_p) 6812 { 6813 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 }; 6814 tree f_gpr, f_fpr, f_ovf, f_sav; 6815 tree gpr, fpr, ovf, sav, t; 6816 int size, rsize; 6817 tree lab_false, lab_over = NULL_TREE; 6818 tree addr, t2; 6819 rtx container; 6820 int indirect_p = 0; 6821 tree ptrtype; 6822 enum machine_mode nat_mode; 6823 int arg_boundary; 6824 6825 /* Only 64bit target needs something special. */ 6826 if (!TARGET_64BIT || is_va_list_char_pointer (TREE_TYPE (valist))) 6827 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p); 6828 6829 f_gpr = TYPE_FIELDS (TREE_TYPE (sysv_va_list_type_node)); 6830 f_fpr = TREE_CHAIN (f_gpr); 6831 f_ovf = TREE_CHAIN (f_fpr); 6832 f_sav = TREE_CHAIN (f_ovf); 6833 6834 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), 6835 build_va_arg_indirect_ref (valist), f_gpr, NULL_TREE); 6836 valist = build_va_arg_indirect_ref (valist); 6837 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE); 6838 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE); 6839 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE); 6840 6841 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false); 6842 if (indirect_p) 6843 type = build_pointer_type (type); 6844 size = int_size_in_bytes (type); 6845 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD; 6846 6847 nat_mode = type_natural_mode (type, NULL); 6848 switch (nat_mode) 6849 { 6850 case V8SFmode: 6851 case V8SImode: 6852 case V32QImode: 6853 case V16HImode: 6854 case V4DFmode: 6855 case V4DImode: 6856 /* Unnamed 256bit vector mode parameters are passed on stack. 
*/ 6857 if (ix86_cfun_abi () == SYSV_ABI) 6858 { 6859 container = NULL; 6860 break; 6861 } 6862 6863 default: 6864 container = construct_container (nat_mode, TYPE_MODE (type), 6865 type, 0, X86_64_REGPARM_MAX, 6866 X86_64_SSE_REGPARM_MAX, intreg, 6867 0); 6868 break; 6869 } 6870 6871 /* Pull the value out of the saved registers. */ 6872 6873 addr = create_tmp_var (ptr_type_node, "addr"); 6874 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set (); 6875 6876 if (container) 6877 { 6878 int needed_intregs, needed_sseregs; 6879 bool need_temp; 6880 tree int_addr, sse_addr; 6881 6882 lab_false = create_artificial_label (); 6883 lab_over = create_artificial_label (); 6884 6885 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs); 6886 6887 need_temp = (!REG_P (container) 6888 && ((needed_intregs && TYPE_ALIGN (type) > 64) 6889 || TYPE_ALIGN (type) > 128)); 6890 6891 /* In case we are passing structure, verify that it is consecutive block 6892 on the register save area. If not we need to do moves. */ 6893 if (!need_temp && !REG_P (container)) 6894 { 6895 /* Verify that all registers are strictly consecutive */ 6896 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0)))) 6897 { 6898 int i; 6899 6900 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 6901 { 6902 rtx slot = XVECEXP (container, 0, i); 6903 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i 6904 || INTVAL (XEXP (slot, 1)) != i * 16) 6905 need_temp = 1; 6906 } 6907 } 6908 else 6909 { 6910 int i; 6911 6912 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++) 6913 { 6914 rtx slot = XVECEXP (container, 0, i); 6915 if (REGNO (XEXP (slot, 0)) != (unsigned int) i 6916 || INTVAL (XEXP (slot, 1)) != i * 8) 6917 need_temp = 1; 6918 } 6919 } 6920 } 6921 if (!need_temp) 6922 { 6923 int_addr = addr; 6924 sse_addr = addr; 6925 } 6926 else 6927 { 6928 int_addr = create_tmp_var (ptr_type_node, "int_addr"); 6929 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set (); 6930 sse_addr = create_tmp_var (ptr_type_node, "sse_addr"); 6931 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set (); 6932 } 6933 6934 /* First ensure that we fit completely in registers. */ 6935 if (needed_intregs) 6936 { 6937 t = build_int_cst (TREE_TYPE (gpr), 6938 (X86_64_REGPARM_MAX - needed_intregs + 1) * 8); 6939 t = build2 (GE_EXPR, boolean_type_node, gpr, t); 6940 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 6941 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 6942 gimplify_and_add (t, pre_p); 6943 } 6944 if (needed_sseregs) 6945 { 6946 t = build_int_cst (TREE_TYPE (fpr), 6947 (X86_64_SSE_REGPARM_MAX - needed_sseregs + 1) * 16 6948 + X86_64_REGPARM_MAX * 8); 6949 t = build2 (GE_EXPR, boolean_type_node, fpr, t); 6950 t2 = build1 (GOTO_EXPR, void_type_node, lab_false); 6951 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE); 6952 gimplify_and_add (t, pre_p); 6953 } 6954 6955 /* Compute index to start of area used for integer regs. 
*/ 6956 if (needed_intregs) 6957 { 6958 /* int_addr = gpr + sav; */ 6959 t = fold_convert (sizetype, gpr); 6960 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); 6961 gimplify_assign (int_addr, t, pre_p); 6962 } 6963 if (needed_sseregs) 6964 { 6965 /* sse_addr = fpr + sav; */ 6966 t = fold_convert (sizetype, fpr); 6967 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t); 6968 gimplify_assign (sse_addr, t, pre_p); 6969 } 6970 if (need_temp) 6971 { 6972 int i; 6973 tree temp = create_tmp_var (type, "va_arg_tmp"); 6974 6975 /* addr = &temp; */ 6976 t = build1 (ADDR_EXPR, build_pointer_type (type), temp); 6977 gimplify_assign (addr, t, pre_p); 6978 6979 for (i = 0; i < XVECLEN (container, 0); i++) 6980 { 6981 rtx slot = XVECEXP (container, 0, i); 6982 rtx reg = XEXP (slot, 0); 6983 enum machine_mode mode = GET_MODE (reg); 6984 tree piece_type = lang_hooks.types.type_for_mode (mode, 1); 6985 tree addr_type = build_pointer_type (piece_type); 6986 tree daddr_type = build_pointer_type_for_mode (piece_type, 6987 ptr_mode, true); 6988 tree src_addr, src; 6989 int src_offset; 6990 tree dest_addr, dest; 6991 6992 if (SSE_REGNO_P (REGNO (reg))) 6993 { 6994 src_addr = sse_addr; 6995 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16; 6996 } 6997 else 6998 { 6999 src_addr = int_addr; 7000 src_offset = REGNO (reg) * 8; 7001 } 7002 src_addr = fold_convert (addr_type, src_addr); 7003 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr, 7004 size_int (src_offset)); 7005 src = build_va_arg_indirect_ref (src_addr); 7006 7007 dest_addr = fold_convert (daddr_type, addr); 7008 dest_addr = fold_build2 (POINTER_PLUS_EXPR, daddr_type, dest_addr, 7009 size_int (INTVAL (XEXP (slot, 1)))); 7010 dest = build_va_arg_indirect_ref (dest_addr); 7011 7012 gimplify_assign (dest, src, pre_p); 7013 } 7014 } 7015 7016 if (needed_intregs) 7017 { 7018 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr, 7019 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8)); 7020 gimplify_assign (gpr, t, pre_p); 7021 } 7022 7023 if (needed_sseregs) 7024 { 7025 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr, 7026 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16)); 7027 gimplify_assign (fpr, t, pre_p); 7028 } 7029 7030 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over)); 7031 7032 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false)); 7033 } 7034 7035 /* ... otherwise out of the overflow area. */ 7036 7037 /* When we align parameter on stack for caller, if the parameter 7038 alignment is beyond MAX_SUPPORTED_STACK_ALIGNMENT, it will be 7039 aligned at MAX_SUPPORTED_STACK_ALIGNMENT. We will match callee 7040 here with caller. */ 7041 arg_boundary = FUNCTION_ARG_BOUNDARY (VOIDmode, type); 7042 if ((unsigned int) arg_boundary > MAX_SUPPORTED_STACK_ALIGNMENT) 7043 arg_boundary = MAX_SUPPORTED_STACK_ALIGNMENT; 7044 7045 /* Care for on-stack alignment if needed. 
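   The expression built below is the usual round-up, equivalent to
       addr = (ovf + align - 1) & -align;
   e.g. for a hypothetical 16-byte-aligned argument with the overflow pointer
   currently ending in ...8, the value is fetched from the next multiple of
   16 and ovf is bumped past it afterwards.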
*/ 7046 if (arg_boundary <= 64 7047 || integer_zerop (TYPE_SIZE (type))) 7048 t = ovf; 7049 else 7050 { 7051 HOST_WIDE_INT align = arg_boundary / 8; 7052 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf, 7053 size_int (align - 1)); 7054 t = fold_convert (sizetype, t); 7055 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t, 7056 size_int (-align)); 7057 t = fold_convert (TREE_TYPE (ovf), t); 7058 } 7059 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue); 7060 gimplify_assign (addr, t, pre_p); 7061 7062 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t, 7063 size_int (rsize * UNITS_PER_WORD)); 7064 gimplify_assign (unshare_expr (ovf), t, pre_p); 7065 7066 if (container) 7067 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over)); 7068 7069 ptrtype = build_pointer_type (type); 7070 addr = fold_convert (ptrtype, addr); 7071 7072 if (indirect_p) 7073 addr = build_va_arg_indirect_ref (addr); 7074 return build_va_arg_indirect_ref (addr); 7075 } 7076 7077 /* Return nonzero if OPNUM's MEM should be matched 7078 in movabs* patterns. */ 7079 7080 int 7081 ix86_check_movabs (rtx insn, int opnum) 7082 { 7083 rtx set, mem; 7084 7085 set = PATTERN (insn); 7086 if (GET_CODE (set) == PARALLEL) 7087 set = XVECEXP (set, 0, 0); 7088 gcc_assert (GET_CODE (set) == SET); 7089 mem = XEXP (set, opnum); 7090 while (GET_CODE (mem) == SUBREG) 7091 mem = SUBREG_REG (mem); 7092 gcc_assert (MEM_P (mem)); 7093 return (volatile_ok || !MEM_VOLATILE_P (mem)); 7094 } 7095 7096 /* Initialize the table of extra 80387 mathematical constants. */ 7097 7098 static void 7099 init_ext_80387_constants (void) 7100 { 7101 static const char * cst[5] = 7102 { 7103 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */ 7104 "0.6931471805599453094286904741849753009", /* 1: fldln2 */ 7105 "1.4426950408889634073876517827983434472", /* 2: fldl2e */ 7106 "3.3219280948873623478083405569094566090", /* 3: fldl2t */ 7107 "3.1415926535897932385128089594061862044", /* 4: fldpi */ 7108 }; 7109 int i; 7110 7111 for (i = 0; i < 5; i++) 7112 { 7113 real_from_string (&ext_80387_constants_table[i], cst[i]); 7114 /* Ensure each constant is rounded to XFmode precision. */ 7115 real_convert (&ext_80387_constants_table[i], 7116 XFmode, &ext_80387_constants_table[i]); 7117 } 7118 7119 ext_80387_constants_init = 1; 7120 } 7121 7122 /* Return true if the constant is something that can be loaded with 7123 a special instruction. */ 7124 7125 int 7126 standard_80387_constant_p (rtx x) 7127 { 7128 enum machine_mode mode = GET_MODE (x); 7129 7130 REAL_VALUE_TYPE r; 7131 7132 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE))) 7133 return -1; 7134 7135 if (x == CONST0_RTX (mode)) 7136 return 1; 7137 if (x == CONST1_RTX (mode)) 7138 return 2; 7139 7140 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 7141 7142 /* For XFmode constants, try to find a special 80387 instruction when 7143 optimizing for size or on those CPUs that benefit from them. */ 7144 if (mode == XFmode 7145 && (optimize_function_for_size_p (cfun) || TARGET_EXT_80387_CONSTANTS)) 7146 { 7147 int i; 7148 7149 if (! ext_80387_constants_init) 7150 init_ext_80387_constants (); 7151 7152 for (i = 0; i < 5; i++) 7153 if (real_identical (&r, &ext_80387_constants_table[i])) 7154 return i + 3; 7155 } 7156 7157 /* Load of the constant -0.0 or -1.0 will be split as 7158 fldz;fchs or fld1;fchs sequence. 
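   For instance, an XFmode (const_double -1.0) makes this function return 9;
   standard_80387_constant_opcode maps both 8 and 9 to "#", so the insn is
   left to the splitter, which emits the fld1 / fchs (or fldz / fchs) pair.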
*/ 7159 if (real_isnegzero (&r)) 7160 return 8; 7161 if (real_identical (&r, &dconstm1)) 7162 return 9; 7163 7164 return 0; 7165 } 7166 7167 /* Return the opcode of the special instruction to be used to load 7168 the constant X. */ 7169 7170 const char * 7171 standard_80387_constant_opcode (rtx x) 7172 { 7173 switch (standard_80387_constant_p (x)) 7174 { 7175 case 1: 7176 return "fldz"; 7177 case 2: 7178 return "fld1"; 7179 case 3: 7180 return "fldlg2"; 7181 case 4: 7182 return "fldln2"; 7183 case 5: 7184 return "fldl2e"; 7185 case 6: 7186 return "fldl2t"; 7187 case 7: 7188 return "fldpi"; 7189 case 8: 7190 case 9: 7191 return "#"; 7192 default: 7193 gcc_unreachable (); 7194 } 7195 } 7196 7197 /* Return the CONST_DOUBLE representing the 80387 constant that is 7198 loaded by the specified special instruction. The argument IDX 7199 matches the return value from standard_80387_constant_p. */ 7200 7201 rtx 7202 standard_80387_constant_rtx (int idx) 7203 { 7204 int i; 7205 7206 if (! ext_80387_constants_init) 7207 init_ext_80387_constants (); 7208 7209 switch (idx) 7210 { 7211 case 3: 7212 case 4: 7213 case 5: 7214 case 6: 7215 case 7: 7216 i = idx - 3; 7217 break; 7218 7219 default: 7220 gcc_unreachable (); 7221 } 7222 7223 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i], 7224 XFmode); 7225 } 7226 7227 /* Return 1 if mode is a valid mode for sse. */ 7228 static int 7229 standard_sse_mode_p (enum machine_mode mode) 7230 { 7231 switch (mode) 7232 { 7233 case V16QImode: 7234 case V8HImode: 7235 case V4SImode: 7236 case V2DImode: 7237 case V4SFmode: 7238 case V2DFmode: 7239 return 1; 7240 7241 default: 7242 return 0; 7243 } 7244 } 7245 7246 /* Return 1 if X is all 0s. For all 1s, return 2 if X is in 128bit 7247 SSE modes and SSE2 is enabled, return 3 if X is in 256bit AVX 7248 modes and AVX is enabled. */ 7249 7250 int 7251 standard_sse_constant_p (rtx x) 7252 { 7253 enum machine_mode mode = GET_MODE (x); 7254 7255 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x))) 7256 return 1; 7257 if (vector_all_ones_operand (x, mode)) 7258 { 7259 if (standard_sse_mode_p (mode)) 7260 return TARGET_SSE2 ? 2 : -2; 7261 else if (VALID_AVX256_REG_MODE (mode)) 7262 return TARGET_AVX ? 3 : -3; 7263 } 7264 7265 return 0; 7266 } 7267 7268 /* Return the opcode of the special instruction to be used to load 7269 the constant X. */ 7270 7271 const char * 7272 standard_sse_constant_opcode (rtx insn, rtx x) 7273 { 7274 switch (standard_sse_constant_p (x)) 7275 { 7276 case 1: 7277 switch (get_attr_mode (insn)) 7278 { 7279 case MODE_V4SF: 7280 return TARGET_AVX ? "vxorps\t%0, %0, %0" : "xorps\t%0, %0"; 7281 case MODE_V2DF: 7282 return TARGET_AVX ? "vxorpd\t%0, %0, %0" : "xorpd\t%0, %0"; 7283 case MODE_TI: 7284 return TARGET_AVX ? 
"vpxor\t%0, %0, %0" : "pxor\t%0, %0"; 7285 case MODE_V8SF: 7286 return "vxorps\t%x0, %x0, %x0"; 7287 case MODE_V4DF: 7288 return "vxorpd\t%x0, %x0, %x0"; 7289 case MODE_OI: 7290 return "vpxor\t%x0, %x0, %x0"; 7291 default: 7292 gcc_unreachable (); 7293 } 7294 case 2: 7295 if (TARGET_AVX) 7296 switch (get_attr_mode (insn)) 7297 { 7298 case MODE_V4SF: 7299 case MODE_V2DF: 7300 case MODE_TI: 7301 return "vpcmpeqd\t%0, %0, %0"; 7302 break; 7303 default: 7304 gcc_unreachable (); 7305 } 7306 else 7307 return "pcmpeqd\t%0, %0"; 7308 } 7309 gcc_unreachable (); 7310 } 7311 7312 /* Returns 1 if OP contains a symbol reference */ 7313 7314 int 7315 symbolic_reference_mentioned_p (rtx op) 7316 { 7317 const char *fmt; 7318 int i; 7319 7320 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF) 7321 return 1; 7322 7323 fmt = GET_RTX_FORMAT (GET_CODE (op)); 7324 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--) 7325 { 7326 if (fmt[i] == 'E') 7327 { 7328 int j; 7329 7330 for (j = XVECLEN (op, i) - 1; j >= 0; j--) 7331 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j))) 7332 return 1; 7333 } 7334 7335 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i))) 7336 return 1; 7337 } 7338 7339 return 0; 7340 } 7341 7342 /* Return 1 if it is appropriate to emit `ret' instructions in the 7343 body of a function. Do this only if the epilogue is simple, needing a 7344 couple of insns. Prior to reloading, we can't tell how many registers 7345 must be saved, so return 0 then. Return 0 if there is no frame 7346 marker to de-allocate. */ 7347 7348 int 7349 ix86_can_use_return_insn_p (void) 7350 { 7351 struct ix86_frame frame; 7352 7353 if (! reload_completed || frame_pointer_needed) 7354 return 0; 7355 7356 /* Don't allow more than 32 pop, since that's all we can do 7357 with one instruction. */ 7358 if (crtl->args.pops_args 7359 && crtl->args.size >= 32768) 7360 return 0; 7361 7362 ix86_compute_frame_layout (&frame); 7363 return frame.to_allocate == 0 && frame.padding05 == 0 && 7364 frame.nmsave_args == 0 && (frame.nregs + frame.nsseregs) == 0; 7365 } 7366 7367 /* Value should be nonzero if functions must have frame pointers. 7368 Zero means the frame pointer need not be set up (and parms may 7369 be accessed via the stack pointer) in functions that seem suitable. */ 7370 7371 int 7372 ix86_frame_pointer_required (void) 7373 { 7374 /* If we accessed previous frames, then the generated code expects 7375 to be able to access the saved ebp value in our frame. */ 7376 if (cfun->machine->accesses_prev_frame) 7377 return 1; 7378 7379 /* Several x86 os'es need a frame pointer for other reasons, 7380 usually pertaining to setjmp. */ 7381 if (SUBTARGET_FRAME_POINTER_REQUIRED) 7382 return 1; 7383 7384 if (TARGET_SAVE_ARGS) 7385 return 1; 7386 7387 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off 7388 the frame pointer by default. Turn it back on now if we've not 7389 got a leaf function. */ 7390 if (TARGET_OMIT_LEAF_FRAME_POINTER 7391 && (!current_function_is_leaf 7392 || ix86_current_function_calls_tls_descriptor)) 7393 return 1; 7394 7395 if (crtl->profile) 7396 return 1; 7397 7398 return 0; 7399 } 7400 7401 /* Record that the current function accesses previous call frames. 
*/ 7402 7403 void 7404 ix86_setup_frame_addresses (void) 7405 { 7406 cfun->machine->accesses_prev_frame = 1; 7407 } 7408 7409 #ifndef USE_HIDDEN_LINKONCE 7410 # if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO 7411 # define USE_HIDDEN_LINKONCE 1 7412 # else 7413 # define USE_HIDDEN_LINKONCE 0 7414 # endif 7415 #endif 7416 7417 static int pic_labels_used; 7418 7419 /* Fills in the label name that should be used for a pc thunk for 7420 the given register. */ 7421 7422 static void 7423 get_pc_thunk_name (char name[32], unsigned int regno) 7424 { 7425 gcc_assert (!TARGET_64BIT); 7426 7427 if (USE_HIDDEN_LINKONCE) 7428 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]); 7429 else 7430 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno); 7431 } 7432 7433 7434 /* This function generates code for -fpic that loads %ebx with 7435 the return address of the caller and then returns. */ 7436 7437 void 7438 ix86_file_end (void) 7439 { 7440 rtx xops[2]; 7441 int regno; 7442 7443 for (regno = 0; regno < 8; ++regno) 7444 { 7445 char name[32]; 7446 7447 if (! ((pic_labels_used >> regno) & 1)) 7448 continue; 7449 7450 get_pc_thunk_name (name, regno); 7451 7452 #if TARGET_MACHO 7453 if (TARGET_MACHO) 7454 { 7455 switch_to_section (darwin_sections[text_coal_section]); 7456 fputs ("\t.weak_definition\t", asm_out_file); 7457 assemble_name (asm_out_file, name); 7458 fputs ("\n\t.private_extern\t", asm_out_file); 7459 assemble_name (asm_out_file, name); 7460 fputs ("\n", asm_out_file); 7461 ASM_OUTPUT_LABEL (asm_out_file, name); 7462 } 7463 else 7464 #endif 7465 if (USE_HIDDEN_LINKONCE) 7466 { 7467 tree decl; 7468 7469 decl = build_decl (FUNCTION_DECL, get_identifier (name), 7470 error_mark_node); 7471 TREE_PUBLIC (decl) = 1; 7472 TREE_STATIC (decl) = 1; 7473 DECL_ONE_ONLY (decl) = 1; 7474 7475 (*targetm.asm_out.unique_section) (decl, 0); 7476 switch_to_section (get_named_section (decl, NULL, 0)); 7477 7478 (*targetm.asm_out.globalize_label) (asm_out_file, name); 7479 fputs ("\t.hidden\t", asm_out_file); 7480 assemble_name (asm_out_file, name); 7481 fputc ('\n', asm_out_file); 7482 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl); 7483 } 7484 else 7485 { 7486 switch_to_section (text_section); 7487 ASM_OUTPUT_LABEL (asm_out_file, name); 7488 } 7489 7490 xops[0] = gen_rtx_REG (Pmode, regno); 7491 xops[1] = gen_rtx_MEM (Pmode, stack_pointer_rtx); 7492 output_asm_insn ("mov%z0\t{%1, %0|%0, %1}", xops); 7493 output_asm_insn ("ret", xops); 7494 } 7495 7496 if (NEED_INDICATE_EXEC_STACK) 7497 file_end_indicate_exec_stack (); 7498 } 7499 7500 /* Emit code for the SET_GOT patterns. */ 7501 7502 const char * 7503 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED) 7504 { 7505 rtx xops[3]; 7506 7507 xops[0] = dest; 7508 7509 if (TARGET_VXWORKS_RTP && flag_pic) 7510 { 7511 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */ 7512 xops[2] = gen_rtx_MEM (Pmode, 7513 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE)); 7514 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops); 7515 7516 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register. 7517 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as 7518 an unadorned address. */ 7519 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX); 7520 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL; 7521 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops); 7522 return ""; 7523 } 7524 7525 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME); 7526 7527 if (! 
TARGET_DEEP_BRANCH_PREDICTION || !flag_pic) 7528 { 7529 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ()); 7530 7531 if (!flag_pic) 7532 output_asm_insn ("mov%z0\t{%2, %0|%0, %2}", xops); 7533 else 7534 output_asm_insn ("call\t%a2", xops); 7535 7536 #if TARGET_MACHO 7537 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 7538 is what will be referenced by the Mach-O PIC subsystem. */ 7539 if (!label) 7540 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); 7541 #endif 7542 7543 (*targetm.asm_out.internal_label) (asm_out_file, "L", 7544 CODE_LABEL_NUMBER (XEXP (xops[2], 0))); 7545 7546 if (flag_pic) 7547 output_asm_insn ("pop%z0\t%0", xops); 7548 } 7549 else 7550 { 7551 char name[32]; 7552 get_pc_thunk_name (name, REGNO (dest)); 7553 pic_labels_used |= 1 << REGNO (dest); 7554 7555 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name)); 7556 xops[2] = gen_rtx_MEM (QImode, xops[2]); 7557 output_asm_insn ("call\t%X2", xops); 7558 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This 7559 is what will be referenced by the Mach-O PIC subsystem. */ 7560 #if TARGET_MACHO 7561 if (!label) 7562 ASM_OUTPUT_LABEL (asm_out_file, MACHOPIC_FUNCTION_BASE_NAME); 7563 else 7564 targetm.asm_out.internal_label (asm_out_file, "L", 7565 CODE_LABEL_NUMBER (label)); 7566 #endif 7567 } 7568 7569 if (TARGET_MACHO) 7570 return ""; 7571 7572 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION) 7573 output_asm_insn ("add%z0\t{%1, %0|%0, %1}", xops); 7574 else 7575 output_asm_insn ("add%z0\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops); 7576 7577 return ""; 7578 } 7579 7580 /* Generate an "push" pattern for input ARG. */ 7581 7582 static rtx 7583 gen_push (rtx arg) 7584 { 7585 return gen_rtx_SET (VOIDmode, 7586 gen_rtx_MEM (Pmode, 7587 gen_rtx_PRE_DEC (Pmode, 7588 stack_pointer_rtx)), 7589 arg); 7590 } 7591 7592 /* Return >= 0 if there is an unused call-clobbered register available 7593 for the entire function. */ 7594 7595 static unsigned int 7596 ix86_select_alt_pic_regnum (void) 7597 { 7598 if (current_function_is_leaf && !crtl->profile 7599 && !ix86_current_function_calls_tls_descriptor) 7600 { 7601 int i, drap; 7602 /* Can't use the same register for both PIC and DRAP. */ 7603 if (crtl->drap_reg) 7604 drap = REGNO (crtl->drap_reg); 7605 else 7606 drap = -1; 7607 for (i = 2; i >= 0; --i) 7608 if (i != drap && !df_regs_ever_live_p (i)) 7609 return i; 7610 } 7611 7612 return INVALID_REGNUM; 7613 } 7614 7615 /* Return 1 if we need to save REGNO. */ 7616 static int 7617 ix86_save_reg (unsigned int regno, int maybe_eh_return) 7618 { 7619 if (pic_offset_table_rtx 7620 && regno == REAL_PIC_OFFSET_TABLE_REGNUM 7621 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) 7622 || crtl->profile 7623 || crtl->calls_eh_return 7624 || crtl->uses_const_pool)) 7625 { 7626 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM) 7627 return 0; 7628 return 1; 7629 } 7630 7631 if (crtl->calls_eh_return && maybe_eh_return) 7632 { 7633 unsigned i; 7634 for (i = 0; ; i++) 7635 { 7636 unsigned test = EH_RETURN_DATA_REGNO (i); 7637 if (test == INVALID_REGNUM) 7638 break; 7639 if (test == regno) 7640 return 1; 7641 } 7642 } 7643 7644 if (crtl->drap_reg 7645 && regno == REGNO (crtl->drap_reg)) 7646 return 1; 7647 7648 return (df_regs_ever_live_p (regno) 7649 && !call_used_regs[regno] 7650 && !fixed_regs[regno] 7651 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed)); 7652 } 7653 7654 /* Return number of saved general prupose registers. 
*/ 7655 7656 static int 7657 ix86_nsaved_regs (void) 7658 { 7659 int nregs = 0; 7660 int regno; 7661 7662 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7663 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7664 nregs ++; 7665 return nregs; 7666 } 7667 7668 /* Return number of saved SSE registrers. */ 7669 7670 static int 7671 ix86_nsaved_sseregs (void) 7672 { 7673 int nregs = 0; 7674 int regno; 7675 7676 if (ix86_cfun_abi () != MS_ABI) 7677 return 0; 7678 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7679 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7680 nregs ++; 7681 return nregs; 7682 } 7683 7684 /* Given FROM and TO register numbers, say whether this elimination is 7685 allowed. If stack alignment is needed, we can only replace argument 7686 pointer with hard frame pointer, or replace frame pointer with stack 7687 pointer. Otherwise, frame pointer elimination is automatically 7688 handled and all other eliminations are valid. */ 7689 7690 int 7691 ix86_can_eliminate (int from, int to) 7692 { 7693 if (stack_realign_fp) 7694 return ((from == ARG_POINTER_REGNUM 7695 && to == HARD_FRAME_POINTER_REGNUM) 7696 || (from == FRAME_POINTER_REGNUM 7697 && to == STACK_POINTER_REGNUM)); 7698 else 7699 return to == STACK_POINTER_REGNUM ? !frame_pointer_needed : 1; 7700 } 7701 7702 /* Return the offset between two registers, one to be eliminated, and the other 7703 its replacement, at the start of a routine. */ 7704 7705 HOST_WIDE_INT 7706 ix86_initial_elimination_offset (int from, int to) 7707 { 7708 struct ix86_frame frame; 7709 ix86_compute_frame_layout (&frame); 7710 7711 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM) 7712 return frame.hard_frame_pointer_offset; 7713 else if (from == FRAME_POINTER_REGNUM 7714 && to == HARD_FRAME_POINTER_REGNUM) 7715 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset; 7716 else 7717 { 7718 gcc_assert (to == STACK_POINTER_REGNUM); 7719 7720 if (from == ARG_POINTER_REGNUM) 7721 return frame.stack_pointer_offset; 7722 7723 gcc_assert (from == FRAME_POINTER_REGNUM); 7724 return frame.stack_pointer_offset - frame.frame_pointer_offset; 7725 } 7726 } 7727 7728 /* In a dynamically-aligned function, we can't know the offset from 7729 stack pointer to frame pointer, so we must ensure that setjmp 7730 eliminates fp against the hard fp (%ebp) rather than trying to 7731 index from %esp up to the top of the frame across a gap that is 7732 of unknown (at compile-time) size. */ 7733 static rtx 7734 ix86_builtin_setjmp_frame_value (void) 7735 { 7736 return stack_realign_fp ? hard_frame_pointer_rtx : virtual_stack_vars_rtx; 7737 } 7738 7739 /* Fill structure ix86_frame about frame of currently computed function. */ 7740 7741 static void 7742 ix86_compute_frame_layout (struct ix86_frame *frame) 7743 { 7744 HOST_WIDE_INT total_size; 7745 unsigned int stack_alignment_needed; 7746 HOST_WIDE_INT offset; 7747 unsigned int preferred_alignment; 7748 HOST_WIDE_INT size = get_frame_size (); 7749 7750 frame->nregs = ix86_nsaved_regs (); 7751 frame->nsseregs = ix86_nsaved_sseregs (); 7752 frame->nmsave_args = ix86_nsaved_args (); 7753 total_size = size; 7754 7755 stack_alignment_needed = crtl->stack_alignment_needed / BITS_PER_UNIT; 7756 preferred_alignment = crtl->preferred_stack_boundary / BITS_PER_UNIT; 7757 7758 /* MS ABI seem to require stack alignment to be always 16 except for function 7759 prologues. 
*/ 7760 if (ix86_cfun_abi () == MS_ABI && preferred_alignment < 16) 7761 { 7762 preferred_alignment = 16; 7763 stack_alignment_needed = 16; 7764 crtl->preferred_stack_boundary = 128; 7765 crtl->stack_alignment_needed = 128; 7766 } 7767 7768 gcc_assert (!size || stack_alignment_needed); 7769 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT); 7770 gcc_assert (preferred_alignment <= stack_alignment_needed); 7771 7772 /* During reload iteration the amount of registers saved can change. 7773 Recompute the value as needed. Do not recompute when amount of registers 7774 didn't change as reload does multiple calls to the function and does not 7775 expect the decision to change within single iteration. */ 7776 if (!optimize_function_for_size_p (cfun) 7777 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs) 7778 { 7779 int count = frame->nregs; 7780 7781 cfun->machine->use_fast_prologue_epilogue_nregs = count; 7782 /* The fast prologue uses move instead of push to save registers. This 7783 is significantly longer, but also executes faster as modern hardware 7784 can execute the moves in parallel, but can't do that for push/pop. 7785 7786 Be careful about choosing what prologue to emit: When function takes 7787 many instructions to execute we may use slow version as well as in 7788 case function is known to be outside hot spot (this is known with 7789 feedback only). Weight the size of function by number of registers 7790 to save as it is cheap to use one or two push instructions but very 7791 slow to use many of them. */ 7792 if (count) 7793 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT; 7794 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL 7795 || (flag_branch_probabilities 7796 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT)) 7797 cfun->machine->use_fast_prologue_epilogue = false; 7798 else 7799 cfun->machine->use_fast_prologue_epilogue 7800 = !expensive_function_p (count); 7801 } 7802 if (TARGET_PROLOGUE_USING_MOVE 7803 && cfun->machine->use_fast_prologue_epilogue) 7804 frame->save_regs_using_mov = true; 7805 else 7806 frame->save_regs_using_mov = false; 7807 7808 if (TARGET_SAVE_ARGS) 7809 { 7810 cfun->machine->use_fast_prologue_epilogue = true; 7811 frame->save_regs_using_mov = true; 7812 } 7813 7814 /* Skip return address and saved base pointer. */ 7815 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD; 7816 7817 frame->hard_frame_pointer_offset = offset; 7818 7819 /* Set offset to aligned because the realigned frame starts from 7820 here. */ 7821 if (stack_realign_fp) 7822 offset = (offset + stack_alignment_needed -1) & -stack_alignment_needed; 7823 7824 /* Argument save area */ 7825 if (TARGET_SAVE_ARGS) 7826 { 7827 offset += frame->nmsave_args * UNITS_PER_WORD; 7828 frame->padding0 = (frame->nmsave_args % 2) * UNITS_PER_WORD; 7829 offset += frame->padding0; 7830 } 7831 else 7832 frame->padding0 = 0; 7833 7834 /* Register save area */ 7835 offset += frame->nregs * UNITS_PER_WORD; 7836 7837 /* Align SSE reg save area. */ 7838 if (frame->nsseregs) 7839 frame->padding05 = ((offset + 16 - 1) & -16) - offset; 7840 else 7841 frame->padding05 = 0; 7842 7843 /* SSE register save area. */ 7844 offset += frame->padding05 + frame->nsseregs * 16; 7845 7846 /* Va-arg area */ 7847 frame->va_arg_size = ix86_varargs_gpr_size + ix86_varargs_fpr_size; 7848 offset += frame->va_arg_size; 7849 7850 /* Align start of frame for local function. 
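   A worked example: if offset is 44 at this point and stack_alignment_needed
   is 16, then padding1 = ((44 + 15) & -16) - 44 = 48 - 44 = 4, so the
   frame_pointer_offset recorded below lands on a 16-byte boundary.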
*/ 7851 frame->padding1 = ((offset + stack_alignment_needed - 1) 7852 & -stack_alignment_needed) - offset; 7853 7854 offset += frame->padding1; 7855 7856 /* Frame pointer points here. */ 7857 frame->frame_pointer_offset = offset; 7858 7859 offset += size; 7860 7861 /* Add outgoing arguments area. Can be skipped if we eliminated 7862 all the function calls as dead code. 7863 Skipping is however impossible when function calls alloca. Alloca 7864 expander assumes that last crtl->outgoing_args_size 7865 of stack frame are unused. */ 7866 if (ACCUMULATE_OUTGOING_ARGS 7867 && (!current_function_is_leaf || cfun->calls_alloca 7868 || ix86_current_function_calls_tls_descriptor)) 7869 { 7870 offset += crtl->outgoing_args_size; 7871 frame->outgoing_arguments_size = crtl->outgoing_args_size; 7872 } 7873 else 7874 frame->outgoing_arguments_size = 0; 7875 7876 /* Align stack boundary. Only needed if we're calling another function 7877 or using alloca. */ 7878 if (!current_function_is_leaf || cfun->calls_alloca 7879 || ix86_current_function_calls_tls_descriptor) 7880 frame->padding2 = ((offset + preferred_alignment - 1) 7881 & -preferred_alignment) - offset; 7882 else 7883 frame->padding2 = 0; 7884 7885 offset += frame->padding2; 7886 7887 /* We've reached end of stack frame. */ 7888 frame->stack_pointer_offset = offset; 7889 7890 /* Size prologue needs to allocate. */ 7891 frame->to_allocate = 7892 (size + frame->padding1 + frame->padding2 7893 + frame->outgoing_arguments_size + frame->va_arg_size); 7894 7895 if (!TARGET_SAVE_ARGS 7896 && ((!frame->to_allocate && frame->nregs <= 1) 7897 || (TARGET_64BIT 7898 && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))) 7899 frame->save_regs_using_mov = false; 7900 7901 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE 7902 && current_function_sp_is_unchanging 7903 && current_function_is_leaf 7904 && !ix86_current_function_calls_tls_descriptor) 7905 { 7906 frame->red_zone_size = frame->to_allocate; 7907 if (frame->save_regs_using_mov) 7908 { 7909 frame->red_zone_size 7910 += (frame->nregs + frame->nmsave_args) * UNITS_PER_WORD; 7911 frame->red_zone_size += frame->padding0; 7912 } 7913 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE) 7914 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE; 7915 } 7916 else 7917 frame->red_zone_size = 0; 7918 frame->to_allocate -= frame->red_zone_size; 7919 frame->stack_pointer_offset -= frame->red_zone_size; 7920 #if 0 7921 fprintf (stderr, "\n"); 7922 fprintf (stderr, "size: %ld\n", (long)size); 7923 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs); 7924 fprintf (stderr, "nsseregs: %ld\n", (long)frame->nsseregs); 7925 fprintf (stderr, "nmsave_args: %ld\n", (long)frame->nmsave_args); 7926 fprintf (stderr, "padding0: %ld\n", (long)frame->padding0); 7927 fprintf (stderr, "padding05: %ld\n", (long)frame->padding0); 7928 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed); 7929 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1); 7930 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size); 7931 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2); 7932 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate); 7933 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size); 7934 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset); 7935 fprintf (stderr, "hard_frame_pointer_offset: %ld\n", 7936 (long)frame->hard_frame_pointer_offset); 7937 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset); 7938 fprintf 
(stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf); 7939 fprintf (stderr, "cfun->calls_alloca: %ld\n", (long)cfun->calls_alloca); 7940 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor); 7941 #endif 7942 } 7943 7944 7945 /* Emit code to save registers in the prologue. */ 7946 7947 static void 7948 ix86_emit_save_regs (void) 7949 { 7950 unsigned int regno; 7951 rtx insn; 7952 7953 if (TARGET_SAVE_ARGS) 7954 { 7955 int i; 7956 int nsaved = ix86_nsaved_args (); 7957 int start = cfun->returns_struct; 7958 for (i = start; i < start + nsaved; i++) 7959 { 7960 regno = x86_64_int_parameter_registers[i]; 7961 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 7962 RTX_FRAME_RELATED_P (insn) = 1; 7963 } 7964 if (nsaved % 2 != 0) 7965 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 7966 GEN_INT (-UNITS_PER_WORD), -1); 7967 } 7968 7969 for (regno = FIRST_PSEUDO_REGISTER - 1; regno-- > 0; ) 7970 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7971 { 7972 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno))); 7973 RTX_FRAME_RELATED_P (insn) = 1; 7974 } 7975 } 7976 7977 /* Emit code to save registers using MOV insns. First register 7978 is restored from POINTER + OFFSET. */ 7979 static void 7980 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 7981 { 7982 unsigned int regno; 7983 rtx insn; 7984 7985 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 7986 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 7987 { 7988 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 7989 Pmode, offset), 7990 gen_rtx_REG (Pmode, regno)); 7991 RTX_FRAME_RELATED_P (insn) = 1; 7992 offset += UNITS_PER_WORD; 7993 } 7994 7995 if (TARGET_SAVE_ARGS) 7996 { 7997 int i; 7998 int nsaved = ix86_nsaved_args (); 7999 int start = cfun->returns_struct; 8000 if (nsaved % 2 != 0) 8001 offset += UNITS_PER_WORD; 8002 for (i = start + nsaved - 1; i >= start; i--) 8003 { 8004 regno = x86_64_int_parameter_registers[i]; 8005 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer), 8006 Pmode, offset), 8007 gen_rtx_REG (Pmode, regno)); 8008 RTX_FRAME_RELATED_P (insn) = 1; 8009 offset += UNITS_PER_WORD; 8010 } 8011 } 8012 } 8013 8014 /* Emit code to save registers using MOV insns. First register 8015 is restored from POINTER + OFFSET. */ 8016 static void 8017 ix86_emit_save_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset) 8018 { 8019 unsigned int regno; 8020 rtx insn; 8021 rtx mem; 8022 8023 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8024 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, true)) 8025 { 8026 mem = adjust_address (gen_rtx_MEM (TImode, pointer), TImode, offset); 8027 set_mem_align (mem, 128); 8028 insn = emit_move_insn (mem, gen_rtx_REG (TImode, regno)); 8029 RTX_FRAME_RELATED_P (insn) = 1; 8030 offset += 16; 8031 } 8032 } 8033 8034 /* Expand prologue or epilogue stack adjustment. 8035 The pattern exist to put a dependency on all ebp-based memory accesses. 8036 STYLE should be negative if instructions should be marked as frame related, 8037 zero if %r11 register is live and cannot be freely used and positive 8038 otherwise. */ 8039 8040 static void 8041 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style) 8042 { 8043 rtx insn; 8044 8045 if (! 
TARGET_64BIT) 8046 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset)); 8047 else if (x86_64_immediate_operand (offset, DImode)) 8048 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset)); 8049 else 8050 { 8051 rtx r11; 8052 /* r11 is used by indirect sibcall return as well, set before the 8053 epilogue and used after the epilogue. ATM indirect sibcall 8054 shouldn't be used together with huge frame sizes in one 8055 function because of the frame_size check in sibcall.c. */ 8056 gcc_assert (style); 8057 r11 = gen_rtx_REG (DImode, R11_REG); 8058 insn = emit_insn (gen_rtx_SET (DImode, r11, offset)); 8059 if (style < 0) 8060 RTX_FRAME_RELATED_P (insn) = 1; 8061 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11, 8062 offset)); 8063 } 8064 if (style < 0) 8065 RTX_FRAME_RELATED_P (insn) = 1; 8066 } 8067 8068 /* Find an available register to be used as dynamic realign argument 8069 pointer regsiter. Such a register will be written in prologue and 8070 used in begin of body, so it must not be 8071 1. parameter passing register. 8072 2. GOT pointer. 8073 We reuse static-chain register if it is available. Otherwise, we 8074 use DI for i386 and R13 for x86-64. We chose R13 since it has 8075 shorter encoding. 8076 8077 Return: the regno of chosen register. */ 8078 8079 static unsigned int 8080 find_drap_reg (void) 8081 { 8082 tree decl = cfun->decl; 8083 8084 if (TARGET_64BIT) 8085 { 8086 /* Use R13 for nested function or function need static chain. 8087 Since function with tail call may use any caller-saved 8088 registers in epilogue, DRAP must not use caller-saved 8089 register in such case. */ 8090 if ((decl_function_context (decl) 8091 && !DECL_NO_STATIC_CHAIN (decl)) 8092 || crtl->tail_call_emit) 8093 return R13_REG; 8094 8095 return R10_REG; 8096 } 8097 else 8098 { 8099 /* Use DI for nested function or function need static chain. 8100 Since function with tail call may use any caller-saved 8101 registers in epilogue, DRAP must not use caller-saved 8102 register in such case. */ 8103 if ((decl_function_context (decl) 8104 && !DECL_NO_STATIC_CHAIN (decl)) 8105 || crtl->tail_call_emit) 8106 return DI_REG; 8107 8108 /* Reuse static chain register if it isn't used for parameter 8109 passing. */ 8110 if (ix86_function_regparm (TREE_TYPE (decl), decl) <= 2 8111 && !lookup_attribute ("fastcall", 8112 TYPE_ATTRIBUTES (TREE_TYPE (decl)))) 8113 return CX_REG; 8114 else 8115 return DI_REG; 8116 } 8117 } 8118 8119 /* Update incoming stack boundary and estimated stack alignment. */ 8120 8121 static void 8122 ix86_update_stack_boundary (void) 8123 { 8124 /* Prefer the one specified at command line. */ 8125 ix86_incoming_stack_boundary 8126 = (ix86_user_incoming_stack_boundary 8127 ? ix86_user_incoming_stack_boundary 8128 : ix86_default_incoming_stack_boundary); 8129 8130 /* Incoming stack alignment can be changed on individual functions 8131 via force_align_arg_pointer attribute. We use the smallest 8132 incoming stack boundary. */ 8133 if (ix86_incoming_stack_boundary > MIN_STACK_BOUNDARY 8134 && lookup_attribute (ix86_force_align_arg_pointer_string, 8135 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) 8136 ix86_incoming_stack_boundary = MIN_STACK_BOUNDARY; 8137 8138 /* The incoming stack frame has to be aligned at least at 8139 parm_stack_boundary. */ 8140 if (ix86_incoming_stack_boundary < crtl->parm_stack_boundary) 8141 ix86_incoming_stack_boundary = crtl->parm_stack_boundary; 8142 8143 /* Stack at entrance of main is aligned by runtime. 
We use the 8144 smallest incoming stack boundary. */ 8145 if (ix86_incoming_stack_boundary > MAIN_STACK_BOUNDARY 8146 && DECL_NAME (current_function_decl) 8147 && MAIN_NAME_P (DECL_NAME (current_function_decl)) 8148 && DECL_FILE_SCOPE_P (current_function_decl)) 8149 ix86_incoming_stack_boundary = MAIN_STACK_BOUNDARY; 8150 8151 /* x86_64 vararg needs 16byte stack alignment for register save 8152 area. */ 8153 if (TARGET_64BIT 8154 && cfun->stdarg 8155 && crtl->stack_alignment_estimated < 128) 8156 crtl->stack_alignment_estimated = 128; 8157 } 8158 8159 /* Handle the TARGET_GET_DRAP_RTX hook. Return NULL if no DRAP is 8160 needed or an rtx for DRAP otherwise. */ 8161 8162 static rtx 8163 ix86_get_drap_rtx (void) 8164 { 8165 if (ix86_force_drap || !ACCUMULATE_OUTGOING_ARGS) 8166 crtl->need_drap = true; 8167 8168 if (stack_realign_drap) 8169 { 8170 /* Assign DRAP to vDRAP and returns vDRAP */ 8171 unsigned int regno = find_drap_reg (); 8172 rtx drap_vreg; 8173 rtx arg_ptr; 8174 rtx seq, insn; 8175 8176 arg_ptr = gen_rtx_REG (Pmode, regno); 8177 crtl->drap_reg = arg_ptr; 8178 8179 start_sequence (); 8180 drap_vreg = copy_to_reg (arg_ptr); 8181 seq = get_insns (); 8182 end_sequence (); 8183 8184 insn = emit_insn_before (seq, NEXT_INSN (entry_of_function ())); 8185 RTX_FRAME_RELATED_P (insn) = 1; 8186 return drap_vreg; 8187 } 8188 else 8189 return NULL; 8190 } 8191 8192 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */ 8193 8194 static rtx 8195 ix86_internal_arg_pointer (void) 8196 { 8197 return virtual_incoming_args_rtx; 8198 } 8199 8200 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook. 8201 This is called from dwarf2out.c to emit call frame instructions 8202 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */ 8203 static void 8204 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index) 8205 { 8206 rtx unspec = SET_SRC (pattern); 8207 gcc_assert (GET_CODE (unspec) == UNSPEC); 8208 8209 switch (index) 8210 { 8211 case UNSPEC_REG_SAVE: 8212 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0), 8213 SET_DEST (pattern)); 8214 break; 8215 case UNSPEC_DEF_CFA: 8216 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)), 8217 INTVAL (XVECEXP (unspec, 0, 0))); 8218 break; 8219 default: 8220 gcc_unreachable (); 8221 } 8222 } 8223 8224 /* Finalize stack_realign_needed flag, which will guide prologue/epilogue 8225 to be generated in correct form. */ 8226 static void 8227 ix86_finalize_stack_realign_flags (void) 8228 { 8229 /* Check if stack realign is really needed after reload, and 8230 stores result in cfun */ 8231 unsigned int incoming_stack_boundary 8232 = (crtl->parm_stack_boundary > ix86_incoming_stack_boundary 8233 ? crtl->parm_stack_boundary : ix86_incoming_stack_boundary); 8234 unsigned int stack_realign = (incoming_stack_boundary 8235 < (current_function_is_leaf 8236 ? crtl->max_used_stack_slot_alignment 8237 : crtl->stack_alignment_needed)); 8238 8239 if (crtl->stack_realign_finalized) 8240 { 8241 /* After stack_realign_needed is finalized, we can't no longer 8242 change it. */ 8243 gcc_assert (crtl->stack_realign_needed == stack_realign); 8244 } 8245 else 8246 { 8247 crtl->stack_realign_needed = stack_realign; 8248 crtl->stack_realign_finalized = true; 8249 } 8250 } 8251 8252 /* Expand the prologue into a bunch of separate insns. 
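   For the plain 32-bit frame-pointer case (no DRAP, no stack realignment,
   registers saved with push rather than mov) the insns emitted below
   roughly amount to:

       push %ebp
       mov  %esp, %ebp
       push <call-saved regs>
       sub  $to_allocate, %esp

   with the DRAP, stack-realignment, stack-probe and PIC-register code only
   appearing when the corresponding conditions hold; this is only a sketch,
   not the exact sequence.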
*/ 8253 8254 void 8255 ix86_expand_prologue (void) 8256 { 8257 rtx insn; 8258 bool pic_reg_used; 8259 struct ix86_frame frame; 8260 HOST_WIDE_INT allocate; 8261 8262 ix86_finalize_stack_realign_flags (); 8263 8264 /* DRAP should not coexist with stack_realign_fp */ 8265 gcc_assert (!(crtl->drap_reg && stack_realign_fp)); 8266 8267 ix86_compute_frame_layout (&frame); 8268 8269 /* Emit prologue code to adjust stack alignment and setup DRAP, in case 8270 of DRAP is needed and stack realignment is really needed after reload */ 8271 if (crtl->drap_reg && crtl->stack_realign_needed) 8272 { 8273 rtx x, y; 8274 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8275 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] 8276 ? 0 : UNITS_PER_WORD); 8277 8278 gcc_assert (stack_realign_drap); 8279 8280 /* Grab the argument pointer. */ 8281 x = plus_constant (stack_pointer_rtx, 8282 (UNITS_PER_WORD + param_ptr_offset)); 8283 y = crtl->drap_reg; 8284 8285 /* Only need to push parameter pointer reg if it is caller 8286 saved reg */ 8287 if (!call_used_regs[REGNO (crtl->drap_reg)]) 8288 { 8289 /* Push arg pointer reg */ 8290 insn = emit_insn (gen_push (y)); 8291 RTX_FRAME_RELATED_P (insn) = 1; 8292 } 8293 8294 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x)); 8295 RTX_FRAME_RELATED_P (insn) = 1; 8296 8297 /* Align the stack. */ 8298 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, 8299 stack_pointer_rtx, 8300 GEN_INT (-align_bytes))); 8301 RTX_FRAME_RELATED_P (insn) = 1; 8302 8303 /* Replicate the return address on the stack so that return 8304 address can be reached via (argp - 1) slot. This is needed 8305 to implement macro RETURN_ADDR_RTX and intrinsic function 8306 expand_builtin_return_addr etc. */ 8307 x = crtl->drap_reg; 8308 x = gen_frame_mem (Pmode, 8309 plus_constant (x, -UNITS_PER_WORD)); 8310 insn = emit_insn (gen_push (x)); 8311 RTX_FRAME_RELATED_P (insn) = 1; 8312 } 8313 8314 /* Note: AT&T enter does NOT have reversed args. Enter is probably 8315 slower on all targets. Also sdb doesn't like it. */ 8316 8317 if (frame_pointer_needed) 8318 { 8319 insn = emit_insn (gen_push (hard_frame_pointer_rtx)); 8320 RTX_FRAME_RELATED_P (insn) = 1; 8321 8322 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx); 8323 RTX_FRAME_RELATED_P (insn) = 1; 8324 } 8325 8326 if (stack_realign_fp) 8327 { 8328 int align_bytes = crtl->stack_alignment_needed / BITS_PER_UNIT; 8329 gcc_assert (align_bytes > MIN_STACK_BOUNDARY / BITS_PER_UNIT); 8330 8331 /* Align the stack. */ 8332 insn = emit_insn ((*ix86_gen_andsp) (stack_pointer_rtx, 8333 stack_pointer_rtx, 8334 GEN_INT (-align_bytes))); 8335 RTX_FRAME_RELATED_P (insn) = 1; 8336 } 8337 8338 allocate = frame.to_allocate + frame.nsseregs * 16 + frame.padding05; 8339 8340 if (!frame.save_regs_using_mov) 8341 ix86_emit_save_regs (); 8342 else 8343 allocate += (frame.nregs + frame.nmsave_args) * UNITS_PER_WORD 8344 + frame.padding0; 8345 8346 /* When using red zone we may start register saving before allocating 8347 the stack frame saving one cycle of the prologue. However I will 8348 avoid doing this if I am going to have to probe the stack since 8349 at least on x86_64 the stack probe can turn into a call that clobbers 8350 a red zone location */ 8351 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE && frame.save_regs_using_mov 8352 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)) 8353 ix86_emit_save_regs_using_mov ((frame_pointer_needed 8354 && !crtl->stack_realign_needed) 8355 ? 
hard_frame_pointer_rtx 8356 : stack_pointer_rtx, 8357 -(frame.nregs + frame.nmsave_args) 8358 * UNITS_PER_WORD - frame.padding0); 8359 8360 if (allocate == 0) 8361 ; 8362 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT) 8363 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8364 GEN_INT (-allocate), -1); 8365 else 8366 { 8367 rtx eax = gen_rtx_REG (Pmode, AX_REG); 8368 bool eax_live; 8369 rtx t; 8370 8371 if (cfun->machine->call_abi == MS_ABI) 8372 eax_live = false; 8373 else 8374 eax_live = ix86_eax_live_at_start_p (); 8375 8376 if (eax_live) 8377 { 8378 emit_insn (gen_push (eax)); 8379 allocate -= UNITS_PER_WORD; 8380 } 8381 8382 emit_move_insn (eax, GEN_INT (allocate)); 8383 8384 if (TARGET_64BIT) 8385 insn = gen_allocate_stack_worker_64 (eax, eax); 8386 else 8387 insn = gen_allocate_stack_worker_32 (eax, eax); 8388 insn = emit_insn (insn); 8389 RTX_FRAME_RELATED_P (insn) = 1; 8390 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate)); 8391 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t); 8392 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, 8393 t, REG_NOTES (insn)); 8394 8395 if (eax_live) 8396 { 8397 if (frame_pointer_needed) 8398 t = plus_constant (hard_frame_pointer_rtx, 8399 allocate 8400 - frame.to_allocate 8401 - frame.nregs * UNITS_PER_WORD); 8402 else 8403 t = plus_constant (stack_pointer_rtx, allocate); 8404 emit_move_insn (eax, gen_rtx_MEM (Pmode, t)); 8405 } 8406 } 8407 8408 if (frame.save_regs_using_mov 8409 && !(!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE 8410 && (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT))) 8411 { 8412 if (!TARGET_SAVE_ARGS && 8413 (!frame_pointer_needed 8414 || !(frame.to_allocate + frame.padding05) 8415 || crtl->stack_realign_needed)) 8416 ix86_emit_save_regs_using_mov (stack_pointer_rtx, 8417 frame.to_allocate 8418 + frame.nsseregs * 16 + frame.padding05); 8419 else 8420 /* XXX: Does this need help for SSE? */ 8421 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx, 8422 -(frame.nregs + frame.nmsave_args) 8423 * UNITS_PER_WORD - frame.padding0); 8424 } 8425 /* XXX: Does these need help for save-args? 
*/ 8426 if (!frame_pointer_needed 8427 || !(frame.to_allocate + frame.padding0) 8428 || crtl->stack_realign_needed) 8429 ix86_emit_save_sse_regs_using_mov (stack_pointer_rtx, 8430 frame.to_allocate); 8431 else 8432 ix86_emit_save_sse_regs_using_mov (hard_frame_pointer_rtx, 8433 - frame.nregs * UNITS_PER_WORD 8434 - frame.nsseregs * 16 8435 - frame.padding05); 8436 8437 pic_reg_used = false; 8438 if (pic_offset_table_rtx 8439 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM) 8440 || crtl->profile)) 8441 { 8442 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum (); 8443 8444 if (alt_pic_reg_used != INVALID_REGNUM) 8445 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used); 8446 8447 pic_reg_used = true; 8448 } 8449 8450 if (pic_reg_used) 8451 { 8452 if (TARGET_64BIT) 8453 { 8454 if (ix86_cmodel == CM_LARGE_PIC) 8455 { 8456 rtx tmp_reg = gen_rtx_REG (DImode, R11_REG); 8457 rtx label = gen_label_rtx (); 8458 emit_label (label); 8459 LABEL_PRESERVE_P (label) = 1; 8460 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg)); 8461 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label)); 8462 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label)); 8463 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx, 8464 pic_offset_table_rtx, tmp_reg)); 8465 } 8466 else 8467 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx)); 8468 } 8469 else 8470 insn = emit_insn (gen_set_got (pic_offset_table_rtx)); 8471 } 8472 8473 /* In the pic_reg_used case, make sure that the got load isn't deleted 8474 when mcount needs it. Blockage to avoid call movement across mcount 8475 call is emitted in generic code after the NOTE_INSN_PROLOGUE_END 8476 note. */ 8477 if (crtl->profile && pic_reg_used) 8478 emit_insn (gen_prologue_use (pic_offset_table_rtx)); 8479 8480 if (crtl->drap_reg && !crtl->stack_realign_needed) 8481 { 8482 /* vDRAP is setup but after reload it turns out stack realign 8483 isn't necessary, here we will emit prologue to setup DRAP 8484 without stack realign adjustment */ 8485 int drap_bp_offset = UNITS_PER_WORD * 2; 8486 rtx x = plus_constant (hard_frame_pointer_rtx, drap_bp_offset); 8487 insn = emit_insn (gen_rtx_SET (VOIDmode, crtl->drap_reg, x)); 8488 } 8489 8490 /* Prevent instructions from being scheduled into register save push 8491 sequence when access to the redzone area is done through frame pointer. 8492 The offset betweeh the frame pointer and the stack pointer is calculated 8493 relative to the value of the stack pointer at the end of the function 8494 prologue, and moving instructions that access redzone area via frame 8495 pointer inside push sequence violates this assumption. */ 8496 if (frame_pointer_needed && frame.red_zone_size) 8497 emit_insn (gen_memory_blockage ()); 8498 8499 /* Emit cld instruction if stringops are used in the function. */ 8500 if (TARGET_CLD && ix86_current_function_needs_cld) 8501 emit_insn (gen_cld ()); 8502 } 8503 8504 /* Emit code to restore saved registers using MOV insns. First register 8505 is restored from POINTER + OFFSET. */ 8506 static void 8507 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 8508 int maybe_eh_return) 8509 { 8510 int regno; 8511 rtx base_address = gen_rtx_MEM (Pmode, pointer); 8512 8513 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8514 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) 8515 { 8516 /* Ensure that adjust_address won't be forced to produce pointer 8517 out of range allowed by x86-64 instruction set. 
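   For example, a restore offset of 0x100000000 does not fit the signed
   32-bit displacement of a mov, so the code below first materializes it,
   roughly

       movabs $0x100000000, %r11
       add    <pointer>, %r11

   and then restores registers relative to %r11 with a zero offset.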
*/ 8518 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 8519 { 8520 rtx r11; 8521 8522 r11 = gen_rtx_REG (DImode, R11_REG); 8523 emit_move_insn (r11, GEN_INT (offset)); 8524 emit_insn (gen_adddi3 (r11, r11, pointer)); 8525 base_address = gen_rtx_MEM (Pmode, r11); 8526 offset = 0; 8527 } 8528 emit_move_insn (gen_rtx_REG (Pmode, regno), 8529 adjust_address (base_address, Pmode, offset)); 8530 offset += UNITS_PER_WORD; 8531 } 8532 } 8533 8534 /* Emit code to restore saved registers using MOV insns. First register 8535 is restored from POINTER + OFFSET. */ 8536 static void 8537 ix86_emit_restore_sse_regs_using_mov (rtx pointer, HOST_WIDE_INT offset, 8538 int maybe_eh_return) 8539 { 8540 int regno; 8541 rtx base_address = gen_rtx_MEM (TImode, pointer); 8542 rtx mem; 8543 8544 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8545 if (SSE_REGNO_P (regno) && ix86_save_reg (regno, maybe_eh_return)) 8546 { 8547 /* Ensure that adjust_address won't be forced to produce pointer 8548 out of range allowed by x86-64 instruction set. */ 8549 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode)) 8550 { 8551 rtx r11; 8552 8553 r11 = gen_rtx_REG (DImode, R11_REG); 8554 emit_move_insn (r11, GEN_INT (offset)); 8555 emit_insn (gen_adddi3 (r11, r11, pointer)); 8556 base_address = gen_rtx_MEM (TImode, r11); 8557 offset = 0; 8558 } 8559 mem = adjust_address (base_address, TImode, offset); 8560 set_mem_align (mem, 128); 8561 emit_move_insn (gen_rtx_REG (TImode, regno), mem); 8562 offset += 16; 8563 } 8564 } 8565 8566 /* Restore function stack, frame, and registers. */ 8567 8568 void 8569 ix86_expand_epilogue (int style) 8570 { 8571 int regno; 8572 int sp_valid; 8573 struct ix86_frame frame; 8574 HOST_WIDE_INT offset; 8575 8576 ix86_finalize_stack_realign_flags (); 8577 8578 /* When stack is realigned, SP must be valid. */ 8579 sp_valid = (!frame_pointer_needed 8580 || current_function_sp_is_unchanging 8581 || stack_realign_fp); 8582 8583 ix86_compute_frame_layout (&frame); 8584 8585 /* See the comment about red zone and frame 8586 pointer usage in ix86_expand_prologue. */ 8587 if (frame_pointer_needed && frame.red_zone_size) 8588 emit_insn (gen_memory_blockage ()); 8589 8590 /* Calculate start of saved registers relative to ebp. Special care 8591 must be taken for the normal return case of a function using 8592 eh_return: the eax and edx registers are marked as saved, but not 8593 restored along this path. */ 8594 offset = frame.nregs + frame.nmsave_args; 8595 if (crtl->calls_eh_return && style != 2) 8596 offset -= 2; 8597 offset *= -UNITS_PER_WORD; 8598 offset -= frame.nsseregs * 16 + frame.padding05 + frame.padding0; 8599 8600 /* If we're only restoring one register and sp is not valid then 8601 using a move instruction to restore the register since it's 8602 less work than reloading sp and popping the register. 8603 8604 The default code result in stack adjustment using add/lea instruction, 8605 while this code results in LEAVE instruction (or discrete equivalent), 8606 so it is profitable in some other cases as well. Especially when there 8607 are no registers to restore. We also use this code when TARGET_USE_LEAVE 8608 and there is exactly one register to pop. This heuristic may need some 8609 tuning in future. 
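   Very roughly, this branch ends up with a mov/leave style epilogue such as

       mov  -4(%ebp), %ebx
       leave
       ret

   while the else branch below deallocates and pops instead:

       add  $to_allocate, %esp
       pop  %ebx
       pop  %ebp
       ret

   (only a sketch of the two shapes, not the exact code generated).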
*/ 8610 if ((!sp_valid && (frame.nregs + frame.nsseregs) <= 1) 8611 || (TARGET_EPILOGUE_USING_MOVE 8612 && cfun->machine->use_fast_prologue_epilogue 8613 && ((frame.nregs + frame.nsseregs) > 1 8614 || (frame.to_allocate + frame.padding0) != 0)) 8615 || (frame_pointer_needed && !(frame.nregs + frame.nsseregs) 8616 && (frame.to_allocate + frame.padding0) != 0) 8617 || (frame_pointer_needed && TARGET_USE_LEAVE 8618 && cfun->machine->use_fast_prologue_epilogue 8619 && (frame.nregs + frame.nsseregs) == 1) 8620 || crtl->calls_eh_return) 8621 { 8622 /* Restore registers. We can use ebp or esp to address the memory 8623 locations. If both are available, default to ebp, since offsets 8624 are known to be small. Only exception is esp pointing directly 8625 to the end of block of saved registers, where we may simplify 8626 addressing mode. 8627 8628 If we are realigning stack with bp and sp, regs restore can't 8629 be addressed by bp. sp must be used instead. */ 8630 8631 if (!frame_pointer_needed 8632 || (sp_valid && !(frame.to_allocate + frame.padding0)) 8633 || stack_realign_fp) 8634 { 8635 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8636 frame.to_allocate, style == 2); 8637 ix86_emit_restore_regs_using_mov (stack_pointer_rtx, 8638 frame.to_allocate 8639 + frame.nsseregs * 16 8640 + frame.padding05, style == 2); 8641 } 8642 else 8643 { 8644 ix86_emit_restore_sse_regs_using_mov (hard_frame_pointer_rtx, 8645 offset, style == 2); 8646 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx, 8647 offset 8648 + frame.nsseregs * 16 8649 + frame.padding05, style == 2); 8650 } 8651 8652 /* eh_return epilogues need %ecx added to the stack pointer. */ 8653 if (style == 2) 8654 { 8655 rtx tmp, sa = EH_RETURN_STACKADJ_RTX; 8656 8657 /* Stack align doesn't work with eh_return. */ 8658 gcc_assert (!crtl->stack_realign_needed); 8659 8660 if (frame_pointer_needed) 8661 { 8662 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa); 8663 tmp = plus_constant (tmp, UNITS_PER_WORD); 8664 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp)); 8665 8666 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx); 8667 emit_move_insn (hard_frame_pointer_rtx, tmp); 8668 8669 pro_epilogue_adjust_stack (stack_pointer_rtx, sa, 8670 const0_rtx, style); 8671 } 8672 else 8673 { 8674 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa); 8675 tmp = plus_constant (tmp, (frame.to_allocate 8676 + (frame.nregs + frame.nmsave_args) 8677 * UNITS_PER_WORD 8678 + frame.nsseregs * 16 8679 + frame.padding05 + frame.padding0)); 8680 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp)); 8681 } 8682 } 8683 else if (!frame_pointer_needed) 8684 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8685 GEN_INT (frame.to_allocate 8686 + (frame.nregs + frame.nmsave_args) 8687 * UNITS_PER_WORD 8688 + frame.nsseregs * 16 8689 + frame.padding05 + frame.padding0), 8690 style); 8691 /* If not an i386, mov & pop is faster than "leave". */ 8692 else if (TARGET_USE_LEAVE || optimize_function_for_size_p (cfun) 8693 || !cfun->machine->use_fast_prologue_epilogue) 8694 emit_insn ((*ix86_gen_leave) ()); 8695 else 8696 { 8697 pro_epilogue_adjust_stack (stack_pointer_rtx, 8698 hard_frame_pointer_rtx, 8699 const0_rtx, style); 8700 8701 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); 8702 } 8703 } 8704 else 8705 { 8706 /* First step is to deallocate the stack frame so that we can 8707 pop the registers. 
8708 8709 If we realign stack with frame pointer, then stack pointer 8710 won't be able to recover via lea $offset(%bp), %sp, because 8711 there is a padding area between bp and sp for realign. 8712 "add $to_allocate, %sp" must be used instead. */ 8713 if (!sp_valid) 8714 { 8715 gcc_assert (frame_pointer_needed); 8716 gcc_assert (!stack_realign_fp); 8717 pro_epilogue_adjust_stack (stack_pointer_rtx, 8718 hard_frame_pointer_rtx, 8719 GEN_INT (offset), style); 8720 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8721 0, style == 2); 8722 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8723 GEN_INT (frame.nsseregs * 16 + 8724 frame.padding0), style); 8725 } 8726 else if (frame.to_allocate || frame.padding0 || frame.nsseregs) 8727 { 8728 ix86_emit_restore_sse_regs_using_mov (stack_pointer_rtx, 8729 frame.to_allocate, 8730 style == 2); 8731 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8732 GEN_INT (frame.to_allocate 8733 + frame.nsseregs * 16 8734 + frame.padding05), style); 8735 } 8736 8737 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++) 8738 if (!SSE_REGNO_P (regno) && ix86_save_reg (regno, false)) 8739 emit_insn ((*ix86_gen_pop1) (gen_rtx_REG (Pmode, regno))); 8740 8741 /* XXX: Needs adjustment for SSE regs? */ 8742 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx, 8743 GEN_INT (frame.nmsave_args * UNITS_PER_WORD 8744 + frame.padding0), style); 8745 if (frame_pointer_needed) 8746 { 8747 /* Leave results in shorter dependency chains on CPUs that are 8748 able to grok it fast. */ 8749 if (TARGET_USE_LEAVE) 8750 emit_insn ((*ix86_gen_leave) ()); 8751 else 8752 { 8753 /* For stack realigned really happens, recover stack 8754 pointer to hard frame pointer is a must, if not using 8755 leave. */ 8756 if (stack_realign_fp) 8757 pro_epilogue_adjust_stack (stack_pointer_rtx, 8758 hard_frame_pointer_rtx, 8759 const0_rtx, style); 8760 emit_insn ((*ix86_gen_pop1) (hard_frame_pointer_rtx)); 8761 } 8762 } 8763 } 8764 8765 if (crtl->drap_reg && crtl->stack_realign_needed) 8766 { 8767 int param_ptr_offset = (call_used_regs[REGNO (crtl->drap_reg)] 8768 ? 0 : UNITS_PER_WORD); 8769 gcc_assert (stack_realign_drap); 8770 emit_insn ((*ix86_gen_add3) (stack_pointer_rtx, 8771 crtl->drap_reg, 8772 GEN_INT (-(UNITS_PER_WORD 8773 + param_ptr_offset)))); 8774 if (!call_used_regs[REGNO (crtl->drap_reg)]) 8775 emit_insn ((*ix86_gen_pop1) (crtl->drap_reg)); 8776 8777 } 8778 8779 /* Sibcall epilogues don't want a return instruction. */ 8780 if (style == 0) 8781 return; 8782 8783 if (crtl->args.pops_args && crtl->args.size) 8784 { 8785 rtx popc = GEN_INT (crtl->args.pops_args); 8786 8787 /* i386 can only pop 64K bytes. If asked to pop more, pop 8788 return address, do explicit add, and jump indirectly to the 8789 caller. */ 8790 8791 if (crtl->args.pops_args >= 65536) 8792 { 8793 rtx ecx = gen_rtx_REG (SImode, CX_REG); 8794 8795 /* There is no "pascal" calling convention in any 64bit ABI. */ 8796 gcc_assert (!TARGET_64BIT); 8797 8798 emit_insn (gen_popsi1 (ecx)); 8799 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc)); 8800 emit_jump_insn (gen_return_indirect_internal (ecx)); 8801 } 8802 else 8803 emit_jump_insn (gen_return_pop_internal (popc)); 8804 } 8805 else 8806 emit_jump_insn (gen_return_internal ()); 8807 } 8808 8809 /* Reset from the function's potential modifications. 
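   (Chiefly the PIC register: if the prologue retargeted pic_offset_table_rtx
   to an alternate register via ix86_select_alt_pic_regnum, its REGNO is put
   back to REAL_PIC_OFFSET_TABLE_REGNUM below so the next function starts
   from a clean state.)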
*/ 8810 8811 static void 8812 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED, 8813 HOST_WIDE_INT size ATTRIBUTE_UNUSED) 8814 { 8815 if (pic_offset_table_rtx) 8816 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM); 8817 #if TARGET_MACHO 8818 /* Mach-O doesn't support labels at the end of objects, so if 8819 it looks like we might want one, insert a NOP. */ 8820 { 8821 rtx insn = get_last_insn (); 8822 while (insn 8823 && NOTE_P (insn) 8824 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL) 8825 insn = PREV_INSN (insn); 8826 if (insn 8827 && (LABEL_P (insn) 8828 || (NOTE_P (insn) 8829 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL))) 8830 fputs ("\tnop\n", file); 8831 } 8832 #endif 8833 8834 } 8835 8836 /* Extract the parts of an RTL expression that is a valid memory address 8837 for an instruction. Return 0 if the structure of the address is 8838 grossly off. Return -1 if the address contains ASHIFT, so it is not 8839 strictly valid, but still used for computing length of lea instruction. */ 8840 8841 int 8842 ix86_decompose_address (rtx addr, struct ix86_address *out) 8843 { 8844 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX; 8845 rtx base_reg, index_reg; 8846 HOST_WIDE_INT scale = 1; 8847 rtx scale_rtx = NULL_RTX; 8848 int retval = 1; 8849 enum ix86_address_seg seg = SEG_DEFAULT; 8850 8851 if (REG_P (addr) || GET_CODE (addr) == SUBREG) 8852 base = addr; 8853 else if (GET_CODE (addr) == PLUS) 8854 { 8855 rtx addends[4], op; 8856 int n = 0, i; 8857 8858 op = addr; 8859 do 8860 { 8861 if (n >= 4) 8862 return 0; 8863 addends[n++] = XEXP (op, 1); 8864 op = XEXP (op, 0); 8865 } 8866 while (GET_CODE (op) == PLUS); 8867 if (n >= 4) 8868 return 0; 8869 addends[n] = op; 8870 8871 for (i = n; i >= 0; --i) 8872 { 8873 op = addends[i]; 8874 switch (GET_CODE (op)) 8875 { 8876 case MULT: 8877 if (index) 8878 return 0; 8879 index = XEXP (op, 0); 8880 scale_rtx = XEXP (op, 1); 8881 break; 8882 8883 case UNSPEC: 8884 if (XINT (op, 1) == UNSPEC_TP 8885 && TARGET_TLS_DIRECT_SEG_REFS 8886 && seg == SEG_DEFAULT) 8887 seg = TARGET_64BIT ? SEG_FS : SEG_GS; 8888 else 8889 return 0; 8890 break; 8891 8892 case REG: 8893 case SUBREG: 8894 if (!base) 8895 base = op; 8896 else if (!index) 8897 index = op; 8898 else 8899 return 0; 8900 break; 8901 8902 case CONST: 8903 case CONST_INT: 8904 case SYMBOL_REF: 8905 case LABEL_REF: 8906 if (disp) 8907 return 0; 8908 disp = op; 8909 break; 8910 8911 default: 8912 return 0; 8913 } 8914 } 8915 } 8916 else if (GET_CODE (addr) == MULT) 8917 { 8918 index = XEXP (addr, 0); /* index*scale */ 8919 scale_rtx = XEXP (addr, 1); 8920 } 8921 else if (GET_CODE (addr) == ASHIFT) 8922 { 8923 rtx tmp; 8924 8925 /* We're called for lea too, which implements ashift on occasion. */ 8926 index = XEXP (addr, 0); 8927 tmp = XEXP (addr, 1); 8928 if (!CONST_INT_P (tmp)) 8929 return 0; 8930 scale = INTVAL (tmp); 8931 if ((unsigned HOST_WIDE_INT) scale > 3) 8932 return 0; 8933 scale = 1 << scale; 8934 retval = -1; 8935 } 8936 else 8937 disp = addr; /* displacement */ 8938 8939 /* Extract the integral value of scale. */ 8940 if (scale_rtx) 8941 { 8942 if (!CONST_INT_P (scale_rtx)) 8943 return 0; 8944 scale = INTVAL (scale_rtx); 8945 } 8946 8947 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base; 8948 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index; 8949 8950 /* Allow arg pointer and stack pointer as index if there is not scaling. 
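   For example, (plus (reg %eax) (reg %esp)) decomposes with %esp in the
   index slot, but %esp can never be encoded in the index field of a SIB
   byte, so the swap below moves it, and likewise the arg and frame
   pointers which later eliminate to %esp or %ebp, into the base position.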
*/ 8951 if (base_reg && index_reg && scale == 1 8952 && (index_reg == arg_pointer_rtx 8953 || index_reg == frame_pointer_rtx 8954 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM))) 8955 { 8956 rtx tmp; 8957 tmp = base, base = index, index = tmp; 8958 tmp = base_reg, base_reg = index_reg, index_reg = tmp; 8959 } 8960 8961 /* Special case: %ebp cannot be encoded as a base without a displacement. */ 8962 if ((base_reg == hard_frame_pointer_rtx 8963 || base_reg == frame_pointer_rtx 8964 || base_reg == arg_pointer_rtx) && !disp) 8965 disp = const0_rtx; 8966 8967 /* Special case: on K6, [%esi] makes the instruction vector decoded. 8968 Avoid this by transforming to [%esi+0]. 8969 Reload calls address legitimization without cfun defined, so we need 8970 to test cfun for being non-NULL. */ 8971 if (TARGET_K6 && cfun && optimize_function_for_speed_p (cfun) 8972 && base_reg && !index_reg && !disp 8973 && REG_P (base_reg) 8974 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG) 8975 disp = const0_rtx; 8976 8977 /* Special case: encode reg+reg instead of reg*2. */ 8978 if (!base && index && scale && scale == 2) 8979 base = index, base_reg = index_reg, scale = 1; 8980 8981 /* Special case: scaling cannot be encoded without base or displacement. */ 8982 if (!base && !disp && index && scale != 1) 8983 disp = const0_rtx; 8984 8985 out->base = base; 8986 out->index = index; 8987 out->disp = disp; 8988 out->scale = scale; 8989 out->seg = seg; 8990 8991 return retval; 8992 } 8993 8994 /* Return cost of the memory address x. 8995 For i386, it is better to use a complex address than let gcc copy 8996 the address into a reg and make a new pseudo. But not if the address 8997 requires to two regs - that would mean more pseudos with longer 8998 lifetimes. */ 8999 static int 9000 ix86_address_cost (rtx x, bool speed ATTRIBUTE_UNUSED) 9001 { 9002 struct ix86_address parts; 9003 int cost = 1; 9004 int ok = ix86_decompose_address (x, &parts); 9005 9006 gcc_assert (ok); 9007 9008 if (parts.base && GET_CODE (parts.base) == SUBREG) 9009 parts.base = SUBREG_REG (parts.base); 9010 if (parts.index && GET_CODE (parts.index) == SUBREG) 9011 parts.index = SUBREG_REG (parts.index); 9012 9013 /* Attempt to minimize number of registers in the address. */ 9014 if ((parts.base 9015 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)) 9016 || (parts.index 9017 && (!REG_P (parts.index) 9018 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER))) 9019 cost++; 9020 9021 if (parts.base 9022 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER) 9023 && parts.index 9024 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER) 9025 && parts.base != parts.index) 9026 cost++; 9027 9028 /* AMD-K6 don't like addresses with ModR/M set to 00_xxx_100b, 9029 since it's predecode logic can't detect the length of instructions 9030 and it degenerates to vector decoded. Increase cost of such 9031 addresses here. The penalty is minimally 2 cycles. It may be worthwhile 9032 to split such addresses or even refuse such addresses at all. 9033 9034 Following addressing modes are affected: 9035 [base+scale*index] 9036 [scale*index+disp] 9037 [base+index] 9038 9039 The first and last case may be avoidable by explicitly coding the zero in 9040 memory address, but I don't have AMD-K6 machine handy to check this 9041 theory. 
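Coding the zero explicitly would switch the encoding from the no-displacement form to a disp8 form, e.g. 0(%esi,%ecx) instead of (%esi,%ecx), at the cost of one extra byte.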
*/ 9042 9043 if (TARGET_K6 9044 && ((!parts.disp && parts.base && parts.index && parts.scale != 1) 9045 || (parts.disp && !parts.base && parts.index && parts.scale != 1) 9046 || (!parts.disp && parts.base && parts.index && parts.scale == 1))) 9047 cost += 10; 9048 9049 return cost; 9050 } 9051 9052 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as 9053 this is used for to form addresses to local data when -fPIC is in 9054 use. */ 9055 9056 static bool 9057 darwin_local_data_pic (rtx disp) 9058 { 9059 return (GET_CODE (disp) == UNSPEC 9060 && XINT (disp, 1) == UNSPEC_MACHOPIC_OFFSET); 9061 } 9062 9063 /* Determine if a given RTX is a valid constant. We already know this 9064 satisfies CONSTANT_P. */ 9065 9066 bool 9067 legitimate_constant_p (rtx x) 9068 { 9069 switch (GET_CODE (x)) 9070 { 9071 case CONST: 9072 x = XEXP (x, 0); 9073 9074 if (GET_CODE (x) == PLUS) 9075 { 9076 if (!CONST_INT_P (XEXP (x, 1))) 9077 return false; 9078 x = XEXP (x, 0); 9079 } 9080 9081 if (TARGET_MACHO && darwin_local_data_pic (x)) 9082 return true; 9083 9084 /* Only some unspecs are valid as "constants". */ 9085 if (GET_CODE (x) == UNSPEC) 9086 switch (XINT (x, 1)) 9087 { 9088 case UNSPEC_GOT: 9089 case UNSPEC_GOTOFF: 9090 case UNSPEC_PLTOFF: 9091 return TARGET_64BIT; 9092 case UNSPEC_TPOFF: 9093 case UNSPEC_NTPOFF: 9094 x = XVECEXP (x, 0, 0); 9095 return (GET_CODE (x) == SYMBOL_REF 9096 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 9097 case UNSPEC_DTPOFF: 9098 x = XVECEXP (x, 0, 0); 9099 return (GET_CODE (x) == SYMBOL_REF 9100 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC); 9101 default: 9102 return false; 9103 } 9104 9105 /* We must have drilled down to a symbol. */ 9106 if (GET_CODE (x) == LABEL_REF) 9107 return true; 9108 if (GET_CODE (x) != SYMBOL_REF) 9109 return false; 9110 /* FALLTHRU */ 9111 9112 case SYMBOL_REF: 9113 /* TLS symbols are never valid. */ 9114 if (SYMBOL_REF_TLS_MODEL (x)) 9115 return false; 9116 9117 /* DLLIMPORT symbols are never valid. */ 9118 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 9119 && SYMBOL_REF_DLLIMPORT_P (x)) 9120 return false; 9121 break; 9122 9123 case CONST_DOUBLE: 9124 if (GET_MODE (x) == TImode 9125 && x != CONST0_RTX (TImode) 9126 && !TARGET_64BIT) 9127 return false; 9128 break; 9129 9130 case CONST_VECTOR: 9131 if (x == CONST0_RTX (GET_MODE (x))) 9132 return true; 9133 return false; 9134 9135 default: 9136 break; 9137 } 9138 9139 /* Otherwise we handle everything else in the move patterns. */ 9140 return true; 9141 } 9142 9143 /* Determine if it's legal to put X into the constant pool. This 9144 is not possible for the address of thread-local symbols, which 9145 is checked above. */ 9146 9147 static bool 9148 ix86_cannot_force_const_mem (rtx x) 9149 { 9150 /* We can always put integral constants and vectors in memory. */ 9151 switch (GET_CODE (x)) 9152 { 9153 case CONST_INT: 9154 case CONST_DOUBLE: 9155 case CONST_VECTOR: 9156 return false; 9157 9158 default: 9159 break; 9160 } 9161 return !legitimate_constant_p (x); 9162 } 9163 9164 /* Determine if a given RTX is a valid constant address. */ 9165 9166 bool 9167 constant_address_p (rtx x) 9168 { 9169 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1); 9170 } 9171 9172 /* Return number of arguments to be saved on the stack with 9173 -msave-args. 
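This is the number of integer argument registers already consumed (crtl->args.info.regno), less one when the first slot is taken by a hidden structure-return pointer.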
*/ 9174 9175 static int 9176 ix86_nsaved_args (void) 9177 { 9178 if (TARGET_SAVE_ARGS) 9179 return crtl->args.info.regno - cfun->returns_struct; 9180 else 9181 return 0; 9182 } 9183 9184 /* Nonzero if the constant value X is a legitimate general operand 9185 when generating PIC code. It is given that flag_pic is on and 9186 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */ 9187 bool 9188 legitimate_pic_operand_p (rtx x) 9189 { 9190 rtx inner; 9191 9192 switch (GET_CODE (x)) 9193 { 9194 case CONST: 9195 inner = XEXP (x, 0); 9196 if (GET_CODE (inner) == PLUS 9197 && CONST_INT_P (XEXP (inner, 1))) 9198 inner = XEXP (inner, 0); 9199 9200 /* Only some unspecs are valid as "constants". */ 9201 if (GET_CODE (inner) == UNSPEC) 9202 switch (XINT (inner, 1)) 9203 { 9204 case UNSPEC_GOT: 9205 case UNSPEC_GOTOFF: 9206 case UNSPEC_PLTOFF: 9207 return TARGET_64BIT; 9208 case UNSPEC_TPOFF: 9209 x = XVECEXP (inner, 0, 0); 9210 return (GET_CODE (x) == SYMBOL_REF 9211 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC); 9212 case UNSPEC_MACHOPIC_OFFSET: 9213 return legitimate_pic_address_disp_p (x); 9214 default: 9215 return false; 9216 } 9217 /* FALLTHRU */ 9218 9219 case SYMBOL_REF: 9220 case LABEL_REF: 9221 return legitimate_pic_address_disp_p (x); 9222 9223 default: 9224 return true; 9225 } 9226 } 9227 9228 /* Determine if a given CONST RTX is a valid memory displacement 9229 in PIC mode. */ 9230 9231 int 9232 legitimate_pic_address_disp_p (rtx disp) 9233 { 9234 bool saw_plus; 9235 9236 /* In 64bit mode we can allow direct addresses of symbols and labels 9237 when they are not dynamic symbols. */ 9238 if (TARGET_64BIT) 9239 { 9240 rtx op0 = disp, op1; 9241 9242 switch (GET_CODE (disp)) 9243 { 9244 case LABEL_REF: 9245 return true; 9246 9247 case CONST: 9248 if (GET_CODE (XEXP (disp, 0)) != PLUS) 9249 break; 9250 op0 = XEXP (XEXP (disp, 0), 0); 9251 op1 = XEXP (XEXP (disp, 0), 1); 9252 if (!CONST_INT_P (op1) 9253 || INTVAL (op1) >= 16*1024*1024 9254 || INTVAL (op1) < -16*1024*1024) 9255 break; 9256 if (GET_CODE (op0) == LABEL_REF) 9257 return true; 9258 if (GET_CODE (op0) != SYMBOL_REF) 9259 break; 9260 /* FALLTHRU */ 9261 9262 case SYMBOL_REF: 9263 /* TLS references should always be enclosed in UNSPEC. */ 9264 if (SYMBOL_REF_TLS_MODEL (op0)) 9265 return false; 9266 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0) 9267 && ix86_cmodel != CM_LARGE_PIC) 9268 return true; 9269 break; 9270 9271 default: 9272 break; 9273 } 9274 } 9275 if (GET_CODE (disp) != CONST) 9276 return 0; 9277 disp = XEXP (disp, 0); 9278 9279 if (TARGET_64BIT) 9280 { 9281 /* We are unsafe to allow PLUS expressions. This limit allowed distance 9282 of GOT tables. We should not need these anyway. 
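In practice only a bare @GOTPCREL, @GOTOFF or @PLTOFF unspec wrapping a symbol or label is accepted below; a PLUS such as foo@GOTPCREL+4 is refused.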
*/ 9283 if (GET_CODE (disp) != UNSPEC 9284 || (XINT (disp, 1) != UNSPEC_GOTPCREL 9285 && XINT (disp, 1) != UNSPEC_GOTOFF 9286 && XINT (disp, 1) != UNSPEC_PLTOFF)) 9287 return 0; 9288 9289 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF 9290 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF) 9291 return 0; 9292 return 1; 9293 } 9294 9295 saw_plus = false; 9296 if (GET_CODE (disp) == PLUS) 9297 { 9298 if (!CONST_INT_P (XEXP (disp, 1))) 9299 return 0; 9300 disp = XEXP (disp, 0); 9301 saw_plus = true; 9302 } 9303 9304 if (TARGET_MACHO && darwin_local_data_pic (disp)) 9305 return 1; 9306 9307 if (GET_CODE (disp) != UNSPEC) 9308 return 0; 9309 9310 switch (XINT (disp, 1)) 9311 { 9312 case UNSPEC_GOT: 9313 if (saw_plus) 9314 return false; 9315 /* We need to check for both symbols and labels because VxWorks loads 9316 text labels with @GOT rather than @GOTOFF. See gotoff_operand for 9317 details. */ 9318 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF 9319 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF); 9320 case UNSPEC_GOTOFF: 9321 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used. 9322 While ABI specify also 32bit relocation but we don't produce it in 9323 small PIC model at all. */ 9324 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF 9325 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF) 9326 && !TARGET_64BIT) 9327 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode); 9328 return false; 9329 case UNSPEC_GOTTPOFF: 9330 case UNSPEC_GOTNTPOFF: 9331 case UNSPEC_INDNTPOFF: 9332 if (saw_plus) 9333 return false; 9334 disp = XVECEXP (disp, 0, 0); 9335 return (GET_CODE (disp) == SYMBOL_REF 9336 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC); 9337 case UNSPEC_NTPOFF: 9338 disp = XVECEXP (disp, 0, 0); 9339 return (GET_CODE (disp) == SYMBOL_REF 9340 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC); 9341 case UNSPEC_DTPOFF: 9342 disp = XVECEXP (disp, 0, 0); 9343 return (GET_CODE (disp) == SYMBOL_REF 9344 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC); 9345 } 9346 9347 return 0; 9348 } 9349 9350 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid 9351 memory address for an instruction. The MODE argument is the machine mode 9352 for the MEM expression that wants to use this address. 9353 9354 It only recognizes address in canonical form. LEGITIMIZE_ADDRESS should 9355 convert common non-canonical forms to canonical form so that they will 9356 be recognized. */ 9357 9358 int 9359 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED, 9360 rtx addr, int strict) 9361 { 9362 struct ix86_address parts; 9363 rtx base, index, disp; 9364 HOST_WIDE_INT scale; 9365 const char *reason = NULL; 9366 rtx reason_rtx = NULL_RTX; 9367 9368 if (ix86_decompose_address (addr, &parts) <= 0) 9369 { 9370 reason = "decomposition failed"; 9371 goto report_error; 9372 } 9373 9374 base = parts.base; 9375 index = parts.index; 9376 disp = parts.disp; 9377 scale = parts.scale; 9378 9379 /* Validate base register. 9380 9381 Don't allow SUBREG's that span more than a word here. It can lead to spill 9382 failures when the base is one word out of a two word structure, which is 9383 represented internally as a DImode int. 
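On IA-32, for instance, a base of (subreg:SI (reg:DI N) 0) is refused by the check below, while a SUBREG of a register no wider than a word is still accepted.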
*/ 9384 9385 if (base) 9386 { 9387 rtx reg; 9388 reason_rtx = base; 9389 9390 if (REG_P (base)) 9391 reg = base; 9392 else if (GET_CODE (base) == SUBREG 9393 && REG_P (SUBREG_REG (base)) 9394 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base))) 9395 <= UNITS_PER_WORD) 9396 reg = SUBREG_REG (base); 9397 else 9398 { 9399 reason = "base is not a register"; 9400 goto report_error; 9401 } 9402 9403 if (GET_MODE (base) != Pmode) 9404 { 9405 reason = "base is not in Pmode"; 9406 goto report_error; 9407 } 9408 9409 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg)) 9410 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg))) 9411 { 9412 reason = "base is not valid"; 9413 goto report_error; 9414 } 9415 } 9416 9417 /* Validate index register. 9418 9419 Don't allow SUBREG's that span more than a word here -- same as above. */ 9420 9421 if (index) 9422 { 9423 rtx reg; 9424 reason_rtx = index; 9425 9426 if (REG_P (index)) 9427 reg = index; 9428 else if (GET_CODE (index) == SUBREG 9429 && REG_P (SUBREG_REG (index)) 9430 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index))) 9431 <= UNITS_PER_WORD) 9432 reg = SUBREG_REG (index); 9433 else 9434 { 9435 reason = "index is not a register"; 9436 goto report_error; 9437 } 9438 9439 if (GET_MODE (index) != Pmode) 9440 { 9441 reason = "index is not in Pmode"; 9442 goto report_error; 9443 } 9444 9445 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg)) 9446 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg))) 9447 { 9448 reason = "index is not valid"; 9449 goto report_error; 9450 } 9451 } 9452 9453 /* Validate scale factor. */ 9454 if (scale != 1) 9455 { 9456 reason_rtx = GEN_INT (scale); 9457 if (!index) 9458 { 9459 reason = "scale without index"; 9460 goto report_error; 9461 } 9462 9463 if (scale != 2 && scale != 4 && scale != 8) 9464 { 9465 reason = "scale is not a valid multiplier"; 9466 goto report_error; 9467 } 9468 } 9469 9470 /* Validate displacement. */ 9471 if (disp) 9472 { 9473 reason_rtx = disp; 9474 9475 if (GET_CODE (disp) == CONST 9476 && GET_CODE (XEXP (disp, 0)) == UNSPEC 9477 && XINT (XEXP (disp, 0), 1) != UNSPEC_MACHOPIC_OFFSET) 9478 switch (XINT (XEXP (disp, 0), 1)) 9479 { 9480 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when 9481 used. While ABI specify also 32bit relocations, we don't produce 9482 them at all and use IP relative instead. */ 9483 case UNSPEC_GOT: 9484 case UNSPEC_GOTOFF: 9485 gcc_assert (flag_pic); 9486 if (!TARGET_64BIT) 9487 goto is_legitimate_pic; 9488 reason = "64bit address unspec"; 9489 goto report_error; 9490 9491 case UNSPEC_GOTPCREL: 9492 gcc_assert (flag_pic); 9493 goto is_legitimate_pic; 9494 9495 case UNSPEC_GOTTPOFF: 9496 case UNSPEC_GOTNTPOFF: 9497 case UNSPEC_INDNTPOFF: 9498 case UNSPEC_NTPOFF: 9499 case UNSPEC_DTPOFF: 9500 break; 9501 9502 default: 9503 reason = "invalid address unspec"; 9504 goto report_error; 9505 } 9506 9507 else if (SYMBOLIC_CONST (disp) 9508 && (flag_pic 9509 || (TARGET_MACHO 9510 #if TARGET_MACHO 9511 && MACHOPIC_INDIRECT 9512 && !machopic_operand_p (disp) 9513 #endif 9514 ))) 9515 { 9516 9517 is_legitimate_pic: 9518 if (TARGET_64BIT && (index || base)) 9519 { 9520 /* foo@dtpoff(%rX) is ok. 
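That is, when a base or index register is present in 64-bit PIC mode, the only symbolic displacements accepted below are CONST expressions combining a @DTPOFF or @NTPOFF unspec with an integer offset.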
*/ 9521 if (GET_CODE (disp) != CONST 9522 || GET_CODE (XEXP (disp, 0)) != PLUS 9523 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC 9524 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1)) 9525 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF 9526 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF)) 9527 { 9528 reason = "non-constant pic memory reference"; 9529 goto report_error; 9530 } 9531 } 9532 else if (! legitimate_pic_address_disp_p (disp)) 9533 { 9534 reason = "displacement is an invalid pic construct"; 9535 goto report_error; 9536 } 9537 9538 /* This code used to verify that a symbolic pic displacement 9539 includes the pic_offset_table_rtx register. 9540 9541 While this is good idea, unfortunately these constructs may 9542 be created by "adds using lea" optimization for incorrect 9543 code like: 9544 9545 int a; 9546 int foo(int i) 9547 { 9548 return *(&a+i); 9549 } 9550 9551 This code is nonsensical, but results in addressing 9552 GOT table with pic_offset_table_rtx base. We can't 9553 just refuse it easily, since it gets matched by 9554 "addsi3" pattern, that later gets split to lea in the 9555 case output register differs from input. While this 9556 can be handled by separate addsi pattern for this case 9557 that never results in lea, this seems to be easier and 9558 correct fix for crash to disable this test. */ 9559 } 9560 else if (GET_CODE (disp) != LABEL_REF 9561 && !CONST_INT_P (disp) 9562 && (GET_CODE (disp) != CONST 9563 || !legitimate_constant_p (disp)) 9564 && (GET_CODE (disp) != SYMBOL_REF 9565 || !legitimate_constant_p (disp))) 9566 { 9567 reason = "displacement is not constant"; 9568 goto report_error; 9569 } 9570 else if (TARGET_64BIT 9571 && !x86_64_immediate_operand (disp, VOIDmode)) 9572 { 9573 reason = "displacement is out of range"; 9574 goto report_error; 9575 } 9576 } 9577 9578 /* Everything looks valid. */ 9579 return TRUE; 9580 9581 report_error: 9582 return FALSE; 9583 } 9584 9585 /* Return a unique alias set for the GOT. */ 9586 9587 static alias_set_type 9588 ix86_GOT_alias_set (void) 9589 { 9590 static alias_set_type set = -1; 9591 if (set == -1) 9592 set = new_alias_set (); 9593 return set; 9594 } 9595 9596 /* Return a legitimate reference for ORIG (an address) using the 9597 register REG. If REG is 0, a new pseudo is generated. 9598 9599 There are two types of references that must be handled: 9600 9601 1. Global data references must load the address from the GOT, via 9602 the PIC reg. An insn is emitted to do this load, and the reg is 9603 returned. 9604 9605 2. Static data references, constant pool addresses, and code labels 9606 compute the address as an offset from the GOT, whose base is in 9607 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to 9608 differentiate them from global data objects. The returned 9609 address is the PIC reg + an unspec constant. 9610 9611 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC 9612 reg also appears in the address. */ 9613 9614 static rtx 9615 legitimize_pic_address (rtx orig, rtx reg) 9616 { 9617 rtx addr = orig; 9618 rtx new_rtx = orig; 9619 rtx base; 9620 9621 #if TARGET_MACHO 9622 if (TARGET_MACHO && !TARGET_64BIT) 9623 { 9624 if (reg == 0) 9625 reg = gen_reg_rtx (Pmode); 9626 /* Use the generic Mach-O PIC machinery. 
*/ 9627 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg); 9628 } 9629 #endif 9630 9631 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr)) 9632 new_rtx = addr; 9633 else if (TARGET_64BIT 9634 && ix86_cmodel != CM_SMALL_PIC 9635 && gotoff_operand (addr, Pmode)) 9636 { 9637 rtx tmpreg; 9638 /* This symbol may be referenced via a displacement from the PIC 9639 base address (@GOTOFF). */ 9640 9641 if (reload_in_progress) 9642 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9643 if (GET_CODE (addr) == CONST) 9644 addr = XEXP (addr, 0); 9645 if (GET_CODE (addr) == PLUS) 9646 { 9647 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), 9648 UNSPEC_GOTOFF); 9649 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); 9650 } 9651 else 9652 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 9653 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9654 if (!reg) 9655 tmpreg = gen_reg_rtx (Pmode); 9656 else 9657 tmpreg = reg; 9658 emit_move_insn (tmpreg, new_rtx); 9659 9660 if (reg != 0) 9661 { 9662 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx, 9663 tmpreg, 1, OPTAB_DIRECT); 9664 new_rtx = reg; 9665 } 9666 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg); 9667 } 9668 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode)) 9669 { 9670 /* This symbol may be referenced via a displacement from the PIC 9671 base address (@GOTOFF). */ 9672 9673 if (reload_in_progress) 9674 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9675 if (GET_CODE (addr) == CONST) 9676 addr = XEXP (addr, 0); 9677 if (GET_CODE (addr) == PLUS) 9678 { 9679 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), 9680 UNSPEC_GOTOFF); 9681 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1)); 9682 } 9683 else 9684 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF); 9685 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9686 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9687 9688 if (reg != 0) 9689 { 9690 emit_move_insn (reg, new_rtx); 9691 new_rtx = reg; 9692 } 9693 } 9694 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0) 9695 /* We can't use @GOTOFF for text labels on VxWorks; 9696 see gotoff_operand. */ 9697 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF)) 9698 { 9699 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) 9700 { 9701 if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (addr)) 9702 return legitimize_dllimport_symbol (addr, true); 9703 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS 9704 && GET_CODE (XEXP (XEXP (addr, 0), 0)) == SYMBOL_REF 9705 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (addr, 0), 0))) 9706 { 9707 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (addr, 0), 0), true); 9708 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (addr, 0), 1)); 9709 } 9710 } 9711 9712 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC) 9713 { 9714 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL); 9715 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9716 new_rtx = gen_const_mem (Pmode, new_rtx); 9717 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); 9718 9719 if (reg == 0) 9720 reg = gen_reg_rtx (Pmode); 9721 /* Use directly gen_movsi, otherwise the address is loaded 9722 into register for CSE. We don't want to CSE this addresses, 9723 instead we CSE addresses from the GOT table, so skip this. 
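The insn emitted here is the usual RIP-relative GOT slot load, e.g. mov foo@GOTPCREL(%rip), %reg.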
*/ 9724 emit_insn (gen_movsi (reg, new_rtx)); 9725 new_rtx = reg; 9726 } 9727 else 9728 { 9729 /* This symbol must be referenced via a load from the 9730 Global Offset Table (@GOT). */ 9731 9732 if (reload_in_progress) 9733 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9734 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT); 9735 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9736 if (TARGET_64BIT) 9737 new_rtx = force_reg (Pmode, new_rtx); 9738 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9739 new_rtx = gen_const_mem (Pmode, new_rtx); 9740 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ()); 9741 9742 if (reg == 0) 9743 reg = gen_reg_rtx (Pmode); 9744 emit_move_insn (reg, new_rtx); 9745 new_rtx = reg; 9746 } 9747 } 9748 else 9749 { 9750 if (CONST_INT_P (addr) 9751 && !x86_64_immediate_operand (addr, VOIDmode)) 9752 { 9753 if (reg) 9754 { 9755 emit_move_insn (reg, addr); 9756 new_rtx = reg; 9757 } 9758 else 9759 new_rtx = force_reg (Pmode, addr); 9760 } 9761 else if (GET_CODE (addr) == CONST) 9762 { 9763 addr = XEXP (addr, 0); 9764 9765 /* We must match stuff we generate before. Assume the only 9766 unspecs that can get here are ours. Not that we could do 9767 anything with them anyway.... */ 9768 if (GET_CODE (addr) == UNSPEC 9769 || (GET_CODE (addr) == PLUS 9770 && GET_CODE (XEXP (addr, 0)) == UNSPEC)) 9771 return orig; 9772 gcc_assert (GET_CODE (addr) == PLUS); 9773 } 9774 if (GET_CODE (addr) == PLUS) 9775 { 9776 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1); 9777 9778 /* Check first to see if this is a constant offset from a @GOTOFF 9779 symbol reference. */ 9780 if (gotoff_operand (op0, Pmode) 9781 && CONST_INT_P (op1)) 9782 { 9783 if (!TARGET_64BIT) 9784 { 9785 if (reload_in_progress) 9786 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9787 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0), 9788 UNSPEC_GOTOFF); 9789 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1); 9790 new_rtx = gen_rtx_CONST (Pmode, new_rtx); 9791 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx); 9792 9793 if (reg != 0) 9794 { 9795 emit_move_insn (reg, new_rtx); 9796 new_rtx = reg; 9797 } 9798 } 9799 else 9800 { 9801 if (INTVAL (op1) < -16*1024*1024 9802 || INTVAL (op1) >= 16*1024*1024) 9803 { 9804 if (!x86_64_immediate_operand (op1, Pmode)) 9805 op1 = force_reg (Pmode, op1); 9806 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1); 9807 } 9808 } 9809 } 9810 else 9811 { 9812 base = legitimize_pic_address (XEXP (addr, 0), reg); 9813 new_rtx = legitimize_pic_address (XEXP (addr, 1), 9814 base == reg ? NULL_RTX : reg); 9815 9816 if (CONST_INT_P (new_rtx)) 9817 new_rtx = plus_constant (base, INTVAL (new_rtx)); 9818 else 9819 { 9820 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1))) 9821 { 9822 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0)); 9823 new_rtx = XEXP (new_rtx, 1); 9824 } 9825 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx); 9826 } 9827 } 9828 } 9829 } 9830 return new_rtx; 9831 } 9832 9833 /* Load the thread pointer. If TO_REG is true, force it into a register. */ 9834 9835 static rtx 9836 get_thread_pointer (int to_reg) 9837 { 9838 rtx tp, reg, insn; 9839 9840 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP); 9841 if (!to_reg) 9842 return tp; 9843 9844 reg = gen_reg_rtx (Pmode); 9845 insn = gen_rtx_SET (VOIDmode, reg, tp); 9846 insn = emit_insn (insn); 9847 9848 return reg; 9849 } 9850 9851 /* A subroutine of legitimize_address and ix86_expand_move. 
FOR_MOV is 9852 false if we expect this to be used for a memory address and true if 9853 we expect to load the address into a register. */ 9854 9855 static rtx 9856 legitimize_tls_address (rtx x, enum tls_model model, int for_mov) 9857 { 9858 rtx dest, base, off, pic, tp; 9859 int type; 9860 9861 switch (model) 9862 { 9863 case TLS_MODEL_GLOBAL_DYNAMIC: 9864 dest = gen_reg_rtx (Pmode); 9865 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 9866 9867 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 9868 { 9869 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns; 9870 9871 start_sequence (); 9872 emit_call_insn (gen_tls_global_dynamic_64 (rax, x)); 9873 insns = get_insns (); 9874 end_sequence (); 9875 9876 RTL_CONST_CALL_P (insns) = 1; 9877 emit_libcall_block (insns, dest, rax, x); 9878 } 9879 else if (TARGET_64BIT && TARGET_GNU2_TLS) 9880 emit_insn (gen_tls_global_dynamic_64 (dest, x)); 9881 else 9882 emit_insn (gen_tls_global_dynamic_32 (dest, x)); 9883 9884 if (TARGET_GNU2_TLS) 9885 { 9886 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest)); 9887 9888 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 9889 } 9890 break; 9891 9892 case TLS_MODEL_LOCAL_DYNAMIC: 9893 base = gen_reg_rtx (Pmode); 9894 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0; 9895 9896 if (TARGET_64BIT && ! TARGET_GNU2_TLS) 9897 { 9898 rtx rax = gen_rtx_REG (Pmode, AX_REG), insns, note; 9899 9900 start_sequence (); 9901 emit_call_insn (gen_tls_local_dynamic_base_64 (rax)); 9902 insns = get_insns (); 9903 end_sequence (); 9904 9905 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL); 9906 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note); 9907 RTL_CONST_CALL_P (insns) = 1; 9908 emit_libcall_block (insns, base, rax, note); 9909 } 9910 else if (TARGET_64BIT && TARGET_GNU2_TLS) 9911 emit_insn (gen_tls_local_dynamic_base_64 (base)); 9912 else 9913 emit_insn (gen_tls_local_dynamic_base_32 (base)); 9914 9915 if (TARGET_GNU2_TLS) 9916 { 9917 rtx x = ix86_tls_module_base (); 9918 9919 set_unique_reg_note (get_last_insn (), REG_EQUIV, 9920 gen_rtx_MINUS (Pmode, x, tp)); 9921 } 9922 9923 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF); 9924 off = gen_rtx_CONST (Pmode, off); 9925 9926 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off)); 9927 9928 if (TARGET_GNU2_TLS) 9929 { 9930 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp)); 9931 9932 set_unique_reg_note (get_last_insn (), REG_EQUIV, x); 9933 } 9934 9935 break; 9936 9937 case TLS_MODEL_INITIAL_EXEC: 9938 if (TARGET_64BIT) 9939 { 9940 pic = NULL; 9941 type = UNSPEC_GOTNTPOFF; 9942 } 9943 else if (flag_pic) 9944 { 9945 if (reload_in_progress) 9946 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true); 9947 pic = pic_offset_table_rtx; 9948 type = TARGET_ANY_GNU_TLS ? 
UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF; 9949 } 9950 else if (!TARGET_ANY_GNU_TLS) 9951 { 9952 pic = gen_reg_rtx (Pmode); 9953 emit_insn (gen_set_got (pic)); 9954 type = UNSPEC_GOTTPOFF; 9955 } 9956 else 9957 { 9958 pic = NULL; 9959 type = UNSPEC_INDNTPOFF; 9960 } 9961 9962 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type); 9963 off = gen_rtx_CONST (Pmode, off); 9964 if (pic) 9965 off = gen_rtx_PLUS (Pmode, pic, off); 9966 off = gen_const_mem (Pmode, off); 9967 set_mem_alias_set (off, ix86_GOT_alias_set ()); 9968 9969 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9970 { 9971 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 9972 off = force_reg (Pmode, off); 9973 return gen_rtx_PLUS (Pmode, base, off); 9974 } 9975 else 9976 { 9977 base = get_thread_pointer (true); 9978 dest = gen_reg_rtx (Pmode); 9979 emit_insn (gen_subsi3 (dest, base, off)); 9980 } 9981 break; 9982 9983 case TLS_MODEL_LOCAL_EXEC: 9984 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), 9985 (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9986 ? UNSPEC_NTPOFF : UNSPEC_TPOFF); 9987 off = gen_rtx_CONST (Pmode, off); 9988 9989 if (TARGET_64BIT || TARGET_ANY_GNU_TLS) 9990 { 9991 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS); 9992 return gen_rtx_PLUS (Pmode, base, off); 9993 } 9994 else 9995 { 9996 base = get_thread_pointer (true); 9997 dest = gen_reg_rtx (Pmode); 9998 emit_insn (gen_subsi3 (dest, base, off)); 9999 } 10000 break; 10001 10002 default: 10003 gcc_unreachable (); 10004 } 10005 10006 return dest; 10007 } 10008 10009 /* Create or return the unique __imp_DECL dllimport symbol corresponding 10010 to symbol DECL. */ 10011 10012 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map))) 10013 htab_t dllimport_map; 10014 10015 static tree 10016 get_dllimport_decl (tree decl) 10017 { 10018 struct tree_map *h, in; 10019 void **loc; 10020 const char *name; 10021 const char *prefix; 10022 size_t namelen, prefixlen; 10023 char *imp_name; 10024 tree to; 10025 rtx rtl; 10026 10027 if (!dllimport_map) 10028 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0); 10029 10030 in.hash = htab_hash_pointer (decl); 10031 in.base.from = decl; 10032 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT); 10033 h = (struct tree_map *) *loc; 10034 if (h) 10035 return h->to; 10036 10037 *loc = h = GGC_NEW (struct tree_map); 10038 h->hash = in.hash; 10039 h->base.from = decl; 10040 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node); 10041 DECL_ARTIFICIAL (to) = 1; 10042 DECL_IGNORED_P (to) = 1; 10043 DECL_EXTERNAL (to) = 1; 10044 TREE_READONLY (to) = 1; 10045 10046 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl)); 10047 name = targetm.strip_name_encoding (name); 10048 prefix = name[0] == FASTCALL_PREFIX || user_label_prefix[0] == 0 10049 ? "*__imp_" : "*__imp__"; 10050 namelen = strlen (name); 10051 prefixlen = strlen (prefix); 10052 imp_name = (char *) alloca (namelen + prefixlen + 1); 10053 memcpy (imp_name, prefix, prefixlen); 10054 memcpy (imp_name + prefixlen, name, namelen + 1); 10055 10056 name = ggc_alloc_string (imp_name, namelen + prefixlen); 10057 rtl = gen_rtx_SYMBOL_REF (Pmode, name); 10058 SET_SYMBOL_REF_DECL (rtl, to); 10059 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL; 10060 10061 rtl = gen_const_mem (Pmode, rtl); 10062 set_mem_alias_set (rtl, ix86_GOT_alias_set ()); 10063 10064 SET_DECL_RTL (to, rtl); 10065 SET_DECL_ASSEMBLER_NAME (to, get_identifier (name)); 10066 10067 return to; 10068 } 10069 10070 /* Expand SYMBOL into its corresponding dllimport symbol. 
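The result is the __imp_ pointer variable created by get_dllimport_decl above, i.e. a memory reference through which the real address is loaded at run time.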
WANT_REG is 10071 true if we require the result be a register. */ 10072 10073 static rtx 10074 legitimize_dllimport_symbol (rtx symbol, bool want_reg) 10075 { 10076 tree imp_decl; 10077 rtx x; 10078 10079 gcc_assert (SYMBOL_REF_DECL (symbol)); 10080 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol)); 10081 10082 x = DECL_RTL (imp_decl); 10083 if (want_reg) 10084 x = force_reg (Pmode, x); 10085 return x; 10086 } 10087 10088 /* Try machine-dependent ways of modifying an illegitimate address 10089 to be legitimate. If we find one, return the new, valid address. 10090 This macro is used in only one place: `memory_address' in explow.c. 10091 10092 OLDX is the address as it was before break_out_memory_refs was called. 10093 In some cases it is useful to look at this to decide what needs to be done. 10094 10095 MODE and WIN are passed so that this macro can use 10096 GO_IF_LEGITIMATE_ADDRESS. 10097 10098 It is always safe for this macro to do nothing. It exists to recognize 10099 opportunities to optimize the output. 10100 10101 For the 80386, we handle X+REG by loading X into a register R and 10102 using R+REG. R will go in a general reg and indexing will be used. 10103 However, if REG is a broken-out memory address or multiplication, 10104 nothing needs to be done because REG can certainly go in a general reg. 10105 10106 When -fpic is used, special handling is needed for symbolic references. 10107 See comments by legitimize_pic_address in i386.c for details. */ 10108 10109 rtx 10110 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode) 10111 { 10112 int changed = 0; 10113 unsigned log; 10114 10115 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0; 10116 if (log) 10117 return legitimize_tls_address (x, (enum tls_model) log, false); 10118 if (GET_CODE (x) == CONST 10119 && GET_CODE (XEXP (x, 0)) == PLUS 10120 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 10121 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)))) 10122 { 10123 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), 10124 (enum tls_model) log, false); 10125 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 10126 } 10127 10128 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES) 10129 { 10130 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x)) 10131 return legitimize_dllimport_symbol (x, true); 10132 if (GET_CODE (x) == CONST 10133 && GET_CODE (XEXP (x, 0)) == PLUS 10134 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF 10135 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0))) 10136 { 10137 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true); 10138 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1)); 10139 } 10140 } 10141 10142 if (flag_pic && SYMBOLIC_CONST (x)) 10143 return legitimize_pic_address (x, 0); 10144 10145 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */ 10146 if (GET_CODE (x) == ASHIFT 10147 && CONST_INT_P (XEXP (x, 1)) 10148 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4) 10149 { 10150 changed = 1; 10151 log = INTVAL (XEXP (x, 1)); 10152 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)), 10153 GEN_INT (1 << log)); 10154 } 10155 10156 if (GET_CODE (x) == PLUS) 10157 { 10158 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. 
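For instance, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten as (plus (mult (reg) (const_int 4)) (reg)), so it decomposes into a base plus an index with scale 4.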
*/ 10159 10160 if (GET_CODE (XEXP (x, 0)) == ASHIFT 10161 && CONST_INT_P (XEXP (XEXP (x, 0), 1)) 10162 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4) 10163 { 10164 changed = 1; 10165 log = INTVAL (XEXP (XEXP (x, 0), 1)); 10166 XEXP (x, 0) = gen_rtx_MULT (Pmode, 10167 force_reg (Pmode, XEXP (XEXP (x, 0), 0)), 10168 GEN_INT (1 << log)); 10169 } 10170 10171 if (GET_CODE (XEXP (x, 1)) == ASHIFT 10172 && CONST_INT_P (XEXP (XEXP (x, 1), 1)) 10173 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4) 10174 { 10175 changed = 1; 10176 log = INTVAL (XEXP (XEXP (x, 1), 1)); 10177 XEXP (x, 1) = gen_rtx_MULT (Pmode, 10178 force_reg (Pmode, XEXP (XEXP (x, 1), 0)), 10179 GEN_INT (1 << log)); 10180 } 10181 10182 /* Put multiply first if it isn't already. */ 10183 if (GET_CODE (XEXP (x, 1)) == MULT) 10184 { 10185 rtx tmp = XEXP (x, 0); 10186 XEXP (x, 0) = XEXP (x, 1); 10187 XEXP (x, 1) = tmp; 10188 changed = 1; 10189 } 10190 10191 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const))) 10192 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be 10193 created by virtual register instantiation, register elimination, and 10194 similar optimizations. */ 10195 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS) 10196 { 10197 changed = 1; 10198 x = gen_rtx_PLUS (Pmode, 10199 gen_rtx_PLUS (Pmode, XEXP (x, 0), 10200 XEXP (XEXP (x, 1), 0)), 10201 XEXP (XEXP (x, 1), 1)); 10202 } 10203 10204 /* Canonicalize 10205 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const) 10206 into (plus (plus (mult (reg) (const)) (reg)) (const)). */ 10207 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS 10208 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 10209 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS 10210 && CONSTANT_P (XEXP (x, 1))) 10211 { 10212 rtx constant; 10213 rtx other = NULL_RTX; 10214 10215 if (CONST_INT_P (XEXP (x, 1))) 10216 { 10217 constant = XEXP (x, 1); 10218 other = XEXP (XEXP (XEXP (x, 0), 1), 1); 10219 } 10220 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1))) 10221 { 10222 constant = XEXP (XEXP (XEXP (x, 0), 1), 1); 10223 other = XEXP (x, 1); 10224 } 10225 else 10226 constant = 0; 10227 10228 if (constant) 10229 { 10230 changed = 1; 10231 x = gen_rtx_PLUS (Pmode, 10232 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0), 10233 XEXP (XEXP (XEXP (x, 0), 1), 0)), 10234 plus_constant (other, INTVAL (constant))); 10235 } 10236 } 10237 10238 if (changed && legitimate_address_p (mode, x, FALSE)) 10239 return x; 10240 10241 if (GET_CODE (XEXP (x, 0)) == MULT) 10242 { 10243 changed = 1; 10244 XEXP (x, 0) = force_operand (XEXP (x, 0), 0); 10245 } 10246 10247 if (GET_CODE (XEXP (x, 1)) == MULT) 10248 { 10249 changed = 1; 10250 XEXP (x, 1) = force_operand (XEXP (x, 1), 0); 10251 } 10252 10253 if (changed 10254 && REG_P (XEXP (x, 1)) 10255 && REG_P (XEXP (x, 0))) 10256 return x; 10257 10258 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1))) 10259 { 10260 changed = 1; 10261 x = legitimize_pic_address (x, 0); 10262 } 10263 10264 if (changed && legitimate_address_p (mode, x, FALSE)) 10265 return x; 10266 10267 if (REG_P (XEXP (x, 0))) 10268 { 10269 rtx temp = gen_reg_rtx (Pmode); 10270 rtx val = force_operand (XEXP (x, 1), temp); 10271 if (val != temp) 10272 emit_move_insn (temp, val); 10273 10274 XEXP (x, 1) = temp; 10275 return x; 10276 } 10277 10278 else if (REG_P (XEXP (x, 1))) 10279 { 10280 rtx temp = gen_reg_rtx (Pmode); 10281 rtx val = force_operand (XEXP (x, 0), temp); 10282 if (val != temp) 10283 emit_move_insn (temp, val); 10284 10285 XEXP (x, 
0) = temp; 10286 return x; 10287 } 10288 } 10289 10290 return x; 10291 } 10292 10293 /* Print an integer constant expression in assembler syntax. Addition 10294 and subtraction are the only arithmetic that may appear in these 10295 expressions. FILE is the stdio stream to write to, X is the rtx, and 10296 CODE is the operand print code from the output string. */ 10297 10298 static void 10299 output_pic_addr_const (FILE *file, rtx x, int code) 10300 { 10301 char buf[256]; 10302 10303 switch (GET_CODE (x)) 10304 { 10305 case PC: 10306 gcc_assert (flag_pic); 10307 putc ('.', file); 10308 break; 10309 10310 case SYMBOL_REF: 10311 if (! TARGET_MACHO || TARGET_64BIT) 10312 output_addr_const (file, x); 10313 else 10314 { 10315 const char *name = XSTR (x, 0); 10316 10317 /* Mark the decl as referenced so that cgraph will 10318 output the function. */ 10319 if (SYMBOL_REF_DECL (x)) 10320 mark_decl_referenced (SYMBOL_REF_DECL (x)); 10321 10322 #if TARGET_MACHO 10323 if (MACHOPIC_INDIRECT 10324 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION) 10325 name = machopic_indirection_name (x, /*stub_p=*/true); 10326 #endif 10327 assemble_name (file, name); 10328 } 10329 if (!TARGET_MACHO && !(TARGET_64BIT && DEFAULT_ABI == MS_ABI) 10330 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x)) 10331 fputs ("@PLT", file); 10332 break; 10333 10334 case LABEL_REF: 10335 x = XEXP (x, 0); 10336 /* FALLTHRU */ 10337 case CODE_LABEL: 10338 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x)); 10339 assemble_name (asm_out_file, buf); 10340 break; 10341 10342 case CONST_INT: 10343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 10344 break; 10345 10346 case CONST: 10347 /* This used to output parentheses around the expression, 10348 but that does not work on the 386 (either ATT or BSD assembler). */ 10349 output_pic_addr_const (file, XEXP (x, 0), code); 10350 break; 10351 10352 case CONST_DOUBLE: 10353 if (GET_MODE (x) == VOIDmode) 10354 { 10355 /* We can use %d if the number is <32 bits and positive. */ 10356 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0) 10357 fprintf (file, "0x%lx%08lx", 10358 (unsigned long) CONST_DOUBLE_HIGH (x), 10359 (unsigned long) CONST_DOUBLE_LOW (x)); 10360 else 10361 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x)); 10362 } 10363 else 10364 /* We can't handle floating point constants; 10365 PRINT_OPERAND must handle them. */ 10366 output_operand_lossage ("floating constant misused"); 10367 break; 10368 10369 case PLUS: 10370 /* Some assemblers need integer constants to appear first. */ 10371 if (CONST_INT_P (XEXP (x, 0))) 10372 { 10373 output_pic_addr_const (file, XEXP (x, 0), code); 10374 putc ('+', file); 10375 output_pic_addr_const (file, XEXP (x, 1), code); 10376 } 10377 else 10378 { 10379 gcc_assert (CONST_INT_P (XEXP (x, 1))); 10380 output_pic_addr_const (file, XEXP (x, 1), code); 10381 putc ('+', file); 10382 output_pic_addr_const (file, XEXP (x, 0), code); 10383 } 10384 break; 10385 10386 case MINUS: 10387 if (!TARGET_MACHO) 10388 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file); 10389 output_pic_addr_const (file, XEXP (x, 0), code); 10390 putc ('-', file); 10391 output_pic_addr_const (file, XEXP (x, 1), code); 10392 if (!TARGET_MACHO) 10393 putc (ASSEMBLER_DIALECT == ASM_INTEL ? 
')' : ']', file); 10394 break; 10395 10396 case UNSPEC: 10397 gcc_assert (XVECLEN (x, 0) == 1); 10398 output_pic_addr_const (file, XVECEXP (x, 0, 0), code); 10399 switch (XINT (x, 1)) 10400 { 10401 case UNSPEC_GOT: 10402 fputs ("@GOT", file); 10403 break; 10404 case UNSPEC_GOTOFF: 10405 fputs ("@GOTOFF", file); 10406 break; 10407 case UNSPEC_PLTOFF: 10408 fputs ("@PLTOFF", file); 10409 break; 10410 case UNSPEC_GOTPCREL: 10411 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 10412 "@GOTPCREL(%rip)" : "@GOTPCREL[rip]", file); 10413 break; 10414 case UNSPEC_GOTTPOFF: 10415 /* FIXME: This might be @TPOFF in Sun ld too. */ 10416 fputs ("@GOTTPOFF", file); 10417 break; 10418 case UNSPEC_TPOFF: 10419 fputs ("@TPOFF", file); 10420 break; 10421 case UNSPEC_NTPOFF: 10422 if (TARGET_64BIT) 10423 fputs ("@TPOFF", file); 10424 else 10425 fputs ("@NTPOFF", file); 10426 break; 10427 case UNSPEC_DTPOFF: 10428 fputs ("@DTPOFF", file); 10429 break; 10430 case UNSPEC_GOTNTPOFF: 10431 if (TARGET_64BIT) 10432 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 10433 "@GOTTPOFF(%rip)": "@GOTTPOFF[rip]", file); 10434 else 10435 fputs ("@GOTNTPOFF", file); 10436 break; 10437 case UNSPEC_INDNTPOFF: 10438 fputs ("@INDNTPOFF", file); 10439 break; 10440 #if TARGET_MACHO 10441 case UNSPEC_MACHOPIC_OFFSET: 10442 putc ('-', file); 10443 machopic_output_function_base_name (file); 10444 break; 10445 #endif 10446 default: 10447 output_operand_lossage ("invalid UNSPEC as operand"); 10448 break; 10449 } 10450 break; 10451 10452 default: 10453 output_operand_lossage ("invalid expression as operand"); 10454 } 10455 } 10456 10457 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL. 10458 We need to emit DTP-relative relocations. */ 10459 10460 static void ATTRIBUTE_UNUSED 10461 i386_output_dwarf_dtprel (FILE *file, int size, rtx x) 10462 { 10463 fputs (ASM_LONG, file); 10464 output_addr_const (file, x); 10465 fputs ("@DTPOFF", file); 10466 switch (size) 10467 { 10468 case 4: 10469 break; 10470 case 8: 10471 fputs (", 0", file); 10472 break; 10473 default: 10474 gcc_unreachable (); 10475 } 10476 } 10477 10478 /* Return true if X is a representation of the PIC register. This copes 10479 with calls from ix86_find_base_term, where the register might have 10480 been replaced by a cselib value. */ 10481 10482 static bool 10483 ix86_pic_register_p (rtx x) 10484 { 10485 if (GET_CODE (x) == VALUE && CSELIB_VAL_PTR (x)) 10486 return (pic_offset_table_rtx 10487 && rtx_equal_for_cselib_p (x, pic_offset_table_rtx)); 10488 else 10489 return REG_P (x) && REGNO (x) == PIC_OFFSET_TABLE_REGNUM; 10490 } 10491 10492 /* In the name of slightly smaller debug output, and to cater to 10493 general assembler lossage, recognize PIC+GOTOFF and turn it back 10494 into a direct symbol reference. 10495 10496 On Darwin, this is necessary to avoid a crash, because Darwin 10497 has a different PIC label for each routine but the DWARF debugging 10498 information is not associated with any particular routine, so it's 10499 necessary to remove references to the PIC label from RTL stored by 10500 the DWARF output code. */ 10501 10502 static rtx 10503 ix86_delegitimize_address (rtx orig_x) 10504 { 10505 rtx x = orig_x; 10506 /* reg_addend is NULL or a multiple of some register. */ 10507 rtx reg_addend = NULL_RTX; 10508 /* const_addend is NULL or a const_int. */ 10509 rtx const_addend = NULL_RTX; 10510 /* This is the result, or NULL. 
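It is the delegitimized address, e.g. the bare SYMBOL_REF recovered from a pic_reg + foo@GOTOFF expression.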
*/ 10511 rtx result = NULL_RTX; 10512 10513 if (MEM_P (x)) 10514 x = XEXP (x, 0); 10515 10516 if (TARGET_64BIT) 10517 { 10518 if (GET_CODE (x) != CONST 10519 || GET_CODE (XEXP (x, 0)) != UNSPEC 10520 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL 10521 || !MEM_P (orig_x)) 10522 return orig_x; 10523 return XVECEXP (XEXP (x, 0), 0, 0); 10524 } 10525 10526 if (GET_CODE (x) != PLUS 10527 || GET_CODE (XEXP (x, 1)) != CONST) 10528 return orig_x; 10529 10530 if (ix86_pic_register_p (XEXP (x, 0))) 10531 /* %ebx + GOT/GOTOFF */ 10532 ; 10533 else if (GET_CODE (XEXP (x, 0)) == PLUS) 10534 { 10535 /* %ebx + %reg * scale + GOT/GOTOFF */ 10536 reg_addend = XEXP (x, 0); 10537 if (ix86_pic_register_p (XEXP (reg_addend, 0))) 10538 reg_addend = XEXP (reg_addend, 1); 10539 else if (ix86_pic_register_p (XEXP (reg_addend, 1))) 10540 reg_addend = XEXP (reg_addend, 0); 10541 else 10542 return orig_x; 10543 if (!REG_P (reg_addend) 10544 && GET_CODE (reg_addend) != MULT 10545 && GET_CODE (reg_addend) != ASHIFT) 10546 return orig_x; 10547 } 10548 else 10549 return orig_x; 10550 10551 x = XEXP (XEXP (x, 1), 0); 10552 if (GET_CODE (x) == PLUS 10553 && CONST_INT_P (XEXP (x, 1))) 10554 { 10555 const_addend = XEXP (x, 1); 10556 x = XEXP (x, 0); 10557 } 10558 10559 if (GET_CODE (x) == UNSPEC 10560 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x)) 10561 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x)))) 10562 result = XVECEXP (x, 0, 0); 10563 10564 if (TARGET_MACHO && darwin_local_data_pic (x) 10565 && !MEM_P (orig_x)) 10566 result = XVECEXP (x, 0, 0); 10567 10568 if (! result) 10569 return orig_x; 10570 10571 if (const_addend) 10572 result = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, result, const_addend)); 10573 if (reg_addend) 10574 result = gen_rtx_PLUS (Pmode, reg_addend, result); 10575 return result; 10576 } 10577 10578 /* If X is a machine specific address (i.e. a symbol or label being 10579 referenced as a displacement from the GOT implemented using an 10580 UNSPEC), then return the base term. Otherwise return X. 
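For the 64-bit case handled below this means looking through a CONST such as (const (unspec [foo] UNSPEC_GOTPCREL)) and returning foo itself.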
*/ 10581 10582 rtx 10583 ix86_find_base_term (rtx x) 10584 { 10585 rtx term; 10586 10587 if (TARGET_64BIT) 10588 { 10589 if (GET_CODE (x) != CONST) 10590 return x; 10591 term = XEXP (x, 0); 10592 if (GET_CODE (term) == PLUS 10593 && (CONST_INT_P (XEXP (term, 1)) 10594 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE)) 10595 term = XEXP (term, 0); 10596 if (GET_CODE (term) != UNSPEC 10597 || XINT (term, 1) != UNSPEC_GOTPCREL) 10598 return x; 10599 10600 return XVECEXP (term, 0, 0); 10601 } 10602 10603 return ix86_delegitimize_address (x); 10604 } 10605 10606 static void 10607 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse, 10608 int fp, FILE *file) 10609 { 10610 const char *suffix; 10611 10612 if (mode == CCFPmode || mode == CCFPUmode) 10613 { 10614 enum rtx_code second_code, bypass_code; 10615 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code); 10616 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN); 10617 code = ix86_fp_compare_code_to_integer (code); 10618 mode = CCmode; 10619 } 10620 if (reverse) 10621 code = reverse_condition (code); 10622 10623 switch (code) 10624 { 10625 case EQ: 10626 switch (mode) 10627 { 10628 case CCAmode: 10629 suffix = "a"; 10630 break; 10631 10632 case CCCmode: 10633 suffix = "c"; 10634 break; 10635 10636 case CCOmode: 10637 suffix = "o"; 10638 break; 10639 10640 case CCSmode: 10641 suffix = "s"; 10642 break; 10643 10644 default: 10645 suffix = "e"; 10646 } 10647 break; 10648 case NE: 10649 switch (mode) 10650 { 10651 case CCAmode: 10652 suffix = "na"; 10653 break; 10654 10655 case CCCmode: 10656 suffix = "nc"; 10657 break; 10658 10659 case CCOmode: 10660 suffix = "no"; 10661 break; 10662 10663 case CCSmode: 10664 suffix = "ns"; 10665 break; 10666 10667 default: 10668 suffix = "ne"; 10669 } 10670 break; 10671 case GT: 10672 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode); 10673 suffix = "g"; 10674 break; 10675 case GTU: 10676 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers. 10677 Those same assemblers have the same but opposite lossage on cmov. */ 10678 if (mode == CCmode) 10679 suffix = fp ? "nbe" : "a"; 10680 else if (mode == CCCmode) 10681 suffix = "b"; 10682 else 10683 gcc_unreachable (); 10684 break; 10685 case LT: 10686 switch (mode) 10687 { 10688 case CCNOmode: 10689 case CCGOCmode: 10690 suffix = "s"; 10691 break; 10692 10693 case CCmode: 10694 case CCGCmode: 10695 suffix = "l"; 10696 break; 10697 10698 default: 10699 gcc_unreachable (); 10700 } 10701 break; 10702 case LTU: 10703 gcc_assert (mode == CCmode || mode == CCCmode); 10704 suffix = "b"; 10705 break; 10706 case GE: 10707 switch (mode) 10708 { 10709 case CCNOmode: 10710 case CCGOCmode: 10711 suffix = "ns"; 10712 break; 10713 10714 case CCmode: 10715 case CCGCmode: 10716 suffix = "ge"; 10717 break; 10718 10719 default: 10720 gcc_unreachable (); 10721 } 10722 break; 10723 case GEU: 10724 /* ??? As above. */ 10725 gcc_assert (mode == CCmode || mode == CCCmode); 10726 suffix = fp ? "nb" : "ae"; 10727 break; 10728 case LE: 10729 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode); 10730 suffix = "le"; 10731 break; 10732 case LEU: 10733 /* ??? As above. */ 10734 if (mode == CCmode) 10735 suffix = "be"; 10736 else if (mode == CCCmode) 10737 suffix = fp ? "nb" : "ae"; 10738 else 10739 gcc_unreachable (); 10740 break; 10741 case UNORDERED: 10742 suffix = fp ? "u" : "p"; 10743 break; 10744 case ORDERED: 10745 suffix = fp ? 
"nu" : "np"; 10746 break; 10747 default: 10748 gcc_unreachable (); 10749 } 10750 fputs (suffix, file); 10751 } 10752 10753 /* Print the name of register X to FILE based on its machine mode and number. 10754 If CODE is 'w', pretend the mode is HImode. 10755 If CODE is 'b', pretend the mode is QImode. 10756 If CODE is 'k', pretend the mode is SImode. 10757 If CODE is 'q', pretend the mode is DImode. 10758 If CODE is 'x', pretend the mode is V4SFmode. 10759 If CODE is 't', pretend the mode is V8SFmode. 10760 If CODE is 'h', pretend the reg is the 'high' byte register. 10761 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. 10762 If CODE is 'd', duplicate the operand for AVX instruction. 10763 */ 10764 10765 void 10766 print_reg (rtx x, int code, FILE *file) 10767 { 10768 const char *reg; 10769 bool duplicated = code == 'd' && TARGET_AVX; 10770 10771 gcc_assert (x == pc_rtx 10772 || (REGNO (x) != ARG_POINTER_REGNUM 10773 && REGNO (x) != FRAME_POINTER_REGNUM 10774 && REGNO (x) != FLAGS_REG 10775 && REGNO (x) != FPSR_REG 10776 && REGNO (x) != FPCR_REG)); 10777 10778 if (ASSEMBLER_DIALECT == ASM_ATT) 10779 putc ('%', file); 10780 10781 if (x == pc_rtx) 10782 { 10783 gcc_assert (TARGET_64BIT); 10784 fputs ("rip", file); 10785 return; 10786 } 10787 10788 if (code == 'w' || MMX_REG_P (x)) 10789 code = 2; 10790 else if (code == 'b') 10791 code = 1; 10792 else if (code == 'k') 10793 code = 4; 10794 else if (code == 'q') 10795 code = 8; 10796 else if (code == 'y') 10797 code = 3; 10798 else if (code == 'h') 10799 code = 0; 10800 else if (code == 'x') 10801 code = 16; 10802 else if (code == 't') 10803 code = 32; 10804 else 10805 code = GET_MODE_SIZE (GET_MODE (x)); 10806 10807 /* Irritatingly, AMD extended registers use different naming convention 10808 from the normal registers. */ 10809 if (REX_INT_REG_P (x)) 10810 { 10811 gcc_assert (TARGET_64BIT); 10812 switch (code) 10813 { 10814 case 0: 10815 error ("extended registers have no high halves"); 10816 break; 10817 case 1: 10818 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8); 10819 break; 10820 case 2: 10821 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8); 10822 break; 10823 case 4: 10824 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8); 10825 break; 10826 case 8: 10827 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8); 10828 break; 10829 default: 10830 error ("unsupported operand size for extended register"); 10831 break; 10832 } 10833 return; 10834 } 10835 10836 reg = NULL; 10837 switch (code) 10838 { 10839 case 3: 10840 if (STACK_TOP_P (x)) 10841 { 10842 reg = "st(0)"; 10843 break; 10844 } 10845 /* FALLTHRU */ 10846 case 8: 10847 case 4: 10848 case 12: 10849 if (! ANY_FP_REG_P (x)) 10850 putc (code == 8 && TARGET_64BIT ? 
'r' : 'e', file); 10851 /* FALLTHRU */ 10852 case 16: 10853 case 2: 10854 normal: 10855 reg = hi_reg_name[REGNO (x)]; 10856 break; 10857 case 1: 10858 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name)) 10859 goto normal; 10860 reg = qi_reg_name[REGNO (x)]; 10861 break; 10862 case 0: 10863 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name)) 10864 goto normal; 10865 reg = qi_high_reg_name[REGNO (x)]; 10866 break; 10867 case 32: 10868 if (SSE_REG_P (x)) 10869 { 10870 gcc_assert (!duplicated); 10871 putc ('y', file); 10872 fputs (hi_reg_name[REGNO (x)] + 1, file); 10873 return; 10874 } 10875 break; 10876 default: 10877 gcc_unreachable (); 10878 } 10879 10880 fputs (reg, file); 10881 if (duplicated) 10882 { 10883 if (ASSEMBLER_DIALECT == ASM_ATT) 10884 fprintf (file, ", %%%s", reg); 10885 else 10886 fprintf (file, ", %s", reg); 10887 } 10888 } 10889 10890 /* Locate some local-dynamic symbol still in use by this function 10891 so that we can print its name in some tls_local_dynamic_base 10892 pattern. */ 10893 10894 static int 10895 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED) 10896 { 10897 rtx x = *px; 10898 10899 if (GET_CODE (x) == SYMBOL_REF 10900 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC) 10901 { 10902 cfun->machine->some_ld_name = XSTR (x, 0); 10903 return 1; 10904 } 10905 10906 return 0; 10907 } 10908 10909 static const char * 10910 get_some_local_dynamic_name (void) 10911 { 10912 rtx insn; 10913 10914 if (cfun->machine->some_ld_name) 10915 return cfun->machine->some_ld_name; 10916 10917 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn)) 10918 if (INSN_P (insn) 10919 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0)) 10920 return cfun->machine->some_ld_name; 10921 10922 gcc_unreachable (); 10923 } 10924 10925 /* Meaning of CODE: 10926 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand. 10927 C -- print opcode suffix for set/cmov insn. 10928 c -- like C, but print reversed condition 10929 F,f -- likewise, but for floating-point. 10930 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.", 10931 otherwise nothing 10932 R -- print the prefix for register names. 10933 z -- print the opcode suffix for the size of the current operand. 10934 * -- print a star (in certain assembler syntax) 10935 A -- print an absolute memory reference. 10936 w -- print the operand as if it's a "word" (HImode) even if it isn't. 10937 s -- print a shift double count, followed by the assemblers argument 10938 delimiter. 10939 b -- print the QImode name of the register for the indicated operand. 10940 %b0 would print %al if operands[0] is reg 0. 10941 w -- likewise, print the HImode name of the register. 10942 k -- likewise, print the SImode name of the register. 10943 q -- likewise, print the DImode name of the register. 10944 x -- likewise, print the V4SFmode name of the register. 10945 t -- likewise, print the V8SFmode name of the register. 10946 h -- print the QImode name for a "high" register, either ah, bh, ch or dh. 10947 y -- print "st(0)" instead of "st" as a register. 10948 d -- print duplicated register operand for AVX instruction. 10949 D -- print condition for SSE cmp instruction. 10950 P -- if PIC, print an @PLT suffix. 10951 X -- don't print any sort of PIC '@' suffix for a symbol. 10952 & -- print some in-use local-dynamic symbol name. 10953 H -- print a memory address offset by 8; used for sse high-parts 10954 Y -- print condition for SSE5 com* instruction. 
10955 + -- print a branch hint as 'cs' or 'ds' prefix 10956 ; -- print a semicolon (after prefixes due to bug in older gas). 10957 */ 10958 10959 void 10960 print_operand (FILE *file, rtx x, int code) 10961 { 10962 if (code) 10963 { 10964 switch (code) 10965 { 10966 case '*': 10967 if (ASSEMBLER_DIALECT == ASM_ATT) 10968 putc ('*', file); 10969 return; 10970 10971 case '&': 10972 assemble_name (file, get_some_local_dynamic_name ()); 10973 return; 10974 10975 case 'A': 10976 switch (ASSEMBLER_DIALECT) 10977 { 10978 case ASM_ATT: 10979 putc ('*', file); 10980 break; 10981 10982 case ASM_INTEL: 10983 /* Intel syntax. For absolute addresses, registers should not 10984 be surrounded by braces. */ 10985 if (!REG_P (x)) 10986 { 10987 putc ('[', file); 10988 PRINT_OPERAND (file, x, 0); 10989 putc (']', file); 10990 return; 10991 } 10992 break; 10993 10994 default: 10995 gcc_unreachable (); 10996 } 10997 10998 PRINT_OPERAND (file, x, 0); 10999 return; 11000 11001 11002 case 'L': 11003 if (ASSEMBLER_DIALECT == ASM_ATT) 11004 putc ('l', file); 11005 return; 11006 11007 case 'W': 11008 if (ASSEMBLER_DIALECT == ASM_ATT) 11009 putc ('w', file); 11010 return; 11011 11012 case 'B': 11013 if (ASSEMBLER_DIALECT == ASM_ATT) 11014 putc ('b', file); 11015 return; 11016 11017 case 'Q': 11018 if (ASSEMBLER_DIALECT == ASM_ATT) 11019 putc ('l', file); 11020 return; 11021 11022 case 'S': 11023 if (ASSEMBLER_DIALECT == ASM_ATT) 11024 putc ('s', file); 11025 return; 11026 11027 case 'T': 11028 if (ASSEMBLER_DIALECT == ASM_ATT) 11029 putc ('t', file); 11030 return; 11031 11032 case 'z': 11033 /* 387 opcodes don't get size suffixes if the operands are 11034 registers. */ 11035 if (STACK_REG_P (x)) 11036 return; 11037 11038 /* Likewise if using Intel opcodes. */ 11039 if (ASSEMBLER_DIALECT == ASM_INTEL) 11040 return; 11041 11042 /* This is the size of op from size of operand. */ 11043 switch (GET_MODE_SIZE (GET_MODE (x))) 11044 { 11045 case 1: 11046 putc ('b', file); 11047 return; 11048 11049 case 2: 11050 if (MEM_P (x)) 11051 { 11052 #ifdef HAVE_GAS_FILDS_FISTS 11053 putc ('s', file); 11054 #endif 11055 return; 11056 } 11057 else 11058 putc ('w', file); 11059 return; 11060 11061 case 4: 11062 if (GET_MODE (x) == SFmode) 11063 { 11064 putc ('s', file); 11065 return; 11066 } 11067 else 11068 putc ('l', file); 11069 return; 11070 11071 case 12: 11072 case 16: 11073 putc ('t', file); 11074 return; 11075 11076 case 8: 11077 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT) 11078 { 11079 if (MEM_P (x)) 11080 { 11081 #ifdef GAS_MNEMONICS 11082 putc ('q', file); 11083 #else 11084 putc ('l', file); 11085 putc ('l', file); 11086 #endif 11087 } 11088 else 11089 putc ('q', file); 11090 } 11091 else 11092 putc ('l', file); 11093 return; 11094 11095 default: 11096 gcc_unreachable (); 11097 } 11098 11099 case 'd': 11100 case 'b': 11101 case 'w': 11102 case 'k': 11103 case 'q': 11104 case 'h': 11105 case 't': 11106 case 'y': 11107 case 'x': 11108 case 'X': 11109 case 'P': 11110 break; 11111 11112 case 's': 11113 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT) 11114 { 11115 PRINT_OPERAND (file, x, 0); 11116 fputs (", ", file); 11117 } 11118 return; 11119 11120 case 'D': 11121 /* Little bit of braindamage here. The SSE compare instructions 11122 does use completely different names for the comparisons that the 11123 fp conditional moves. 
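For example, without AVX the cmpps/cmppd immediates only encode the eq, lt, le, unord,
     neq, nlt, nle and ord predicates, so the table below prints GE as "nlt" and GT as
     "nle" (giving mnemonics such as cmpnltps), whereas with AVX the richer VEX predicates
     such as eq_us, nge, ngt and neq_oq are used.  (Illustrative summary of the switches
     below, not an exhaustive list.)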
*/ 11124 if (TARGET_AVX) 11125 { 11126 switch (GET_CODE (x)) 11127 { 11128 case EQ: 11129 fputs ("eq", file); 11130 break; 11131 case UNEQ: 11132 fputs ("eq_us", file); 11133 break; 11134 case LT: 11135 fputs ("lt", file); 11136 break; 11137 case UNLT: 11138 fputs ("nge", file); 11139 break; 11140 case LE: 11141 fputs ("le", file); 11142 break; 11143 case UNLE: 11144 fputs ("ngt", file); 11145 break; 11146 case UNORDERED: 11147 fputs ("unord", file); 11148 break; 11149 case NE: 11150 fputs ("neq", file); 11151 break; 11152 case LTGT: 11153 fputs ("neq_oq", file); 11154 break; 11155 case GE: 11156 fputs ("ge", file); 11157 break; 11158 case UNGE: 11159 fputs ("nlt", file); 11160 break; 11161 case GT: 11162 fputs ("gt", file); 11163 break; 11164 case UNGT: 11165 fputs ("nle", file); 11166 break; 11167 case ORDERED: 11168 fputs ("ord", file); 11169 break; 11170 default: 11171 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11172 return; 11173 } 11174 } 11175 else 11176 { 11177 switch (GET_CODE (x)) 11178 { 11179 case EQ: 11180 case UNEQ: 11181 fputs ("eq", file); 11182 break; 11183 case LT: 11184 case UNLT: 11185 fputs ("lt", file); 11186 break; 11187 case LE: 11188 case UNLE: 11189 fputs ("le", file); 11190 break; 11191 case UNORDERED: 11192 fputs ("unord", file); 11193 break; 11194 case NE: 11195 case LTGT: 11196 fputs ("neq", file); 11197 break; 11198 case UNGE: 11199 case GE: 11200 fputs ("nlt", file); 11201 break; 11202 case UNGT: 11203 case GT: 11204 fputs ("nle", file); 11205 break; 11206 case ORDERED: 11207 fputs ("ord", file); 11208 break; 11209 default: 11210 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11211 return; 11212 } 11213 } 11214 return; 11215 case 'O': 11216 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11217 if (ASSEMBLER_DIALECT == ASM_ATT) 11218 { 11219 switch (GET_MODE (x)) 11220 { 11221 case HImode: putc ('w', file); break; 11222 case SImode: 11223 case SFmode: putc ('l', file); break; 11224 case DImode: 11225 case DFmode: putc ('q', file); break; 11226 default: gcc_unreachable (); 11227 } 11228 putc ('.', file); 11229 } 11230 #endif 11231 return; 11232 case 'C': 11233 if (!COMPARISON_P (x)) 11234 { 11235 output_operand_lossage ("operand is neither a constant nor a " 11236 "condition code, invalid operand code " 11237 "'C'"); 11238 return; 11239 } 11240 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file); 11241 return; 11242 case 'F': 11243 if (!COMPARISON_P (x)) 11244 { 11245 output_operand_lossage ("operand is neither a constant nor a " 11246 "condition code, invalid operand code " 11247 "'F'"); 11248 return; 11249 } 11250 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11251 if (ASSEMBLER_DIALECT == ASM_ATT) 11252 putc ('.', file); 11253 #endif 11254 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file); 11255 return; 11256 11257 /* Like above, but reverse condition */ 11258 case 'c': 11259 /* Check to see if argument to %c is really a constant 11260 and not a condition code which needs to be reversed. 
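For instance, given an operand of the form (eq (reg:CCZ FLAGS_REG) (const_int 0)),
	 %cN prints the reversed suffix "ne" whereas %CN would print "e".  (Hypothetical
	 example; the actual suffix is chosen by put_condition_code.)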
*/ 11261 if (!COMPARISON_P (x)) 11262 { 11263 output_operand_lossage ("operand is neither a constant nor a " 11264 "condition code, invalid operand " 11265 "code 'c'"); 11266 return; 11267 } 11268 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file); 11269 return; 11270 case 'f': 11271 if (!COMPARISON_P (x)) 11272 { 11273 output_operand_lossage ("operand is neither a constant nor a " 11274 "condition code, invalid operand " 11275 "code 'f'"); 11276 return; 11277 } 11278 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX 11279 if (ASSEMBLER_DIALECT == ASM_ATT) 11280 putc ('.', file); 11281 #endif 11282 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file); 11283 return; 11284 11285 case 'H': 11286 /* It doesn't actually matter what mode we use here, as we're 11287 only going to use this for printing. */ 11288 x = adjust_address_nv (x, DImode, 8); 11289 break; 11290 11291 case '+': 11292 { 11293 rtx x; 11294 11295 if (!optimize 11296 || optimize_function_for_size_p (cfun) || !TARGET_BRANCH_PREDICTION_HINTS) 11297 return; 11298 11299 x = find_reg_note (current_output_insn, REG_BR_PROB, 0); 11300 if (x) 11301 { 11302 int pred_val = INTVAL (XEXP (x, 0)); 11303 11304 if (pred_val < REG_BR_PROB_BASE * 45 / 100 11305 || pred_val > REG_BR_PROB_BASE * 55 / 100) 11306 { 11307 int taken = pred_val > REG_BR_PROB_BASE / 2; 11308 int cputaken = final_forward_branch_p (current_output_insn) == 0; 11309 11310 /* Emit hints only in the case default branch prediction 11311 heuristics would fail. */ 11312 if (taken != cputaken) 11313 { 11314 /* We use 3e (DS) prefix for taken branches and 11315 2e (CS) prefix for not taken branches. */ 11316 if (taken) 11317 fputs ("ds ; ", file); 11318 else 11319 fputs ("cs ; ", file); 11320 } 11321 } 11322 } 11323 return; 11324 } 11325 11326 case 'Y': 11327 switch (GET_CODE (x)) 11328 { 11329 case NE: 11330 fputs ("neq", file); 11331 break; 11332 case EQ: 11333 fputs ("eq", file); 11334 break; 11335 case GE: 11336 case GEU: 11337 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "ge" : "unlt", file); 11338 break; 11339 case GT: 11340 case GTU: 11341 fputs (INTEGRAL_MODE_P (GET_MODE (x)) ? "gt" : "unle", file); 11342 break; 11343 case LE: 11344 case LEU: 11345 fputs ("le", file); 11346 break; 11347 case LT: 11348 case LTU: 11349 fputs ("lt", file); 11350 break; 11351 case UNORDERED: 11352 fputs ("unord", file); 11353 break; 11354 case ORDERED: 11355 fputs ("ord", file); 11356 break; 11357 case UNEQ: 11358 fputs ("ueq", file); 11359 break; 11360 case UNGE: 11361 fputs ("nlt", file); 11362 break; 11363 case UNGT: 11364 fputs ("nle", file); 11365 break; 11366 case UNLE: 11367 fputs ("ule", file); 11368 break; 11369 case UNLT: 11370 fputs ("ult", file); 11371 break; 11372 case LTGT: 11373 fputs ("une", file); 11374 break; 11375 default: 11376 output_operand_lossage ("operand is not a condition code, invalid operand code 'D'"); 11377 return; 11378 } 11379 return; 11380 11381 case ';': 11382 #if TARGET_MACHO 11383 fputs (" ; ", file); 11384 #else 11385 fputc (' ', file); 11386 #endif 11387 return; 11388 11389 default: 11390 output_operand_lossage ("invalid operand code '%c'", code); 11391 } 11392 } 11393 11394 if (REG_P (x)) 11395 print_reg (x, code, file); 11396 11397 else if (MEM_P (x)) 11398 { 11399 /* No `byte ptr' prefix for call instructions or BLKmode operands. 
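Otherwise, Intel-syntax memory operands are prefixed below with their size,
	 e.g. BYTE PTR, WORD PTR, DWORD PTR, QWORD PTR, XMMWORD PTR or YMMWORD PTR,
	 derived from the operand's mode unless overridden by an explicit 'b', 'w'
	 or 'k' code.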
*/ 11400 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P' 11401 && GET_MODE (x) != BLKmode) 11402 { 11403 const char * size; 11404 switch (GET_MODE_SIZE (GET_MODE (x))) 11405 { 11406 case 1: size = "BYTE"; break; 11407 case 2: size = "WORD"; break; 11408 case 4: size = "DWORD"; break; 11409 case 8: size = "QWORD"; break; 11410 case 12: size = "TBYTE"; break; 11411 case 16: 11412 if (GET_MODE (x) == XFmode) 11413 size = "TBYTE"; 11414 else 11415 size = "XMMWORD"; 11416 break; 11417 case 32: size = "YMMWORD"; break; 11418 default: 11419 gcc_unreachable (); 11420 } 11421 11422 /* Check for explicit size override (codes 'b', 'w' and 'k') */ 11423 if (code == 'b') 11424 size = "BYTE"; 11425 else if (code == 'w') 11426 size = "WORD"; 11427 else if (code == 'k') 11428 size = "DWORD"; 11429 11430 fputs (size, file); 11431 fputs (" PTR ", file); 11432 } 11433 11434 x = XEXP (x, 0); 11435 /* Avoid (%rip) for call operands. */ 11436 if (CONSTANT_ADDRESS_P (x) && code == 'P' 11437 && !CONST_INT_P (x)) 11438 output_addr_const (file, x); 11439 else if (this_is_asm_operands && ! address_operand (x, VOIDmode)) 11440 output_operand_lossage ("invalid constraints for operand"); 11441 else 11442 output_address (x); 11443 } 11444 11445 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode) 11446 { 11447 REAL_VALUE_TYPE r; 11448 long l; 11449 11450 REAL_VALUE_FROM_CONST_DOUBLE (r, x); 11451 REAL_VALUE_TO_TARGET_SINGLE (r, l); 11452 11453 if (ASSEMBLER_DIALECT == ASM_ATT) 11454 putc ('$', file); 11455 fprintf (file, "0x%08lx", (long unsigned int) l); 11456 } 11457 11458 /* These float cases don't actually occur as immediate operands. */ 11459 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode) 11460 { 11461 char dstr[30]; 11462 11463 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 11464 fprintf (file, "%s", dstr); 11465 } 11466 11467 else if (GET_CODE (x) == CONST_DOUBLE 11468 && GET_MODE (x) == XFmode) 11469 { 11470 char dstr[30]; 11471 11472 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1); 11473 fprintf (file, "%s", dstr); 11474 } 11475 11476 else 11477 { 11478 /* We have patterns that allow zero sets of memory, for instance. 11479 In 64-bit mode, we should probably support all 8-byte vectors, 11480 since we can in fact encode that into an immediate. */ 11481 if (GET_CODE (x) == CONST_VECTOR) 11482 { 11483 gcc_assert (x == CONST0_RTX (GET_MODE (x))); 11484 x = const0_rtx; 11485 } 11486 11487 if (code != 'P') 11488 { 11489 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE) 11490 { 11491 if (ASSEMBLER_DIALECT == ASM_ATT) 11492 putc ('$', file); 11493 } 11494 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF 11495 || GET_CODE (x) == LABEL_REF) 11496 { 11497 if (ASSEMBLER_DIALECT == ASM_ATT) 11498 putc ('$', file); 11499 else 11500 fputs ("OFFSET FLAT:", file); 11501 } 11502 } 11503 if (CONST_INT_P (x)) 11504 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x)); 11505 else if (flag_pic) 11506 output_pic_addr_const (file, x, code); 11507 else 11508 output_addr_const (file, x); 11509 } 11510 } 11511 11512 /* Print a memory operand whose address is ADDR. 
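For AT&T output the address is printed as disp(base,index,scale), e.g.
   8(%eax,%ebx,4); for Intel output as [base+index*scale+disp], e.g. [eax+ebx*4+8].
   A %fs:/%gs: segment override and, on 64-bit targets, an implicit (%rip) base for
   plain symbolic addresses may also be printed (see the code below).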
*/ 11513 11514 void 11515 print_operand_address (FILE *file, rtx addr) 11516 { 11517 struct ix86_address parts; 11518 rtx base, index, disp; 11519 int scale; 11520 int ok = ix86_decompose_address (addr, &parts); 11521 11522 gcc_assert (ok); 11523 11524 base = parts.base; 11525 index = parts.index; 11526 disp = parts.disp; 11527 scale = parts.scale; 11528 11529 switch (parts.seg) 11530 { 11531 case SEG_DEFAULT: 11532 break; 11533 case SEG_FS: 11534 case SEG_GS: 11535 if (ASSEMBLER_DIALECT == ASM_ATT) 11536 putc ('%', file); 11537 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file); 11538 break; 11539 default: 11540 gcc_unreachable (); 11541 } 11542 11543 /* Use one byte shorter RIP relative addressing for 64bit mode. */ 11544 if (TARGET_64BIT && !base && !index) 11545 { 11546 rtx symbol = disp; 11547 11548 if (GET_CODE (disp) == CONST 11549 && GET_CODE (XEXP (disp, 0)) == PLUS 11550 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 11551 symbol = XEXP (XEXP (disp, 0), 0); 11552 11553 if (GET_CODE (symbol) == LABEL_REF 11554 || (GET_CODE (symbol) == SYMBOL_REF 11555 && SYMBOL_REF_TLS_MODEL (symbol) == 0)) 11556 base = pc_rtx; 11557 } 11558 if (!base && !index) 11559 { 11560 /* Displacement only requires special attention. */ 11561 11562 if (CONST_INT_P (disp)) 11563 { 11564 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT) 11565 fputs ("ds:", file); 11566 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp)); 11567 } 11568 else if (flag_pic) 11569 output_pic_addr_const (file, disp, 0); 11570 else 11571 output_addr_const (file, disp); 11572 } 11573 else 11574 { 11575 if (ASSEMBLER_DIALECT == ASM_ATT) 11576 { 11577 if (disp) 11578 { 11579 if (flag_pic) 11580 output_pic_addr_const (file, disp, 0); 11581 else if (GET_CODE (disp) == LABEL_REF) 11582 output_asm_label (disp); 11583 else 11584 output_addr_const (file, disp); 11585 } 11586 11587 putc ('(', file); 11588 if (base) 11589 print_reg (base, 0, file); 11590 if (index) 11591 { 11592 putc (',', file); 11593 print_reg (index, 0, file); 11594 if (scale != 1) 11595 fprintf (file, ",%d", scale); 11596 } 11597 putc (')', file); 11598 } 11599 else 11600 { 11601 rtx offset = NULL_RTX; 11602 11603 if (disp) 11604 { 11605 /* Pull out the offset of a symbol; print any symbol itself. 
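For example, given a displacement of (const (plus (symbol_ref "foo") (const_int 8)))
	     (a made-up symbol), "foo" is printed here while the 8 is saved in OFFSET and
	     emitted as "+8" inside the brackets further down.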
*/ 11606 if (GET_CODE (disp) == CONST 11607 && GET_CODE (XEXP (disp, 0)) == PLUS 11608 && CONST_INT_P (XEXP (XEXP (disp, 0), 1))) 11609 { 11610 offset = XEXP (XEXP (disp, 0), 1); 11611 disp = gen_rtx_CONST (VOIDmode, 11612 XEXP (XEXP (disp, 0), 0)); 11613 } 11614 11615 if (flag_pic) 11616 output_pic_addr_const (file, disp, 0); 11617 else if (GET_CODE (disp) == LABEL_REF) 11618 output_asm_label (disp); 11619 else if (CONST_INT_P (disp)) 11620 offset = disp; 11621 else 11622 output_addr_const (file, disp); 11623 } 11624 11625 putc ('[', file); 11626 if (base) 11627 { 11628 print_reg (base, 0, file); 11629 if (offset) 11630 { 11631 if (INTVAL (offset) >= 0) 11632 putc ('+', file); 11633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 11634 } 11635 } 11636 else if (offset) 11637 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset)); 11638 else 11639 putc ('0', file); 11640 11641 if (index) 11642 { 11643 putc ('+', file); 11644 print_reg (index, 0, file); 11645 if (scale != 1) 11646 fprintf (file, "*%d", scale); 11647 } 11648 putc (']', file); 11649 } 11650 } 11651 } 11652 11653 bool 11654 output_addr_const_extra (FILE *file, rtx x) 11655 { 11656 rtx op; 11657 11658 if (GET_CODE (x) != UNSPEC) 11659 return false; 11660 11661 op = XVECEXP (x, 0, 0); 11662 switch (XINT (x, 1)) 11663 { 11664 case UNSPEC_GOTTPOFF: 11665 output_addr_const (file, op); 11666 /* FIXME: This might be @TPOFF in Sun ld. */ 11667 fputs ("@GOTTPOFF", file); 11668 break; 11669 case UNSPEC_TPOFF: 11670 output_addr_const (file, op); 11671 fputs ("@TPOFF", file); 11672 break; 11673 case UNSPEC_NTPOFF: 11674 output_addr_const (file, op); 11675 if (TARGET_64BIT) 11676 fputs ("@TPOFF", file); 11677 else 11678 fputs ("@NTPOFF", file); 11679 break; 11680 case UNSPEC_DTPOFF: 11681 output_addr_const (file, op); 11682 fputs ("@DTPOFF", file); 11683 break; 11684 case UNSPEC_GOTNTPOFF: 11685 output_addr_const (file, op); 11686 if (TARGET_64BIT) 11687 fputs (ASSEMBLER_DIALECT == ASM_ATT ? 11688 "@GOTTPOFF(%rip)" : "@GOTTPOFF[rip]", file); 11689 else 11690 fputs ("@GOTNTPOFF", file); 11691 break; 11692 case UNSPEC_INDNTPOFF: 11693 output_addr_const (file, op); 11694 fputs ("@INDNTPOFF", file); 11695 break; 11696 #if TARGET_MACHO 11697 case UNSPEC_MACHOPIC_OFFSET: 11698 output_addr_const (file, op); 11699 putc ('-', file); 11700 machopic_output_function_base_name (file); 11701 break; 11702 #endif 11703 11704 default: 11705 return false; 11706 } 11707 11708 return true; 11709 } 11710 11711 /* Split one or more DImode RTL references into pairs of SImode 11712 references. The RTL can be REG, offsettable MEM, integer constant, or 11713 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 11714 split and "num" is its length. lo_half and hi_half are output arrays 11715 that parallel "operands". */ 11716 11717 void 11718 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 11719 { 11720 while (num--) 11721 { 11722 rtx op = operands[num]; 11723 11724 /* simplify_subreg refuse to split volatile memory addresses, 11725 but we still have to handle it. */ 11726 if (MEM_P (op)) 11727 { 11728 lo_half[num] = adjust_address (op, SImode, 0); 11729 hi_half[num] = adjust_address (op, SImode, 4); 11730 } 11731 else 11732 { 11733 lo_half[num] = simplify_gen_subreg (SImode, op, 11734 GET_MODE (op) == VOIDmode 11735 ? DImode : GET_MODE (op), 0); 11736 hi_half[num] = simplify_gen_subreg (SImode, op, 11737 GET_MODE (op) == VOIDmode 11738 ? 
DImode : GET_MODE (op), 4); 11739 } 11740 } 11741 } 11742 /* Split one or more TImode RTL references into pairs of DImode 11743 references. The RTL can be REG, offsettable MEM, integer constant, or 11744 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to 11745 split and "num" is its length. lo_half and hi_half are output arrays 11746 that parallel "operands". */ 11747 11748 void 11749 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[]) 11750 { 11751 while (num--) 11752 { 11753 rtx op = operands[num]; 11754 11755 /* simplify_subreg refuse to split volatile memory addresses, but we 11756 still have to handle it. */ 11757 if (MEM_P (op)) 11758 { 11759 lo_half[num] = adjust_address (op, DImode, 0); 11760 hi_half[num] = adjust_address (op, DImode, 8); 11761 } 11762 else 11763 { 11764 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0); 11765 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8); 11766 } 11767 } 11768 } 11769 11770 /* Output code to perform a 387 binary operation in INSN, one of PLUS, 11771 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3] 11772 is the expression of the binary operation. The output may either be 11773 emitted here, or returned to the caller, like all output_* functions. 11774 11775 There is no guarantee that the operands are the same mode, as they 11776 might be within FLOAT or FLOAT_EXTEND expressions. */ 11777 11778 #ifndef SYSV386_COMPAT 11779 /* Set to 1 for compatibility with brain-damaged assemblers. No-one 11780 wants to fix the assemblers because that causes incompatibility 11781 with gcc. No-one wants to fix gcc because that causes 11782 incompatibility with assemblers... You can use the option of 11783 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */ 11784 #define SYSV386_COMPAT 1 11785 #endif 11786 11787 const char * 11788 output_387_binary_op (rtx insn, rtx *operands) 11789 { 11790 static char buf[40]; 11791 const char *p; 11792 const char *ssep; 11793 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]); 11794 11795 #ifdef ENABLE_CHECKING 11796 /* Even if we do not want to check the inputs, this documents input 11797 constraints. Which helps in understanding the following code. 
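In short, unless this is an SSE operation: operands[0] must be a stack register
     equal to one of the two source operands, the remaining source must be a stack
     register or memory, and at least one of the sources must be the stack top.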
*/ 11798 if (STACK_REG_P (operands[0]) 11799 && ((REG_P (operands[1]) 11800 && REGNO (operands[0]) == REGNO (operands[1]) 11801 && (STACK_REG_P (operands[2]) || MEM_P (operands[2]))) 11802 || (REG_P (operands[2]) 11803 && REGNO (operands[0]) == REGNO (operands[2]) 11804 && (STACK_REG_P (operands[1]) || MEM_P (operands[1])))) 11805 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2]))) 11806 ; /* ok */ 11807 else 11808 gcc_assert (is_sse); 11809 #endif 11810 11811 switch (GET_CODE (operands[3])) 11812 { 11813 case PLUS: 11814 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11815 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11816 p = "fiadd"; 11817 else 11818 p = "fadd"; 11819 ssep = "vadd"; 11820 break; 11821 11822 case MINUS: 11823 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11824 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11825 p = "fisub"; 11826 else 11827 p = "fsub"; 11828 ssep = "vsub"; 11829 break; 11830 11831 case MULT: 11832 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11833 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11834 p = "fimul"; 11835 else 11836 p = "fmul"; 11837 ssep = "vmul"; 11838 break; 11839 11840 case DIV: 11841 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT 11842 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT) 11843 p = "fidiv"; 11844 else 11845 p = "fdiv"; 11846 ssep = "vdiv"; 11847 break; 11848 11849 default: 11850 gcc_unreachable (); 11851 } 11852 11853 if (is_sse) 11854 { 11855 if (TARGET_AVX) 11856 { 11857 strcpy (buf, ssep); 11858 if (GET_MODE (operands[0]) == SFmode) 11859 strcat (buf, "ss\t{%2, %1, %0|%0, %1, %2}"); 11860 else 11861 strcat (buf, "sd\t{%2, %1, %0|%0, %1, %2}"); 11862 } 11863 else 11864 { 11865 strcpy (buf, ssep + 1); 11866 if (GET_MODE (operands[0]) == SFmode) 11867 strcat (buf, "ss\t{%2, %0|%0, %2}"); 11868 else 11869 strcat (buf, "sd\t{%2, %0|%0, %2}"); 11870 } 11871 return buf; 11872 } 11873 strcpy (buf, p); 11874 11875 switch (GET_CODE (operands[3])) 11876 { 11877 case MULT: 11878 case PLUS: 11879 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2])) 11880 { 11881 rtx temp = operands[2]; 11882 operands[2] = operands[1]; 11883 operands[1] = temp; 11884 } 11885 11886 /* know operands[0] == operands[1]. */ 11887 11888 if (MEM_P (operands[2])) 11889 { 11890 p = "%z2\t%2"; 11891 break; 11892 } 11893 11894 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 11895 { 11896 if (STACK_TOP_P (operands[0])) 11897 /* How is it that we are storing to a dead operand[2]? 11898 Well, presumably operands[1] is dead too. We can't 11899 store the result to st(0) as st(0) gets popped on this 11900 instruction. Instead store to operands[2] (which I 11901 think has to be st(1)). st(1) will be popped later. 11902 gcc <= 2.8.1 didn't have this check and generated 11903 assembly code that the Unixware assembler rejected. 
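As a concrete illustration of the template chosen just below: with buf holding
	     "fadd", operands[0] and operands[1] both st(0), and operands[2] a dying st(1),
	     the AT&T output is roughly "faddp %st, %st(1)", i.e. st(1) = st(0) + st(1)
	     followed by a pop.  (Illustrative expansion only.)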
*/ 11904 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 11905 else 11906 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 11907 break; 11908 } 11909 11910 if (STACK_TOP_P (operands[0])) 11911 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 11912 else 11913 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 11914 break; 11915 11916 case MINUS: 11917 case DIV: 11918 if (MEM_P (operands[1])) 11919 { 11920 p = "r%z1\t%1"; 11921 break; 11922 } 11923 11924 if (MEM_P (operands[2])) 11925 { 11926 p = "%z2\t%2"; 11927 break; 11928 } 11929 11930 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2]))) 11931 { 11932 #if SYSV386_COMPAT 11933 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T 11934 derived assemblers, confusingly reverse the direction of 11935 the operation for fsub{r} and fdiv{r} when the 11936 destination register is not st(0). The Intel assembler 11937 doesn't have this brain damage. Read !SYSV386_COMPAT to 11938 figure out what the hardware really does. */ 11939 if (STACK_TOP_P (operands[0])) 11940 p = "{p\t%0, %2|rp\t%2, %0}"; 11941 else 11942 p = "{rp\t%2, %0|p\t%0, %2}"; 11943 #else 11944 if (STACK_TOP_P (operands[0])) 11945 /* As above for fmul/fadd, we can't store to st(0). */ 11946 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */ 11947 else 11948 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */ 11949 #endif 11950 break; 11951 } 11952 11953 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 11954 { 11955 #if SYSV386_COMPAT 11956 if (STACK_TOP_P (operands[0])) 11957 p = "{rp\t%0, %1|p\t%1, %0}"; 11958 else 11959 p = "{p\t%1, %0|rp\t%0, %1}"; 11960 #else 11961 if (STACK_TOP_P (operands[0])) 11962 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */ 11963 else 11964 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */ 11965 #endif 11966 break; 11967 } 11968 11969 if (STACK_TOP_P (operands[0])) 11970 { 11971 if (STACK_TOP_P (operands[1])) 11972 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */ 11973 else 11974 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */ 11975 break; 11976 } 11977 else if (STACK_TOP_P (operands[1])) 11978 { 11979 #if SYSV386_COMPAT 11980 p = "{\t%1, %0|r\t%0, %1}"; 11981 #else 11982 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */ 11983 #endif 11984 } 11985 else 11986 { 11987 #if SYSV386_COMPAT 11988 p = "{r\t%2, %0|\t%0, %2}"; 11989 #else 11990 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */ 11991 #endif 11992 } 11993 break; 11994 11995 default: 11996 gcc_unreachable (); 11997 } 11998 11999 strcat (buf, p); 12000 return buf; 12001 } 12002 12003 /* Return needed mode for entity in optimize_mode_switching pass. */ 12004 12005 int 12006 ix86_mode_needed (int entity, rtx insn) 12007 { 12008 enum attr_i387_cw mode; 12009 12010 /* The mode UNINITIALIZED is used to store control word after a 12011 function call or ASM pattern. The mode ANY specify that function 12012 has no requirements on the control word and make no changes in the 12013 bits we are interested in. 
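For example, a rounding pattern whose i387_cw attribute is "trunc" requests
   I387_CW_TRUNC from the I387_TRUNC entity, while an insn with no special
   requirement falls through to I387_CW_ANY below.  (Illustrative; the attribute
   value names follow the I387_CW_* modes used here.)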
*/ 12014 12015 if (CALL_P (insn) 12016 || (NONJUMP_INSN_P (insn) 12017 && (asm_noperands (PATTERN (insn)) >= 0 12018 || GET_CODE (PATTERN (insn)) == ASM_INPUT))) 12019 return I387_CW_UNINITIALIZED; 12020 12021 if (recog_memoized (insn) < 0) 12022 return I387_CW_ANY; 12023 12024 mode = get_attr_i387_cw (insn); 12025 12026 switch (entity) 12027 { 12028 case I387_TRUNC: 12029 if (mode == I387_CW_TRUNC) 12030 return mode; 12031 break; 12032 12033 case I387_FLOOR: 12034 if (mode == I387_CW_FLOOR) 12035 return mode; 12036 break; 12037 12038 case I387_CEIL: 12039 if (mode == I387_CW_CEIL) 12040 return mode; 12041 break; 12042 12043 case I387_MASK_PM: 12044 if (mode == I387_CW_MASK_PM) 12045 return mode; 12046 break; 12047 12048 default: 12049 gcc_unreachable (); 12050 } 12051 12052 return I387_CW_ANY; 12053 } 12054 12055 /* Output code to initialize control word copies used by trunc?f?i and 12056 rounding patterns. CURRENT_MODE is set to current control word, 12057 while NEW_MODE is set to new control word. */ 12058 12059 void 12060 emit_i387_cw_initialization (int mode) 12061 { 12062 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED); 12063 rtx new_mode; 12064 12065 enum ix86_stack_slot slot; 12066 12067 rtx reg = gen_reg_rtx (HImode); 12068 12069 emit_insn (gen_x86_fnstcw_1 (stored_mode)); 12070 emit_move_insn (reg, copy_rtx (stored_mode)); 12071 12072 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL 12073 || optimize_function_for_size_p (cfun)) 12074 { 12075 switch (mode) 12076 { 12077 case I387_CW_TRUNC: 12078 /* round toward zero (truncate) */ 12079 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00))); 12080 slot = SLOT_CW_TRUNC; 12081 break; 12082 12083 case I387_CW_FLOOR: 12084 /* round down toward -oo */ 12085 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 12086 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400))); 12087 slot = SLOT_CW_FLOOR; 12088 break; 12089 12090 case I387_CW_CEIL: 12091 /* round up toward +oo */ 12092 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00))); 12093 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800))); 12094 slot = SLOT_CW_CEIL; 12095 break; 12096 12097 case I387_CW_MASK_PM: 12098 /* mask precision exception for nearbyint() */ 12099 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 12100 slot = SLOT_CW_MASK_PM; 12101 break; 12102 12103 default: 12104 gcc_unreachable (); 12105 } 12106 } 12107 else 12108 { 12109 switch (mode) 12110 { 12111 case I387_CW_TRUNC: 12112 /* round toward zero (truncate) */ 12113 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc))); 12114 slot = SLOT_CW_TRUNC; 12115 break; 12116 12117 case I387_CW_FLOOR: 12118 /* round down toward -oo */ 12119 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4))); 12120 slot = SLOT_CW_FLOOR; 12121 break; 12122 12123 case I387_CW_CEIL: 12124 /* round up toward +oo */ 12125 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8))); 12126 slot = SLOT_CW_CEIL; 12127 break; 12128 12129 case I387_CW_MASK_PM: 12130 /* mask precision exception for nearbyint() */ 12131 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020))); 12132 slot = SLOT_CW_MASK_PM; 12133 break; 12134 12135 default: 12136 gcc_unreachable (); 12137 } 12138 } 12139 12140 gcc_assert (slot < MAX_386_STACK_LOCALS); 12141 12142 new_mode = assign_386_stack_local (HImode, slot); 12143 emit_move_insn (new_mode, reg); 12144 } 12145 12146 /* Output code for INSN to convert a float to a signed int. OPERANDS 12147 are the insn operands. The output may be [HSD]Imode and the input 12148 operand may be [SDX]Fmode. 
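When a non-default rounding mode is required, the sequence emitted below looks
   roughly like
	fldcw	%3	(install the special control word)
	fistpl	%0	(convert, store and pop; the suffix comes from %z0)
	fldcw	%2	(restore the original control word)
   whereas the SSE3 fisttp form truncates directly and needs no control-word
   switching.  (Rough sketch of the output templates that follow.)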
*/ 12149 12150 const char * 12151 output_fix_trunc (rtx insn, rtx *operands, int fisttp) 12152 { 12153 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 12154 int dimode_p = GET_MODE (operands[0]) == DImode; 12155 int round_mode = get_attr_i387_cw (insn); 12156 12157 /* Jump through a hoop or two for DImode, since the hardware has no 12158 non-popping instruction. We used to do this a different way, but 12159 that was somewhat fragile and broke with post-reload splitters. */ 12160 if ((dimode_p || fisttp) && !stack_top_dies) 12161 output_asm_insn ("fld\t%y1", operands); 12162 12163 gcc_assert (STACK_TOP_P (operands[1])); 12164 gcc_assert (MEM_P (operands[0])); 12165 gcc_assert (GET_MODE (operands[1]) != TFmode); 12166 12167 if (fisttp) 12168 output_asm_insn ("fisttp%z0\t%0", operands); 12169 else 12170 { 12171 if (round_mode != I387_CW_ANY) 12172 output_asm_insn ("fldcw\t%3", operands); 12173 if (stack_top_dies || dimode_p) 12174 output_asm_insn ("fistp%z0\t%0", operands); 12175 else 12176 output_asm_insn ("fist%z0\t%0", operands); 12177 if (round_mode != I387_CW_ANY) 12178 output_asm_insn ("fldcw\t%2", operands); 12179 } 12180 12181 return ""; 12182 } 12183 12184 /* Output code for x87 ffreep insn. The OPNO argument, which may only 12185 have the values zero or one, indicates the ffreep insn's operand 12186 from the OPERANDS array. */ 12187 12188 static const char * 12189 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno) 12190 { 12191 if (TARGET_USE_FFREEP) 12192 #ifdef HAVE_AS_IX86_FFREEP 12193 return opno ? "ffreep\t%y1" : "ffreep\t%y0"; 12194 #else 12195 { 12196 static char retval[32]; 12197 int regno = REGNO (operands[opno]); 12198 12199 gcc_assert (FP_REGNO_P (regno)); 12200 12201 regno -= FIRST_STACK_REG; 12202 12203 snprintf (retval, sizeof (retval), ASM_SHORT "0xc%ddf", regno); 12204 return retval; 12205 } 12206 #endif 12207 12208 return opno ? "fstp\t%y1" : "fstp\t%y0"; 12209 } 12210 12211 12212 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi 12213 should be used. UNORDERED_P is true when fucom should be used. */ 12214 12215 const char * 12216 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p) 12217 { 12218 int stack_top_dies; 12219 rtx cmp_op0, cmp_op1; 12220 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]); 12221 12222 if (eflags_p) 12223 { 12224 cmp_op0 = operands[0]; 12225 cmp_op1 = operands[1]; 12226 } 12227 else 12228 { 12229 cmp_op0 = operands[1]; 12230 cmp_op1 = operands[2]; 12231 } 12232 12233 if (is_sse) 12234 { 12235 static const char ucomiss[] = "vucomiss\t{%1, %0|%0, %1}"; 12236 static const char ucomisd[] = "vucomisd\t{%1, %0|%0, %1}"; 12237 static const char comiss[] = "vcomiss\t{%1, %0|%0, %1}"; 12238 static const char comisd[] = "vcomisd\t{%1, %0|%0, %1}"; 12239 12240 if (GET_MODE (operands[0]) == SFmode) 12241 if (unordered_p) 12242 return &ucomiss[TARGET_AVX ? 0 : 1]; 12243 else 12244 return &comiss[TARGET_AVX ? 0 : 1]; 12245 else 12246 if (unordered_p) 12247 return &ucomisd[TARGET_AVX ? 0 : 1]; 12248 else 12249 return &comisd[TARGET_AVX ? 
0 : 1]; 12250 } 12251 12252 gcc_assert (STACK_TOP_P (cmp_op0)); 12253 12254 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0; 12255 12256 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1))) 12257 { 12258 if (stack_top_dies) 12259 { 12260 output_asm_insn ("ftst\n\tfnstsw\t%0", operands); 12261 return output_387_ffreep (operands, 1); 12262 } 12263 else 12264 return "ftst\n\tfnstsw\t%0"; 12265 } 12266 12267 if (STACK_REG_P (cmp_op1) 12268 && stack_top_dies 12269 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1)) 12270 && REGNO (cmp_op1) != FIRST_STACK_REG) 12271 { 12272 /* If both the top of the 387 stack dies, and the other operand 12273 is also a stack register that dies, then this must be a 12274 `fcompp' float compare */ 12275 12276 if (eflags_p) 12277 { 12278 /* There is no double popping fcomi variant. Fortunately, 12279 eflags is immune from the fstp's cc clobbering. */ 12280 if (unordered_p) 12281 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands); 12282 else 12283 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands); 12284 return output_387_ffreep (operands, 0); 12285 } 12286 else 12287 { 12288 if (unordered_p) 12289 return "fucompp\n\tfnstsw\t%0"; 12290 else 12291 return "fcompp\n\tfnstsw\t%0"; 12292 } 12293 } 12294 else 12295 { 12296 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */ 12297 12298 static const char * const alt[16] = 12299 { 12300 "fcom%z2\t%y2\n\tfnstsw\t%0", 12301 "fcomp%z2\t%y2\n\tfnstsw\t%0", 12302 "fucom%z2\t%y2\n\tfnstsw\t%0", 12303 "fucomp%z2\t%y2\n\tfnstsw\t%0", 12304 12305 "ficom%z2\t%y2\n\tfnstsw\t%0", 12306 "ficomp%z2\t%y2\n\tfnstsw\t%0", 12307 NULL, 12308 NULL, 12309 12310 "fcomi\t{%y1, %0|%0, %y1}", 12311 "fcomip\t{%y1, %0|%0, %y1}", 12312 "fucomi\t{%y1, %0|%0, %y1}", 12313 "fucomip\t{%y1, %0|%0, %y1}", 12314 12315 NULL, 12316 NULL, 12317 NULL, 12318 NULL 12319 }; 12320 12321 int mask; 12322 const char *ret; 12323 12324 mask = eflags_p << 3; 12325 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2; 12326 mask |= unordered_p << 1; 12327 mask |= stack_top_dies; 12328 12329 gcc_assert (mask < 16); 12330 ret = alt[mask]; 12331 gcc_assert (ret); 12332 12333 return ret; 12334 } 12335 } 12336 12337 void 12338 ix86_output_addr_vec_elt (FILE *file, int value) 12339 { 12340 const char *directive = ASM_LONG; 12341 12342 #ifdef ASM_QUAD 12343 if (TARGET_64BIT) 12344 directive = ASM_QUAD; 12345 #else 12346 gcc_assert (!TARGET_64BIT); 12347 #endif 12348 12349 fprintf (file, "%s%s%d\n", directive, LPREFIX, value); 12350 } 12351 12352 void 12353 ix86_output_addr_diff_elt (FILE *file, int value, int rel) 12354 { 12355 const char *directive = ASM_LONG; 12356 12357 #ifdef ASM_QUAD 12358 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode) 12359 directive = ASM_QUAD; 12360 #else 12361 gcc_assert (!TARGET_64BIT); 12362 #endif 12363 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. 
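Illustratively, assuming the usual ELF ".L" local label prefix: the first branch
   below emits a plain label difference such as .long .L3-.L2 (or a .quad for a
   DImode vector), the GOTOFF branch emits .long .L3@GOTOFF, and the final fallback
   expresses the entry relative to _GLOBAL_OFFSET_TABLE_.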
*/ 12364 if (TARGET_64BIT || TARGET_VXWORKS_RTP) 12365 fprintf (file, "%s%s%d-%s%d\n", 12366 directive, LPREFIX, value, LPREFIX, rel); 12367 else if (HAVE_AS_GOTOFF_IN_DATA) 12368 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value); 12369 #if TARGET_MACHO 12370 else if (TARGET_MACHO) 12371 { 12372 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value); 12373 machopic_output_function_base_name (file); 12374 fprintf(file, "\n"); 12375 } 12376 #endif 12377 else 12378 asm_fprintf (file, "%s%U%s+[.-%s%d]\n", 12379 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value); 12380 } 12381 12382 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate 12383 for the target. */ 12384 12385 void 12386 ix86_expand_clear (rtx dest) 12387 { 12388 rtx tmp; 12389 12390 /* We play register width games, which are only valid after reload. */ 12391 gcc_assert (reload_completed); 12392 12393 /* Avoid HImode and its attendant prefix byte. */ 12394 if (GET_MODE_SIZE (GET_MODE (dest)) < 4) 12395 dest = gen_rtx_REG (SImode, REGNO (dest)); 12396 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx); 12397 12398 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */ 12399 if (reload_completed && (!TARGET_USE_MOV0 || optimize_insn_for_speed_p ())) 12400 { 12401 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 12402 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob)); 12403 } 12404 12405 emit_insn (tmp); 12406 } 12407 12408 /* X is an unchanging MEM. If it is a constant pool reference, return 12409 the constant pool rtx, else NULL. */ 12410 12411 rtx 12412 maybe_get_pool_constant (rtx x) 12413 { 12414 x = ix86_delegitimize_address (XEXP (x, 0)); 12415 12416 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x)) 12417 return get_pool_constant (x); 12418 12419 return NULL_RTX; 12420 } 12421 12422 void 12423 ix86_expand_move (enum machine_mode mode, rtx operands[]) 12424 { 12425 rtx op0, op1; 12426 enum tls_model model; 12427 12428 op0 = operands[0]; 12429 op1 = operands[1]; 12430 12431 if (GET_CODE (op1) == SYMBOL_REF) 12432 { 12433 model = SYMBOL_REF_TLS_MODEL (op1); 12434 if (model) 12435 { 12436 op1 = legitimize_tls_address (op1, model, true); 12437 op1 = force_operand (op1, op0); 12438 if (op1 == op0) 12439 return; 12440 } 12441 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 12442 && SYMBOL_REF_DLLIMPORT_P (op1)) 12443 op1 = legitimize_dllimport_symbol (op1, false); 12444 } 12445 else if (GET_CODE (op1) == CONST 12446 && GET_CODE (XEXP (op1, 0)) == PLUS 12447 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF) 12448 { 12449 rtx addend = XEXP (XEXP (op1, 0), 1); 12450 rtx symbol = XEXP (XEXP (op1, 0), 0); 12451 rtx tmp = NULL; 12452 12453 model = SYMBOL_REF_TLS_MODEL (symbol); 12454 if (model) 12455 tmp = legitimize_tls_address (symbol, model, true); 12456 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES 12457 && SYMBOL_REF_DLLIMPORT_P (symbol)) 12458 tmp = legitimize_dllimport_symbol (symbol, true); 12459 12460 if (tmp) 12461 { 12462 tmp = force_operand (tmp, NULL); 12463 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend, 12464 op0, 1, OPTAB_DIRECT); 12465 if (tmp == op0) 12466 return; 12467 } 12468 } 12469 12470 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode)) 12471 { 12472 if (TARGET_MACHO && !TARGET_64BIT) 12473 { 12474 #if TARGET_MACHO 12475 if (MACHOPIC_PURE) 12476 { 12477 rtx temp = ((reload_in_progress 12478 || ((op0 && REG_P (op0)) 12479 && mode == Pmode)) 12480 ? 
op0 : gen_reg_rtx (Pmode)); 12481 op1 = machopic_indirect_data_reference (op1, temp); 12482 op1 = machopic_legitimize_pic_address (op1, mode, 12483 temp == op1 ? 0 : temp); 12484 } 12485 else if (MACHOPIC_INDIRECT) 12486 op1 = machopic_indirect_data_reference (op1, 0); 12487 if (op0 == op1) 12488 return; 12489 #endif 12490 } 12491 else 12492 { 12493 if (MEM_P (op0)) 12494 op1 = force_reg (Pmode, op1); 12495 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode)) 12496 { 12497 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX; 12498 op1 = legitimize_pic_address (op1, reg); 12499 if (op0 == op1) 12500 return; 12501 } 12502 } 12503 } 12504 else 12505 { 12506 if (MEM_P (op0) 12507 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode) 12508 || !push_operand (op0, mode)) 12509 && MEM_P (op1)) 12510 op1 = force_reg (mode, op1); 12511 12512 if (push_operand (op0, mode) 12513 && ! general_no_elim_operand (op1, mode)) 12514 op1 = copy_to_mode_reg (mode, op1); 12515 12516 /* Force large constants in 64bit compilation into register 12517 to get them CSEed. */ 12518 if (can_create_pseudo_p () 12519 && (mode == DImode) && TARGET_64BIT 12520 && immediate_operand (op1, mode) 12521 && !x86_64_zext_immediate_operand (op1, VOIDmode) 12522 && !register_operand (op0, mode) 12523 && optimize) 12524 op1 = copy_to_mode_reg (mode, op1); 12525 12526 if (can_create_pseudo_p () 12527 && FLOAT_MODE_P (mode) 12528 && GET_CODE (op1) == CONST_DOUBLE) 12529 { 12530 /* If we are loading a floating point constant to a register, 12531 force the value to memory now, since we'll get better code 12532 out the back end. */ 12533 12534 op1 = validize_mem (force_const_mem (mode, op1)); 12535 if (!register_operand (op0, mode)) 12536 { 12537 rtx temp = gen_reg_rtx (mode); 12538 emit_insn (gen_rtx_SET (VOIDmode, temp, op1)); 12539 emit_move_insn (op0, temp); 12540 return; 12541 } 12542 } 12543 } 12544 12545 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 12546 } 12547 12548 void 12549 ix86_expand_vector_move (enum machine_mode mode, rtx operands[]) 12550 { 12551 rtx op0 = operands[0], op1 = operands[1]; 12552 unsigned int align = GET_MODE_ALIGNMENT (mode); 12553 12554 /* Force constants other than zero into memory. We do not know how 12555 the instructions used to build constants modify the upper 64 bits 12556 of the register, once we have that information we may be able 12557 to handle some of them more efficiently. */ 12558 if (can_create_pseudo_p () 12559 && register_operand (op0, mode) 12560 && (CONSTANT_P (op1) 12561 || (GET_CODE (op1) == SUBREG 12562 && CONSTANT_P (SUBREG_REG (op1)))) 12563 && standard_sse_constant_p (op1) <= 0) 12564 op1 = validize_mem (force_const_mem (mode, op1)); 12565 12566 /* We need to check memory alignment for SSE mode since attribute 12567 can make operands unaligned. */ 12568 if (can_create_pseudo_p () 12569 && SSE_REG_MODE_P (mode) 12570 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align)) 12571 || (MEM_P (op1) && (MEM_ALIGN (op1) < align)))) 12572 { 12573 rtx tmp[2]; 12574 12575 /* ix86_expand_vector_move_misalign() does not like constants ... */ 12576 if (CONSTANT_P (op1) 12577 || (GET_CODE (op1) == SUBREG 12578 && CONSTANT_P (SUBREG_REG (op1)))) 12579 op1 = validize_mem (force_const_mem (mode, op1)); 12580 12581 /* ... nor both arguments in memory. 
*/ 12582 if (!register_operand (op0, mode) 12583 && !register_operand (op1, mode)) 12584 op1 = force_reg (mode, op1); 12585 12586 tmp[0] = op0; tmp[1] = op1; 12587 ix86_expand_vector_move_misalign (mode, tmp); 12588 return; 12589 } 12590 12591 /* Make operand1 a register if it isn't already. */ 12592 if (can_create_pseudo_p () 12593 && !register_operand (op0, mode) 12594 && !register_operand (op1, mode)) 12595 { 12596 emit_move_insn (op0, force_reg (GET_MODE (op0), op1)); 12597 return; 12598 } 12599 12600 emit_insn (gen_rtx_SET (VOIDmode, op0, op1)); 12601 } 12602 12603 /* Implement the movmisalign patterns for SSE. Non-SSE modes go 12604 straight to ix86_expand_vector_move. */ 12605 /* Code generation for scalar reg-reg moves of single and double precision data: 12606 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true) 12607 movaps reg, reg 12608 else 12609 movss reg, reg 12610 if (x86_sse_partial_reg_dependency == true) 12611 movapd reg, reg 12612 else 12613 movsd reg, reg 12614 12615 Code generation for scalar loads of double precision data: 12616 if (x86_sse_split_regs == true) 12617 movlpd mem, reg (gas syntax) 12618 else 12619 movsd mem, reg 12620 12621 Code generation for unaligned packed loads of single precision data 12622 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency): 12623 if (x86_sse_unaligned_move_optimal) 12624 movups mem, reg 12625 12626 if (x86_sse_partial_reg_dependency == true) 12627 { 12628 xorps reg, reg 12629 movlps mem, reg 12630 movhps mem+8, reg 12631 } 12632 else 12633 { 12634 movlps mem, reg 12635 movhps mem+8, reg 12636 } 12637 12638 Code generation for unaligned packed loads of double precision data 12639 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs): 12640 if (x86_sse_unaligned_move_optimal) 12641 movupd mem, reg 12642 12643 if (x86_sse_split_regs == true) 12644 { 12645 movlpd mem, reg 12646 movhpd mem+8, reg 12647 } 12648 else 12649 { 12650 movsd mem, reg 12651 movhpd mem+8, reg 12652 } 12653 */ 12654 12655 void 12656 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[]) 12657 { 12658 rtx op0, op1, m; 12659 12660 op0 = operands[0]; 12661 op1 = operands[1]; 12662 12663 if (TARGET_AVX) 12664 { 12665 switch (GET_MODE_CLASS (mode)) 12666 { 12667 case MODE_VECTOR_INT: 12668 case MODE_INT: 12669 switch (GET_MODE_SIZE (mode)) 12670 { 12671 case 16: 12672 op0 = gen_lowpart (V16QImode, op0); 12673 op1 = gen_lowpart (V16QImode, op1); 12674 emit_insn (gen_avx_movdqu (op0, op1)); 12675 break; 12676 case 32: 12677 op0 = gen_lowpart (V32QImode, op0); 12678 op1 = gen_lowpart (V32QImode, op1); 12679 emit_insn (gen_avx_movdqu256 (op0, op1)); 12680 break; 12681 default: 12682 gcc_unreachable (); 12683 } 12684 break; 12685 case MODE_VECTOR_FLOAT: 12686 op0 = gen_lowpart (mode, op0); 12687 op1 = gen_lowpart (mode, op1); 12688 12689 switch (mode) 12690 { 12691 case V4SFmode: 12692 emit_insn (gen_avx_movups (op0, op1)); 12693 break; 12694 case V8SFmode: 12695 emit_insn (gen_avx_movups256 (op0, op1)); 12696 break; 12697 case V2DFmode: 12698 emit_insn (gen_avx_movupd (op0, op1)); 12699 break; 12700 case V4DFmode: 12701 emit_insn (gen_avx_movupd256 (op0, op1)); 12702 break; 12703 default: 12704 gcc_unreachable (); 12705 } 12706 break; 12707 12708 default: 12709 gcc_unreachable (); 12710 } 12711 12712 return; 12713 } 12714 12715 if (MEM_P (op1)) 12716 { 12717 /* If we're optimizing for size, movups is the smallest. 
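(movups handles any alignment and, lacking the 0x66/0xF3 prefixes of movupd and
	 movdqu, has the shortest encoding of the unaligned full-width moves, so it is
	 preferred when optimizing purely for size even for integer or double data.)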
*/ 12718 if (optimize_insn_for_size_p ()) 12719 { 12720 op0 = gen_lowpart (V4SFmode, op0); 12721 op1 = gen_lowpart (V4SFmode, op1); 12722 emit_insn (gen_sse_movups (op0, op1)); 12723 return; 12724 } 12725 12726 /* ??? If we have typed data, then it would appear that using 12727 movdqu is the only way to get unaligned data loaded with 12728 integer type. */ 12729 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 12730 { 12731 op0 = gen_lowpart (V16QImode, op0); 12732 op1 = gen_lowpart (V16QImode, op1); 12733 emit_insn (gen_sse2_movdqu (op0, op1)); 12734 return; 12735 } 12736 12737 if (TARGET_SSE2 && mode == V2DFmode) 12738 { 12739 rtx zero; 12740 12741 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) 12742 { 12743 op0 = gen_lowpart (V2DFmode, op0); 12744 op1 = gen_lowpart (V2DFmode, op1); 12745 emit_insn (gen_sse2_movupd (op0, op1)); 12746 return; 12747 } 12748 12749 /* When SSE registers are split into halves, we can avoid 12750 writing to the top half twice. */ 12751 if (TARGET_SSE_SPLIT_REGS) 12752 { 12753 emit_clobber (op0); 12754 zero = op0; 12755 } 12756 else 12757 { 12758 /* ??? Not sure about the best option for the Intel chips. 12759 The following would seem to satisfy; the register is 12760 entirely cleared, breaking the dependency chain. We 12761 then store to the upper half, with a dependency depth 12762 of one. A rumor has it that Intel recommends two movsd 12763 followed by an unpacklpd, but this is unconfirmed. And 12764 given that the dependency depth of the unpacklpd would 12765 still be one, I'm not sure why this would be better. */ 12766 zero = CONST0_RTX (V2DFmode); 12767 } 12768 12769 m = adjust_address (op1, DFmode, 0); 12770 emit_insn (gen_sse2_loadlpd (op0, zero, m)); 12771 m = adjust_address (op1, DFmode, 8); 12772 emit_insn (gen_sse2_loadhpd (op0, op0, m)); 12773 } 12774 else 12775 { 12776 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL) 12777 { 12778 op0 = gen_lowpart (V4SFmode, op0); 12779 op1 = gen_lowpart (V4SFmode, op1); 12780 emit_insn (gen_sse_movups (op0, op1)); 12781 return; 12782 } 12783 12784 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY) 12785 emit_move_insn (op0, CONST0_RTX (mode)); 12786 else 12787 emit_clobber (op0); 12788 12789 if (mode != V4SFmode) 12790 op0 = gen_lowpart (V4SFmode, op0); 12791 m = adjust_address (op1, V2SFmode, 0); 12792 emit_insn (gen_sse_loadlps (op0, op0, m)); 12793 m = adjust_address (op1, V2SFmode, 8); 12794 emit_insn (gen_sse_loadhps (op0, op0, m)); 12795 } 12796 } 12797 else if (MEM_P (op0)) 12798 { 12799 /* If we're optimizing for size, movups is the smallest. */ 12800 if (optimize_insn_for_size_p ()) 12801 { 12802 op0 = gen_lowpart (V4SFmode, op0); 12803 op1 = gen_lowpart (V4SFmode, op1); 12804 emit_insn (gen_sse_movups (op0, op1)); 12805 return; 12806 } 12807 12808 /* ??? Similar to above, only less clear because of quote 12809 typeless stores unquote. 
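(The TARGET_SSE_TYPELESS_STORES tuning presumably marks chips where a packed-single
	 store works just as well for other data types, so the movdqu path below is taken
	 only when that flag is clear.)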
*/ 12810 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES 12811 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT) 12812 { 12813 op0 = gen_lowpart (V16QImode, op0); 12814 op1 = gen_lowpart (V16QImode, op1); 12815 emit_insn (gen_sse2_movdqu (op0, op1)); 12816 return; 12817 } 12818 12819 if (TARGET_SSE2 && mode == V2DFmode) 12820 { 12821 m = adjust_address (op0, DFmode, 0); 12822 emit_insn (gen_sse2_storelpd (m, op1)); 12823 m = adjust_address (op0, DFmode, 8); 12824 emit_insn (gen_sse2_storehpd (m, op1)); 12825 } 12826 else 12827 { 12828 if (mode != V4SFmode) 12829 op1 = gen_lowpart (V4SFmode, op1); 12830 m = adjust_address (op0, V2SFmode, 0); 12831 emit_insn (gen_sse_storelps (m, op1)); 12832 m = adjust_address (op0, V2SFmode, 8); 12833 emit_insn (gen_sse_storehps (m, op1)); 12834 } 12835 } 12836 else 12837 gcc_unreachable (); 12838 } 12839 12840 /* Expand a push in MODE. This is some mode for which we do not support 12841 proper push instructions, at least from the registers that we expect 12842 the value to live in. */ 12843 12844 void 12845 ix86_expand_push (enum machine_mode mode, rtx x) 12846 { 12847 rtx tmp; 12848 12849 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx, 12850 GEN_INT (-GET_MODE_SIZE (mode)), 12851 stack_pointer_rtx, 1, OPTAB_DIRECT); 12852 if (tmp != stack_pointer_rtx) 12853 emit_move_insn (stack_pointer_rtx, tmp); 12854 12855 tmp = gen_rtx_MEM (mode, stack_pointer_rtx); 12856 12857 /* When we push an operand onto stack, it has to be aligned at least 12858 at the function argument boundary. However since we don't have 12859 the argument type, we can't determine the actual argument 12860 boundary. */ 12861 emit_move_insn (tmp, x); 12862 } 12863 12864 /* Helper function of ix86_fixup_binary_operands to canonicalize 12865 operand order. Returns true if the operands should be swapped. */ 12866 12867 static bool 12868 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode, 12869 rtx operands[]) 12870 { 12871 rtx dst = operands[0]; 12872 rtx src1 = operands[1]; 12873 rtx src2 = operands[2]; 12874 12875 /* If the operation is not commutative, we can't do anything. */ 12876 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH) 12877 return false; 12878 12879 /* Highest priority is that src1 should match dst. */ 12880 if (rtx_equal_p (dst, src1)) 12881 return false; 12882 if (rtx_equal_p (dst, src2)) 12883 return true; 12884 12885 /* Next highest priority is that immediate constants come second. */ 12886 if (immediate_operand (src2, mode)) 12887 return false; 12888 if (immediate_operand (src1, mode)) 12889 return true; 12890 12891 /* Lowest priority is that memory references should come second. */ 12892 if (MEM_P (src2)) 12893 return false; 12894 if (MEM_P (src1)) 12895 return true; 12896 12897 return false; 12898 } 12899 12900 12901 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the 12902 destination to use for the operation. If different from the true 12903 destination in operands[0], a copy operation will be required. */ 12904 12905 rtx 12906 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode, 12907 rtx operands[]) 12908 { 12909 rtx dst = operands[0]; 12910 rtx src1 = operands[1]; 12911 rtx src2 = operands[2]; 12912 12913 /* Canonicalize operand order. */ 12914 if (ix86_swap_binary_operands_p (code, mode, operands)) 12915 { 12916 rtx temp; 12917 12918 /* It is invalid to swap operands of different modes. 
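For instance, for a commutative (plus (mem) (reg)) whose destination is that same
	 register, ix86_swap_binary_operands_p requests the swap so that src1 matches the
	 destination and the memory reference becomes src2, which the matching-operand
	 checks below can then accept.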
*/ 12919 gcc_assert (GET_MODE (src1) == GET_MODE (src2)); 12920 12921 temp = src1; 12922 src1 = src2; 12923 src2 = temp; 12924 } 12925 12926 /* Both source operands cannot be in memory. */ 12927 if (MEM_P (src1) && MEM_P (src2)) 12928 { 12929 /* Optimization: Only read from memory once. */ 12930 if (rtx_equal_p (src1, src2)) 12931 { 12932 src2 = force_reg (mode, src2); 12933 src1 = src2; 12934 } 12935 else 12936 src2 = force_reg (mode, src2); 12937 } 12938 12939 /* If the destination is memory, and we do not have matching source 12940 operands, do things in registers. */ 12941 if (MEM_P (dst) && !rtx_equal_p (dst, src1)) 12942 dst = gen_reg_rtx (mode); 12943 12944 /* Source 1 cannot be a constant. */ 12945 if (CONSTANT_P (src1)) 12946 src1 = force_reg (mode, src1); 12947 12948 /* Source 1 cannot be a non-matching memory. */ 12949 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) 12950 src1 = force_reg (mode, src1); 12951 12952 operands[1] = src1; 12953 operands[2] = src2; 12954 return dst; 12955 } 12956 12957 /* Similarly, but assume that the destination has already been 12958 set up properly. */ 12959 12960 void 12961 ix86_fixup_binary_operands_no_copy (enum rtx_code code, 12962 enum machine_mode mode, rtx operands[]) 12963 { 12964 rtx dst = ix86_fixup_binary_operands (code, mode, operands); 12965 gcc_assert (dst == operands[0]); 12966 } 12967 12968 /* Attempt to expand a binary operator. Make the expansion closer to the 12969 actual machine, then just general_operand, which will allow 3 separate 12970 memory references (one output, two input) in a single insn. */ 12971 12972 void 12973 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode, 12974 rtx operands[]) 12975 { 12976 rtx src1, src2, dst, op, clob; 12977 12978 dst = ix86_fixup_binary_operands (code, mode, operands); 12979 src1 = operands[1]; 12980 src2 = operands[2]; 12981 12982 /* Emit the instruction. */ 12983 12984 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2)); 12985 if (reload_in_progress) 12986 { 12987 /* Reload doesn't know about the flags register, and doesn't know that 12988 it doesn't want to clobber it. We can only do this with PLUS. */ 12989 gcc_assert (code == PLUS); 12990 emit_insn (op); 12991 } 12992 else 12993 { 12994 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 12995 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 12996 } 12997 12998 /* Fix up the destination if needed. */ 12999 if (dst != operands[0]) 13000 emit_move_insn (operands[0], dst); 13001 } 13002 13003 /* Return TRUE or FALSE depending on whether the binary operator meets the 13004 appropriate constraints. */ 13005 13006 int 13007 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode, 13008 rtx operands[3]) 13009 { 13010 rtx dst = operands[0]; 13011 rtx src1 = operands[1]; 13012 rtx src2 = operands[2]; 13013 13014 /* Both source operands cannot be in memory. */ 13015 if (MEM_P (src1) && MEM_P (src2)) 13016 return 0; 13017 13018 /* Canonicalize operand order for commutative operators. */ 13019 if (ix86_swap_binary_operands_p (code, mode, operands)) 13020 { 13021 rtx temp = src1; 13022 src1 = src2; 13023 src2 = temp; 13024 } 13025 13026 /* If the destination is memory, we must have a matching source operand. */ 13027 if (MEM_P (dst) && !rtx_equal_p (dst, src1)) 13028 return 0; 13029 13030 /* Source 1 cannot be a constant. */ 13031 if (CONSTANT_P (src1)) 13032 return 0; 13033 13034 /* Source 1 cannot be a non-matching memory. 
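That is, a memory src1 is only valid when it is the same location as the
     destination (the read-modify-write form); a non-matching memory source would
     need an extra load, so this predicate rejects it.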
*/ 13035 if (MEM_P (src1) && !rtx_equal_p (dst, src1)) 13036 return 0; 13037 13038 return 1; 13039 } 13040 13041 /* Attempt to expand a unary operator. Make the expansion closer to the 13042 actual machine, then just general_operand, which will allow 2 separate 13043 memory references (one output, one input) in a single insn. */ 13044 13045 void 13046 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode, 13047 rtx operands[]) 13048 { 13049 int matching_memory; 13050 rtx src, dst, op, clob; 13051 13052 dst = operands[0]; 13053 src = operands[1]; 13054 13055 /* If the destination is memory, and we do not have matching source 13056 operands, do things in registers. */ 13057 matching_memory = 0; 13058 if (MEM_P (dst)) 13059 { 13060 if (rtx_equal_p (dst, src)) 13061 matching_memory = 1; 13062 else 13063 dst = gen_reg_rtx (mode); 13064 } 13065 13066 /* When source operand is memory, destination must match. */ 13067 if (MEM_P (src) && !matching_memory) 13068 src = force_reg (mode, src); 13069 13070 /* Emit the instruction. */ 13071 13072 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src)); 13073 if (reload_in_progress || code == NOT) 13074 { 13075 /* Reload doesn't know about the flags register, and doesn't know that 13076 it doesn't want to clobber it. */ 13077 gcc_assert (code == NOT); 13078 emit_insn (op); 13079 } 13080 else 13081 { 13082 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 13083 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob))); 13084 } 13085 13086 /* Fix up the destination if needed. */ 13087 if (dst != operands[0]) 13088 emit_move_insn (operands[0], dst); 13089 } 13090 13091 /* Return TRUE or FALSE depending on whether the unary operator meets the 13092 appropriate constraints. */ 13093 13094 int 13095 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED, 13096 enum machine_mode mode ATTRIBUTE_UNUSED, 13097 rtx operands[2] ATTRIBUTE_UNUSED) 13098 { 13099 /* If one of operands is memory, source and destination must match. */ 13100 if ((MEM_P (operands[0]) 13101 || MEM_P (operands[1])) 13102 && ! rtx_equal_p (operands[0], operands[1])) 13103 return FALSE; 13104 return TRUE; 13105 } 13106 13107 /* Post-reload splitter for converting an SF or DFmode value in an 13108 SSE register into an unsigned SImode. */ 13109 13110 void 13111 ix86_split_convert_uns_si_sse (rtx operands[]) 13112 { 13113 enum machine_mode vecmode; 13114 rtx value, large, zero_or_two31, input, two31, x; 13115 13116 large = operands[1]; 13117 zero_or_two31 = operands[2]; 13118 input = operands[3]; 13119 two31 = operands[4]; 13120 vecmode = GET_MODE (large); 13121 value = gen_rtx_REG (vecmode, REGNO (operands[0])); 13122 13123 /* Load up the value into the low element. We must ensure that the other 13124 elements are valid floats -- zero is the easiest such value. */ 13125 if (MEM_P (input)) 13126 { 13127 if (vecmode == V4SFmode) 13128 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input)); 13129 else 13130 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input)); 13131 } 13132 else 13133 { 13134 input = gen_rtx_REG (vecmode, REGNO (input)); 13135 emit_move_insn (value, CONST0_RTX (vecmode)); 13136 if (vecmode == V4SFmode) 13137 emit_insn (gen_sse_movss (value, value, input)); 13138 else 13139 emit_insn (gen_sse2_movsd (value, value, input)); 13140 } 13141 13142 emit_move_insn (large, two31); 13143 emit_move_insn (zero_or_two31, MEM_P (two31) ? 
large : two31); 13144 13145 x = gen_rtx_fmt_ee (LE, vecmode, large, value); 13146 emit_insn (gen_rtx_SET (VOIDmode, large, x)); 13147 13148 x = gen_rtx_AND (vecmode, zero_or_two31, large); 13149 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x)); 13150 13151 x = gen_rtx_MINUS (vecmode, value, zero_or_two31); 13152 emit_insn (gen_rtx_SET (VOIDmode, value, x)); 13153 13154 large = gen_rtx_REG (V4SImode, REGNO (large)); 13155 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31))); 13156 13157 x = gen_rtx_REG (V4SImode, REGNO (value)); 13158 if (vecmode == V4SFmode) 13159 emit_insn (gen_sse2_cvttps2dq (x, value)); 13160 else 13161 emit_insn (gen_sse2_cvttpd2dq (x, value)); 13162 value = x; 13163 13164 emit_insn (gen_xorv4si3 (value, value, large)); 13165 } 13166 13167 /* Convert an unsigned DImode value into a DFmode, using only SSE. 13168 Expects the 64-bit DImode to be supplied in a pair of integral 13169 registers. Requires SSE2; will use SSE3 if available. For x86_32, 13170 -mfpmath=sse, !optimize_size only. */ 13171 13172 void 13173 ix86_expand_convert_uns_didf_sse (rtx target, rtx input) 13174 { 13175 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt; 13176 rtx int_xmm, fp_xmm; 13177 rtx biases, exponents; 13178 rtx x; 13179 13180 int_xmm = gen_reg_rtx (V4SImode); 13181 if (TARGET_INTER_UNIT_MOVES) 13182 emit_insn (gen_movdi_to_sse (int_xmm, input)); 13183 else if (TARGET_SSE_SPLIT_REGS) 13184 { 13185 emit_clobber (int_xmm); 13186 emit_move_insn (gen_lowpart (DImode, int_xmm), input); 13187 } 13188 else 13189 { 13190 x = gen_reg_rtx (V2DImode); 13191 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0); 13192 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x)); 13193 } 13194 13195 x = gen_rtx_CONST_VECTOR (V4SImode, 13196 gen_rtvec (4, GEN_INT (0x43300000UL), 13197 GEN_INT (0x45300000UL), 13198 const0_rtx, const0_rtx)); 13199 exponents = validize_mem (force_const_mem (V4SImode, x)); 13200 13201 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */ 13202 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents)); 13203 13204 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm) 13205 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)). 13206 Similarly (0x45300000UL ## fp_value_hi_xmm) yields 13207 (0x1.0p84 + double(fp_value_hi_xmm)). 13208 Note these exponents differ by 32. */ 13209 13210 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm)); 13211 13212 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values 13213 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */ 13214 real_ldexp (&bias_lo_rvt, &dconst1, 52); 13215 real_ldexp (&bias_hi_rvt, &dconst1, 84); 13216 biases = const_double_from_real_value (bias_lo_rvt, DFmode); 13217 x = const_double_from_real_value (bias_hi_rvt, DFmode); 13218 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x)); 13219 biases = validize_mem (force_const_mem (V2DFmode, biases)); 13220 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases)); 13221 13222 /* Add the upper and lower DFmode values together. */ 13223 if (TARGET_SSE3) 13224 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm)); 13225 else 13226 { 13227 x = copy_to_mode_reg (V2DFmode, fp_xmm); 13228 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm)); 13229 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x)); 13230 } 13231 13232 ix86_expand_vector_extract (false, target, fp_xmm, 0); 13233 } 13234 13235 /* Not used, but eases macroization of patterns. 
*/ 13236 void 13237 ix86_expand_convert_uns_sixf_sse (rtx target ATTRIBUTE_UNUSED, 13238 rtx input ATTRIBUTE_UNUSED) 13239 { 13240 gcc_unreachable (); 13241 } 13242 13243 /* Convert an unsigned SImode value into a DFmode. Only currently used 13244 for SSE, but applicable anywhere. */ 13245 13246 void 13247 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input) 13248 { 13249 REAL_VALUE_TYPE TWO31r; 13250 rtx x, fp; 13251 13252 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1), 13253 NULL, 1, OPTAB_DIRECT); 13254 13255 fp = gen_reg_rtx (DFmode); 13256 emit_insn (gen_floatsidf2 (fp, x)); 13257 13258 real_ldexp (&TWO31r, &dconst1, 31); 13259 x = const_double_from_real_value (TWO31r, DFmode); 13260 13261 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT); 13262 if (x != target) 13263 emit_move_insn (target, x); 13264 } 13265 13266 /* Convert a signed DImode value into a DFmode. Only used for SSE in 13267 32-bit mode; otherwise we have a direct convert instruction. */ 13268 13269 void 13270 ix86_expand_convert_sign_didf_sse (rtx target, rtx input) 13271 { 13272 REAL_VALUE_TYPE TWO32r; 13273 rtx fp_lo, fp_hi, x; 13274 13275 fp_lo = gen_reg_rtx (DFmode); 13276 fp_hi = gen_reg_rtx (DFmode); 13277 13278 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input))); 13279 13280 real_ldexp (&TWO32r, &dconst1, 32); 13281 x = const_double_from_real_value (TWO32r, DFmode); 13282 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT); 13283 13284 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input)); 13285 13286 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target, 13287 0, OPTAB_DIRECT); 13288 if (x != target) 13289 emit_move_insn (target, x); 13290 } 13291 13292 /* Convert an unsigned SImode value into a SFmode, using only SSE. 13293 For x86_32, -mfpmath=sse, !optimize_size only. */ 13294 void 13295 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input) 13296 { 13297 REAL_VALUE_TYPE ONE16r; 13298 rtx fp_hi, fp_lo, int_hi, int_lo, x; 13299 13300 real_ldexp (&ONE16r, &dconst1, 16); 13301 x = const_double_from_real_value (ONE16r, SFmode); 13302 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff), 13303 NULL, 0, OPTAB_DIRECT); 13304 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16), 13305 NULL, 0, OPTAB_DIRECT); 13306 fp_hi = gen_reg_rtx (SFmode); 13307 fp_lo = gen_reg_rtx (SFmode); 13308 emit_insn (gen_floatsisf2 (fp_hi, int_hi)); 13309 emit_insn (gen_floatsisf2 (fp_lo, int_lo)); 13310 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi, 13311 0, OPTAB_DIRECT); 13312 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target, 13313 0, OPTAB_DIRECT); 13314 if (!rtx_equal_p (target, fp_hi)) 13315 emit_move_insn (target, fp_hi); 13316 } 13317 13318 /* A subroutine of ix86_build_signbit_mask_vector. If VECT is true, 13319 then replicate the value for all elements of the vector 13320 register. 
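   For example, an SFmode VALUE yields the V4SFmode constant
   { value, value, value, value } when VECT is set and { value, 0, 0, 0 }
   otherwise; the integral SImode and DImode cases are only ever requested
   in replicated form.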
*/ 13321 13322 rtx 13323 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value) 13324 { 13325 rtvec v; 13326 switch (mode) 13327 { 13328 case SImode: 13329 gcc_assert (vect); 13330 v = gen_rtvec (4, value, value, value, value); 13331 return gen_rtx_CONST_VECTOR (V4SImode, v); 13332 13333 case DImode: 13334 gcc_assert (vect); 13335 v = gen_rtvec (2, value, value); 13336 return gen_rtx_CONST_VECTOR (V2DImode, v); 13337 13338 case SFmode: 13339 if (vect) 13340 v = gen_rtvec (4, value, value, value, value); 13341 else 13342 v = gen_rtvec (4, value, CONST0_RTX (SFmode), 13343 CONST0_RTX (SFmode), CONST0_RTX (SFmode)); 13344 return gen_rtx_CONST_VECTOR (V4SFmode, v); 13345 13346 case DFmode: 13347 if (vect) 13348 v = gen_rtvec (2, value, value); 13349 else 13350 v = gen_rtvec (2, value, CONST0_RTX (DFmode)); 13351 return gen_rtx_CONST_VECTOR (V2DFmode, v); 13352 13353 default: 13354 gcc_unreachable (); 13355 } 13356 } 13357 13358 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders 13359 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE 13360 for an SSE register. If VECT is true, then replicate the mask for 13361 all elements of the vector register. If INVERT is true, then create 13362 a mask excluding the sign bit. */ 13363 13364 rtx 13365 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert) 13366 { 13367 enum machine_mode vec_mode, imode; 13368 HOST_WIDE_INT hi, lo; 13369 int shift = 63; 13370 rtx v; 13371 rtx mask; 13372 13373 /* Find the sign bit, sign extended to 2*HWI. */ 13374 switch (mode) 13375 { 13376 case SImode: 13377 case SFmode: 13378 imode = SImode; 13379 vec_mode = (mode == SImode) ? V4SImode : V4SFmode; 13380 lo = 0x80000000, hi = lo < 0; 13381 break; 13382 13383 case DImode: 13384 case DFmode: 13385 imode = DImode; 13386 vec_mode = (mode == DImode) ? V2DImode : V2DFmode; 13387 if (HOST_BITS_PER_WIDE_INT >= 64) 13388 lo = (HOST_WIDE_INT)1 << shift, hi = -1; 13389 else 13390 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); 13391 break; 13392 13393 case TImode: 13394 case TFmode: 13395 vec_mode = VOIDmode; 13396 if (HOST_BITS_PER_WIDE_INT >= 64) 13397 { 13398 imode = TImode; 13399 lo = 0, hi = (HOST_WIDE_INT)1 << shift; 13400 } 13401 else 13402 { 13403 rtvec vec; 13404 13405 imode = DImode; 13406 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT); 13407 13408 if (invert) 13409 { 13410 lo = ~lo, hi = ~hi; 13411 v = constm1_rtx; 13412 } 13413 else 13414 v = const0_rtx; 13415 13416 mask = immed_double_const (lo, hi, imode); 13417 13418 vec = gen_rtvec (2, v, mask); 13419 v = gen_rtx_CONST_VECTOR (V2DImode, vec); 13420 v = copy_to_mode_reg (mode, gen_lowpart (mode, v)); 13421 13422 return v; 13423 } 13424 break; 13425 13426 default: 13427 gcc_unreachable (); 13428 } 13429 13430 if (invert) 13431 lo = ~lo, hi = ~hi; 13432 13433 /* Force this value into the low part of a fp vector constant. */ 13434 mask = immed_double_const (lo, hi, imode); 13435 mask = gen_lowpart (mode, mask); 13436 13437 if (vec_mode == VOIDmode) 13438 return force_reg (mode, mask); 13439 13440 v = ix86_build_const_vector (mode, vect, mask); 13441 return force_reg (vec_mode, v); 13442 } 13443 13444 /* Generate code for floating point ABS or NEG. 
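   When the value lives in an SSE register both operations reduce to a
   single bitwise instruction on the sign bit: NEG is an XOR with a mask
   holding only the sign bit and ABS is an AND with its complement, e.g.
   0x80000000 and 0x7fffffff for SFmode elements.  The x87 path just emits
   the plain NEG or ABS rtx and lets the fchs/fabs patterns match it.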
*/ 13445 13446 void 13447 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode, 13448 rtx operands[]) 13449 { 13450 rtx mask, set, use, clob, dst, src; 13451 bool use_sse = false; 13452 bool vector_mode = VECTOR_MODE_P (mode); 13453 enum machine_mode elt_mode = mode; 13454 13455 if (vector_mode) 13456 { 13457 elt_mode = GET_MODE_INNER (mode); 13458 use_sse = true; 13459 } 13460 else if (mode == TFmode) 13461 use_sse = true; 13462 else if (TARGET_SSE_MATH) 13463 use_sse = SSE_FLOAT_MODE_P (mode); 13464 13465 /* NEG and ABS performed with SSE use bitwise mask operations. 13466 Create the appropriate mask now. */ 13467 if (use_sse) 13468 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS); 13469 else 13470 mask = NULL_RTX; 13471 13472 dst = operands[0]; 13473 src = operands[1]; 13474 13475 if (vector_mode) 13476 { 13477 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask); 13478 set = gen_rtx_SET (VOIDmode, dst, set); 13479 emit_insn (set); 13480 } 13481 else 13482 { 13483 set = gen_rtx_fmt_e (code, mode, src); 13484 set = gen_rtx_SET (VOIDmode, dst, set); 13485 if (mask) 13486 { 13487 use = gen_rtx_USE (VOIDmode, mask); 13488 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG)); 13489 emit_insn (gen_rtx_PARALLEL (VOIDmode, 13490 gen_rtvec (3, set, use, clob))); 13491 } 13492 else 13493 emit_insn (set); 13494 } 13495 } 13496 13497 /* Expand a copysign operation. Special case operand 0 being a constant. */ 13498 13499 void 13500 ix86_expand_copysign (rtx operands[]) 13501 { 13502 enum machine_mode mode; 13503 rtx dest, op0, op1, mask, nmask; 13504 13505 dest = operands[0]; 13506 op0 = operands[1]; 13507 op1 = operands[2]; 13508 13509 mode = GET_MODE (dest); 13510 13511 if (GET_CODE (op0) == CONST_DOUBLE) 13512 { 13513 rtx (*copysign_insn)(rtx, rtx, rtx, rtx); 13514 13515 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0))) 13516 op0 = simplify_unary_operation (ABS, mode, op0, mode); 13517 13518 if (mode == SFmode || mode == DFmode) 13519 { 13520 enum machine_mode vmode; 13521 13522 vmode = mode == SFmode ? V4SFmode : V2DFmode; 13523 13524 if (op0 == CONST0_RTX (mode)) 13525 op0 = CONST0_RTX (vmode); 13526 else 13527 { 13528 rtvec v; 13529 13530 if (mode == SFmode) 13531 v = gen_rtvec (4, op0, CONST0_RTX (SFmode), 13532 CONST0_RTX (SFmode), CONST0_RTX (SFmode)); 13533 else 13534 v = gen_rtvec (2, op0, CONST0_RTX (DFmode)); 13535 13536 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v)); 13537 } 13538 } 13539 else if (op0 != CONST0_RTX (mode)) 13540 op0 = force_reg (mode, op0); 13541 13542 mask = ix86_build_signbit_mask (mode, 0, 0); 13543 13544 if (mode == SFmode) 13545 copysign_insn = gen_copysignsf3_const; 13546 else if (mode == DFmode) 13547 copysign_insn = gen_copysigndf3_const; 13548 else 13549 copysign_insn = gen_copysigntf3_const; 13550 13551 emit_insn (copysign_insn (dest, op0, op1, mask)); 13552 } 13553 else 13554 { 13555 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx); 13556 13557 nmask = ix86_build_signbit_mask (mode, 0, 1); 13558 mask = ix86_build_signbit_mask (mode, 0, 0); 13559 13560 if (mode == SFmode) 13561 copysign_insn = gen_copysignsf3_var; 13562 else if (mode == DFmode) 13563 copysign_insn = gen_copysigndf3_var; 13564 else 13565 copysign_insn = gen_copysigntf3_var; 13566 13567 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask)); 13568 } 13569 } 13570 13571 /* Deconstruct a copysign operation into bit masks. 
Operand 0 is known to 13572 be a constant, and so has already been expanded into a vector constant. */ 13573 13574 void 13575 ix86_split_copysign_const (rtx operands[]) 13576 { 13577 enum machine_mode mode, vmode; 13578 rtx dest, op0, op1, mask, x; 13579 13580 dest = operands[0]; 13581 op0 = operands[1]; 13582 op1 = operands[2]; 13583 mask = operands[3]; 13584 13585 mode = GET_MODE (dest); 13586 vmode = GET_MODE (mask); 13587 13588 dest = simplify_gen_subreg (vmode, dest, mode, 0); 13589 x = gen_rtx_AND (vmode, dest, mask); 13590 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13591 13592 if (op0 != CONST0_RTX (vmode)) 13593 { 13594 x = gen_rtx_IOR (vmode, dest, op0); 13595 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13596 } 13597 } 13598 13599 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable, 13600 so we have to do two masks. */ 13601 13602 void 13603 ix86_split_copysign_var (rtx operands[]) 13604 { 13605 enum machine_mode mode, vmode; 13606 rtx dest, scratch, op0, op1, mask, nmask, x; 13607 13608 dest = operands[0]; 13609 scratch = operands[1]; 13610 op0 = operands[2]; 13611 op1 = operands[3]; 13612 nmask = operands[4]; 13613 mask = operands[5]; 13614 13615 mode = GET_MODE (dest); 13616 vmode = GET_MODE (mask); 13617 13618 if (rtx_equal_p (op0, op1)) 13619 { 13620 /* Shouldn't happen often (it's useless, obviously), but when it does 13621 we'd generate incorrect code if we continue below. */ 13622 emit_move_insn (dest, op0); 13623 return; 13624 } 13625 13626 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */ 13627 { 13628 gcc_assert (REGNO (op1) == REGNO (scratch)); 13629 13630 x = gen_rtx_AND (vmode, scratch, mask); 13631 emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); 13632 13633 dest = mask; 13634 op0 = simplify_gen_subreg (vmode, op0, mode, 0); 13635 x = gen_rtx_NOT (vmode, dest); 13636 x = gen_rtx_AND (vmode, x, op0); 13637 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13638 } 13639 else 13640 { 13641 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */ 13642 { 13643 x = gen_rtx_AND (vmode, scratch, mask); 13644 } 13645 else /* alternative 2,4 */ 13646 { 13647 gcc_assert (REGNO (mask) == REGNO (scratch)); 13648 op1 = simplify_gen_subreg (vmode, op1, mode, 0); 13649 x = gen_rtx_AND (vmode, scratch, op1); 13650 } 13651 emit_insn (gen_rtx_SET (VOIDmode, scratch, x)); 13652 13653 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */ 13654 { 13655 dest = simplify_gen_subreg (vmode, op0, mode, 0); 13656 x = gen_rtx_AND (vmode, dest, nmask); 13657 } 13658 else /* alternative 3,4 */ 13659 { 13660 gcc_assert (REGNO (nmask) == REGNO (dest)); 13661 dest = nmask; 13662 op0 = simplify_gen_subreg (vmode, op0, mode, 0); 13663 x = gen_rtx_AND (vmode, dest, op0); 13664 } 13665 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13666 } 13667 13668 x = gen_rtx_IOR (vmode, dest, scratch); 13669 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 13670 } 13671 13672 /* Return TRUE or FALSE depending on whether the first SET in INSN 13673 has source and destination with matching CC modes, and that the 13674 CC mode is at least as constrained as REQ_MODE. 
*/ 13675 13676 int 13677 ix86_match_ccmode (rtx insn, enum machine_mode req_mode) 13678 { 13679 rtx set; 13680 enum machine_mode set_mode; 13681 13682 set = PATTERN (insn); 13683 if (GET_CODE (set) == PARALLEL) 13684 set = XVECEXP (set, 0, 0); 13685 gcc_assert (GET_CODE (set) == SET); 13686 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE); 13687 13688 set_mode = GET_MODE (SET_DEST (set)); 13689 switch (set_mode) 13690 { 13691 case CCNOmode: 13692 if (req_mode != CCNOmode 13693 && (req_mode != CCmode 13694 || XEXP (SET_SRC (set), 1) != const0_rtx)) 13695 return 0; 13696 break; 13697 case CCmode: 13698 if (req_mode == CCGCmode) 13699 return 0; 13700 /* FALLTHRU */ 13701 case CCGCmode: 13702 if (req_mode == CCGOCmode || req_mode == CCNOmode) 13703 return 0; 13704 /* FALLTHRU */ 13705 case CCGOCmode: 13706 if (req_mode == CCZmode) 13707 return 0; 13708 /* FALLTHRU */ 13709 case CCAmode: 13710 case CCCmode: 13711 case CCOmode: 13712 case CCSmode: 13713 case CCZmode: 13714 break; 13715 13716 default: 13717 gcc_unreachable (); 13718 } 13719 13720 return (GET_MODE (SET_SRC (set)) == set_mode); 13721 } 13722 13723 /* Generate insn patterns to do an integer compare of OPERANDS. */ 13724 13725 static rtx 13726 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1) 13727 { 13728 enum machine_mode cmpmode; 13729 rtx tmp, flags; 13730 13731 cmpmode = SELECT_CC_MODE (code, op0, op1); 13732 flags = gen_rtx_REG (cmpmode, FLAGS_REG); 13733 13734 /* This is very simple, but making the interface the same as in the 13735 FP case makes the rest of the code easier. */ 13736 tmp = gen_rtx_COMPARE (cmpmode, op0, op1); 13737 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp)); 13738 13739 /* Return the test that should be put into the flags user, i.e. 13740 the bcc, scc, or cmov instruction. */ 13741 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx); 13742 } 13743 13744 /* Figure out whether to use ordered or unordered fp comparisons. 13745 Return the appropriate mode to use. */ 13746 13747 enum machine_mode 13748 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED) 13749 { 13750 /* ??? In order to make all comparisons reversible, we do all comparisons 13751 non-trapping when compiling for IEEE. Once gcc is able to distinguish 13752 all forms trapping and nontrapping comparisons, we can make inequality 13753 comparisons trapping again, since it results in better code when using 13754 FCOM based compares. */ 13755 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode; 13756 } 13757 13758 enum machine_mode 13759 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1) 13760 { 13761 enum machine_mode mode = GET_MODE (op0); 13762 13763 if (SCALAR_FLOAT_MODE_P (mode)) 13764 { 13765 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); 13766 return ix86_fp_compare_mode (code); 13767 } 13768 13769 switch (code) 13770 { 13771 /* Only zero flag is needed. */ 13772 case EQ: /* ZF=0 */ 13773 case NE: /* ZF!=0 */ 13774 return CCZmode; 13775 /* Codes needing carry flag. */ 13776 case GEU: /* CF=0 */ 13777 case LTU: /* CF=1 */ 13778 /* Detect overflow checks. They need just the carry flag. */ 13779 if (GET_CODE (op0) == PLUS 13780 && rtx_equal_p (op1, XEXP (op0, 0))) 13781 return CCCmode; 13782 else 13783 return CCmode; 13784 case GTU: /* CF=0 & ZF=0 */ 13785 case LEU: /* CF=1 | ZF=1 */ 13786 /* Detect overflow checks. They need just the carry flag. 
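   The unsigned subtraction idiom "r = a - b; if (r > a) ..." typically
   arrives here as (gtu (minus a b) a), and that test is true exactly when
   the subtraction borrowed, so only the carry flag is needed, which is
   what CCCmode expresses.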
*/ 13787 if (GET_CODE (op0) == MINUS 13788 && rtx_equal_p (op1, XEXP (op0, 0))) 13789 return CCCmode; 13790 else 13791 return CCmode; 13792 /* Codes possibly doable only with sign flag when 13793 comparing against zero. */ 13794 case GE: /* SF=OF or SF=0 */ 13795 case LT: /* SF<>OF or SF=1 */ 13796 if (op1 == const0_rtx) 13797 return CCGOCmode; 13798 else 13799 /* For other cases Carry flag is not required. */ 13800 return CCGCmode; 13801 /* Codes doable only with sign flag when comparing 13802 against zero, but we miss jump instruction for it 13803 so we need to use relational tests against overflow 13804 that thus needs to be zero. */ 13805 case GT: /* ZF=0 & SF=OF */ 13806 case LE: /* ZF=1 | SF<>OF */ 13807 if (op1 == const0_rtx) 13808 return CCNOmode; 13809 else 13810 return CCGCmode; 13811 /* strcmp pattern do (use flags) and combine may ask us for proper 13812 mode. */ 13813 case USE: 13814 return CCmode; 13815 default: 13816 gcc_unreachable (); 13817 } 13818 } 13819 13820 /* Return the fixed registers used for condition codes. */ 13821 13822 static bool 13823 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2) 13824 { 13825 *p1 = FLAGS_REG; 13826 *p2 = FPSR_REG; 13827 return true; 13828 } 13829 13830 /* If two condition code modes are compatible, return a condition code 13831 mode which is compatible with both. Otherwise, return 13832 VOIDmode. */ 13833 13834 static enum machine_mode 13835 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2) 13836 { 13837 if (m1 == m2) 13838 return m1; 13839 13840 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC) 13841 return VOIDmode; 13842 13843 if ((m1 == CCGCmode && m2 == CCGOCmode) 13844 || (m1 == CCGOCmode && m2 == CCGCmode)) 13845 return CCGCmode; 13846 13847 switch (m1) 13848 { 13849 default: 13850 gcc_unreachable (); 13851 13852 case CCmode: 13853 case CCGCmode: 13854 case CCGOCmode: 13855 case CCNOmode: 13856 case CCAmode: 13857 case CCCmode: 13858 case CCOmode: 13859 case CCSmode: 13860 case CCZmode: 13861 switch (m2) 13862 { 13863 default: 13864 return VOIDmode; 13865 13866 case CCmode: 13867 case CCGCmode: 13868 case CCGOCmode: 13869 case CCNOmode: 13870 case CCAmode: 13871 case CCCmode: 13872 case CCOmode: 13873 case CCSmode: 13874 case CCZmode: 13875 return CCmode; 13876 } 13877 13878 case CCFPmode: 13879 case CCFPUmode: 13880 /* These are only compatible with themselves, which we already 13881 checked above. */ 13882 return VOIDmode; 13883 } 13884 } 13885 13886 /* Split comparison code CODE into comparisons we can do using branch 13887 instructions. BYPASS_CODE is comparison code for branch that will 13888 branch around FIRST_CODE and SECOND_CODE. If some of branches 13889 is not required, set value to UNKNOWN. 13890 We never require more than two branches. 
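   For example, LT becomes FIRST_CODE = UNLT guarded by BYPASS_CODE =
   UNORDERED, i.e. jump around the UNLT branch when the operands compare
   unordered, because LT must not be taken on a NaN.  NE needs the opposite
   treatment, FIRST_CODE = LTGT plus SECOND_CODE = UNORDERED, since NE must
   also be taken when either operand is a NaN.  When TARGET_IEEE_FP is
   clear both extra codes are simply dropped.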
*/ 13891 13892 void 13893 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code, 13894 enum rtx_code *first_code, 13895 enum rtx_code *second_code) 13896 { 13897 *first_code = code; 13898 *bypass_code = UNKNOWN; 13899 *second_code = UNKNOWN; 13900 13901 /* The fcomi comparison sets flags as follows: 13902 13903 cmp ZF PF CF 13904 > 0 0 0 13905 < 0 0 1 13906 = 1 0 0 13907 un 1 1 1 */ 13908 13909 switch (code) 13910 { 13911 case GT: /* GTU - CF=0 & ZF=0 */ 13912 case GE: /* GEU - CF=0 */ 13913 case ORDERED: /* PF=0 */ 13914 case UNORDERED: /* PF=1 */ 13915 case UNEQ: /* EQ - ZF=1 */ 13916 case UNLT: /* LTU - CF=1 */ 13917 case UNLE: /* LEU - CF=1 | ZF=1 */ 13918 case LTGT: /* EQ - ZF=0 */ 13919 break; 13920 case LT: /* LTU - CF=1 - fails on unordered */ 13921 *first_code = UNLT; 13922 *bypass_code = UNORDERED; 13923 break; 13924 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */ 13925 *first_code = UNLE; 13926 *bypass_code = UNORDERED; 13927 break; 13928 case EQ: /* EQ - ZF=1 - fails on unordered */ 13929 *first_code = UNEQ; 13930 *bypass_code = UNORDERED; 13931 break; 13932 case NE: /* NE - ZF=0 - fails on unordered */ 13933 *first_code = LTGT; 13934 *second_code = UNORDERED; 13935 break; 13936 case UNGE: /* GEU - CF=0 - fails on unordered */ 13937 *first_code = GE; 13938 *second_code = UNORDERED; 13939 break; 13940 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */ 13941 *first_code = GT; 13942 *second_code = UNORDERED; 13943 break; 13944 default: 13945 gcc_unreachable (); 13946 } 13947 if (!TARGET_IEEE_FP) 13948 { 13949 *second_code = UNKNOWN; 13950 *bypass_code = UNKNOWN; 13951 } 13952 } 13953 13954 /* Return cost of comparison done fcom + arithmetics operations on AX. 13955 All following functions do use number of instructions as a cost metrics. 13956 In future this should be tweaked to compute bytes for optimize_size and 13957 take into account performance of various instructions on various CPUs. */ 13958 static int 13959 ix86_fp_comparison_arithmetics_cost (enum rtx_code code) 13960 { 13961 if (!TARGET_IEEE_FP) 13962 return 4; 13963 /* The cost of code output by ix86_expand_fp_compare. */ 13964 switch (code) 13965 { 13966 case UNLE: 13967 case UNLT: 13968 case LTGT: 13969 case GT: 13970 case GE: 13971 case UNORDERED: 13972 case ORDERED: 13973 case UNEQ: 13974 return 4; 13975 break; 13976 case LT: 13977 case NE: 13978 case EQ: 13979 case UNGE: 13980 return 5; 13981 break; 13982 case LE: 13983 case UNGT: 13984 return 6; 13985 break; 13986 default: 13987 gcc_unreachable (); 13988 } 13989 } 13990 13991 /* Return cost of comparison done using fcomi operation. 13992 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 13993 static int 13994 ix86_fp_comparison_fcomi_cost (enum rtx_code code) 13995 { 13996 enum rtx_code bypass_code, first_code, second_code; 13997 /* Return arbitrarily high cost when instruction is not supported - this 13998 prevents gcc from using it. */ 13999 if (!TARGET_CMOVE) 14000 return 1024; 14001 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14002 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2; 14003 } 14004 14005 /* Return cost of comparison done using sahf operation. 14006 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 14007 static int 14008 ix86_fp_comparison_sahf_cost (enum rtx_code code) 14009 { 14010 enum rtx_code bypass_code, first_code, second_code; 14011 /* Return arbitrarily high cost when instruction is not preferred - this 14012 avoids gcc from using it. 
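   Here "not preferred" means that sahf is either unavailable (some 64-bit
   processors lack it) or not considered a win for speed, and we are not
   optimizing this insn for size.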
*/ 14013 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ()))) 14014 return 1024; 14015 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14016 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3; 14017 } 14018 14019 /* Compute cost of the comparison done using any method. 14020 See ix86_fp_comparison_arithmetics_cost for the metrics. */ 14021 static int 14022 ix86_fp_comparison_cost (enum rtx_code code) 14023 { 14024 int fcomi_cost, sahf_cost, arithmetics_cost = 1024; 14025 int min; 14026 14027 fcomi_cost = ix86_fp_comparison_fcomi_cost (code); 14028 sahf_cost = ix86_fp_comparison_sahf_cost (code); 14029 14030 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code); 14031 if (min > sahf_cost) 14032 min = sahf_cost; 14033 if (min > fcomi_cost) 14034 min = fcomi_cost; 14035 return min; 14036 } 14037 14038 /* Return true if we should use an FCOMI instruction for this 14039 fp comparison. */ 14040 14041 int 14042 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED) 14043 { 14044 enum rtx_code swapped_code = swap_condition (code); 14045 14046 return ((ix86_fp_comparison_cost (code) 14047 == ix86_fp_comparison_fcomi_cost (code)) 14048 || (ix86_fp_comparison_cost (swapped_code) 14049 == ix86_fp_comparison_fcomi_cost (swapped_code))); 14050 } 14051 14052 /* Swap, force into registers, or otherwise massage the two operands 14053 to a fp comparison. The operands are updated in place; the new 14054 comparison code is returned. */ 14055 14056 static enum rtx_code 14057 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1) 14058 { 14059 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code); 14060 rtx op0 = *pop0, op1 = *pop1; 14061 enum machine_mode op_mode = GET_MODE (op0); 14062 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode); 14063 14064 /* All of the unordered compare instructions only work on registers. 14065 The same is true of the fcomi compare instructions. The XFmode 14066 compare instructions require registers except when comparing 14067 against zero or when converting operand 1 from fixed point to 14068 floating point. */ 14069 14070 if (!is_sse 14071 && (fpcmp_mode == CCFPUmode 14072 || (op_mode == XFmode 14073 && ! (standard_80387_constant_p (op0) == 1 14074 || standard_80387_constant_p (op1) == 1) 14075 && GET_CODE (op1) != FLOAT) 14076 || ix86_use_fcomi_compare (code))) 14077 { 14078 op0 = force_reg (op_mode, op0); 14079 op1 = force_reg (op_mode, op1); 14080 } 14081 else 14082 { 14083 /* %%% We only allow op1 in memory; op0 must be st(0). So swap 14084 things around if they appear profitable, otherwise force op0 14085 into a register. */ 14086 14087 if (standard_80387_constant_p (op0) == 0 14088 || (MEM_P (op0) 14089 && ! (standard_80387_constant_p (op1) == 0 14090 || MEM_P (op1)))) 14091 { 14092 rtx tmp; 14093 tmp = op0, op0 = op1, op1 = tmp; 14094 code = swap_condition (code); 14095 } 14096 14097 if (!REG_P (op0)) 14098 op0 = force_reg (op_mode, op0); 14099 14100 if (CONSTANT_P (op1)) 14101 { 14102 int tmp = standard_80387_constant_p (op1); 14103 if (tmp == 0) 14104 op1 = validize_mem (force_const_mem (op_mode, op1)); 14105 else if (tmp == 1) 14106 { 14107 if (TARGET_CMOVE) 14108 op1 = force_reg (op_mode, op1); 14109 } 14110 else 14111 op1 = force_reg (op_mode, op1); 14112 } 14113 } 14114 14115 /* Try to rearrange the comparison to make it cheaper. 
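   Swapping pays off when the swapped condition needs fewer fixup branches;
   with IEEE math, for instance, LE requires an extra UNORDERED bypass jump
   while the swapped GE does not, so "a <= b" is better tested as "b >= a"
   whenever the operands may be exchanged.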
*/ 14116 if (ix86_fp_comparison_cost (code) 14117 > ix86_fp_comparison_cost (swap_condition (code)) 14118 && (REG_P (op1) || can_create_pseudo_p ())) 14119 { 14120 rtx tmp; 14121 tmp = op0, op0 = op1, op1 = tmp; 14122 code = swap_condition (code); 14123 if (!REG_P (op0)) 14124 op0 = force_reg (op_mode, op0); 14125 } 14126 14127 *pop0 = op0; 14128 *pop1 = op1; 14129 return code; 14130 } 14131 14132 /* Convert comparison codes we use to represent FP comparison to integer 14133 code that will result in proper branch. Return UNKNOWN if no such code 14134 is available. */ 14135 14136 enum rtx_code 14137 ix86_fp_compare_code_to_integer (enum rtx_code code) 14138 { 14139 switch (code) 14140 { 14141 case GT: 14142 return GTU; 14143 case GE: 14144 return GEU; 14145 case ORDERED: 14146 case UNORDERED: 14147 return code; 14148 break; 14149 case UNEQ: 14150 return EQ; 14151 break; 14152 case UNLT: 14153 return LTU; 14154 break; 14155 case UNLE: 14156 return LEU; 14157 break; 14158 case LTGT: 14159 return NE; 14160 break; 14161 default: 14162 return UNKNOWN; 14163 } 14164 } 14165 14166 /* Generate insn patterns to do a floating point compare of OPERANDS. */ 14167 14168 static rtx 14169 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch, 14170 rtx *second_test, rtx *bypass_test) 14171 { 14172 enum machine_mode fpcmp_mode, intcmp_mode; 14173 rtx tmp, tmp2; 14174 int cost = ix86_fp_comparison_cost (code); 14175 enum rtx_code bypass_code, first_code, second_code; 14176 14177 fpcmp_mode = ix86_fp_compare_mode (code); 14178 code = ix86_prepare_fp_compare_args (code, &op0, &op1); 14179 14180 if (second_test) 14181 *second_test = NULL_RTX; 14182 if (bypass_test) 14183 *bypass_test = NULL_RTX; 14184 14185 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14186 14187 /* Do fcomi/sahf based test when profitable. */ 14188 if (ix86_fp_comparison_arithmetics_cost (code) > cost 14189 && (bypass_code == UNKNOWN || bypass_test) 14190 && (second_code == UNKNOWN || second_test)) 14191 { 14192 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 14193 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG), 14194 tmp); 14195 if (TARGET_CMOVE) 14196 emit_insn (tmp); 14197 else 14198 { 14199 gcc_assert (TARGET_SAHF); 14200 14201 if (!scratch) 14202 scratch = gen_reg_rtx (HImode); 14203 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch); 14204 14205 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2))); 14206 } 14207 14208 /* The FP codes work out to act like unsigned. */ 14209 intcmp_mode = fpcmp_mode; 14210 code = first_code; 14211 if (bypass_code != UNKNOWN) 14212 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode, 14213 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14214 const0_rtx); 14215 if (second_code != UNKNOWN) 14216 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode, 14217 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14218 const0_rtx); 14219 } 14220 else 14221 { 14222 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */ 14223 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1); 14224 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW); 14225 if (!scratch) 14226 scratch = gen_reg_rtx (HImode); 14227 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2)); 14228 14229 /* In the unordered case, we have to check C2 for NaN's, which 14230 doesn't happen to work out to anything nice combination-wise. 14231 So do some bit twiddling on the value we've got in AH to come 14232 up with an appropriate set of condition codes. 
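   After the fnstsw the relevant FPU condition bits sit in AH as
   C0 = 0x01 (below), C2 = 0x04 (unordered) and C3 = 0x40 (equal), so 0x45
   masks all three at once; the cases below test combinations of those bits
   and translate them into ordinary integer condition codes.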
*/ 14233 14234 intcmp_mode = CCNOmode; 14235 switch (code) 14236 { 14237 case GT: 14238 case UNGT: 14239 if (code == GT || !TARGET_IEEE_FP) 14240 { 14241 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 14242 code = EQ; 14243 } 14244 else 14245 { 14246 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14247 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 14248 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44))); 14249 intcmp_mode = CCmode; 14250 code = GEU; 14251 } 14252 break; 14253 case LT: 14254 case UNLT: 14255 if (code == LT && TARGET_IEEE_FP) 14256 { 14257 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14258 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01))); 14259 intcmp_mode = CCmode; 14260 code = EQ; 14261 } 14262 else 14263 { 14264 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01))); 14265 code = NE; 14266 } 14267 break; 14268 case GE: 14269 case UNGE: 14270 if (code == GE || !TARGET_IEEE_FP) 14271 { 14272 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05))); 14273 code = EQ; 14274 } 14275 else 14276 { 14277 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14278 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 14279 GEN_INT (0x01))); 14280 code = NE; 14281 } 14282 break; 14283 case LE: 14284 case UNLE: 14285 if (code == LE && TARGET_IEEE_FP) 14286 { 14287 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14288 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx)); 14289 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 14290 intcmp_mode = CCmode; 14291 code = LTU; 14292 } 14293 else 14294 { 14295 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45))); 14296 code = NE; 14297 } 14298 break; 14299 case EQ: 14300 case UNEQ: 14301 if (code == EQ && TARGET_IEEE_FP) 14302 { 14303 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14304 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40))); 14305 intcmp_mode = CCmode; 14306 code = EQ; 14307 } 14308 else 14309 { 14310 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 14311 code = NE; 14312 break; 14313 } 14314 break; 14315 case NE: 14316 case LTGT: 14317 if (code == NE && TARGET_IEEE_FP) 14318 { 14319 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45))); 14320 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch, 14321 GEN_INT (0x40))); 14322 code = NE; 14323 } 14324 else 14325 { 14326 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40))); 14327 code = EQ; 14328 } 14329 break; 14330 14331 case UNORDERED: 14332 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 14333 code = NE; 14334 break; 14335 case ORDERED: 14336 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04))); 14337 code = EQ; 14338 break; 14339 14340 default: 14341 gcc_unreachable (); 14342 } 14343 } 14344 14345 /* Return the test that should be put into the flags user, i.e. 14346 the bcc, scc, or cmov instruction. 
*/ 14347 return gen_rtx_fmt_ee (code, VOIDmode, 14348 gen_rtx_REG (intcmp_mode, FLAGS_REG), 14349 const0_rtx); 14350 } 14351 14352 rtx 14353 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test) 14354 { 14355 rtx op0, op1, ret; 14356 op0 = ix86_compare_op0; 14357 op1 = ix86_compare_op1; 14358 14359 if (second_test) 14360 *second_test = NULL_RTX; 14361 if (bypass_test) 14362 *bypass_test = NULL_RTX; 14363 14364 if (ix86_compare_emitted) 14365 { 14366 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx); 14367 ix86_compare_emitted = NULL_RTX; 14368 } 14369 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0))) 14370 { 14371 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0))); 14372 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 14373 second_test, bypass_test); 14374 } 14375 else 14376 ret = ix86_expand_int_compare (code, op0, op1); 14377 14378 return ret; 14379 } 14380 14381 /* Return true if the CODE will result in nontrivial jump sequence. */ 14382 bool 14383 ix86_fp_jump_nontrivial_p (enum rtx_code code) 14384 { 14385 enum rtx_code bypass_code, first_code, second_code; 14386 if (!TARGET_CMOVE) 14387 return true; 14388 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14389 return bypass_code != UNKNOWN || second_code != UNKNOWN; 14390 } 14391 14392 void 14393 ix86_expand_branch (enum rtx_code code, rtx label) 14394 { 14395 rtx tmp; 14396 14397 /* If we have emitted a compare insn, go straight to simple. 14398 ix86_expand_compare won't emit anything if ix86_compare_emitted 14399 is non NULL. */ 14400 if (ix86_compare_emitted) 14401 goto simple; 14402 14403 switch (GET_MODE (ix86_compare_op0)) 14404 { 14405 case QImode: 14406 case HImode: 14407 case SImode: 14408 simple: 14409 tmp = ix86_expand_compare (code, NULL, NULL); 14410 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 14411 gen_rtx_LABEL_REF (VOIDmode, label), 14412 pc_rtx); 14413 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 14414 return; 14415 14416 case SFmode: 14417 case DFmode: 14418 case XFmode: 14419 { 14420 rtvec vec; 14421 int use_fcomi; 14422 enum rtx_code bypass_code, first_code, second_code; 14423 14424 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0, 14425 &ix86_compare_op1); 14426 14427 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code); 14428 14429 /* Check whether we will use the natural sequence with one jump. If 14430 so, we can expand jump early. Otherwise delay expansion by 14431 creating compound insn to not confuse optimizers. */ 14432 if (bypass_code == UNKNOWN && second_code == UNKNOWN) 14433 { 14434 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1, 14435 gen_rtx_LABEL_REF (VOIDmode, label), 14436 pc_rtx, NULL_RTX, NULL_RTX); 14437 } 14438 else 14439 { 14440 tmp = gen_rtx_fmt_ee (code, VOIDmode, 14441 ix86_compare_op0, ix86_compare_op1); 14442 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 14443 gen_rtx_LABEL_REF (VOIDmode, label), 14444 pc_rtx); 14445 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp); 14446 14447 use_fcomi = ix86_use_fcomi_compare (code); 14448 vec = rtvec_alloc (3 + !use_fcomi); 14449 RTVEC_ELT (vec, 0) = tmp; 14450 RTVEC_ELT (vec, 1) 14451 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG)); 14452 RTVEC_ELT (vec, 2) 14453 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG)); 14454 if (! 
use_fcomi) 14455 RTVEC_ELT (vec, 3) 14456 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode)); 14457 14458 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec)); 14459 } 14460 return; 14461 } 14462 14463 case DImode: 14464 if (TARGET_64BIT) 14465 goto simple; 14466 case TImode: 14467 /* Expand DImode branch into multiple compare+branch. */ 14468 { 14469 rtx lo[2], hi[2], label2; 14470 enum rtx_code code1, code2, code3; 14471 enum machine_mode submode; 14472 14473 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1)) 14474 { 14475 tmp = ix86_compare_op0; 14476 ix86_compare_op0 = ix86_compare_op1; 14477 ix86_compare_op1 = tmp; 14478 code = swap_condition (code); 14479 } 14480 if (GET_MODE (ix86_compare_op0) == DImode) 14481 { 14482 split_di (&ix86_compare_op0, 1, lo+0, hi+0); 14483 split_di (&ix86_compare_op1, 1, lo+1, hi+1); 14484 submode = SImode; 14485 } 14486 else 14487 { 14488 split_ti (&ix86_compare_op0, 1, lo+0, hi+0); 14489 split_ti (&ix86_compare_op1, 1, lo+1, hi+1); 14490 submode = DImode; 14491 } 14492 14493 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to 14494 avoid two branches. This costs one extra insn, so disable when 14495 optimizing for size. */ 14496 14497 if ((code == EQ || code == NE) 14498 && (!optimize_insn_for_size_p () 14499 || hi[1] == const0_rtx || lo[1] == const0_rtx)) 14500 { 14501 rtx xor0, xor1; 14502 14503 xor1 = hi[0]; 14504 if (hi[1] != const0_rtx) 14505 xor1 = expand_binop (submode, xor_optab, xor1, hi[1], 14506 NULL_RTX, 0, OPTAB_WIDEN); 14507 14508 xor0 = lo[0]; 14509 if (lo[1] != const0_rtx) 14510 xor0 = expand_binop (submode, xor_optab, xor0, lo[1], 14511 NULL_RTX, 0, OPTAB_WIDEN); 14512 14513 tmp = expand_binop (submode, ior_optab, xor1, xor0, 14514 NULL_RTX, 0, OPTAB_WIDEN); 14515 14516 ix86_compare_op0 = tmp; 14517 ix86_compare_op1 = const0_rtx; 14518 ix86_expand_branch (code, label); 14519 return; 14520 } 14521 14522 /* Otherwise, if we are doing less-than or greater-or-equal-than, 14523 op1 is a constant and the low word is zero, then we can just 14524 examine the high word. Similarly for low word -1 and 14525 less-or-equal-than or greater-than. */ 14526 14527 if (CONST_INT_P (hi[1])) 14528 switch (code) 14529 { 14530 case LT: case LTU: case GE: case GEU: 14531 if (lo[1] == const0_rtx) 14532 { 14533 ix86_compare_op0 = hi[0]; 14534 ix86_compare_op1 = hi[1]; 14535 ix86_expand_branch (code, label); 14536 return; 14537 } 14538 break; 14539 case LE: case LEU: case GT: case GTU: 14540 if (lo[1] == constm1_rtx) 14541 { 14542 ix86_compare_op0 = hi[0]; 14543 ix86_compare_op1 = hi[1]; 14544 ix86_expand_branch (code, label); 14545 return; 14546 } 14547 break; 14548 default: 14549 break; 14550 } 14551 14552 /* Otherwise, we need two or three jumps. 
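   Three when the high-word compare can leave the result open in both
   directions -- LE, for instance, splits into LT taken to the target, GT
   taken to the fall-through label, and an unsigned LEU on the low words --
   but only two for EQ and NE, where a single high-word test already
   settles one direction.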
*/ 14553 14554 label2 = gen_label_rtx (); 14555 14556 code1 = code; 14557 code2 = swap_condition (code); 14558 code3 = unsigned_condition (code); 14559 14560 switch (code) 14561 { 14562 case LT: case GT: case LTU: case GTU: 14563 break; 14564 14565 case LE: code1 = LT; code2 = GT; break; 14566 case GE: code1 = GT; code2 = LT; break; 14567 case LEU: code1 = LTU; code2 = GTU; break; 14568 case GEU: code1 = GTU; code2 = LTU; break; 14569 14570 case EQ: code1 = UNKNOWN; code2 = NE; break; 14571 case NE: code2 = UNKNOWN; break; 14572 14573 default: 14574 gcc_unreachable (); 14575 } 14576 14577 /* 14578 * a < b => 14579 * if (hi(a) < hi(b)) goto true; 14580 * if (hi(a) > hi(b)) goto false; 14581 * if (lo(a) < lo(b)) goto true; 14582 * false: 14583 */ 14584 14585 ix86_compare_op0 = hi[0]; 14586 ix86_compare_op1 = hi[1]; 14587 14588 if (code1 != UNKNOWN) 14589 ix86_expand_branch (code1, label); 14590 if (code2 != UNKNOWN) 14591 ix86_expand_branch (code2, label2); 14592 14593 ix86_compare_op0 = lo[0]; 14594 ix86_compare_op1 = lo[1]; 14595 ix86_expand_branch (code3, label); 14596 14597 if (code2 != UNKNOWN) 14598 emit_label (label2); 14599 return; 14600 } 14601 14602 default: 14603 gcc_unreachable (); 14604 } 14605 } 14606 14607 /* Split branch based on floating point condition. */ 14608 void 14609 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2, 14610 rtx target1, rtx target2, rtx tmp, rtx pushed) 14611 { 14612 rtx second, bypass; 14613 rtx label = NULL_RTX; 14614 rtx condition; 14615 int bypass_probability = -1, second_probability = -1, probability = -1; 14616 rtx i; 14617 14618 if (target2 != pc_rtx) 14619 { 14620 rtx tmp = target2; 14621 code = reverse_condition_maybe_unordered (code); 14622 target2 = target1; 14623 target1 = tmp; 14624 } 14625 14626 condition = ix86_expand_fp_compare (code, op1, op2, 14627 tmp, &second, &bypass); 14628 14629 /* Remove pushed operand from stack. */ 14630 if (pushed) 14631 ix86_free_from_memory (GET_MODE (pushed)); 14632 14633 if (split_branch_probability >= 0) 14634 { 14635 /* Distribute the probabilities across the jumps. 14636 Assume the BYPASS and SECOND to be always test 14637 for UNORDERED. */ 14638 probability = split_branch_probability; 14639 14640 /* Value of 1 is low enough to make no need for probability 14641 to be updated. Later we may run some experiments and see 14642 if unordered values are more frequent in practice. 
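   (REG_BR_PROB notes are expressed out of REG_BR_PROB_BASE, so a value of
   1 marks the UNORDERED branches as taken essentially never.)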
*/ 14643 if (bypass) 14644 bypass_probability = 1; 14645 if (second) 14646 second_probability = 1; 14647 } 14648 if (bypass != NULL_RTX) 14649 { 14650 label = gen_label_rtx (); 14651 i = emit_jump_insn (gen_rtx_SET 14652 (VOIDmode, pc_rtx, 14653 gen_rtx_IF_THEN_ELSE (VOIDmode, 14654 bypass, 14655 gen_rtx_LABEL_REF (VOIDmode, 14656 label), 14657 pc_rtx))); 14658 if (bypass_probability >= 0) 14659 REG_NOTES (i) 14660 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14661 GEN_INT (bypass_probability), 14662 REG_NOTES (i)); 14663 } 14664 i = emit_jump_insn (gen_rtx_SET 14665 (VOIDmode, pc_rtx, 14666 gen_rtx_IF_THEN_ELSE (VOIDmode, 14667 condition, target1, target2))); 14668 if (probability >= 0) 14669 REG_NOTES (i) 14670 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14671 GEN_INT (probability), 14672 REG_NOTES (i)); 14673 if (second != NULL_RTX) 14674 { 14675 i = emit_jump_insn (gen_rtx_SET 14676 (VOIDmode, pc_rtx, 14677 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1, 14678 target2))); 14679 if (second_probability >= 0) 14680 REG_NOTES (i) 14681 = gen_rtx_EXPR_LIST (REG_BR_PROB, 14682 GEN_INT (second_probability), 14683 REG_NOTES (i)); 14684 } 14685 if (label != NULL_RTX) 14686 emit_label (label); 14687 } 14688 14689 int 14690 ix86_expand_setcc (enum rtx_code code, rtx dest) 14691 { 14692 rtx ret, tmp, tmpreg, equiv; 14693 rtx second_test, bypass_test; 14694 14695 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode)) 14696 return 0; /* FAIL */ 14697 14698 gcc_assert (GET_MODE (dest) == QImode); 14699 14700 ret = ix86_expand_compare (code, &second_test, &bypass_test); 14701 PUT_MODE (ret, QImode); 14702 14703 tmp = dest; 14704 tmpreg = dest; 14705 14706 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret)); 14707 if (bypass_test || second_test) 14708 { 14709 rtx test = second_test; 14710 int bypass = 0; 14711 rtx tmp2 = gen_reg_rtx (QImode); 14712 if (bypass_test) 14713 { 14714 gcc_assert (!second_test); 14715 test = bypass_test; 14716 bypass = 1; 14717 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test))); 14718 } 14719 PUT_MODE (test, QImode); 14720 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test)); 14721 14722 if (bypass) 14723 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2)); 14724 else 14725 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2)); 14726 } 14727 14728 /* Attach a REG_EQUAL note describing the comparison result. */ 14729 if (ix86_compare_op0 && ix86_compare_op1) 14730 { 14731 equiv = simplify_gen_relational (code, QImode, 14732 GET_MODE (ix86_compare_op0), 14733 ix86_compare_op0, ix86_compare_op1); 14734 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv); 14735 } 14736 14737 return 1; /* DONE */ 14738 } 14739 14740 /* Expand comparison setting or clearing carry flag. Return true when 14741 successful and set pop for the operation. */ 14742 static bool 14743 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop) 14744 { 14745 enum machine_mode mode = 14746 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1); 14747 14748 /* Do not handle DImode compares that go through special path. */ 14749 if (mode == (TARGET_64BIT ? TImode : DImode)) 14750 return false; 14751 14752 if (SCALAR_FLOAT_MODE_P (mode)) 14753 { 14754 rtx second_test = NULL, bypass_test = NULL; 14755 rtx compare_op, compare_seq; 14756 14757 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode)); 14758 14759 /* Shortcut: following common codes never translate 14760 into carry flag compares. 
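   Equality style results come back as the zero flag and the ordered tests
   as the parity flag, so none of them reduces to the plain LTU/GEU carry
   test that sbb and adc can consume.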
*/ 14761 if (code == EQ || code == NE || code == UNEQ || code == LTGT 14762 || code == ORDERED || code == UNORDERED) 14763 return false; 14764 14765 /* These comparisons require zero flag; swap operands so they won't. */ 14766 if ((code == GT || code == UNLE || code == LE || code == UNGT) 14767 && !TARGET_IEEE_FP) 14768 { 14769 rtx tmp = op0; 14770 op0 = op1; 14771 op1 = tmp; 14772 code = swap_condition (code); 14773 } 14774 14775 /* Try to expand the comparison and verify that we end up with 14776 carry flag based comparison. This fails to be true only when 14777 we decide to expand comparison using arithmetic that is not 14778 too common scenario. */ 14779 start_sequence (); 14780 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX, 14781 &second_test, &bypass_test); 14782 compare_seq = get_insns (); 14783 end_sequence (); 14784 14785 if (second_test || bypass_test) 14786 return false; 14787 14788 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 14789 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 14790 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op)); 14791 else 14792 code = GET_CODE (compare_op); 14793 14794 if (code != LTU && code != GEU) 14795 return false; 14796 14797 emit_insn (compare_seq); 14798 *pop = compare_op; 14799 return true; 14800 } 14801 14802 if (!INTEGRAL_MODE_P (mode)) 14803 return false; 14804 14805 switch (code) 14806 { 14807 case LTU: 14808 case GEU: 14809 break; 14810 14811 /* Convert a==0 into (unsigned)a<1. */ 14812 case EQ: 14813 case NE: 14814 if (op1 != const0_rtx) 14815 return false; 14816 op1 = const1_rtx; 14817 code = (code == EQ ? LTU : GEU); 14818 break; 14819 14820 /* Convert a>b into b<a or a>=b-1. */ 14821 case GTU: 14822 case LEU: 14823 if (CONST_INT_P (op1)) 14824 { 14825 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0)); 14826 /* Bail out on overflow. We still can swap operands but that 14827 would force loading of the constant into register. */ 14828 if (op1 == const0_rtx 14829 || !x86_64_immediate_operand (op1, GET_MODE (op1))) 14830 return false; 14831 code = (code == GTU ? GEU : LTU); 14832 } 14833 else 14834 { 14835 rtx tmp = op1; 14836 op1 = op0; 14837 op0 = tmp; 14838 code = (code == GTU ? LTU : GEU); 14839 } 14840 break; 14841 14842 /* Convert a>=0 into (unsigned)a<0x80000000. */ 14843 case LT: 14844 case GE: 14845 if (mode == DImode || op1 != const0_rtx) 14846 return false; 14847 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 14848 code = (code == LT ? GEU : LTU); 14849 break; 14850 case LE: 14851 case GT: 14852 if (mode == DImode || op1 != constm1_rtx) 14853 return false; 14854 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode); 14855 code = (code == LE ? GEU : LTU); 14856 break; 14857 14858 default: 14859 return false; 14860 } 14861 /* Swapping operands may cause constant to appear as first operand. 
*/ 14862 if (!nonimmediate_operand (op0, VOIDmode)) 14863 { 14864 if (!can_create_pseudo_p ()) 14865 return false; 14866 op0 = force_reg (mode, op0); 14867 } 14868 ix86_compare_op0 = op0; 14869 ix86_compare_op1 = op1; 14870 *pop = ix86_expand_compare (code, NULL, NULL); 14871 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU); 14872 return true; 14873 } 14874 14875 int 14876 ix86_expand_int_movcc (rtx operands[]) 14877 { 14878 enum rtx_code code = GET_CODE (operands[1]), compare_code; 14879 rtx compare_seq, compare_op; 14880 rtx second_test, bypass_test; 14881 enum machine_mode mode = GET_MODE (operands[0]); 14882 bool sign_bit_compare_p = false;; 14883 14884 start_sequence (); 14885 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 14886 compare_seq = get_insns (); 14887 end_sequence (); 14888 14889 compare_code = GET_CODE (compare_op); 14890 14891 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT)) 14892 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE))) 14893 sign_bit_compare_p = true; 14894 14895 /* Don't attempt mode expansion here -- if we had to expand 5 or 6 14896 HImode insns, we'd be swallowed in word prefix ops. */ 14897 14898 if ((mode != HImode || TARGET_FAST_PREFIX) 14899 && (mode != (TARGET_64BIT ? TImode : DImode)) 14900 && CONST_INT_P (operands[2]) 14901 && CONST_INT_P (operands[3])) 14902 { 14903 rtx out = operands[0]; 14904 HOST_WIDE_INT ct = INTVAL (operands[2]); 14905 HOST_WIDE_INT cf = INTVAL (operands[3]); 14906 HOST_WIDE_INT diff; 14907 14908 diff = ct - cf; 14909 /* Sign bit compares are better done using shifts than we do by using 14910 sbb. */ 14911 if (sign_bit_compare_p 14912 || ix86_expand_carry_flag_compare (code, ix86_compare_op0, 14913 ix86_compare_op1, &compare_op)) 14914 { 14915 /* Detect overlap between destination and compare sources. */ 14916 rtx tmp = out; 14917 14918 if (!sign_bit_compare_p) 14919 { 14920 bool fpcmp = false; 14921 14922 compare_code = GET_CODE (compare_op); 14923 14924 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 14925 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 14926 { 14927 fpcmp = true; 14928 compare_code = ix86_fp_compare_code_to_integer (compare_code); 14929 } 14930 14931 /* To simplify rest of code, restrict to the GEU case. */ 14932 if (compare_code == LTU) 14933 { 14934 HOST_WIDE_INT tmp = ct; 14935 ct = cf; 14936 cf = tmp; 14937 compare_code = reverse_condition (compare_code); 14938 code = reverse_condition (code); 14939 } 14940 else 14941 { 14942 if (fpcmp) 14943 PUT_CODE (compare_op, 14944 reverse_condition_maybe_unordered 14945 (GET_CODE (compare_op))); 14946 else 14947 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 14948 } 14949 diff = ct - cf; 14950 14951 if (reg_overlap_mentioned_p (out, ix86_compare_op0) 14952 || reg_overlap_mentioned_p (out, ix86_compare_op1)) 14953 tmp = gen_reg_rtx (mode); 14954 14955 if (mode == DImode) 14956 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op)); 14957 else 14958 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op)); 14959 } 14960 else 14961 { 14962 if (code == GT || code == GE) 14963 code = reverse_condition (code); 14964 else 14965 { 14966 HOST_WIDE_INT tmp = ct; 14967 ct = cf; 14968 cf = tmp; 14969 diff = ct - cf; 14970 } 14971 tmp = emit_store_flag (tmp, code, ix86_compare_op0, 14972 ix86_compare_op1, VOIDmode, 0, -1); 14973 } 14974 14975 if (diff == 1) 14976 { 14977 /* 14978 * cmpl op0,op1 14979 * sbbl dest,dest 14980 * [addl dest, ct] 14981 * 14982 * Size 5 - 8. 
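	       *
	       * (sbbl dest,dest computes dest - dest - CF, i.e. -1 when the
	       * compare above set the carry flag and 0 otherwise.  Since
	       * diff = ct - cf is 1 here, the optional addl turns {-1, 0}
	       * into {cf, ct}; when ct is 0 the values are already right
	       * and the addl is dropped.)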
14983 */ 14984 if (ct) 14985 tmp = expand_simple_binop (mode, PLUS, 14986 tmp, GEN_INT (ct), 14987 copy_rtx (tmp), 1, OPTAB_DIRECT); 14988 } 14989 else if (cf == -1) 14990 { 14991 /* 14992 * cmpl op0,op1 14993 * sbbl dest,dest 14994 * orl $ct, dest 14995 * 14996 * Size 8. 14997 */ 14998 tmp = expand_simple_binop (mode, IOR, 14999 tmp, GEN_INT (ct), 15000 copy_rtx (tmp), 1, OPTAB_DIRECT); 15001 } 15002 else if (diff == -1 && ct) 15003 { 15004 /* 15005 * cmpl op0,op1 15006 * sbbl dest,dest 15007 * notl dest 15008 * [addl dest, cf] 15009 * 15010 * Size 8 - 11. 15011 */ 15012 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 15013 if (cf) 15014 tmp = expand_simple_binop (mode, PLUS, 15015 copy_rtx (tmp), GEN_INT (cf), 15016 copy_rtx (tmp), 1, OPTAB_DIRECT); 15017 } 15018 else 15019 { 15020 /* 15021 * cmpl op0,op1 15022 * sbbl dest,dest 15023 * [notl dest] 15024 * andl cf - ct, dest 15025 * [addl dest, ct] 15026 * 15027 * Size 8 - 11. 15028 */ 15029 15030 if (cf == 0) 15031 { 15032 cf = ct; 15033 ct = 0; 15034 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1); 15035 } 15036 15037 tmp = expand_simple_binop (mode, AND, 15038 copy_rtx (tmp), 15039 gen_int_mode (cf - ct, mode), 15040 copy_rtx (tmp), 1, OPTAB_DIRECT); 15041 if (ct) 15042 tmp = expand_simple_binop (mode, PLUS, 15043 copy_rtx (tmp), GEN_INT (ct), 15044 copy_rtx (tmp), 1, OPTAB_DIRECT); 15045 } 15046 15047 if (!rtx_equal_p (tmp, out)) 15048 emit_move_insn (copy_rtx (out), copy_rtx (tmp)); 15049 15050 return 1; /* DONE */ 15051 } 15052 15053 if (diff < 0) 15054 { 15055 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); 15056 15057 HOST_WIDE_INT tmp; 15058 tmp = ct, ct = cf, cf = tmp; 15059 diff = -diff; 15060 15061 if (SCALAR_FLOAT_MODE_P (cmp_mode)) 15062 { 15063 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); 15064 15065 /* We may be reversing unordered compare to normal compare, that 15066 is not valid in general (we may convert non-trapping condition 15067 to trapping one), however on i386 we currently emit all 15068 comparisons unordered. */ 15069 compare_code = reverse_condition_maybe_unordered (compare_code); 15070 code = reverse_condition_maybe_unordered (code); 15071 } 15072 else 15073 { 15074 compare_code = reverse_condition (compare_code); 15075 code = reverse_condition (code); 15076 } 15077 } 15078 15079 compare_code = UNKNOWN; 15080 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT 15081 && CONST_INT_P (ix86_compare_op1)) 15082 { 15083 if (ix86_compare_op1 == const0_rtx 15084 && (code == LT || code == GE)) 15085 compare_code = code; 15086 else if (ix86_compare_op1 == constm1_rtx) 15087 { 15088 if (code == LE) 15089 compare_code = LT; 15090 else if (code == GT) 15091 compare_code = GE; 15092 } 15093 } 15094 15095 /* Optimize dest = (op0 < 0) ? -1 : cf. */ 15096 if (compare_code != UNKNOWN 15097 && GET_MODE (ix86_compare_op0) == GET_MODE (out) 15098 && (cf == -1 || ct == -1)) 15099 { 15100 /* If lea code below could be used, only optimize 15101 if it results in a 2 insn sequence. */ 15102 15103 if (! 
(diff == 1 || diff == 2 || diff == 4 || diff == 8 15104 || diff == 3 || diff == 5 || diff == 9) 15105 || (compare_code == LT && ct == -1) 15106 || (compare_code == GE && cf == -1)) 15107 { 15108 /* 15109 * notl op1 (if necessary) 15110 * sarl $31, op1 15111 * orl cf, op1 15112 */ 15113 if (ct != -1) 15114 { 15115 cf = ct; 15116 ct = -1; 15117 code = reverse_condition (code); 15118 } 15119 15120 out = emit_store_flag (out, code, ix86_compare_op0, 15121 ix86_compare_op1, VOIDmode, 0, -1); 15122 15123 out = expand_simple_binop (mode, IOR, 15124 out, GEN_INT (cf), 15125 out, 1, OPTAB_DIRECT); 15126 if (out != operands[0]) 15127 emit_move_insn (operands[0], out); 15128 15129 return 1; /* DONE */ 15130 } 15131 } 15132 15133 15134 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8 15135 || diff == 3 || diff == 5 || diff == 9) 15136 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL) 15137 && (mode != DImode 15138 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode))) 15139 { 15140 /* 15141 * xorl dest,dest 15142 * cmpl op1,op2 15143 * setcc dest 15144 * lea cf(dest*(ct-cf)),dest 15145 * 15146 * Size 14. 15147 * 15148 * This also catches the degenerate setcc-only case. 15149 */ 15150 15151 rtx tmp; 15152 int nops; 15153 15154 out = emit_store_flag (out, code, ix86_compare_op0, 15155 ix86_compare_op1, VOIDmode, 0, 1); 15156 15157 nops = 0; 15158 /* On x86_64 the lea instruction operates on Pmode, so we need 15159 to get arithmetics done in proper mode to match. */ 15160 if (diff == 1) 15161 tmp = copy_rtx (out); 15162 else 15163 { 15164 rtx out1; 15165 out1 = copy_rtx (out); 15166 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1)); 15167 nops++; 15168 if (diff & 1) 15169 { 15170 tmp = gen_rtx_PLUS (mode, tmp, out1); 15171 nops++; 15172 } 15173 } 15174 if (cf != 0) 15175 { 15176 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf)); 15177 nops++; 15178 } 15179 if (!rtx_equal_p (tmp, out)) 15180 { 15181 if (nops == 1) 15182 out = force_operand (tmp, copy_rtx (out)); 15183 else 15184 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp))); 15185 } 15186 if (!rtx_equal_p (out, operands[0])) 15187 emit_move_insn (operands[0], copy_rtx (out)); 15188 15189 return 1; /* DONE */ 15190 } 15191 15192 /* 15193 * General case: Jumpful: 15194 * xorl dest,dest cmpl op1, op2 15195 * cmpl op1, op2 movl ct, dest 15196 * setcc dest jcc 1f 15197 * decl dest movl cf, dest 15198 * andl (cf-ct),dest 1: 15199 * addl ct,dest 15200 * 15201 * Size 20. Size 14. 15202 * 15203 * This is reasonably steep, but branch mispredict costs are 15204 * high on modern cpus, so consider failing only if optimizing 15205 * for space. 15206 */ 15207 15208 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 15209 && BRANCH_COST (optimize_insn_for_speed_p (), 15210 false) >= 2) 15211 { 15212 if (cf == 0) 15213 { 15214 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0); 15215 15216 cf = ct; 15217 ct = 0; 15218 15219 if (SCALAR_FLOAT_MODE_P (cmp_mode)) 15220 { 15221 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode)); 15222 15223 /* We may be reversing unordered compare to normal compare, 15224 that is not valid in general (we may convert non-trapping 15225 condition to trapping one), however on i386 we currently 15226 emit all comparisons unordered. 
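     (For example, UNLE reverses to the nominally trapping GT.)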
*/ 15227 code = reverse_condition_maybe_unordered (code); 15228 } 15229 else 15230 { 15231 code = reverse_condition (code); 15232 if (compare_code != UNKNOWN) 15233 compare_code = reverse_condition (compare_code); 15234 } 15235 } 15236 15237 if (compare_code != UNKNOWN) 15238 { 15239 /* notl op1 (if needed) 15240 sarl $31, op1 15241 andl (cf-ct), op1 15242 addl ct, op1 15243 15244 For x < 0 (resp. x <= -1) there will be no notl, 15245 so if possible swap the constants to get rid of the 15246 complement. 15247 True/false will be -1/0 while code below (store flag 15248 followed by decrement) is 0/-1, so the constants need 15249 to be exchanged once more. */ 15250 15251 if (compare_code == GE || !cf) 15252 { 15253 code = reverse_condition (code); 15254 compare_code = LT; 15255 } 15256 else 15257 { 15258 HOST_WIDE_INT tmp = cf; 15259 cf = ct; 15260 ct = tmp; 15261 } 15262 15263 out = emit_store_flag (out, code, ix86_compare_op0, 15264 ix86_compare_op1, VOIDmode, 0, -1); 15265 } 15266 else 15267 { 15268 out = emit_store_flag (out, code, ix86_compare_op0, 15269 ix86_compare_op1, VOIDmode, 0, 1); 15270 15271 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx, 15272 copy_rtx (out), 1, OPTAB_DIRECT); 15273 } 15274 15275 out = expand_simple_binop (mode, AND, copy_rtx (out), 15276 gen_int_mode (cf - ct, mode), 15277 copy_rtx (out), 1, OPTAB_DIRECT); 15278 if (ct) 15279 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct), 15280 copy_rtx (out), 1, OPTAB_DIRECT); 15281 if (!rtx_equal_p (out, operands[0])) 15282 emit_move_insn (operands[0], copy_rtx (out)); 15283 15284 return 1; /* DONE */ 15285 } 15286 } 15287 15288 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL)) 15289 { 15290 /* Try a few things more with specific constants and a variable. */ 15291 15292 optab op; 15293 rtx var, orig_out, out, tmp; 15294 15295 if (BRANCH_COST (optimize_insn_for_speed_p (), false) <= 2) 15296 return 0; /* FAIL */ 15297 15298 /* If one of the two operands is an interesting constant, load a 15299 constant with the above and mask it in with a logical operation. */ 15300 15301 if (CONST_INT_P (operands[2])) 15302 { 15303 var = operands[3]; 15304 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx) 15305 operands[3] = constm1_rtx, op = and_optab; 15306 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx) 15307 operands[3] = const0_rtx, op = ior_optab; 15308 else 15309 return 0; /* FAIL */ 15310 } 15311 else if (CONST_INT_P (operands[3])) 15312 { 15313 var = operands[2]; 15314 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx) 15315 operands[2] = constm1_rtx, op = and_optab; 15316 else if (INTVAL (operands[3]) == -1 && operands[3] != const0_rtx) 15317 operands[2] = const0_rtx, op = ior_optab; 15318 else 15319 return 0; /* FAIL */ 15320 } 15321 else 15322 return 0; /* FAIL */ 15323 15324 orig_out = operands[0]; 15325 tmp = gen_reg_rtx (mode); 15326 operands[0] = tmp; 15327 15328 /* Recurse to get the constant loaded. */ 15329 if (ix86_expand_int_movcc (operands) == 0) 15330 return 0; /* FAIL */ 15331 15332 /* Mask in the interesting variable. */ 15333 out = expand_binop (mode, op, var, tmp, orig_out, 0, 15334 OPTAB_WIDEN); 15335 if (!rtx_equal_p (out, orig_out)) 15336 emit_move_insn (copy_rtx (orig_out), copy_rtx (out)); 15337 15338 return 1; /* DONE */ 15339 } 15340 15341 /* 15342 * For comparison with above, 15343 * 15344 * movl cf,dest 15345 * movl ct,tmp 15346 * cmpl op1,op2 15347 * cmovcc tmp,dest 15348 * 15349 * Size 15. 
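 *
 * This path is used when cmov is available; it is shorter than the
 * general sbb/and/add sequence above (size 20) and has no branch to
 * mispredict.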
15350 */ 15351 15352 if (! nonimmediate_operand (operands[2], mode)) 15353 operands[2] = force_reg (mode, operands[2]); 15354 if (! nonimmediate_operand (operands[3], mode)) 15355 operands[3] = force_reg (mode, operands[3]); 15356 15357 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 15358 { 15359 rtx tmp = gen_reg_rtx (mode); 15360 emit_move_insn (tmp, operands[3]); 15361 operands[3] = tmp; 15362 } 15363 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 15364 { 15365 rtx tmp = gen_reg_rtx (mode); 15366 emit_move_insn (tmp, operands[2]); 15367 operands[2] = tmp; 15368 } 15369 15370 if (! register_operand (operands[2], VOIDmode) 15371 && (mode == QImode 15372 || ! register_operand (operands[3], VOIDmode))) 15373 operands[2] = force_reg (mode, operands[2]); 15374 15375 if (mode == QImode 15376 && ! register_operand (operands[3], VOIDmode)) 15377 operands[3] = force_reg (mode, operands[3]); 15378 15379 emit_insn (compare_seq); 15380 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15381 gen_rtx_IF_THEN_ELSE (mode, 15382 compare_op, operands[2], 15383 operands[3]))); 15384 if (bypass_test) 15385 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 15386 gen_rtx_IF_THEN_ELSE (mode, 15387 bypass_test, 15388 copy_rtx (operands[3]), 15389 copy_rtx (operands[0])))); 15390 if (second_test) 15391 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]), 15392 gen_rtx_IF_THEN_ELSE (mode, 15393 second_test, 15394 copy_rtx (operands[2]), 15395 copy_rtx (operands[0])))); 15396 15397 return 1; /* DONE */ 15398 } 15399 15400 /* Swap, force into registers, or otherwise massage the two operands 15401 to an sse comparison with a mask result. Thus we differ a bit from 15402 ix86_prepare_fp_compare_args which expects to produce a flags result. 15403 15404 The DEST operand exists to help determine whether to commute commutative 15405 operators. The POP0/POP1 operands are updated in place. The new 15406 comparison code is returned, or UNKNOWN if not implementable. */ 15407 15408 static enum rtx_code 15409 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code, 15410 rtx *pop0, rtx *pop1) 15411 { 15412 rtx tmp; 15413 15414 switch (code) 15415 { 15416 case LTGT: 15417 case UNEQ: 15418 /* We have no LTGT as an operator. We could implement it with 15419 NE & ORDERED, but this requires an extra temporary. It's 15420 not clear that it's worth it. */ 15421 return UNKNOWN; 15422 15423 case LT: 15424 case LE: 15425 case UNGT: 15426 case UNGE: 15427 /* These are supported directly. */ 15428 break; 15429 15430 case EQ: 15431 case NE: 15432 case UNORDERED: 15433 case ORDERED: 15434 /* For commutative operators, try to canonicalize the destination 15435 operand to be first in the comparison - this helps reload to 15436 avoid extra moves. */ 15437 if (!dest || !rtx_equal_p (dest, *pop1)) 15438 break; 15439 /* FALLTHRU */ 15440 15441 case GE: 15442 case GT: 15443 case UNLE: 15444 case UNLT: 15445 /* These are not supported directly. Swap the comparison operands 15446 to transform into something that is supported. */ 15447 tmp = *pop0; 15448 *pop0 = *pop1; 15449 *pop1 = tmp; 15450 code = swap_condition (code); 15451 break; 15452 15453 default: 15454 gcc_unreachable (); 15455 } 15456 15457 return code; 15458 } 15459 15460 /* Detect conditional moves that exactly match min/max operational 15461 semantics. Note that this is IEEE safe, as long as we don't 15462 interchange the operands. 
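   (minss and minsd return their second operand whenever the inputs are
   unordered or both zero, so swapping the operands changes the result
   for NaNs and signed zeros.)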
15463 15464 Returns FALSE if this conditional move doesn't match a MIN/MAX, 15465 and TRUE if the operation is successful and instructions are emitted. */ 15466 15467 static bool 15468 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0, 15469 rtx cmp_op1, rtx if_true, rtx if_false) 15470 { 15471 enum machine_mode mode; 15472 bool is_min; 15473 rtx tmp; 15474 15475 if (code == LT) 15476 ; 15477 else if (code == UNGE) 15478 { 15479 tmp = if_true; 15480 if_true = if_false; 15481 if_false = tmp; 15482 } 15483 else 15484 return false; 15485 15486 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false)) 15487 is_min = true; 15488 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false)) 15489 is_min = false; 15490 else 15491 return false; 15492 15493 mode = GET_MODE (dest); 15494 15495 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here, 15496 but MODE may be a vector mode and thus not appropriate. */ 15497 if (!flag_finite_math_only || !flag_unsafe_math_optimizations) 15498 { 15499 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX; 15500 rtvec v; 15501 15502 if_true = force_reg (mode, if_true); 15503 v = gen_rtvec (2, if_true, if_false); 15504 tmp = gen_rtx_UNSPEC (mode, v, u); 15505 } 15506 else 15507 { 15508 code = is_min ? SMIN : SMAX; 15509 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false); 15510 } 15511 15512 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp)); 15513 return true; 15514 } 15515 15516 /* Expand an sse vector comparison. Return the register with the result. */ 15517 15518 static rtx 15519 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1, 15520 rtx op_true, rtx op_false) 15521 { 15522 enum machine_mode mode = GET_MODE (dest); 15523 rtx x; 15524 15525 cmp_op0 = force_reg (mode, cmp_op0); 15526 if (!nonimmediate_operand (cmp_op1, mode)) 15527 cmp_op1 = force_reg (mode, cmp_op1); 15528 15529 if (optimize 15530 || reg_overlap_mentioned_p (dest, op_true) 15531 || reg_overlap_mentioned_p (dest, op_false)) 15532 dest = gen_reg_rtx (mode); 15533 15534 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1); 15535 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15536 15537 return dest; 15538 } 15539 15540 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical 15541 operations. This is used for both scalar and vector conditional moves. 
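   The general form is DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE); the
   special cases below drop the NOT or the IOR when one arm is zero, and
   SSE5 can emit a single pcmov instead.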
*/ 15542 15543 static void 15544 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false) 15545 { 15546 enum machine_mode mode = GET_MODE (dest); 15547 rtx t2, t3, x; 15548 15549 if (op_false == CONST0_RTX (mode)) 15550 { 15551 op_true = force_reg (mode, op_true); 15552 x = gen_rtx_AND (mode, cmp, op_true); 15553 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15554 } 15555 else if (op_true == CONST0_RTX (mode)) 15556 { 15557 op_false = force_reg (mode, op_false); 15558 x = gen_rtx_NOT (mode, cmp); 15559 x = gen_rtx_AND (mode, x, op_false); 15560 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15561 } 15562 else if (TARGET_SSE5) 15563 { 15564 rtx pcmov = gen_rtx_SET (mode, dest, 15565 gen_rtx_IF_THEN_ELSE (mode, cmp, 15566 op_true, 15567 op_false)); 15568 emit_insn (pcmov); 15569 } 15570 else 15571 { 15572 op_true = force_reg (mode, op_true); 15573 op_false = force_reg (mode, op_false); 15574 15575 t2 = gen_reg_rtx (mode); 15576 if (optimize) 15577 t3 = gen_reg_rtx (mode); 15578 else 15579 t3 = dest; 15580 15581 x = gen_rtx_AND (mode, op_true, cmp); 15582 emit_insn (gen_rtx_SET (VOIDmode, t2, x)); 15583 15584 x = gen_rtx_NOT (mode, cmp); 15585 x = gen_rtx_AND (mode, x, op_false); 15586 emit_insn (gen_rtx_SET (VOIDmode, t3, x)); 15587 15588 x = gen_rtx_IOR (mode, t3, t2); 15589 emit_insn (gen_rtx_SET (VOIDmode, dest, x)); 15590 } 15591 } 15592 15593 /* Expand a floating-point conditional move. Return true if successful. */ 15594 15595 int 15596 ix86_expand_fp_movcc (rtx operands[]) 15597 { 15598 enum machine_mode mode = GET_MODE (operands[0]); 15599 enum rtx_code code = GET_CODE (operands[1]); 15600 rtx tmp, compare_op, second_test, bypass_test; 15601 15602 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 15603 { 15604 enum machine_mode cmode; 15605 15606 /* Since we've no cmove for sse registers, don't force bad register 15607 allocation just to gain access to it. Deny movcc when the 15608 comparison mode doesn't match the move mode. */ 15609 cmode = GET_MODE (ix86_compare_op0); 15610 if (cmode == VOIDmode) 15611 cmode = GET_MODE (ix86_compare_op1); 15612 if (cmode != mode) 15613 return 0; 15614 15615 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 15616 &ix86_compare_op0, 15617 &ix86_compare_op1); 15618 if (code == UNKNOWN) 15619 return 0; 15620 15621 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0, 15622 ix86_compare_op1, operands[2], 15623 operands[3])) 15624 return 1; 15625 15626 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0, 15627 ix86_compare_op1, operands[2], operands[3]); 15628 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]); 15629 return 1; 15630 } 15631 15632 /* The floating point conditional move instructions don't directly 15633 support conditions resulting from a signed integer comparison. */ 15634 15635 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 15636 15637 /* The floating point conditional move instructions don't directly 15638 support signed integer comparisons. 
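   In that case the comparison is first materialized into a QImode
   register with setcc, and fcmov then tests that register for NE
   against zero.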
*/ 15639 15640 if (!fcmov_comparison_operator (compare_op, VOIDmode)) 15641 { 15642 gcc_assert (!second_test && !bypass_test); 15643 tmp = gen_reg_rtx (QImode); 15644 ix86_expand_setcc (code, tmp); 15645 code = NE; 15646 ix86_compare_op0 = tmp; 15647 ix86_compare_op1 = const0_rtx; 15648 compare_op = ix86_expand_compare (code, &second_test, &bypass_test); 15649 } 15650 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3])) 15651 { 15652 tmp = gen_reg_rtx (mode); 15653 emit_move_insn (tmp, operands[3]); 15654 operands[3] = tmp; 15655 } 15656 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2])) 15657 { 15658 tmp = gen_reg_rtx (mode); 15659 emit_move_insn (tmp, operands[2]); 15660 operands[2] = tmp; 15661 } 15662 15663 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15664 gen_rtx_IF_THEN_ELSE (mode, compare_op, 15665 operands[2], operands[3]))); 15666 if (bypass_test) 15667 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15668 gen_rtx_IF_THEN_ELSE (mode, bypass_test, 15669 operands[3], operands[0]))); 15670 if (second_test) 15671 emit_insn (gen_rtx_SET (VOIDmode, operands[0], 15672 gen_rtx_IF_THEN_ELSE (mode, second_test, 15673 operands[2], operands[0]))); 15674 15675 return 1; 15676 } 15677 15678 /* Expand a floating-point vector conditional move; a vcond operation 15679 rather than a movcc operation. */ 15680 15681 bool 15682 ix86_expand_fp_vcond (rtx operands[]) 15683 { 15684 enum rtx_code code = GET_CODE (operands[3]); 15685 rtx cmp; 15686 15687 code = ix86_prepare_sse_fp_compare_args (operands[0], code, 15688 &operands[4], &operands[5]); 15689 if (code == UNKNOWN) 15690 return false; 15691 15692 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4], 15693 operands[5], operands[1], operands[2])) 15694 return true; 15695 15696 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5], 15697 operands[1], operands[2]); 15698 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]); 15699 return true; 15700 } 15701 15702 /* Expand a signed/unsigned integral vector conditional move. */ 15703 15704 bool 15705 ix86_expand_int_vcond (rtx operands[]) 15706 { 15707 enum machine_mode mode = GET_MODE (operands[0]); 15708 enum rtx_code code = GET_CODE (operands[3]); 15709 bool negate = false; 15710 rtx x, cop0, cop1; 15711 15712 cop0 = operands[4]; 15713 cop1 = operands[5]; 15714 15715 /* SSE5 supports all of the comparisons on all vector int types. */ 15716 if (!TARGET_SSE5) 15717 { 15718 /* Canonicalize the comparison to EQ, GT, GTU. */ 15719 switch (code) 15720 { 15721 case EQ: 15722 case GT: 15723 case GTU: 15724 break; 15725 15726 case NE: 15727 case LE: 15728 case LEU: 15729 code = reverse_condition (code); 15730 negate = true; 15731 break; 15732 15733 case GE: 15734 case GEU: 15735 code = reverse_condition (code); 15736 negate = true; 15737 /* FALLTHRU */ 15738 15739 case LT: 15740 case LTU: 15741 code = swap_condition (code); 15742 x = cop0, cop0 = cop1, cop1 = x; 15743 break; 15744 15745 default: 15746 gcc_unreachable (); 15747 } 15748 15749 /* Only SSE4.1/SSE4.2 supports V2DImode. */ 15750 if (mode == V2DImode) 15751 { 15752 switch (code) 15753 { 15754 case EQ: 15755 /* SSE4.1 supports EQ. */ 15756 if (!TARGET_SSE4_1) 15757 return false; 15758 break; 15759 15760 case GT: 15761 case GTU: 15762 /* SSE4.2 supports GT/GTU. */ 15763 if (!TARGET_SSE4_2) 15764 return false; 15765 break; 15766 15767 default: 15768 gcc_unreachable (); 15769 } 15770 } 15771 15772 /* Unsigned parallel compare is not supported by the hardware. 
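     (The pcmpgt family compares signed elements only.)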
15773 Play some tricks to turn this into a signed comparison 15774 against 0. */ 15775 if (code == GTU) 15776 { 15777 cop0 = force_reg (mode, cop0); 15778 15779 switch (mode) 15780 { 15781 case V4SImode: 15782 case V2DImode: 15783 { 15784 rtx t1, t2, mask; 15785 rtx (*gen_sub3) (rtx, rtx, rtx); 15786 15787 /* Subtract (-(INT MAX) - 1) from both operands to make 15788 them signed. */ 15789 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode), 15790 true, false); 15791 gen_sub3 = (mode == V4SImode 15792 ? gen_subv4si3 : gen_subv2di3); 15793 t1 = gen_reg_rtx (mode); 15794 emit_insn (gen_sub3 (t1, cop0, mask)); 15795 15796 t2 = gen_reg_rtx (mode); 15797 emit_insn (gen_sub3 (t2, cop1, mask)); 15798 15799 cop0 = t1; 15800 cop1 = t2; 15801 code = GT; 15802 } 15803 break; 15804 15805 case V16QImode: 15806 case V8HImode: 15807 /* Perform a parallel unsigned saturating subtraction. */ 15808 x = gen_reg_rtx (mode); 15809 emit_insn (gen_rtx_SET (VOIDmode, x, 15810 gen_rtx_US_MINUS (mode, cop0, cop1))); 15811 15812 cop0 = x; 15813 cop1 = CONST0_RTX (mode); 15814 code = EQ; 15815 negate = !negate; 15816 break; 15817 15818 default: 15819 gcc_unreachable (); 15820 } 15821 } 15822 } 15823 15824 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1, 15825 operands[1+negate], operands[2-negate]); 15826 15827 ix86_expand_sse_movcc (operands[0], x, operands[1+negate], 15828 operands[2-negate]); 15829 return true; 15830 } 15831 15832 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is 15833 true if we should do zero extension, else sign extension. HIGH_P is 15834 true if we want the N/2 high elements, else the low elements. */ 15835 15836 void 15837 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15838 { 15839 enum machine_mode imode = GET_MODE (operands[1]); 15840 rtx (*unpack)(rtx, rtx, rtx); 15841 rtx se, dest; 15842 15843 switch (imode) 15844 { 15845 case V16QImode: 15846 if (high_p) 15847 unpack = gen_vec_interleave_highv16qi; 15848 else 15849 unpack = gen_vec_interleave_lowv16qi; 15850 break; 15851 case V8HImode: 15852 if (high_p) 15853 unpack = gen_vec_interleave_highv8hi; 15854 else 15855 unpack = gen_vec_interleave_lowv8hi; 15856 break; 15857 case V4SImode: 15858 if (high_p) 15859 unpack = gen_vec_interleave_highv4si; 15860 else 15861 unpack = gen_vec_interleave_lowv4si; 15862 break; 15863 default: 15864 gcc_unreachable (); 15865 } 15866 15867 dest = gen_lowpart (imode, operands[0]); 15868 15869 if (unsigned_p) 15870 se = force_reg (imode, CONST0_RTX (imode)); 15871 else 15872 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode), 15873 operands[1], pc_rtx, pc_rtx); 15874 15875 emit_insn (unpack (dest, operands[1], se)); 15876 } 15877 15878 /* This function performs the same task as ix86_expand_sse_unpack, 15879 but with SSE4.1 instructions. 
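   The pmovsx/pmovzx forms widen the low elements directly, so only the
   high half needs a preliminary 64-bit right shift of the source.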
*/ 15880 15881 void 15882 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15883 { 15884 enum machine_mode imode = GET_MODE (operands[1]); 15885 rtx (*unpack)(rtx, rtx); 15886 rtx src, dest; 15887 15888 switch (imode) 15889 { 15890 case V16QImode: 15891 if (unsigned_p) 15892 unpack = gen_sse4_1_zero_extendv8qiv8hi2; 15893 else 15894 unpack = gen_sse4_1_extendv8qiv8hi2; 15895 break; 15896 case V8HImode: 15897 if (unsigned_p) 15898 unpack = gen_sse4_1_zero_extendv4hiv4si2; 15899 else 15900 unpack = gen_sse4_1_extendv4hiv4si2; 15901 break; 15902 case V4SImode: 15903 if (unsigned_p) 15904 unpack = gen_sse4_1_zero_extendv2siv2di2; 15905 else 15906 unpack = gen_sse4_1_extendv2siv2di2; 15907 break; 15908 default: 15909 gcc_unreachable (); 15910 } 15911 15912 dest = operands[0]; 15913 if (high_p) 15914 { 15915 /* Shift higher 8 bytes to lower 8 bytes. */ 15916 src = gen_reg_rtx (imode); 15917 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src), 15918 gen_lowpart (TImode, operands[1]), 15919 GEN_INT (64))); 15920 } 15921 else 15922 src = operands[1]; 15923 15924 emit_insn (unpack (dest, src)); 15925 } 15926 15927 /* This function performs the same task as ix86_expand_sse_unpack, 15928 but with sse5 instructions. */ 15929 15930 void 15931 ix86_expand_sse5_unpack (rtx operands[2], bool unsigned_p, bool high_p) 15932 { 15933 enum machine_mode imode = GET_MODE (operands[1]); 15934 int pperm_bytes[16]; 15935 int i; 15936 int h = (high_p) ? 8 : 0; 15937 int h2; 15938 int sign_extend; 15939 rtvec v = rtvec_alloc (16); 15940 rtvec vs; 15941 rtx x, p; 15942 rtx op0 = operands[0], op1 = operands[1]; 15943 15944 switch (imode) 15945 { 15946 case V16QImode: 15947 vs = rtvec_alloc (8); 15948 h2 = (high_p) ? 8 : 0; 15949 for (i = 0; i < 8; i++) 15950 { 15951 pperm_bytes[2*i+0] = PPERM_SRC | PPERM_SRC2 | i | h; 15952 pperm_bytes[2*i+1] = ((unsigned_p) 15953 ? PPERM_ZERO 15954 : PPERM_SIGN | PPERM_SRC2 | i | h); 15955 } 15956 15957 for (i = 0; i < 16; i++) 15958 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 15959 15960 for (i = 0; i < 8; i++) 15961 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 15962 15963 p = gen_rtx_PARALLEL (VOIDmode, vs); 15964 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 15965 if (unsigned_p) 15966 emit_insn (gen_sse5_pperm_zero_v16qi_v8hi (op0, op1, p, x)); 15967 else 15968 emit_insn (gen_sse5_pperm_sign_v16qi_v8hi (op0, op1, p, x)); 15969 break; 15970 15971 case V8HImode: 15972 vs = rtvec_alloc (4); 15973 h2 = (high_p) ? 4 : 0; 15974 for (i = 0; i < 4; i++) 15975 { 15976 sign_extend = ((unsigned_p) 15977 ? PPERM_ZERO 15978 : PPERM_SIGN | PPERM_SRC2 | ((2*i) + 1 + h)); 15979 pperm_bytes[4*i+0] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 0 + h); 15980 pperm_bytes[4*i+1] = PPERM_SRC | PPERM_SRC2 | ((2*i) + 1 + h); 15981 pperm_bytes[4*i+2] = sign_extend; 15982 pperm_bytes[4*i+3] = sign_extend; 15983 } 15984 15985 for (i = 0; i < 16; i++) 15986 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 15987 15988 for (i = 0; i < 4; i++) 15989 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 15990 15991 p = gen_rtx_PARALLEL (VOIDmode, vs); 15992 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 15993 if (unsigned_p) 15994 emit_insn (gen_sse5_pperm_zero_v8hi_v4si (op0, op1, p, x)); 15995 else 15996 emit_insn (gen_sse5_pperm_sign_v8hi_v4si (op0, op1, p, x)); 15997 break; 15998 15999 case V4SImode: 16000 vs = rtvec_alloc (2); 16001 h2 = (high_p) ? 2 : 0; 16002 for (i = 0; i < 2; i++) 16003 { 16004 sign_extend = ((unsigned_p) 16005 ? 
PPERM_ZERO 16006 : PPERM_SIGN | PPERM_SRC2 | ((4*i) + 3 + h)); 16007 pperm_bytes[8*i+0] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 0 + h); 16008 pperm_bytes[8*i+1] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 1 + h); 16009 pperm_bytes[8*i+2] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 2 + h); 16010 pperm_bytes[8*i+3] = PPERM_SRC | PPERM_SRC2 | ((4*i) + 3 + h); 16011 pperm_bytes[8*i+4] = sign_extend; 16012 pperm_bytes[8*i+5] = sign_extend; 16013 pperm_bytes[8*i+6] = sign_extend; 16014 pperm_bytes[8*i+7] = sign_extend; 16015 } 16016 16017 for (i = 0; i < 16; i++) 16018 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16019 16020 for (i = 0; i < 2; i++) 16021 RTVEC_ELT (vs, i) = GEN_INT (i + h2); 16022 16023 p = gen_rtx_PARALLEL (VOIDmode, vs); 16024 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16025 if (unsigned_p) 16026 emit_insn (gen_sse5_pperm_zero_v4si_v2di (op0, op1, p, x)); 16027 else 16028 emit_insn (gen_sse5_pperm_sign_v4si_v2di (op0, op1, p, x)); 16029 break; 16030 16031 default: 16032 gcc_unreachable (); 16033 } 16034 16035 return; 16036 } 16037 16038 /* Pack the high bits from OPERANDS[1] and low bits from OPERANDS[2] into the 16039 next narrower integer vector type */ 16040 void 16041 ix86_expand_sse5_pack (rtx operands[3]) 16042 { 16043 enum machine_mode imode = GET_MODE (operands[0]); 16044 int pperm_bytes[16]; 16045 int i; 16046 rtvec v = rtvec_alloc (16); 16047 rtx x; 16048 rtx op0 = operands[0]; 16049 rtx op1 = operands[1]; 16050 rtx op2 = operands[2]; 16051 16052 switch (imode) 16053 { 16054 case V16QImode: 16055 for (i = 0; i < 8; i++) 16056 { 16057 pperm_bytes[i+0] = PPERM_SRC | PPERM_SRC1 | (i*2); 16058 pperm_bytes[i+8] = PPERM_SRC | PPERM_SRC2 | (i*2); 16059 } 16060 16061 for (i = 0; i < 16; i++) 16062 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16063 16064 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16065 emit_insn (gen_sse5_pperm_pack_v8hi_v16qi (op0, op1, op2, x)); 16066 break; 16067 16068 case V8HImode: 16069 for (i = 0; i < 4; i++) 16070 { 16071 pperm_bytes[(2*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 0); 16072 pperm_bytes[(2*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*4) + 1); 16073 pperm_bytes[(2*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 0); 16074 pperm_bytes[(2*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*4) + 1); 16075 } 16076 16077 for (i = 0; i < 16; i++) 16078 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16079 16080 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16081 emit_insn (gen_sse5_pperm_pack_v4si_v8hi (op0, op1, op2, x)); 16082 break; 16083 16084 case V4SImode: 16085 for (i = 0; i < 2; i++) 16086 { 16087 pperm_bytes[(4*i)+0] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 0); 16088 pperm_bytes[(4*i)+1] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 1); 16089 pperm_bytes[(4*i)+2] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 2); 16090 pperm_bytes[(4*i)+3] = PPERM_SRC | PPERM_SRC1 | ((i*8) + 3); 16091 pperm_bytes[(4*i)+8] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 0); 16092 pperm_bytes[(4*i)+9] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 1); 16093 pperm_bytes[(4*i)+10] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 2); 16094 pperm_bytes[(4*i)+11] = PPERM_SRC | PPERM_SRC2 | ((i*8) + 3); 16095 } 16096 16097 for (i = 0; i < 16; i++) 16098 RTVEC_ELT (v, i) = GEN_INT (pperm_bytes[i]); 16099 16100 x = force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, v)); 16101 emit_insn (gen_sse5_pperm_pack_v2di_v4si (op0, op1, op2, x)); 16102 break; 16103 16104 default: 16105 gcc_unreachable (); 16106 } 16107 16108 return; 16109 } 16110 16111 /* Expand conditional increment or decrement using adb/sbb 
instructions. 16112 The default case using setcc followed by the conditional move can be 16113 done by generic code. */ 16114 int 16115 ix86_expand_int_addcc (rtx operands[]) 16116 { 16117 enum rtx_code code = GET_CODE (operands[1]); 16118 rtx compare_op; 16119 rtx val = const0_rtx; 16120 bool fpcmp = false; 16121 enum machine_mode mode = GET_MODE (operands[0]); 16122 16123 if (operands[3] != const1_rtx 16124 && operands[3] != constm1_rtx) 16125 return 0; 16126 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0, 16127 ix86_compare_op1, &compare_op)) 16128 return 0; 16129 code = GET_CODE (compare_op); 16130 16131 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode 16132 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode) 16133 { 16134 fpcmp = true; 16135 code = ix86_fp_compare_code_to_integer (code); 16136 } 16137 16138 if (code != LTU) 16139 { 16140 val = constm1_rtx; 16141 if (fpcmp) 16142 PUT_CODE (compare_op, 16143 reverse_condition_maybe_unordered 16144 (GET_CODE (compare_op))); 16145 else 16146 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op))); 16147 } 16148 PUT_MODE (compare_op, mode); 16149 16150 /* Construct either adc or sbb insn. */ 16151 if ((code == LTU) == (operands[3] == constm1_rtx)) 16152 { 16153 switch (GET_MODE (operands[0])) 16154 { 16155 case QImode: 16156 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op)); 16157 break; 16158 case HImode: 16159 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op)); 16160 break; 16161 case SImode: 16162 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op)); 16163 break; 16164 case DImode: 16165 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 16166 break; 16167 default: 16168 gcc_unreachable (); 16169 } 16170 } 16171 else 16172 { 16173 switch (GET_MODE (operands[0])) 16174 { 16175 case QImode: 16176 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op)); 16177 break; 16178 case HImode: 16179 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op)); 16180 break; 16181 case SImode: 16182 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op)); 16183 break; 16184 case DImode: 16185 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op)); 16186 break; 16187 default: 16188 gcc_unreachable (); 16189 } 16190 } 16191 return 1; /* DONE */ 16192 } 16193 16194 16195 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but 16196 works for floating pointer parameters and nonoffsetable memories. 16197 For pushes, it returns just stack offsets; the values will be saved 16198 in the right order. Maximally three parts are generated. */ 16199 16200 static int 16201 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode) 16202 { 16203 int size; 16204 16205 if (!TARGET_64BIT) 16206 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4; 16207 else 16208 size = (GET_MODE_SIZE (mode) + 4) / 8; 16209 16210 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand))); 16211 gcc_assert (size >= 2 && size <= 4); 16212 16213 /* Optimize constant pool reference to immediates. This is used by fp 16214 moves, that force all constants to memory to allow combining. */ 16215 if (MEM_P (operand) && MEM_READONLY_P (operand)) 16216 { 16217 rtx tmp = maybe_get_pool_constant (operand); 16218 if (tmp) 16219 operand = tmp; 16220 } 16221 16222 if (MEM_P (operand) && !offsettable_memref_p (operand)) 16223 { 16224 /* The only non-offsetable memories we handle are pushes. 
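	 The push is retyped to Pmode and every part refers to the same
	 auto-decrementing stack address.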
*/ 16225 int ok = push_operand (operand, VOIDmode); 16226 16227 gcc_assert (ok); 16228 16229 operand = copy_rtx (operand); 16230 PUT_MODE (operand, Pmode); 16231 parts[0] = parts[1] = parts[2] = parts[3] = operand; 16232 return size; 16233 } 16234 16235 if (GET_CODE (operand) == CONST_VECTOR) 16236 { 16237 enum machine_mode imode = int_mode_for_mode (mode); 16238 /* Caution: if we looked through a constant pool memory above, 16239 the operand may actually have a different mode now. That's 16240 ok, since we want to pun this all the way back to an integer. */ 16241 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0); 16242 gcc_assert (operand != NULL); 16243 mode = imode; 16244 } 16245 16246 if (!TARGET_64BIT) 16247 { 16248 if (mode == DImode) 16249 split_di (&operand, 1, &parts[0], &parts[1]); 16250 else 16251 { 16252 int i; 16253 16254 if (REG_P (operand)) 16255 { 16256 gcc_assert (reload_completed); 16257 for (i = 0; i < size; i++) 16258 parts[i] = gen_rtx_REG (SImode, REGNO (operand) + i); 16259 } 16260 else if (offsettable_memref_p (operand)) 16261 { 16262 operand = adjust_address (operand, SImode, 0); 16263 parts[0] = operand; 16264 for (i = 1; i < size; i++) 16265 parts[i] = adjust_address (operand, SImode, 4 * i); 16266 } 16267 else if (GET_CODE (operand) == CONST_DOUBLE) 16268 { 16269 REAL_VALUE_TYPE r; 16270 long l[4]; 16271 16272 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 16273 switch (mode) 16274 { 16275 case TFmode: 16276 real_to_target (l, &r, mode); 16277 parts[3] = gen_int_mode (l[3], SImode); 16278 parts[2] = gen_int_mode (l[2], SImode); 16279 break; 16280 case XFmode: 16281 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l); 16282 parts[2] = gen_int_mode (l[2], SImode); 16283 break; 16284 case DFmode: 16285 REAL_VALUE_TO_TARGET_DOUBLE (r, l); 16286 break; 16287 default: 16288 gcc_unreachable (); 16289 } 16290 parts[1] = gen_int_mode (l[1], SImode); 16291 parts[0] = gen_int_mode (l[0], SImode); 16292 } 16293 else 16294 gcc_unreachable (); 16295 } 16296 } 16297 else 16298 { 16299 if (mode == TImode) 16300 split_ti (&operand, 1, &parts[0], &parts[1]); 16301 if (mode == XFmode || mode == TFmode) 16302 { 16303 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode; 16304 if (REG_P (operand)) 16305 { 16306 gcc_assert (reload_completed); 16307 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0); 16308 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1); 16309 } 16310 else if (offsettable_memref_p (operand)) 16311 { 16312 operand = adjust_address (operand, DImode, 0); 16313 parts[0] = operand; 16314 parts[1] = adjust_address (operand, upper_mode, 8); 16315 } 16316 else if (GET_CODE (operand) == CONST_DOUBLE) 16317 { 16318 REAL_VALUE_TYPE r; 16319 long l[4]; 16320 16321 REAL_VALUE_FROM_CONST_DOUBLE (r, operand); 16322 real_to_target (l, &r, mode); 16323 16324 /* Do not use shift by 32 to avoid warning on 32bit systems. 
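	     The expression is compiled even when HOST_WIDE_INT is only 32
	     bits wide, where a literal << 32 would warn, so the shift is
	     split into << 31 followed by << 1.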
*/ 16325 if (HOST_BITS_PER_WIDE_INT >= 64) 16326 parts[0] 16327 = gen_int_mode 16328 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1)) 16329 + ((((HOST_WIDE_INT) l[1]) << 31) << 1), 16330 DImode); 16331 else 16332 parts[0] = immed_double_const (l[0], l[1], DImode); 16333 16334 if (upper_mode == SImode) 16335 parts[1] = gen_int_mode (l[2], SImode); 16336 else if (HOST_BITS_PER_WIDE_INT >= 64) 16337 parts[1] 16338 = gen_int_mode 16339 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1)) 16340 + ((((HOST_WIDE_INT) l[3]) << 31) << 1), 16341 DImode); 16342 else 16343 parts[1] = immed_double_const (l[2], l[3], DImode); 16344 } 16345 else 16346 gcc_unreachable (); 16347 } 16348 } 16349 16350 return size; 16351 } 16352 16353 /* Emit insns to perform a move or push of DI, DF, XF, and TF values. 16354 Return false when normal moves are needed; true when all required 16355 insns have been emitted. Operands 2-4 contain the input values 16356 int the correct order; operands 5-7 contain the output values. */ 16357 16358 void 16359 ix86_split_long_move (rtx operands[]) 16360 { 16361 rtx part[2][4]; 16362 int nparts, i, j; 16363 int push = 0; 16364 int collisions = 0; 16365 enum machine_mode mode = GET_MODE (operands[0]); 16366 bool collisionparts[4]; 16367 16368 /* The DFmode expanders may ask us to move double. 16369 For 64bit target this is single move. By hiding the fact 16370 here we simplify i386.md splitters. */ 16371 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT) 16372 { 16373 /* Optimize constant pool reference to immediates. This is used by 16374 fp moves, that force all constants to memory to allow combining. */ 16375 16376 if (MEM_P (operands[1]) 16377 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF 16378 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0))) 16379 operands[1] = get_pool_constant (XEXP (operands[1], 0)); 16380 if (push_operand (operands[0], VOIDmode)) 16381 { 16382 operands[0] = copy_rtx (operands[0]); 16383 PUT_MODE (operands[0], Pmode); 16384 } 16385 else 16386 operands[0] = gen_lowpart (DImode, operands[0]); 16387 operands[1] = gen_lowpart (DImode, operands[1]); 16388 emit_move_insn (operands[0], operands[1]); 16389 return; 16390 } 16391 16392 /* The only non-offsettable memory we handle is push. */ 16393 if (push_operand (operands[0], VOIDmode)) 16394 push = 1; 16395 else 16396 gcc_assert (!MEM_P (operands[0]) 16397 || offsettable_memref_p (operands[0])); 16398 16399 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0])); 16400 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0])); 16401 16402 /* When emitting push, take care for source operands on the stack. */ 16403 if (push && MEM_P (operands[1]) 16404 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1])) 16405 { 16406 rtx src_base = XEXP (part[1][nparts - 1], 0); 16407 16408 /* Compensate for the stack decrement by 4. */ 16409 if (!TARGET_64BIT && nparts == 3 16410 && mode == XFmode && TARGET_128BIT_LONG_DOUBLE) 16411 src_base = plus_constant (src_base, 4); 16412 16413 /* src_base refers to the stack pointer and is 16414 automatically decreased by emitted push. */ 16415 for (i = 0; i < nparts; i++) 16416 part[1][i] = change_address (part[1][i], 16417 GET_MODE (part[1][i]), src_base); 16418 } 16419 16420 /* We need to do copy in the right order in case an address register 16421 of the source overlaps the destination. 
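     Otherwise a later part could be loaded through an address register
     that an earlier part has already clobbered.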
*/ 16422 if (REG_P (part[0][0]) && MEM_P (part[1][0])) 16423 { 16424 rtx tmp; 16425 16426 for (i = 0; i < nparts; i++) 16427 { 16428 collisionparts[i] 16429 = reg_overlap_mentioned_p (part[0][i], XEXP (part[1][0], 0)); 16430 if (collisionparts[i]) 16431 collisions++; 16432 } 16433 16434 /* Collision in the middle part can be handled by reordering. */ 16435 if (collisions == 1 && nparts == 3 && collisionparts [1]) 16436 { 16437 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 16438 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 16439 } 16440 else if (collisions == 1 16441 && nparts == 4 16442 && (collisionparts [1] || collisionparts [2])) 16443 { 16444 if (collisionparts [1]) 16445 { 16446 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp; 16447 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp; 16448 } 16449 else 16450 { 16451 tmp = part[0][2]; part[0][2] = part[0][3]; part[0][3] = tmp; 16452 tmp = part[1][2]; part[1][2] = part[1][3]; part[1][3] = tmp; 16453 } 16454 } 16455 16456 /* If there are more collisions, we can't handle it by reordering. 16457 Do an lea to the last part and use only one colliding move. */ 16458 else if (collisions > 1) 16459 { 16460 rtx base; 16461 16462 collisions = 1; 16463 16464 base = part[0][nparts - 1]; 16465 16466 /* Handle the case when the last part isn't valid for lea. 16467 Happens in 64-bit mode storing the 12-byte XFmode. */ 16468 if (GET_MODE (base) != Pmode) 16469 base = gen_rtx_REG (Pmode, REGNO (base)); 16470 16471 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0))); 16472 part[1][0] = replace_equiv_address (part[1][0], base); 16473 for (i = 1; i < nparts; i++) 16474 { 16475 tmp = plus_constant (base, UNITS_PER_WORD * i); 16476 part[1][i] = replace_equiv_address (part[1][i], tmp); 16477 } 16478 } 16479 } 16480 16481 if (push) 16482 { 16483 if (!TARGET_64BIT) 16484 { 16485 if (nparts == 3) 16486 { 16487 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode) 16488 emit_insn (gen_addsi3 (stack_pointer_rtx, 16489 stack_pointer_rtx, GEN_INT (-4))); 16490 emit_move_insn (part[0][2], part[1][2]); 16491 } 16492 else if (nparts == 4) 16493 { 16494 emit_move_insn (part[0][3], part[1][3]); 16495 emit_move_insn (part[0][2], part[1][2]); 16496 } 16497 } 16498 else 16499 { 16500 /* In 64bit mode we don't have 32bit push available. In case this is 16501 register, it is OK - we will just use larger counterpart. We also 16502 retype memory - these comes from attempt to avoid REX prefix on 16503 moving of second half of TFmode value. */ 16504 if (GET_MODE (part[1][1]) == SImode) 16505 { 16506 switch (GET_CODE (part[1][1])) 16507 { 16508 case MEM: 16509 part[1][1] = adjust_address (part[1][1], DImode, 0); 16510 break; 16511 16512 case REG: 16513 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1])); 16514 break; 16515 16516 default: 16517 gcc_unreachable (); 16518 } 16519 16520 if (GET_MODE (part[1][0]) == SImode) 16521 part[1][0] = part[1][1]; 16522 } 16523 } 16524 emit_move_insn (part[0][1], part[1][1]); 16525 emit_move_insn (part[0][0], part[1][0]); 16526 return; 16527 } 16528 16529 /* Choose correct order to not overwrite the source before it is copied. 
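     If the first destination register is also one of the later source
     parts, the moves are emitted highest part first.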
*/ 16530 if ((REG_P (part[0][0]) 16531 && REG_P (part[1][1]) 16532 && (REGNO (part[0][0]) == REGNO (part[1][1]) 16533 || (nparts == 3 16534 && REGNO (part[0][0]) == REGNO (part[1][2])) 16535 || (nparts == 4 16536 && REGNO (part[0][0]) == REGNO (part[1][3])))) 16537 || (collisions > 0 16538 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))) 16539 { 16540 for (i = 0, j = nparts - 1; i < nparts; i++, j--) 16541 { 16542 operands[2 + i] = part[0][j]; 16543 operands[6 + i] = part[1][j]; 16544 } 16545 } 16546 else 16547 { 16548 for (i = 0; i < nparts; i++) 16549 { 16550 operands[2 + i] = part[0][i]; 16551 operands[6 + i] = part[1][i]; 16552 } 16553 } 16554 16555 /* If optimizing for size, attempt to locally unCSE nonzero constants. */ 16556 if (optimize_insn_for_size_p ()) 16557 { 16558 for (j = 0; j < nparts - 1; j++) 16559 if (CONST_INT_P (operands[6 + j]) 16560 && operands[6 + j] != const0_rtx 16561 && REG_P (operands[2 + j])) 16562 for (i = j; i < nparts - 1; i++) 16563 if (CONST_INT_P (operands[7 + i]) 16564 && INTVAL (operands[7 + i]) == INTVAL (operands[6 + j])) 16565 operands[7 + i] = operands[2 + j]; 16566 } 16567 16568 for (i = 0; i < nparts; i++) 16569 emit_move_insn (operands[2 + i], operands[6 + i]); 16570 16571 return; 16572 } 16573 16574 /* Helper function of ix86_split_ashl used to generate an SImode/DImode 16575 left shift by a constant, either using a single shift or 16576 a sequence of add instructions. */ 16577 16578 static void 16579 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode) 16580 { 16581 if (count == 1) 16582 { 16583 emit_insn ((mode == DImode 16584 ? gen_addsi3 16585 : gen_adddi3) (operand, operand, operand)); 16586 } 16587 else if (!optimize_insn_for_size_p () 16588 && count * ix86_cost->add <= ix86_cost->shift_const) 16589 { 16590 int i; 16591 for (i=0; i<count; i++) 16592 { 16593 emit_insn ((mode == DImode 16594 ? gen_addsi3 16595 : gen_adddi3) (operand, operand, operand)); 16596 } 16597 } 16598 else 16599 emit_insn ((mode == DImode 16600 ? gen_ashlsi3 16601 : gen_ashldi3) (operand, operand, GEN_INT (count))); 16602 } 16603 16604 void 16605 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode) 16606 { 16607 rtx low[2], high[2]; 16608 int count; 16609 const int single_width = mode == DImode ? 32 : 64; 16610 16611 if (CONST_INT_P (operands[2])) 16612 { 16613 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 16614 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16615 16616 if (count >= single_width) 16617 { 16618 emit_move_insn (high[0], low[1]); 16619 emit_move_insn (low[0], const0_rtx); 16620 16621 if (count > single_width) 16622 ix86_expand_ashl_const (high[0], count - single_width, mode); 16623 } 16624 else 16625 { 16626 if (!rtx_equal_p (operands[0], operands[1])) 16627 emit_move_insn (operands[0], operands[1]); 16628 emit_insn ((mode == DImode 16629 ? gen_x86_shld 16630 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count))); 16631 ix86_expand_ashl_const (low[0], count, mode); 16632 } 16633 return; 16634 } 16635 16636 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16637 16638 if (operands[1] == const1_rtx) 16639 { 16640 /* Assuming we've chosen a QImode capable registers, then 1 << N 16641 can be done with two 32/64-bit shifts, no branches, no cmoves. 
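	 A testb of the count against 32 (resp. 64) picks, via setcc on ZF,
	 the half that receives the 1; both halves are then shifted left by
	 the count, which the hardware masks modulo the word size.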
*/ 16642 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0])) 16643 { 16644 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG); 16645 16646 ix86_expand_clear (low[0]); 16647 ix86_expand_clear (high[0]); 16648 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width))); 16649 16650 d = gen_lowpart (QImode, low[0]); 16651 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 16652 s = gen_rtx_EQ (QImode, flags, const0_rtx); 16653 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 16654 16655 d = gen_lowpart (QImode, high[0]); 16656 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d); 16657 s = gen_rtx_NE (QImode, flags, const0_rtx); 16658 emit_insn (gen_rtx_SET (VOIDmode, d, s)); 16659 } 16660 16661 /* Otherwise, we can get the same results by manually performing 16662 a bit extract operation on bit 5/6, and then performing the two 16663 shifts. The two methods of getting 0/1 into low/high are exactly 16664 the same size. Avoiding the shift in the bit extract case helps 16665 pentium4 a bit; no one else seems to care much either way. */ 16666 else 16667 { 16668 rtx x; 16669 16670 if (TARGET_PARTIAL_REG_STALL && !optimize_insn_for_size_p ()) 16671 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]); 16672 else 16673 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]); 16674 emit_insn (gen_rtx_SET (VOIDmode, high[0], x)); 16675 16676 emit_insn ((mode == DImode 16677 ? gen_lshrsi3 16678 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6))); 16679 emit_insn ((mode == DImode 16680 ? gen_andsi3 16681 : gen_anddi3) (high[0], high[0], GEN_INT (1))); 16682 emit_move_insn (low[0], high[0]); 16683 emit_insn ((mode == DImode 16684 ? gen_xorsi3 16685 : gen_xordi3) (low[0], low[0], GEN_INT (1))); 16686 } 16687 16688 emit_insn ((mode == DImode 16689 ? gen_ashlsi3 16690 : gen_ashldi3) (low[0], low[0], operands[2])); 16691 emit_insn ((mode == DImode 16692 ? gen_ashlsi3 16693 : gen_ashldi3) (high[0], high[0], operands[2])); 16694 return; 16695 } 16696 16697 if (operands[1] == constm1_rtx) 16698 { 16699 /* For -1 << N, we can avoid the shld instruction, because we 16700 know that we're shifting 0...31/63 ones into a -1. */ 16701 emit_move_insn (low[0], constm1_rtx); 16702 if (optimize_insn_for_size_p ()) 16703 emit_move_insn (high[0], low[0]); 16704 else 16705 emit_move_insn (high[0], constm1_rtx); 16706 } 16707 else 16708 { 16709 if (!rtx_equal_p (operands[0], operands[1])) 16710 emit_move_insn (operands[0], operands[1]); 16711 16712 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16713 emit_insn ((mode == DImode 16714 ? gen_x86_shld 16715 : gen_x86_64_shld) (high[0], low[0], operands[2])); 16716 } 16717 16718 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2])); 16719 16720 if (TARGET_CMOVE && scratch) 16721 { 16722 ix86_expand_clear (scratch); 16723 emit_insn ((mode == DImode 16724 ? gen_x86_shift_adj_1 16725 : gen_x86_64_shift_adj_1) (high[0], low[0], operands[2], 16726 scratch)); 16727 } 16728 else 16729 emit_insn ((mode == DImode 16730 ? gen_x86_shift_adj_2 16731 : gen_x86_64_shift_adj_2) (high[0], low[0], operands[2])); 16732 } 16733 16734 void 16735 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode) 16736 { 16737 rtx low[2], high[2]; 16738 int count; 16739 const int single_width = mode == DImode ? 32 : 64; 16740 16741 if (CONST_INT_P (operands[2])) 16742 { 16743 (mode == DImode ? 
split_di : split_ti) (operands, 2, low, high); 16744 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16745 16746 if (count == single_width * 2 - 1) 16747 { 16748 emit_move_insn (high[0], high[1]); 16749 emit_insn ((mode == DImode 16750 ? gen_ashrsi3 16751 : gen_ashrdi3) (high[0], high[0], 16752 GEN_INT (single_width - 1))); 16753 emit_move_insn (low[0], high[0]); 16754 16755 } 16756 else if (count >= single_width) 16757 { 16758 emit_move_insn (low[0], high[1]); 16759 emit_move_insn (high[0], low[0]); 16760 emit_insn ((mode == DImode 16761 ? gen_ashrsi3 16762 : gen_ashrdi3) (high[0], high[0], 16763 GEN_INT (single_width - 1))); 16764 if (count > single_width) 16765 emit_insn ((mode == DImode 16766 ? gen_ashrsi3 16767 : gen_ashrdi3) (low[0], low[0], 16768 GEN_INT (count - single_width))); 16769 } 16770 else 16771 { 16772 if (!rtx_equal_p (operands[0], operands[1])) 16773 emit_move_insn (operands[0], operands[1]); 16774 emit_insn ((mode == DImode 16775 ? gen_x86_shrd 16776 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 16777 emit_insn ((mode == DImode 16778 ? gen_ashrsi3 16779 : gen_ashrdi3) (high[0], high[0], GEN_INT (count))); 16780 } 16781 } 16782 else 16783 { 16784 if (!rtx_equal_p (operands[0], operands[1])) 16785 emit_move_insn (operands[0], operands[1]); 16786 16787 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16788 16789 emit_insn ((mode == DImode 16790 ? gen_x86_shrd 16791 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 16792 emit_insn ((mode == DImode 16793 ? gen_ashrsi3 16794 : gen_ashrdi3) (high[0], high[0], operands[2])); 16795 16796 if (TARGET_CMOVE && scratch) 16797 { 16798 emit_move_insn (scratch, high[0]); 16799 emit_insn ((mode == DImode 16800 ? gen_ashrsi3 16801 : gen_ashrdi3) (scratch, scratch, 16802 GEN_INT (single_width - 1))); 16803 emit_insn ((mode == DImode 16804 ? gen_x86_shift_adj_1 16805 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], 16806 scratch)); 16807 } 16808 else 16809 emit_insn ((mode == DImode 16810 ? gen_x86_shift_adj_3 16811 : gen_x86_64_shift_adj_3) (low[0], high[0], operands[2])); 16812 } 16813 } 16814 16815 void 16816 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode) 16817 { 16818 rtx low[2], high[2]; 16819 int count; 16820 const int single_width = mode == DImode ? 32 : 64; 16821 16822 if (CONST_INT_P (operands[2])) 16823 { 16824 (mode == DImode ? split_di : split_ti) (operands, 2, low, high); 16825 count = INTVAL (operands[2]) & (single_width * 2 - 1); 16826 16827 if (count >= single_width) 16828 { 16829 emit_move_insn (low[0], high[1]); 16830 ix86_expand_clear (high[0]); 16831 16832 if (count > single_width) 16833 emit_insn ((mode == DImode 16834 ? gen_lshrsi3 16835 : gen_lshrdi3) (low[0], low[0], 16836 GEN_INT (count - single_width))); 16837 } 16838 else 16839 { 16840 if (!rtx_equal_p (operands[0], operands[1])) 16841 emit_move_insn (operands[0], operands[1]); 16842 emit_insn ((mode == DImode 16843 ? gen_x86_shrd 16844 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count))); 16845 emit_insn ((mode == DImode 16846 ? gen_lshrsi3 16847 : gen_lshrdi3) (high[0], high[0], GEN_INT (count))); 16848 } 16849 } 16850 else 16851 { 16852 if (!rtx_equal_p (operands[0], operands[1])) 16853 emit_move_insn (operands[0], operands[1]); 16854 16855 (mode == DImode ? split_di : split_ti) (operands, 1, low, high); 16856 16857 emit_insn ((mode == DImode 16858 ? gen_x86_shrd 16859 : gen_x86_64_shrd) (low[0], high[0], operands[2])); 16860 emit_insn ((mode == DImode 16861 ? 
gen_lshrsi3 16862 : gen_lshrdi3) (high[0], high[0], operands[2])); 16863 16864 /* Heh. By reversing the arguments, we can reuse this pattern. */ 16865 if (TARGET_CMOVE && scratch) 16866 { 16867 ix86_expand_clear (scratch); 16868 emit_insn ((mode == DImode 16869 ? gen_x86_shift_adj_1 16870 : gen_x86_64_shift_adj_1) (low[0], high[0], operands[2], 16871 scratch)); 16872 } 16873 else 16874 emit_insn ((mode == DImode 16875 ? gen_x86_shift_adj_2 16876 : gen_x86_64_shift_adj_2) (low[0], high[0], operands[2])); 16877 } 16878 } 16879 16880 /* Predict just emitted jump instruction to be taken with probability PROB. */ 16881 static void 16882 predict_jump (int prob) 16883 { 16884 rtx insn = get_last_insn (); 16885 gcc_assert (JUMP_P (insn)); 16886 REG_NOTES (insn) 16887 = gen_rtx_EXPR_LIST (REG_BR_PROB, 16888 GEN_INT (prob), 16889 REG_NOTES (insn)); 16890 } 16891 16892 /* Helper function for the string operations below. Dest VARIABLE whether 16893 it is aligned to VALUE bytes. If true, jump to the label. */ 16894 static rtx 16895 ix86_expand_aligntest (rtx variable, int value, bool epilogue) 16896 { 16897 rtx label = gen_label_rtx (); 16898 rtx tmpcount = gen_reg_rtx (GET_MODE (variable)); 16899 if (GET_MODE (variable) == DImode) 16900 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value))); 16901 else 16902 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value))); 16903 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable), 16904 1, label); 16905 if (epilogue) 16906 predict_jump (REG_BR_PROB_BASE * 50 / 100); 16907 else 16908 predict_jump (REG_BR_PROB_BASE * 90 / 100); 16909 return label; 16910 } 16911 16912 /* Adjust COUNTER by the VALUE. */ 16913 static void 16914 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value) 16915 { 16916 if (GET_MODE (countreg) == DImode) 16917 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value))); 16918 else 16919 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value))); 16920 } 16921 16922 /* Zero extend possibly SImode EXP to Pmode register. */ 16923 rtx 16924 ix86_zero_extend_to_Pmode (rtx exp) 16925 { 16926 rtx r; 16927 if (GET_MODE (exp) == VOIDmode) 16928 return force_reg (Pmode, exp); 16929 if (GET_MODE (exp) == Pmode) 16930 return copy_to_mode_reg (Pmode, exp); 16931 r = gen_reg_rtx (Pmode); 16932 emit_insn (gen_zero_extendsidi2 (r, exp)); 16933 return r; 16934 } 16935 16936 /* Divide COUNTREG by SCALE. */ 16937 static rtx 16938 scale_counter (rtx countreg, int scale) 16939 { 16940 rtx sc; 16941 rtx piece_size_mask; 16942 16943 if (scale == 1) 16944 return countreg; 16945 if (CONST_INT_P (countreg)) 16946 return GEN_INT (INTVAL (countreg) / scale); 16947 gcc_assert (REG_P (countreg)); 16948 16949 piece_size_mask = GEN_INT (scale - 1); 16950 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg, 16951 GEN_INT (exact_log2 (scale)), 16952 NULL, 1, OPTAB_DIRECT); 16953 return sc; 16954 } 16955 16956 /* Return mode for the memcpy/memset loop counter. Prefer SImode over 16957 DImode for constant loop counts. 
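   A constant count that does not fit in 32 bits forces DImode; otherwise
   SImode is enough and keeps the loop arithmetic free of REX prefixes.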
*/ 16958 16959 static enum machine_mode 16960 counter_mode (rtx count_exp) 16961 { 16962 if (GET_MODE (count_exp) != VOIDmode) 16963 return GET_MODE (count_exp); 16964 if (GET_CODE (count_exp) != CONST_INT) 16965 return Pmode; 16966 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff)) 16967 return DImode; 16968 return SImode; 16969 } 16970 16971 /* When SRCPTR is non-NULL, output simple loop to move memory 16972 pointer to SRCPTR to DESTPTR via chunks of MODE unrolled UNROLL times, 16973 overall size is COUNT specified in bytes. When SRCPTR is NULL, output the 16974 equivalent loop to set memory by VALUE (supposed to be in MODE). 16975 16976 The size is rounded down to whole number of chunk size moved at once. 16977 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */ 16978 16979 16980 static void 16981 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem, 16982 rtx destptr, rtx srcptr, rtx value, 16983 rtx count, enum machine_mode mode, int unroll, 16984 int expected_size) 16985 { 16986 rtx out_label, top_label, iter, tmp; 16987 enum machine_mode iter_mode = counter_mode (count); 16988 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll); 16989 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1)); 16990 rtx size; 16991 rtx x_addr; 16992 rtx y_addr; 16993 int i; 16994 16995 top_label = gen_label_rtx (); 16996 out_label = gen_label_rtx (); 16997 iter = gen_reg_rtx (iter_mode); 16998 16999 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask, 17000 NULL, 1, OPTAB_DIRECT); 17001 /* Those two should combine. */ 17002 if (piece_size == const1_rtx) 17003 { 17004 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode, 17005 true, out_label); 17006 predict_jump (REG_BR_PROB_BASE * 10 / 100); 17007 } 17008 emit_move_insn (iter, const0_rtx); 17009 17010 emit_label (top_label); 17011 17012 tmp = convert_modes (Pmode, iter_mode, iter, true); 17013 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp); 17014 destmem = change_address (destmem, mode, x_addr); 17015 17016 if (srcmem) 17017 { 17018 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp)); 17019 srcmem = change_address (srcmem, mode, y_addr); 17020 17021 /* When unrolling for chips that reorder memory reads and writes, 17022 we can save registers by using single temporary. 17023 Also using 4 temporaries is overkill in 32bit mode. 
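	     That variant is currently disabled by the "&& 0" test below, so
	     the unrolled body always loads up to four temporaries before
	     storing them.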
*/ 17024 if (!TARGET_64BIT && 0) 17025 { 17026 for (i = 0; i < unroll; i++) 17027 { 17028 if (i) 17029 { 17030 destmem = 17031 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17032 srcmem = 17033 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); 17034 } 17035 emit_move_insn (destmem, srcmem); 17036 } 17037 } 17038 else 17039 { 17040 rtx tmpreg[4]; 17041 gcc_assert (unroll <= 4); 17042 for (i = 0; i < unroll; i++) 17043 { 17044 tmpreg[i] = gen_reg_rtx (mode); 17045 if (i) 17046 { 17047 srcmem = 17048 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode)); 17049 } 17050 emit_move_insn (tmpreg[i], srcmem); 17051 } 17052 for (i = 0; i < unroll; i++) 17053 { 17054 if (i) 17055 { 17056 destmem = 17057 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17058 } 17059 emit_move_insn (destmem, tmpreg[i]); 17060 } 17061 } 17062 } 17063 else 17064 for (i = 0; i < unroll; i++) 17065 { 17066 if (i) 17067 destmem = 17068 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode)); 17069 emit_move_insn (destmem, value); 17070 } 17071 17072 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter, 17073 true, OPTAB_LIB_WIDEN); 17074 if (tmp != iter) 17075 emit_move_insn (iter, tmp); 17076 17077 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode, 17078 true, top_label); 17079 if (expected_size != -1) 17080 { 17081 expected_size /= GET_MODE_SIZE (mode) * unroll; 17082 if (expected_size == 0) 17083 predict_jump (0); 17084 else if (expected_size > REG_BR_PROB_BASE) 17085 predict_jump (REG_BR_PROB_BASE - 1); 17086 else 17087 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size); 17088 } 17089 else 17090 predict_jump (REG_BR_PROB_BASE * 80 / 100); 17091 iter = ix86_zero_extend_to_Pmode (iter); 17092 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr, 17093 true, OPTAB_LIB_WIDEN); 17094 if (tmp != destptr) 17095 emit_move_insn (destptr, tmp); 17096 if (srcptr) 17097 { 17098 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr, 17099 true, OPTAB_LIB_WIDEN); 17100 if (tmp != srcptr) 17101 emit_move_insn (srcptr, tmp); 17102 } 17103 emit_label (out_label); 17104 } 17105 17106 /* Output "rep; mov" instruction. 17107 Arguments have same meaning as for previous function */ 17108 static void 17109 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem, 17110 rtx destptr, rtx srcptr, 17111 rtx count, 17112 enum machine_mode mode) 17113 { 17114 rtx destexp; 17115 rtx srcexp; 17116 rtx countreg; 17117 17118 /* If the size is known, it is shorter to use rep movs. 
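   For example (sizes chosen only for illustration), a known count of 32
   bytes is promoted from QImode to SImode below, so the copy becomes eight
   iterations of rep movsl instead of thirty-two iterations of rep movsb.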
*/ 17119 if (mode == QImode && CONST_INT_P (count) 17120 && !(INTVAL (count) & 3)) 17121 mode = SImode; 17122 17123 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) 17124 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); 17125 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode) 17126 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0); 17127 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); 17128 if (mode != QImode) 17129 { 17130 destexp = gen_rtx_ASHIFT (Pmode, countreg, 17131 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17132 destexp = gen_rtx_PLUS (Pmode, destexp, destptr); 17133 srcexp = gen_rtx_ASHIFT (Pmode, countreg, 17134 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17135 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr); 17136 } 17137 else 17138 { 17139 destexp = gen_rtx_PLUS (Pmode, destptr, countreg); 17140 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg); 17141 } 17142 if (CONST_INT_P (count)) 17143 { 17144 count = GEN_INT (INTVAL (count) 17145 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); 17146 destmem = shallow_copy_rtx (destmem); 17147 srcmem = shallow_copy_rtx (srcmem); 17148 set_mem_size (destmem, count); 17149 set_mem_size (srcmem, count); 17150 } 17151 else 17152 { 17153 if (MEM_SIZE (destmem)) 17154 set_mem_size (destmem, NULL_RTX); 17155 if (MEM_SIZE (srcmem)) 17156 set_mem_size (srcmem, NULL_RTX); 17157 } 17158 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg, 17159 destexp, srcexp)); 17160 } 17161 17162 /* Output "rep; stos" instruction. 17163 Arguments have same meaning as for previous function */ 17164 static void 17165 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value, 17166 rtx count, enum machine_mode mode, 17167 rtx orig_value) 17168 { 17169 rtx destexp; 17170 rtx countreg; 17171 17172 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode) 17173 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0); 17174 value = force_reg (mode, gen_lowpart (mode, value)); 17175 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode))); 17176 if (mode != QImode) 17177 { 17178 destexp = gen_rtx_ASHIFT (Pmode, countreg, 17179 GEN_INT (exact_log2 (GET_MODE_SIZE (mode)))); 17180 destexp = gen_rtx_PLUS (Pmode, destexp, destptr); 17181 } 17182 else 17183 destexp = gen_rtx_PLUS (Pmode, destptr, countreg); 17184 if (orig_value == const0_rtx && CONST_INT_P (count)) 17185 { 17186 count = GEN_INT (INTVAL (count) 17187 & ~((HOST_WIDE_INT) GET_MODE_SIZE (mode) - 1)); 17188 destmem = shallow_copy_rtx (destmem); 17189 set_mem_size (destmem, count); 17190 } 17191 else if (MEM_SIZE (destmem)) 17192 set_mem_size (destmem, NULL_RTX); 17193 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp)); 17194 } 17195 17196 static void 17197 emit_strmov (rtx destmem, rtx srcmem, 17198 rtx destptr, rtx srcptr, enum machine_mode mode, int offset) 17199 { 17200 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset); 17201 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset); 17202 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17203 } 17204 17205 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. 
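   As a worked example (values invented for illustration): with a constant
   count of 23 and max_size of 16, the remainder is 23 & 15 = 7 bytes,
   emitted as one SImode, one HImode and one QImode move.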
*/ 17206 static void 17207 expand_movmem_epilogue (rtx destmem, rtx srcmem, 17208 rtx destptr, rtx srcptr, rtx count, int max_size) 17209 { 17210 rtx src, dest; 17211 if (CONST_INT_P (count)) 17212 { 17213 HOST_WIDE_INT countval = INTVAL (count); 17214 int offset = 0; 17215 17216 if ((countval & 0x10) && max_size > 16) 17217 { 17218 if (TARGET_64BIT) 17219 { 17220 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); 17221 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8); 17222 } 17223 else 17224 gcc_unreachable (); 17225 offset += 16; 17226 } 17227 if ((countval & 0x08) && max_size > 8) 17228 { 17229 if (TARGET_64BIT) 17230 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset); 17231 else 17232 { 17233 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); 17234 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4); 17235 } 17236 offset += 8; 17237 } 17238 if ((countval & 0x04) && max_size > 4) 17239 { 17240 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset); 17241 offset += 4; 17242 } 17243 if ((countval & 0x02) && max_size > 2) 17244 { 17245 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset); 17246 offset += 2; 17247 } 17248 if ((countval & 0x01) && max_size > 1) 17249 { 17250 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset); 17251 offset += 1; 17252 } 17253 return; 17254 } 17255 if (max_size > 8) 17256 { 17257 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1), 17258 count, 1, OPTAB_DIRECT); 17259 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL, 17260 count, QImode, 1, 4); 17261 return; 17262 } 17263 17264 /* When there are stringops, we can cheaply increase dest and src pointers. 17265 Otherwise we save code size by maintaining offset (zero is readily 17266 available from preceding rep operation) and using x86 addressing modes. 
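   Roughly speaking (illustrative assembly, not the literal output), the
   stringop form emits movsl/movsw/movsb, which advance the source and
   destination pointers by themselves, while the fallback keeps a running
   offset in a register and issues plain moves such as

     movl (%esi,%ecx), %eax
     movl %eax, (%edi,%ecx)

   so only the offset register needs updating.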
17267 */ 17268 if (TARGET_SINGLE_STRINGOP) 17269 { 17270 if (max_size > 4) 17271 { 17272 rtx label = ix86_expand_aligntest (count, 4, true); 17273 src = change_address (srcmem, SImode, srcptr); 17274 dest = change_address (destmem, SImode, destptr); 17275 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17276 emit_label (label); 17277 LABEL_NUSES (label) = 1; 17278 } 17279 if (max_size > 2) 17280 { 17281 rtx label = ix86_expand_aligntest (count, 2, true); 17282 src = change_address (srcmem, HImode, srcptr); 17283 dest = change_address (destmem, HImode, destptr); 17284 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17285 emit_label (label); 17286 LABEL_NUSES (label) = 1; 17287 } 17288 if (max_size > 1) 17289 { 17290 rtx label = ix86_expand_aligntest (count, 1, true); 17291 src = change_address (srcmem, QImode, srcptr); 17292 dest = change_address (destmem, QImode, destptr); 17293 emit_insn (gen_strmov (destptr, dest, srcptr, src)); 17294 emit_label (label); 17295 LABEL_NUSES (label) = 1; 17296 } 17297 } 17298 else 17299 { 17300 rtx offset = force_reg (Pmode, const0_rtx); 17301 rtx tmp; 17302 17303 if (max_size > 4) 17304 { 17305 rtx label = ix86_expand_aligntest (count, 4, true); 17306 src = change_address (srcmem, SImode, srcptr); 17307 dest = change_address (destmem, SImode, destptr); 17308 emit_move_insn (dest, src); 17309 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL, 17310 true, OPTAB_LIB_WIDEN); 17311 if (tmp != offset) 17312 emit_move_insn (offset, tmp); 17313 emit_label (label); 17314 LABEL_NUSES (label) = 1; 17315 } 17316 if (max_size > 2) 17317 { 17318 rtx label = ix86_expand_aligntest (count, 2, true); 17319 tmp = gen_rtx_PLUS (Pmode, srcptr, offset); 17320 src = change_address (srcmem, HImode, tmp); 17321 tmp = gen_rtx_PLUS (Pmode, destptr, offset); 17322 dest = change_address (destmem, HImode, tmp); 17323 emit_move_insn (dest, src); 17324 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp, 17325 true, OPTAB_LIB_WIDEN); 17326 if (tmp != offset) 17327 emit_move_insn (offset, tmp); 17328 emit_label (label); 17329 LABEL_NUSES (label) = 1; 17330 } 17331 if (max_size > 1) 17332 { 17333 rtx label = ix86_expand_aligntest (count, 1, true); 17334 tmp = gen_rtx_PLUS (Pmode, srcptr, offset); 17335 src = change_address (srcmem, QImode, tmp); 17336 tmp = gen_rtx_PLUS (Pmode, destptr, offset); 17337 dest = change_address (destmem, QImode, tmp); 17338 emit_move_insn (dest, src); 17339 emit_label (label); 17340 LABEL_NUSES (label) = 1; 17341 } 17342 } 17343 } 17344 17345 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. */ 17346 static void 17347 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value, 17348 rtx count, int max_size) 17349 { 17350 count = 17351 expand_simple_binop (counter_mode (count), AND, count, 17352 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT); 17353 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL, 17354 gen_lowpart (QImode, value), count, QImode, 17355 1, max_size / 2); 17356 } 17357 17358 /* Output code to set at most count & (max_size - 1) bytes starting by DEST. 
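   E.g. a constant remainder of 13 bytes (a made-up value) is emitted as an
   8-byte, a 4-byte and a 1-byte store of the promoted VALUE on 64-bit
   targets, or as 4+4+4+1 byte stores on 32-bit targets.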
*/ 17359 static void 17360 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size) 17361 { 17362 rtx dest; 17363 17364 if (CONST_INT_P (count)) 17365 { 17366 HOST_WIDE_INT countval = INTVAL (count); 17367 int offset = 0; 17368 17369 if ((countval & 0x10) && max_size > 16) 17370 { 17371 if (TARGET_64BIT) 17372 { 17373 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); 17374 emit_insn (gen_strset (destptr, dest, value)); 17375 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8); 17376 emit_insn (gen_strset (destptr, dest, value)); 17377 } 17378 else 17379 gcc_unreachable (); 17380 offset += 16; 17381 } 17382 if ((countval & 0x08) && max_size > 8) 17383 { 17384 if (TARGET_64BIT) 17385 { 17386 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset); 17387 emit_insn (gen_strset (destptr, dest, value)); 17388 } 17389 else 17390 { 17391 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); 17392 emit_insn (gen_strset (destptr, dest, value)); 17393 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4); 17394 emit_insn (gen_strset (destptr, dest, value)); 17395 } 17396 offset += 8; 17397 } 17398 if ((countval & 0x04) && max_size > 4) 17399 { 17400 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset); 17401 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); 17402 offset += 4; 17403 } 17404 if ((countval & 0x02) && max_size > 2) 17405 { 17406 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset); 17407 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value))); 17408 offset += 2; 17409 } 17410 if ((countval & 0x01) && max_size > 1) 17411 { 17412 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset); 17413 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value))); 17414 offset += 1; 17415 } 17416 return; 17417 } 17418 if (max_size > 32) 17419 { 17420 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size); 17421 return; 17422 } 17423 if (max_size > 16) 17424 { 17425 rtx label = ix86_expand_aligntest (count, 16, true); 17426 if (TARGET_64BIT) 17427 { 17428 dest = change_address (destmem, DImode, destptr); 17429 emit_insn (gen_strset (destptr, dest, value)); 17430 emit_insn (gen_strset (destptr, dest, value)); 17431 } 17432 else 17433 { 17434 dest = change_address (destmem, SImode, destptr); 17435 emit_insn (gen_strset (destptr, dest, value)); 17436 emit_insn (gen_strset (destptr, dest, value)); 17437 emit_insn (gen_strset (destptr, dest, value)); 17438 emit_insn (gen_strset (destptr, dest, value)); 17439 } 17440 emit_label (label); 17441 LABEL_NUSES (label) = 1; 17442 } 17443 if (max_size > 8) 17444 { 17445 rtx label = ix86_expand_aligntest (count, 8, true); 17446 if (TARGET_64BIT) 17447 { 17448 dest = change_address (destmem, DImode, destptr); 17449 emit_insn (gen_strset (destptr, dest, value)); 17450 } 17451 else 17452 { 17453 dest = change_address (destmem, SImode, destptr); 17454 emit_insn (gen_strset (destptr, dest, value)); 17455 emit_insn (gen_strset (destptr, dest, value)); 17456 } 17457 emit_label (label); 17458 LABEL_NUSES (label) = 1; 17459 } 17460 if (max_size > 4) 17461 { 17462 rtx label = ix86_expand_aligntest (count, 4, true); 17463 dest = change_address (destmem, SImode, destptr); 17464 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value))); 17465 emit_label (label); 17466 LABEL_NUSES (label) = 1; 17467 } 17468 if (max_size > 2) 17469 { 17470 
      rtx label = ix86_expand_aligntest (count, 2, true);
      dest = change_address (destmem, HImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (max_size > 1)
    {
      rtx label = ix86_expand_aligntest (count, 1, true);
      dest = change_address (destmem, QImode, destptr);
      emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
}

/* Copy enough from SRC to DEST to align DEST, which is known to be aligned
   to ALIGN, to DESIRED_ALIGNMENT.  */
static void
expand_movmem_prologue (rtx destmem, rtx srcmem,
                        rtx destptr, rtx srcptr, rtx count,
                        int align, int desired_alignment)
{
  if (align <= 1 && desired_alignment > 1)
    {
      rtx label = ix86_expand_aligntest (destptr, 1, false);
      srcmem = change_address (srcmem, QImode, srcptr);
      destmem = change_address (destmem, QImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 1);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 2 && desired_alignment > 2)
    {
      rtx label = ix86_expand_aligntest (destptr, 2, false);
      srcmem = change_address (srcmem, HImode, srcptr);
      destmem = change_address (destmem, HImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 2);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  if (align <= 4 && desired_alignment > 4)
    {
      rtx label = ix86_expand_aligntest (destptr, 4, false);
      srcmem = change_address (srcmem, SImode, srcptr);
      destmem = change_address (destmem, SImode, destptr);
      emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
      ix86_adjust_counter (count, 4);
      emit_label (label);
      LABEL_NUSES (label) = 1;
    }
  gcc_assert (desired_alignment <= 8);
}

/* Copy enough from SRC to DST to align DST to DESIRED_ALIGN.
   ALIGN_BYTES is how many bytes need to be copied.  */
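/* As a worked example (values invented for illustration): if DST starts
   3 bytes before an 8-byte boundary and DESIRED_ALIGN is 8, ALIGN_BYTES
   is 3, so the routine below copies one QImode and one HImode chunk and
   then knows DST is 8-byte aligned for the main loop.  */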
static rtx
expand_constant_movmem_prologue (rtx dst, rtx *srcp, rtx destreg, rtx srcreg,
                                 int desired_align, int align_bytes)
{
  rtx src = *srcp;
  rtx src_size, dst_size;
  int off = 0;
  int src_align_bytes = get_mem_align_offset (src, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    src_align_bytes = desired_align - src_align_bytes;
  src_size = MEM_SIZE (src);
  dst_size = MEM_SIZE (dst);
  if (align_bytes & 1)
    {
      dst = adjust_automodify_address_nv (dst, QImode, destreg, 0);
      src = adjust_automodify_address_nv (src, QImode, srcreg, 0);
      off = 1;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 2)
    {
      dst = adjust_automodify_address_nv (dst, HImode, destreg, off);
      src = adjust_automodify_address_nv (src, HImode, srcreg, off);
      if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT)
        set_mem_align (dst, 2 * BITS_PER_UNIT);
      if (src_align_bytes >= 0
          && (src_align_bytes & 1) == (align_bytes & 1)
          && MEM_ALIGN (src) < 2 * BITS_PER_UNIT)
        set_mem_align (src, 2 * BITS_PER_UNIT);
      off = 2;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  if (align_bytes & 4)
    {
      dst = adjust_automodify_address_nv (dst, SImode, destreg, off);
      src = adjust_automodify_address_nv (src, SImode, srcreg, off);
      if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT)
        set_mem_align (dst, 4 * BITS_PER_UNIT);
      if (src_align_bytes >= 0)
        {
          unsigned int src_align = 0;
          if ((src_align_bytes & 3) == (align_bytes & 3))
            src_align = 4;
          else if ((src_align_bytes & 1) == (align_bytes & 1))
            src_align = 2;
          if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
            set_mem_align (src, src_align * BITS_PER_UNIT);
        }
      off = 4;
      emit_insn (gen_strmov (destreg, dst, srcreg, src));
    }
  dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off);
  src = adjust_automodify_address_nv (src, BLKmode, srcreg, off);
  if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT)
    set_mem_align (dst, desired_align * BITS_PER_UNIT);
  if (src_align_bytes >= 0)
    {
      unsigned int src_align = 0;
      if ((src_align_bytes & 7) == (align_bytes & 7))
        src_align = 8;
      else if ((src_align_bytes & 3) == (align_bytes & 3))
        src_align = 4;
      else if ((src_align_bytes & 1) == (align_bytes & 1))
        src_align = 2;
      if (src_align > (unsigned int) desired_align)
        src_align = desired_align;
      if (MEM_ALIGN (src) < src_align * BITS_PER_UNIT)
        set_mem_align (src, src_align * BITS_PER_UNIT);
    }
  if (dst_size)
    set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes));
  if (src_size)
    set_mem_size (src, GEN_INT (INTVAL (src_size) - align_bytes));
  *srcp = src;
  return dst;
}

/* Set enough of DEST to align DEST, which is known to be aligned to ALIGN,
   to DESIRED_ALIGNMENT.
*/ 17607 static void 17608 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count, 17609 int align, int desired_alignment) 17610 { 17611 if (align <= 1 && desired_alignment > 1) 17612 { 17613 rtx label = ix86_expand_aligntest (destptr, 1, false); 17614 destmem = change_address (destmem, QImode, destptr); 17615 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value))); 17616 ix86_adjust_counter (count, 1); 17617 emit_label (label); 17618 LABEL_NUSES (label) = 1; 17619 } 17620 if (align <= 2 && desired_alignment > 2) 17621 { 17622 rtx label = ix86_expand_aligntest (destptr, 2, false); 17623 destmem = change_address (destmem, HImode, destptr); 17624 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value))); 17625 ix86_adjust_counter (count, 2); 17626 emit_label (label); 17627 LABEL_NUSES (label) = 1; 17628 } 17629 if (align <= 4 && desired_alignment > 4) 17630 { 17631 rtx label = ix86_expand_aligntest (destptr, 4, false); 17632 destmem = change_address (destmem, SImode, destptr); 17633 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value))); 17634 ix86_adjust_counter (count, 4); 17635 emit_label (label); 17636 LABEL_NUSES (label) = 1; 17637 } 17638 gcc_assert (desired_alignment <= 8); 17639 } 17640 17641 /* Set enough from DST to align DST known to by aligned by ALIGN to 17642 DESIRED_ALIGN. ALIGN_BYTES is how many bytes need to be stored. */ 17643 static rtx 17644 expand_constant_setmem_prologue (rtx dst, rtx destreg, rtx value, 17645 int desired_align, int align_bytes) 17646 { 17647 int off = 0; 17648 rtx dst_size = MEM_SIZE (dst); 17649 if (align_bytes & 1) 17650 { 17651 dst = adjust_automodify_address_nv (dst, QImode, destreg, 0); 17652 off = 1; 17653 emit_insn (gen_strset (destreg, dst, 17654 gen_lowpart (QImode, value))); 17655 } 17656 if (align_bytes & 2) 17657 { 17658 dst = adjust_automodify_address_nv (dst, HImode, destreg, off); 17659 if (MEM_ALIGN (dst) < 2 * BITS_PER_UNIT) 17660 set_mem_align (dst, 2 * BITS_PER_UNIT); 17661 off = 2; 17662 emit_insn (gen_strset (destreg, dst, 17663 gen_lowpart (HImode, value))); 17664 } 17665 if (align_bytes & 4) 17666 { 17667 dst = adjust_automodify_address_nv (dst, SImode, destreg, off); 17668 if (MEM_ALIGN (dst) < 4 * BITS_PER_UNIT) 17669 set_mem_align (dst, 4 * BITS_PER_UNIT); 17670 off = 4; 17671 emit_insn (gen_strset (destreg, dst, 17672 gen_lowpart (SImode, value))); 17673 } 17674 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, off); 17675 if (MEM_ALIGN (dst) < (unsigned int) desired_align * BITS_PER_UNIT) 17676 set_mem_align (dst, desired_align * BITS_PER_UNIT); 17677 if (dst_size) 17678 set_mem_size (dst, GEN_INT (INTVAL (dst_size) - align_bytes)); 17679 return dst; 17680 } 17681 17682 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */ 17683 static enum stringop_alg 17684 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset, 17685 int *dynamic_check) 17686 { 17687 const struct stringop_algs * algs; 17688 bool optimize_for_speed; 17689 /* Algorithms using the rep prefix want at least edi and ecx; 17690 additionally, memset wants eax and memcpy wants esi. Don't 17691 consider such algorithms if the user has appropriated those 17692 registers for their own purposes. */ 17693 bool rep_prefix_usable = !(fixed_regs[CX_REG] || fixed_regs[DI_REG] 17694 || (memset 17695 ? 
                              fixed_regs[AX_REG] : fixed_regs[SI_REG]));

#define ALG_USABLE_P(alg) (rep_prefix_usable                   \
                           || (alg != rep_prefix_1_byte        \
                               && alg != rep_prefix_4_byte     \
                               && alg != rep_prefix_8_byte))
  const struct processor_costs *cost;

  /* Even if the string operation call is cold, we still might spend a lot
     of time processing large blocks.  */
  if (optimize_function_for_size_p (cfun)
      || (optimize_insn_for_size_p ()
          && expected_size != -1 && expected_size < 256))
    optimize_for_speed = false;
  else
    optimize_for_speed = true;

  cost = optimize_for_speed ? ix86_cost : &ix86_size_cost;

  *dynamic_check = -1;
  if (memset)
    algs = &cost->memset[TARGET_64BIT != 0];
  else
    algs = &cost->memcpy[TARGET_64BIT != 0];
  if (stringop_alg != no_stringop && ALG_USABLE_P (stringop_alg))
    return stringop_alg;
  /* rep; movq or rep; movl is the smallest variant.  */
  else if (!optimize_for_speed)
    {
      if (!count || (count & 3))
        return rep_prefix_usable ? rep_prefix_1_byte : loop_1_byte;
      else
        return rep_prefix_usable ? rep_prefix_4_byte : loop;
    }
  /* Very tiny blocks are best handled via the loop; REP is expensive to
     set up.  */
  else if (expected_size != -1 && expected_size < 4)
    return loop_1_byte;
  else if (expected_size != -1)
    {
      unsigned int i;
      enum stringop_alg alg = libcall;
      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          /* We get here if the algorithms that were not libcall-based
             were rep-prefix based and we are unable to use rep prefixes
             based on global register usage.  Break out of the loop and
             use the heuristic below.  */
          if (algs->size[i].max == 0)
            break;
          if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
            {
              enum stringop_alg candidate = algs->size[i].alg;

              if (candidate != libcall && ALG_USABLE_P (candidate))
                alg = candidate;
              /* Honor TARGET_INLINE_ALL_STRINGOPS by picking the last
                 non-libcall inline algorithm.  */
              if (TARGET_INLINE_ALL_STRINGOPS)
                {
                  /* When the current size is best copied by a libcall,
                     but we are still forced to inline, run the heuristic
                     below that will pick code for medium sized blocks.  */
                  if (alg != libcall)
                    return alg;
                  break;
                }
              else if (ALG_USABLE_P (candidate))
                return candidate;
            }
        }
      gcc_assert (TARGET_INLINE_ALL_STRINGOPS || !rep_prefix_usable);
    }
  /* When asked to inline the call anyway, try to pick a meaningful choice.
     We look for the maximal size of block that is faster to copy by hand
     and take blocks of at most that size, guessing that the average size
     will be roughly half of the block.

     If this turns out to be bad, we might simply specify the preferred
     choice in ix86_costs.  */
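/* A sketch of that heuristic with invented numbers: if the cost table caps
   the last usable inline algorithm at 4096 bytes, decide_alg is re-run
   with an expected size of 2048 and its choice is used; with
   -minline-stringops-dynamically, *dynamic_check is additionally set to
   4096 so the expander emits a runtime size test that falls back to the
   library call above that threshold.  */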
  if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
      && (algs->unknown_size == libcall || !ALG_USABLE_P (algs->unknown_size)))
    {
      int max = -1;
      enum stringop_alg alg;
      int i;
      bool any_alg_usable_p = true;

      for (i = 0; i < MAX_STRINGOP_ALGS; i++)
        {
          enum stringop_alg candidate = algs->size[i].alg;
          any_alg_usable_p = any_alg_usable_p && ALG_USABLE_P (candidate);

          if (candidate != libcall && candidate
              && ALG_USABLE_P (candidate))
            max = algs->size[i].max;
        }
      /* If there aren't any usable algorithms, then recursing on
         smaller sizes isn't going to find anything.  Just return the
         simple byte-at-a-time copy loop.  */
      if (!any_alg_usable_p)
        {
          /* Pick something reasonable.  */
          if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
            *dynamic_check = 128;
          return loop_1_byte;
        }
      if (max == -1)
        max = 4096;
      alg = decide_alg (count, max / 2, memset, dynamic_check);
      gcc_assert (*dynamic_check == -1);
      gcc_assert (alg != libcall);
      if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
        *dynamic_check = max;
      return alg;
    }
  return ALG_USABLE_P (algs->unknown_size) ? algs->unknown_size : libcall;
#undef ALG_USABLE_P
}

/* Decide on alignment.  We know that the operand is already aligned to ALIGN
   (ALIGN can be based on profile feedback and thus it is not 100%
   guaranteed).  */
static int
decide_alignment (int align,
                  enum stringop_alg alg,
                  int expected_size)
{
  int desired_align = 0;
  switch (alg)
    {
    case no_stringop:
      gcc_unreachable ();
    case loop:
    case unrolled_loop:
      desired_align = GET_MODE_SIZE (Pmode);
      break;
    case rep_prefix_8_byte:
      desired_align = 8;
      break;
    case rep_prefix_4_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 4;
      break;
    case rep_prefix_1_byte:
      /* PentiumPro has special logic triggering for 8-byte aligned blocks,
         copying a whole cache line at once.  */
      if (TARGET_PENTIUMPRO)
        desired_align = 8;
      else
        desired_align = 1;
      break;
    case loop_1_byte:
      desired_align = 1;
      break;
    case libcall:
      return 0;
    }

  if (optimize_size)
    desired_align = 1;
  if (desired_align < align)
    desired_align = align;
  if (expected_size != -1 && expected_size < 4)
    desired_align = align;
  return desired_align;
}

/* Return the smallest power of 2 greater than VAL.  */
static int
smallest_pow2_greater_than (int val)
{
  int ret = 1;
  while (ret <= val)
    ret <<= 1;
  return ret;
}

/* Expand string move (memcpy) operation.  Use i386 string operations when
   profitable.  expand_setmem contains similar code.  The code depends upon
   architecture, block size and alignment, but always has the same
   overall structure:

   1) Prologue guard: Conditional that jumps to the epilogue for small
      blocks that can be handled by the epilogue alone.  This is faster
      but also needed for correctness, since the prologue assumes the
      block is larger than the desired alignment.
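      (For example, with a desired alignment of 8 a 3-byte block could make
      the alignment prologue read or write past the end of the object, so
      such blocks must branch straight to the epilogue; the sizes here are
      only an illustration.)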
17885 17886 Optional dynamic check for size and libcall for large 17887 blocks is emitted here too, with -minline-stringops-dynamically. 17888 17889 2) Prologue: copy first few bytes in order to get destination aligned 17890 to DESIRED_ALIGN. It is emitted only when ALIGN is less than 17891 DESIRED_ALIGN and and up to DESIRED_ALIGN - ALIGN bytes can be copied. 17892 We emit either a jump tree on power of two sized blocks, or a byte loop. 17893 17894 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks 17895 with specified algorithm. 17896 17897 4) Epilogue: code copying tail of the block that is too small to be 17898 handled by main body (or up to size guarded by prologue guard). */ 17899 17900 int 17901 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp, 17902 rtx expected_align_exp, rtx expected_size_exp) 17903 { 17904 rtx destreg; 17905 rtx srcreg; 17906 rtx label = NULL; 17907 rtx tmp; 17908 rtx jump_around_label = NULL; 17909 HOST_WIDE_INT align = 1; 17910 unsigned HOST_WIDE_INT count = 0; 17911 HOST_WIDE_INT expected_size = -1; 17912 int size_needed = 0, epilogue_size_needed; 17913 int desired_align = 0, align_bytes = 0; 17914 enum stringop_alg alg; 17915 int dynamic_check; 17916 bool need_zero_guard = false; 17917 17918 if (CONST_INT_P (align_exp)) 17919 align = INTVAL (align_exp); 17920 /* i386 can do misaligned access on reasonably increased cost. */ 17921 if (CONST_INT_P (expected_align_exp) 17922 && INTVAL (expected_align_exp) > align) 17923 align = INTVAL (expected_align_exp); 17924 /* ALIGN is the minimum of destination and source alignment, but we care here 17925 just about destination alignment. */ 17926 else if (MEM_ALIGN (dst) > (unsigned HOST_WIDE_INT) align * BITS_PER_UNIT) 17927 align = MEM_ALIGN (dst) / BITS_PER_UNIT; 17928 17929 if (CONST_INT_P (count_exp)) 17930 count = expected_size = INTVAL (count_exp); 17931 if (CONST_INT_P (expected_size_exp) && count == 0) 17932 expected_size = INTVAL (expected_size_exp); 17933 17934 /* Make sure we don't need to care about overflow later on. */ 17935 if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) 17936 return 0; 17937 17938 /* Step 0: Decide on preferred algorithm, desired alignment and 17939 size of chunks to be copied by main loop. */ 17940 17941 alg = decide_alg (count, expected_size, false, &dynamic_check); 17942 desired_align = decide_alignment (align, alg, expected_size); 17943 17944 if (!TARGET_ALIGN_STRINGOPS) 17945 align = desired_align; 17946 17947 if (alg == libcall) 17948 return 0; 17949 gcc_assert (alg != no_stringop); 17950 if (!count) 17951 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp); 17952 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 17953 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0)); 17954 switch (alg) 17955 { 17956 case libcall: 17957 case no_stringop: 17958 gcc_unreachable (); 17959 case loop: 17960 need_zero_guard = true; 17961 size_needed = GET_MODE_SIZE (Pmode); 17962 break; 17963 case unrolled_loop: 17964 need_zero_guard = true; 17965 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2); 17966 break; 17967 case rep_prefix_8_byte: 17968 size_needed = 8; 17969 break; 17970 case rep_prefix_4_byte: 17971 size_needed = 4; 17972 break; 17973 case rep_prefix_1_byte: 17974 size_needed = 1; 17975 break; 17976 case loop_1_byte: 17977 need_zero_guard = true; 17978 size_needed = 1; 17979 break; 17980 } 17981 17982 epilogue_size_needed = size_needed; 17983 17984 /* Step 1: Prologue guard. 
*/ 17985 17986 /* Alignment code needs count to be in register. */ 17987 if (CONST_INT_P (count_exp) && desired_align > align) 17988 { 17989 if (INTVAL (count_exp) > desired_align 17990 && INTVAL (count_exp) > size_needed) 17991 { 17992 align_bytes 17993 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); 17994 if (align_bytes <= 0) 17995 align_bytes = 0; 17996 else 17997 align_bytes = desired_align - align_bytes; 17998 } 17999 if (align_bytes == 0) 18000 count_exp = force_reg (counter_mode (count_exp), count_exp); 18001 } 18002 gcc_assert (desired_align >= 1 && align >= 1); 18003 18004 /* Ensure that alignment prologue won't copy past end of block. */ 18005 if (size_needed > 1 || (desired_align > 1 && desired_align > align)) 18006 { 18007 epilogue_size_needed = MAX (size_needed - 1, desired_align - align); 18008 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes. 18009 Make sure it is power of 2. */ 18010 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); 18011 18012 if (count) 18013 { 18014 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) 18015 { 18016 /* If main algorithm works on QImode, no epilogue is needed. 18017 For small sizes just don't align anything. */ 18018 if (size_needed == 1) 18019 desired_align = align; 18020 else 18021 goto epilogue; 18022 } 18023 } 18024 else 18025 { 18026 label = gen_label_rtx (); 18027 emit_cmp_and_jump_insns (count_exp, 18028 GEN_INT (epilogue_size_needed), 18029 LTU, 0, counter_mode (count_exp), 1, label); 18030 if (expected_size == -1 || expected_size < epilogue_size_needed) 18031 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18032 else 18033 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18034 } 18035 } 18036 18037 /* Emit code to decide on runtime whether library call or inline should be 18038 used. */ 18039 if (dynamic_check != -1) 18040 { 18041 if (CONST_INT_P (count_exp)) 18042 { 18043 if (UINTVAL (count_exp) >= (unsigned HOST_WIDE_INT)dynamic_check) 18044 { 18045 emit_block_move_via_libcall (dst, src, count_exp, false); 18046 count_exp = const0_rtx; 18047 goto epilogue; 18048 } 18049 } 18050 else 18051 { 18052 rtx hot_label = gen_label_rtx (); 18053 jump_around_label = gen_label_rtx (); 18054 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), 18055 LEU, 0, GET_MODE (count_exp), 1, hot_label); 18056 predict_jump (REG_BR_PROB_BASE * 90 / 100); 18057 emit_block_move_via_libcall (dst, src, count_exp, false); 18058 emit_jump (jump_around_label); 18059 emit_label (hot_label); 18060 } 18061 } 18062 18063 /* Step 2: Alignment prologue. */ 18064 18065 if (desired_align > align) 18066 { 18067 if (align_bytes == 0) 18068 { 18069 /* Except for the first move in epilogue, we no longer know 18070 constant offset in aliasing info. It don't seems to worth 18071 the pain to maintain it for the first move, so throw away 18072 the info early. */ 18073 src = change_address (src, BLKmode, srcreg); 18074 dst = change_address (dst, BLKmode, destreg); 18075 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align, 18076 desired_align); 18077 } 18078 else 18079 { 18080 /* If we know how many bytes need to be stored before dst is 18081 sufficiently aligned, maintain aliasing info accurately. 
*/ 18082 dst = expand_constant_movmem_prologue (dst, &src, destreg, srcreg, 18083 desired_align, align_bytes); 18084 count_exp = plus_constant (count_exp, -align_bytes); 18085 count -= align_bytes; 18086 } 18087 if (need_zero_guard 18088 && (count < (unsigned HOST_WIDE_INT) size_needed 18089 || (align_bytes == 0 18090 && count < ((unsigned HOST_WIDE_INT) size_needed 18091 + desired_align - align)))) 18092 { 18093 /* It is possible that we copied enough so the main loop will not 18094 execute. */ 18095 gcc_assert (size_needed > 1); 18096 if (label == NULL_RTX) 18097 label = gen_label_rtx (); 18098 emit_cmp_and_jump_insns (count_exp, 18099 GEN_INT (size_needed), 18100 LTU, 0, counter_mode (count_exp), 1, label); 18101 if (expected_size == -1 18102 || expected_size < (desired_align - align) / 2 + size_needed) 18103 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18104 else 18105 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18106 } 18107 } 18108 if (label && size_needed == 1) 18109 { 18110 emit_label (label); 18111 LABEL_NUSES (label) = 1; 18112 label = NULL; 18113 epilogue_size_needed = 1; 18114 } 18115 else if (label == NULL_RTX) 18116 epilogue_size_needed = size_needed; 18117 18118 /* Step 3: Main loop. */ 18119 18120 switch (alg) 18121 { 18122 case libcall: 18123 case no_stringop: 18124 gcc_unreachable (); 18125 case loop_1_byte: 18126 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18127 count_exp, QImode, 1, expected_size); 18128 break; 18129 case loop: 18130 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18131 count_exp, Pmode, 1, expected_size); 18132 break; 18133 case unrolled_loop: 18134 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough 18135 registers for 4 temporaries anyway. */ 18136 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL, 18137 count_exp, Pmode, TARGET_64BIT ? 4 : 2, 18138 expected_size); 18139 break; 18140 case rep_prefix_8_byte: 18141 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18142 DImode); 18143 break; 18144 case rep_prefix_4_byte: 18145 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18146 SImode); 18147 break; 18148 case rep_prefix_1_byte: 18149 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp, 18150 QImode); 18151 break; 18152 } 18153 /* Adjust properly the offset of src and dest memory for aliasing. */ 18154 if (CONST_INT_P (count_exp)) 18155 { 18156 src = adjust_automodify_address_nv (src, BLKmode, srcreg, 18157 (count / size_needed) * size_needed); 18158 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, 18159 (count / size_needed) * size_needed); 18160 } 18161 else 18162 { 18163 src = change_address (src, BLKmode, srcreg); 18164 dst = change_address (dst, BLKmode, destreg); 18165 } 18166 18167 /* Step 4: Epilogue to copy the remaining bytes. */ 18168 epilogue: 18169 if (label) 18170 { 18171 /* When the main loop is done, COUNT_EXP might hold original count, 18172 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. 18173 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED 18174 bytes. Compensate if needed. 
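   As a concrete (made-up) case: if the main loop works in 4-byte chunks
   but the prologue needed 8-byte alignment, EPILOGUE_SIZE_NEEDED is 8
   while SIZE_NEEDED is 4, so COUNT_EXP is masked with SIZE_NEEDED - 1 = 3
   and the epilogue copies only the true remainder.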
*/ 18175 18176 if (size_needed < epilogue_size_needed) 18177 { 18178 tmp = 18179 expand_simple_binop (counter_mode (count_exp), AND, count_exp, 18180 GEN_INT (size_needed - 1), count_exp, 1, 18181 OPTAB_DIRECT); 18182 if (tmp != count_exp) 18183 emit_move_insn (count_exp, tmp); 18184 } 18185 emit_label (label); 18186 LABEL_NUSES (label) = 1; 18187 } 18188 18189 if (count_exp != const0_rtx && epilogue_size_needed > 1) 18190 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp, 18191 epilogue_size_needed); 18192 if (jump_around_label) 18193 emit_label (jump_around_label); 18194 return 1; 18195 } 18196 18197 /* Helper function for memcpy. For QImode value 0xXY produce 18198 0xXYXYXYXY of wide specified by MODE. This is essentially 18199 a * 0x10101010, but we can do slightly better than 18200 synth_mult by unwinding the sequence by hand on CPUs with 18201 slow multiply. */ 18202 static rtx 18203 promote_duplicated_reg (enum machine_mode mode, rtx val) 18204 { 18205 enum machine_mode valmode = GET_MODE (val); 18206 rtx tmp; 18207 int nops = mode == DImode ? 3 : 2; 18208 18209 gcc_assert (mode == SImode || mode == DImode); 18210 if (val == const0_rtx) 18211 return copy_to_mode_reg (mode, const0_rtx); 18212 if (CONST_INT_P (val)) 18213 { 18214 HOST_WIDE_INT v = INTVAL (val) & 255; 18215 18216 v |= v << 8; 18217 v |= v << 16; 18218 if (mode == DImode) 18219 v |= (v << 16) << 16; 18220 return copy_to_mode_reg (mode, gen_int_mode (v, mode)); 18221 } 18222 18223 if (valmode == VOIDmode) 18224 valmode = QImode; 18225 if (valmode != QImode) 18226 val = gen_lowpart (QImode, val); 18227 if (mode == QImode) 18228 return val; 18229 if (!TARGET_PARTIAL_REG_STALL) 18230 nops--; 18231 if (ix86_cost->mult_init[mode == DImode ? 3 : 2] 18232 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4) 18233 <= (ix86_cost->shift_const + ix86_cost->add) * nops 18234 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0))) 18235 { 18236 rtx reg = convert_modes (mode, QImode, val, true); 18237 tmp = promote_duplicated_reg (mode, const1_rtx); 18238 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1, 18239 OPTAB_DIRECT); 18240 } 18241 else 18242 { 18243 rtx reg = convert_modes (mode, QImode, val, true); 18244 18245 if (!TARGET_PARTIAL_REG_STALL) 18246 if (mode == SImode) 18247 emit_insn (gen_movsi_insv_1 (reg, reg)); 18248 else 18249 emit_insn (gen_movdi_insv_1_rex64 (reg, reg)); 18250 else 18251 { 18252 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8), 18253 NULL, 1, OPTAB_DIRECT); 18254 reg = 18255 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18256 } 18257 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16), 18258 NULL, 1, OPTAB_DIRECT); 18259 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18260 if (mode == SImode) 18261 return reg; 18262 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32), 18263 NULL, 1, OPTAB_DIRECT); 18264 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT); 18265 return reg; 18266 } 18267 } 18268 18269 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will 18270 be needed by main loop copying SIZE_NEEDED chunks and prologue getting 18271 alignment from ALIGN to DESIRED_ALIGN. 
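   For instance (an invented value), a byte of 0xAB is promoted to
   0xABABABAB for a SImode loop and to 0xABABABABABABABAB for a DImode
   loop; arithmetically the promotion multiplies VAL by a constant with
   0x01 in every byte lane, which the helper above open-codes with shifts
   and ORs when multiplication is slow.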
*/ 18272 static rtx 18273 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align) 18274 { 18275 rtx promoted_val; 18276 18277 if (TARGET_64BIT 18278 && (size_needed > 4 || (desired_align > align && desired_align > 4))) 18279 promoted_val = promote_duplicated_reg (DImode, val); 18280 else if (size_needed > 2 || (desired_align > align && desired_align > 2)) 18281 promoted_val = promote_duplicated_reg (SImode, val); 18282 else if (size_needed > 1 || (desired_align > align && desired_align > 1)) 18283 promoted_val = promote_duplicated_reg (HImode, val); 18284 else 18285 promoted_val = val; 18286 18287 return promoted_val; 18288 } 18289 18290 /* Expand string clear operation (bzero). Use i386 string operations when 18291 profitable. See expand_movmem comment for explanation of individual 18292 steps performed. */ 18293 int 18294 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp, 18295 rtx expected_align_exp, rtx expected_size_exp) 18296 { 18297 rtx destreg; 18298 rtx label = NULL; 18299 rtx tmp; 18300 rtx jump_around_label = NULL; 18301 HOST_WIDE_INT align = 1; 18302 unsigned HOST_WIDE_INT count = 0; 18303 HOST_WIDE_INT expected_size = -1; 18304 int size_needed = 0, epilogue_size_needed; 18305 int desired_align = 0, align_bytes = 0; 18306 enum stringop_alg alg; 18307 rtx promoted_val = NULL; 18308 bool force_loopy_epilogue = false; 18309 int dynamic_check; 18310 bool need_zero_guard = false; 18311 18312 if (CONST_INT_P (align_exp)) 18313 align = INTVAL (align_exp); 18314 /* i386 can do misaligned access on reasonably increased cost. */ 18315 if (CONST_INT_P (expected_align_exp) 18316 && INTVAL (expected_align_exp) > align) 18317 align = INTVAL (expected_align_exp); 18318 if (CONST_INT_P (count_exp)) 18319 count = expected_size = INTVAL (count_exp); 18320 if (CONST_INT_P (expected_size_exp) && count == 0) 18321 expected_size = INTVAL (expected_size_exp); 18322 18323 /* Make sure we don't need to care about overflow later on. */ 18324 if (count > ((unsigned HOST_WIDE_INT) 1 << 30)) 18325 return 0; 18326 18327 /* Step 0: Decide on preferred algorithm, desired alignment and 18328 size of chunks to be copied by main loop. */ 18329 18330 alg = decide_alg (count, expected_size, true, &dynamic_check); 18331 desired_align = decide_alignment (align, alg, expected_size); 18332 18333 if (!TARGET_ALIGN_STRINGOPS) 18334 align = desired_align; 18335 18336 if (alg == libcall) 18337 return 0; 18338 gcc_assert (alg != no_stringop); 18339 if (!count) 18340 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp); 18341 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0)); 18342 switch (alg) 18343 { 18344 case libcall: 18345 case no_stringop: 18346 gcc_unreachable (); 18347 case loop: 18348 need_zero_guard = true; 18349 size_needed = GET_MODE_SIZE (Pmode); 18350 break; 18351 case unrolled_loop: 18352 need_zero_guard = true; 18353 size_needed = GET_MODE_SIZE (Pmode) * 4; 18354 break; 18355 case rep_prefix_8_byte: 18356 size_needed = 8; 18357 break; 18358 case rep_prefix_4_byte: 18359 size_needed = 4; 18360 break; 18361 case rep_prefix_1_byte: 18362 size_needed = 1; 18363 break; 18364 case loop_1_byte: 18365 need_zero_guard = true; 18366 size_needed = 1; 18367 break; 18368 } 18369 epilogue_size_needed = size_needed; 18370 18371 /* Step 1: Prologue guard. */ 18372 18373 /* Alignment code needs count to be in register. 
*/ 18374 if (CONST_INT_P (count_exp) && desired_align > align) 18375 { 18376 if (INTVAL (count_exp) > desired_align 18377 && INTVAL (count_exp) > size_needed) 18378 { 18379 align_bytes 18380 = get_mem_align_offset (dst, desired_align * BITS_PER_UNIT); 18381 if (align_bytes <= 0) 18382 align_bytes = 0; 18383 else 18384 align_bytes = desired_align - align_bytes; 18385 } 18386 if (align_bytes == 0) 18387 { 18388 enum machine_mode mode = SImode; 18389 if (TARGET_64BIT && (count & ~0xffffffff)) 18390 mode = DImode; 18391 count_exp = force_reg (mode, count_exp); 18392 } 18393 } 18394 /* Do the cheap promotion to allow better CSE across the 18395 main loop and epilogue (ie one load of the big constant in the 18396 front of all code. */ 18397 if (CONST_INT_P (val_exp)) 18398 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, 18399 desired_align, align); 18400 /* Ensure that alignment prologue won't copy past end of block. */ 18401 if (size_needed > 1 || (desired_align > 1 && desired_align > align)) 18402 { 18403 epilogue_size_needed = MAX (size_needed - 1, desired_align - align); 18404 /* Epilogue always copies COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1) bytes. 18405 Make sure it is power of 2. */ 18406 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed); 18407 18408 /* To improve performance of small blocks, we jump around the VAL 18409 promoting mode. This mean that if the promoted VAL is not constant, 18410 we might not use it in the epilogue and have to use byte 18411 loop variant. */ 18412 if (epilogue_size_needed > 2 && !promoted_val) 18413 force_loopy_epilogue = true; 18414 if (count) 18415 { 18416 if (count < (unsigned HOST_WIDE_INT)epilogue_size_needed) 18417 { 18418 /* If main algorithm works on QImode, no epilogue is needed. 18419 For small sizes just don't align anything. */ 18420 if (size_needed == 1) 18421 desired_align = align; 18422 else 18423 goto epilogue; 18424 } 18425 } 18426 else 18427 { 18428 label = gen_label_rtx (); 18429 emit_cmp_and_jump_insns (count_exp, 18430 GEN_INT (epilogue_size_needed), 18431 LTU, 0, counter_mode (count_exp), 1, label); 18432 if (expected_size == -1 || expected_size <= epilogue_size_needed) 18433 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18434 else 18435 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18436 } 18437 } 18438 if (dynamic_check != -1) 18439 { 18440 rtx hot_label = gen_label_rtx (); 18441 jump_around_label = gen_label_rtx (); 18442 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1), 18443 LEU, 0, counter_mode (count_exp), 1, hot_label); 18444 predict_jump (REG_BR_PROB_BASE * 90 / 100); 18445 set_storage_via_libcall (dst, count_exp, val_exp, false); 18446 emit_jump (jump_around_label); 18447 emit_label (hot_label); 18448 } 18449 18450 /* Step 2: Alignment prologue. */ 18451 18452 /* Do the expensive promotion once we branched off the small blocks. */ 18453 if (!promoted_val) 18454 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed, 18455 desired_align, align); 18456 gcc_assert (desired_align >= 1 && align >= 1); 18457 18458 if (desired_align > align) 18459 { 18460 if (align_bytes == 0) 18461 { 18462 /* Except for the first move in epilogue, we no longer know 18463 constant offset in aliasing info. It don't seems to worth 18464 the pain to maintain it for the first move, so throw away 18465 the info early. 
*/ 18466 dst = change_address (dst, BLKmode, destreg); 18467 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align, 18468 desired_align); 18469 } 18470 else 18471 { 18472 /* If we know how many bytes need to be stored before dst is 18473 sufficiently aligned, maintain aliasing info accurately. */ 18474 dst = expand_constant_setmem_prologue (dst, destreg, promoted_val, 18475 desired_align, align_bytes); 18476 count_exp = plus_constant (count_exp, -align_bytes); 18477 count -= align_bytes; 18478 } 18479 if (need_zero_guard 18480 && (count < (unsigned HOST_WIDE_INT) size_needed 18481 || (align_bytes == 0 18482 && count < ((unsigned HOST_WIDE_INT) size_needed 18483 + desired_align - align)))) 18484 { 18485 /* It is possible that we copied enough so the main loop will not 18486 execute. */ 18487 gcc_assert (size_needed > 1); 18488 if (label == NULL_RTX) 18489 label = gen_label_rtx (); 18490 emit_cmp_and_jump_insns (count_exp, 18491 GEN_INT (size_needed), 18492 LTU, 0, counter_mode (count_exp), 1, label); 18493 if (expected_size == -1 18494 || expected_size < (desired_align - align) / 2 + size_needed) 18495 predict_jump (REG_BR_PROB_BASE * 20 / 100); 18496 else 18497 predict_jump (REG_BR_PROB_BASE * 60 / 100); 18498 } 18499 } 18500 if (label && size_needed == 1) 18501 { 18502 emit_label (label); 18503 LABEL_NUSES (label) = 1; 18504 label = NULL; 18505 promoted_val = val_exp; 18506 epilogue_size_needed = 1; 18507 } 18508 else if (label == NULL_RTX) 18509 epilogue_size_needed = size_needed; 18510 18511 /* Step 3: Main loop. */ 18512 18513 switch (alg) 18514 { 18515 case libcall: 18516 case no_stringop: 18517 gcc_unreachable (); 18518 case loop_1_byte: 18519 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18520 count_exp, QImode, 1, expected_size); 18521 break; 18522 case loop: 18523 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18524 count_exp, Pmode, 1, expected_size); 18525 break; 18526 case unrolled_loop: 18527 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val, 18528 count_exp, Pmode, 4, expected_size); 18529 break; 18530 case rep_prefix_8_byte: 18531 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18532 DImode, val_exp); 18533 break; 18534 case rep_prefix_4_byte: 18535 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18536 SImode, val_exp); 18537 break; 18538 case rep_prefix_1_byte: 18539 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp, 18540 QImode, val_exp); 18541 break; 18542 } 18543 /* Adjust properly the offset of src and dest memory for aliasing. */ 18544 if (CONST_INT_P (count_exp)) 18545 dst = adjust_automodify_address_nv (dst, BLKmode, destreg, 18546 (count / size_needed) * size_needed); 18547 else 18548 dst = change_address (dst, BLKmode, destreg); 18549 18550 /* Step 4: Epilogue to copy the remaining bytes. */ 18551 18552 if (label) 18553 { 18554 /* When the main loop is done, COUNT_EXP might hold original count, 18555 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes. 18556 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED 18557 bytes. Compensate if needed. 
*/ 18558 18559 if (size_needed < epilogue_size_needed) 18560 { 18561 tmp = 18562 expand_simple_binop (counter_mode (count_exp), AND, count_exp, 18563 GEN_INT (size_needed - 1), count_exp, 1, 18564 OPTAB_DIRECT); 18565 if (tmp != count_exp) 18566 emit_move_insn (count_exp, tmp); 18567 } 18568 emit_label (label); 18569 LABEL_NUSES (label) = 1; 18570 } 18571 epilogue: 18572 if (count_exp != const0_rtx && epilogue_size_needed > 1) 18573 { 18574 if (force_loopy_epilogue) 18575 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp, 18576 epilogue_size_needed); 18577 else 18578 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp, 18579 epilogue_size_needed); 18580 } 18581 if (jump_around_label) 18582 emit_label (jump_around_label); 18583 return 1; 18584 } 18585 18586 /* Expand the appropriate insns for doing strlen if not just doing 18587 repnz; scasb 18588 18589 out = result, initialized with the start address 18590 align_rtx = alignment of the address. 18591 scratch = scratch register, initialized with the startaddress when 18592 not aligned, otherwise undefined 18593 18594 This is just the body. It needs the initializations mentioned above and 18595 some address computing at the end. These things are done in i386.md. */ 18596 18597 static void 18598 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx) 18599 { 18600 int align; 18601 rtx tmp; 18602 rtx align_2_label = NULL_RTX; 18603 rtx align_3_label = NULL_RTX; 18604 rtx align_4_label = gen_label_rtx (); 18605 rtx end_0_label = gen_label_rtx (); 18606 rtx mem; 18607 rtx tmpreg = gen_reg_rtx (SImode); 18608 rtx scratch = gen_reg_rtx (SImode); 18609 rtx cmp; 18610 18611 align = 0; 18612 if (CONST_INT_P (align_rtx)) 18613 align = INTVAL (align_rtx); 18614 18615 /* Loop to check 1..3 bytes for null to get an aligned pointer. */ 18616 18617 /* Is there a known alignment and is it less than 4? */ 18618 if (align < 4) 18619 { 18620 rtx scratch1 = gen_reg_rtx (Pmode); 18621 emit_move_insn (scratch1, out); 18622 /* Is there a known alignment and is it not 2? */ 18623 if (align != 2) 18624 { 18625 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */ 18626 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */ 18627 18628 /* Leave just the 3 lower bits. */ 18629 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3), 18630 NULL_RTX, 0, OPTAB_WIDEN); 18631 18632 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 18633 Pmode, 1, align_4_label); 18634 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL, 18635 Pmode, 1, align_2_label); 18636 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL, 18637 Pmode, 1, align_3_label); 18638 } 18639 else 18640 { 18641 /* Since the alignment is 2, we have to check 2 or 0 bytes; 18642 check if is aligned to 4 - byte. */ 18643 18644 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx, 18645 NULL_RTX, 0, OPTAB_WIDEN); 18646 18647 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL, 18648 Pmode, 1, align_4_label); 18649 } 18650 18651 mem = change_address (src, QImode, out); 18652 18653 /* Now compare the bytes. */ 18654 18655 /* Compare the first n unaligned byte on a byte per byte basis. */ 18656 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, 18657 QImode, 1, end_0_label); 18658 18659 /* Increment the address. 
*/ 18660 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18661 18662 /* Not needed with an alignment of 2 */ 18663 if (align != 2) 18664 { 18665 emit_label (align_2_label); 18666 18667 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 18668 end_0_label); 18669 18670 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18671 18672 emit_label (align_3_label); 18673 } 18674 18675 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1, 18676 end_0_label); 18677 18678 emit_insn ((*ix86_gen_add3) (out, out, const1_rtx)); 18679 } 18680 18681 /* Generate loop to check 4 bytes at a time. It is not a good idea to 18682 align this loop. It gives only huge programs, but does not help to 18683 speed up. */ 18684 emit_label (align_4_label); 18685 18686 mem = change_address (src, SImode, out); 18687 emit_move_insn (scratch, mem); 18688 emit_insn ((*ix86_gen_add3) (out, out, GEN_INT (4))); 18689 18690 /* This formula yields a nonzero result iff one of the bytes is zero. 18691 This saves three branches inside loop and many cycles. */ 18692 18693 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101))); 18694 emit_insn (gen_one_cmplsi2 (scratch, scratch)); 18695 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch)); 18696 emit_insn (gen_andsi3 (tmpreg, tmpreg, 18697 gen_int_mode (0x80808080, SImode))); 18698 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1, 18699 align_4_label); 18700 18701 if (TARGET_CMOVE) 18702 { 18703 rtx reg = gen_reg_rtx (SImode); 18704 rtx reg2 = gen_reg_rtx (Pmode); 18705 emit_move_insn (reg, tmpreg); 18706 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16))); 18707 18708 /* If zero is not in the first two bytes, move two bytes forward. */ 18709 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 18710 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18711 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 18712 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, 18713 gen_rtx_IF_THEN_ELSE (SImode, tmp, 18714 reg, 18715 tmpreg))); 18716 /* Emit lea manually to avoid clobbering of flags. */ 18717 emit_insn (gen_rtx_SET (SImode, reg2, 18718 gen_rtx_PLUS (Pmode, out, const2_rtx))); 18719 18720 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18721 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx); 18722 emit_insn (gen_rtx_SET (VOIDmode, out, 18723 gen_rtx_IF_THEN_ELSE (Pmode, tmp, 18724 reg2, 18725 out))); 18726 18727 } 18728 else 18729 { 18730 rtx end_2_label = gen_label_rtx (); 18731 /* Is zero in the first two bytes? */ 18732 18733 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080))); 18734 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG); 18735 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx); 18736 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 18737 gen_rtx_LABEL_REF (VOIDmode, end_2_label), 18738 pc_rtx); 18739 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 18740 JUMP_LABEL (tmp) = end_2_label; 18741 18742 /* Not in the first two. Move two bytes forward. */ 18743 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16))); 18744 emit_insn ((*ix86_gen_add3) (out, out, const2_rtx)); 18745 18746 emit_label (end_2_label); 18747 18748 } 18749 18750 /* Avoid branch in fixing the byte. */ 18751 tmpreg = gen_lowpart (QImode, tmpreg); 18752 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg)); 18753 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx); 18754 emit_insn ((*ix86_gen_sub3_carry) (out, out, GEN_INT (3), cmp)); 18755 18756 emit_label (end_0_label); 18757 } 18758 18759 /* Expand strlen. 
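   (The unrolled scan above relies on the standard zero-byte test: for a
   word W, (W - 0x01010101) & ~W & 0x80808080 is nonzero exactly when some
   byte of W is zero.  A quick worked check: W = 0xFF00FFFF gives
   0xFDFFFEFE & 0x00FF0000 & 0x80808080 = 0x00800000, flagging the zero
   byte, while a word with no zero bytes yields 0.)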
*/ 18760 18761 int 18762 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align) 18763 { 18764 rtx addr, scratch1, scratch2, scratch3, scratch4; 18765 18766 /* The generic case of strlen expander is long. Avoid it's 18767 expanding unless TARGET_INLINE_ALL_STRINGOPS. */ 18768 18769 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 18770 && !TARGET_INLINE_ALL_STRINGOPS 18771 && !optimize_insn_for_size_p () 18772 && (!CONST_INT_P (align) || INTVAL (align) < 4)) 18773 return 0; 18774 18775 addr = force_reg (Pmode, XEXP (src, 0)); 18776 scratch1 = gen_reg_rtx (Pmode); 18777 18778 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1 18779 && !optimize_insn_for_size_p ()) 18780 { 18781 /* Well it seems that some optimizer does not combine a call like 18782 foo(strlen(bar), strlen(bar)); 18783 when the move and the subtraction is done here. It does calculate 18784 the length just once when these instructions are done inside of 18785 output_strlen_unroll(). But I think since &bar[strlen(bar)] is 18786 often used and I use one fewer register for the lifetime of 18787 output_strlen_unroll() this is better. */ 18788 18789 emit_move_insn (out, addr); 18790 18791 ix86_expand_strlensi_unroll_1 (out, src, align); 18792 18793 /* strlensi_unroll_1 returns the address of the zero at the end of 18794 the string, like memchr(), so compute the length by subtracting 18795 the start address. */ 18796 emit_insn ((*ix86_gen_sub3) (out, out, addr)); 18797 } 18798 else 18799 { 18800 rtx unspec; 18801 18802 /* Can't use this if the user has appropriated eax, ecx, or edi. */ 18803 if (fixed_regs[AX_REG] || fixed_regs[CX_REG] || fixed_regs[DI_REG]) 18804 return false; 18805 18806 scratch2 = gen_reg_rtx (Pmode); 18807 scratch3 = gen_reg_rtx (Pmode); 18808 scratch4 = force_reg (Pmode, constm1_rtx); 18809 18810 emit_move_insn (scratch3, addr); 18811 eoschar = force_reg (QImode, eoschar); 18812 18813 src = replace_equiv_address_nv (src, scratch3); 18814 18815 /* If .md starts supporting :P, this can be done in .md. */ 18816 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align, 18817 scratch4), UNSPEC_SCAS); 18818 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec)); 18819 emit_insn ((*ix86_gen_one_cmpl2) (scratch2, scratch1)); 18820 emit_insn ((*ix86_gen_add3) (out, scratch2, constm1_rtx)); 18821 } 18822 return 1; 18823 } 18824 18825 /* For given symbol (function) construct code to compute address of it's PLT 18826 entry in large x86-64 PIC model. */ 18827 rtx 18828 construct_plt_address (rtx symbol) 18829 { 18830 rtx tmp = gen_reg_rtx (Pmode); 18831 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF); 18832 18833 gcc_assert (GET_CODE (symbol) == SYMBOL_REF); 18834 gcc_assert (ix86_cmodel == CM_LARGE_PIC); 18835 18836 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec)); 18837 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx)); 18838 return tmp; 18839 } 18840 18841 void 18842 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1, 18843 rtx callarg2, 18844 rtx pop, int sibcall) 18845 { 18846 rtx use = NULL, call; 18847 18848 if (pop == const0_rtx) 18849 pop = NULL; 18850 gcc_assert (!TARGET_64BIT || !pop); 18851 18852 if (TARGET_MACHO && !TARGET_64BIT) 18853 { 18854 #if TARGET_MACHO 18855 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF) 18856 fnaddr = machopic_indirect_call_target (fnaddr); 18857 #endif 18858 } 18859 else 18860 { 18861 /* Static functions and indirect calls don't need the pic register. 
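Only a call to a global symbol, which may be resolved through the PLT,
	 needs the PIC register recorded as used here.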
*/ 18862 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC) 18863 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 18864 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0))) 18865 use_reg (&use, pic_offset_table_rtx); 18866 } 18867 18868 if (TARGET_64BIT && INTVAL (callarg2) >= 0) 18869 { 18870 rtx al = gen_rtx_REG (QImode, AX_REG); 18871 emit_move_insn (al, callarg2); 18872 use_reg (&use, al); 18873 } 18874 18875 if (ix86_cmodel == CM_LARGE_PIC 18876 && GET_CODE (fnaddr) == MEM 18877 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF 18878 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode)) 18879 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0))); 18880 else if (sibcall 18881 ? !sibcall_insn_operand (XEXP (fnaddr, 0), Pmode) 18882 : !call_insn_operand (XEXP (fnaddr, 0), Pmode)) 18883 { 18884 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0)); 18885 fnaddr = gen_rtx_MEM (QImode, fnaddr); 18886 } 18887 18888 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1); 18889 if (retval) 18890 call = gen_rtx_SET (VOIDmode, retval, call); 18891 if (pop) 18892 { 18893 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop); 18894 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop); 18895 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop)); 18896 } 18897 if (TARGET_64BIT 18898 && ix86_cfun_abi () == MS_ABI 18899 && (!callarg2 || INTVAL (callarg2) != -2)) 18900 { 18901 /* We need to represent that SI and DI registers are clobbered 18902 by SYSV calls. */ 18903 static int clobbered_registers[] = { 18904 XMM6_REG, XMM7_REG, XMM8_REG, 18905 XMM9_REG, XMM10_REG, XMM11_REG, 18906 XMM12_REG, XMM13_REG, XMM14_REG, 18907 XMM15_REG, SI_REG, DI_REG 18908 }; 18909 unsigned int i; 18910 rtx vec[ARRAY_SIZE (clobbered_registers) + 2]; 18911 rtx unspec = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), 18912 UNSPEC_MS_TO_SYSV_CALL); 18913 18914 vec[0] = call; 18915 vec[1] = unspec; 18916 for (i = 0; i < ARRAY_SIZE (clobbered_registers); i++) 18917 vec[i + 2] = gen_rtx_CLOBBER (SSE_REGNO_P (clobbered_registers[i]) 18918 ? TImode : DImode, 18919 gen_rtx_REG 18920 (SSE_REGNO_P (clobbered_registers[i]) 18921 ? TImode : DImode, 18922 clobbered_registers[i])); 18923 18924 call = gen_rtx_PARALLEL (VOIDmode, 18925 gen_rtvec_v (ARRAY_SIZE (clobbered_registers) 18926 + 2, vec)); 18927 } 18928 18929 call = emit_call_insn (call); 18930 if (use) 18931 CALL_INSN_FUNCTION_USAGE (call) = use; 18932 } 18933 18934 18935 /* Clear stack slot assignments remembered from previous functions. 18936 This is called from INIT_EXPANDERS once before RTL is emitted for each 18937 function. */ 18938 18939 static struct machine_function * 18940 ix86_init_machine_status (void) 18941 { 18942 struct machine_function *f; 18943 18944 f = GGC_CNEW (struct machine_function); 18945 f->use_fast_prologue_epilogue_nregs = -1; 18946 f->tls_descriptor_call_expanded_p = 0; 18947 f->call_abi = DEFAULT_ABI; 18948 18949 return f; 18950 } 18951 18952 /* Return a MEM corresponding to a stack slot with mode MODE. 18953 Allocate a new slot if necessary. 18954 18955 The RTL for a function can have several slots available: N is 18956 which slot to use. */ 18957 18958 rtx 18959 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n) 18960 { 18961 struct stack_local_entry *s; 18962 18963 gcc_assert (n < MAX_386_STACK_LOCALS); 18964 18965 /* Virtual slot is valid only before vregs are instantiated. 
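The assert below enforces this in both directions: SLOT_VIRTUAL may be
     requested only before instantiate_virtual_regs has run, and every other
     slot only afterwards.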
*/ 18966 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated); 18967 18968 for (s = ix86_stack_locals; s; s = s->next) 18969 if (s->mode == mode && s->n == n) 18970 return copy_rtx (s->rtl); 18971 18972 s = (struct stack_local_entry *) 18973 ggc_alloc (sizeof (struct stack_local_entry)); 18974 s->n = n; 18975 s->mode = mode; 18976 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0); 18977 18978 s->next = ix86_stack_locals; 18979 ix86_stack_locals = s; 18980 return s->rtl; 18981 } 18982 18983 /* Construct the SYMBOL_REF for the tls_get_addr function. */ 18984 18985 static GTY(()) rtx ix86_tls_symbol; 18986 rtx 18987 ix86_tls_get_addr (void) 18988 { 18989 18990 if (!ix86_tls_symbol) 18991 { 18992 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, 18993 (TARGET_ANY_GNU_TLS 18994 && !TARGET_64BIT) 18995 ? "___tls_get_addr" 18996 : "__tls_get_addr"); 18997 } 18998 18999 return ix86_tls_symbol; 19000 } 19001 19002 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */ 19003 19004 static GTY(()) rtx ix86_tls_module_base_symbol; 19005 rtx 19006 ix86_tls_module_base (void) 19007 { 19008 19009 if (!ix86_tls_module_base_symbol) 19010 { 19011 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode, 19012 "_TLS_MODULE_BASE_"); 19013 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol) 19014 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT; 19015 } 19016 19017 return ix86_tls_module_base_symbol; 19018 } 19019 19020 /* Calculate the length of the memory address in the instruction 19021 encoding. Does not include the one-byte modrm, opcode, or prefix. */ 19022 19023 int 19024 memory_address_length (rtx addr) 19025 { 19026 struct ix86_address parts; 19027 rtx base, index, disp; 19028 int len; 19029 int ok; 19030 19031 if (GET_CODE (addr) == PRE_DEC 19032 || GET_CODE (addr) == POST_INC 19033 || GET_CODE (addr) == PRE_MODIFY 19034 || GET_CODE (addr) == POST_MODIFY) 19035 return 0; 19036 19037 ok = ix86_decompose_address (addr, &parts); 19038 gcc_assert (ok); 19039 19040 if (parts.base && GET_CODE (parts.base) == SUBREG) 19041 parts.base = SUBREG_REG (parts.base); 19042 if (parts.index && GET_CODE (parts.index) == SUBREG) 19043 parts.index = SUBREG_REG (parts.index); 19044 19045 base = parts.base; 19046 index = parts.index; 19047 disp = parts.disp; 19048 len = 0; 19049 19050 /* Rule of thumb: 19051 - esp as the base always wants an index, 19052 - ebp as the base always wants a displacement. */ 19053 19054 /* Register Indirect. */ 19055 if (base && !index && !disp) 19056 { 19057 /* esp (for its index) and ebp (for its displacement) need 19058 the two-byte modrm form. */ 19059 if (addr == stack_pointer_rtx 19060 || addr == arg_pointer_rtx 19061 || addr == frame_pointer_rtx 19062 || addr == hard_frame_pointer_rtx) 19063 len = 1; 19064 } 19065 19066 /* Direct Addressing. */ 19067 else if (disp && !base && !index) 19068 len = 4; 19069 19070 else 19071 { 19072 /* Find the length of the displacement constant. */ 19073 if (disp) 19074 { 19075 if (base && satisfies_constraint_K (disp)) 19076 len = 1; 19077 else 19078 len = 4; 19079 } 19080 /* ebp always wants a displacement. */ 19081 else if (base == hard_frame_pointer_rtx) 19082 len = 1; 19083 19084 /* An index requires the two-byte modrm form.... */ 19085 if (index 19086 /* ...like esp, which always wants an index. */ 19087 || base == stack_pointer_rtx 19088 || base == arg_pointer_rtx 19089 || base == frame_pointer_rtx) 19090 len += 1; 19091 } 19092 19093 return len; 19094 } 19095 19096 /* Compute default value for "length_immediate" attribute. 
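The value is the number of bytes the immediate operand contributes to the
   instruction encoding, from one byte for an imm8 alternative up to four
   bytes for an imm32.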
When SHORTFORM 19097 is set, expect that insn have 8bit immediate alternative. */ 19098 int 19099 ix86_attr_length_immediate_default (rtx insn, int shortform) 19100 { 19101 int len = 0; 19102 int i; 19103 extract_insn_cached (insn); 19104 for (i = recog_data.n_operands - 1; i >= 0; --i) 19105 if (CONSTANT_P (recog_data.operand[i])) 19106 { 19107 gcc_assert (!len); 19108 if (shortform && satisfies_constraint_K (recog_data.operand[i])) 19109 len = 1; 19110 else 19111 { 19112 switch (get_attr_mode (insn)) 19113 { 19114 case MODE_QI: 19115 len+=1; 19116 break; 19117 case MODE_HI: 19118 len+=2; 19119 break; 19120 case MODE_SI: 19121 len+=4; 19122 break; 19123 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */ 19124 case MODE_DI: 19125 len+=4; 19126 break; 19127 default: 19128 fatal_insn ("unknown insn mode", insn); 19129 } 19130 } 19131 } 19132 return len; 19133 } 19134 /* Compute default value for "length_address" attribute. */ 19135 int 19136 ix86_attr_length_address_default (rtx insn) 19137 { 19138 int i; 19139 19140 if (get_attr_type (insn) == TYPE_LEA) 19141 { 19142 rtx set = PATTERN (insn); 19143 19144 if (GET_CODE (set) == PARALLEL) 19145 set = XVECEXP (set, 0, 0); 19146 19147 gcc_assert (GET_CODE (set) == SET); 19148 19149 return memory_address_length (SET_SRC (set)); 19150 } 19151 19152 extract_insn_cached (insn); 19153 for (i = recog_data.n_operands - 1; i >= 0; --i) 19154 if (MEM_P (recog_data.operand[i])) 19155 { 19156 return memory_address_length (XEXP (recog_data.operand[i], 0)); 19157 break; 19158 } 19159 return 0; 19160 } 19161 19162 /* Compute default value for "length_vex" attribute. It includes 19163 2 or 3 byte VEX prefix and 1 opcode byte. */ 19164 19165 int 19166 ix86_attr_length_vex_default (rtx insn, int has_0f_opcode, 19167 int has_vex_w) 19168 { 19169 int i; 19170 19171 /* Only 0f opcode can use 2 byte VEX prefix and VEX W bit uses 3 19172 byte VEX prefix. */ 19173 if (!has_0f_opcode || has_vex_w) 19174 return 3 + 1; 19175 19176 /* We can always use 2 byte VEX prefix in 32bit. */ 19177 if (!TARGET_64BIT) 19178 return 2 + 1; 19179 19180 extract_insn_cached (insn); 19181 19182 for (i = recog_data.n_operands - 1; i >= 0; --i) 19183 if (REG_P (recog_data.operand[i])) 19184 { 19185 /* REX.W bit uses 3 byte VEX prefix. */ 19186 if (GET_MODE (recog_data.operand[i]) == DImode) 19187 return 3 + 1; 19188 } 19189 else 19190 { 19191 /* REX.X or REX.B bits use 3 byte VEX prefix. */ 19192 if (MEM_P (recog_data.operand[i]) 19193 && x86_extended_reg_mentioned_p (recog_data.operand[i])) 19194 return 3 + 1; 19195 } 19196 19197 return 2 + 1; 19198 } 19199 19200 /* Return the maximum number of instructions a cpu can issue. */ 19201 19202 static int 19203 ix86_issue_rate (void) 19204 { 19205 switch (ix86_tune) 19206 { 19207 case PROCESSOR_PENTIUM: 19208 case PROCESSOR_K6: 19209 return 2; 19210 19211 case PROCESSOR_PENTIUMPRO: 19212 case PROCESSOR_PENTIUM4: 19213 case PROCESSOR_ATHLON: 19214 case PROCESSOR_K8: 19215 case PROCESSOR_AMDFAM10: 19216 case PROCESSOR_NOCONA: 19217 case PROCESSOR_GENERIC32: 19218 case PROCESSOR_GENERIC64: 19219 return 3; 19220 19221 case PROCESSOR_CORE2: 19222 return 4; 19223 19224 default: 19225 return 1; 19226 } 19227 } 19228 19229 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set 19230 by DEP_INSN and nothing set by DEP_INSN. 
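ix86_adjust_cost uses this to model compare/jump and compare/setcc
   pairing, where the dependence is treated as having zero cost on Pentium.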
*/ 19231 19232 static int 19233 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 19234 { 19235 rtx set, set2; 19236 19237 /* Simplify the test for uninteresting insns. */ 19238 if (insn_type != TYPE_SETCC 19239 && insn_type != TYPE_ICMOV 19240 && insn_type != TYPE_FCMOV 19241 && insn_type != TYPE_IBR) 19242 return 0; 19243 19244 if ((set = single_set (dep_insn)) != 0) 19245 { 19246 set = SET_DEST (set); 19247 set2 = NULL_RTX; 19248 } 19249 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL 19250 && XVECLEN (PATTERN (dep_insn), 0) == 2 19251 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET 19252 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET) 19253 { 19254 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 19255 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0)); 19256 } 19257 else 19258 return 0; 19259 19260 if (!REG_P (set) || REGNO (set) != FLAGS_REG) 19261 return 0; 19262 19263 /* This test is true if the dependent insn reads the flags but 19264 not any other potentially set register. */ 19265 if (!reg_overlap_mentioned_p (set, PATTERN (insn))) 19266 return 0; 19267 19268 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn))) 19269 return 0; 19270 19271 return 1; 19272 } 19273 19274 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory 19275 address with operands set by DEP_INSN. */ 19276 19277 static int 19278 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type) 19279 { 19280 rtx addr; 19281 19282 if (insn_type == TYPE_LEA 19283 && TARGET_PENTIUM) 19284 { 19285 addr = PATTERN (insn); 19286 19287 if (GET_CODE (addr) == PARALLEL) 19288 addr = XVECEXP (addr, 0, 0); 19289 19290 gcc_assert (GET_CODE (addr) == SET); 19291 19292 addr = SET_SRC (addr); 19293 } 19294 else 19295 { 19296 int i; 19297 extract_insn_cached (insn); 19298 for (i = recog_data.n_operands - 1; i >= 0; --i) 19299 if (MEM_P (recog_data.operand[i])) 19300 { 19301 addr = XEXP (recog_data.operand[i], 0); 19302 goto found; 19303 } 19304 return 0; 19305 found:; 19306 } 19307 19308 return modified_in_p (addr, dep_insn); 19309 } 19310 19311 static int 19312 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost) 19313 { 19314 enum attr_type insn_type, dep_insn_type; 19315 enum attr_memory memory; 19316 rtx set, set2; 19317 int dep_insn_code_number; 19318 19319 /* Anti and output dependencies have zero cost on all CPUs. */ 19320 if (REG_NOTE_KIND (link) != 0) 19321 return 0; 19322 19323 dep_insn_code_number = recog_memoized (dep_insn); 19324 19325 /* If we can't recognize the insns, we can't really do anything. */ 19326 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0) 19327 return cost; 19328 19329 insn_type = get_attr_type (insn); 19330 dep_insn_type = get_attr_type (dep_insn); 19331 19332 switch (ix86_tune) 19333 { 19334 case PROCESSOR_PENTIUM: 19335 /* Address Generation Interlock adds a cycle of latency. */ 19336 if (ix86_agi_dependent (insn, dep_insn, insn_type)) 19337 cost += 1; 19338 19339 /* ??? Compares pair with jump/setcc. */ 19340 if (ix86_flags_dependent (insn, dep_insn, insn_type)) 19341 cost = 0; 19342 19343 /* Floating point stores require value to be ready one cycle earlier. */ 19344 if (insn_type == TYPE_FMOV 19345 && get_attr_memory (insn) == MEMORY_STORE 19346 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19347 cost += 1; 19348 break; 19349 19350 case PROCESSOR_PENTIUMPRO: 19351 memory = get_attr_memory (insn); 19352 19353 /* INT->FP conversion is expensive. 
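Charge a flat five cycle penalty when DEP_INSN converts from an
	 integer source.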
*/ 19354 if (get_attr_fp_int_src (dep_insn)) 19355 cost += 5; 19356 19357 /* There is one cycle extra latency between an FP op and a store. */ 19358 if (insn_type == TYPE_FMOV 19359 && (set = single_set (dep_insn)) != NULL_RTX 19360 && (set2 = single_set (insn)) != NULL_RTX 19361 && rtx_equal_p (SET_DEST (set), SET_SRC (set2)) 19362 && MEM_P (SET_DEST (set2))) 19363 cost += 1; 19364 19365 /* Show ability of reorder buffer to hide latency of load by executing 19366 in parallel with previous instruction in case 19367 previous instruction is not needed to compute the address. */ 19368 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19369 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19370 { 19371 /* Claim moves to take one cycle, as core can issue one load 19372 at time and the next load can start cycle later. */ 19373 if (dep_insn_type == TYPE_IMOV 19374 || dep_insn_type == TYPE_FMOV) 19375 cost = 1; 19376 else if (cost > 1) 19377 cost--; 19378 } 19379 break; 19380 19381 case PROCESSOR_K6: 19382 memory = get_attr_memory (insn); 19383 19384 /* The esp dependency is resolved before the instruction is really 19385 finished. */ 19386 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP) 19387 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP)) 19388 return 1; 19389 19390 /* INT->FP conversion is expensive. */ 19391 if (get_attr_fp_int_src (dep_insn)) 19392 cost += 5; 19393 19394 /* Show ability of reorder buffer to hide latency of load by executing 19395 in parallel with previous instruction in case 19396 previous instruction is not needed to compute the address. */ 19397 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19398 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19399 { 19400 /* Claim moves to take one cycle, as core can issue one load 19401 at time and the next load can start cycle later. */ 19402 if (dep_insn_type == TYPE_IMOV 19403 || dep_insn_type == TYPE_FMOV) 19404 cost = 1; 19405 else if (cost > 2) 19406 cost -= 2; 19407 else 19408 cost = 1; 19409 } 19410 break; 19411 19412 case PROCESSOR_ATHLON: 19413 case PROCESSOR_K8: 19414 case PROCESSOR_AMDFAM10: 19415 case PROCESSOR_GENERIC32: 19416 case PROCESSOR_GENERIC64: 19417 memory = get_attr_memory (insn); 19418 19419 /* Show ability of reorder buffer to hide latency of load by executing 19420 in parallel with previous instruction in case 19421 previous instruction is not needed to compute the address. */ 19422 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH) 19423 && !ix86_agi_dependent (insn, dep_insn, insn_type)) 19424 { 19425 enum attr_unit unit = get_attr_unit (insn); 19426 int loadcost = 3; 19427 19428 /* Because of the difference between the length of integer and 19429 floating unit pipeline preparation stages, the memory operands 19430 for floating point are cheaper. 19431 19432 ??? For Athlon it the difference is most probably 2. */ 19433 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN) 19434 loadcost = 3; 19435 else 19436 loadcost = TARGET_ATHLON ? 2 : 0; 19437 19438 if (cost >= loadcost) 19439 cost -= loadcost; 19440 else 19441 cost = 0; 19442 } 19443 19444 default: 19445 break; 19446 } 19447 19448 return cost; 19449 } 19450 19451 /* How many alternative schedules to try. This should be as wide as the 19452 scheduling freedom in the DFA, but no wider. Making this value too 19453 large results extra work for the scheduler. 
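A value of zero here effectively disables the multipass lookahead for
   the processor.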
*/ 19454 19455 static int 19456 ia32_multipass_dfa_lookahead (void) 19457 { 19458 switch (ix86_tune) 19459 { 19460 case PROCESSOR_PENTIUM: 19461 return 2; 19462 19463 case PROCESSOR_PENTIUMPRO: 19464 case PROCESSOR_K6: 19465 return 1; 19466 19467 default: 19468 return 0; 19469 } 19470 } 19471 19472 19473 /* Compute the alignment given to a constant that is being placed in memory. 19474 EXP is the constant and ALIGN is the alignment that the object would 19475 ordinarily have. 19476 The value of this function is used instead of that alignment to align 19477 the object. */ 19478 19479 int 19480 ix86_constant_alignment (tree exp, int align) 19481 { 19482 if (TREE_CODE (exp) == REAL_CST || TREE_CODE (exp) == VECTOR_CST 19483 || TREE_CODE (exp) == INTEGER_CST) 19484 { 19485 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64) 19486 return 64; 19487 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128) 19488 return 128; 19489 } 19490 else if (!optimize_size && TREE_CODE (exp) == STRING_CST 19491 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD) 19492 return BITS_PER_WORD; 19493 19494 return align; 19495 } 19496 19497 /* Compute the alignment for a static variable. 19498 TYPE is the data type, and ALIGN is the alignment that 19499 the object would ordinarily have. The value of this function is used 19500 instead of that alignment to align the object. */ 19501 19502 int 19503 ix86_data_alignment (tree type, int align) 19504 { 19505 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT); 19506 19507 if (AGGREGATE_TYPE_P (type) 19508 && TYPE_SIZE (type) 19509 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19510 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align 19511 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) 19512 && align < max_align) 19513 align = max_align; 19514 19515 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 19516 to 16byte boundary. */ 19517 if (TARGET_64BIT) 19518 { 19519 if (AGGREGATE_TYPE_P (type) 19520 && TYPE_SIZE (type) 19521 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19522 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128 19523 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 19524 return 128; 19525 } 19526 19527 if (TREE_CODE (type) == ARRAY_TYPE) 19528 { 19529 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 19530 return 64; 19531 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 19532 return 128; 19533 } 19534 else if (TREE_CODE (type) == COMPLEX_TYPE) 19535 { 19536 19537 if (TYPE_MODE (type) == DCmode && align < 64) 19538 return 64; 19539 if ((TYPE_MODE (type) == XCmode 19540 || TYPE_MODE (type) == TCmode) && align < 128) 19541 return 128; 19542 } 19543 else if ((TREE_CODE (type) == RECORD_TYPE 19544 || TREE_CODE (type) == UNION_TYPE 19545 || TREE_CODE (type) == QUAL_UNION_TYPE) 19546 && TYPE_FIELDS (type)) 19547 { 19548 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 19549 return 64; 19550 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 19551 return 128; 19552 } 19553 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 19554 || TREE_CODE (type) == INTEGER_TYPE) 19555 { 19556 if (TYPE_MODE (type) == DFmode && align < 64) 19557 return 64; 19558 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 19559 return 128; 19560 } 19561 19562 return align; 19563 } 19564 19565 /* Compute the alignment for a local variable or a stack slot. 
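The returned alignment may exceed the type's natural alignment so that
   doubles and vector objects in the frame can be accessed with aligned
   instructions.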
EXP is 19566 the data type or decl itself, MODE is the widest mode available and 19567 ALIGN is the alignment that the object would ordinarily have. The 19568 value of this macro is used instead of that alignment to align the 19569 object. */ 19570 19571 unsigned int 19572 ix86_local_alignment (tree exp, enum machine_mode mode, 19573 unsigned int align) 19574 { 19575 tree type, decl; 19576 19577 if (exp && DECL_P (exp)) 19578 { 19579 type = TREE_TYPE (exp); 19580 decl = exp; 19581 } 19582 else 19583 { 19584 type = exp; 19585 decl = NULL; 19586 } 19587 19588 /* Don't do dynamic stack realignment for long long objects with 19589 -mpreferred-stack-boundary=2. */ 19590 if (!TARGET_64BIT 19591 && align == 64 19592 && ix86_preferred_stack_boundary < 64 19593 && (mode == DImode || (type && TYPE_MODE (type) == DImode)) 19594 && (!type || !TYPE_USER_ALIGN (type)) 19595 && (!decl || !DECL_USER_ALIGN (decl))) 19596 align = 32; 19597 19598 /* If TYPE is NULL, we are allocating a stack slot for caller-save 19599 register in MODE. We will return the largest alignment of XF 19600 and DF. */ 19601 if (!type) 19602 { 19603 if (mode == XFmode && align < GET_MODE_ALIGNMENT (DFmode)) 19604 align = GET_MODE_ALIGNMENT (DFmode); 19605 return align; 19606 } 19607 19608 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned 19609 to 16byte boundary. */ 19610 if (TARGET_64BIT) 19611 { 19612 if (AGGREGATE_TYPE_P (type) 19613 && TYPE_SIZE (type) 19614 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST 19615 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16 19616 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128) 19617 return 128; 19618 } 19619 if (TREE_CODE (type) == ARRAY_TYPE) 19620 { 19621 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64) 19622 return 64; 19623 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128) 19624 return 128; 19625 } 19626 else if (TREE_CODE (type) == COMPLEX_TYPE) 19627 { 19628 if (TYPE_MODE (type) == DCmode && align < 64) 19629 return 64; 19630 if ((TYPE_MODE (type) == XCmode 19631 || TYPE_MODE (type) == TCmode) && align < 128) 19632 return 128; 19633 } 19634 else if ((TREE_CODE (type) == RECORD_TYPE 19635 || TREE_CODE (type) == UNION_TYPE 19636 || TREE_CODE (type) == QUAL_UNION_TYPE) 19637 && TYPE_FIELDS (type)) 19638 { 19639 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64) 19640 return 64; 19641 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128) 19642 return 128; 19643 } 19644 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE 19645 || TREE_CODE (type) == INTEGER_TYPE) 19646 { 19647 19648 if (TYPE_MODE (type) == DFmode && align < 64) 19649 return 64; 19650 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128) 19651 return 128; 19652 } 19653 return align; 19654 } 19655 19656 /* Compute the minimum required alignment for dynamic stack realignment 19657 purposes for a local variable, parameter or a stack slot. EXP is 19658 the data type or decl itself, MODE is its mode and ALIGN is the 19659 alignment that the object would ordinarily have. 
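Unlike ix86_local_alignment, the result may be smaller than ALIGN, so
   that the stack is not dynamically realigned merely for a long long
   object.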
*/ 19660 19661 unsigned int 19662 ix86_minimum_alignment (tree exp, enum machine_mode mode, 19663 unsigned int align) 19664 { 19665 tree type, decl; 19666 19667 if (TARGET_64BIT || align != 64 || ix86_preferred_stack_boundary >= 64) 19668 return align; 19669 19670 if (exp && DECL_P (exp)) 19671 { 19672 type = TREE_TYPE (exp); 19673 decl = exp; 19674 } 19675 else 19676 { 19677 type = exp; 19678 decl = NULL; 19679 } 19680 19681 /* Don't do dynamic stack realignment for long long objects with 19682 -mpreferred-stack-boundary=2. */ 19683 if ((mode == DImode || (type && TYPE_MODE (type) == DImode)) 19684 && (!type || !TYPE_USER_ALIGN (type)) 19685 && (!decl || !DECL_USER_ALIGN (decl))) 19686 return 32; 19687 19688 return align; 19689 } 19690 19691 /* Emit RTL insns to initialize the variable parts of a trampoline. 19692 FNADDR is an RTX for the address of the function's pure code. 19693 CXT is an RTX for the static chain value for the function. */ 19694 void 19695 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt) 19696 { 19697 if (!TARGET_64BIT) 19698 { 19699 /* Compute offset from the end of the jmp to the target function. */ 19700 rtx disp = expand_binop (SImode, sub_optab, fnaddr, 19701 plus_constant (tramp, 10), 19702 NULL_RTX, 1, OPTAB_DIRECT); 19703 emit_move_insn (gen_rtx_MEM (QImode, tramp), 19704 gen_int_mode (0xb9, QImode)); 19705 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt); 19706 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)), 19707 gen_int_mode (0xe9, QImode)); 19708 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp); 19709 } 19710 else 19711 { 19712 int offset = 0; 19713 /* Try to load address using shorter movl instead of movabs. 19714 We may want to support movq for kernel mode, but kernel does not use 19715 trampolines at the moment. */ 19716 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode)) 19717 { 19718 fnaddr = copy_to_mode_reg (DImode, fnaddr); 19719 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19720 gen_int_mode (0xbb41, HImode)); 19721 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)), 19722 gen_lowpart (SImode, fnaddr)); 19723 offset += 6; 19724 } 19725 else 19726 { 19727 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19728 gen_int_mode (0xbb49, HImode)); 19729 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 19730 fnaddr); 19731 offset += 10; 19732 } 19733 /* Load static chain using movabs to r10. */ 19734 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19735 gen_int_mode (0xba49, HImode)); 19736 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)), 19737 cxt); 19738 offset += 10; 19739 /* Jump to the r11 */ 19740 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)), 19741 gen_int_mode (0xff49, HImode)); 19742 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)), 19743 gen_int_mode (0xe3, QImode)); 19744 offset += 3; 19745 gcc_assert (offset <= TRAMPOLINE_SIZE); 19746 } 19747 19748 #ifdef ENABLE_EXECUTE_STACK 19749 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"), 19750 LCT_NORMAL, VOIDmode, 1, tramp, Pmode); 19751 #endif 19752 } 19753 19754 /* Codes for all the SSE/MMX builtins. 
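IX86_BUILTIN_MAX must remain the last entry since it sizes the
   ix86_builtins and ix86_builtins_isa tables; most enumerators correspond
   one-to-one to a __builtin_ia32_* function (IX86_BUILTIN_ADDPS is
   __builtin_ia32_addps, for instance).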
*/ 19755 enum ix86_builtins 19756 { 19757 IX86_BUILTIN_ADDPS, 19758 IX86_BUILTIN_ADDSS, 19759 IX86_BUILTIN_DIVPS, 19760 IX86_BUILTIN_DIVSS, 19761 IX86_BUILTIN_MULPS, 19762 IX86_BUILTIN_MULSS, 19763 IX86_BUILTIN_SUBPS, 19764 IX86_BUILTIN_SUBSS, 19765 19766 IX86_BUILTIN_CMPEQPS, 19767 IX86_BUILTIN_CMPLTPS, 19768 IX86_BUILTIN_CMPLEPS, 19769 IX86_BUILTIN_CMPGTPS, 19770 IX86_BUILTIN_CMPGEPS, 19771 IX86_BUILTIN_CMPNEQPS, 19772 IX86_BUILTIN_CMPNLTPS, 19773 IX86_BUILTIN_CMPNLEPS, 19774 IX86_BUILTIN_CMPNGTPS, 19775 IX86_BUILTIN_CMPNGEPS, 19776 IX86_BUILTIN_CMPORDPS, 19777 IX86_BUILTIN_CMPUNORDPS, 19778 IX86_BUILTIN_CMPEQSS, 19779 IX86_BUILTIN_CMPLTSS, 19780 IX86_BUILTIN_CMPLESS, 19781 IX86_BUILTIN_CMPNEQSS, 19782 IX86_BUILTIN_CMPNLTSS, 19783 IX86_BUILTIN_CMPNLESS, 19784 IX86_BUILTIN_CMPNGTSS, 19785 IX86_BUILTIN_CMPNGESS, 19786 IX86_BUILTIN_CMPORDSS, 19787 IX86_BUILTIN_CMPUNORDSS, 19788 19789 IX86_BUILTIN_COMIEQSS, 19790 IX86_BUILTIN_COMILTSS, 19791 IX86_BUILTIN_COMILESS, 19792 IX86_BUILTIN_COMIGTSS, 19793 IX86_BUILTIN_COMIGESS, 19794 IX86_BUILTIN_COMINEQSS, 19795 IX86_BUILTIN_UCOMIEQSS, 19796 IX86_BUILTIN_UCOMILTSS, 19797 IX86_BUILTIN_UCOMILESS, 19798 IX86_BUILTIN_UCOMIGTSS, 19799 IX86_BUILTIN_UCOMIGESS, 19800 IX86_BUILTIN_UCOMINEQSS, 19801 19802 IX86_BUILTIN_CVTPI2PS, 19803 IX86_BUILTIN_CVTPS2PI, 19804 IX86_BUILTIN_CVTSI2SS, 19805 IX86_BUILTIN_CVTSI642SS, 19806 IX86_BUILTIN_CVTSS2SI, 19807 IX86_BUILTIN_CVTSS2SI64, 19808 IX86_BUILTIN_CVTTPS2PI, 19809 IX86_BUILTIN_CVTTSS2SI, 19810 IX86_BUILTIN_CVTTSS2SI64, 19811 19812 IX86_BUILTIN_MAXPS, 19813 IX86_BUILTIN_MAXSS, 19814 IX86_BUILTIN_MINPS, 19815 IX86_BUILTIN_MINSS, 19816 19817 IX86_BUILTIN_LOADUPS, 19818 IX86_BUILTIN_STOREUPS, 19819 IX86_BUILTIN_MOVSS, 19820 19821 IX86_BUILTIN_MOVHLPS, 19822 IX86_BUILTIN_MOVLHPS, 19823 IX86_BUILTIN_LOADHPS, 19824 IX86_BUILTIN_LOADLPS, 19825 IX86_BUILTIN_STOREHPS, 19826 IX86_BUILTIN_STORELPS, 19827 19828 IX86_BUILTIN_MASKMOVQ, 19829 IX86_BUILTIN_MOVMSKPS, 19830 IX86_BUILTIN_PMOVMSKB, 19831 19832 IX86_BUILTIN_MOVNTPS, 19833 IX86_BUILTIN_MOVNTQ, 19834 19835 IX86_BUILTIN_LOADDQU, 19836 IX86_BUILTIN_STOREDQU, 19837 19838 IX86_BUILTIN_PACKSSWB, 19839 IX86_BUILTIN_PACKSSDW, 19840 IX86_BUILTIN_PACKUSWB, 19841 19842 IX86_BUILTIN_PADDB, 19843 IX86_BUILTIN_PADDW, 19844 IX86_BUILTIN_PADDD, 19845 IX86_BUILTIN_PADDQ, 19846 IX86_BUILTIN_PADDSB, 19847 IX86_BUILTIN_PADDSW, 19848 IX86_BUILTIN_PADDUSB, 19849 IX86_BUILTIN_PADDUSW, 19850 IX86_BUILTIN_PSUBB, 19851 IX86_BUILTIN_PSUBW, 19852 IX86_BUILTIN_PSUBD, 19853 IX86_BUILTIN_PSUBQ, 19854 IX86_BUILTIN_PSUBSB, 19855 IX86_BUILTIN_PSUBSW, 19856 IX86_BUILTIN_PSUBUSB, 19857 IX86_BUILTIN_PSUBUSW, 19858 19859 IX86_BUILTIN_PAND, 19860 IX86_BUILTIN_PANDN, 19861 IX86_BUILTIN_POR, 19862 IX86_BUILTIN_PXOR, 19863 19864 IX86_BUILTIN_PAVGB, 19865 IX86_BUILTIN_PAVGW, 19866 19867 IX86_BUILTIN_PCMPEQB, 19868 IX86_BUILTIN_PCMPEQW, 19869 IX86_BUILTIN_PCMPEQD, 19870 IX86_BUILTIN_PCMPGTB, 19871 IX86_BUILTIN_PCMPGTW, 19872 IX86_BUILTIN_PCMPGTD, 19873 19874 IX86_BUILTIN_PMADDWD, 19875 19876 IX86_BUILTIN_PMAXSW, 19877 IX86_BUILTIN_PMAXUB, 19878 IX86_BUILTIN_PMINSW, 19879 IX86_BUILTIN_PMINUB, 19880 19881 IX86_BUILTIN_PMULHUW, 19882 IX86_BUILTIN_PMULHW, 19883 IX86_BUILTIN_PMULLW, 19884 19885 IX86_BUILTIN_PSADBW, 19886 IX86_BUILTIN_PSHUFW, 19887 19888 IX86_BUILTIN_PSLLW, 19889 IX86_BUILTIN_PSLLD, 19890 IX86_BUILTIN_PSLLQ, 19891 IX86_BUILTIN_PSRAW, 19892 IX86_BUILTIN_PSRAD, 19893 IX86_BUILTIN_PSRLW, 19894 IX86_BUILTIN_PSRLD, 19895 IX86_BUILTIN_PSRLQ, 19896 IX86_BUILTIN_PSLLWI, 19897 IX86_BUILTIN_PSLLDI, 19898 
IX86_BUILTIN_PSLLQI, 19899 IX86_BUILTIN_PSRAWI, 19900 IX86_BUILTIN_PSRADI, 19901 IX86_BUILTIN_PSRLWI, 19902 IX86_BUILTIN_PSRLDI, 19903 IX86_BUILTIN_PSRLQI, 19904 19905 IX86_BUILTIN_PUNPCKHBW, 19906 IX86_BUILTIN_PUNPCKHWD, 19907 IX86_BUILTIN_PUNPCKHDQ, 19908 IX86_BUILTIN_PUNPCKLBW, 19909 IX86_BUILTIN_PUNPCKLWD, 19910 IX86_BUILTIN_PUNPCKLDQ, 19911 19912 IX86_BUILTIN_SHUFPS, 19913 19914 IX86_BUILTIN_RCPPS, 19915 IX86_BUILTIN_RCPSS, 19916 IX86_BUILTIN_RSQRTPS, 19917 IX86_BUILTIN_RSQRTPS_NR, 19918 IX86_BUILTIN_RSQRTSS, 19919 IX86_BUILTIN_RSQRTF, 19920 IX86_BUILTIN_SQRTPS, 19921 IX86_BUILTIN_SQRTPS_NR, 19922 IX86_BUILTIN_SQRTSS, 19923 19924 IX86_BUILTIN_UNPCKHPS, 19925 IX86_BUILTIN_UNPCKLPS, 19926 19927 IX86_BUILTIN_ANDPS, 19928 IX86_BUILTIN_ANDNPS, 19929 IX86_BUILTIN_ORPS, 19930 IX86_BUILTIN_XORPS, 19931 19932 IX86_BUILTIN_EMMS, 19933 IX86_BUILTIN_LDMXCSR, 19934 IX86_BUILTIN_STMXCSR, 19935 IX86_BUILTIN_SFENCE, 19936 19937 /* 3DNow! Original */ 19938 IX86_BUILTIN_FEMMS, 19939 IX86_BUILTIN_PAVGUSB, 19940 IX86_BUILTIN_PF2ID, 19941 IX86_BUILTIN_PFACC, 19942 IX86_BUILTIN_PFADD, 19943 IX86_BUILTIN_PFCMPEQ, 19944 IX86_BUILTIN_PFCMPGE, 19945 IX86_BUILTIN_PFCMPGT, 19946 IX86_BUILTIN_PFMAX, 19947 IX86_BUILTIN_PFMIN, 19948 IX86_BUILTIN_PFMUL, 19949 IX86_BUILTIN_PFRCP, 19950 IX86_BUILTIN_PFRCPIT1, 19951 IX86_BUILTIN_PFRCPIT2, 19952 IX86_BUILTIN_PFRSQIT1, 19953 IX86_BUILTIN_PFRSQRT, 19954 IX86_BUILTIN_PFSUB, 19955 IX86_BUILTIN_PFSUBR, 19956 IX86_BUILTIN_PI2FD, 19957 IX86_BUILTIN_PMULHRW, 19958 19959 /* 3DNow! Athlon Extensions */ 19960 IX86_BUILTIN_PF2IW, 19961 IX86_BUILTIN_PFNACC, 19962 IX86_BUILTIN_PFPNACC, 19963 IX86_BUILTIN_PI2FW, 19964 IX86_BUILTIN_PSWAPDSI, 19965 IX86_BUILTIN_PSWAPDSF, 19966 19967 /* SSE2 */ 19968 IX86_BUILTIN_ADDPD, 19969 IX86_BUILTIN_ADDSD, 19970 IX86_BUILTIN_DIVPD, 19971 IX86_BUILTIN_DIVSD, 19972 IX86_BUILTIN_MULPD, 19973 IX86_BUILTIN_MULSD, 19974 IX86_BUILTIN_SUBPD, 19975 IX86_BUILTIN_SUBSD, 19976 19977 IX86_BUILTIN_CMPEQPD, 19978 IX86_BUILTIN_CMPLTPD, 19979 IX86_BUILTIN_CMPLEPD, 19980 IX86_BUILTIN_CMPGTPD, 19981 IX86_BUILTIN_CMPGEPD, 19982 IX86_BUILTIN_CMPNEQPD, 19983 IX86_BUILTIN_CMPNLTPD, 19984 IX86_BUILTIN_CMPNLEPD, 19985 IX86_BUILTIN_CMPNGTPD, 19986 IX86_BUILTIN_CMPNGEPD, 19987 IX86_BUILTIN_CMPORDPD, 19988 IX86_BUILTIN_CMPUNORDPD, 19989 IX86_BUILTIN_CMPEQSD, 19990 IX86_BUILTIN_CMPLTSD, 19991 IX86_BUILTIN_CMPLESD, 19992 IX86_BUILTIN_CMPNEQSD, 19993 IX86_BUILTIN_CMPNLTSD, 19994 IX86_BUILTIN_CMPNLESD, 19995 IX86_BUILTIN_CMPORDSD, 19996 IX86_BUILTIN_CMPUNORDSD, 19997 19998 IX86_BUILTIN_COMIEQSD, 19999 IX86_BUILTIN_COMILTSD, 20000 IX86_BUILTIN_COMILESD, 20001 IX86_BUILTIN_COMIGTSD, 20002 IX86_BUILTIN_COMIGESD, 20003 IX86_BUILTIN_COMINEQSD, 20004 IX86_BUILTIN_UCOMIEQSD, 20005 IX86_BUILTIN_UCOMILTSD, 20006 IX86_BUILTIN_UCOMILESD, 20007 IX86_BUILTIN_UCOMIGTSD, 20008 IX86_BUILTIN_UCOMIGESD, 20009 IX86_BUILTIN_UCOMINEQSD, 20010 20011 IX86_BUILTIN_MAXPD, 20012 IX86_BUILTIN_MAXSD, 20013 IX86_BUILTIN_MINPD, 20014 IX86_BUILTIN_MINSD, 20015 20016 IX86_BUILTIN_ANDPD, 20017 IX86_BUILTIN_ANDNPD, 20018 IX86_BUILTIN_ORPD, 20019 IX86_BUILTIN_XORPD, 20020 20021 IX86_BUILTIN_SQRTPD, 20022 IX86_BUILTIN_SQRTSD, 20023 20024 IX86_BUILTIN_UNPCKHPD, 20025 IX86_BUILTIN_UNPCKLPD, 20026 20027 IX86_BUILTIN_SHUFPD, 20028 20029 IX86_BUILTIN_LOADUPD, 20030 IX86_BUILTIN_STOREUPD, 20031 IX86_BUILTIN_MOVSD, 20032 20033 IX86_BUILTIN_LOADHPD, 20034 IX86_BUILTIN_LOADLPD, 20035 20036 IX86_BUILTIN_CVTDQ2PD, 20037 IX86_BUILTIN_CVTDQ2PS, 20038 20039 IX86_BUILTIN_CVTPD2DQ, 20040 IX86_BUILTIN_CVTPD2PI, 20041 
IX86_BUILTIN_CVTPD2PS, 20042 IX86_BUILTIN_CVTTPD2DQ, 20043 IX86_BUILTIN_CVTTPD2PI, 20044 20045 IX86_BUILTIN_CVTPI2PD, 20046 IX86_BUILTIN_CVTSI2SD, 20047 IX86_BUILTIN_CVTSI642SD, 20048 20049 IX86_BUILTIN_CVTSD2SI, 20050 IX86_BUILTIN_CVTSD2SI64, 20051 IX86_BUILTIN_CVTSD2SS, 20052 IX86_BUILTIN_CVTSS2SD, 20053 IX86_BUILTIN_CVTTSD2SI, 20054 IX86_BUILTIN_CVTTSD2SI64, 20055 20056 IX86_BUILTIN_CVTPS2DQ, 20057 IX86_BUILTIN_CVTPS2PD, 20058 IX86_BUILTIN_CVTTPS2DQ, 20059 20060 IX86_BUILTIN_MOVNTI, 20061 IX86_BUILTIN_MOVNTPD, 20062 IX86_BUILTIN_MOVNTDQ, 20063 20064 IX86_BUILTIN_MOVQ128, 20065 20066 /* SSE2 MMX */ 20067 IX86_BUILTIN_MASKMOVDQU, 20068 IX86_BUILTIN_MOVMSKPD, 20069 IX86_BUILTIN_PMOVMSKB128, 20070 20071 IX86_BUILTIN_PACKSSWB128, 20072 IX86_BUILTIN_PACKSSDW128, 20073 IX86_BUILTIN_PACKUSWB128, 20074 20075 IX86_BUILTIN_PADDB128, 20076 IX86_BUILTIN_PADDW128, 20077 IX86_BUILTIN_PADDD128, 20078 IX86_BUILTIN_PADDQ128, 20079 IX86_BUILTIN_PADDSB128, 20080 IX86_BUILTIN_PADDSW128, 20081 IX86_BUILTIN_PADDUSB128, 20082 IX86_BUILTIN_PADDUSW128, 20083 IX86_BUILTIN_PSUBB128, 20084 IX86_BUILTIN_PSUBW128, 20085 IX86_BUILTIN_PSUBD128, 20086 IX86_BUILTIN_PSUBQ128, 20087 IX86_BUILTIN_PSUBSB128, 20088 IX86_BUILTIN_PSUBSW128, 20089 IX86_BUILTIN_PSUBUSB128, 20090 IX86_BUILTIN_PSUBUSW128, 20091 20092 IX86_BUILTIN_PAND128, 20093 IX86_BUILTIN_PANDN128, 20094 IX86_BUILTIN_POR128, 20095 IX86_BUILTIN_PXOR128, 20096 20097 IX86_BUILTIN_PAVGB128, 20098 IX86_BUILTIN_PAVGW128, 20099 20100 IX86_BUILTIN_PCMPEQB128, 20101 IX86_BUILTIN_PCMPEQW128, 20102 IX86_BUILTIN_PCMPEQD128, 20103 IX86_BUILTIN_PCMPGTB128, 20104 IX86_BUILTIN_PCMPGTW128, 20105 IX86_BUILTIN_PCMPGTD128, 20106 20107 IX86_BUILTIN_PMADDWD128, 20108 20109 IX86_BUILTIN_PMAXSW128, 20110 IX86_BUILTIN_PMAXUB128, 20111 IX86_BUILTIN_PMINSW128, 20112 IX86_BUILTIN_PMINUB128, 20113 20114 IX86_BUILTIN_PMULUDQ, 20115 IX86_BUILTIN_PMULUDQ128, 20116 IX86_BUILTIN_PMULHUW128, 20117 IX86_BUILTIN_PMULHW128, 20118 IX86_BUILTIN_PMULLW128, 20119 20120 IX86_BUILTIN_PSADBW128, 20121 IX86_BUILTIN_PSHUFHW, 20122 IX86_BUILTIN_PSHUFLW, 20123 IX86_BUILTIN_PSHUFD, 20124 20125 IX86_BUILTIN_PSLLDQI128, 20126 IX86_BUILTIN_PSLLWI128, 20127 IX86_BUILTIN_PSLLDI128, 20128 IX86_BUILTIN_PSLLQI128, 20129 IX86_BUILTIN_PSRAWI128, 20130 IX86_BUILTIN_PSRADI128, 20131 IX86_BUILTIN_PSRLDQI128, 20132 IX86_BUILTIN_PSRLWI128, 20133 IX86_BUILTIN_PSRLDI128, 20134 IX86_BUILTIN_PSRLQI128, 20135 20136 IX86_BUILTIN_PSLLDQ128, 20137 IX86_BUILTIN_PSLLW128, 20138 IX86_BUILTIN_PSLLD128, 20139 IX86_BUILTIN_PSLLQ128, 20140 IX86_BUILTIN_PSRAW128, 20141 IX86_BUILTIN_PSRAD128, 20142 IX86_BUILTIN_PSRLW128, 20143 IX86_BUILTIN_PSRLD128, 20144 IX86_BUILTIN_PSRLQ128, 20145 20146 IX86_BUILTIN_PUNPCKHBW128, 20147 IX86_BUILTIN_PUNPCKHWD128, 20148 IX86_BUILTIN_PUNPCKHDQ128, 20149 IX86_BUILTIN_PUNPCKHQDQ128, 20150 IX86_BUILTIN_PUNPCKLBW128, 20151 IX86_BUILTIN_PUNPCKLWD128, 20152 IX86_BUILTIN_PUNPCKLDQ128, 20153 IX86_BUILTIN_PUNPCKLQDQ128, 20154 20155 IX86_BUILTIN_CLFLUSH, 20156 IX86_BUILTIN_MFENCE, 20157 IX86_BUILTIN_LFENCE, 20158 20159 /* SSE3. */ 20160 IX86_BUILTIN_ADDSUBPS, 20161 IX86_BUILTIN_HADDPS, 20162 IX86_BUILTIN_HSUBPS, 20163 IX86_BUILTIN_MOVSHDUP, 20164 IX86_BUILTIN_MOVSLDUP, 20165 IX86_BUILTIN_ADDSUBPD, 20166 IX86_BUILTIN_HADDPD, 20167 IX86_BUILTIN_HSUBPD, 20168 IX86_BUILTIN_LDDQU, 20169 20170 IX86_BUILTIN_MONITOR, 20171 IX86_BUILTIN_MWAIT, 20172 20173 /* SSSE3. 
*/ 20174 IX86_BUILTIN_PHADDW, 20175 IX86_BUILTIN_PHADDD, 20176 IX86_BUILTIN_PHADDSW, 20177 IX86_BUILTIN_PHSUBW, 20178 IX86_BUILTIN_PHSUBD, 20179 IX86_BUILTIN_PHSUBSW, 20180 IX86_BUILTIN_PMADDUBSW, 20181 IX86_BUILTIN_PMULHRSW, 20182 IX86_BUILTIN_PSHUFB, 20183 IX86_BUILTIN_PSIGNB, 20184 IX86_BUILTIN_PSIGNW, 20185 IX86_BUILTIN_PSIGND, 20186 IX86_BUILTIN_PALIGNR, 20187 IX86_BUILTIN_PABSB, 20188 IX86_BUILTIN_PABSW, 20189 IX86_BUILTIN_PABSD, 20190 20191 IX86_BUILTIN_PHADDW128, 20192 IX86_BUILTIN_PHADDD128, 20193 IX86_BUILTIN_PHADDSW128, 20194 IX86_BUILTIN_PHSUBW128, 20195 IX86_BUILTIN_PHSUBD128, 20196 IX86_BUILTIN_PHSUBSW128, 20197 IX86_BUILTIN_PMADDUBSW128, 20198 IX86_BUILTIN_PMULHRSW128, 20199 IX86_BUILTIN_PSHUFB128, 20200 IX86_BUILTIN_PSIGNB128, 20201 IX86_BUILTIN_PSIGNW128, 20202 IX86_BUILTIN_PSIGND128, 20203 IX86_BUILTIN_PALIGNR128, 20204 IX86_BUILTIN_PABSB128, 20205 IX86_BUILTIN_PABSW128, 20206 IX86_BUILTIN_PABSD128, 20207 20208 /* AMDFAM10 - SSE4A New Instructions. */ 20209 IX86_BUILTIN_MOVNTSD, 20210 IX86_BUILTIN_MOVNTSS, 20211 IX86_BUILTIN_EXTRQI, 20212 IX86_BUILTIN_EXTRQ, 20213 IX86_BUILTIN_INSERTQI, 20214 IX86_BUILTIN_INSERTQ, 20215 20216 /* SSE4.1. */ 20217 IX86_BUILTIN_BLENDPD, 20218 IX86_BUILTIN_BLENDPS, 20219 IX86_BUILTIN_BLENDVPD, 20220 IX86_BUILTIN_BLENDVPS, 20221 IX86_BUILTIN_PBLENDVB128, 20222 IX86_BUILTIN_PBLENDW128, 20223 20224 IX86_BUILTIN_DPPD, 20225 IX86_BUILTIN_DPPS, 20226 20227 IX86_BUILTIN_INSERTPS128, 20228 20229 IX86_BUILTIN_MOVNTDQA, 20230 IX86_BUILTIN_MPSADBW128, 20231 IX86_BUILTIN_PACKUSDW128, 20232 IX86_BUILTIN_PCMPEQQ, 20233 IX86_BUILTIN_PHMINPOSUW128, 20234 20235 IX86_BUILTIN_PMAXSB128, 20236 IX86_BUILTIN_PMAXSD128, 20237 IX86_BUILTIN_PMAXUD128, 20238 IX86_BUILTIN_PMAXUW128, 20239 20240 IX86_BUILTIN_PMINSB128, 20241 IX86_BUILTIN_PMINSD128, 20242 IX86_BUILTIN_PMINUD128, 20243 IX86_BUILTIN_PMINUW128, 20244 20245 IX86_BUILTIN_PMOVSXBW128, 20246 IX86_BUILTIN_PMOVSXBD128, 20247 IX86_BUILTIN_PMOVSXBQ128, 20248 IX86_BUILTIN_PMOVSXWD128, 20249 IX86_BUILTIN_PMOVSXWQ128, 20250 IX86_BUILTIN_PMOVSXDQ128, 20251 20252 IX86_BUILTIN_PMOVZXBW128, 20253 IX86_BUILTIN_PMOVZXBD128, 20254 IX86_BUILTIN_PMOVZXBQ128, 20255 IX86_BUILTIN_PMOVZXWD128, 20256 IX86_BUILTIN_PMOVZXWQ128, 20257 IX86_BUILTIN_PMOVZXDQ128, 20258 20259 IX86_BUILTIN_PMULDQ128, 20260 IX86_BUILTIN_PMULLD128, 20261 20262 IX86_BUILTIN_ROUNDPD, 20263 IX86_BUILTIN_ROUNDPS, 20264 IX86_BUILTIN_ROUNDSD, 20265 IX86_BUILTIN_ROUNDSS, 20266 20267 IX86_BUILTIN_PTESTZ, 20268 IX86_BUILTIN_PTESTC, 20269 IX86_BUILTIN_PTESTNZC, 20270 20271 IX86_BUILTIN_VEC_INIT_V2SI, 20272 IX86_BUILTIN_VEC_INIT_V4HI, 20273 IX86_BUILTIN_VEC_INIT_V8QI, 20274 IX86_BUILTIN_VEC_EXT_V2DF, 20275 IX86_BUILTIN_VEC_EXT_V2DI, 20276 IX86_BUILTIN_VEC_EXT_V4SF, 20277 IX86_BUILTIN_VEC_EXT_V4SI, 20278 IX86_BUILTIN_VEC_EXT_V8HI, 20279 IX86_BUILTIN_VEC_EXT_V2SI, 20280 IX86_BUILTIN_VEC_EXT_V4HI, 20281 IX86_BUILTIN_VEC_EXT_V16QI, 20282 IX86_BUILTIN_VEC_SET_V2DI, 20283 IX86_BUILTIN_VEC_SET_V4SF, 20284 IX86_BUILTIN_VEC_SET_V4SI, 20285 IX86_BUILTIN_VEC_SET_V8HI, 20286 IX86_BUILTIN_VEC_SET_V4HI, 20287 IX86_BUILTIN_VEC_SET_V16QI, 20288 20289 IX86_BUILTIN_VEC_PACK_SFIX, 20290 20291 /* SSE4.2. 
*/ 20292 IX86_BUILTIN_CRC32QI, 20293 IX86_BUILTIN_CRC32HI, 20294 IX86_BUILTIN_CRC32SI, 20295 IX86_BUILTIN_CRC32DI, 20296 20297 IX86_BUILTIN_PCMPESTRI128, 20298 IX86_BUILTIN_PCMPESTRM128, 20299 IX86_BUILTIN_PCMPESTRA128, 20300 IX86_BUILTIN_PCMPESTRC128, 20301 IX86_BUILTIN_PCMPESTRO128, 20302 IX86_BUILTIN_PCMPESTRS128, 20303 IX86_BUILTIN_PCMPESTRZ128, 20304 IX86_BUILTIN_PCMPISTRI128, 20305 IX86_BUILTIN_PCMPISTRM128, 20306 IX86_BUILTIN_PCMPISTRA128, 20307 IX86_BUILTIN_PCMPISTRC128, 20308 IX86_BUILTIN_PCMPISTRO128, 20309 IX86_BUILTIN_PCMPISTRS128, 20310 IX86_BUILTIN_PCMPISTRZ128, 20311 20312 IX86_BUILTIN_PCMPGTQ, 20313 20314 /* AES instructions */ 20315 IX86_BUILTIN_AESENC128, 20316 IX86_BUILTIN_AESENCLAST128, 20317 IX86_BUILTIN_AESDEC128, 20318 IX86_BUILTIN_AESDECLAST128, 20319 IX86_BUILTIN_AESIMC128, 20320 IX86_BUILTIN_AESKEYGENASSIST128, 20321 20322 /* PCLMUL instruction */ 20323 IX86_BUILTIN_PCLMULQDQ128, 20324 20325 /* AVX */ 20326 IX86_BUILTIN_ADDPD256, 20327 IX86_BUILTIN_ADDPS256, 20328 IX86_BUILTIN_ADDSUBPD256, 20329 IX86_BUILTIN_ADDSUBPS256, 20330 IX86_BUILTIN_ANDPD256, 20331 IX86_BUILTIN_ANDPS256, 20332 IX86_BUILTIN_ANDNPD256, 20333 IX86_BUILTIN_ANDNPS256, 20334 IX86_BUILTIN_BLENDPD256, 20335 IX86_BUILTIN_BLENDPS256, 20336 IX86_BUILTIN_BLENDVPD256, 20337 IX86_BUILTIN_BLENDVPS256, 20338 IX86_BUILTIN_DIVPD256, 20339 IX86_BUILTIN_DIVPS256, 20340 IX86_BUILTIN_DPPS256, 20341 IX86_BUILTIN_HADDPD256, 20342 IX86_BUILTIN_HADDPS256, 20343 IX86_BUILTIN_HSUBPD256, 20344 IX86_BUILTIN_HSUBPS256, 20345 IX86_BUILTIN_MAXPD256, 20346 IX86_BUILTIN_MAXPS256, 20347 IX86_BUILTIN_MINPD256, 20348 IX86_BUILTIN_MINPS256, 20349 IX86_BUILTIN_MULPD256, 20350 IX86_BUILTIN_MULPS256, 20351 IX86_BUILTIN_ORPD256, 20352 IX86_BUILTIN_ORPS256, 20353 IX86_BUILTIN_SHUFPD256, 20354 IX86_BUILTIN_SHUFPS256, 20355 IX86_BUILTIN_SUBPD256, 20356 IX86_BUILTIN_SUBPS256, 20357 IX86_BUILTIN_XORPD256, 20358 IX86_BUILTIN_XORPS256, 20359 IX86_BUILTIN_CMPSD, 20360 IX86_BUILTIN_CMPSS, 20361 IX86_BUILTIN_CMPPD, 20362 IX86_BUILTIN_CMPPS, 20363 IX86_BUILTIN_CMPPD256, 20364 IX86_BUILTIN_CMPPS256, 20365 IX86_BUILTIN_CVTDQ2PD256, 20366 IX86_BUILTIN_CVTDQ2PS256, 20367 IX86_BUILTIN_CVTPD2PS256, 20368 IX86_BUILTIN_CVTPS2DQ256, 20369 IX86_BUILTIN_CVTPS2PD256, 20370 IX86_BUILTIN_CVTTPD2DQ256, 20371 IX86_BUILTIN_CVTPD2DQ256, 20372 IX86_BUILTIN_CVTTPS2DQ256, 20373 IX86_BUILTIN_EXTRACTF128PD256, 20374 IX86_BUILTIN_EXTRACTF128PS256, 20375 IX86_BUILTIN_EXTRACTF128SI256, 20376 IX86_BUILTIN_VZEROALL, 20377 IX86_BUILTIN_VZEROUPPER, 20378 IX86_BUILTIN_VZEROUPPER_REX64, 20379 IX86_BUILTIN_VPERMILVARPD, 20380 IX86_BUILTIN_VPERMILVARPS, 20381 IX86_BUILTIN_VPERMILVARPD256, 20382 IX86_BUILTIN_VPERMILVARPS256, 20383 IX86_BUILTIN_VPERMILPD, 20384 IX86_BUILTIN_VPERMILPS, 20385 IX86_BUILTIN_VPERMILPD256, 20386 IX86_BUILTIN_VPERMILPS256, 20387 IX86_BUILTIN_VPERM2F128PD256, 20388 IX86_BUILTIN_VPERM2F128PS256, 20389 IX86_BUILTIN_VPERM2F128SI256, 20390 IX86_BUILTIN_VBROADCASTSS, 20391 IX86_BUILTIN_VBROADCASTSD256, 20392 IX86_BUILTIN_VBROADCASTSS256, 20393 IX86_BUILTIN_VBROADCASTPD256, 20394 IX86_BUILTIN_VBROADCASTPS256, 20395 IX86_BUILTIN_VINSERTF128PD256, 20396 IX86_BUILTIN_VINSERTF128PS256, 20397 IX86_BUILTIN_VINSERTF128SI256, 20398 IX86_BUILTIN_LOADUPD256, 20399 IX86_BUILTIN_LOADUPS256, 20400 IX86_BUILTIN_STOREUPD256, 20401 IX86_BUILTIN_STOREUPS256, 20402 IX86_BUILTIN_LDDQU256, 20403 IX86_BUILTIN_MOVNTDQ256, 20404 IX86_BUILTIN_MOVNTPD256, 20405 IX86_BUILTIN_MOVNTPS256, 20406 IX86_BUILTIN_LOADDQU256, 20407 IX86_BUILTIN_STOREDQU256, 20408 IX86_BUILTIN_MASKLOADPD, 20409 
IX86_BUILTIN_MASKLOADPS, 20410 IX86_BUILTIN_MASKSTOREPD, 20411 IX86_BUILTIN_MASKSTOREPS, 20412 IX86_BUILTIN_MASKLOADPD256, 20413 IX86_BUILTIN_MASKLOADPS256, 20414 IX86_BUILTIN_MASKSTOREPD256, 20415 IX86_BUILTIN_MASKSTOREPS256, 20416 IX86_BUILTIN_MOVSHDUP256, 20417 IX86_BUILTIN_MOVSLDUP256, 20418 IX86_BUILTIN_MOVDDUP256, 20419 20420 IX86_BUILTIN_SQRTPD256, 20421 IX86_BUILTIN_SQRTPS256, 20422 IX86_BUILTIN_SQRTPS_NR256, 20423 IX86_BUILTIN_RSQRTPS256, 20424 IX86_BUILTIN_RSQRTPS_NR256, 20425 20426 IX86_BUILTIN_RCPPS256, 20427 20428 IX86_BUILTIN_ROUNDPD256, 20429 IX86_BUILTIN_ROUNDPS256, 20430 20431 IX86_BUILTIN_UNPCKHPD256, 20432 IX86_BUILTIN_UNPCKLPD256, 20433 IX86_BUILTIN_UNPCKHPS256, 20434 IX86_BUILTIN_UNPCKLPS256, 20435 20436 IX86_BUILTIN_SI256_SI, 20437 IX86_BUILTIN_PS256_PS, 20438 IX86_BUILTIN_PD256_PD, 20439 IX86_BUILTIN_SI_SI256, 20440 IX86_BUILTIN_PS_PS256, 20441 IX86_BUILTIN_PD_PD256, 20442 20443 IX86_BUILTIN_VTESTZPD, 20444 IX86_BUILTIN_VTESTCPD, 20445 IX86_BUILTIN_VTESTNZCPD, 20446 IX86_BUILTIN_VTESTZPS, 20447 IX86_BUILTIN_VTESTCPS, 20448 IX86_BUILTIN_VTESTNZCPS, 20449 IX86_BUILTIN_VTESTZPD256, 20450 IX86_BUILTIN_VTESTCPD256, 20451 IX86_BUILTIN_VTESTNZCPD256, 20452 IX86_BUILTIN_VTESTZPS256, 20453 IX86_BUILTIN_VTESTCPS256, 20454 IX86_BUILTIN_VTESTNZCPS256, 20455 IX86_BUILTIN_PTESTZ256, 20456 IX86_BUILTIN_PTESTC256, 20457 IX86_BUILTIN_PTESTNZC256, 20458 20459 IX86_BUILTIN_MOVMSKPD256, 20460 IX86_BUILTIN_MOVMSKPS256, 20461 20462 /* TFmode support builtins. */ 20463 IX86_BUILTIN_INFQ, 20464 IX86_BUILTIN_FABSQ, 20465 IX86_BUILTIN_COPYSIGNQ, 20466 20467 /* SSE5 instructions */ 20468 IX86_BUILTIN_FMADDSS, 20469 IX86_BUILTIN_FMADDSD, 20470 IX86_BUILTIN_FMADDPS, 20471 IX86_BUILTIN_FMADDPD, 20472 IX86_BUILTIN_FMSUBSS, 20473 IX86_BUILTIN_FMSUBSD, 20474 IX86_BUILTIN_FMSUBPS, 20475 IX86_BUILTIN_FMSUBPD, 20476 IX86_BUILTIN_FNMADDSS, 20477 IX86_BUILTIN_FNMADDSD, 20478 IX86_BUILTIN_FNMADDPS, 20479 IX86_BUILTIN_FNMADDPD, 20480 IX86_BUILTIN_FNMSUBSS, 20481 IX86_BUILTIN_FNMSUBSD, 20482 IX86_BUILTIN_FNMSUBPS, 20483 IX86_BUILTIN_FNMSUBPD, 20484 IX86_BUILTIN_PCMOV, 20485 IX86_BUILTIN_PCMOV_V2DI, 20486 IX86_BUILTIN_PCMOV_V4SI, 20487 IX86_BUILTIN_PCMOV_V8HI, 20488 IX86_BUILTIN_PCMOV_V16QI, 20489 IX86_BUILTIN_PCMOV_V4SF, 20490 IX86_BUILTIN_PCMOV_V2DF, 20491 IX86_BUILTIN_PPERM, 20492 IX86_BUILTIN_PERMPS, 20493 IX86_BUILTIN_PERMPD, 20494 IX86_BUILTIN_PMACSSWW, 20495 IX86_BUILTIN_PMACSWW, 20496 IX86_BUILTIN_PMACSSWD, 20497 IX86_BUILTIN_PMACSWD, 20498 IX86_BUILTIN_PMACSSDD, 20499 IX86_BUILTIN_PMACSDD, 20500 IX86_BUILTIN_PMACSSDQL, 20501 IX86_BUILTIN_PMACSSDQH, 20502 IX86_BUILTIN_PMACSDQL, 20503 IX86_BUILTIN_PMACSDQH, 20504 IX86_BUILTIN_PMADCSSWD, 20505 IX86_BUILTIN_PMADCSWD, 20506 IX86_BUILTIN_PHADDBW, 20507 IX86_BUILTIN_PHADDBD, 20508 IX86_BUILTIN_PHADDBQ, 20509 IX86_BUILTIN_PHADDWD, 20510 IX86_BUILTIN_PHADDWQ, 20511 IX86_BUILTIN_PHADDDQ, 20512 IX86_BUILTIN_PHADDUBW, 20513 IX86_BUILTIN_PHADDUBD, 20514 IX86_BUILTIN_PHADDUBQ, 20515 IX86_BUILTIN_PHADDUWD, 20516 IX86_BUILTIN_PHADDUWQ, 20517 IX86_BUILTIN_PHADDUDQ, 20518 IX86_BUILTIN_PHSUBBW, 20519 IX86_BUILTIN_PHSUBWD, 20520 IX86_BUILTIN_PHSUBDQ, 20521 IX86_BUILTIN_PROTB, 20522 IX86_BUILTIN_PROTW, 20523 IX86_BUILTIN_PROTD, 20524 IX86_BUILTIN_PROTQ, 20525 IX86_BUILTIN_PROTB_IMM, 20526 IX86_BUILTIN_PROTW_IMM, 20527 IX86_BUILTIN_PROTD_IMM, 20528 IX86_BUILTIN_PROTQ_IMM, 20529 IX86_BUILTIN_PSHLB, 20530 IX86_BUILTIN_PSHLW, 20531 IX86_BUILTIN_PSHLD, 20532 IX86_BUILTIN_PSHLQ, 20533 IX86_BUILTIN_PSHAB, 20534 IX86_BUILTIN_PSHAW, 20535 IX86_BUILTIN_PSHAD, 20536 
IX86_BUILTIN_PSHAQ, 20537 IX86_BUILTIN_FRCZSS, 20538 IX86_BUILTIN_FRCZSD, 20539 IX86_BUILTIN_FRCZPS, 20540 IX86_BUILTIN_FRCZPD, 20541 IX86_BUILTIN_CVTPH2PS, 20542 IX86_BUILTIN_CVTPS2PH, 20543 20544 IX86_BUILTIN_COMEQSS, 20545 IX86_BUILTIN_COMNESS, 20546 IX86_BUILTIN_COMLTSS, 20547 IX86_BUILTIN_COMLESS, 20548 IX86_BUILTIN_COMGTSS, 20549 IX86_BUILTIN_COMGESS, 20550 IX86_BUILTIN_COMUEQSS, 20551 IX86_BUILTIN_COMUNESS, 20552 IX86_BUILTIN_COMULTSS, 20553 IX86_BUILTIN_COMULESS, 20554 IX86_BUILTIN_COMUGTSS, 20555 IX86_BUILTIN_COMUGESS, 20556 IX86_BUILTIN_COMORDSS, 20557 IX86_BUILTIN_COMUNORDSS, 20558 IX86_BUILTIN_COMFALSESS, 20559 IX86_BUILTIN_COMTRUESS, 20560 20561 IX86_BUILTIN_COMEQSD, 20562 IX86_BUILTIN_COMNESD, 20563 IX86_BUILTIN_COMLTSD, 20564 IX86_BUILTIN_COMLESD, 20565 IX86_BUILTIN_COMGTSD, 20566 IX86_BUILTIN_COMGESD, 20567 IX86_BUILTIN_COMUEQSD, 20568 IX86_BUILTIN_COMUNESD, 20569 IX86_BUILTIN_COMULTSD, 20570 IX86_BUILTIN_COMULESD, 20571 IX86_BUILTIN_COMUGTSD, 20572 IX86_BUILTIN_COMUGESD, 20573 IX86_BUILTIN_COMORDSD, 20574 IX86_BUILTIN_COMUNORDSD, 20575 IX86_BUILTIN_COMFALSESD, 20576 IX86_BUILTIN_COMTRUESD, 20577 20578 IX86_BUILTIN_COMEQPS, 20579 IX86_BUILTIN_COMNEPS, 20580 IX86_BUILTIN_COMLTPS, 20581 IX86_BUILTIN_COMLEPS, 20582 IX86_BUILTIN_COMGTPS, 20583 IX86_BUILTIN_COMGEPS, 20584 IX86_BUILTIN_COMUEQPS, 20585 IX86_BUILTIN_COMUNEPS, 20586 IX86_BUILTIN_COMULTPS, 20587 IX86_BUILTIN_COMULEPS, 20588 IX86_BUILTIN_COMUGTPS, 20589 IX86_BUILTIN_COMUGEPS, 20590 IX86_BUILTIN_COMORDPS, 20591 IX86_BUILTIN_COMUNORDPS, 20592 IX86_BUILTIN_COMFALSEPS, 20593 IX86_BUILTIN_COMTRUEPS, 20594 20595 IX86_BUILTIN_COMEQPD, 20596 IX86_BUILTIN_COMNEPD, 20597 IX86_BUILTIN_COMLTPD, 20598 IX86_BUILTIN_COMLEPD, 20599 IX86_BUILTIN_COMGTPD, 20600 IX86_BUILTIN_COMGEPD, 20601 IX86_BUILTIN_COMUEQPD, 20602 IX86_BUILTIN_COMUNEPD, 20603 IX86_BUILTIN_COMULTPD, 20604 IX86_BUILTIN_COMULEPD, 20605 IX86_BUILTIN_COMUGTPD, 20606 IX86_BUILTIN_COMUGEPD, 20607 IX86_BUILTIN_COMORDPD, 20608 IX86_BUILTIN_COMUNORDPD, 20609 IX86_BUILTIN_COMFALSEPD, 20610 IX86_BUILTIN_COMTRUEPD, 20611 20612 IX86_BUILTIN_PCOMEQUB, 20613 IX86_BUILTIN_PCOMNEUB, 20614 IX86_BUILTIN_PCOMLTUB, 20615 IX86_BUILTIN_PCOMLEUB, 20616 IX86_BUILTIN_PCOMGTUB, 20617 IX86_BUILTIN_PCOMGEUB, 20618 IX86_BUILTIN_PCOMFALSEUB, 20619 IX86_BUILTIN_PCOMTRUEUB, 20620 IX86_BUILTIN_PCOMEQUW, 20621 IX86_BUILTIN_PCOMNEUW, 20622 IX86_BUILTIN_PCOMLTUW, 20623 IX86_BUILTIN_PCOMLEUW, 20624 IX86_BUILTIN_PCOMGTUW, 20625 IX86_BUILTIN_PCOMGEUW, 20626 IX86_BUILTIN_PCOMFALSEUW, 20627 IX86_BUILTIN_PCOMTRUEUW, 20628 IX86_BUILTIN_PCOMEQUD, 20629 IX86_BUILTIN_PCOMNEUD, 20630 IX86_BUILTIN_PCOMLTUD, 20631 IX86_BUILTIN_PCOMLEUD, 20632 IX86_BUILTIN_PCOMGTUD, 20633 IX86_BUILTIN_PCOMGEUD, 20634 IX86_BUILTIN_PCOMFALSEUD, 20635 IX86_BUILTIN_PCOMTRUEUD, 20636 IX86_BUILTIN_PCOMEQUQ, 20637 IX86_BUILTIN_PCOMNEUQ, 20638 IX86_BUILTIN_PCOMLTUQ, 20639 IX86_BUILTIN_PCOMLEUQ, 20640 IX86_BUILTIN_PCOMGTUQ, 20641 IX86_BUILTIN_PCOMGEUQ, 20642 IX86_BUILTIN_PCOMFALSEUQ, 20643 IX86_BUILTIN_PCOMTRUEUQ, 20644 20645 IX86_BUILTIN_PCOMEQB, 20646 IX86_BUILTIN_PCOMNEB, 20647 IX86_BUILTIN_PCOMLTB, 20648 IX86_BUILTIN_PCOMLEB, 20649 IX86_BUILTIN_PCOMGTB, 20650 IX86_BUILTIN_PCOMGEB, 20651 IX86_BUILTIN_PCOMFALSEB, 20652 IX86_BUILTIN_PCOMTRUEB, 20653 IX86_BUILTIN_PCOMEQW, 20654 IX86_BUILTIN_PCOMNEW, 20655 IX86_BUILTIN_PCOMLTW, 20656 IX86_BUILTIN_PCOMLEW, 20657 IX86_BUILTIN_PCOMGTW, 20658 IX86_BUILTIN_PCOMGEW, 20659 IX86_BUILTIN_PCOMFALSEW, 20660 IX86_BUILTIN_PCOMTRUEW, 20661 IX86_BUILTIN_PCOMEQD, 20662 IX86_BUILTIN_PCOMNED, 20663 
IX86_BUILTIN_PCOMLTD, 20664 IX86_BUILTIN_PCOMLED, 20665 IX86_BUILTIN_PCOMGTD, 20666 IX86_BUILTIN_PCOMGED, 20667 IX86_BUILTIN_PCOMFALSED, 20668 IX86_BUILTIN_PCOMTRUED, 20669 IX86_BUILTIN_PCOMEQQ, 20670 IX86_BUILTIN_PCOMNEQ, 20671 IX86_BUILTIN_PCOMLTQ, 20672 IX86_BUILTIN_PCOMLEQ, 20673 IX86_BUILTIN_PCOMGTQ, 20674 IX86_BUILTIN_PCOMGEQ, 20675 IX86_BUILTIN_PCOMFALSEQ, 20676 IX86_BUILTIN_PCOMTRUEQ, 20677 20678 IX86_BUILTIN_MAX 20679 }; 20680 20681 /* Table for the ix86 builtin decls. */ 20682 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX]; 20683 20684 /* Table of all of the builtin functions that are possible with different ISA's 20685 but are waiting to be built until a function is declared to use that 20686 ISA. */ 20687 struct builtin_isa GTY(()) 20688 { 20689 tree type; /* builtin type to use in the declaration */ 20690 const char *name; /* function name */ 20691 int isa; /* isa_flags this builtin is defined for */ 20692 bool const_p; /* true if the declaration is constant */ 20693 }; 20694 20695 static GTY(()) struct builtin_isa ix86_builtins_isa[(int) IX86_BUILTIN_MAX]; 20696 20697 20698 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Save the MASK 20699 * of which isa_flags to use in the ix86_builtins_isa array. Stores the 20700 * function decl in the ix86_builtins array. Returns the function decl or 20701 * NULL_TREE, if the builtin was not added. 20702 * 20703 * If the front end has a special hook for builtin functions, delay adding 20704 * builtin functions that aren't in the current ISA until the ISA is changed 20705 * with function specific optimization. Doing so, can save about 300K for the 20706 * default compiler. When the builtin is expanded, check at that time whether 20707 * it is valid. 20708 * 20709 * If the front end doesn't have a special hook, record all builtins, even if 20710 * it isn't an instruction set in the current ISA in case the user uses 20711 * function specific options for a different ISA, so that we don't get scope 20712 * errors if a builtin is added in the middle of a function scope. */ 20713 20714 static inline tree 20715 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code) 20716 { 20717 tree decl = NULL_TREE; 20718 20719 if (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT) 20720 { 20721 ix86_builtins_isa[(int) code].isa = mask; 20722 20723 if ((mask & ix86_isa_flags) != 0 20724 || (lang_hooks.builtin_function 20725 == lang_hooks.builtin_function_ext_scope)) 20726 20727 { 20728 decl = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, 20729 NULL_TREE); 20730 ix86_builtins[(int) code] = decl; 20731 ix86_builtins_isa[(int) code].type = NULL_TREE; 20732 } 20733 else 20734 { 20735 ix86_builtins[(int) code] = NULL_TREE; 20736 ix86_builtins_isa[(int) code].const_p = false; 20737 ix86_builtins_isa[(int) code].type = type; 20738 ix86_builtins_isa[(int) code].name = name; 20739 } 20740 } 20741 20742 return decl; 20743 } 20744 20745 /* Like def_builtin, but also marks the function decl "const". */ 20746 20747 static inline tree 20748 def_builtin_const (int mask, const char *name, tree type, 20749 enum ix86_builtins code) 20750 { 20751 tree decl = def_builtin (mask, name, type, code); 20752 if (decl) 20753 TREE_READONLY (decl) = 1; 20754 else 20755 ix86_builtins_isa[(int) code].const_p = true; 20756 20757 return decl; 20758 } 20759 20760 /* Add any new builtin functions for a given ISA that may not have been 20761 declared. 
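This is expected to run when function-specific target options, e.g.
   __attribute__ ((target ("sse4.2"))), enable an ISA whose builtins were
   deferred by def_builtin.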
/* Add any new builtin functions for a given ISA that may not have been
   declared.  This saves a bit of space compared to adding all of the
   declarations to the tree, even if we never use them.  */

static void
ix86_add_new_builtins (int isa)
{
  int i;
  tree decl;

  for (i = 0; i < (int)IX86_BUILTIN_MAX; i++)
    {
      if ((ix86_builtins_isa[i].isa & isa) != 0
	  && ix86_builtins_isa[i].type != NULL_TREE)
	{
	  decl = add_builtin_function_ext_scope (ix86_builtins_isa[i].name,
						 ix86_builtins_isa[i].type,
						 i, BUILT_IN_MD, NULL,
						 NULL_TREE);

	  ix86_builtins[i] = decl;
	  ix86_builtins_isa[i].type = NULL_TREE;
	  if (ix86_builtins_isa[i].const_p)
	    TREE_READONLY (decl) = 1;
	}
    }
}

/* Bits for builtin_description.flag.  */

/* Set when we don't support the comparison natively, and should
   swap the comparison in order to support it.  */
#define BUILTIN_DESC_SWAP_OPERANDS	1

struct builtin_description
{
  const unsigned int mask;
  const enum insn_code icode;
  const char *const name;
  const enum ix86_builtins code;
  const enum rtx_code comparison;
  const int flag;
};
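/* Illustrative note, not in the original source: each builtin_description
   entry bundles everything needed to wire one __builtin_ia32_* name to an
   insn pattern.  The first bdesc_comi entry below,

       { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq",
         IX86_BUILTIN_COMIEQSS, UNEQ, 0 },

   reads: only when SSE is enabled, register __builtin_ia32_comieq under the
   code IX86_BUILTIN_COMIEQSS, expand it with the CODE_FOR_sse_comi pattern,
   and use the UNEQ RTL comparison.  The final field holds either
   BUILTIN_DESC_SWAP_OPERANDS or, in the bdesc_special_args and bdesc_args
   tables further down, the builtin type of the function signature.  */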
static const struct builtin_description bdesc_comi[] =
{
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
  { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
  { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
};

static const struct builtin_description bdesc_pcmpestr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
};

static const struct builtin_description bdesc_pcmpistr[] =
{
  /* SSE4.2 */
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
  { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
};

/* Special builtin types */
enum ix86_special_builtin_type
{
  SPECIAL_FTYPE_UNKNOWN,
  VOID_FTYPE_VOID,
  V32QI_FTYPE_PCCHAR,
  V16QI_FTYPE_PCCHAR,
  V8SF_FTYPE_PCV4SF,
  V8SF_FTYPE_PCFLOAT,
  V4DF_FTYPE_PCV2DF,
  V4DF_FTYPE_PCDOUBLE,
  V4SF_FTYPE_PCFLOAT,
  V2DF_FTYPE_PCDOUBLE,
  V8SF_FTYPE_PCV8SF_V8SF,
  V4DF_FTYPE_PCV4DF_V4DF,
  V4SF_FTYPE_V4SF_PCV2SF,
  V4SF_FTYPE_PCV4SF_V4SF,
  V2DF_FTYPE_V2DF_PCDOUBLE,
  V2DF_FTYPE_PCV2DF_V2DF,
  V2DI_FTYPE_PV2DI,
  VOID_FTYPE_PV2SF_V4SF,
  VOID_FTYPE_PV4DI_V4DI,
  VOID_FTYPE_PV2DI_V2DI,
  VOID_FTYPE_PCHAR_V32QI,
  VOID_FTYPE_PCHAR_V16QI,
  VOID_FTYPE_PFLOAT_V8SF,
  VOID_FTYPE_PFLOAT_V4SF,
  VOID_FTYPE_PDOUBLE_V4DF,
  VOID_FTYPE_PDOUBLE_V2DF,
  VOID_FTYPE_PDI_DI,
  VOID_FTYPE_PINT_INT,
  VOID_FTYPE_PV8SF_V8SF_V8SF,
  VOID_FTYPE_PV4DF_V4DF_V4DF,
  VOID_FTYPE_PV4SF_V4SF_V4SF,
  VOID_FTYPE_PV2DF_V2DF_V2DF
};

/*
Builtin types */ 20894 enum ix86_builtin_type 20895 { 20896 FTYPE_UNKNOWN, 20897 FLOAT128_FTYPE_FLOAT128, 20898 FLOAT_FTYPE_FLOAT, 20899 FLOAT128_FTYPE_FLOAT128_FLOAT128, 20900 INT_FTYPE_V8SF_V8SF_PTEST, 20901 INT_FTYPE_V4DI_V4DI_PTEST, 20902 INT_FTYPE_V4DF_V4DF_PTEST, 20903 INT_FTYPE_V4SF_V4SF_PTEST, 20904 INT_FTYPE_V2DI_V2DI_PTEST, 20905 INT_FTYPE_V2DF_V2DF_PTEST, 20906 INT64_FTYPE_V4SF, 20907 INT64_FTYPE_V2DF, 20908 INT_FTYPE_V16QI, 20909 INT_FTYPE_V8QI, 20910 INT_FTYPE_V8SF, 20911 INT_FTYPE_V4DF, 20912 INT_FTYPE_V4SF, 20913 INT_FTYPE_V2DF, 20914 V16QI_FTYPE_V16QI, 20915 V8SI_FTYPE_V8SF, 20916 V8SI_FTYPE_V4SI, 20917 V8HI_FTYPE_V8HI, 20918 V8HI_FTYPE_V16QI, 20919 V8QI_FTYPE_V8QI, 20920 V8SF_FTYPE_V8SF, 20921 V8SF_FTYPE_V8SI, 20922 V8SF_FTYPE_V4SF, 20923 V4SI_FTYPE_V4SI, 20924 V4SI_FTYPE_V16QI, 20925 V4SI_FTYPE_V8SI, 20926 V4SI_FTYPE_V8HI, 20927 V4SI_FTYPE_V4DF, 20928 V4SI_FTYPE_V4SF, 20929 V4SI_FTYPE_V2DF, 20930 V4HI_FTYPE_V4HI, 20931 V4DF_FTYPE_V4DF, 20932 V4DF_FTYPE_V4SI, 20933 V4DF_FTYPE_V4SF, 20934 V4DF_FTYPE_V2DF, 20935 V4SF_FTYPE_V4DF, 20936 V4SF_FTYPE_V4SF, 20937 V4SF_FTYPE_V4SF_VEC_MERGE, 20938 V4SF_FTYPE_V8SF, 20939 V4SF_FTYPE_V4SI, 20940 V4SF_FTYPE_V2DF, 20941 V2DI_FTYPE_V2DI, 20942 V2DI_FTYPE_V16QI, 20943 V2DI_FTYPE_V8HI, 20944 V2DI_FTYPE_V4SI, 20945 V2DF_FTYPE_V2DF, 20946 V2DF_FTYPE_V2DF_VEC_MERGE, 20947 V2DF_FTYPE_V4SI, 20948 V2DF_FTYPE_V4DF, 20949 V2DF_FTYPE_V4SF, 20950 V2DF_FTYPE_V2SI, 20951 V2SI_FTYPE_V2SI, 20952 V2SI_FTYPE_V4SF, 20953 V2SI_FTYPE_V2SF, 20954 V2SI_FTYPE_V2DF, 20955 V2SF_FTYPE_V2SF, 20956 V2SF_FTYPE_V2SI, 20957 V16QI_FTYPE_V16QI_V16QI, 20958 V16QI_FTYPE_V8HI_V8HI, 20959 V8QI_FTYPE_V8QI_V8QI, 20960 V8QI_FTYPE_V4HI_V4HI, 20961 V8HI_FTYPE_V8HI_V8HI, 20962 V8HI_FTYPE_V8HI_V8HI_COUNT, 20963 V8HI_FTYPE_V16QI_V16QI, 20964 V8HI_FTYPE_V4SI_V4SI, 20965 V8HI_FTYPE_V8HI_SI_COUNT, 20966 V8SF_FTYPE_V8SF_V8SF, 20967 V8SF_FTYPE_V8SF_V8SI, 20968 V4SI_FTYPE_V4SI_V4SI, 20969 V4SI_FTYPE_V4SI_V4SI_COUNT, 20970 V4SI_FTYPE_V8HI_V8HI, 20971 V4SI_FTYPE_V4SF_V4SF, 20972 V4SI_FTYPE_V2DF_V2DF, 20973 V4SI_FTYPE_V4SI_SI_COUNT, 20974 V4HI_FTYPE_V4HI_V4HI, 20975 V4HI_FTYPE_V4HI_V4HI_COUNT, 20976 V4HI_FTYPE_V8QI_V8QI, 20977 V4HI_FTYPE_V2SI_V2SI, 20978 V4HI_FTYPE_V4HI_SI_COUNT, 20979 V4DF_FTYPE_V4DF_V4DF, 20980 V4DF_FTYPE_V4DF_V4DI, 20981 V4SF_FTYPE_V4SF_V4SF, 20982 V4SF_FTYPE_V4SF_V4SF_SWAP, 20983 V4SF_FTYPE_V4SF_V4SI, 20984 V4SF_FTYPE_V4SF_V2SI, 20985 V4SF_FTYPE_V4SF_V2DF, 20986 V4SF_FTYPE_V4SF_DI, 20987 V4SF_FTYPE_V4SF_SI, 20988 V2DI_FTYPE_V2DI_V2DI, 20989 V2DI_FTYPE_V2DI_V2DI_COUNT, 20990 V2DI_FTYPE_V16QI_V16QI, 20991 V2DI_FTYPE_V4SI_V4SI, 20992 V2DI_FTYPE_V2DI_V16QI, 20993 V2DI_FTYPE_V2DF_V2DF, 20994 V2DI_FTYPE_V2DI_SI_COUNT, 20995 V2SI_FTYPE_V2SI_V2SI, 20996 V2SI_FTYPE_V2SI_V2SI_COUNT, 20997 V2SI_FTYPE_V4HI_V4HI, 20998 V2SI_FTYPE_V2SF_V2SF, 20999 V2SI_FTYPE_V2SI_SI_COUNT, 21000 V2DF_FTYPE_V2DF_V2DF, 21001 V2DF_FTYPE_V2DF_V2DF_SWAP, 21002 V2DF_FTYPE_V2DF_V4SF, 21003 V2DF_FTYPE_V2DF_V2DI, 21004 V2DF_FTYPE_V2DF_DI, 21005 V2DF_FTYPE_V2DF_SI, 21006 V2SF_FTYPE_V2SF_V2SF, 21007 V1DI_FTYPE_V1DI_V1DI, 21008 V1DI_FTYPE_V1DI_V1DI_COUNT, 21009 V1DI_FTYPE_V8QI_V8QI, 21010 V1DI_FTYPE_V2SI_V2SI, 21011 V1DI_FTYPE_V1DI_SI_COUNT, 21012 UINT64_FTYPE_UINT64_UINT64, 21013 UINT_FTYPE_UINT_UINT, 21014 UINT_FTYPE_UINT_USHORT, 21015 UINT_FTYPE_UINT_UCHAR, 21016 V8HI_FTYPE_V8HI_INT, 21017 V4SI_FTYPE_V4SI_INT, 21018 V4HI_FTYPE_V4HI_INT, 21019 V8SF_FTYPE_V8SF_INT, 21020 V4SI_FTYPE_V8SI_INT, 21021 V4SF_FTYPE_V8SF_INT, 21022 V2DF_FTYPE_V4DF_INT, 21023 V4DF_FTYPE_V4DF_INT, 21024 V4SF_FTYPE_V4SF_INT, 21025 
V2DI_FTYPE_V2DI_INT, 21026 V2DI2TI_FTYPE_V2DI_INT, 21027 V2DF_FTYPE_V2DF_INT, 21028 V16QI_FTYPE_V16QI_V16QI_V16QI, 21029 V8SF_FTYPE_V8SF_V8SF_V8SF, 21030 V4DF_FTYPE_V4DF_V4DF_V4DF, 21031 V4SF_FTYPE_V4SF_V4SF_V4SF, 21032 V2DF_FTYPE_V2DF_V2DF_V2DF, 21033 V16QI_FTYPE_V16QI_V16QI_INT, 21034 V8SI_FTYPE_V8SI_V8SI_INT, 21035 V8SI_FTYPE_V8SI_V4SI_INT, 21036 V8HI_FTYPE_V8HI_V8HI_INT, 21037 V8SF_FTYPE_V8SF_V8SF_INT, 21038 V8SF_FTYPE_V8SF_V4SF_INT, 21039 V4SI_FTYPE_V4SI_V4SI_INT, 21040 V4DF_FTYPE_V4DF_V4DF_INT, 21041 V4DF_FTYPE_V4DF_V2DF_INT, 21042 V4SF_FTYPE_V4SF_V4SF_INT, 21043 V2DI_FTYPE_V2DI_V2DI_INT, 21044 V2DI2TI_FTYPE_V2DI_V2DI_INT, 21045 V1DI2DI_FTYPE_V1DI_V1DI_INT, 21046 V2DF_FTYPE_V2DF_V2DF_INT, 21047 V2DI_FTYPE_V2DI_UINT_UINT, 21048 V2DI_FTYPE_V2DI_V2DI_UINT_UINT 21049 }; 21050 21051 /* Special builtins with variable number of arguments. */ 21052 static const struct builtin_description bdesc_special_args[] = 21053 { 21054 /* MMX */ 21055 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_emms, "__builtin_ia32_emms", IX86_BUILTIN_EMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, 21056 21057 /* 3DNow! */ 21058 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_femms, "__builtin_ia32_femms", IX86_BUILTIN_FEMMS, UNKNOWN, (int) VOID_FTYPE_VOID }, 21059 21060 /* SSE */ 21061 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_storeups", IX86_BUILTIN_STOREUPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21062 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movntv4sf, "__builtin_ia32_movntps", IX86_BUILTIN_MOVNTPS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21063 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movups, "__builtin_ia32_loadups", IX86_BUILTIN_LOADUPS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, 21064 21065 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadhps_exp, "__builtin_ia32_loadhps", IX86_BUILTIN_LOADHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, 21066 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_loadlps_exp, "__builtin_ia32_loadlps", IX86_BUILTIN_LOADLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_PCV2SF }, 21067 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storehps, "__builtin_ia32_storehps", IX86_BUILTIN_STOREHPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, 21068 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX86_BUILTIN_STORELPS, UNKNOWN, (int) VOID_FTYPE_PV2SF_V4SF }, 21069 21070 /* SSE or 3DNow!A */ 21071 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21072 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntdi, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PDI_DI }, 21073 21074 /* SSE2 */ 21075 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21076 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_mfence, 0, IX86_BUILTIN_MFENCE, UNKNOWN, (int) VOID_FTYPE_VOID }, 21077 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_storeupd", IX86_BUILTIN_STOREUPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21078 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_storedqu", IX86_BUILTIN_STOREDQU, UNKNOWN, (int) VOID_FTYPE_PCHAR_V16QI }, 21079 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2df, "__builtin_ia32_movntpd", IX86_BUILTIN_MOVNTPD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21080 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntv2di, "__builtin_ia32_movntdq", IX86_BUILTIN_MOVNTDQ, UNKNOWN, (int) VOID_FTYPE_PV2DI_V2DI }, 21081 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movntsi, "__builtin_ia32_movnti", IX86_BUILTIN_MOVNTI, UNKNOWN, (int) 
VOID_FTYPE_PINT_INT }, 21082 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movupd, "__builtin_ia32_loadupd", IX86_BUILTIN_LOADUPD, UNKNOWN, (int) V2DF_FTYPE_PCDOUBLE }, 21083 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movdqu, "__builtin_ia32_loaddqu", IX86_BUILTIN_LOADDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, 21084 21085 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadhpd_exp, "__builtin_ia32_loadhpd", IX86_BUILTIN_LOADHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, 21086 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_loadlpd_exp, "__builtin_ia32_loadlpd", IX86_BUILTIN_LOADLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_PCDOUBLE }, 21087 21088 /* SSE3 */ 21089 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_lddqu, "__builtin_ia32_lddqu", IX86_BUILTIN_LDDQU, UNKNOWN, (int) V16QI_FTYPE_PCCHAR }, 21090 21091 /* SSE4.1 */ 21092 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_movntdqa, "__builtin_ia32_movntdqa", IX86_BUILTIN_MOVNTDQA, UNKNOWN, (int) V2DI_FTYPE_PV2DI }, 21093 21094 /* SSE4A */ 21095 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv2df, "__builtin_ia32_movntsd", IX86_BUILTIN_MOVNTSD, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V2DF }, 21096 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_vmmovntv4sf, "__builtin_ia32_movntss", IX86_BUILTIN_MOVNTSS, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V4SF }, 21097 21098 /* AVX */ 21099 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroall, "__builtin_ia32_vzeroall", IX86_BUILTIN_VZEROALL, UNKNOWN, (int) VOID_FTYPE_VOID }, 21100 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vzeroupper, 0, IX86_BUILTIN_VZEROUPPER, UNKNOWN, (int) VOID_FTYPE_VOID }, 21101 { OPTION_MASK_ISA_AVX | OPTION_MASK_ISA_64BIT, CODE_FOR_avx_vzeroupper_rex64, 0, IX86_BUILTIN_VZEROUPPER_REX64, UNKNOWN, (int) VOID_FTYPE_VOID }, 21102 21103 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss, "__builtin_ia32_vbroadcastss", IX86_BUILTIN_VBROADCASTSS, UNKNOWN, (int) V4SF_FTYPE_PCFLOAT }, 21104 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastsd256, "__builtin_ia32_vbroadcastsd256", IX86_BUILTIN_VBROADCASTSD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, 21105 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastss256, "__builtin_ia32_vbroadcastss256", IX86_BUILTIN_VBROADCASTSS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, 21106 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_pd256, "__builtin_ia32_vbroadcastf128_pd256", IX86_BUILTIN_VBROADCASTPD256, UNKNOWN, (int) V4DF_FTYPE_PCV2DF }, 21107 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vbroadcastf128_ps256, "__builtin_ia32_vbroadcastf128_ps256", IX86_BUILTIN_VBROADCASTPS256, UNKNOWN, (int) V8SF_FTYPE_PCV4SF }, 21108 21109 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_loadupd256", IX86_BUILTIN_LOADUPD256, UNKNOWN, (int) V4DF_FTYPE_PCDOUBLE }, 21110 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_loadups256", IX86_BUILTIN_LOADUPS256, UNKNOWN, (int) V8SF_FTYPE_PCFLOAT }, 21111 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movupd256, "__builtin_ia32_storeupd256", IX86_BUILTIN_STOREUPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, 21112 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movups256, "__builtin_ia32_storeups256", IX86_BUILTIN_STOREUPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, 21113 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_loaddqu256", IX86_BUILTIN_LOADDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, 21114 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movdqu256, "__builtin_ia32_storedqu256", IX86_BUILTIN_STOREDQU256, UNKNOWN, (int) VOID_FTYPE_PCHAR_V32QI }, 21115 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_lddqu256, "__builtin_ia32_lddqu256", IX86_BUILTIN_LDDQU256, UNKNOWN, (int) V32QI_FTYPE_PCCHAR }, 21116 21117 { OPTION_MASK_ISA_AVX, 
CODE_FOR_avx_movntv4di, "__builtin_ia32_movntdq256", IX86_BUILTIN_MOVNTDQ256, UNKNOWN, (int) VOID_FTYPE_PV4DI_V4DI }, 21118 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv4df, "__builtin_ia32_movntpd256", IX86_BUILTIN_MOVNTPD256, UNKNOWN, (int) VOID_FTYPE_PDOUBLE_V4DF }, 21119 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movntv8sf, "__builtin_ia32_movntps256", IX86_BUILTIN_MOVNTPS256, UNKNOWN, (int) VOID_FTYPE_PFLOAT_V8SF }, 21120 21121 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd, "__builtin_ia32_maskloadpd", IX86_BUILTIN_MASKLOADPD, UNKNOWN, (int) V2DF_FTYPE_PCV2DF_V2DF }, 21122 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps, "__builtin_ia32_maskloadps", IX86_BUILTIN_MASKLOADPS, UNKNOWN, (int) V4SF_FTYPE_PCV4SF_V4SF }, 21123 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadpd256, "__builtin_ia32_maskloadpd256", IX86_BUILTIN_MASKLOADPD256, UNKNOWN, (int) V4DF_FTYPE_PCV4DF_V4DF }, 21124 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskloadps256, "__builtin_ia32_maskloadps256", IX86_BUILTIN_MASKLOADPS256, UNKNOWN, (int) V8SF_FTYPE_PCV8SF_V8SF }, 21125 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd, "__builtin_ia32_maskstorepd", IX86_BUILTIN_MASKSTOREPD, UNKNOWN, (int) VOID_FTYPE_PV2DF_V2DF_V2DF }, 21126 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps, "__builtin_ia32_maskstoreps", IX86_BUILTIN_MASKSTOREPS, UNKNOWN, (int) VOID_FTYPE_PV4SF_V4SF_V4SF }, 21127 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstorepd256, "__builtin_ia32_maskstorepd256", IX86_BUILTIN_MASKSTOREPD256, UNKNOWN, (int) VOID_FTYPE_PV4DF_V4DF_V4DF }, 21128 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_maskstoreps256, "__builtin_ia32_maskstoreps256", IX86_BUILTIN_MASKSTOREPS256, UNKNOWN, (int) VOID_FTYPE_PV8SF_V8SF_V8SF }, 21129 }; 21130 21131 /* Builtins with variable number of arguments. */ 21132 static const struct builtin_description bdesc_args[] = 21133 { 21134 /* MMX */ 21135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21138 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21141 21142 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21143 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, 
"__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21150 21151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21152 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21153 21154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21155 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andnotv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21156 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21158 21159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21161 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21162 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21163 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21164 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21165 21166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI}, 21171 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI}, 21172 21173 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, "__builtin_ia32_packsswb", IX86_BUILTIN_PACKSSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, 21174 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, "__builtin_ia32_packssdw", IX86_BUILTIN_PACKSSDW, UNKNOWN, (int) V4HI_FTYPE_V2SI_V2SI }, 21175 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, "__builtin_ia32_packuswb", IX86_BUILTIN_PACKUSWB, UNKNOWN, (int) V8QI_FTYPE_V4HI_V4HI }, 21176 21177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, "__builtin_ia32_pmaddwd", IX86_BUILTIN_PMADDWD, UNKNOWN, (int) V2SI_FTYPE_V4HI_V4HI }, 21178 21179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllwi", IX86_BUILTIN_PSLLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 
"__builtin_ia32_pslldi", IX86_BUILTIN_PSLLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllqi", IX86_BUILTIN_PSLLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, 21182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, "__builtin_ia32_psllw", IX86_BUILTIN_PSLLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21183 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, "__builtin_ia32_pslld", IX86_BUILTIN_PSLLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21184 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv1di3, "__builtin_ia32_psllq", IX86_BUILTIN_PSLLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, 21185 21186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlwi", IX86_BUILTIN_PSRLWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrldi", IX86_BUILTIN_PSRLDI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21188 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlqi", IX86_BUILTIN_PSRLQI, UNKNOWN, (int) V1DI_FTYPE_V1DI_SI_COUNT }, 21189 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, "__builtin_ia32_psrlw", IX86_BUILTIN_PSRLW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21190 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, "__builtin_ia32_psrld", IX86_BUILTIN_PSRLD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21191 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv1di3, "__builtin_ia32_psrlq", IX86_BUILTIN_PSRLQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_COUNT }, 21192 21193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psrawi", IX86_BUILTIN_PSRAWI, UNKNOWN, (int) V4HI_FTYPE_V4HI_SI_COUNT }, 21194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psradi", IX86_BUILTIN_PSRADI, UNKNOWN, (int) V2SI_FTYPE_V2SI_SI_COUNT }, 21195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, "__builtin_ia32_psraw", IX86_BUILTIN_PSRAW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI_COUNT }, 21196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, "__builtin_ia32_psrad", IX86_BUILTIN_PSRAD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI_COUNT }, 21197 21198 /* 3DNow! 
*/ 21199 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pf2id, "__builtin_ia32_pf2id", IX86_BUILTIN_PF2ID, UNKNOWN, (int) V2SI_FTYPE_V2SF }, 21200 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_floatv2si2, "__builtin_ia32_pi2fd", IX86_BUILTIN_PI2FD, UNKNOWN, (int) V2SF_FTYPE_V2SI }, 21201 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpv2sf2, "__builtin_ia32_pfrcp", IX86_BUILTIN_PFRCP, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21202 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqrtv2sf2, "__builtin_ia32_pfrsqrt", IX86_BUILTIN_PFRSQRT, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21203 21204 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgusb", IX86_BUILTIN_PAVGUSB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21205 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_haddv2sf3, "__builtin_ia32_pfacc", IX86_BUILTIN_PFACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21206 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_addv2sf3, "__builtin_ia32_pfadd", IX86_BUILTIN_PFADD, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21207 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_eqv2sf3, "__builtin_ia32_pfcmpeq", IX86_BUILTIN_PFCMPEQ, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21208 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gev2sf3, "__builtin_ia32_pfcmpge", IX86_BUILTIN_PFCMPGE, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21209 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_gtv2sf3, "__builtin_ia32_pfcmpgt", IX86_BUILTIN_PFCMPGT, UNKNOWN, (int) V2SI_FTYPE_V2SF_V2SF }, 21210 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_smaxv2sf3, "__builtin_ia32_pfmax", IX86_BUILTIN_PFMAX, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21211 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_sminv2sf3, "__builtin_ia32_pfmin", IX86_BUILTIN_PFMIN, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21212 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_mulv2sf3, "__builtin_ia32_pfmul", IX86_BUILTIN_PFMUL, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21213 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit1v2sf3, "__builtin_ia32_pfrcpit1", IX86_BUILTIN_PFRCPIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21214 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rcpit2v2sf3, "__builtin_ia32_pfrcpit2", IX86_BUILTIN_PFRCPIT2, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21215 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_rsqit1v2sf3, "__builtin_ia32_pfrsqit1", IX86_BUILTIN_PFRSQIT1, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21216 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subv2sf3, "__builtin_ia32_pfsub", IX86_BUILTIN_PFSUB, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21217 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_subrv2sf3, "__builtin_ia32_pfsubr", IX86_BUILTIN_PFSUBR, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21218 { OPTION_MASK_ISA_3DNOW, CODE_FOR_mmx_pmulhrwv4hi3, "__builtin_ia32_pmulhrw", IX86_BUILTIN_PMULHRW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21219 21220 /* 3DNow!A */ 21221 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pf2iw, "__builtin_ia32_pf2iw", IX86_BUILTIN_PF2IW, UNKNOWN, (int) V2SI_FTYPE_V2SF }, 21222 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pi2fw, "__builtin_ia32_pi2fw", IX86_BUILTIN_PI2FW, UNKNOWN, (int) V2SF_FTYPE_V2SI }, 21223 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2si2, "__builtin_ia32_pswapdsi", IX86_BUILTIN_PSWAPDSI, UNKNOWN, (int) V2SI_FTYPE_V2SI }, 21224 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pswapdv2sf2, "__builtin_ia32_pswapdsf", IX86_BUILTIN_PSWAPDSF, UNKNOWN, (int) V2SF_FTYPE_V2SF }, 21225 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_hsubv2sf3, "__builtin_ia32_pfnacc", IX86_BUILTIN_PFNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21226 { OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_addsubv2sf3, "__builtin_ia32_pfpnacc", IX86_BUILTIN_PFPNACC, UNKNOWN, (int) V2SF_FTYPE_V2SF_V2SF }, 21227 21228 /* SSE */ 21229 { 
OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, "__builtin_ia32_movmskps", IX86_BUILTIN_MOVMSKPS, UNKNOWN, (int) INT_FTYPE_V4SF }, 21230 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_sqrtv4sf2, "__builtin_ia32_sqrtps", IX86_BUILTIN_SQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21231 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86_BUILTIN_SQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21232 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21233 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21234 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21235 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, 21236 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, 21237 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, 21238 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF }, 21239 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF }, 21240 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF }, 21241 21242 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_shufps, "__builtin_ia32_shufps", IX86_BUILTIN_SHUFPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21243 21244 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21245 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21246 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21247 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21248 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21249 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21250 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21251 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21252 21253 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, 21254 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, 21255 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, 21256 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21257 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", 
IX86_BUILTIN_CMPGEPS, LE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21258 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21259 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, 21260 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, 21261 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, 21262 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21263 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP}, 21264 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21265 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, (int) V4SF_FTYPE_V4SF_V4SF }, 21266 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, (int) V4SF_FTYPE_V4SF_V4SF }, 21267 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, (int) V4SF_FTYPE_V4SF_V4SF }, 21268 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21269 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF }, 21270 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF }, 21271 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF }, 21272 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21273 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP }, 21274 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF }, 21275 21276 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21277 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21278 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21279 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21280 21281 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21282 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_andnotv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21283 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21284 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", 
IX86_BUILTIN_XORPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21285 21286 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21287 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps_exp, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21288 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21289 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21290 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21291 21292 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI }, 21293 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI }, 21294 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI }, 21295 21296 { OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtsf2, "__builtin_ia32_rsqrtf", IX86_BUILTIN_RSQRTF, UNKNOWN, (int) FLOAT_FTYPE_FLOAT }, 21297 21298 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsqrtv4sf2, "__builtin_ia32_sqrtss", IX86_BUILTIN_SQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21299 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrsqrtv4sf2, "__builtin_ia32_rsqrtss", IX86_BUILTIN_RSQRTSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21300 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmrcpv4sf2, "__builtin_ia32_rcpss", IX86_BUILTIN_RCPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_VEC_MERGE }, 21301 21302 /* SSE MMX or 3Dnow!A */ 21303 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21304 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21305 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21306 21307 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21308 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21309 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21310 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21311 21312 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI }, 21313 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI }, 21314 21315 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT }, 21316 21317 /* SSE2 */ 21318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, 
"__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21319 21320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, "__builtin_ia32_movmskpd", IX86_BUILTIN_MOVMSKPD, UNKNOWN, (int) INT_FTYPE_V2DF }, 21321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, "__builtin_ia32_pmovmskb128", IX86_BUILTIN_PMOVMSKB128, UNKNOWN, (int) INT_FTYPE_V16QI }, 21322 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, "__builtin_ia32_sqrtpd", IX86_BUILTIN_SQRTPD, UNKNOWN, (int) V2DF_FTYPE_V2DF }, 21323 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd", IX86_BUILTIN_CVTDQ2PD, UNKNOWN, (int) V2DF_FTYPE_V4SI }, 21324 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI }, 21325 21326 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, 21327 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, 21328 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF }, 21329 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF }, 21330 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF }, 21331 21332 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI }, 21333 21334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, 21335 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF }, 21336 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, "__builtin_ia32_cvtsd2si64", IX86_BUILTIN_CVTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, 21337 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, "__builtin_ia32_cvttsd2si64", IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, (int) INT64_FTYPE_V2DF }, 21338 21339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, "__builtin_ia32_cvtps2dq", IX86_BUILTIN_CVTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, 21340 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, "__builtin_ia32_cvtps2pd", IX86_BUILTIN_CVTPS2PD, UNKNOWN, (int) V2DF_FTYPE_V4SF }, 21341 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, "__builtin_ia32_cvttps2dq", IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, (int) V4SI_FTYPE_V4SF }, 21342 21343 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21344 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21345 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21346 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21347 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21348 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21349 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, 
"__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21350 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21351 21352 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, 21353 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, 21354 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, 21355 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21356 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP}, 21357 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21358 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, 21359 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, 21360 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, 21361 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21362 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF_SWAP }, 21363 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21364 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, (int) V2DF_FTYPE_V2DF_V2DF }, 21365 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, (int) V2DF_FTYPE_V2DF_V2DF }, 21366 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, (int) V2DF_FTYPE_V2DF_V2DF }, 21367 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21368 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, (int) V2DF_FTYPE_V2DF_V2DF }, 21369 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, (int) V2DF_FTYPE_V2DF_V2DF }, 21370 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, (int) V2DF_FTYPE_V2DF_V2DF }, 21371 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, (int) V2DF_FTYPE_V2DF_V2DF }, 21372 21373 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21374 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21375 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21376 { 
OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21377 21378 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21379 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21380 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21381 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21382 21383 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21384 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd_exp, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21385 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd_exp, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21386 21387 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, (int) V4SI_FTYPE_V2DF_V2DF }, 21388 21389 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21390 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21391 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21392 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21393 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21394 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21395 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21396 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21397 21398 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21399 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21400 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21401 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21402 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21403 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21404 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21405 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", 
IX86_BUILTIN_PSUBUSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21406 21407 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21408 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN,(int) V8HI_FTYPE_V8HI_V8HI }, 21409 21410 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21411 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_andnotv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21412 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21413 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21414 21415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21416 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21417 21418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21422 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21424 21425 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21426 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21427 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21428 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21429 21430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21433 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21434 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21435 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", 
IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21436 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21437 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21438 21439 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, 21440 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, 21441 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, (int) V16QI_FTYPE_V8HI_V8HI }, 21442 21443 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21444 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI }, 21445 21446 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI }, 21447 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, 21448 21449 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI }, 21450 21451 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, "__builtin_ia32_cvtsi2sd", IX86_BUILTIN_CVTSI2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_SI }, 21452 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, "__builtin_ia32_cvtsi642sd", IX86_BUILTIN_CVTSI642SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_DI }, 21453 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, "__builtin_ia32_cvtsd2ss", IX86_BUILTIN_CVTSD2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2DF }, 21454 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, "__builtin_ia32_cvtss2sd", IX86_BUILTIN_CVTSS2SD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V4SF }, 21455 21456 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ashlti3, "__builtin_ia32_pslldqi128", IX86_BUILTIN_PSLLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, 21457 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllwi128", IX86_BUILTIN_PSLLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21458 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslldi128", IX86_BUILTIN_PSLLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21459 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllqi128", IX86_BUILTIN_PSLLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, 21460 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, "__builtin_ia32_psllw128", IX86_BUILTIN_PSLLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21461 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, "__builtin_ia32_pslld128", IX86_BUILTIN_PSLLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21462 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, "__builtin_ia32_psllq128", IX86_BUILTIN_PSLLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, 21463 21464 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lshrti3, "__builtin_ia32_psrldqi128", IX86_BUILTIN_PSRLDQI128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_INT }, 21465 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlwi128", IX86_BUILTIN_PSRLWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21466 { OPTION_MASK_ISA_SSE2, 
CODE_FOR_lshrv4si3, "__builtin_ia32_psrldi128", IX86_BUILTIN_PSRLDI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21467 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlqi128", IX86_BUILTIN_PSRLQI128, UNKNOWN, (int) V2DI_FTYPE_V2DI_SI_COUNT }, 21468 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, "__builtin_ia32_psrlw128", IX86_BUILTIN_PSRLW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21469 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, "__builtin_ia32_psrld128", IX86_BUILTIN_PSRLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21470 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, "__builtin_ia32_psrlq128", IX86_BUILTIN_PSRLQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_COUNT }, 21471 21472 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psrawi128", IX86_BUILTIN_PSRAWI128, UNKNOWN, (int) V8HI_FTYPE_V8HI_SI_COUNT }, 21473 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psradi128", IX86_BUILTIN_PSRADI128, UNKNOWN, (int) V4SI_FTYPE_V4SI_SI_COUNT }, 21474 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, "__builtin_ia32_psraw128", IX86_BUILTIN_PSRAW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_COUNT }, 21475 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, "__builtin_ia32_psrad128", IX86_BUILTIN_PSRAD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI_COUNT }, 21476 21477 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufd, "__builtin_ia32_pshufd", IX86_BUILTIN_PSHUFD, UNKNOWN, (int) V4SI_FTYPE_V4SI_INT }, 21478 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshuflw, "__builtin_ia32_pshuflw", IX86_BUILTIN_PSHUFLW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, 21479 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pshufhw, "__builtin_ia32_pshufhw", IX86_BUILTIN_PSHUFHW, UNKNOWN, (int) V8HI_FTYPE_V8HI_INT }, 21480 21481 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd", IX86_BUILTIN_SQRTSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_VEC_MERGE }, 21482 21483 { OPTION_MASK_ISA_SSE2, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128 }, 21484 { OPTION_MASK_ISA_SSE2, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128 }, 21485 21486 { OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, 21487 21488 /* SSE2 MMX */ 21489 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, 21490 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI }, 21491 21492 /* SSE3 */ 21493 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF}, 21494 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF }, 21495 21496 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21497 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21498 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF }, 21499 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21500 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF 
}, 21501 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF }, 21502 21503 /* SSSE3 */ 21504 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI }, 21505 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI }, 21506 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, 21507 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI }, 21508 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI }, 21509 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI }, 21510 21511 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21512 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21513 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21514 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21515 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21516 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21517 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21518 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21519 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21520 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21521 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21522 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21523 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI }, 21524 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI }, 21525 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21526 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21527 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21528 { OPTION_MASK_ISA_SSSE3, 
CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21529 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21530 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI }, 21531 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21532 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI }, 21533 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21534 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI }, 21535 21536 /* SSSE3. */ 21537 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI2TI_FTYPE_V2DI_V2DI_INT }, 21538 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI2DI_FTYPE_V1DI_V1DI_INT }, 21539 21540 /* SSE4.1 */ 21541 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21542 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21543 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_V2DF }, 21544 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_V4SF }, 21545 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21546 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21547 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21548 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_INT }, 21549 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI_V16QI }, 21550 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI_INT }, 21551 21552 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, "__builtin_ia32_pmovsxbw128", IX86_BUILTIN_PMOVSXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, 21553 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, "__builtin_ia32_pmovsxbd128", IX86_BUILTIN_PMOVSXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, 21554 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, "__builtin_ia32_pmovsxbq128", IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, 21555 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, "__builtin_ia32_pmovsxwd128", IX86_BUILTIN_PMOVSXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, 21556 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 
"__builtin_ia32_pmovsxwq128", IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, 21557 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, "__builtin_ia32_pmovsxdq128", IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, 21558 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, "__builtin_ia32_pmovzxbw128", IX86_BUILTIN_PMOVZXBW128, UNKNOWN, (int) V8HI_FTYPE_V16QI }, 21559 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, "__builtin_ia32_pmovzxbd128", IX86_BUILTIN_PMOVZXBD128, UNKNOWN, (int) V4SI_FTYPE_V16QI }, 21560 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, "__builtin_ia32_pmovzxbq128", IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, (int) V2DI_FTYPE_V16QI }, 21561 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, "__builtin_ia32_pmovzxwd128", IX86_BUILTIN_PMOVZXWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI }, 21562 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, "__builtin_ia32_pmovzxwq128", IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, (int) V2DI_FTYPE_V8HI }, 21563 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, "__builtin_ia32_pmovzxdq128", IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI }, 21564 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI }, 21565 21566 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, (int) V8HI_FTYPE_V4SI_V4SI }, 21567 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21568 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21569 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21570 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21571 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21572 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI }, 21573 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21574 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21575 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI }, 21576 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, "__builtin_ia32_pmuldq128", IX86_BUILTIN_PMULDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI }, 21577 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI }, 21578 21579 /* SSE4.1 and SSE5 */ 21580 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundpd, "__builtin_ia32_roundpd", IX86_BUILTIN_ROUNDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, 21581 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundps, "__builtin_ia32_roundps", IX86_BUILTIN_ROUNDPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, 21582 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundsd, "__builtin_ia32_roundsd", IX86_BUILTIN_ROUNDSD, UNKNOWN, (int) 
V2DF_FTYPE_V2DF_V2DF_INT }, 21583 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_roundss, "__builtin_ia32_roundss", IX86_BUILTIN_ROUNDSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21584 21585 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21586 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21587 { OPTION_MASK_ISA_ROUND, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, (int) INT_FTYPE_V2DI_V2DI_PTEST }, 21588 21589 /* SSE4.2 */ 21590 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21591 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, "__builtin_ia32_crc32qi", IX86_BUILTIN_CRC32QI, UNKNOWN, (int) UINT_FTYPE_UINT_UCHAR }, 21592 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, "__builtin_ia32_crc32hi", IX86_BUILTIN_CRC32HI, UNKNOWN, (int) UINT_FTYPE_UINT_USHORT }, 21593 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, "__builtin_ia32_crc32si", IX86_BUILTIN_CRC32SI, UNKNOWN, (int) UINT_FTYPE_UINT_UINT }, 21594 { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, "__builtin_ia32_crc32di", IX86_BUILTIN_CRC32DI, UNKNOWN, (int) UINT64_FTYPE_UINT64_UINT64 }, 21595 21596 /* SSE4A */ 21597 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrqi, "__builtin_ia32_extrqi", IX86_BUILTIN_EXTRQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_UINT_UINT }, 21598 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_extrq, "__builtin_ia32_extrq", IX86_BUILTIN_EXTRQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V16QI }, 21599 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertqi, "__builtin_ia32_insertqi", IX86_BUILTIN_INSERTQI, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_UINT_UINT }, 21600 { OPTION_MASK_ISA_SSE4A, CODE_FOR_sse4a_insertq, "__builtin_ia32_insertq", IX86_BUILTIN_INSERTQ, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21601 21602 /* AES */ 21603 { OPTION_MASK_ISA_SSE2, CODE_FOR_aeskeygenassist, 0, IX86_BUILTIN_AESKEYGENASSIST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_INT }, 21604 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesimc, 0, IX86_BUILTIN_AESIMC128, UNKNOWN, (int) V2DI_FTYPE_V2DI }, 21605 21606 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenc, 0, IX86_BUILTIN_AESENC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21607 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesenclast, 0, IX86_BUILTIN_AESENCLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21608 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdec, 0, IX86_BUILTIN_AESDEC128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21609 { OPTION_MASK_ISA_SSE2, CODE_FOR_aesdeclast, 0, IX86_BUILTIN_AESDECLAST128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI }, 21610 21611 /* PCLMUL */ 21612 { OPTION_MASK_ISA_SSE2, CODE_FOR_pclmulqdq, 0, IX86_BUILTIN_PCLMULQDQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT }, 21613 21614 /* AVX */ 21615 { OPTION_MASK_ISA_AVX, CODE_FOR_addv4df3, "__builtin_ia32_addpd256", IX86_BUILTIN_ADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21616 { OPTION_MASK_ISA_AVX, CODE_FOR_addv8sf3, "__builtin_ia32_addps256", IX86_BUILTIN_ADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21617 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv4df3, "__builtin_ia32_addsubpd256", IX86_BUILTIN_ADDSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21618 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_addsubv8sf3, "__builtin_ia32_addsubps256", IX86_BUILTIN_ADDSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21619 { OPTION_MASK_ISA_AVX, CODE_FOR_andv4df3, "__builtin_ia32_andpd256", 
IX86_BUILTIN_ANDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21620 { OPTION_MASK_ISA_AVX, CODE_FOR_andv8sf3, "__builtin_ia32_andps256", IX86_BUILTIN_ANDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21621 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv4df3, "__builtin_ia32_andnpd256", IX86_BUILTIN_ANDNPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21622 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_andnotv8sf3, "__builtin_ia32_andnps256", IX86_BUILTIN_ANDNPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21623 { OPTION_MASK_ISA_AVX, CODE_FOR_divv4df3, "__builtin_ia32_divpd256", IX86_BUILTIN_DIVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21624 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_divv8sf3, "__builtin_ia32_divps256", IX86_BUILTIN_DIVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21625 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv4df3, "__builtin_ia32_haddpd256", IX86_BUILTIN_HADDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21626 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv8sf3, "__builtin_ia32_hsubps256", IX86_BUILTIN_HSUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21627 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_hsubv4df3, "__builtin_ia32_hsubpd256", IX86_BUILTIN_HSUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21628 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_haddv8sf3, "__builtin_ia32_haddps256", IX86_BUILTIN_HADDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21629 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv4df3, "__builtin_ia32_maxpd256", IX86_BUILTIN_MAXPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21630 { OPTION_MASK_ISA_AVX, CODE_FOR_smaxv8sf3, "__builtin_ia32_maxps256", IX86_BUILTIN_MAXPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21631 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv4df3, "__builtin_ia32_minpd256", IX86_BUILTIN_MINPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21632 { OPTION_MASK_ISA_AVX, CODE_FOR_sminv8sf3, "__builtin_ia32_minps256", IX86_BUILTIN_MINPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21633 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv4df3, "__builtin_ia32_mulpd256", IX86_BUILTIN_MULPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21634 { OPTION_MASK_ISA_AVX, CODE_FOR_mulv8sf3, "__builtin_ia32_mulps256", IX86_BUILTIN_MULPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21635 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv4df3, "__builtin_ia32_orpd256", IX86_BUILTIN_ORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21636 { OPTION_MASK_ISA_AVX, CODE_FOR_iorv8sf3, "__builtin_ia32_orps256", IX86_BUILTIN_ORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21637 { OPTION_MASK_ISA_AVX, CODE_FOR_subv4df3, "__builtin_ia32_subpd256", IX86_BUILTIN_SUBPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21638 { OPTION_MASK_ISA_AVX, CODE_FOR_subv8sf3, "__builtin_ia32_subps256", IX86_BUILTIN_SUBPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21639 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv4df3, "__builtin_ia32_xorpd256", IX86_BUILTIN_XORPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21640 { OPTION_MASK_ISA_AVX, CODE_FOR_xorv8sf3, "__builtin_ia32_xorps256", IX86_BUILTIN_XORPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21641 21642 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv2df3, "__builtin_ia32_vpermilvarpd", IX86_BUILTIN_VPERMILVARPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DI }, 21643 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4sf3, "__builtin_ia32_vpermilvarps", IX86_BUILTIN_VPERMILVARPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SI }, 21644 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv4df3, "__builtin_ia32_vpermilvarpd256", IX86_BUILTIN_VPERMILVARPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DI }, 21645 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilvarv8sf3, 
"__builtin_ia32_vpermilvarps256", IX86_BUILTIN_VPERMILVARPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SI }, 21646 21647 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendpd256, "__builtin_ia32_blendpd256", IX86_BUILTIN_BLENDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21648 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendps256, "__builtin_ia32_blendps256", IX86_BUILTIN_BLENDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21649 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvpd256, "__builtin_ia32_blendvpd256", IX86_BUILTIN_BLENDVPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_V4DF }, 21650 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_blendvps256, "__builtin_ia32_blendvps256", IX86_BUILTIN_BLENDVPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_V8SF }, 21651 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_dpps256, "__builtin_ia32_dpps256", IX86_BUILTIN_DPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21652 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufpd256, "__builtin_ia32_shufpd256", IX86_BUILTIN_SHUFPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21653 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_shufps256, "__builtin_ia32_shufps256", IX86_BUILTIN_SHUFPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21654 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpsdv2df3, "__builtin_ia32_cmpsd", IX86_BUILTIN_CMPSD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21655 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmpssv4sf3, "__builtin_ia32_cmpss", IX86_BUILTIN_CMPSS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21656 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv2df3, "__builtin_ia32_cmppd", IX86_BUILTIN_CMPPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT }, 21657 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv4sf3, "__builtin_ia32_cmpps", IX86_BUILTIN_CMPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF_INT }, 21658 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppdv4df3, "__builtin_ia32_cmppd256", IX86_BUILTIN_CMPPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21659 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cmppsv8sf3, "__builtin_ia32_cmpps256", IX86_BUILTIN_CMPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21660 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v4df, "__builtin_ia32_vextractf128_pd256", IX86_BUILTIN_EXTRACTF128PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF_INT }, 21661 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8sf, "__builtin_ia32_vextractf128_ps256", IX86_BUILTIN_EXTRACTF128PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF_INT }, 21662 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vextractf128v8si, "__builtin_ia32_vextractf128_si256", IX86_BUILTIN_EXTRACTF128SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI_INT }, 21663 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2pd256, "__builtin_ia32_cvtdq2pd256", IX86_BUILTIN_CVTDQ2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SI }, 21664 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtdq2ps256, "__builtin_ia32_cvtdq2ps256", IX86_BUILTIN_CVTDQ2PS256, UNKNOWN, (int) V8SF_FTYPE_V8SI }, 21665 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2ps256, "__builtin_ia32_cvtpd2ps256", IX86_BUILTIN_CVTPD2PS256, UNKNOWN, (int) V4SF_FTYPE_V4DF }, 21666 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2dq256, "__builtin_ia32_cvtps2dq256", IX86_BUILTIN_CVTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, 21667 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtps2pd256, "__builtin_ia32_cvtps2pd256", IX86_BUILTIN_CVTPS2PD256, UNKNOWN, (int) V4DF_FTYPE_V4SF }, 21668 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttpd2dq256, "__builtin_ia32_cvttpd2dq256", IX86_BUILTIN_CVTTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, 21669 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvtpd2dq256, "__builtin_ia32_cvtpd2dq256", IX86_BUILTIN_CVTPD2DQ256, UNKNOWN, (int) V4SI_FTYPE_V4DF }, 21670 { 
OPTION_MASK_ISA_AVX, CODE_FOR_avx_cvttps2dq256, "__builtin_ia32_cvttps2dq256", IX86_BUILTIN_CVTTPS2DQ256, UNKNOWN, (int) V8SI_FTYPE_V8SF }, 21671 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v4df3, "__builtin_ia32_vperm2f128_pd256", IX86_BUILTIN_VPERM2F128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF_INT }, 21672 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8sf3, "__builtin_ia32_vperm2f128_ps256", IX86_BUILTIN_VPERM2F128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF_INT }, 21673 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vperm2f128v8si3, "__builtin_ia32_vperm2f128_si256", IX86_BUILTIN_VPERM2F128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI_INT }, 21674 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv2df, "__builtin_ia32_vpermilpd", IX86_BUILTIN_VPERMILPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_INT }, 21675 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4sf, "__builtin_ia32_vpermilps", IX86_BUILTIN_VPERMILPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_INT }, 21676 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv4df, "__builtin_ia32_vpermilpd256", IX86_BUILTIN_VPERMILPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, 21677 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vpermilv8sf, "__builtin_ia32_vpermilps256", IX86_BUILTIN_VPERMILPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, 21678 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v4df, "__builtin_ia32_vinsertf128_pd256", IX86_BUILTIN_VINSERTF128PD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V2DF_INT }, 21679 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8sf, "__builtin_ia32_vinsertf128_ps256", IX86_BUILTIN_VINSERTF128PS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V4SF_INT }, 21680 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vinsertf128v8si, "__builtin_ia32_vinsertf128_si256", IX86_BUILTIN_VINSERTF128SI256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V4SI_INT }, 21681 21682 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movshdup256, "__builtin_ia32_movshdup256", IX86_BUILTIN_MOVSHDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21683 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movsldup256, "__builtin_ia32_movsldup256", IX86_BUILTIN_MOVSLDUP256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21684 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movddup256, "__builtin_ia32_movddup256", IX86_BUILTIN_MOVDDUP256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, 21685 21686 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv4df2, "__builtin_ia32_sqrtpd256", IX86_BUILTIN_SQRTPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF }, 21687 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_sqrtv8sf2, "__builtin_ia32_sqrtps256", IX86_BUILTIN_SQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21688 { OPTION_MASK_ISA_AVX, CODE_FOR_sqrtv8sf2, "__builtin_ia32_sqrtps_nr256", IX86_BUILTIN_SQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21689 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rsqrtv8sf2, "__builtin_ia32_rsqrtps256", IX86_BUILTIN_RSQRTPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21690 { OPTION_MASK_ISA_AVX, CODE_FOR_rsqrtv8sf2, "__builtin_ia32_rsqrtps_nr256", IX86_BUILTIN_RSQRTPS_NR256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21691 21692 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_rcpv8sf2, "__builtin_ia32_rcpps256", IX86_BUILTIN_RCPPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF }, 21693 21694 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundpd256, "__builtin_ia32_roundpd256", IX86_BUILTIN_ROUNDPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_INT }, 21695 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_roundps256, "__builtin_ia32_roundps256", IX86_BUILTIN_ROUNDPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_INT }, 21696 21697 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhpd256, "__builtin_ia32_unpckhpd256", IX86_BUILTIN_UNPCKHPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21698 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklpd256, 
"__builtin_ia32_unpcklpd256", IX86_BUILTIN_UNPCKLPD256, UNKNOWN, (int) V4DF_FTYPE_V4DF_V4DF }, 21699 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpckhps256, "__builtin_ia32_unpckhps256", IX86_BUILTIN_UNPCKHPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21700 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_unpcklps256, "__builtin_ia32_unpcklps256", IX86_BUILTIN_UNPCKLPS256, UNKNOWN, (int) V8SF_FTYPE_V8SF_V8SF }, 21701 21702 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si256_si, "__builtin_ia32_si256_si", IX86_BUILTIN_SI256_SI, UNKNOWN, (int) V8SI_FTYPE_V4SI }, 21703 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps256_ps, "__builtin_ia32_ps256_ps", IX86_BUILTIN_PS256_PS, UNKNOWN, (int) V8SF_FTYPE_V4SF }, 21704 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd256_pd, "__builtin_ia32_pd256_pd", IX86_BUILTIN_PD256_PD, UNKNOWN, (int) V4DF_FTYPE_V2DF }, 21705 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_si_si256, "__builtin_ia32_si_si256", IX86_BUILTIN_SI_SI256, UNKNOWN, (int) V4SI_FTYPE_V8SI }, 21706 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ps_ps256, "__builtin_ia32_ps_ps256", IX86_BUILTIN_PS_PS256, UNKNOWN, (int) V4SF_FTYPE_V8SF }, 21707 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_pd_pd256, "__builtin_ia32_pd_pd256", IX86_BUILTIN_PD_PD256, UNKNOWN, (int) V2DF_FTYPE_V4DF }, 21708 21709 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestzpd", IX86_BUILTIN_VTESTZPD, EQ, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21710 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestcpd", IX86_BUILTIN_VTESTCPD, LTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21711 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd, "__builtin_ia32_vtestnzcpd", IX86_BUILTIN_VTESTNZCPD, GTU, (int) INT_FTYPE_V2DF_V2DF_PTEST }, 21712 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestzps", IX86_BUILTIN_VTESTZPS, EQ, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21713 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestcps", IX86_BUILTIN_VTESTCPS, LTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21714 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps, "__builtin_ia32_vtestnzcps", IX86_BUILTIN_VTESTNZCPS, GTU, (int) INT_FTYPE_V4SF_V4SF_PTEST }, 21715 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestzpd256", IX86_BUILTIN_VTESTZPD256, EQ, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21716 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestcpd256", IX86_BUILTIN_VTESTCPD256, LTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21717 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestpd256, "__builtin_ia32_vtestnzcpd256", IX86_BUILTIN_VTESTNZCPD256, GTU, (int) INT_FTYPE_V4DF_V4DF_PTEST }, 21718 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestzps256", IX86_BUILTIN_VTESTZPS256, EQ, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21719 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestcps256", IX86_BUILTIN_VTESTCPS256, LTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21720 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_vtestps256, "__builtin_ia32_vtestnzcps256", IX86_BUILTIN_VTESTNZCPS256, GTU, (int) INT_FTYPE_V8SF_V8SF_PTEST }, 21721 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestz256", IX86_BUILTIN_PTESTZ256, EQ, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21722 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestc256", IX86_BUILTIN_PTESTC256, LTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21723 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_ptest256, "__builtin_ia32_ptestnzc256", IX86_BUILTIN_PTESTNZC256, GTU, (int) INT_FTYPE_V4DI_V4DI_PTEST }, 21724 21725 { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskpd256, "__builtin_ia32_movmskpd256", IX86_BUILTIN_MOVMSKPD256, UNKNOWN, (int) 
INT_FTYPE_V4DF },
  { OPTION_MASK_ISA_AVX, CODE_FOR_avx_movmskps256, "__builtin_ia32_movmskps256", IX86_BUILTIN_MOVMSKPS256, UNKNOWN, (int) INT_FTYPE_V8SF },
};

/* SSE5 */
enum multi_arg_type {
  MULTI_ARG_UNKNOWN,
  MULTI_ARG_3_SF,
  MULTI_ARG_3_DF,
  MULTI_ARG_3_DI,
  MULTI_ARG_3_SI,
  MULTI_ARG_3_SI_DI,
  MULTI_ARG_3_HI,
  MULTI_ARG_3_HI_SI,
  MULTI_ARG_3_QI,
  MULTI_ARG_3_PERMPS,
  MULTI_ARG_3_PERMPD,
  MULTI_ARG_2_SF,
  MULTI_ARG_2_DF,
  MULTI_ARG_2_DI,
  MULTI_ARG_2_SI,
  MULTI_ARG_2_HI,
  MULTI_ARG_2_QI,
  MULTI_ARG_2_DI_IMM,
  MULTI_ARG_2_SI_IMM,
  MULTI_ARG_2_HI_IMM,
  MULTI_ARG_2_QI_IMM,
  MULTI_ARG_2_SF_CMP,
  MULTI_ARG_2_DF_CMP,
  MULTI_ARG_2_DI_CMP,
  MULTI_ARG_2_SI_CMP,
  MULTI_ARG_2_HI_CMP,
  MULTI_ARG_2_QI_CMP,
  MULTI_ARG_2_DI_TF,
  MULTI_ARG_2_SI_TF,
  MULTI_ARG_2_HI_TF,
  MULTI_ARG_2_QI_TF,
  MULTI_ARG_2_SF_TF,
  MULTI_ARG_2_DF_TF,
  MULTI_ARG_1_SF,
  MULTI_ARG_1_DF,
  MULTI_ARG_1_DI,
  MULTI_ARG_1_SI,
  MULTI_ARG_1_HI,
  MULTI_ARG_1_QI,
  MULTI_ARG_1_SI_DI,
  MULTI_ARG_1_HI_DI,
  MULTI_ARG_1_HI_SI,
  MULTI_ARG_1_QI_DI,
  MULTI_ARG_1_QI_SI,
  MULTI_ARG_1_QI_HI,
  MULTI_ARG_1_PH2PS,
  MULTI_ARG_1_PS2PH
};

static const struct builtin_description bdesc_multi_arg[] =
{
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv4sf4, "__builtin_ia32_fmaddss", IX86_BUILTIN_FMADDSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmaddv2df4, "__builtin_ia32_fmaddsd", IX86_BUILTIN_FMADDSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv4sf4, "__builtin_ia32_fmaddps", IX86_BUILTIN_FMADDPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmaddv2df4, "__builtin_ia32_fmaddpd", IX86_BUILTIN_FMADDPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv4sf4, "__builtin_ia32_fmsubss", IX86_BUILTIN_FMSUBSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfmsubv2df4, "__builtin_ia32_fmsubsd", IX86_BUILTIN_FMSUBSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv4sf4, "__builtin_ia32_fmsubps", IX86_BUILTIN_FMSUBPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fmsubv2df4, "__builtin_ia32_fmsubpd", IX86_BUILTIN_FMSUBPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv4sf4, "__builtin_ia32_fnmaddss", IX86_BUILTIN_FNMADDSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmaddv2df4, "__builtin_ia32_fnmaddsd", IX86_BUILTIN_FNMADDSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv4sf4, "__builtin_ia32_fnmaddps", IX86_BUILTIN_FNMADDPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmaddv2df4, "__builtin_ia32_fnmaddpd", IX86_BUILTIN_FNMADDPD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv4sf4, "__builtin_ia32_fnmsubss", IX86_BUILTIN_FNMSUBSS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_vmfnmsubv2df4, "__builtin_ia32_fnmsubsd", IX86_BUILTIN_FNMSUBSD, 0, (int)MULTI_ARG_3_DF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv4sf4, "__builtin_ia32_fnmsubps", IX86_BUILTIN_FNMSUBPS, 0, (int)MULTI_ARG_3_SF },
  { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5i_fnmsubv2df4, "__builtin_ia32_fnmsubpd", IX86_BUILTIN_FNMSUBPD,
0, (int)MULTI_ARG_3_DF }, 21798 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov", IX86_BUILTIN_PCMOV, 0, (int)MULTI_ARG_3_DI }, 21799 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2di, "__builtin_ia32_pcmov_v2di", IX86_BUILTIN_PCMOV_V2DI, 0, (int)MULTI_ARG_3_DI }, 21800 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4si, "__builtin_ia32_pcmov_v4si", IX86_BUILTIN_PCMOV_V4SI, 0, (int)MULTI_ARG_3_SI }, 21801 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v8hi, "__builtin_ia32_pcmov_v8hi", IX86_BUILTIN_PCMOV_V8HI, 0, (int)MULTI_ARG_3_HI }, 21802 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v16qi, "__builtin_ia32_pcmov_v16qi",IX86_BUILTIN_PCMOV_V16QI,0, (int)MULTI_ARG_3_QI }, 21803 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v2df, "__builtin_ia32_pcmov_v2df", IX86_BUILTIN_PCMOV_V2DF, 0, (int)MULTI_ARG_3_DF }, 21804 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcmov_v4sf, "__builtin_ia32_pcmov_v4sf", IX86_BUILTIN_PCMOV_V4SF, 0, (int)MULTI_ARG_3_SF }, 21805 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pperm, "__builtin_ia32_pperm", IX86_BUILTIN_PPERM, 0, (int)MULTI_ARG_3_QI }, 21806 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv4sf, "__builtin_ia32_permps", IX86_BUILTIN_PERMPS, 0, (int)MULTI_ARG_3_PERMPS }, 21807 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_permv2df, "__builtin_ia32_permpd", IX86_BUILTIN_PERMPD, 0, (int)MULTI_ARG_3_PERMPD }, 21808 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssww, "__builtin_ia32_pmacssww", IX86_BUILTIN_PMACSSWW, 0, (int)MULTI_ARG_3_HI }, 21809 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsww, "__builtin_ia32_pmacsww", IX86_BUILTIN_PMACSWW, 0, (int)MULTI_ARG_3_HI }, 21810 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsswd, "__builtin_ia32_pmacsswd", IX86_BUILTIN_PMACSSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21811 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacswd, "__builtin_ia32_pmacswd", IX86_BUILTIN_PMACSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21812 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdd, "__builtin_ia32_pmacssdd", IX86_BUILTIN_PMACSSDD, 0, (int)MULTI_ARG_3_SI }, 21813 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdd, "__builtin_ia32_pmacsdd", IX86_BUILTIN_PMACSDD, 0, (int)MULTI_ARG_3_SI }, 21814 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdql, "__builtin_ia32_pmacssdql", IX86_BUILTIN_PMACSSDQL, 0, (int)MULTI_ARG_3_SI_DI }, 21815 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacssdqh, "__builtin_ia32_pmacssdqh", IX86_BUILTIN_PMACSSDQH, 0, (int)MULTI_ARG_3_SI_DI }, 21816 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdql, "__builtin_ia32_pmacsdql", IX86_BUILTIN_PMACSDQL, 0, (int)MULTI_ARG_3_SI_DI }, 21817 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmacsdqh, "__builtin_ia32_pmacsdqh", IX86_BUILTIN_PMACSDQH, 0, (int)MULTI_ARG_3_SI_DI }, 21818 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcsswd, "__builtin_ia32_pmadcsswd", IX86_BUILTIN_PMADCSSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21819 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pmadcswd, "__builtin_ia32_pmadcswd", IX86_BUILTIN_PMADCSWD, 0, (int)MULTI_ARG_3_HI_SI }, 21820 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv2di3, "__builtin_ia32_protq", IX86_BUILTIN_PROTQ, 0, (int)MULTI_ARG_2_DI }, 21821 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv4si3, "__builtin_ia32_protd", IX86_BUILTIN_PROTD, 0, (int)MULTI_ARG_2_SI }, 21822 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv8hi3, "__builtin_ia32_protw", IX86_BUILTIN_PROTW, 0, (int)MULTI_ARG_2_HI }, 21823 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vrotlv16qi3, "__builtin_ia32_protb", IX86_BUILTIN_PROTB, 0, (int)MULTI_ARG_2_QI }, 21824 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv2di3, "__builtin_ia32_protqi", IX86_BUILTIN_PROTQ_IMM, 0, 
(int)MULTI_ARG_2_DI_IMM }, 21825 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv4si3, "__builtin_ia32_protdi", IX86_BUILTIN_PROTD_IMM, 0, (int)MULTI_ARG_2_SI_IMM }, 21826 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv8hi3, "__builtin_ia32_protwi", IX86_BUILTIN_PROTW_IMM, 0, (int)MULTI_ARG_2_HI_IMM }, 21827 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_rotlv16qi3, "__builtin_ia32_protbi", IX86_BUILTIN_PROTB_IMM, 0, (int)MULTI_ARG_2_QI_IMM }, 21828 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv2di3, "__builtin_ia32_pshaq", IX86_BUILTIN_PSHAQ, 0, (int)MULTI_ARG_2_DI }, 21829 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv4si3, "__builtin_ia32_pshad", IX86_BUILTIN_PSHAD, 0, (int)MULTI_ARG_2_SI }, 21830 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv8hi3, "__builtin_ia32_pshaw", IX86_BUILTIN_PSHAW, 0, (int)MULTI_ARG_2_HI }, 21831 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_ashlv16qi3, "__builtin_ia32_pshab", IX86_BUILTIN_PSHAB, 0, (int)MULTI_ARG_2_QI }, 21832 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv2di3, "__builtin_ia32_pshlq", IX86_BUILTIN_PSHLQ, 0, (int)MULTI_ARG_2_DI }, 21833 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv4si3, "__builtin_ia32_pshld", IX86_BUILTIN_PSHLD, 0, (int)MULTI_ARG_2_SI }, 21834 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv8hi3, "__builtin_ia32_pshlw", IX86_BUILTIN_PSHLW, 0, (int)MULTI_ARG_2_HI }, 21835 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_lshlv16qi3, "__builtin_ia32_pshlb", IX86_BUILTIN_PSHLB, 0, (int)MULTI_ARG_2_QI }, 21836 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv4sf2, "__builtin_ia32_frczss", IX86_BUILTIN_FRCZSS, 0, (int)MULTI_ARG_2_SF }, 21837 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmfrczv2df2, "__builtin_ia32_frczsd", IX86_BUILTIN_FRCZSD, 0, (int)MULTI_ARG_2_DF }, 21838 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv4sf2, "__builtin_ia32_frczps", IX86_BUILTIN_FRCZPS, 0, (int)MULTI_ARG_1_SF }, 21839 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_frczv2df2, "__builtin_ia32_frczpd", IX86_BUILTIN_FRCZPD, 0, (int)MULTI_ARG_1_DF }, 21840 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtph2ps, "__builtin_ia32_cvtph2ps", IX86_BUILTIN_CVTPH2PS, 0, (int)MULTI_ARG_1_PH2PS }, 21841 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_cvtps2ph, "__builtin_ia32_cvtps2ph", IX86_BUILTIN_CVTPS2PH, 0, (int)MULTI_ARG_1_PS2PH }, 21842 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbw, "__builtin_ia32_phaddbw", IX86_BUILTIN_PHADDBW, 0, (int)MULTI_ARG_1_QI_HI }, 21843 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbd, "__builtin_ia32_phaddbd", IX86_BUILTIN_PHADDBD, 0, (int)MULTI_ARG_1_QI_SI }, 21844 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddbq, "__builtin_ia32_phaddbq", IX86_BUILTIN_PHADDBQ, 0, (int)MULTI_ARG_1_QI_DI }, 21845 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwd, "__builtin_ia32_phaddwd", IX86_BUILTIN_PHADDWD, 0, (int)MULTI_ARG_1_HI_SI }, 21846 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddwq, "__builtin_ia32_phaddwq", IX86_BUILTIN_PHADDWQ, 0, (int)MULTI_ARG_1_HI_DI }, 21847 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadddq, "__builtin_ia32_phadddq", IX86_BUILTIN_PHADDDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21848 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubw, "__builtin_ia32_phaddubw", IX86_BUILTIN_PHADDUBW, 0, (int)MULTI_ARG_1_QI_HI }, 21849 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubd, "__builtin_ia32_phaddubd", IX86_BUILTIN_PHADDUBD, 0, (int)MULTI_ARG_1_QI_SI }, 21850 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddubq, "__builtin_ia32_phaddubq", IX86_BUILTIN_PHADDUBQ, 0, (int)MULTI_ARG_1_QI_DI }, 21851 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phadduwd, "__builtin_ia32_phadduwd", IX86_BUILTIN_PHADDUWD, 0, (int)MULTI_ARG_1_HI_SI }, 21852 { OPTION_MASK_ISA_SSE5, 
CODE_FOR_sse5_phadduwq, "__builtin_ia32_phadduwq", IX86_BUILTIN_PHADDUWQ, 0, (int)MULTI_ARG_1_HI_DI }, 21853 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phaddudq, "__builtin_ia32_phaddudq", IX86_BUILTIN_PHADDUDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21854 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubbw, "__builtin_ia32_phsubbw", IX86_BUILTIN_PHSUBBW, 0, (int)MULTI_ARG_1_QI_HI }, 21855 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubwd, "__builtin_ia32_phsubwd", IX86_BUILTIN_PHSUBWD, 0, (int)MULTI_ARG_1_HI_SI }, 21856 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_phsubdq, "__builtin_ia32_phsubdq", IX86_BUILTIN_PHSUBDQ, 0, (int)MULTI_ARG_1_SI_DI }, 21857 21858 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comeqss", IX86_BUILTIN_COMEQSS, EQ, (int)MULTI_ARG_2_SF_CMP }, 21859 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comness", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, 21860 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comneqss", IX86_BUILTIN_COMNESS, NE, (int)MULTI_ARG_2_SF_CMP }, 21861 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comltss", IX86_BUILTIN_COMLTSS, LT, (int)MULTI_ARG_2_SF_CMP }, 21862 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comless", IX86_BUILTIN_COMLESS, LE, (int)MULTI_ARG_2_SF_CMP }, 21863 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgtss", IX86_BUILTIN_COMGTSS, GT, (int)MULTI_ARG_2_SF_CMP }, 21864 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comgess", IX86_BUILTIN_COMGESS, GE, (int)MULTI_ARG_2_SF_CMP }, 21865 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comueqss", IX86_BUILTIN_COMUEQSS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, 21866 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuness", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21867 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comuneqss", IX86_BUILTIN_COMUNESS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21868 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunltss", IX86_BUILTIN_COMULTSS, UNLT, (int)MULTI_ARG_2_SF_CMP }, 21869 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunless", IX86_BUILTIN_COMULESS, UNLE, (int)MULTI_ARG_2_SF_CMP }, 21870 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungtss", IX86_BUILTIN_COMUGTSS, UNGT, (int)MULTI_ARG_2_SF_CMP }, 21871 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comungess", IX86_BUILTIN_COMUGESS, UNGE, (int)MULTI_ARG_2_SF_CMP }, 21872 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comordss", IX86_BUILTIN_COMORDSS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, 21873 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv4sf3, "__builtin_ia32_comunordss", IX86_BUILTIN_COMUNORDSS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, 21874 21875 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comeqsd", IX86_BUILTIN_COMEQSD, EQ, (int)MULTI_ARG_2_DF_CMP }, 21876 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comnesd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, 21877 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comneqsd", IX86_BUILTIN_COMNESD, NE, (int)MULTI_ARG_2_DF_CMP }, 21878 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comltsd", IX86_BUILTIN_COMLTSD, LT, (int)MULTI_ARG_2_DF_CMP }, 21879 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, 
"__builtin_ia32_comlesd", IX86_BUILTIN_COMLESD, LE, (int)MULTI_ARG_2_DF_CMP }, 21880 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgtsd", IX86_BUILTIN_COMGTSD, GT, (int)MULTI_ARG_2_DF_CMP }, 21881 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comgesd", IX86_BUILTIN_COMGESD, GE, (int)MULTI_ARG_2_DF_CMP }, 21882 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comueqsd", IX86_BUILTIN_COMUEQSD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, 21883 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunesd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21884 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comuneqsd", IX86_BUILTIN_COMUNESD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21885 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunltsd", IX86_BUILTIN_COMULTSD, UNLT, (int)MULTI_ARG_2_DF_CMP }, 21886 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunlesd", IX86_BUILTIN_COMULESD, UNLE, (int)MULTI_ARG_2_DF_CMP }, 21887 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungtsd", IX86_BUILTIN_COMUGTSD, UNGT, (int)MULTI_ARG_2_DF_CMP }, 21888 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comungesd", IX86_BUILTIN_COMUGESD, UNGE, (int)MULTI_ARG_2_DF_CMP }, 21889 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comordsd", IX86_BUILTIN_COMORDSD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, 21890 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_vmmaskcmpv2df3, "__builtin_ia32_comunordsd", IX86_BUILTIN_COMUNORDSD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, 21891 21892 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comeqps", IX86_BUILTIN_COMEQPS, EQ, (int)MULTI_ARG_2_SF_CMP }, 21893 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, 21894 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comneqps", IX86_BUILTIN_COMNEPS, NE, (int)MULTI_ARG_2_SF_CMP }, 21895 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comltps", IX86_BUILTIN_COMLTPS, LT, (int)MULTI_ARG_2_SF_CMP }, 21896 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comleps", IX86_BUILTIN_COMLEPS, LE, (int)MULTI_ARG_2_SF_CMP }, 21897 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgtps", IX86_BUILTIN_COMGTPS, GT, (int)MULTI_ARG_2_SF_CMP }, 21898 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comgeps", IX86_BUILTIN_COMGEPS, GE, (int)MULTI_ARG_2_SF_CMP }, 21899 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comueqps", IX86_BUILTIN_COMUEQPS, UNEQ, (int)MULTI_ARG_2_SF_CMP }, 21900 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21901 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comuneqps", IX86_BUILTIN_COMUNEPS, LTGT, (int)MULTI_ARG_2_SF_CMP }, 21902 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunltps", IX86_BUILTIN_COMULTPS, UNLT, (int)MULTI_ARG_2_SF_CMP }, 21903 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunleps", IX86_BUILTIN_COMULEPS, UNLE, (int)MULTI_ARG_2_SF_CMP }, 21904 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comungtps", IX86_BUILTIN_COMUGTPS, UNGT, (int)MULTI_ARG_2_SF_CMP }, 21905 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, 
"__builtin_ia32_comungeps", IX86_BUILTIN_COMUGEPS, UNGE, (int)MULTI_ARG_2_SF_CMP }, 21906 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comordps", IX86_BUILTIN_COMORDPS, ORDERED, (int)MULTI_ARG_2_SF_CMP }, 21907 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4sf3, "__builtin_ia32_comunordps", IX86_BUILTIN_COMUNORDPS, UNORDERED, (int)MULTI_ARG_2_SF_CMP }, 21908 21909 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comeqpd", IX86_BUILTIN_COMEQPD, EQ, (int)MULTI_ARG_2_DF_CMP }, 21910 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comnepd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, 21911 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comneqpd", IX86_BUILTIN_COMNEPD, NE, (int)MULTI_ARG_2_DF_CMP }, 21912 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comltpd", IX86_BUILTIN_COMLTPD, LT, (int)MULTI_ARG_2_DF_CMP }, 21913 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comlepd", IX86_BUILTIN_COMLEPD, LE, (int)MULTI_ARG_2_DF_CMP }, 21914 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgtpd", IX86_BUILTIN_COMGTPD, GT, (int)MULTI_ARG_2_DF_CMP }, 21915 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comgepd", IX86_BUILTIN_COMGEPD, GE, (int)MULTI_ARG_2_DF_CMP }, 21916 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comueqpd", IX86_BUILTIN_COMUEQPD, UNEQ, (int)MULTI_ARG_2_DF_CMP }, 21917 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunepd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21918 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comuneqpd", IX86_BUILTIN_COMUNEPD, LTGT, (int)MULTI_ARG_2_DF_CMP }, 21919 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunltpd", IX86_BUILTIN_COMULTPD, UNLT, (int)MULTI_ARG_2_DF_CMP }, 21920 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunlepd", IX86_BUILTIN_COMULEPD, UNLE, (int)MULTI_ARG_2_DF_CMP }, 21921 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungtpd", IX86_BUILTIN_COMUGTPD, UNGT, (int)MULTI_ARG_2_DF_CMP }, 21922 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comungepd", IX86_BUILTIN_COMUGEPD, UNGE, (int)MULTI_ARG_2_DF_CMP }, 21923 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comordpd", IX86_BUILTIN_COMORDPD, ORDERED, (int)MULTI_ARG_2_DF_CMP }, 21924 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2df3, "__builtin_ia32_comunordpd", IX86_BUILTIN_COMUNORDPD, UNORDERED, (int)MULTI_ARG_2_DF_CMP }, 21925 21926 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomeqb", IX86_BUILTIN_PCOMEQB, EQ, (int)MULTI_ARG_2_QI_CMP }, 21927 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, 21928 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomneqb", IX86_BUILTIN_PCOMNEB, NE, (int)MULTI_ARG_2_QI_CMP }, 21929 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomltb", IX86_BUILTIN_PCOMLTB, LT, (int)MULTI_ARG_2_QI_CMP }, 21930 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomleb", IX86_BUILTIN_PCOMLEB, LE, (int)MULTI_ARG_2_QI_CMP }, 21931 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgtb", IX86_BUILTIN_PCOMGTB, GT, (int)MULTI_ARG_2_QI_CMP }, 21932 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv16qi3, "__builtin_ia32_pcomgeb", 
IX86_BUILTIN_PCOMGEB, GE, (int)MULTI_ARG_2_QI_CMP }, 21933 21934 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomeqw", IX86_BUILTIN_PCOMEQW, EQ, (int)MULTI_ARG_2_HI_CMP }, 21935 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomnew", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, 21936 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomneqw", IX86_BUILTIN_PCOMNEW, NE, (int)MULTI_ARG_2_HI_CMP }, 21937 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomltw", IX86_BUILTIN_PCOMLTW, LT, (int)MULTI_ARG_2_HI_CMP }, 21938 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomlew", IX86_BUILTIN_PCOMLEW, LE, (int)MULTI_ARG_2_HI_CMP }, 21939 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgtw", IX86_BUILTIN_PCOMGTW, GT, (int)MULTI_ARG_2_HI_CMP }, 21940 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv8hi3, "__builtin_ia32_pcomgew", IX86_BUILTIN_PCOMGEW, GE, (int)MULTI_ARG_2_HI_CMP }, 21941 21942 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomeqd", IX86_BUILTIN_PCOMEQD, EQ, (int)MULTI_ARG_2_SI_CMP }, 21943 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomned", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, 21944 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomneqd", IX86_BUILTIN_PCOMNED, NE, (int)MULTI_ARG_2_SI_CMP }, 21945 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomltd", IX86_BUILTIN_PCOMLTD, LT, (int)MULTI_ARG_2_SI_CMP }, 21946 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomled", IX86_BUILTIN_PCOMLED, LE, (int)MULTI_ARG_2_SI_CMP }, 21947 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomgtd", IX86_BUILTIN_PCOMGTD, GT, (int)MULTI_ARG_2_SI_CMP }, 21948 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv4si3, "__builtin_ia32_pcomged", IX86_BUILTIN_PCOMGED, GE, (int)MULTI_ARG_2_SI_CMP }, 21949 21950 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomeqq", IX86_BUILTIN_PCOMEQQ, EQ, (int)MULTI_ARG_2_DI_CMP }, 21951 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21952 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomneqq", IX86_BUILTIN_PCOMNEQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21953 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomltq", IX86_BUILTIN_PCOMLTQ, LT, (int)MULTI_ARG_2_DI_CMP }, 21954 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomleq", IX86_BUILTIN_PCOMLEQ, LE, (int)MULTI_ARG_2_DI_CMP }, 21955 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgtq", IX86_BUILTIN_PCOMGTQ, GT, (int)MULTI_ARG_2_DI_CMP }, 21956 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmpv2di3, "__builtin_ia32_pcomgeq", IX86_BUILTIN_PCOMGEQ, GE, (int)MULTI_ARG_2_DI_CMP }, 21957 21958 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomequb", IX86_BUILTIN_PCOMEQUB, EQ, (int)MULTI_ARG_2_QI_CMP }, 21959 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomneub", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, 21960 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v16qi3,"__builtin_ia32_pcomnequb", IX86_BUILTIN_PCOMNEUB, NE, (int)MULTI_ARG_2_QI_CMP }, 21961 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomltub", IX86_BUILTIN_PCOMLTUB, LTU, (int)MULTI_ARG_2_QI_CMP }, 21962 { 
OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomleub", IX86_BUILTIN_PCOMLEUB, LEU, (int)MULTI_ARG_2_QI_CMP }, 21963 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgtub", IX86_BUILTIN_PCOMGTUB, GTU, (int)MULTI_ARG_2_QI_CMP }, 21964 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv16qi3, "__builtin_ia32_pcomgeub", IX86_BUILTIN_PCOMGEUB, GEU, (int)MULTI_ARG_2_QI_CMP }, 21965 21966 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomequw", IX86_BUILTIN_PCOMEQUW, EQ, (int)MULTI_ARG_2_HI_CMP }, 21967 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomneuw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, 21968 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v8hi3, "__builtin_ia32_pcomnequw", IX86_BUILTIN_PCOMNEUW, NE, (int)MULTI_ARG_2_HI_CMP }, 21969 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomltuw", IX86_BUILTIN_PCOMLTUW, LTU, (int)MULTI_ARG_2_HI_CMP }, 21970 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomleuw", IX86_BUILTIN_PCOMLEUW, LEU, (int)MULTI_ARG_2_HI_CMP }, 21971 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgtuw", IX86_BUILTIN_PCOMGTUW, GTU, (int)MULTI_ARG_2_HI_CMP }, 21972 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv8hi3, "__builtin_ia32_pcomgeuw", IX86_BUILTIN_PCOMGEUW, GEU, (int)MULTI_ARG_2_HI_CMP }, 21973 21974 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomequd", IX86_BUILTIN_PCOMEQUD, EQ, (int)MULTI_ARG_2_SI_CMP }, 21975 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomneud", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, 21976 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v4si3, "__builtin_ia32_pcomnequd", IX86_BUILTIN_PCOMNEUD, NE, (int)MULTI_ARG_2_SI_CMP }, 21977 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomltud", IX86_BUILTIN_PCOMLTUD, LTU, (int)MULTI_ARG_2_SI_CMP }, 21978 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomleud", IX86_BUILTIN_PCOMLEUD, LEU, (int)MULTI_ARG_2_SI_CMP }, 21979 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgtud", IX86_BUILTIN_PCOMGTUD, GTU, (int)MULTI_ARG_2_SI_CMP }, 21980 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv4si3, "__builtin_ia32_pcomgeud", IX86_BUILTIN_PCOMGEUD, GEU, (int)MULTI_ARG_2_SI_CMP }, 21981 21982 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomequq", IX86_BUILTIN_PCOMEQUQ, EQ, (int)MULTI_ARG_2_DI_CMP }, 21983 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomneuq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21984 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_uns2v2di3, "__builtin_ia32_pcomnequq", IX86_BUILTIN_PCOMNEUQ, NE, (int)MULTI_ARG_2_DI_CMP }, 21985 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomltuq", IX86_BUILTIN_PCOMLTUQ, LTU, (int)MULTI_ARG_2_DI_CMP }, 21986 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomleuq", IX86_BUILTIN_PCOMLEUQ, LEU, (int)MULTI_ARG_2_DI_CMP }, 21987 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgtuq", IX86_BUILTIN_PCOMGTUQ, GTU, (int)MULTI_ARG_2_DI_CMP }, 21988 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_maskcmp_unsv2di3, "__builtin_ia32_pcomgeuq", IX86_BUILTIN_PCOMGEUQ, GEU, (int)MULTI_ARG_2_DI_CMP }, 21989 21990 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, 
"__builtin_ia32_comfalsess", IX86_BUILTIN_COMFALSESS, COM_FALSE_S, (int)MULTI_ARG_2_SF_TF }, 21991 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtruess", IX86_BUILTIN_COMTRUESS, COM_TRUE_S, (int)MULTI_ARG_2_SF_TF }, 21992 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comfalseps", IX86_BUILTIN_COMFALSEPS, COM_FALSE_P, (int)MULTI_ARG_2_SF_TF }, 21993 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv4sf3, "__builtin_ia32_comtrueps", IX86_BUILTIN_COMTRUEPS, COM_TRUE_P, (int)MULTI_ARG_2_SF_TF }, 21994 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsesd", IX86_BUILTIN_COMFALSESD, COM_FALSE_S, (int)MULTI_ARG_2_DF_TF }, 21995 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruesd", IX86_BUILTIN_COMTRUESD, COM_TRUE_S, (int)MULTI_ARG_2_DF_TF }, 21996 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comfalsepd", IX86_BUILTIN_COMFALSEPD, COM_FALSE_P, (int)MULTI_ARG_2_DF_TF }, 21997 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_com_tfv2df3, "__builtin_ia32_comtruepd", IX86_BUILTIN_COMTRUEPD, COM_TRUE_P, (int)MULTI_ARG_2_DF_TF }, 21998 21999 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseb", IX86_BUILTIN_PCOMFALSEB, PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, 22000 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalsew", IX86_BUILTIN_PCOMFALSEW, PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, 22001 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalsed", IX86_BUILTIN_PCOMFALSED, PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, 22002 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseq", IX86_BUILTIN_PCOMFALSEQ, PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, 22003 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomfalseub",IX86_BUILTIN_PCOMFALSEUB,PCOM_FALSE, (int)MULTI_ARG_2_QI_TF }, 22004 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomfalseuw",IX86_BUILTIN_PCOMFALSEUW,PCOM_FALSE, (int)MULTI_ARG_2_HI_TF }, 22005 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomfalseud",IX86_BUILTIN_PCOMFALSEUD,PCOM_FALSE, (int)MULTI_ARG_2_SI_TF }, 22006 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomfalseuq",IX86_BUILTIN_PCOMFALSEUQ,PCOM_FALSE, (int)MULTI_ARG_2_DI_TF }, 22007 22008 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueb", IX86_BUILTIN_PCOMTRUEB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, 22009 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtruew", IX86_BUILTIN_PCOMTRUEW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, 22010 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrued", IX86_BUILTIN_PCOMTRUED, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, 22011 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueq", IX86_BUILTIN_PCOMTRUEQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, 22012 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv16qi3, "__builtin_ia32_pcomtrueub", IX86_BUILTIN_PCOMTRUEUB, PCOM_TRUE, (int)MULTI_ARG_2_QI_TF }, 22013 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv8hi3, "__builtin_ia32_pcomtrueuw", IX86_BUILTIN_PCOMTRUEUW, PCOM_TRUE, (int)MULTI_ARG_2_HI_TF }, 22014 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv4si3, "__builtin_ia32_pcomtrueud", IX86_BUILTIN_PCOMTRUEUD, PCOM_TRUE, (int)MULTI_ARG_2_SI_TF }, 22015 { OPTION_MASK_ISA_SSE5, CODE_FOR_sse5_pcom_tfv2di3, "__builtin_ia32_pcomtrueuq", IX86_BUILTIN_PCOMTRUEUQ, PCOM_TRUE, (int)MULTI_ARG_2_DI_TF }, 22016 }; 22017 22018 /* 
Set up all the MMX/SSE builtins, even builtins for instructions that are not 22019 in the current target ISA to allow the user to compile particular modules 22020 with different target specific options that differ from the command line 22021 options. */ 22022 static void 22023 ix86_init_mmx_sse_builtins (void) 22024 { 22025 const struct builtin_description * d; 22026 size_t i; 22027 22028 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode); 22029 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode); 22030 tree V1DI_type_node 22031 = build_vector_type_for_mode (long_long_integer_type_node, V1DImode); 22032 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode); 22033 tree V2DI_type_node 22034 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode); 22035 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode); 22036 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode); 22037 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode); 22038 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode); 22039 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode); 22040 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode); 22041 22042 tree pchar_type_node = build_pointer_type (char_type_node); 22043 tree pcchar_type_node 22044 = build_pointer_type (build_type_variant (char_type_node, 1, 0)); 22045 tree pfloat_type_node = build_pointer_type (float_type_node); 22046 tree pcfloat_type_node 22047 = build_pointer_type (build_type_variant (float_type_node, 1, 0)); 22048 tree pv2sf_type_node = build_pointer_type (V2SF_type_node); 22049 tree pcv2sf_type_node 22050 = build_pointer_type (build_type_variant (V2SF_type_node, 1, 0)); 22051 tree pv2di_type_node = build_pointer_type (V2DI_type_node); 22052 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node); 22053 22054 /* Comparisons. */ 22055 tree int_ftype_v4sf_v4sf 22056 = build_function_type_list (integer_type_node, 22057 V4SF_type_node, V4SF_type_node, NULL_TREE); 22058 tree v4si_ftype_v4sf_v4sf 22059 = build_function_type_list (V4SI_type_node, 22060 V4SF_type_node, V4SF_type_node, NULL_TREE); 22061 /* MMX/SSE/integer conversions. */ 22062 tree int_ftype_v4sf 22063 = build_function_type_list (integer_type_node, 22064 V4SF_type_node, NULL_TREE); 22065 tree int64_ftype_v4sf 22066 = build_function_type_list (long_long_integer_type_node, 22067 V4SF_type_node, NULL_TREE); 22068 tree int_ftype_v8qi 22069 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE); 22070 tree v4sf_ftype_v4sf_int 22071 = build_function_type_list (V4SF_type_node, 22072 V4SF_type_node, integer_type_node, NULL_TREE); 22073 tree v4sf_ftype_v4sf_int64 22074 = build_function_type_list (V4SF_type_node, 22075 V4SF_type_node, long_long_integer_type_node, 22076 NULL_TREE); 22077 tree v4sf_ftype_v4sf_v2si 22078 = build_function_type_list (V4SF_type_node, 22079 V4SF_type_node, V2SI_type_node, NULL_TREE); 22080 22081 /* Miscellaneous. 
*/ 22082 tree v8qi_ftype_v4hi_v4hi 22083 = build_function_type_list (V8QI_type_node, 22084 V4HI_type_node, V4HI_type_node, NULL_TREE); 22085 tree v4hi_ftype_v2si_v2si 22086 = build_function_type_list (V4HI_type_node, 22087 V2SI_type_node, V2SI_type_node, NULL_TREE); 22088 tree v4sf_ftype_v4sf_v4sf_int 22089 = build_function_type_list (V4SF_type_node, 22090 V4SF_type_node, V4SF_type_node, 22091 integer_type_node, NULL_TREE); 22092 tree v2si_ftype_v4hi_v4hi 22093 = build_function_type_list (V2SI_type_node, 22094 V4HI_type_node, V4HI_type_node, NULL_TREE); 22095 tree v4hi_ftype_v4hi_int 22096 = build_function_type_list (V4HI_type_node, 22097 V4HI_type_node, integer_type_node, NULL_TREE); 22098 tree v2si_ftype_v2si_int 22099 = build_function_type_list (V2SI_type_node, 22100 V2SI_type_node, integer_type_node, NULL_TREE); 22101 tree v1di_ftype_v1di_int 22102 = build_function_type_list (V1DI_type_node, 22103 V1DI_type_node, integer_type_node, NULL_TREE); 22104 22105 tree void_ftype_void 22106 = build_function_type (void_type_node, void_list_node); 22107 tree void_ftype_unsigned 22108 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE); 22109 tree void_ftype_unsigned_unsigned 22110 = build_function_type_list (void_type_node, unsigned_type_node, 22111 unsigned_type_node, NULL_TREE); 22112 tree void_ftype_pcvoid_unsigned_unsigned 22113 = build_function_type_list (void_type_node, const_ptr_type_node, 22114 unsigned_type_node, unsigned_type_node, 22115 NULL_TREE); 22116 tree unsigned_ftype_void 22117 = build_function_type (unsigned_type_node, void_list_node); 22118 tree v2si_ftype_v4sf 22119 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE); 22120 /* Loads/stores. */ 22121 tree void_ftype_v8qi_v8qi_pchar 22122 = build_function_type_list (void_type_node, 22123 V8QI_type_node, V8QI_type_node, 22124 pchar_type_node, NULL_TREE); 22125 tree v4sf_ftype_pcfloat 22126 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE); 22127 tree v4sf_ftype_v4sf_pcv2sf 22128 = build_function_type_list (V4SF_type_node, 22129 V4SF_type_node, pcv2sf_type_node, NULL_TREE); 22130 tree void_ftype_pv2sf_v4sf 22131 = build_function_type_list (void_type_node, 22132 pv2sf_type_node, V4SF_type_node, NULL_TREE); 22133 tree void_ftype_pfloat_v4sf 22134 = build_function_type_list (void_type_node, 22135 pfloat_type_node, V4SF_type_node, NULL_TREE); 22136 tree void_ftype_pdi_di 22137 = build_function_type_list (void_type_node, 22138 pdi_type_node, long_long_unsigned_type_node, 22139 NULL_TREE); 22140 tree void_ftype_pv2di_v2di 22141 = build_function_type_list (void_type_node, 22142 pv2di_type_node, V2DI_type_node, NULL_TREE); 22143 /* Normal vector unops. */ 22144 tree v4sf_ftype_v4sf 22145 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE); 22146 tree v16qi_ftype_v16qi 22147 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE); 22148 tree v8hi_ftype_v8hi 22149 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE); 22150 tree v4si_ftype_v4si 22151 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE); 22152 tree v8qi_ftype_v8qi 22153 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE); 22154 tree v4hi_ftype_v4hi 22155 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE); 22156 22157 /* Normal vector binops. 
*/ 22158 tree v4sf_ftype_v4sf_v4sf 22159 = build_function_type_list (V4SF_type_node, 22160 V4SF_type_node, V4SF_type_node, NULL_TREE); 22161 tree v8qi_ftype_v8qi_v8qi 22162 = build_function_type_list (V8QI_type_node, 22163 V8QI_type_node, V8QI_type_node, NULL_TREE); 22164 tree v4hi_ftype_v4hi_v4hi 22165 = build_function_type_list (V4HI_type_node, 22166 V4HI_type_node, V4HI_type_node, NULL_TREE); 22167 tree v2si_ftype_v2si_v2si 22168 = build_function_type_list (V2SI_type_node, 22169 V2SI_type_node, V2SI_type_node, NULL_TREE); 22170 tree v1di_ftype_v1di_v1di 22171 = build_function_type_list (V1DI_type_node, 22172 V1DI_type_node, V1DI_type_node, NULL_TREE); 22173 tree v1di_ftype_v1di_v1di_int 22174 = build_function_type_list (V1DI_type_node, 22175 V1DI_type_node, V1DI_type_node, 22176 integer_type_node, NULL_TREE); 22177 tree v2si_ftype_v2sf 22178 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE); 22179 tree v2sf_ftype_v2si 22180 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE); 22181 tree v2si_ftype_v2si 22182 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE); 22183 tree v2sf_ftype_v2sf 22184 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE); 22185 tree v2sf_ftype_v2sf_v2sf 22186 = build_function_type_list (V2SF_type_node, 22187 V2SF_type_node, V2SF_type_node, NULL_TREE); 22188 tree v2si_ftype_v2sf_v2sf 22189 = build_function_type_list (V2SI_type_node, 22190 V2SF_type_node, V2SF_type_node, NULL_TREE); 22191 tree pint_type_node = build_pointer_type (integer_type_node); 22192 tree pdouble_type_node = build_pointer_type (double_type_node); 22193 tree pcdouble_type_node = build_pointer_type ( 22194 build_type_variant (double_type_node, 1, 0)); 22195 tree int_ftype_v2df_v2df 22196 = build_function_type_list (integer_type_node, 22197 V2DF_type_node, V2DF_type_node, NULL_TREE); 22198 22199 tree void_ftype_pcvoid 22200 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE); 22201 tree v4sf_ftype_v4si 22202 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE); 22203 tree v4si_ftype_v4sf 22204 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE); 22205 tree v2df_ftype_v4si 22206 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE); 22207 tree v4si_ftype_v2df 22208 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE); 22209 tree v4si_ftype_v2df_v2df 22210 = build_function_type_list (V4SI_type_node, 22211 V2DF_type_node, V2DF_type_node, NULL_TREE); 22212 tree v2si_ftype_v2df 22213 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE); 22214 tree v4sf_ftype_v2df 22215 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE); 22216 tree v2df_ftype_v2si 22217 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE); 22218 tree v2df_ftype_v4sf 22219 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE); 22220 tree int_ftype_v2df 22221 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE); 22222 tree int64_ftype_v2df 22223 = build_function_type_list (long_long_integer_type_node, 22224 V2DF_type_node, NULL_TREE); 22225 tree v2df_ftype_v2df_int 22226 = build_function_type_list (V2DF_type_node, 22227 V2DF_type_node, integer_type_node, NULL_TREE); 22228 tree v2df_ftype_v2df_int64 22229 = build_function_type_list (V2DF_type_node, 22230 V2DF_type_node, long_long_integer_type_node, 22231 NULL_TREE); 22232 tree v4sf_ftype_v4sf_v2df 22233 = 
build_function_type_list (V4SF_type_node, 22234 V4SF_type_node, V2DF_type_node, NULL_TREE); 22235 tree v2df_ftype_v2df_v4sf 22236 = build_function_type_list (V2DF_type_node, 22237 V2DF_type_node, V4SF_type_node, NULL_TREE); 22238 tree v2df_ftype_v2df_v2df_int 22239 = build_function_type_list (V2DF_type_node, 22240 V2DF_type_node, V2DF_type_node, 22241 integer_type_node, 22242 NULL_TREE); 22243 tree v2df_ftype_v2df_pcdouble 22244 = build_function_type_list (V2DF_type_node, 22245 V2DF_type_node, pcdouble_type_node, NULL_TREE); 22246 tree void_ftype_pdouble_v2df 22247 = build_function_type_list (void_type_node, 22248 pdouble_type_node, V2DF_type_node, NULL_TREE); 22249 tree void_ftype_pint_int 22250 = build_function_type_list (void_type_node, 22251 pint_type_node, integer_type_node, NULL_TREE); 22252 tree void_ftype_v16qi_v16qi_pchar 22253 = build_function_type_list (void_type_node, 22254 V16QI_type_node, V16QI_type_node, 22255 pchar_type_node, NULL_TREE); 22256 tree v2df_ftype_pcdouble 22257 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE); 22258 tree v2df_ftype_v2df_v2df 22259 = build_function_type_list (V2DF_type_node, 22260 V2DF_type_node, V2DF_type_node, NULL_TREE); 22261 tree v16qi_ftype_v16qi_v16qi 22262 = build_function_type_list (V16QI_type_node, 22263 V16QI_type_node, V16QI_type_node, NULL_TREE); 22264 tree v8hi_ftype_v8hi_v8hi 22265 = build_function_type_list (V8HI_type_node, 22266 V8HI_type_node, V8HI_type_node, NULL_TREE); 22267 tree v4si_ftype_v4si_v4si 22268 = build_function_type_list (V4SI_type_node, 22269 V4SI_type_node, V4SI_type_node, NULL_TREE); 22270 tree v2di_ftype_v2di_v2di 22271 = build_function_type_list (V2DI_type_node, 22272 V2DI_type_node, V2DI_type_node, NULL_TREE); 22273 tree v2di_ftype_v2df_v2df 22274 = build_function_type_list (V2DI_type_node, 22275 V2DF_type_node, V2DF_type_node, NULL_TREE); 22276 tree v2df_ftype_v2df 22277 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE); 22278 tree v2di_ftype_v2di_int 22279 = build_function_type_list (V2DI_type_node, 22280 V2DI_type_node, integer_type_node, NULL_TREE); 22281 tree v2di_ftype_v2di_v2di_int 22282 = build_function_type_list (V2DI_type_node, V2DI_type_node, 22283 V2DI_type_node, integer_type_node, NULL_TREE); 22284 tree v4si_ftype_v4si_int 22285 = build_function_type_list (V4SI_type_node, 22286 V4SI_type_node, integer_type_node, NULL_TREE); 22287 tree v8hi_ftype_v8hi_int 22288 = build_function_type_list (V8HI_type_node, 22289 V8HI_type_node, integer_type_node, NULL_TREE); 22290 tree v4si_ftype_v8hi_v8hi 22291 = build_function_type_list (V4SI_type_node, 22292 V8HI_type_node, V8HI_type_node, NULL_TREE); 22293 tree v1di_ftype_v8qi_v8qi 22294 = build_function_type_list (V1DI_type_node, 22295 V8QI_type_node, V8QI_type_node, NULL_TREE); 22296 tree v1di_ftype_v2si_v2si 22297 = build_function_type_list (V1DI_type_node, 22298 V2SI_type_node, V2SI_type_node, NULL_TREE); 22299 tree v2di_ftype_v16qi_v16qi 22300 = build_function_type_list (V2DI_type_node, 22301 V16QI_type_node, V16QI_type_node, NULL_TREE); 22302 tree v2di_ftype_v4si_v4si 22303 = build_function_type_list (V2DI_type_node, 22304 V4SI_type_node, V4SI_type_node, NULL_TREE); 22305 tree int_ftype_v16qi 22306 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE); 22307 tree v16qi_ftype_pcchar 22308 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE); 22309 tree void_ftype_pchar_v16qi 22310 = build_function_type_list (void_type_node, 22311 pchar_type_node, V16QI_type_node, 
NULL_TREE); 22312 22313 tree v2di_ftype_v2di_unsigned_unsigned 22314 = build_function_type_list (V2DI_type_node, V2DI_type_node, 22315 unsigned_type_node, unsigned_type_node, 22316 NULL_TREE); 22317 tree v2di_ftype_v2di_v2di_unsigned_unsigned 22318 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node, 22319 unsigned_type_node, unsigned_type_node, 22320 NULL_TREE); 22321 tree v2di_ftype_v2di_v16qi 22322 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node, 22323 NULL_TREE); 22324 tree v2df_ftype_v2df_v2df_v2df 22325 = build_function_type_list (V2DF_type_node, 22326 V2DF_type_node, V2DF_type_node, 22327 V2DF_type_node, NULL_TREE); 22328 tree v4sf_ftype_v4sf_v4sf_v4sf 22329 = build_function_type_list (V4SF_type_node, 22330 V4SF_type_node, V4SF_type_node, 22331 V4SF_type_node, NULL_TREE); 22332 tree v8hi_ftype_v16qi 22333 = build_function_type_list (V8HI_type_node, V16QI_type_node, 22334 NULL_TREE); 22335 tree v4si_ftype_v16qi 22336 = build_function_type_list (V4SI_type_node, V16QI_type_node, 22337 NULL_TREE); 22338 tree v2di_ftype_v16qi 22339 = build_function_type_list (V2DI_type_node, V16QI_type_node, 22340 NULL_TREE); 22341 tree v4si_ftype_v8hi 22342 = build_function_type_list (V4SI_type_node, V8HI_type_node, 22343 NULL_TREE); 22344 tree v2di_ftype_v8hi 22345 = build_function_type_list (V2DI_type_node, V8HI_type_node, 22346 NULL_TREE); 22347 tree v2di_ftype_v4si 22348 = build_function_type_list (V2DI_type_node, V4SI_type_node, 22349 NULL_TREE); 22350 tree v2di_ftype_pv2di 22351 = build_function_type_list (V2DI_type_node, pv2di_type_node, 22352 NULL_TREE); 22353 tree v16qi_ftype_v16qi_v16qi_int 22354 = build_function_type_list (V16QI_type_node, V16QI_type_node, 22355 V16QI_type_node, integer_type_node, 22356 NULL_TREE); 22357 tree v16qi_ftype_v16qi_v16qi_v16qi 22358 = build_function_type_list (V16QI_type_node, V16QI_type_node, 22359 V16QI_type_node, V16QI_type_node, 22360 NULL_TREE); 22361 tree v8hi_ftype_v8hi_v8hi_int 22362 = build_function_type_list (V8HI_type_node, V8HI_type_node, 22363 V8HI_type_node, integer_type_node, 22364 NULL_TREE); 22365 tree v4si_ftype_v4si_v4si_int 22366 = build_function_type_list (V4SI_type_node, V4SI_type_node, 22367 V4SI_type_node, integer_type_node, 22368 NULL_TREE); 22369 tree int_ftype_v2di_v2di 22370 = build_function_type_list (integer_type_node, 22371 V2DI_type_node, V2DI_type_node, 22372 NULL_TREE); 22373 tree int_ftype_v16qi_int_v16qi_int_int 22374 = build_function_type_list (integer_type_node, 22375 V16QI_type_node, 22376 integer_type_node, 22377 V16QI_type_node, 22378 integer_type_node, 22379 integer_type_node, 22380 NULL_TREE); 22381 tree v16qi_ftype_v16qi_int_v16qi_int_int 22382 = build_function_type_list (V16QI_type_node, 22383 V16QI_type_node, 22384 integer_type_node, 22385 V16QI_type_node, 22386 integer_type_node, 22387 integer_type_node, 22388 NULL_TREE); 22389 tree int_ftype_v16qi_v16qi_int 22390 = build_function_type_list (integer_type_node, 22391 V16QI_type_node, 22392 V16QI_type_node, 22393 integer_type_node, 22394 NULL_TREE); 22395 22396 /* SSE5 instructions */ 22397 tree v2di_ftype_v2di_v2di_v2di 22398 = build_function_type_list (V2DI_type_node, 22399 V2DI_type_node, 22400 V2DI_type_node, 22401 V2DI_type_node, 22402 NULL_TREE); 22403 22404 tree v4si_ftype_v4si_v4si_v4si 22405 = build_function_type_list (V4SI_type_node, 22406 V4SI_type_node, 22407 V4SI_type_node, 22408 V4SI_type_node, 22409 NULL_TREE); 22410 22411 tree v4si_ftype_v4si_v4si_v2di 22412 = build_function_type_list (V4SI_type_node, 
22413 V4SI_type_node, 22414 V4SI_type_node, 22415 V2DI_type_node, 22416 NULL_TREE); 22417 22418 tree v8hi_ftype_v8hi_v8hi_v8hi 22419 = build_function_type_list (V8HI_type_node, 22420 V8HI_type_node, 22421 V8HI_type_node, 22422 V8HI_type_node, 22423 NULL_TREE); 22424 22425 tree v8hi_ftype_v8hi_v8hi_v4si 22426 = build_function_type_list (V8HI_type_node, 22427 V8HI_type_node, 22428 V8HI_type_node, 22429 V4SI_type_node, 22430 NULL_TREE); 22431 22432 tree v2df_ftype_v2df_v2df_v16qi 22433 = build_function_type_list (V2DF_type_node, 22434 V2DF_type_node, 22435 V2DF_type_node, 22436 V16QI_type_node, 22437 NULL_TREE); 22438 22439 tree v4sf_ftype_v4sf_v4sf_v16qi 22440 = build_function_type_list (V4SF_type_node, 22441 V4SF_type_node, 22442 V4SF_type_node, 22443 V16QI_type_node, 22444 NULL_TREE); 22445 22446 tree v2di_ftype_v2di_si 22447 = build_function_type_list (V2DI_type_node, 22448 V2DI_type_node, 22449 integer_type_node, 22450 NULL_TREE); 22451 22452 tree v4si_ftype_v4si_si 22453 = build_function_type_list (V4SI_type_node, 22454 V4SI_type_node, 22455 integer_type_node, 22456 NULL_TREE); 22457 22458 tree v8hi_ftype_v8hi_si 22459 = build_function_type_list (V8HI_type_node, 22460 V8HI_type_node, 22461 integer_type_node, 22462 NULL_TREE); 22463 22464 tree v16qi_ftype_v16qi_si 22465 = build_function_type_list (V16QI_type_node, 22466 V16QI_type_node, 22467 integer_type_node, 22468 NULL_TREE); 22469 tree v4sf_ftype_v4hi 22470 = build_function_type_list (V4SF_type_node, 22471 V4HI_type_node, 22472 NULL_TREE); 22473 22474 tree v4hi_ftype_v4sf 22475 = build_function_type_list (V4HI_type_node, 22476 V4SF_type_node, 22477 NULL_TREE); 22478 22479 tree v2di_ftype_v2di 22480 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE); 22481 22482 tree v16qi_ftype_v8hi_v8hi 22483 = build_function_type_list (V16QI_type_node, 22484 V8HI_type_node, V8HI_type_node, 22485 NULL_TREE); 22486 tree v8hi_ftype_v4si_v4si 22487 = build_function_type_list (V8HI_type_node, 22488 V4SI_type_node, V4SI_type_node, 22489 NULL_TREE); 22490 tree v8hi_ftype_v16qi_v16qi 22491 = build_function_type_list (V8HI_type_node, 22492 V16QI_type_node, V16QI_type_node, 22493 NULL_TREE); 22494 tree v4hi_ftype_v8qi_v8qi 22495 = build_function_type_list (V4HI_type_node, 22496 V8QI_type_node, V8QI_type_node, 22497 NULL_TREE); 22498 tree unsigned_ftype_unsigned_uchar 22499 = build_function_type_list (unsigned_type_node, 22500 unsigned_type_node, 22501 unsigned_char_type_node, 22502 NULL_TREE); 22503 tree unsigned_ftype_unsigned_ushort 22504 = build_function_type_list (unsigned_type_node, 22505 unsigned_type_node, 22506 short_unsigned_type_node, 22507 NULL_TREE); 22508 tree unsigned_ftype_unsigned_unsigned 22509 = build_function_type_list (unsigned_type_node, 22510 unsigned_type_node, 22511 unsigned_type_node, 22512 NULL_TREE); 22513 tree uint64_ftype_uint64_uint64 22514 = build_function_type_list (long_long_unsigned_type_node, 22515 long_long_unsigned_type_node, 22516 long_long_unsigned_type_node, 22517 NULL_TREE); 22518 tree float_ftype_float 22519 = build_function_type_list (float_type_node, 22520 float_type_node, 22521 NULL_TREE); 22522 22523 /* AVX builtins */ 22524 tree V32QI_type_node = build_vector_type_for_mode (char_type_node, 22525 V32QImode); 22526 tree V8SI_type_node = build_vector_type_for_mode (intSI_type_node, 22527 V8SImode); 22528 tree V8SF_type_node = build_vector_type_for_mode (float_type_node, 22529 V8SFmode); 22530 tree V4DI_type_node = build_vector_type_for_mode (long_long_integer_type_node, 22531 V4DImode); 22532 
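  /* Aside (illustrative only, not one of the registered type nodes): each
     build_vector_type_for_mode call in this block builds the tree type for a
     full vector register from an element type and a machine mode.  A hedged
     sketch of one more such declaration, with a hypothetical name used only
     for illustration, would be

         tree V4SI_example_type
           = build_vector_type_for_mode (intSI_type_node, V4SImode);

     i.e. the 128-bit "4 x int" type.  The real AVX type nodes continue
     below.  */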
tree V4DF_type_node = build_vector_type_for_mode (double_type_node, 22533 V4DFmode); 22534 tree v8sf_ftype_v8sf 22535 = build_function_type_list (V8SF_type_node, 22536 V8SF_type_node, 22537 NULL_TREE); 22538 tree v8si_ftype_v8sf 22539 = build_function_type_list (V8SI_type_node, 22540 V8SF_type_node, 22541 NULL_TREE); 22542 tree v8sf_ftype_v8si 22543 = build_function_type_list (V8SF_type_node, 22544 V8SI_type_node, 22545 NULL_TREE); 22546 tree v4si_ftype_v4df 22547 = build_function_type_list (V4SI_type_node, 22548 V4DF_type_node, 22549 NULL_TREE); 22550 tree v4df_ftype_v4df 22551 = build_function_type_list (V4DF_type_node, 22552 V4DF_type_node, 22553 NULL_TREE); 22554 tree v4df_ftype_v4si 22555 = build_function_type_list (V4DF_type_node, 22556 V4SI_type_node, 22557 NULL_TREE); 22558 tree v4df_ftype_v4sf 22559 = build_function_type_list (V4DF_type_node, 22560 V4SF_type_node, 22561 NULL_TREE); 22562 tree v4sf_ftype_v4df 22563 = build_function_type_list (V4SF_type_node, 22564 V4DF_type_node, 22565 NULL_TREE); 22566 tree v8sf_ftype_v8sf_v8sf 22567 = build_function_type_list (V8SF_type_node, 22568 V8SF_type_node, V8SF_type_node, 22569 NULL_TREE); 22570 tree v4df_ftype_v4df_v4df 22571 = build_function_type_list (V4DF_type_node, 22572 V4DF_type_node, V4DF_type_node, 22573 NULL_TREE); 22574 tree v8sf_ftype_v8sf_int 22575 = build_function_type_list (V8SF_type_node, 22576 V8SF_type_node, integer_type_node, 22577 NULL_TREE); 22578 tree v4si_ftype_v8si_int 22579 = build_function_type_list (V4SI_type_node, 22580 V8SI_type_node, integer_type_node, 22581 NULL_TREE); 22582 tree v4df_ftype_v4df_int 22583 = build_function_type_list (V4DF_type_node, 22584 V4DF_type_node, integer_type_node, 22585 NULL_TREE); 22586 tree v4sf_ftype_v8sf_int 22587 = build_function_type_list (V4SF_type_node, 22588 V8SF_type_node, integer_type_node, 22589 NULL_TREE); 22590 tree v2df_ftype_v4df_int 22591 = build_function_type_list (V2DF_type_node, 22592 V4DF_type_node, integer_type_node, 22593 NULL_TREE); 22594 tree v8sf_ftype_v8sf_v8sf_int 22595 = build_function_type_list (V8SF_type_node, 22596 V8SF_type_node, V8SF_type_node, 22597 integer_type_node, 22598 NULL_TREE); 22599 tree v8sf_ftype_v8sf_v8sf_v8sf 22600 = build_function_type_list (V8SF_type_node, 22601 V8SF_type_node, V8SF_type_node, 22602 V8SF_type_node, 22603 NULL_TREE); 22604 tree v4df_ftype_v4df_v4df_v4df 22605 = build_function_type_list (V4DF_type_node, 22606 V4DF_type_node, V4DF_type_node, 22607 V4DF_type_node, 22608 NULL_TREE); 22609 tree v8si_ftype_v8si_v8si_int 22610 = build_function_type_list (V8SI_type_node, 22611 V8SI_type_node, V8SI_type_node, 22612 integer_type_node, 22613 NULL_TREE); 22614 tree v4df_ftype_v4df_v4df_int 22615 = build_function_type_list (V4DF_type_node, 22616 V4DF_type_node, V4DF_type_node, 22617 integer_type_node, 22618 NULL_TREE); 22619 tree v8sf_ftype_pcfloat 22620 = build_function_type_list (V8SF_type_node, 22621 pcfloat_type_node, 22622 NULL_TREE); 22623 tree v4df_ftype_pcdouble 22624 = build_function_type_list (V4DF_type_node, 22625 pcdouble_type_node, 22626 NULL_TREE); 22627 tree pcv4sf_type_node 22628 = build_pointer_type (build_type_variant (V4SF_type_node, 1, 0)); 22629 tree pcv2df_type_node 22630 = build_pointer_type (build_type_variant (V2DF_type_node, 1, 0)); 22631 tree v8sf_ftype_pcv4sf 22632 = build_function_type_list (V8SF_type_node, 22633 pcv4sf_type_node, 22634 NULL_TREE); 22635 tree v4df_ftype_pcv2df 22636 = build_function_type_list (V4DF_type_node, 22637 pcv2df_type_node, 22638 NULL_TREE); 22639 tree v32qi_ftype_pcchar 22640 
= build_function_type_list (V32QI_type_node, 22641 pcchar_type_node, 22642 NULL_TREE); 22643 tree void_ftype_pchar_v32qi 22644 = build_function_type_list (void_type_node, 22645 pchar_type_node, V32QI_type_node, 22646 NULL_TREE); 22647 tree v8si_ftype_v8si_v4si_int 22648 = build_function_type_list (V8SI_type_node, 22649 V8SI_type_node, V4SI_type_node, 22650 integer_type_node, 22651 NULL_TREE); 22652 tree pv4di_type_node = build_pointer_type (V4DI_type_node); 22653 tree void_ftype_pv4di_v4di 22654 = build_function_type_list (void_type_node, 22655 pv4di_type_node, V4DI_type_node, 22656 NULL_TREE); 22657 tree v8sf_ftype_v8sf_v4sf_int 22658 = build_function_type_list (V8SF_type_node, 22659 V8SF_type_node, V4SF_type_node, 22660 integer_type_node, 22661 NULL_TREE); 22662 tree v4df_ftype_v4df_v2df_int 22663 = build_function_type_list (V4DF_type_node, 22664 V4DF_type_node, V2DF_type_node, 22665 integer_type_node, 22666 NULL_TREE); 22667 tree void_ftype_pfloat_v8sf 22668 = build_function_type_list (void_type_node, 22669 pfloat_type_node, V8SF_type_node, 22670 NULL_TREE); 22671 tree void_ftype_pdouble_v4df 22672 = build_function_type_list (void_type_node, 22673 pdouble_type_node, V4DF_type_node, 22674 NULL_TREE); 22675 tree pv8sf_type_node = build_pointer_type (V8SF_type_node); 22676 tree pv4sf_type_node = build_pointer_type (V4SF_type_node); 22677 tree pv4df_type_node = build_pointer_type (V4DF_type_node); 22678 tree pv2df_type_node = build_pointer_type (V2DF_type_node); 22679 tree pcv8sf_type_node 22680 = build_pointer_type (build_type_variant (V8SF_type_node, 1, 0)); 22681 tree pcv4df_type_node 22682 = build_pointer_type (build_type_variant (V4DF_type_node, 1, 0)); 22683 tree v8sf_ftype_pcv8sf_v8sf 22684 = build_function_type_list (V8SF_type_node, 22685 pcv8sf_type_node, V8SF_type_node, 22686 NULL_TREE); 22687 tree v4df_ftype_pcv4df_v4df 22688 = build_function_type_list (V4DF_type_node, 22689 pcv4df_type_node, V4DF_type_node, 22690 NULL_TREE); 22691 tree v4sf_ftype_pcv4sf_v4sf 22692 = build_function_type_list (V4SF_type_node, 22693 pcv4sf_type_node, V4SF_type_node, 22694 NULL_TREE); 22695 tree v2df_ftype_pcv2df_v2df 22696 = build_function_type_list (V2DF_type_node, 22697 pcv2df_type_node, V2DF_type_node, 22698 NULL_TREE); 22699 tree void_ftype_pv8sf_v8sf_v8sf 22700 = build_function_type_list (void_type_node, 22701 pv8sf_type_node, V8SF_type_node, 22702 V8SF_type_node, 22703 NULL_TREE); 22704 tree void_ftype_pv4df_v4df_v4df 22705 = build_function_type_list (void_type_node, 22706 pv4df_type_node, V4DF_type_node, 22707 V4DF_type_node, 22708 NULL_TREE); 22709 tree void_ftype_pv4sf_v4sf_v4sf 22710 = build_function_type_list (void_type_node, 22711 pv4sf_type_node, V4SF_type_node, 22712 V4SF_type_node, 22713 NULL_TREE); 22714 tree void_ftype_pv2df_v2df_v2df 22715 = build_function_type_list (void_type_node, 22716 pv2df_type_node, V2DF_type_node, 22717 V2DF_type_node, 22718 NULL_TREE); 22719 tree v4df_ftype_v2df 22720 = build_function_type_list (V4DF_type_node, 22721 V2DF_type_node, 22722 NULL_TREE); 22723 tree v8sf_ftype_v4sf 22724 = build_function_type_list (V8SF_type_node, 22725 V4SF_type_node, 22726 NULL_TREE); 22727 tree v8si_ftype_v4si 22728 = build_function_type_list (V8SI_type_node, 22729 V4SI_type_node, 22730 NULL_TREE); 22731 tree v2df_ftype_v4df 22732 = build_function_type_list (V2DF_type_node, 22733 V4DF_type_node, 22734 NULL_TREE); 22735 tree v4sf_ftype_v8sf 22736 = build_function_type_list (V4SF_type_node, 22737 V8SF_type_node, 22738 NULL_TREE); 22739 tree v4si_ftype_v8si 22740 = 
build_function_type_list (V4SI_type_node, 22741 V8SI_type_node, 22742 NULL_TREE); 22743 tree int_ftype_v4df 22744 = build_function_type_list (integer_type_node, 22745 V4DF_type_node, 22746 NULL_TREE); 22747 tree int_ftype_v8sf 22748 = build_function_type_list (integer_type_node, 22749 V8SF_type_node, 22750 NULL_TREE); 22751 tree int_ftype_v8sf_v8sf 22752 = build_function_type_list (integer_type_node, 22753 V8SF_type_node, V8SF_type_node, 22754 NULL_TREE); 22755 tree int_ftype_v4di_v4di 22756 = build_function_type_list (integer_type_node, 22757 V4DI_type_node, V4DI_type_node, 22758 NULL_TREE); 22759 tree int_ftype_v4df_v4df 22760 = build_function_type_list (integer_type_node, 22761 V4DF_type_node, V4DF_type_node, 22762 NULL_TREE); 22763 tree v8sf_ftype_v8sf_v8si 22764 = build_function_type_list (V8SF_type_node, 22765 V8SF_type_node, V8SI_type_node, 22766 NULL_TREE); 22767 tree v4df_ftype_v4df_v4di 22768 = build_function_type_list (V4DF_type_node, 22769 V4DF_type_node, V4DI_type_node, 22770 NULL_TREE); 22771 tree v4sf_ftype_v4sf_v4si 22772 = build_function_type_list (V4SF_type_node, 22773 V4SF_type_node, V4SI_type_node, NULL_TREE); 22774 tree v2df_ftype_v2df_v2di 22775 = build_function_type_list (V2DF_type_node, 22776 V2DF_type_node, V2DI_type_node, NULL_TREE); 22777 22778 tree ftype; 22779 22780 /* Add all special builtins with variable number of operands. */ 22781 for (i = 0, d = bdesc_special_args; 22782 i < ARRAY_SIZE (bdesc_special_args); 22783 i++, d++) 22784 { 22785 tree type; 22786 22787 if (d->name == 0) 22788 continue; 22789 22790 switch ((enum ix86_special_builtin_type) d->flag) 22791 { 22792 case VOID_FTYPE_VOID: 22793 type = void_ftype_void; 22794 break; 22795 case V32QI_FTYPE_PCCHAR: 22796 type = v32qi_ftype_pcchar; 22797 break; 22798 case V16QI_FTYPE_PCCHAR: 22799 type = v16qi_ftype_pcchar; 22800 break; 22801 case V8SF_FTYPE_PCV4SF: 22802 type = v8sf_ftype_pcv4sf; 22803 break; 22804 case V8SF_FTYPE_PCFLOAT: 22805 type = v8sf_ftype_pcfloat; 22806 break; 22807 case V4DF_FTYPE_PCV2DF: 22808 type = v4df_ftype_pcv2df; 22809 break; 22810 case V4DF_FTYPE_PCDOUBLE: 22811 type = v4df_ftype_pcdouble; 22812 break; 22813 case V4SF_FTYPE_PCFLOAT: 22814 type = v4sf_ftype_pcfloat; 22815 break; 22816 case V2DI_FTYPE_PV2DI: 22817 type = v2di_ftype_pv2di; 22818 break; 22819 case V2DF_FTYPE_PCDOUBLE: 22820 type = v2df_ftype_pcdouble; 22821 break; 22822 case V8SF_FTYPE_PCV8SF_V8SF: 22823 type = v8sf_ftype_pcv8sf_v8sf; 22824 break; 22825 case V4DF_FTYPE_PCV4DF_V4DF: 22826 type = v4df_ftype_pcv4df_v4df; 22827 break; 22828 case V4SF_FTYPE_V4SF_PCV2SF: 22829 type = v4sf_ftype_v4sf_pcv2sf; 22830 break; 22831 case V4SF_FTYPE_PCV4SF_V4SF: 22832 type = v4sf_ftype_pcv4sf_v4sf; 22833 break; 22834 case V2DF_FTYPE_V2DF_PCDOUBLE: 22835 type = v2df_ftype_v2df_pcdouble; 22836 break; 22837 case V2DF_FTYPE_PCV2DF_V2DF: 22838 type = v2df_ftype_pcv2df_v2df; 22839 break; 22840 case VOID_FTYPE_PV2SF_V4SF: 22841 type = void_ftype_pv2sf_v4sf; 22842 break; 22843 case VOID_FTYPE_PV4DI_V4DI: 22844 type = void_ftype_pv4di_v4di; 22845 break; 22846 case VOID_FTYPE_PV2DI_V2DI: 22847 type = void_ftype_pv2di_v2di; 22848 break; 22849 case VOID_FTYPE_PCHAR_V32QI: 22850 type = void_ftype_pchar_v32qi; 22851 break; 22852 case VOID_FTYPE_PCHAR_V16QI: 22853 type = void_ftype_pchar_v16qi; 22854 break; 22855 case VOID_FTYPE_PFLOAT_V8SF: 22856 type = void_ftype_pfloat_v8sf; 22857 break; 22858 case VOID_FTYPE_PFLOAT_V4SF: 22859 type = void_ftype_pfloat_v4sf; 22860 break; 22861 case VOID_FTYPE_PDOUBLE_V4DF: 22862 type = 
void_ftype_pdouble_v4df; 22863 break; 22864 case VOID_FTYPE_PDOUBLE_V2DF: 22865 type = void_ftype_pdouble_v2df; 22866 break; 22867 case VOID_FTYPE_PDI_DI: 22868 type = void_ftype_pdi_di; 22869 break; 22870 case VOID_FTYPE_PINT_INT: 22871 type = void_ftype_pint_int; 22872 break; 22873 case VOID_FTYPE_PV8SF_V8SF_V8SF: 22874 type = void_ftype_pv8sf_v8sf_v8sf; 22875 break; 22876 case VOID_FTYPE_PV4DF_V4DF_V4DF: 22877 type = void_ftype_pv4df_v4df_v4df; 22878 break; 22879 case VOID_FTYPE_PV4SF_V4SF_V4SF: 22880 type = void_ftype_pv4sf_v4sf_v4sf; 22881 break; 22882 case VOID_FTYPE_PV2DF_V2DF_V2DF: 22883 type = void_ftype_pv2df_v2df_v2df; 22884 break; 22885 default: 22886 gcc_unreachable (); 22887 } 22888 22889 def_builtin (d->mask, d->name, type, d->code); 22890 } 22891 22892 /* Add all builtins with variable number of operands. */ 22893 for (i = 0, d = bdesc_args; 22894 i < ARRAY_SIZE (bdesc_args); 22895 i++, d++) 22896 { 22897 tree type; 22898 22899 if (d->name == 0) 22900 continue; 22901 22902 switch ((enum ix86_builtin_type) d->flag) 22903 { 22904 case FLOAT_FTYPE_FLOAT: 22905 type = float_ftype_float; 22906 break; 22907 case INT_FTYPE_V8SF_V8SF_PTEST: 22908 type = int_ftype_v8sf_v8sf; 22909 break; 22910 case INT_FTYPE_V4DI_V4DI_PTEST: 22911 type = int_ftype_v4di_v4di; 22912 break; 22913 case INT_FTYPE_V4DF_V4DF_PTEST: 22914 type = int_ftype_v4df_v4df; 22915 break; 22916 case INT_FTYPE_V4SF_V4SF_PTEST: 22917 type = int_ftype_v4sf_v4sf; 22918 break; 22919 case INT_FTYPE_V2DI_V2DI_PTEST: 22920 type = int_ftype_v2di_v2di; 22921 break; 22922 case INT_FTYPE_V2DF_V2DF_PTEST: 22923 type = int_ftype_v2df_v2df; 22924 break; 22925 case INT64_FTYPE_V4SF: 22926 type = int64_ftype_v4sf; 22927 break; 22928 case INT64_FTYPE_V2DF: 22929 type = int64_ftype_v2df; 22930 break; 22931 case INT_FTYPE_V16QI: 22932 type = int_ftype_v16qi; 22933 break; 22934 case INT_FTYPE_V8QI: 22935 type = int_ftype_v8qi; 22936 break; 22937 case INT_FTYPE_V8SF: 22938 type = int_ftype_v8sf; 22939 break; 22940 case INT_FTYPE_V4DF: 22941 type = int_ftype_v4df; 22942 break; 22943 case INT_FTYPE_V4SF: 22944 type = int_ftype_v4sf; 22945 break; 22946 case INT_FTYPE_V2DF: 22947 type = int_ftype_v2df; 22948 break; 22949 case V16QI_FTYPE_V16QI: 22950 type = v16qi_ftype_v16qi; 22951 break; 22952 case V8SI_FTYPE_V8SF: 22953 type = v8si_ftype_v8sf; 22954 break; 22955 case V8SI_FTYPE_V4SI: 22956 type = v8si_ftype_v4si; 22957 break; 22958 case V8HI_FTYPE_V8HI: 22959 type = v8hi_ftype_v8hi; 22960 break; 22961 case V8HI_FTYPE_V16QI: 22962 type = v8hi_ftype_v16qi; 22963 break; 22964 case V8QI_FTYPE_V8QI: 22965 type = v8qi_ftype_v8qi; 22966 break; 22967 case V8SF_FTYPE_V8SF: 22968 type = v8sf_ftype_v8sf; 22969 break; 22970 case V8SF_FTYPE_V8SI: 22971 type = v8sf_ftype_v8si; 22972 break; 22973 case V8SF_FTYPE_V4SF: 22974 type = v8sf_ftype_v4sf; 22975 break; 22976 case V4SI_FTYPE_V4DF: 22977 type = v4si_ftype_v4df; 22978 break; 22979 case V4SI_FTYPE_V4SI: 22980 type = v4si_ftype_v4si; 22981 break; 22982 case V4SI_FTYPE_V16QI: 22983 type = v4si_ftype_v16qi; 22984 break; 22985 case V4SI_FTYPE_V8SI: 22986 type = v4si_ftype_v8si; 22987 break; 22988 case V4SI_FTYPE_V8HI: 22989 type = v4si_ftype_v8hi; 22990 break; 22991 case V4SI_FTYPE_V4SF: 22992 type = v4si_ftype_v4sf; 22993 break; 22994 case V4SI_FTYPE_V2DF: 22995 type = v4si_ftype_v2df; 22996 break; 22997 case V4HI_FTYPE_V4HI: 22998 type = v4hi_ftype_v4hi; 22999 break; 23000 case V4DF_FTYPE_V4DF: 23001 type = v4df_ftype_v4df; 23002 break; 23003 case V4DF_FTYPE_V4SI: 23004 type = v4df_ftype_v4si; 23005 
break; 23006 case V4DF_FTYPE_V4SF: 23007 type = v4df_ftype_v4sf; 23008 break; 23009 case V4DF_FTYPE_V2DF: 23010 type = v4df_ftype_v2df; 23011 break; 23012 case V4SF_FTYPE_V4SF: 23013 case V4SF_FTYPE_V4SF_VEC_MERGE: 23014 type = v4sf_ftype_v4sf; 23015 break; 23016 case V4SF_FTYPE_V8SF: 23017 type = v4sf_ftype_v8sf; 23018 break; 23019 case V4SF_FTYPE_V4SI: 23020 type = v4sf_ftype_v4si; 23021 break; 23022 case V4SF_FTYPE_V4DF: 23023 type = v4sf_ftype_v4df; 23024 break; 23025 case V4SF_FTYPE_V2DF: 23026 type = v4sf_ftype_v2df; 23027 break; 23028 case V2DI_FTYPE_V2DI: 23029 type = v2di_ftype_v2di; 23030 break; 23031 case V2DI_FTYPE_V16QI: 23032 type = v2di_ftype_v16qi; 23033 break; 23034 case V2DI_FTYPE_V8HI: 23035 type = v2di_ftype_v8hi; 23036 break; 23037 case V2DI_FTYPE_V4SI: 23038 type = v2di_ftype_v4si; 23039 break; 23040 case V2SI_FTYPE_V2SI: 23041 type = v2si_ftype_v2si; 23042 break; 23043 case V2SI_FTYPE_V4SF: 23044 type = v2si_ftype_v4sf; 23045 break; 23046 case V2SI_FTYPE_V2DF: 23047 type = v2si_ftype_v2df; 23048 break; 23049 case V2SI_FTYPE_V2SF: 23050 type = v2si_ftype_v2sf; 23051 break; 23052 case V2DF_FTYPE_V4DF: 23053 type = v2df_ftype_v4df; 23054 break; 23055 case V2DF_FTYPE_V4SF: 23056 type = v2df_ftype_v4sf; 23057 break; 23058 case V2DF_FTYPE_V2DF: 23059 case V2DF_FTYPE_V2DF_VEC_MERGE: 23060 type = v2df_ftype_v2df; 23061 break; 23062 case V2DF_FTYPE_V2SI: 23063 type = v2df_ftype_v2si; 23064 break; 23065 case V2DF_FTYPE_V4SI: 23066 type = v2df_ftype_v4si; 23067 break; 23068 case V2SF_FTYPE_V2SF: 23069 type = v2sf_ftype_v2sf; 23070 break; 23071 case V2SF_FTYPE_V2SI: 23072 type = v2sf_ftype_v2si; 23073 break; 23074 case V16QI_FTYPE_V16QI_V16QI: 23075 type = v16qi_ftype_v16qi_v16qi; 23076 break; 23077 case V16QI_FTYPE_V8HI_V8HI: 23078 type = v16qi_ftype_v8hi_v8hi; 23079 break; 23080 case V8QI_FTYPE_V8QI_V8QI: 23081 type = v8qi_ftype_v8qi_v8qi; 23082 break; 23083 case V8QI_FTYPE_V4HI_V4HI: 23084 type = v8qi_ftype_v4hi_v4hi; 23085 break; 23086 case V8HI_FTYPE_V8HI_V8HI: 23087 case V8HI_FTYPE_V8HI_V8HI_COUNT: 23088 type = v8hi_ftype_v8hi_v8hi; 23089 break; 23090 case V8HI_FTYPE_V16QI_V16QI: 23091 type = v8hi_ftype_v16qi_v16qi; 23092 break; 23093 case V8HI_FTYPE_V4SI_V4SI: 23094 type = v8hi_ftype_v4si_v4si; 23095 break; 23096 case V8HI_FTYPE_V8HI_SI_COUNT: 23097 type = v8hi_ftype_v8hi_int; 23098 break; 23099 case V8SF_FTYPE_V8SF_V8SF: 23100 type = v8sf_ftype_v8sf_v8sf; 23101 break; 23102 case V8SF_FTYPE_V8SF_V8SI: 23103 type = v8sf_ftype_v8sf_v8si; 23104 break; 23105 case V4SI_FTYPE_V4SI_V4SI: 23106 case V4SI_FTYPE_V4SI_V4SI_COUNT: 23107 type = v4si_ftype_v4si_v4si; 23108 break; 23109 case V4SI_FTYPE_V8HI_V8HI: 23110 type = v4si_ftype_v8hi_v8hi; 23111 break; 23112 case V4SI_FTYPE_V4SF_V4SF: 23113 type = v4si_ftype_v4sf_v4sf; 23114 break; 23115 case V4SI_FTYPE_V2DF_V2DF: 23116 type = v4si_ftype_v2df_v2df; 23117 break; 23118 case V4SI_FTYPE_V4SI_SI_COUNT: 23119 type = v4si_ftype_v4si_int; 23120 break; 23121 case V4HI_FTYPE_V4HI_V4HI: 23122 case V4HI_FTYPE_V4HI_V4HI_COUNT: 23123 type = v4hi_ftype_v4hi_v4hi; 23124 break; 23125 case V4HI_FTYPE_V8QI_V8QI: 23126 type = v4hi_ftype_v8qi_v8qi; 23127 break; 23128 case V4HI_FTYPE_V2SI_V2SI: 23129 type = v4hi_ftype_v2si_v2si; 23130 break; 23131 case V4HI_FTYPE_V4HI_SI_COUNT: 23132 type = v4hi_ftype_v4hi_int; 23133 break; 23134 case V4DF_FTYPE_V4DF_V4DF: 23135 type = v4df_ftype_v4df_v4df; 23136 break; 23137 case V4DF_FTYPE_V4DF_V4DI: 23138 type = v4df_ftype_v4df_v4di; 23139 break; 23140 case V4SF_FTYPE_V4SF_V4SF: 23141 case 
V4SF_FTYPE_V4SF_V4SF_SWAP: 23142 type = v4sf_ftype_v4sf_v4sf; 23143 break; 23144 case V4SF_FTYPE_V4SF_V4SI: 23145 type = v4sf_ftype_v4sf_v4si; 23146 break; 23147 case V4SF_FTYPE_V4SF_V2SI: 23148 type = v4sf_ftype_v4sf_v2si; 23149 break; 23150 case V4SF_FTYPE_V4SF_V2DF: 23151 type = v4sf_ftype_v4sf_v2df; 23152 break; 23153 case V4SF_FTYPE_V4SF_DI: 23154 type = v4sf_ftype_v4sf_int64; 23155 break; 23156 case V4SF_FTYPE_V4SF_SI: 23157 type = v4sf_ftype_v4sf_int; 23158 break; 23159 case V2DI_FTYPE_V2DI_V2DI: 23160 case V2DI_FTYPE_V2DI_V2DI_COUNT: 23161 type = v2di_ftype_v2di_v2di; 23162 break; 23163 case V2DI_FTYPE_V16QI_V16QI: 23164 type = v2di_ftype_v16qi_v16qi; 23165 break; 23166 case V2DI_FTYPE_V4SI_V4SI: 23167 type = v2di_ftype_v4si_v4si; 23168 break; 23169 case V2DI_FTYPE_V2DI_V16QI: 23170 type = v2di_ftype_v2di_v16qi; 23171 break; 23172 case V2DI_FTYPE_V2DF_V2DF: 23173 type = v2di_ftype_v2df_v2df; 23174 break; 23175 case V2DI_FTYPE_V2DI_SI_COUNT: 23176 type = v2di_ftype_v2di_int; 23177 break; 23178 case V2SI_FTYPE_V2SI_V2SI: 23179 case V2SI_FTYPE_V2SI_V2SI_COUNT: 23180 type = v2si_ftype_v2si_v2si; 23181 break; 23182 case V2SI_FTYPE_V4HI_V4HI: 23183 type = v2si_ftype_v4hi_v4hi; 23184 break; 23185 case V2SI_FTYPE_V2SF_V2SF: 23186 type = v2si_ftype_v2sf_v2sf; 23187 break; 23188 case V2SI_FTYPE_V2SI_SI_COUNT: 23189 type = v2si_ftype_v2si_int; 23190 break; 23191 case V2DF_FTYPE_V2DF_V2DF: 23192 case V2DF_FTYPE_V2DF_V2DF_SWAP: 23193 type = v2df_ftype_v2df_v2df; 23194 break; 23195 case V2DF_FTYPE_V2DF_V4SF: 23196 type = v2df_ftype_v2df_v4sf; 23197 break; 23198 case V2DF_FTYPE_V2DF_V2DI: 23199 type = v2df_ftype_v2df_v2di; 23200 break; 23201 case V2DF_FTYPE_V2DF_DI: 23202 type = v2df_ftype_v2df_int64; 23203 break; 23204 case V2DF_FTYPE_V2DF_SI: 23205 type = v2df_ftype_v2df_int; 23206 break; 23207 case V2SF_FTYPE_V2SF_V2SF: 23208 type = v2sf_ftype_v2sf_v2sf; 23209 break; 23210 case V1DI_FTYPE_V1DI_V1DI: 23211 case V1DI_FTYPE_V1DI_V1DI_COUNT: 23212 type = v1di_ftype_v1di_v1di; 23213 break; 23214 case V1DI_FTYPE_V8QI_V8QI: 23215 type = v1di_ftype_v8qi_v8qi; 23216 break; 23217 case V1DI_FTYPE_V2SI_V2SI: 23218 type = v1di_ftype_v2si_v2si; 23219 break; 23220 case V1DI_FTYPE_V1DI_SI_COUNT: 23221 type = v1di_ftype_v1di_int; 23222 break; 23223 case UINT64_FTYPE_UINT64_UINT64: 23224 type = uint64_ftype_uint64_uint64; 23225 break; 23226 case UINT_FTYPE_UINT_UINT: 23227 type = unsigned_ftype_unsigned_unsigned; 23228 break; 23229 case UINT_FTYPE_UINT_USHORT: 23230 type = unsigned_ftype_unsigned_ushort; 23231 break; 23232 case UINT_FTYPE_UINT_UCHAR: 23233 type = unsigned_ftype_unsigned_uchar; 23234 break; 23235 case V8HI_FTYPE_V8HI_INT: 23236 type = v8hi_ftype_v8hi_int; 23237 break; 23238 case V8SF_FTYPE_V8SF_INT: 23239 type = v8sf_ftype_v8sf_int; 23240 break; 23241 case V4SI_FTYPE_V4SI_INT: 23242 type = v4si_ftype_v4si_int; 23243 break; 23244 case V4SI_FTYPE_V8SI_INT: 23245 type = v4si_ftype_v8si_int; 23246 break; 23247 case V4HI_FTYPE_V4HI_INT: 23248 type = v4hi_ftype_v4hi_int; 23249 break; 23250 case V4DF_FTYPE_V4DF_INT: 23251 type = v4df_ftype_v4df_int; 23252 break; 23253 case V4SF_FTYPE_V4SF_INT: 23254 type = v4sf_ftype_v4sf_int; 23255 break; 23256 case V4SF_FTYPE_V8SF_INT: 23257 type = v4sf_ftype_v8sf_int; 23258 break; 23259 case V2DI_FTYPE_V2DI_INT: 23260 case V2DI2TI_FTYPE_V2DI_INT: 23261 type = v2di_ftype_v2di_int; 23262 break; 23263 case V2DF_FTYPE_V2DF_INT: 23264 type = v2df_ftype_v2df_int; 23265 break; 23266 case V2DF_FTYPE_V4DF_INT: 23267 type = v2df_ftype_v4df_int; 23268 break; 23269 case 
V16QI_FTYPE_V16QI_V16QI_V16QI: 23270 type = v16qi_ftype_v16qi_v16qi_v16qi; 23271 break; 23272 case V8SF_FTYPE_V8SF_V8SF_V8SF: 23273 type = v8sf_ftype_v8sf_v8sf_v8sf; 23274 break; 23275 case V4DF_FTYPE_V4DF_V4DF_V4DF: 23276 type = v4df_ftype_v4df_v4df_v4df; 23277 break; 23278 case V4SF_FTYPE_V4SF_V4SF_V4SF: 23279 type = v4sf_ftype_v4sf_v4sf_v4sf; 23280 break; 23281 case V2DF_FTYPE_V2DF_V2DF_V2DF: 23282 type = v2df_ftype_v2df_v2df_v2df; 23283 break; 23284 case V16QI_FTYPE_V16QI_V16QI_INT: 23285 type = v16qi_ftype_v16qi_v16qi_int; 23286 break; 23287 case V8SI_FTYPE_V8SI_V8SI_INT: 23288 type = v8si_ftype_v8si_v8si_int; 23289 break; 23290 case V8SI_FTYPE_V8SI_V4SI_INT: 23291 type = v8si_ftype_v8si_v4si_int; 23292 break; 23293 case V8HI_FTYPE_V8HI_V8HI_INT: 23294 type = v8hi_ftype_v8hi_v8hi_int; 23295 break; 23296 case V8SF_FTYPE_V8SF_V8SF_INT: 23297 type = v8sf_ftype_v8sf_v8sf_int; 23298 break; 23299 case V8SF_FTYPE_V8SF_V4SF_INT: 23300 type = v8sf_ftype_v8sf_v4sf_int; 23301 break; 23302 case V4SI_FTYPE_V4SI_V4SI_INT: 23303 type = v4si_ftype_v4si_v4si_int; 23304 break; 23305 case V4DF_FTYPE_V4DF_V4DF_INT: 23306 type = v4df_ftype_v4df_v4df_int; 23307 break; 23308 case V4DF_FTYPE_V4DF_V2DF_INT: 23309 type = v4df_ftype_v4df_v2df_int; 23310 break; 23311 case V4SF_FTYPE_V4SF_V4SF_INT: 23312 type = v4sf_ftype_v4sf_v4sf_int; 23313 break; 23314 case V2DI_FTYPE_V2DI_V2DI_INT: 23315 case V2DI2TI_FTYPE_V2DI_V2DI_INT: 23316 type = v2di_ftype_v2di_v2di_int; 23317 break; 23318 case V2DF_FTYPE_V2DF_V2DF_INT: 23319 type = v2df_ftype_v2df_v2df_int; 23320 break; 23321 case V2DI_FTYPE_V2DI_UINT_UINT: 23322 type = v2di_ftype_v2di_unsigned_unsigned; 23323 break; 23324 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: 23325 type = v2di_ftype_v2di_v2di_unsigned_unsigned; 23326 break; 23327 case V1DI2DI_FTYPE_V1DI_V1DI_INT: 23328 type = v1di_ftype_v1di_v1di_int; 23329 break; 23330 default: 23331 gcc_unreachable (); 23332 } 23333 23334 def_builtin_const (d->mask, d->name, type, d->code); 23335 } 23336 23337 /* pcmpestr[im] insns. */ 23338 for (i = 0, d = bdesc_pcmpestr; 23339 i < ARRAY_SIZE (bdesc_pcmpestr); 23340 i++, d++) 23341 { 23342 if (d->code == IX86_BUILTIN_PCMPESTRM128) 23343 ftype = v16qi_ftype_v16qi_int_v16qi_int_int; 23344 else 23345 ftype = int_ftype_v16qi_int_v16qi_int_int; 23346 def_builtin_const (d->mask, d->name, ftype, d->code); 23347 } 23348 23349 /* pcmpistr[im] insns. */ 23350 for (i = 0, d = bdesc_pcmpistr; 23351 i < ARRAY_SIZE (bdesc_pcmpistr); 23352 i++, d++) 23353 { 23354 if (d->code == IX86_BUILTIN_PCMPISTRM128) 23355 ftype = v16qi_ftype_v16qi_v16qi_int; 23356 else 23357 ftype = int_ftype_v16qi_v16qi_int; 23358 def_builtin_const (d->mask, d->name, ftype, d->code); 23359 } 23360 23361 /* comi/ucomi insns. 
     */
  for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
    if (d->mask == OPTION_MASK_ISA_SSE2)
      def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
    else
      def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);

  /* SSE */
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
  def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);

  /* SSE or 3DNow!A */
  def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);

  /* SSE2 */
  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);

  def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
  x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);

  /* SSE3. */
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
  def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);

  /* AES */
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenc128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesenclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESENCLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdec128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDEC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesdeclast128", v2di_ftype_v2di_v2di, IX86_BUILTIN_AESDECLAST128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aesimc128", v2di_ftype_v2di, IX86_BUILTIN_AESIMC128);
  def_builtin_const (OPTION_MASK_ISA_AES, "__builtin_ia32_aeskeygenassist128", v2di_ftype_v2di_int, IX86_BUILTIN_AESKEYGENASSIST128);

  /* PCLMUL */
  def_builtin_const (OPTION_MASK_ISA_PCLMUL, "__builtin_ia32_pclmulqdq128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PCLMULQDQ128);

  /* AVX */
  def_builtin (OPTION_MASK_ISA_AVX, "__builtin_ia32_vzeroupper", void_ftype_void,
               TARGET_64BIT ? IX86_BUILTIN_VZEROUPPER_REX64 : IX86_BUILTIN_VZEROUPPER);

  /* Access to the vec_init patterns. */
  ftype = build_function_type_list (V2SI_type_node, integer_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);

  ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node,
                                    short_integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);

  ftype = build_function_type_list (V8QI_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, char_type_node,
                                    char_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
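  /* Usage note (illustrative sketch, not part of the registration itself):
     the vec_init builtins above are normally reached through the MMX
     intrinsic header rather than called directly.  Assuming the usual
     mmintrin.h mapping of _mm_set_pi32 onto __builtin_ia32_vec_init_v2si,
     user code such as

         #include <mmintrin.h>

         __m64
         make_pair (int hi, int lo)
         {
           return _mm_set_pi32 (hi, lo);
         }

     ends up expanding through the vec_init_v2si pattern registered here.  */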
  /* Access to the vec_extract patterns. */
  ftype = build_function_type_list (double_type_node, V2DF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (long_long_integer_type_node,
                                    V2DI_type_node, integer_type_node,
                                    NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);

  ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
                                    integer_type_node, NULL_TREE);
  def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);

  /* Access to the vec_set patterns.
*/ 23453 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node, 23454 intDI_type_node, 23455 integer_type_node, NULL_TREE); 23456 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI); 23457 23458 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node, 23459 float_type_node, 23460 integer_type_node, NULL_TREE); 23461 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF); 23462 23463 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node, 23464 intSI_type_node, 23465 integer_type_node, NULL_TREE); 23466 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI); 23467 23468 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node, 23469 intHI_type_node, 23470 integer_type_node, NULL_TREE); 23471 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI); 23472 23473 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node, 23474 intHI_type_node, 23475 integer_type_node, NULL_TREE); 23476 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI); 23477 23478 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node, 23479 intQI_type_node, 23480 integer_type_node, NULL_TREE); 23481 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI); 23482 23483 /* Add SSE5 multi-arg argument instructions */ 23484 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) 23485 { 23486 tree mtype = NULL_TREE; 23487 23488 if (d->name == 0) 23489 continue; 23490 23491 switch ((enum multi_arg_type)d->flag) 23492 { 23493 case MULTI_ARG_3_SF: mtype = v4sf_ftype_v4sf_v4sf_v4sf; break; 23494 case MULTI_ARG_3_DF: mtype = v2df_ftype_v2df_v2df_v2df; break; 23495 case MULTI_ARG_3_DI: mtype = v2di_ftype_v2di_v2di_v2di; break; 23496 case MULTI_ARG_3_SI: mtype = v4si_ftype_v4si_v4si_v4si; break; 23497 case MULTI_ARG_3_SI_DI: mtype = v4si_ftype_v4si_v4si_v2di; break; 23498 case MULTI_ARG_3_HI: mtype = v8hi_ftype_v8hi_v8hi_v8hi; break; 23499 case MULTI_ARG_3_HI_SI: mtype = v8hi_ftype_v8hi_v8hi_v4si; break; 23500 case MULTI_ARG_3_QI: mtype = v16qi_ftype_v16qi_v16qi_v16qi; break; 23501 case MULTI_ARG_3_PERMPS: mtype = v4sf_ftype_v4sf_v4sf_v16qi; break; 23502 case MULTI_ARG_3_PERMPD: mtype = v2df_ftype_v2df_v2df_v16qi; break; 23503 case MULTI_ARG_2_SF: mtype = v4sf_ftype_v4sf_v4sf; break; 23504 case MULTI_ARG_2_DF: mtype = v2df_ftype_v2df_v2df; break; 23505 case MULTI_ARG_2_DI: mtype = v2di_ftype_v2di_v2di; break; 23506 case MULTI_ARG_2_SI: mtype = v4si_ftype_v4si_v4si; break; 23507 case MULTI_ARG_2_HI: mtype = v8hi_ftype_v8hi_v8hi; break; 23508 case MULTI_ARG_2_QI: mtype = v16qi_ftype_v16qi_v16qi; break; 23509 case MULTI_ARG_2_DI_IMM: mtype = v2di_ftype_v2di_si; break; 23510 case MULTI_ARG_2_SI_IMM: mtype = v4si_ftype_v4si_si; break; 23511 case MULTI_ARG_2_HI_IMM: mtype = v8hi_ftype_v8hi_si; break; 23512 case MULTI_ARG_2_QI_IMM: mtype = v16qi_ftype_v16qi_si; break; 23513 case MULTI_ARG_2_SF_CMP: mtype = v4sf_ftype_v4sf_v4sf; break; 23514 case MULTI_ARG_2_DF_CMP: mtype = v2df_ftype_v2df_v2df; break; 23515 case MULTI_ARG_2_DI_CMP: mtype = v2di_ftype_v2di_v2di; break; 23516 case MULTI_ARG_2_SI_CMP: mtype = v4si_ftype_v4si_v4si; break; 23517 case MULTI_ARG_2_HI_CMP: mtype = v8hi_ftype_v8hi_v8hi; break; 
23518 case MULTI_ARG_2_QI_CMP: mtype = v16qi_ftype_v16qi_v16qi; break; 23519 case MULTI_ARG_2_SF_TF: mtype = v4sf_ftype_v4sf_v4sf; break; 23520 case MULTI_ARG_2_DF_TF: mtype = v2df_ftype_v2df_v2df; break; 23521 case MULTI_ARG_2_DI_TF: mtype = v2di_ftype_v2di_v2di; break; 23522 case MULTI_ARG_2_SI_TF: mtype = v4si_ftype_v4si_v4si; break; 23523 case MULTI_ARG_2_HI_TF: mtype = v8hi_ftype_v8hi_v8hi; break; 23524 case MULTI_ARG_2_QI_TF: mtype = v16qi_ftype_v16qi_v16qi; break; 23525 case MULTI_ARG_1_SF: mtype = v4sf_ftype_v4sf; break; 23526 case MULTI_ARG_1_DF: mtype = v2df_ftype_v2df; break; 23527 case MULTI_ARG_1_DI: mtype = v2di_ftype_v2di; break; 23528 case MULTI_ARG_1_SI: mtype = v4si_ftype_v4si; break; 23529 case MULTI_ARG_1_HI: mtype = v8hi_ftype_v8hi; break; 23530 case MULTI_ARG_1_QI: mtype = v16qi_ftype_v16qi; break; 23531 case MULTI_ARG_1_SI_DI: mtype = v2di_ftype_v4si; break; 23532 case MULTI_ARG_1_HI_DI: mtype = v2di_ftype_v8hi; break; 23533 case MULTI_ARG_1_HI_SI: mtype = v4si_ftype_v8hi; break; 23534 case MULTI_ARG_1_QI_DI: mtype = v2di_ftype_v16qi; break; 23535 case MULTI_ARG_1_QI_SI: mtype = v4si_ftype_v16qi; break; 23536 case MULTI_ARG_1_QI_HI: mtype = v8hi_ftype_v16qi; break; 23537 case MULTI_ARG_1_PH2PS: mtype = v4sf_ftype_v4hi; break; 23538 case MULTI_ARG_1_PS2PH: mtype = v4hi_ftype_v4sf; break; 23539 case MULTI_ARG_UNKNOWN: 23540 default: 23541 gcc_unreachable (); 23542 } 23543 23544 if (mtype) 23545 def_builtin_const (d->mask, d->name, mtype, d->code); 23546 } 23547 } 23548 23549 /* Internal method for ix86_init_builtins. */ 23550 23551 static void 23552 ix86_init_builtins_va_builtins_abi (void) 23553 { 23554 tree ms_va_ref, sysv_va_ref; 23555 tree fnvoid_va_end_ms, fnvoid_va_end_sysv; 23556 tree fnvoid_va_start_ms, fnvoid_va_start_sysv; 23557 tree fnvoid_va_copy_ms, fnvoid_va_copy_sysv; 23558 tree fnattr_ms = NULL_TREE, fnattr_sysv = NULL_TREE; 23559 23560 if (!TARGET_64BIT) 23561 return; 23562 fnattr_ms = build_tree_list (get_identifier ("ms_abi"), NULL_TREE); 23563 fnattr_sysv = build_tree_list (get_identifier ("sysv_abi"), NULL_TREE); 23564 ms_va_ref = build_reference_type (ms_va_list_type_node); 23565 sysv_va_ref = 23566 build_pointer_type (TREE_TYPE (sysv_va_list_type_node)); 23567 23568 fnvoid_va_end_ms = 23569 build_function_type_list (void_type_node, ms_va_ref, NULL_TREE); 23570 fnvoid_va_start_ms = 23571 build_varargs_function_type_list (void_type_node, ms_va_ref, NULL_TREE); 23572 fnvoid_va_end_sysv = 23573 build_function_type_list (void_type_node, sysv_va_ref, NULL_TREE); 23574 fnvoid_va_start_sysv = 23575 build_varargs_function_type_list (void_type_node, sysv_va_ref, 23576 NULL_TREE); 23577 fnvoid_va_copy_ms = 23578 build_function_type_list (void_type_node, ms_va_ref, ms_va_list_type_node, 23579 NULL_TREE); 23580 fnvoid_va_copy_sysv = 23581 build_function_type_list (void_type_node, sysv_va_ref, 23582 sysv_va_ref, NULL_TREE); 23583 23584 add_builtin_function ("__builtin_ms_va_start", fnvoid_va_start_ms, 23585 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_ms); 23586 add_builtin_function ("__builtin_ms_va_end", fnvoid_va_end_ms, 23587 BUILT_IN_VA_END, BUILT_IN_NORMAL, NULL, fnattr_ms); 23588 add_builtin_function ("__builtin_ms_va_copy", fnvoid_va_copy_ms, 23589 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_ms); 23590 add_builtin_function ("__builtin_sysv_va_start", fnvoid_va_start_sysv, 23591 BUILT_IN_VA_START, BUILT_IN_NORMAL, NULL, fnattr_sysv); 23592 add_builtin_function ("__builtin_sysv_va_end", fnvoid_va_end_sysv, 23593 BUILT_IN_VA_END, BUILT_IN_NORMAL, 
NULL, fnattr_sysv); 23594 add_builtin_function ("__builtin_sysv_va_copy", fnvoid_va_copy_sysv, 23595 BUILT_IN_VA_COPY, BUILT_IN_NORMAL, NULL, fnattr_sysv); 23596 } 23597 23598 static void 23599 ix86_init_builtins (void) 23600 { 23601 tree float128_type_node = make_node (REAL_TYPE); 23602 tree ftype, decl; 23603 23604 /* The __float80 type. */ 23605 if (TYPE_MODE (long_double_type_node) == XFmode) 23606 (*lang_hooks.types.register_builtin_type) (long_double_type_node, 23607 "__float80"); 23608 else 23609 { 23610 /* The __float80 type. */ 23611 tree float80_type_node = make_node (REAL_TYPE); 23612 23613 TYPE_PRECISION (float80_type_node) = 80; 23614 layout_type (float80_type_node); 23615 (*lang_hooks.types.register_builtin_type) (float80_type_node, 23616 "__float80"); 23617 } 23618 23619 /* The __float128 type. */ 23620 TYPE_PRECISION (float128_type_node) = 128; 23621 layout_type (float128_type_node); 23622 (*lang_hooks.types.register_builtin_type) (float128_type_node, 23623 "__float128"); 23624 23625 /* TFmode support builtins. */ 23626 ftype = build_function_type (float128_type_node, void_list_node); 23627 decl = add_builtin_function ("__builtin_infq", ftype, 23628 IX86_BUILTIN_INFQ, BUILT_IN_MD, 23629 NULL, NULL_TREE); 23630 ix86_builtins[(int) IX86_BUILTIN_INFQ] = decl; 23631 23632 /* We will expand them to normal call if SSE2 isn't available since 23633 they are used by libgcc. */ 23634 ftype = build_function_type_list (float128_type_node, 23635 float128_type_node, 23636 NULL_TREE); 23637 decl = add_builtin_function ("__builtin_fabsq", ftype, 23638 IX86_BUILTIN_FABSQ, BUILT_IN_MD, 23639 "__fabstf2", NULL_TREE); 23640 ix86_builtins[(int) IX86_BUILTIN_FABSQ] = decl; 23641 TREE_READONLY (decl) = 1; 23642 23643 ftype = build_function_type_list (float128_type_node, 23644 float128_type_node, 23645 float128_type_node, 23646 NULL_TREE); 23647 decl = add_builtin_function ("__builtin_copysignq", ftype, 23648 IX86_BUILTIN_COPYSIGNQ, BUILT_IN_MD, 23649 "__copysigntf3", NULL_TREE); 23650 ix86_builtins[(int) IX86_BUILTIN_COPYSIGNQ] = decl; 23651 TREE_READONLY (decl) = 1; 23652 23653 ix86_init_mmx_sse_builtins (); 23654 if (TARGET_64BIT) 23655 ix86_init_builtins_va_builtins_abi (); 23656 } 23657 23658 /* Errors in the source file can cause expand_expr to return const0_rtx 23659 where we expect a vector. To avoid crashing, use one of the vector 23660 clear instructions. */ 23661 static rtx 23662 safe_vector_operand (rtx x, enum machine_mode mode) 23663 { 23664 if (x == const0_rtx) 23665 x = CONST0_RTX (mode); 23666 return x; 23667 } 23668 23669 /* Subroutine of ix86_expand_builtin to take care of binop insns. */ 23670 23671 static rtx 23672 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target) 23673 { 23674 rtx pat; 23675 tree arg0 = CALL_EXPR_ARG (exp, 0); 23676 tree arg1 = CALL_EXPR_ARG (exp, 1); 23677 rtx op0 = expand_normal (arg0); 23678 rtx op1 = expand_normal (arg1); 23679 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23680 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 23681 enum machine_mode mode1 = insn_data[icode].operand[2].mode; 23682 23683 if (VECTOR_MODE_P (mode0)) 23684 op0 = safe_vector_operand (op0, mode0); 23685 if (VECTOR_MODE_P (mode1)) 23686 op1 = safe_vector_operand (op1, mode1); 23687 23688 if (optimize || !target 23689 || GET_MODE (target) != tmode 23690 || ! 
(*insn_data[icode].operand[0].predicate) (target, tmode)) 23691 target = gen_reg_rtx (tmode); 23692 23693 if (GET_MODE (op1) == SImode && mode1 == TImode) 23694 { 23695 rtx x = gen_reg_rtx (V4SImode); 23696 emit_insn (gen_sse2_loadd (x, op1)); 23697 op1 = gen_lowpart (TImode, x); 23698 } 23699 23700 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0)) 23701 op0 = copy_to_mode_reg (mode0, op0); 23702 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1)) 23703 op1 = copy_to_mode_reg (mode1, op1); 23704 23705 pat = GEN_FCN (icode) (target, op0, op1); 23706 if (! pat) 23707 return 0; 23708 23709 emit_insn (pat); 23710 23711 return target; 23712 } 23713 23714 /* Subroutine of ix86_expand_builtin to take care of 2-4 argument insns. */ 23715 23716 static rtx 23717 ix86_expand_multi_arg_builtin (enum insn_code icode, tree exp, rtx target, 23718 enum multi_arg_type m_type, 23719 enum insn_code sub_code) 23720 { 23721 rtx pat; 23722 int i; 23723 int nargs; 23724 bool comparison_p = false; 23725 bool tf_p = false; 23726 bool last_arg_constant = false; 23727 int num_memory = 0; 23728 struct { 23729 rtx op; 23730 enum machine_mode mode; 23731 } args[4]; 23732 23733 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23734 23735 switch (m_type) 23736 { 23737 case MULTI_ARG_3_SF: 23738 case MULTI_ARG_3_DF: 23739 case MULTI_ARG_3_DI: 23740 case MULTI_ARG_3_SI: 23741 case MULTI_ARG_3_SI_DI: 23742 case MULTI_ARG_3_HI: 23743 case MULTI_ARG_3_HI_SI: 23744 case MULTI_ARG_3_QI: 23745 case MULTI_ARG_3_PERMPS: 23746 case MULTI_ARG_3_PERMPD: 23747 nargs = 3; 23748 break; 23749 23750 case MULTI_ARG_2_SF: 23751 case MULTI_ARG_2_DF: 23752 case MULTI_ARG_2_DI: 23753 case MULTI_ARG_2_SI: 23754 case MULTI_ARG_2_HI: 23755 case MULTI_ARG_2_QI: 23756 nargs = 2; 23757 break; 23758 23759 case MULTI_ARG_2_DI_IMM: 23760 case MULTI_ARG_2_SI_IMM: 23761 case MULTI_ARG_2_HI_IMM: 23762 case MULTI_ARG_2_QI_IMM: 23763 nargs = 2; 23764 last_arg_constant = true; 23765 break; 23766 23767 case MULTI_ARG_1_SF: 23768 case MULTI_ARG_1_DF: 23769 case MULTI_ARG_1_DI: 23770 case MULTI_ARG_1_SI: 23771 case MULTI_ARG_1_HI: 23772 case MULTI_ARG_1_QI: 23773 case MULTI_ARG_1_SI_DI: 23774 case MULTI_ARG_1_HI_DI: 23775 case MULTI_ARG_1_HI_SI: 23776 case MULTI_ARG_1_QI_DI: 23777 case MULTI_ARG_1_QI_SI: 23778 case MULTI_ARG_1_QI_HI: 23779 case MULTI_ARG_1_PH2PS: 23780 case MULTI_ARG_1_PS2PH: 23781 nargs = 1; 23782 break; 23783 23784 case MULTI_ARG_2_SF_CMP: 23785 case MULTI_ARG_2_DF_CMP: 23786 case MULTI_ARG_2_DI_CMP: 23787 case MULTI_ARG_2_SI_CMP: 23788 case MULTI_ARG_2_HI_CMP: 23789 case MULTI_ARG_2_QI_CMP: 23790 nargs = 2; 23791 comparison_p = true; 23792 break; 23793 23794 case MULTI_ARG_2_SF_TF: 23795 case MULTI_ARG_2_DF_TF: 23796 case MULTI_ARG_2_DI_TF: 23797 case MULTI_ARG_2_SI_TF: 23798 case MULTI_ARG_2_HI_TF: 23799 case MULTI_ARG_2_QI_TF: 23800 nargs = 2; 23801 tf_p = true; 23802 break; 23803 23804 case MULTI_ARG_UNKNOWN: 23805 default: 23806 gcc_unreachable (); 23807 } 23808 23809 if (optimize || !target 23810 || GET_MODE (target) != tmode 23811 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 23812 target = gen_reg_rtx (tmode); 23813 23814 gcc_assert (nargs <= 4); 23815 23816 for (i = 0; i < nargs; i++) 23817 { 23818 tree arg = CALL_EXPR_ARG (exp, i); 23819 rtx op = expand_normal (arg); 23820 int adjust = (comparison_p) ? 
1 : 0; 23821 enum machine_mode mode = insn_data[icode].operand[i+adjust+1].mode; 23822 23823 if (last_arg_constant && i == nargs-1) 23824 { 23825 if (GET_CODE (op) != CONST_INT) 23826 { 23827 error ("last argument must be an immediate"); 23828 return gen_reg_rtx (tmode); 23829 } 23830 } 23831 else 23832 { 23833 if (VECTOR_MODE_P (mode)) 23834 op = safe_vector_operand (op, mode); 23835 23836 /* If we aren't optimizing, only allow one memory operand to be 23837 generated. */ 23838 if (memory_operand (op, mode)) 23839 num_memory++; 23840 23841 gcc_assert (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode); 23842 23843 if (optimize 23844 || ! (*insn_data[icode].operand[i+adjust+1].predicate) (op, mode) 23845 || num_memory > 1) 23846 op = force_reg (mode, op); 23847 } 23848 23849 args[i].op = op; 23850 args[i].mode = mode; 23851 } 23852 23853 switch (nargs) 23854 { 23855 case 1: 23856 pat = GEN_FCN (icode) (target, args[0].op); 23857 break; 23858 23859 case 2: 23860 if (tf_p) 23861 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, 23862 GEN_INT ((int)sub_code)); 23863 else if (! comparison_p) 23864 pat = GEN_FCN (icode) (target, args[0].op, args[1].op); 23865 else 23866 { 23867 rtx cmp_op = gen_rtx_fmt_ee (sub_code, GET_MODE (target), 23868 args[0].op, 23869 args[1].op); 23870 23871 pat = GEN_FCN (icode) (target, cmp_op, args[0].op, args[1].op); 23872 } 23873 break; 23874 23875 case 3: 23876 pat = GEN_FCN (icode) (target, args[0].op, args[1].op, args[2].op); 23877 break; 23878 23879 default: 23880 gcc_unreachable (); 23881 } 23882 23883 if (! pat) 23884 return 0; 23885 23886 emit_insn (pat); 23887 return target; 23888 } 23889 23890 /* Subroutine of ix86_expand_args_builtin to take care of scalar unop 23891 insns with vec_merge. */ 23892 23893 static rtx 23894 ix86_expand_unop_vec_merge_builtin (enum insn_code icode, tree exp, 23895 rtx target) 23896 { 23897 rtx pat; 23898 tree arg0 = CALL_EXPR_ARG (exp, 0); 23899 rtx op1, op0 = expand_normal (arg0); 23900 enum machine_mode tmode = insn_data[icode].operand[0].mode; 23901 enum machine_mode mode0 = insn_data[icode].operand[1].mode; 23902 23903 if (optimize || !target 23904 || GET_MODE (target) != tmode 23905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) 23906 target = gen_reg_rtx (tmode); 23907 23908 if (VECTOR_MODE_P (mode0)) 23909 op0 = safe_vector_operand (op0, mode0); 23910 23911 if ((optimize && !register_operand (op0, mode0)) 23912 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0)) 23913 op0 = copy_to_mode_reg (mode0, op0); 23914 23915 op1 = op0; 23916 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0)) 23917 op1 = copy_to_mode_reg (mode0, op1); 23918 23919 pat = GEN_FCN (icode) (target, op0, op1); 23920 if (! pat) 23921 return 0; 23922 emit_insn (pat); 23923 return target; 23924 } 23925 23926 /* Subroutine of ix86_expand_builtin to take care of comparison insns. 
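   Comparisons the hardware cannot encode directly are described in the
   builtin tables with the reverse condition plus a ..._SWAP variant, and
   the expander then exchanges the operands (see the SWAP handling in
   ix86_expand_args_builtin below).  Rough illustration, not part of this
   file (cmp_gt_ps is a made-up wrapper; __m128 and __v4sf are the
   xmmintrin.h vector types):

     static __inline __m128
     cmp_gt_ps (__m128 a, __m128 b)
     {
       // There is no direct "greater than" packed compare encoding; the
       // expander swaps the operands and emits the "less than" pattern.
       return (__m128) __builtin_ia32_cmpgtps ((__v4sf) a, (__v4sf) b);
     }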
*/ 23927 23928 static rtx 23929 ix86_expand_sse_compare (const struct builtin_description *d, 23930 tree exp, rtx target, bool swap) 23931 { 23932 rtx pat; 23933 tree arg0 = CALL_EXPR_ARG (exp, 0); 23934 tree arg1 = CALL_EXPR_ARG (exp, 1); 23935 rtx op0 = expand_normal (arg0); 23936 rtx op1 = expand_normal (arg1); 23937 rtx op2; 23938 enum machine_mode tmode = insn_data[d->icode].operand[0].mode; 23939 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode; 23940 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode; 23941 enum rtx_code comparison = d->comparison; 23942 23943 if (VECTOR_MODE_P (mode0)) 23944 op0 = safe_vector_operand (op0, mode0); 23945 if (VECTOR_MODE_P (mode1)) 23946 op1 = safe_vector_operand (op1, mode1); 23947 23948 /* Swap operands if we have a comparison that isn't available in 23949 hardware. */ 23950 if (swap) 23951 { 23952 rtx tmp = gen_reg_rtx (mode1); 23953 emit_move_insn (tmp, op1); 23954 op1 = op0; 23955 op0 = tmp; 23956 } 23957 23958 if (optimize || !target 23959 || GET_MODE (target) != tmode 23960 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode)) 23961 target = gen_reg_rtx (tmode); 23962 23963 if ((optimize && !register_operand (op0, mode0)) 23964 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0)) 23965 op0 = copy_to_mode_reg (mode0, op0); 23966 if ((optimize && !register_operand (op1, mode1)) 23967 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1)) 23968 op1 = copy_to_mode_reg (mode1, op1); 23969 23970 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1); 23971 pat = GEN_FCN (d->icode) (target, op0, op1, op2); 23972 if (! pat) 23973 return 0; 23974 emit_insn (pat); 23975 return target; 23976 } 23977 23978 /* Subroutine of ix86_expand_builtin to take care of comi insns. */ 23979 23980 static rtx 23981 ix86_expand_sse_comi (const struct builtin_description *d, tree exp, 23982 rtx target) 23983 { 23984 rtx pat; 23985 tree arg0 = CALL_EXPR_ARG (exp, 0); 23986 tree arg1 = CALL_EXPR_ARG (exp, 1); 23987 rtx op0 = expand_normal (arg0); 23988 rtx op1 = expand_normal (arg1); 23989 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 23990 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 23991 enum rtx_code comparison = d->comparison; 23992 23993 if (VECTOR_MODE_P (mode0)) 23994 op0 = safe_vector_operand (op0, mode0); 23995 if (VECTOR_MODE_P (mode1)) 23996 op1 = safe_vector_operand (op1, mode1); 23997 23998 /* Swap operands if we have a comparison that isn't available in 23999 hardware. */ 24000 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS) 24001 { 24002 rtx tmp = op1; 24003 op1 = op0; 24004 op0 = tmp; 24005 } 24006 24007 target = gen_reg_rtx (SImode); 24008 emit_move_insn (target, const0_rtx); 24009 target = gen_rtx_SUBREG (QImode, target, 0); 24010 24011 if ((optimize && !register_operand (op0, mode0)) 24012 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 24013 op0 = copy_to_mode_reg (mode0, op0); 24014 if ((optimize && !register_operand (op1, mode1)) 24015 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 24016 op1 = copy_to_mode_reg (mode1, op1); 24017 24018 pat = GEN_FCN (d->icode) (op0, op1); 24019 if (! pat) 24020 return 0; 24021 emit_insn (pat); 24022 emit_insn (gen_rtx_SET (VOIDmode, 24023 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24024 gen_rtx_fmt_ee (comparison, QImode, 24025 SET_DEST (pat), 24026 const0_rtx))); 24027 24028 return SUBREG_REG (target); 24029 } 24030 24031 /* Subroutine of ix86_expand_builtin to take care of ptest insns. 
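   These are the SSE4.1 PTEST builtins, dispatched here from
   ix86_expand_args_builtin via the ..._PTEST cases.  Rough usage sketch,
   for illustration only (requires SSE4.1; __m128i and __v2di are the
   emmintrin.h vector types):

     static __inline int
     all_masked_bits_zero (__m128i m, __m128i v)
     {
       // PTEST sets ZF when (m & v) == 0; the code below copies that
       // flag into an ordinary 0/1 integer result.
       return __builtin_ia32_ptestz128 ((__v2di) m, (__v2di) v);
     }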
*/ 24032 24033 static rtx 24034 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp, 24035 rtx target) 24036 { 24037 rtx pat; 24038 tree arg0 = CALL_EXPR_ARG (exp, 0); 24039 tree arg1 = CALL_EXPR_ARG (exp, 1); 24040 rtx op0 = expand_normal (arg0); 24041 rtx op1 = expand_normal (arg1); 24042 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode; 24043 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode; 24044 enum rtx_code comparison = d->comparison; 24045 24046 if (VECTOR_MODE_P (mode0)) 24047 op0 = safe_vector_operand (op0, mode0); 24048 if (VECTOR_MODE_P (mode1)) 24049 op1 = safe_vector_operand (op1, mode1); 24050 24051 target = gen_reg_rtx (SImode); 24052 emit_move_insn (target, const0_rtx); 24053 target = gen_rtx_SUBREG (QImode, target, 0); 24054 24055 if ((optimize && !register_operand (op0, mode0)) 24056 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0)) 24057 op0 = copy_to_mode_reg (mode0, op0); 24058 if ((optimize && !register_operand (op1, mode1)) 24059 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1)) 24060 op1 = copy_to_mode_reg (mode1, op1); 24061 24062 pat = GEN_FCN (d->icode) (op0, op1); 24063 if (! pat) 24064 return 0; 24065 emit_insn (pat); 24066 emit_insn (gen_rtx_SET (VOIDmode, 24067 gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24068 gen_rtx_fmt_ee (comparison, QImode, 24069 SET_DEST (pat), 24070 const0_rtx))); 24071 24072 return SUBREG_REG (target); 24073 } 24074 24075 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */ 24076 24077 static rtx 24078 ix86_expand_sse_pcmpestr (const struct builtin_description *d, 24079 tree exp, rtx target) 24080 { 24081 rtx pat; 24082 tree arg0 = CALL_EXPR_ARG (exp, 0); 24083 tree arg1 = CALL_EXPR_ARG (exp, 1); 24084 tree arg2 = CALL_EXPR_ARG (exp, 2); 24085 tree arg3 = CALL_EXPR_ARG (exp, 3); 24086 tree arg4 = CALL_EXPR_ARG (exp, 4); 24087 rtx scratch0, scratch1; 24088 rtx op0 = expand_normal (arg0); 24089 rtx op1 = expand_normal (arg1); 24090 rtx op2 = expand_normal (arg2); 24091 rtx op3 = expand_normal (arg3); 24092 rtx op4 = expand_normal (arg4); 24093 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm; 24094 24095 tmode0 = insn_data[d->icode].operand[0].mode; 24096 tmode1 = insn_data[d->icode].operand[1].mode; 24097 modev2 = insn_data[d->icode].operand[2].mode; 24098 modei3 = insn_data[d->icode].operand[3].mode; 24099 modev4 = insn_data[d->icode].operand[4].mode; 24100 modei5 = insn_data[d->icode].operand[5].mode; 24101 modeimm = insn_data[d->icode].operand[6].mode; 24102 24103 if (VECTOR_MODE_P (modev2)) 24104 op0 = safe_vector_operand (op0, modev2); 24105 if (VECTOR_MODE_P (modev4)) 24106 op2 = safe_vector_operand (op2, modev4); 24107 24108 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) 24109 op0 = copy_to_mode_reg (modev2, op0); 24110 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3)) 24111 op1 = copy_to_mode_reg (modei3, op1); 24112 if ((optimize && !register_operand (op2, modev4)) 24113 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4)) 24114 op2 = copy_to_mode_reg (modev4, op2); 24115 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5)) 24116 op3 = copy_to_mode_reg (modei5, op3); 24117 24118 if (! 
(*insn_data[d->icode].operand[6].predicate) (op4, modeimm)) 24119 { 24120 error ("the fifth argument must be an 8-bit immediate"); 24121 return const0_rtx; 24122 } 24123 24124 if (d->code == IX86_BUILTIN_PCMPESTRI128) 24125 { 24126 if (optimize || !target 24127 || GET_MODE (target) != tmode0 24128 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0)) 24129 target = gen_reg_rtx (tmode0); 24130 24131 scratch1 = gen_reg_rtx (tmode1); 24132 24133 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4); 24134 } 24135 else if (d->code == IX86_BUILTIN_PCMPESTRM128) 24136 { 24137 if (optimize || !target 24138 || GET_MODE (target) != tmode1 24139 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) 24140 target = gen_reg_rtx (tmode1); 24141 24142 scratch0 = gen_reg_rtx (tmode0); 24143 24144 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4); 24145 } 24146 else 24147 { 24148 gcc_assert (d->flag); 24149 24150 scratch0 = gen_reg_rtx (tmode0); 24151 scratch1 = gen_reg_rtx (tmode1); 24152 24153 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4); 24154 } 24155 24156 if (! pat) 24157 return 0; 24158 24159 emit_insn (pat); 24160 24161 if (d->flag) 24162 { 24163 target = gen_reg_rtx (SImode); 24164 emit_move_insn (target, const0_rtx); 24165 target = gen_rtx_SUBREG (QImode, target, 0); 24166 24167 emit_insn 24168 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24169 gen_rtx_fmt_ee (EQ, QImode, 24170 gen_rtx_REG ((enum machine_mode) d->flag, 24171 FLAGS_REG), 24172 const0_rtx))); 24173 return SUBREG_REG (target); 24174 } 24175 else 24176 return target; 24177 } 24178 24179 24180 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */ 24181 24182 static rtx 24183 ix86_expand_sse_pcmpistr (const struct builtin_description *d, 24184 tree exp, rtx target) 24185 { 24186 rtx pat; 24187 tree arg0 = CALL_EXPR_ARG (exp, 0); 24188 tree arg1 = CALL_EXPR_ARG (exp, 1); 24189 tree arg2 = CALL_EXPR_ARG (exp, 2); 24190 rtx scratch0, scratch1; 24191 rtx op0 = expand_normal (arg0); 24192 rtx op1 = expand_normal (arg1); 24193 rtx op2 = expand_normal (arg2); 24194 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm; 24195 24196 tmode0 = insn_data[d->icode].operand[0].mode; 24197 tmode1 = insn_data[d->icode].operand[1].mode; 24198 modev2 = insn_data[d->icode].operand[2].mode; 24199 modev3 = insn_data[d->icode].operand[3].mode; 24200 modeimm = insn_data[d->icode].operand[4].mode; 24201 24202 if (VECTOR_MODE_P (modev2)) 24203 op0 = safe_vector_operand (op0, modev2); 24204 if (VECTOR_MODE_P (modev3)) 24205 op1 = safe_vector_operand (op1, modev3); 24206 24207 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2)) 24208 op0 = copy_to_mode_reg (modev2, op0); 24209 if ((optimize && !register_operand (op1, modev3)) 24210 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3)) 24211 op1 = copy_to_mode_reg (modev3, op1); 24212 24213 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm)) 24214 { 24215 error ("the third argument must be an 8-bit immediate"); 24216 return const0_rtx; 24217 } 24218 24219 if (d->code == IX86_BUILTIN_PCMPISTRI128) 24220 { 24221 if (optimize || !target 24222 || GET_MODE (target) != tmode0 24223 || !
(*insn_data[d->icode].operand[0].predicate) (target, tmode0)) 24224 target = gen_reg_rtx (tmode0); 24225 24226 scratch1 = gen_reg_rtx (tmode1); 24227 24228 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2); 24229 } 24230 else if (d->code == IX86_BUILTIN_PCMPISTRM128) 24231 { 24232 if (optimize || !target 24233 || GET_MODE (target) != tmode1 24234 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1)) 24235 target = gen_reg_rtx (tmode1); 24236 24237 scratch0 = gen_reg_rtx (tmode0); 24238 24239 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2); 24240 } 24241 else 24242 { 24243 gcc_assert (d->flag); 24244 24245 scratch0 = gen_reg_rtx (tmode0); 24246 scratch1 = gen_reg_rtx (tmode1); 24247 24248 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2); 24249 } 24250 24251 if (! pat) 24252 return 0; 24253 24254 emit_insn (pat); 24255 24256 if (d->flag) 24257 { 24258 target = gen_reg_rtx (SImode); 24259 emit_move_insn (target, const0_rtx); 24260 target = gen_rtx_SUBREG (QImode, target, 0); 24261 24262 emit_insn 24263 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target), 24264 gen_rtx_fmt_ee (EQ, QImode, 24265 gen_rtx_REG ((enum machine_mode) d->flag, 24266 FLAGS_REG), 24267 const0_rtx))); 24268 return SUBREG_REG (target); 24269 } 24270 else 24271 return target; 24272 } 24273 24274 /* Subroutine of ix86_expand_builtin to take care of insns with 24275 variable number of operands. */ 24276 24277 static rtx 24278 ix86_expand_args_builtin (const struct builtin_description *d, 24279 tree exp, rtx target) 24280 { 24281 rtx pat, real_target; 24282 unsigned int i, nargs; 24283 unsigned int nargs_constant = 0; 24284 int num_memory = 0; 24285 struct 24286 { 24287 rtx op; 24288 enum machine_mode mode; 24289 } args[4]; 24290 bool last_arg_count = false; 24291 enum insn_code icode = d->icode; 24292 const struct insn_data *insn_p = &insn_data[icode]; 24293 enum machine_mode tmode = insn_p->operand[0].mode; 24294 enum machine_mode rmode = VOIDmode; 24295 bool swap = false; 24296 enum rtx_code comparison = d->comparison; 24297 24298 switch ((enum ix86_builtin_type) d->flag) 24299 { 24300 case INT_FTYPE_V8SF_V8SF_PTEST: 24301 case INT_FTYPE_V4DI_V4DI_PTEST: 24302 case INT_FTYPE_V4DF_V4DF_PTEST: 24303 case INT_FTYPE_V4SF_V4SF_PTEST: 24304 case INT_FTYPE_V2DI_V2DI_PTEST: 24305 case INT_FTYPE_V2DF_V2DF_PTEST: 24306 return ix86_expand_sse_ptest (d, exp, target); 24307 case FLOAT128_FTYPE_FLOAT128: 24308 case FLOAT_FTYPE_FLOAT: 24309 case INT64_FTYPE_V4SF: 24310 case INT64_FTYPE_V2DF: 24311 case INT_FTYPE_V16QI: 24312 case INT_FTYPE_V8QI: 24313 case INT_FTYPE_V8SF: 24314 case INT_FTYPE_V4DF: 24315 case INT_FTYPE_V4SF: 24316 case INT_FTYPE_V2DF: 24317 case V16QI_FTYPE_V16QI: 24318 case V8SI_FTYPE_V8SF: 24319 case V8SI_FTYPE_V4SI: 24320 case V8HI_FTYPE_V8HI: 24321 case V8HI_FTYPE_V16QI: 24322 case V8QI_FTYPE_V8QI: 24323 case V8SF_FTYPE_V8SF: 24324 case V8SF_FTYPE_V8SI: 24325 case V8SF_FTYPE_V4SF: 24326 case V4SI_FTYPE_V4SI: 24327 case V4SI_FTYPE_V16QI: 24328 case V4SI_FTYPE_V4SF: 24329 case V4SI_FTYPE_V8SI: 24330 case V4SI_FTYPE_V8HI: 24331 case V4SI_FTYPE_V4DF: 24332 case V4SI_FTYPE_V2DF: 24333 case V4HI_FTYPE_V4HI: 24334 case V4DF_FTYPE_V4DF: 24335 case V4DF_FTYPE_V4SI: 24336 case V4DF_FTYPE_V4SF: 24337 case V4DF_FTYPE_V2DF: 24338 case V4SF_FTYPE_V4SF: 24339 case V4SF_FTYPE_V4SI: 24340 case V4SF_FTYPE_V8SF: 24341 case V4SF_FTYPE_V4DF: 24342 case V4SF_FTYPE_V2DF: 24343 case V2DI_FTYPE_V2DI: 24344 case V2DI_FTYPE_V16QI: 24345 case V2DI_FTYPE_V8HI: 24346 case 
V2DI_FTYPE_V4SI: 24347 case V2DF_FTYPE_V2DF: 24348 case V2DF_FTYPE_V4SI: 24349 case V2DF_FTYPE_V4DF: 24350 case V2DF_FTYPE_V4SF: 24351 case V2DF_FTYPE_V2SI: 24352 case V2SI_FTYPE_V2SI: 24353 case V2SI_FTYPE_V4SF: 24354 case V2SI_FTYPE_V2SF: 24355 case V2SI_FTYPE_V2DF: 24356 case V2SF_FTYPE_V2SF: 24357 case V2SF_FTYPE_V2SI: 24358 nargs = 1; 24359 break; 24360 case V4SF_FTYPE_V4SF_VEC_MERGE: 24361 case V2DF_FTYPE_V2DF_VEC_MERGE: 24362 return ix86_expand_unop_vec_merge_builtin (icode, exp, target); 24363 case FLOAT128_FTYPE_FLOAT128_FLOAT128: 24364 case V16QI_FTYPE_V16QI_V16QI: 24365 case V16QI_FTYPE_V8HI_V8HI: 24366 case V8QI_FTYPE_V8QI_V8QI: 24367 case V8QI_FTYPE_V4HI_V4HI: 24368 case V8HI_FTYPE_V8HI_V8HI: 24369 case V8HI_FTYPE_V16QI_V16QI: 24370 case V8HI_FTYPE_V4SI_V4SI: 24371 case V8SF_FTYPE_V8SF_V8SF: 24372 case V8SF_FTYPE_V8SF_V8SI: 24373 case V4SI_FTYPE_V4SI_V4SI: 24374 case V4SI_FTYPE_V8HI_V8HI: 24375 case V4SI_FTYPE_V4SF_V4SF: 24376 case V4SI_FTYPE_V2DF_V2DF: 24377 case V4HI_FTYPE_V4HI_V4HI: 24378 case V4HI_FTYPE_V8QI_V8QI: 24379 case V4HI_FTYPE_V2SI_V2SI: 24380 case V4DF_FTYPE_V4DF_V4DF: 24381 case V4DF_FTYPE_V4DF_V4DI: 24382 case V4SF_FTYPE_V4SF_V4SF: 24383 case V4SF_FTYPE_V4SF_V4SI: 24384 case V4SF_FTYPE_V4SF_V2SI: 24385 case V4SF_FTYPE_V4SF_V2DF: 24386 case V4SF_FTYPE_V4SF_DI: 24387 case V4SF_FTYPE_V4SF_SI: 24388 case V2DI_FTYPE_V2DI_V2DI: 24389 case V2DI_FTYPE_V16QI_V16QI: 24390 case V2DI_FTYPE_V4SI_V4SI: 24391 case V2DI_FTYPE_V2DI_V16QI: 24392 case V2DI_FTYPE_V2DF_V2DF: 24393 case V2SI_FTYPE_V2SI_V2SI: 24394 case V2SI_FTYPE_V4HI_V4HI: 24395 case V2SI_FTYPE_V2SF_V2SF: 24396 case V2DF_FTYPE_V2DF_V2DF: 24397 case V2DF_FTYPE_V2DF_V4SF: 24398 case V2DF_FTYPE_V2DF_V2DI: 24399 case V2DF_FTYPE_V2DF_DI: 24400 case V2DF_FTYPE_V2DF_SI: 24401 case V2SF_FTYPE_V2SF_V2SF: 24402 case V1DI_FTYPE_V1DI_V1DI: 24403 case V1DI_FTYPE_V8QI_V8QI: 24404 case V1DI_FTYPE_V2SI_V2SI: 24405 if (comparison == UNKNOWN) 24406 return ix86_expand_binop_builtin (icode, exp, target); 24407 nargs = 2; 24408 break; 24409 case V4SF_FTYPE_V4SF_V4SF_SWAP: 24410 case V2DF_FTYPE_V2DF_V2DF_SWAP: 24411 gcc_assert (comparison != UNKNOWN); 24412 nargs = 2; 24413 swap = true; 24414 break; 24415 case V8HI_FTYPE_V8HI_V8HI_COUNT: 24416 case V8HI_FTYPE_V8HI_SI_COUNT: 24417 case V4SI_FTYPE_V4SI_V4SI_COUNT: 24418 case V4SI_FTYPE_V4SI_SI_COUNT: 24419 case V4HI_FTYPE_V4HI_V4HI_COUNT: 24420 case V4HI_FTYPE_V4HI_SI_COUNT: 24421 case V2DI_FTYPE_V2DI_V2DI_COUNT: 24422 case V2DI_FTYPE_V2DI_SI_COUNT: 24423 case V2SI_FTYPE_V2SI_V2SI_COUNT: 24424 case V2SI_FTYPE_V2SI_SI_COUNT: 24425 case V1DI_FTYPE_V1DI_V1DI_COUNT: 24426 case V1DI_FTYPE_V1DI_SI_COUNT: 24427 nargs = 2; 24428 last_arg_count = true; 24429 break; 24430 case UINT64_FTYPE_UINT64_UINT64: 24431 case UINT_FTYPE_UINT_UINT: 24432 case UINT_FTYPE_UINT_USHORT: 24433 case UINT_FTYPE_UINT_UCHAR: 24434 nargs = 2; 24435 break; 24436 case V2DI2TI_FTYPE_V2DI_INT: 24437 nargs = 2; 24438 rmode = V2DImode; 24439 nargs_constant = 1; 24440 break; 24441 case V8HI_FTYPE_V8HI_INT: 24442 case V8SF_FTYPE_V8SF_INT: 24443 case V4SI_FTYPE_V4SI_INT: 24444 case V4SI_FTYPE_V8SI_INT: 24445 case V4HI_FTYPE_V4HI_INT: 24446 case V4DF_FTYPE_V4DF_INT: 24447 case V4SF_FTYPE_V4SF_INT: 24448 case V4SF_FTYPE_V8SF_INT: 24449 case V2DI_FTYPE_V2DI_INT: 24450 case V2DF_FTYPE_V2DF_INT: 24451 case V2DF_FTYPE_V4DF_INT: 24452 nargs = 2; 24453 nargs_constant = 1; 24454 break; 24455 case V16QI_FTYPE_V16QI_V16QI_V16QI: 24456 case V8SF_FTYPE_V8SF_V8SF_V8SF: 24457 case V4DF_FTYPE_V4DF_V4DF_V4DF: 24458 case 
V4SF_FTYPE_V4SF_V4SF_V4SF: 24459 case V2DF_FTYPE_V2DF_V2DF_V2DF: 24460 nargs = 3; 24461 break; 24462 case V16QI_FTYPE_V16QI_V16QI_INT: 24463 case V8HI_FTYPE_V8HI_V8HI_INT: 24464 case V8SI_FTYPE_V8SI_V8SI_INT: 24465 case V8SI_FTYPE_V8SI_V4SI_INT: 24466 case V8SF_FTYPE_V8SF_V8SF_INT: 24467 case V8SF_FTYPE_V8SF_V4SF_INT: 24468 case V4SI_FTYPE_V4SI_V4SI_INT: 24469 case V4DF_FTYPE_V4DF_V4DF_INT: 24470 case V4DF_FTYPE_V4DF_V2DF_INT: 24471 case V4SF_FTYPE_V4SF_V4SF_INT: 24472 case V2DI_FTYPE_V2DI_V2DI_INT: 24473 case V2DF_FTYPE_V2DF_V2DF_INT: 24474 nargs = 3; 24475 nargs_constant = 1; 24476 break; 24477 case V2DI2TI_FTYPE_V2DI_V2DI_INT: 24478 nargs = 3; 24479 rmode = V2DImode; 24480 nargs_constant = 1; 24481 break; 24482 case V1DI2DI_FTYPE_V1DI_V1DI_INT: 24483 nargs = 3; 24484 rmode = DImode; 24485 nargs_constant = 1; 24486 break; 24487 case V2DI_FTYPE_V2DI_UINT_UINT: 24488 nargs = 3; 24489 nargs_constant = 2; 24490 break; 24491 case V2DI_FTYPE_V2DI_V2DI_UINT_UINT: 24492 nargs = 4; 24493 nargs_constant = 2; 24494 break; 24495 default: 24496 gcc_unreachable (); 24497 } 24498 24499 gcc_assert (nargs <= ARRAY_SIZE (args)); 24500 24501 if (comparison != UNKNOWN) 24502 { 24503 gcc_assert (nargs == 2); 24504 return ix86_expand_sse_compare (d, exp, target, swap); 24505 } 24506 24507 if (rmode == VOIDmode || rmode == tmode) 24508 { 24509 if (optimize 24510 || target == 0 24511 || GET_MODE (target) != tmode 24512 || ! (*insn_p->operand[0].predicate) (target, tmode)) 24513 target = gen_reg_rtx (tmode); 24514 real_target = target; 24515 } 24516 else 24517 { 24518 target = gen_reg_rtx (rmode); 24519 real_target = simplify_gen_subreg (tmode, target, rmode, 0); 24520 } 24521 24522 for (i = 0; i < nargs; i++) 24523 { 24524 tree arg = CALL_EXPR_ARG (exp, i); 24525 rtx op = expand_normal (arg); 24526 enum machine_mode mode = insn_p->operand[i + 1].mode; 24527 bool match = (*insn_p->operand[i + 1].predicate) (op, mode); 24528 24529 if (last_arg_count && (i + 1) == nargs) 24530 { 24531 /* SIMD shift insns take either an 8-bit immediate or 24532 register as count. But builtin functions take int as 24533 count. If count doesn't match, we put it in register. 
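      For instance (illustrative sketch, not part of this file), an
      emmintrin.h style wrapper such as

        static __inline __m128i
        shift_left_by (__m128i x, int n)
        {
          // n is a plain int; when it is not a suitable constant the
          // code below copies it into a register before generating the
          // shift pattern (one of the ..._COUNT cases above).
          return (__m128i) __builtin_ia32_pslldi128 ((__v4si) x, n);
        }

      works both with a literal and with a run-time count.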
*/ 24534 if (!match) 24535 { 24536 op = simplify_gen_subreg (SImode, op, GET_MODE (op), 0); 24537 if (!(*insn_p->operand[i + 1].predicate) (op, mode)) 24538 op = copy_to_reg (op); 24539 } 24540 } 24541 else if ((nargs - i) <= nargs_constant) 24542 { 24543 if (!match) 24544 switch (icode) 24545 { 24546 case CODE_FOR_sse4_1_roundpd: 24547 case CODE_FOR_sse4_1_roundps: 24548 case CODE_FOR_sse4_1_roundsd: 24549 case CODE_FOR_sse4_1_roundss: 24550 case CODE_FOR_sse4_1_blendps: 24551 case CODE_FOR_avx_blendpd256: 24552 case CODE_FOR_avx_vpermilv4df: 24553 case CODE_FOR_avx_roundpd256: 24554 case CODE_FOR_avx_roundps256: 24555 error ("the last argument must be a 4-bit immediate"); 24556 return const0_rtx; 24557 24558 case CODE_FOR_sse4_1_blendpd: 24559 case CODE_FOR_avx_vpermilv2df: 24560 error ("the last argument must be a 2-bit immediate"); 24561 return const0_rtx; 24562 24563 case CODE_FOR_avx_vextractf128v4df: 24564 case CODE_FOR_avx_vextractf128v8sf: 24565 case CODE_FOR_avx_vextractf128v8si: 24566 case CODE_FOR_avx_vinsertf128v4df: 24567 case CODE_FOR_avx_vinsertf128v8sf: 24568 case CODE_FOR_avx_vinsertf128v8si: 24569 error ("the last argument must be a 1-bit immediate"); 24570 return const0_rtx; 24571 24572 case CODE_FOR_avx_cmpsdv2df3: 24573 case CODE_FOR_avx_cmpssv4sf3: 24574 case CODE_FOR_avx_cmppdv2df3: 24575 case CODE_FOR_avx_cmppsv4sf3: 24576 case CODE_FOR_avx_cmppdv4df3: 24577 case CODE_FOR_avx_cmppsv8sf3: 24578 error ("the last argument must be a 5-bit immediate"); 24579 return const0_rtx; 24580 24581 default: 24582 switch (nargs_constant) 24583 { 24584 case 2: 24585 if ((nargs - i) == nargs_constant) 24586 { 24587 error ("the next to last argument must be an 8-bit immediate"); 24588 break; 24589 } 24590 case 1: 24591 error ("the last argument must be an 8-bit immediate"); 24592 break; 24593 default: 24594 gcc_unreachable (); 24595 } 24596 return const0_rtx; 24597 } 24598 } 24599 else 24600 { 24601 if (VECTOR_MODE_P (mode)) 24602 op = safe_vector_operand (op, mode); 24603 24604 /* If we aren't optimizing, only allow one memory operand to 24605 be generated. */ 24606 if (memory_operand (op, mode)) 24607 num_memory++; 24608 24609 if (GET_MODE (op) == mode || GET_MODE (op) == VOIDmode) 24610 { 24611 if (optimize || !match || num_memory > 1) 24612 op = copy_to_mode_reg (mode, op); 24613 } 24614 else 24615 { 24616 op = copy_to_reg (op); 24617 op = simplify_gen_subreg (mode, op, GET_MODE (op), 0); 24618 } 24619 } 24620 24621 args[i].op = op; 24622 args[i].mode = mode; 24623 } 24624 24625 switch (nargs) 24626 { 24627 case 1: 24628 pat = GEN_FCN (icode) (real_target, args[0].op); 24629 break; 24630 case 2: 24631 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op); 24632 break; 24633 case 3: 24634 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, 24635 args[2].op); 24636 break; 24637 case 4: 24638 pat = GEN_FCN (icode) (real_target, args[0].op, args[1].op, 24639 args[2].op, args[3].op); 24640 break; 24641 default: 24642 gcc_unreachable (); 24643 } 24644 24645 if (! pat) 24646 return 0; 24647 24648 emit_insn (pat); 24649 return target; 24650 } 24651 24652 /* Subroutine of ix86_expand_builtin to take care of special insns 24653 with variable number of operands. 
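   These are mostly loads and stores where one operand is a memory
   reference rather than a register, e.g. the unaligned SSE2 load/store
   builtins behind _mm_loadu_pd and _mm_storeu_pd.  Rough usage sketch,
   for illustration only (copy_unaligned is a made-up name; __m128d is
   the emmintrin.h vector type):

     static __inline __m128d
     copy_unaligned (double *dst, const double *src)
     {
       // Load class: the builtin returns a vector read from memory.
       __m128d v = __builtin_ia32_loadupd (src);
       // Store class: a void builtin whose first operand is the memory
       // destination; see the "Reserve memory operand" cases below.
       __builtin_ia32_storeupd (dst, v);
       return v;
     }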
*/ 24654 24655 static rtx 24656 ix86_expand_special_args_builtin (const struct builtin_description *d, 24657 tree exp, rtx target) 24658 { 24659 tree arg; 24660 rtx pat, op; 24661 unsigned int i, nargs, arg_adjust, memory; 24662 struct 24663 { 24664 rtx op; 24665 enum machine_mode mode; 24666 } args[2]; 24667 enum insn_code icode = d->icode; 24668 bool last_arg_constant = false; 24669 const struct insn_data *insn_p = &insn_data[icode]; 24670 enum machine_mode tmode = insn_p->operand[0].mode; 24671 enum { load, store } klass; 24672 24673 switch ((enum ix86_special_builtin_type) d->flag) 24674 { 24675 case VOID_FTYPE_VOID: 24676 emit_insn (GEN_FCN (icode) (target)); 24677 return 0; 24678 case V2DI_FTYPE_PV2DI: 24679 case V32QI_FTYPE_PCCHAR: 24680 case V16QI_FTYPE_PCCHAR: 24681 case V8SF_FTYPE_PCV4SF: 24682 case V8SF_FTYPE_PCFLOAT: 24683 case V4SF_FTYPE_PCFLOAT: 24684 case V4DF_FTYPE_PCV2DF: 24685 case V4DF_FTYPE_PCDOUBLE: 24686 case V2DF_FTYPE_PCDOUBLE: 24687 nargs = 1; 24688 klass = load; 24689 memory = 0; 24690 break; 24691 case VOID_FTYPE_PV2SF_V4SF: 24692 case VOID_FTYPE_PV4DI_V4DI: 24693 case VOID_FTYPE_PV2DI_V2DI: 24694 case VOID_FTYPE_PCHAR_V32QI: 24695 case VOID_FTYPE_PCHAR_V16QI: 24696 case VOID_FTYPE_PFLOAT_V8SF: 24697 case VOID_FTYPE_PFLOAT_V4SF: 24698 case VOID_FTYPE_PDOUBLE_V4DF: 24699 case VOID_FTYPE_PDOUBLE_V2DF: 24700 case VOID_FTYPE_PDI_DI: 24701 case VOID_FTYPE_PINT_INT: 24702 nargs = 1; 24703 klass = store; 24704 /* Reserve memory operand for target. */ 24705 memory = ARRAY_SIZE (args); 24706 break; 24707 case V4SF_FTYPE_V4SF_PCV2SF: 24708 case V2DF_FTYPE_V2DF_PCDOUBLE: 24709 nargs = 2; 24710 klass = load; 24711 memory = 1; 24712 break; 24713 case V8SF_FTYPE_PCV8SF_V8SF: 24714 case V4DF_FTYPE_PCV4DF_V4DF: 24715 case V4SF_FTYPE_PCV4SF_V4SF: 24716 case V2DF_FTYPE_PCV2DF_V2DF: 24717 nargs = 2; 24718 klass = load; 24719 memory = 0; 24720 break; 24721 case VOID_FTYPE_PV8SF_V8SF_V8SF: 24722 case VOID_FTYPE_PV4DF_V4DF_V4DF: 24723 case VOID_FTYPE_PV4SF_V4SF_V4SF: 24724 case VOID_FTYPE_PV2DF_V2DF_V2DF: 24725 nargs = 2; 24726 klass = store; 24727 /* Reserve memory operand for target. */ 24728 memory = ARRAY_SIZE (args); 24729 break; 24730 default: 24731 gcc_unreachable (); 24732 } 24733 24734 gcc_assert (nargs <= ARRAY_SIZE (args)); 24735 24736 if (klass == store) 24737 { 24738 arg = CALL_EXPR_ARG (exp, 0); 24739 op = expand_normal (arg); 24740 gcc_assert (target == 0); 24741 target = gen_rtx_MEM (tmode, copy_to_mode_reg (Pmode, op)); 24742 arg_adjust = 1; 24743 } 24744 else 24745 { 24746 arg_adjust = 0; 24747 if (optimize 24748 || target == 0 24749 || GET_MODE (target) != tmode 24750 || ! (*insn_p->operand[0].predicate) (target, tmode)) 24751 target = gen_reg_rtx (tmode); 24752 } 24753 24754 for (i = 0; i < nargs; i++) 24755 { 24756 enum machine_mode mode = insn_p->operand[i + 1].mode; 24757 bool match; 24758 24759 arg = CALL_EXPR_ARG (exp, i + arg_adjust); 24760 op = expand_normal (arg); 24761 match = (*insn_p->operand[i + 1].predicate) (op, mode); 24762 24763 if (last_arg_constant && (i + 1) == nargs) 24764 { 24765 if (!match) 24766 switch (icode) 24767 { 24768 default: 24769 error ("the last argument must be an 8-bit immediate"); 24770 return const0_rtx; 24771 } 24772 } 24773 else 24774 { 24775 if (i == memory) 24776 { 24777 /* This must be the memory operand. */ 24778 op = gen_rtx_MEM (mode, copy_to_mode_reg (Pmode, op)); 24779 gcc_assert (GET_MODE (op) == mode 24780 || GET_MODE (op) == VOIDmode); 24781 } 24782 else 24783 { 24784 /* This must be register. 
*/ 24785 if (VECTOR_MODE_P (mode)) 24786 op = safe_vector_operand (op, mode); 24787 24788 gcc_assert (GET_MODE (op) == mode 24789 || GET_MODE (op) == VOIDmode); 24790 op = copy_to_mode_reg (mode, op); 24791 } 24792 } 24793 24794 args[i].op = op; 24795 args[i].mode = mode; 24796 } 24797 24798 switch (nargs) 24799 { 24800 case 1: 24801 pat = GEN_FCN (icode) (target, args[0].op); 24802 break; 24803 case 2: 24804 pat = GEN_FCN (icode) (target, args[0].op, args[1].op); 24805 break; 24806 default: 24807 gcc_unreachable (); 24808 } 24809 24810 if (! pat) 24811 return 0; 24812 emit_insn (pat); 24813 return klass == store ? 0 : target; 24814 } 24815 24816 /* Return the integer constant in ARG. Constrain it to be in the range 24817 of the subparts of VEC_TYPE; issue an error if not. */ 24818 24819 static int 24820 get_element_number (tree vec_type, tree arg) 24821 { 24822 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1; 24823 24824 if (!host_integerp (arg, 1) 24825 || (elt = tree_low_cst (arg, 1), elt > max)) 24826 { 24827 error ("selector must be an integer constant in the range 0..%wi", max); 24828 return 0; 24829 } 24830 24831 return elt; 24832 } 24833 24834 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24835 ix86_expand_vector_init. We DO have language-level syntax for this, in 24836 the form of (type){ init-list }. Except that since we can't place emms 24837 instructions from inside the compiler, we can't allow the use of MMX 24838 registers unless the user explicitly asks for it. So we do *not* define 24839 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead 24840 we have builtins invoked by mmintrin.h that gives us license to emit 24841 these sorts of instructions. */ 24842 24843 static rtx 24844 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target) 24845 { 24846 enum machine_mode tmode = TYPE_MODE (type); 24847 enum machine_mode inner_mode = GET_MODE_INNER (tmode); 24848 int i, n_elt = GET_MODE_NUNITS (tmode); 24849 rtvec v = rtvec_alloc (n_elt); 24850 24851 gcc_assert (VECTOR_MODE_P (tmode)); 24852 gcc_assert (call_expr_nargs (exp) == n_elt); 24853 24854 for (i = 0; i < n_elt; ++i) 24855 { 24856 rtx x = expand_normal (CALL_EXPR_ARG (exp, i)); 24857 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x); 24858 } 24859 24860 if (!target || !register_operand (target, tmode)) 24861 target = gen_reg_rtx (tmode); 24862 24863 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v)); 24864 return target; 24865 } 24866 24867 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24868 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we 24869 had a language-level syntax for referencing vector elements. 
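   Rough sketch of how user code reaches this expander, for illustration
   only (low_halfword is a made-up name; __m128i and __v8hi are the
   emmintrin.h vector types):

     static __inline int
     low_halfword (__m128i x)
     {
       // Roughly what _mm_extract_epi16 (x, 0) boils down to; dispatched
       // from ix86_expand_builtin via IX86_BUILTIN_VEC_EXT_V8HI.
       return (unsigned short) __builtin_ia32_vec_ext_v8hi ((__v8hi) x, 0);
     }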
*/ 24870 24871 static rtx 24872 ix86_expand_vec_ext_builtin (tree exp, rtx target) 24873 { 24874 enum machine_mode tmode, mode0; 24875 tree arg0, arg1; 24876 int elt; 24877 rtx op0; 24878 24879 arg0 = CALL_EXPR_ARG (exp, 0); 24880 arg1 = CALL_EXPR_ARG (exp, 1); 24881 24882 op0 = expand_normal (arg0); 24883 elt = get_element_number (TREE_TYPE (arg0), arg1); 24884 24885 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 24886 mode0 = TYPE_MODE (TREE_TYPE (arg0)); 24887 gcc_assert (VECTOR_MODE_P (mode0)); 24888 24889 op0 = force_reg (mode0, op0); 24890 24891 if (optimize || !target || !register_operand (target, tmode)) 24892 target = gen_reg_rtx (tmode); 24893 24894 ix86_expand_vector_extract (true, target, op0, elt); 24895 24896 return target; 24897 } 24898 24899 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around 24900 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had 24901 a language-level syntax for referencing vector elements. */ 24902 24903 static rtx 24904 ix86_expand_vec_set_builtin (tree exp) 24905 { 24906 enum machine_mode tmode, mode1; 24907 tree arg0, arg1, arg2; 24908 int elt; 24909 rtx op0, op1, target; 24910 24911 arg0 = CALL_EXPR_ARG (exp, 0); 24912 arg1 = CALL_EXPR_ARG (exp, 1); 24913 arg2 = CALL_EXPR_ARG (exp, 2); 24914 24915 tmode = TYPE_MODE (TREE_TYPE (arg0)); 24916 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0))); 24917 gcc_assert (VECTOR_MODE_P (tmode)); 24918 24919 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL); 24920 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL); 24921 elt = get_element_number (TREE_TYPE (arg0), arg2); 24922 24923 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode) 24924 op1 = convert_modes (mode1, GET_MODE (op1), op1, true); 24925 24926 op0 = force_reg (tmode, op0); 24927 op1 = force_reg (mode1, op1); 24928 24929 /* OP0 is the source of these builtin functions and shouldn't be 24930 modified. Create a copy, use it and return it as target. */ 24931 target = gen_reg_rtx (tmode); 24932 emit_move_insn (target, op0); 24933 ix86_expand_vector_set (true, target, op1, elt); 24934 24935 return target; 24936 } 24937 24938 /* Expand an expression EXP that calls a built-in function, 24939 with result going to TARGET if that's convenient 24940 (and in mode MODE if that's convenient). 24941 SUBTARGET may be used as the target for computing one of EXP's operands. 24942 IGNORE is nonzero if the value is to be ignored. */ 24943 24944 static rtx 24945 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED, 24946 enum machine_mode mode ATTRIBUTE_UNUSED, 24947 int ignore ATTRIBUTE_UNUSED) 24948 { 24949 const struct builtin_description *d; 24950 size_t i; 24951 enum insn_code icode; 24952 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0); 24953 tree arg0, arg1, arg2; 24954 rtx op0, op1, op2, pat; 24955 enum machine_mode mode0, mode1, mode2; 24956 unsigned int fcode = DECL_FUNCTION_CODE (fndecl); 24957 24958 /* Determine whether the builtin function is available under the current ISA. 24959 Originally the builtin was not created if it wasn't applicable to the 24960 current ISA based on the command line switches. With function specific 24961 options, we need to check in the context of the function making the call 24962 whether it is supported. 
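   For instance (sketch only, not part of this file), a translation unit
   compiled without -msse4.2 may still contain

     __attribute__ ((target ("sse4.2")))
     unsigned int
     crc_step (unsigned int c, unsigned char v)
     {
       // Accepted: the function-specific target option enables SSE4.2
       // here, so the ISA test below passes for this call.
       return __builtin_ia32_crc32qi (c, v);
     }

   while the same call in a plain function is rejected below with the
   "needs isa option" error.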
*/ 24963 if (ix86_builtins_isa[fcode].isa 24964 && !(ix86_builtins_isa[fcode].isa & ix86_isa_flags)) 24965 { 24966 char *opts = ix86_target_string (ix86_builtins_isa[fcode].isa, 0, NULL, 24967 NULL, NULL, false); 24968 24969 if (!opts) 24970 error ("%qE needs unknown isa option", fndecl); 24971 else 24972 { 24973 gcc_assert (opts != NULL); 24974 error ("%qE needs isa option %s", fndecl, opts); 24975 free (opts); 24976 } 24977 return const0_rtx; 24978 } 24979 24980 switch (fcode) 24981 { 24982 case IX86_BUILTIN_MASKMOVQ: 24983 case IX86_BUILTIN_MASKMOVDQU: 24984 icode = (fcode == IX86_BUILTIN_MASKMOVQ 24985 ? CODE_FOR_mmx_maskmovq 24986 : CODE_FOR_sse2_maskmovdqu); 24987 /* Note the arg order is different from the operand order. */ 24988 arg1 = CALL_EXPR_ARG (exp, 0); 24989 arg2 = CALL_EXPR_ARG (exp, 1); 24990 arg0 = CALL_EXPR_ARG (exp, 2); 24991 op0 = expand_normal (arg0); 24992 op1 = expand_normal (arg1); 24993 op2 = expand_normal (arg2); 24994 mode0 = insn_data[icode].operand[0].mode; 24995 mode1 = insn_data[icode].operand[1].mode; 24996 mode2 = insn_data[icode].operand[2].mode; 24997 24998 op0 = force_reg (Pmode, op0); 24999 op0 = gen_rtx_MEM (mode1, op0); 25000 25001 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0)) 25002 op0 = copy_to_mode_reg (mode0, op0); 25003 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1)) 25004 op1 = copy_to_mode_reg (mode1, op1); 25005 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2)) 25006 op2 = copy_to_mode_reg (mode2, op2); 25007 pat = GEN_FCN (icode) (op0, op1, op2); 25008 if (! pat) 25009 return 0; 25010 emit_insn (pat); 25011 return 0; 25012 25013 case IX86_BUILTIN_LDMXCSR: 25014 op0 = expand_normal (CALL_EXPR_ARG (exp, 0)); 25015 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 25016 emit_move_insn (target, op0); 25017 emit_insn (gen_sse_ldmxcsr (target)); 25018 return 0; 25019 25020 case IX86_BUILTIN_STMXCSR: 25021 target = assign_386_stack_local (SImode, SLOT_VIRTUAL); 25022 emit_insn (gen_sse_stmxcsr (target)); 25023 return copy_to_mode_reg (SImode, target); 25024 25025 case IX86_BUILTIN_CLFLUSH: 25026 arg0 = CALL_EXPR_ARG (exp, 0); 25027 op0 = expand_normal (arg0); 25028 icode = CODE_FOR_sse2_clflush; 25029 if (! 
(*insn_data[icode].operand[0].predicate) (op0, Pmode)) 25030 op0 = copy_to_mode_reg (Pmode, op0); 25031 25032 emit_insn (gen_sse2_clflush (op0)); 25033 return 0; 25034 25035 case IX86_BUILTIN_MONITOR: 25036 arg0 = CALL_EXPR_ARG (exp, 0); 25037 arg1 = CALL_EXPR_ARG (exp, 1); 25038 arg2 = CALL_EXPR_ARG (exp, 2); 25039 op0 = expand_normal (arg0); 25040 op1 = expand_normal (arg1); 25041 op2 = expand_normal (arg2); 25042 if (!REG_P (op0)) 25043 op0 = copy_to_mode_reg (Pmode, op0); 25044 if (!REG_P (op1)) 25045 op1 = copy_to_mode_reg (SImode, op1); 25046 if (!REG_P (op2)) 25047 op2 = copy_to_mode_reg (SImode, op2); 25048 emit_insn ((*ix86_gen_monitor) (op0, op1, op2)); 25049 return 0; 25050 25051 case IX86_BUILTIN_MWAIT: 25052 arg0 = CALL_EXPR_ARG (exp, 0); 25053 arg1 = CALL_EXPR_ARG (exp, 1); 25054 op0 = expand_normal (arg0); 25055 op1 = expand_normal (arg1); 25056 if (!REG_P (op0)) 25057 op0 = copy_to_mode_reg (SImode, op0); 25058 if (!REG_P (op1)) 25059 op1 = copy_to_mode_reg (SImode, op1); 25060 emit_insn (gen_sse3_mwait (op0, op1)); 25061 return 0; 25062 25063 case IX86_BUILTIN_VEC_INIT_V2SI: 25064 case IX86_BUILTIN_VEC_INIT_V4HI: 25065 case IX86_BUILTIN_VEC_INIT_V8QI: 25066 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target); 25067 25068 case IX86_BUILTIN_VEC_EXT_V2DF: 25069 case IX86_BUILTIN_VEC_EXT_V2DI: 25070 case IX86_BUILTIN_VEC_EXT_V4SF: 25071 case IX86_BUILTIN_VEC_EXT_V4SI: 25072 case IX86_BUILTIN_VEC_EXT_V8HI: 25073 case IX86_BUILTIN_VEC_EXT_V2SI: 25074 case IX86_BUILTIN_VEC_EXT_V4HI: 25075 case IX86_BUILTIN_VEC_EXT_V16QI: 25076 return ix86_expand_vec_ext_builtin (exp, target); 25077 25078 case IX86_BUILTIN_VEC_SET_V2DI: 25079 case IX86_BUILTIN_VEC_SET_V4SF: 25080 case IX86_BUILTIN_VEC_SET_V4SI: 25081 case IX86_BUILTIN_VEC_SET_V8HI: 25082 case IX86_BUILTIN_VEC_SET_V4HI: 25083 case IX86_BUILTIN_VEC_SET_V16QI: 25084 return ix86_expand_vec_set_builtin (exp); 25085 25086 case IX86_BUILTIN_INFQ: 25087 { 25088 REAL_VALUE_TYPE inf; 25089 rtx tmp; 25090 25091 real_inf (&inf); 25092 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode); 25093 25094 tmp = validize_mem (force_const_mem (mode, tmp)); 25095 25096 if (target == 0) 25097 target = gen_reg_rtx (mode); 25098 25099 emit_move_insn (target, tmp); 25100 return target; 25101 } 25102 25103 default: 25104 break; 25105 } 25106 25107 for (i = 0, d = bdesc_special_args; 25108 i < ARRAY_SIZE (bdesc_special_args); 25109 i++, d++) 25110 if (d->code == fcode) 25111 return ix86_expand_special_args_builtin (d, exp, target); 25112 25113 for (i = 0, d = bdesc_args; 25114 i < ARRAY_SIZE (bdesc_args); 25115 i++, d++) 25116 if (d->code == fcode) 25117 switch (fcode) 25118 { 25119 case IX86_BUILTIN_FABSQ: 25120 case IX86_BUILTIN_COPYSIGNQ: 25121 if (!TARGET_SSE2) 25122 /* Emit a normal call if SSE2 isn't available. 
*/ 25123 return expand_call (exp, target, ignore); 25124 default: 25125 return ix86_expand_args_builtin (d, exp, target); 25126 } 25127 25128 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++) 25129 if (d->code == fcode) 25130 return ix86_expand_sse_comi (d, exp, target); 25131 25132 for (i = 0, d = bdesc_pcmpestr; 25133 i < ARRAY_SIZE (bdesc_pcmpestr); 25134 i++, d++) 25135 if (d->code == fcode) 25136 return ix86_expand_sse_pcmpestr (d, exp, target); 25137 25138 for (i = 0, d = bdesc_pcmpistr; 25139 i < ARRAY_SIZE (bdesc_pcmpistr); 25140 i++, d++) 25141 if (d->code == fcode) 25142 return ix86_expand_sse_pcmpistr (d, exp, target); 25143 25144 for (i = 0, d = bdesc_multi_arg; i < ARRAY_SIZE (bdesc_multi_arg); i++, d++) 25145 if (d->code == fcode) 25146 return ix86_expand_multi_arg_builtin (d->icode, exp, target, 25147 (enum multi_arg_type)d->flag, 25148 d->comparison); 25149 25150 gcc_unreachable (); 25151 } 25152 25153 /* Returns a function decl for a vectorized version of the builtin function 25154 with builtin function code FN and the result vector type TYPE, or NULL_TREE 25155 if it is not available. */ 25156 25157 static tree 25158 ix86_builtin_vectorized_function (unsigned int fn, tree type_out, 25159 tree type_in) 25160 { 25161 enum machine_mode in_mode, out_mode; 25162 int in_n, out_n; 25163 25164 if (TREE_CODE (type_out) != VECTOR_TYPE 25165 || TREE_CODE (type_in) != VECTOR_TYPE) 25166 return NULL_TREE; 25167 25168 out_mode = TYPE_MODE (TREE_TYPE (type_out)); 25169 out_n = TYPE_VECTOR_SUBPARTS (type_out); 25170 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25171 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25172 25173 switch (fn) 25174 { 25175 case BUILT_IN_SQRT: 25176 if (out_mode == DFmode && out_n == 2 25177 && in_mode == DFmode && in_n == 2) 25178 return ix86_builtins[IX86_BUILTIN_SQRTPD]; 25179 break; 25180 25181 case BUILT_IN_SQRTF: 25182 if (out_mode == SFmode && out_n == 4 25183 && in_mode == SFmode && in_n == 4) 25184 return ix86_builtins[IX86_BUILTIN_SQRTPS_NR]; 25185 break; 25186 25187 case BUILT_IN_LRINT: 25188 if (out_mode == SImode && out_n == 4 25189 && in_mode == DFmode && in_n == 2) 25190 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX]; 25191 break; 25192 25193 case BUILT_IN_LRINTF: 25194 if (out_mode == SImode && out_n == 4 25195 && in_mode == SFmode && in_n == 4) 25196 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ]; 25197 break; 25198 25199 default: 25200 ; 25201 } 25202 25203 /* Dispatch to a handler for a vectorization library. */ 25204 if (ix86_veclib_handler) 25205 return (*ix86_veclib_handler)(fn, type_out, type_in); 25206 25207 return NULL_TREE; 25208 } 25209 25210 /* Handler for an SVML-style interface to 25211 a library with vectorized intrinsics. */ 25212 25213 static tree 25214 ix86_veclibabi_svml (enum built_in_function fn, tree type_out, tree type_in) 25215 { 25216 char name[20]; 25217 tree fntype, new_fndecl, args; 25218 unsigned arity; 25219 const char *bname; 25220 enum machine_mode el_mode, in_mode; 25221 int n, in_n; 25222 25223 /* The SVML is suitable for unsafe math only. 
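   Roughly speaking (illustration only): with -ffast-math -mveclibabi=svml
   a vectorizable loop such as

     void scale (double *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = __builtin_sin (a[i]);
     }

   may, once the loop is vectorized, call the SVML routine vmldSin2
   (two doubles per call); the name is assembled by the code below
   ("vmld" + "sin" + "2", with the first letter of the math function
   capitalized).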
*/ 25224 if (!flag_unsafe_math_optimizations) 25225 return NULL_TREE; 25226 25227 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 25228 n = TYPE_VECTOR_SUBPARTS (type_out); 25229 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25230 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25231 if (el_mode != in_mode 25232 || n != in_n) 25233 return NULL_TREE; 25234 25235 switch (fn) 25236 { 25237 case BUILT_IN_EXP: 25238 case BUILT_IN_LOG: 25239 case BUILT_IN_LOG10: 25240 case BUILT_IN_POW: 25241 case BUILT_IN_TANH: 25242 case BUILT_IN_TAN: 25243 case BUILT_IN_ATAN: 25244 case BUILT_IN_ATAN2: 25245 case BUILT_IN_ATANH: 25246 case BUILT_IN_CBRT: 25247 case BUILT_IN_SINH: 25248 case BUILT_IN_SIN: 25249 case BUILT_IN_ASINH: 25250 case BUILT_IN_ASIN: 25251 case BUILT_IN_COSH: 25252 case BUILT_IN_COS: 25253 case BUILT_IN_ACOSH: 25254 case BUILT_IN_ACOS: 25255 if (el_mode != DFmode || n != 2) 25256 return NULL_TREE; 25257 break; 25258 25259 case BUILT_IN_EXPF: 25260 case BUILT_IN_LOGF: 25261 case BUILT_IN_LOG10F: 25262 case BUILT_IN_POWF: 25263 case BUILT_IN_TANHF: 25264 case BUILT_IN_TANF: 25265 case BUILT_IN_ATANF: 25266 case BUILT_IN_ATAN2F: 25267 case BUILT_IN_ATANHF: 25268 case BUILT_IN_CBRTF: 25269 case BUILT_IN_SINHF: 25270 case BUILT_IN_SINF: 25271 case BUILT_IN_ASINHF: 25272 case BUILT_IN_ASINF: 25273 case BUILT_IN_COSHF: 25274 case BUILT_IN_COSF: 25275 case BUILT_IN_ACOSHF: 25276 case BUILT_IN_ACOSF: 25277 if (el_mode != SFmode || n != 4) 25278 return NULL_TREE; 25279 break; 25280 25281 default: 25282 return NULL_TREE; 25283 } 25284 25285 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); 25286 25287 if (fn == BUILT_IN_LOGF) 25288 strcpy (name, "vmlsLn4"); 25289 else if (fn == BUILT_IN_LOG) 25290 strcpy (name, "vmldLn2"); 25291 else if (n == 4) 25292 { 25293 sprintf (name, "vmls%s", bname+10); 25294 name[strlen (name)-1] = '4'; 25295 } 25296 else 25297 sprintf (name, "vmld%s2", bname+10); 25298 25299 /* Convert to uppercase. */ 25300 name[4] &= ~0x20; 25301 25302 arity = 0; 25303 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; 25304 args = TREE_CHAIN (args)) 25305 arity++; 25306 25307 if (arity == 1) 25308 fntype = build_function_type_list (type_out, type_in, NULL); 25309 else 25310 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 25311 25312 /* Build a function declaration for the vectorized function. */ 25313 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); 25314 TREE_PUBLIC (new_fndecl) = 1; 25315 DECL_EXTERNAL (new_fndecl) = 1; 25316 DECL_IS_NOVOPS (new_fndecl) = 1; 25317 TREE_READONLY (new_fndecl) = 1; 25318 25319 return new_fndecl; 25320 } 25321 25322 /* Handler for an ACML-style interface to 25323 a library with vectorized intrinsics. */ 25324 25325 static tree 25326 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in) 25327 { 25328 char name[20] = "__vr.._"; 25329 tree fntype, new_fndecl, args; 25330 unsigned arity; 25331 const char *bname; 25332 enum machine_mode el_mode, in_mode; 25333 int n, in_n; 25334 25335 /* The ACML is 64bits only and suitable for unsafe math only as 25336 it does not correctly support parts of IEEE with the required 25337 precision such as denormals. 
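   The mechanics mirror the SVML handler above; only the name template
   differs.  Worked example of the name construction below (illustrative):

     "__vr.._"  ->  name[4] = 'd', name[5] = '2'    ->  "__vrd2_"
     append bname + 10 ("cos" from "__builtin_cos") ->  "__vrd2_cos"

   so a vectorized double loop calling cos ends up calling the ACML
   routine __vrd2_cos (and a float loop, e.g. cosf, calls __vrs4_cosf).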
*/ 25338 if (!TARGET_64BIT 25339 || !flag_unsafe_math_optimizations) 25340 return NULL_TREE; 25341 25342 el_mode = TYPE_MODE (TREE_TYPE (type_out)); 25343 n = TYPE_VECTOR_SUBPARTS (type_out); 25344 in_mode = TYPE_MODE (TREE_TYPE (type_in)); 25345 in_n = TYPE_VECTOR_SUBPARTS (type_in); 25346 if (el_mode != in_mode 25347 || n != in_n) 25348 return NULL_TREE; 25349 25350 switch (fn) 25351 { 25352 case BUILT_IN_SIN: 25353 case BUILT_IN_COS: 25354 case BUILT_IN_EXP: 25355 case BUILT_IN_LOG: 25356 case BUILT_IN_LOG2: 25357 case BUILT_IN_LOG10: 25358 name[4] = 'd'; 25359 name[5] = '2'; 25360 if (el_mode != DFmode 25361 || n != 2) 25362 return NULL_TREE; 25363 break; 25364 25365 case BUILT_IN_SINF: 25366 case BUILT_IN_COSF: 25367 case BUILT_IN_EXPF: 25368 case BUILT_IN_POWF: 25369 case BUILT_IN_LOGF: 25370 case BUILT_IN_LOG2F: 25371 case BUILT_IN_LOG10F: 25372 name[4] = 's'; 25373 name[5] = '4'; 25374 if (el_mode != SFmode 25375 || n != 4) 25376 return NULL_TREE; 25377 break; 25378 25379 default: 25380 return NULL_TREE; 25381 } 25382 25383 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn])); 25384 sprintf (name + 7, "%s", bname+10); 25385 25386 arity = 0; 25387 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args; 25388 args = TREE_CHAIN (args)) 25389 arity++; 25390 25391 if (arity == 1) 25392 fntype = build_function_type_list (type_out, type_in, NULL); 25393 else 25394 fntype = build_function_type_list (type_out, type_in, type_in, NULL); 25395 25396 /* Build a function declaration for the vectorized function. */ 25397 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype); 25398 TREE_PUBLIC (new_fndecl) = 1; 25399 DECL_EXTERNAL (new_fndecl) = 1; 25400 DECL_IS_NOVOPS (new_fndecl) = 1; 25401 TREE_READONLY (new_fndecl) = 1; 25402 25403 return new_fndecl; 25404 } 25405 25406 25407 /* Returns a decl of a function that implements conversion of an integer vector 25408 into a floating-point vector, or vice-versa. TYPE is the type of the integer 25409 side of the conversion. 25410 Return NULL_TREE if it is not available. */ 25411 25412 static tree 25413 ix86_vectorize_builtin_conversion (unsigned int code, tree type) 25414 { 25415 if (!TARGET_SSE2 || TREE_CODE (type) != VECTOR_TYPE 25416 /* There are only conversions from/to signed integers. */ 25417 || TYPE_UNSIGNED (TREE_TYPE (type))) 25418 return NULL_TREE; 25419 25420 switch (code) 25421 { 25422 case FLOAT_EXPR: 25423 switch (TYPE_MODE (type)) 25424 { 25425 case V4SImode: 25426 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS]; 25427 default: 25428 return NULL_TREE; 25429 } 25430 25431 case FIX_TRUNC_EXPR: 25432 switch (TYPE_MODE (type)) 25433 { 25434 case V4SImode: 25435 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ]; 25436 default: 25437 return NULL_TREE; 25438 } 25439 default: 25440 return NULL_TREE; 25441 25442 } 25443 } 25444 25445 /* Returns a code for a target-specific builtin that implements 25446 reciprocal of the function, or NULL_TREE if not available. */ 25447 25448 static tree 25449 ix86_builtin_reciprocal (unsigned int fn, bool md_fn, 25450 bool sqrt ATTRIBUTE_UNUSED) 25451 { 25452 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_insn_for_size_p () 25453 && flag_finite_math_only && !flag_trapping_math 25454 && flag_unsafe_math_optimizations)) 25455 return NULL_TREE; 25456 25457 if (md_fn) 25458 /* Machine dependent builtins. */ 25459 switch (fn) 25460 { 25461 /* Vectorized version of sqrt to rsqrt conversion. 
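   The conversion pays off because RSQRTPS plus one Newton-Raphson step
   is much cheaper than a full SQRTPS or divide.  Sketch of the
   refinement (per element a, illustrative):

     r0 = rsqrt (a)                          hardware estimate, ~12 bits
     r1 = r0 * (1.5 - 0.5 * a * r0 * r0)     one Newton-Raphson step

   so the ..._NR builtin returned here yields an approximation of
   1/sqrt (a) that is accurate enough under the unsafe-math options
   tested at the top of this function.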
*/ 25462 case IX86_BUILTIN_SQRTPS_NR: 25463 return ix86_builtins[IX86_BUILTIN_RSQRTPS_NR]; 25464 25465 default: 25466 return NULL_TREE; 25467 } 25468 else 25469 /* Normal builtins. */ 25470 switch (fn) 25471 { 25472 /* Sqrt to rsqrt conversion. */ 25473 case BUILT_IN_SQRTF: 25474 return ix86_builtins[IX86_BUILTIN_RSQRTF]; 25475 25476 default: 25477 return NULL_TREE; 25478 } 25479 } 25480 25481 /* Store OPERAND to the memory after reload is completed. This means 25482 that we can't easily use assign_stack_local. */ 25483 rtx 25484 ix86_force_to_memory (enum machine_mode mode, rtx operand) 25485 { 25486 rtx result; 25487 25488 gcc_assert (reload_completed); 25489 if (!TARGET_64BIT_MS_ABI && TARGET_RED_ZONE) 25490 { 25491 result = gen_rtx_MEM (mode, 25492 gen_rtx_PLUS (Pmode, 25493 stack_pointer_rtx, 25494 GEN_INT (-RED_ZONE_SIZE))); 25495 emit_move_insn (result, operand); 25496 } 25497 else if ((TARGET_64BIT_MS_ABI || !TARGET_RED_ZONE) && TARGET_64BIT) 25498 { 25499 switch (mode) 25500 { 25501 case HImode: 25502 case SImode: 25503 operand = gen_lowpart (DImode, operand); 25504 /* FALLTHRU */ 25505 case DImode: 25506 emit_insn ( 25507 gen_rtx_SET (VOIDmode, 25508 gen_rtx_MEM (DImode, 25509 gen_rtx_PRE_DEC (DImode, 25510 stack_pointer_rtx)), 25511 operand)); 25512 break; 25513 default: 25514 gcc_unreachable (); 25515 } 25516 result = gen_rtx_MEM (mode, stack_pointer_rtx); 25517 } 25518 else 25519 { 25520 switch (mode) 25521 { 25522 case DImode: 25523 { 25524 rtx operands[2]; 25525 split_di (&operand, 1, operands, operands + 1); 25526 emit_insn ( 25527 gen_rtx_SET (VOIDmode, 25528 gen_rtx_MEM (SImode, 25529 gen_rtx_PRE_DEC (Pmode, 25530 stack_pointer_rtx)), 25531 operands[1])); 25532 emit_insn ( 25533 gen_rtx_SET (VOIDmode, 25534 gen_rtx_MEM (SImode, 25535 gen_rtx_PRE_DEC (Pmode, 25536 stack_pointer_rtx)), 25537 operands[0])); 25538 } 25539 break; 25540 case HImode: 25541 /* Store HImodes as SImodes. */ 25542 operand = gen_lowpart (SImode, operand); 25543 /* FALLTHRU */ 25544 case SImode: 25545 emit_insn ( 25546 gen_rtx_SET (VOIDmode, 25547 gen_rtx_MEM (GET_MODE (operand), 25548 gen_rtx_PRE_DEC (SImode, 25549 stack_pointer_rtx)), 25550 operand)); 25551 break; 25552 default: 25553 gcc_unreachable (); 25554 } 25555 result = gen_rtx_MEM (mode, stack_pointer_rtx); 25556 } 25557 return result; 25558 } 25559 25560 /* Free operand from the memory. */ 25561 void 25562 ix86_free_from_memory (enum machine_mode mode) 25563 { 25564 if (!TARGET_RED_ZONE || TARGET_64BIT_MS_ABI) 25565 { 25566 int size; 25567 25568 if (mode == DImode || TARGET_64BIT) 25569 size = 8; 25570 else 25571 size = 4; 25572 /* Use LEA to deallocate stack space. In peephole2 it will be converted 25573 to pop or add instruction if registers are available. */ 25574 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, 25575 gen_rtx_PLUS (Pmode, stack_pointer_rtx, 25576 GEN_INT (size)))); 25577 } 25578 } 25579 25580 /* Put float CONST_DOUBLE in the constant pool instead of fp regs. 25581 QImode must go into class Q_REGS. 25582 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and 25583 movdf to do mem-to-mem moves through integer regs. */ 25584 enum reg_class 25585 ix86_preferred_reload_class (rtx x, enum reg_class regclass) 25586 { 25587 enum machine_mode mode = GET_MODE (x); 25588 25589 /* We're only allowed to return a subclass of CLASS. Many of the 25590 following checks fail for NO_REGS, so eliminate that early. */ 25591 if (regclass == NO_REGS) 25592 return NO_REGS; 25593 25594 /* All classes can load zeros. 
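   (Integer registers can use xor, the 387 has fldz, and the SSE/MMX units
   have pxor/xorps, so a zero never needs to go through memory.)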
*/ 25595 if (x == CONST0_RTX (mode)) 25596 return regclass; 25597 25598 /* Force constants into memory if we are loading a (nonzero) constant into 25599 an MMX or SSE register. This is because there are no MMX/SSE instructions 25600 to load from a constant. */ 25601 if (CONSTANT_P (x) 25602 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass))) 25603 return NO_REGS; 25604 25605 /* Prefer SSE regs only, if we can use them for math. */ 25606 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode)) 25607 return SSE_CLASS_P (regclass) ? regclass : NO_REGS; 25608 25609 /* Floating-point constants need more complex checks. */ 25610 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode) 25611 { 25612 /* General regs can load everything. */ 25613 if (reg_class_subset_p (regclass, GENERAL_REGS)) 25614 return regclass; 25615 25616 /* Floats can load 0 and 1 plus some others. Note that we eliminated 25617 zero above. We only want to wind up preferring 80387 registers if 25618 we plan on doing computation with them. */ 25619 if (TARGET_80387 25620 && standard_80387_constant_p (x)) 25621 { 25622 /* Limit class to non-sse. */ 25623 if (regclass == FLOAT_SSE_REGS) 25624 return FLOAT_REGS; 25625 if (regclass == FP_TOP_SSE_REGS) 25626 return FP_TOP_REG; 25627 if (regclass == FP_SECOND_SSE_REGS) 25628 return FP_SECOND_REG; 25629 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS) 25630 return regclass; 25631 } 25632 25633 return NO_REGS; 25634 } 25635 25636 /* Generally when we see PLUS here, it's the function invariant 25637 (plus soft-fp const_int). Which can only be computed into general 25638 regs. */ 25639 if (GET_CODE (x) == PLUS) 25640 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS; 25641 25642 /* QImode constants are easy to load, but non-constant QImode data 25643 must go into Q_REGS. */ 25644 if (GET_MODE (x) == QImode && !CONSTANT_P (x)) 25645 { 25646 if (reg_class_subset_p (regclass, Q_REGS)) 25647 return regclass; 25648 if (reg_class_subset_p (Q_REGS, regclass)) 25649 return Q_REGS; 25650 return NO_REGS; 25651 } 25652 25653 return regclass; 25654 } 25655 25656 /* Discourage putting floating-point values in SSE registers unless 25657 SSE math is being used, and likewise for the 387 registers. */ 25658 enum reg_class 25659 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass) 25660 { 25661 enum machine_mode mode = GET_MODE (x); 25662 25663 /* Restrict the output reload class to the register bank that we are doing 25664 math on. If we would like not to return a subset of CLASS, reject this 25665 alternative: if reload cannot do this, it will still use its choice. */ 25666 mode = GET_MODE (x); 25667 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode)) 25668 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS; 25669 25670 if (X87_FLOAT_MODE_P (mode)) 25671 { 25672 if (regclass == FP_TOP_SSE_REGS) 25673 return FP_TOP_REG; 25674 else if (regclass == FP_SECOND_SSE_REGS) 25675 return FP_SECOND_REG; 25676 else 25677 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS; 25678 } 25679 25680 return regclass; 25681 } 25682 25683 static enum reg_class 25684 ix86_secondary_reload (bool in_p, rtx x, enum reg_class rclass, 25685 enum machine_mode mode, 25686 secondary_reload_info *sri ATTRIBUTE_UNUSED) 25687 { 25688 /* QImode spills from non-QI registers require 25689 intermediate register on 32bit targets. 
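   Only %eax, %ebx, %ecx and %edx have byte subregisters in 32-bit mode,
   so e.g. a QImode value living in %esi must be copied through one of
   them before it can be stored; Q_REGS is returned below to request such
   a scratch register.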
*/ 25690 if (!in_p && mode == QImode && !TARGET_64BIT 25691 && (rclass == GENERAL_REGS 25692 || rclass == LEGACY_REGS 25693 || rclass == INDEX_REGS)) 25694 { 25695 int regno; 25696 25697 if (REG_P (x)) 25698 regno = REGNO (x); 25699 else 25700 regno = -1; 25701 25702 if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG) 25703 regno = true_regnum (x); 25704 25705 /* Return Q_REGS if the operand is in memory. */ 25706 if (regno == -1) 25707 return Q_REGS; 25708 } 25709 25710 return NO_REGS; 25711 } 25712 25713 /* If we are copying between general and FP registers, we need a memory 25714 location. The same is true for SSE and MMX registers. 25715 25716 To optimize register_move_cost performance, allow inline variant. 25717 25718 The macro can't work reliably when one of the CLASSES is class containing 25719 registers from multiple units (SSE, MMX, integer). We avoid this by never 25720 combining those units in single alternative in the machine description. 25721 Ensure that this constraint holds to avoid unexpected surprises. 25722 25723 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not 25724 enforce these sanity checks. */ 25725 25726 static inline int 25727 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 25728 enum machine_mode mode, int strict) 25729 { 25730 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1) 25731 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2) 25732 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1) 25733 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2) 25734 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1) 25735 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2)) 25736 { 25737 gcc_assert (!strict); 25738 return true; 25739 } 25740 25741 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2)) 25742 return true; 25743 25744 /* ??? This is a lie. We do have moves between mmx/general, and for 25745 mmx/sse2. But by saying we need secondary memory we discourage the 25746 register allocator from using the mmx registers unless needed. */ 25747 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)) 25748 return true; 25749 25750 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 25751 { 25752 /* SSE1 doesn't have any direct moves from other classes. */ 25753 if (!TARGET_SSE2) 25754 return true; 25755 25756 /* If the target says that inter-unit moves are more expensive 25757 than moving through memory, then don't generate them. */ 25758 if (!TARGET_INTER_UNIT_MOVES) 25759 return true; 25760 25761 /* Between SSE and general, we have moves no larger than word size. */ 25762 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD) 25763 return true; 25764 } 25765 25766 return false; 25767 } 25768 25769 int 25770 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2, 25771 enum machine_mode mode, int strict) 25772 { 25773 return inline_secondary_memory_needed (class1, class2, mode, strict); 25774 } 25775 25776 /* Return true if the registers in CLASS cannot represent the change from 25777 modes FROM to TO. */ 25778 25779 bool 25780 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to, 25781 enum reg_class regclass) 25782 { 25783 if (from == to) 25784 return false; 25785 25786 /* x87 registers can't do subreg at all, as all values are reformatted 25787 to extended precision. 
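   A (subreg:SF (reg:DF ...)) therefore cannot simply reinterpret the
   register's bits the way it can for a general register; the register
   always holds an 80-bit extended value whatever the nominal mode.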
*/ 25788 if (MAYBE_FLOAT_CLASS_P (regclass)) 25789 return true; 25790 25791 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass)) 25792 { 25793 /* Vector registers do not support QI or HImode loads. If we don't 25794 disallow a change to these modes, reload will assume it's ok to 25795 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects 25796 the vec_dupv4hi pattern. */ 25797 if (GET_MODE_SIZE (from) < 4) 25798 return true; 25799 25800 /* Vector registers do not support subreg with nonzero offsets, which 25801 are otherwise valid for integer registers. Since we can't see 25802 whether we have a nonzero offset from here, prohibit all 25803 nonparadoxical subregs changing size. */ 25804 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from)) 25805 return true; 25806 } 25807 25808 return false; 25809 } 25810 25811 /* Return the cost of moving data of mode M between a 25812 register and memory. A value of 2 is the default; this cost is 25813 relative to those in `REGISTER_MOVE_COST'. 25814 25815 This function is used extensively by register_move_cost that is used to 25816 build tables at startup. Make it inline in this case. 25817 When IN is 2, return maximum of in and out move cost. 25818 25819 If moving between registers and memory is more expensive than 25820 between two registers, you should define this macro to express the 25821 relative cost. 25822 25823 Model also increased moving costs of QImode registers in non 25824 Q_REGS classes. 25825 */ 25826 static inline int 25827 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass, 25828 int in) 25829 { 25830 int cost; 25831 if (FLOAT_CLASS_P (regclass)) 25832 { 25833 int index; 25834 switch (mode) 25835 { 25836 case SFmode: 25837 index = 0; 25838 break; 25839 case DFmode: 25840 index = 1; 25841 break; 25842 case XFmode: 25843 index = 2; 25844 break; 25845 default: 25846 return 100; 25847 } 25848 if (in == 2) 25849 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]); 25850 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index]; 25851 } 25852 if (SSE_CLASS_P (regclass)) 25853 { 25854 int index; 25855 switch (GET_MODE_SIZE (mode)) 25856 { 25857 case 4: 25858 index = 0; 25859 break; 25860 case 8: 25861 index = 1; 25862 break; 25863 case 16: 25864 index = 2; 25865 break; 25866 default: 25867 return 100; 25868 } 25869 if (in == 2) 25870 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]); 25871 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index]; 25872 } 25873 if (MMX_CLASS_P (regclass)) 25874 { 25875 int index; 25876 switch (GET_MODE_SIZE (mode)) 25877 { 25878 case 4: 25879 index = 0; 25880 break; 25881 case 8: 25882 index = 1; 25883 break; 25884 default: 25885 return 100; 25886 } 25887 if (in) 25888 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]); 25889 return in ? 
ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index]; 25890 } 25891 switch (GET_MODE_SIZE (mode)) 25892 { 25893 case 1: 25894 if (Q_CLASS_P (regclass) || TARGET_64BIT) 25895 { 25896 if (!in) 25897 return ix86_cost->int_store[0]; 25898 if (TARGET_PARTIAL_REG_DEPENDENCY 25899 && optimize_function_for_speed_p (cfun)) 25900 cost = ix86_cost->movzbl_load; 25901 else 25902 cost = ix86_cost->int_load[0]; 25903 if (in == 2) 25904 return MAX (cost, ix86_cost->int_store[0]); 25905 return cost; 25906 } 25907 else 25908 { 25909 if (in == 2) 25910 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4); 25911 if (in) 25912 return ix86_cost->movzbl_load; 25913 else 25914 return ix86_cost->int_store[0] + 4; 25915 } 25916 break; 25917 case 2: 25918 if (in == 2) 25919 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]); 25920 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1]; 25921 default: 25922 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */ 25923 if (mode == TFmode) 25924 mode = XFmode; 25925 if (in == 2) 25926 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]); 25927 else if (in) 25928 cost = ix86_cost->int_load[2]; 25929 else 25930 cost = ix86_cost->int_store[2]; 25931 return (cost * (((int) GET_MODE_SIZE (mode) 25932 + UNITS_PER_WORD - 1) / UNITS_PER_WORD)); 25933 } 25934 } 25935 25936 int 25937 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in) 25938 { 25939 return inline_memory_move_cost (mode, regclass, in); 25940 } 25941 25942 25943 /* Return the cost of moving data from a register in class CLASS1 to 25944 one in class CLASS2. 25945 25946 It is not required that the cost always equal 2 when FROM is the same as TO; 25947 on some machines it is expensive to move between registers if they are not 25948 general registers. */ 25949 25950 int 25951 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1, 25952 enum reg_class class2) 25953 { 25954 /* In case we require secondary memory, compute cost of the store followed 25955 by load. In order to avoid bad register allocation choices, we need 25956 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */ 25957 25958 if (inline_secondary_memory_needed (class1, class2, mode, 0)) 25959 { 25960 int cost = 1; 25961 25962 cost += inline_memory_move_cost (mode, class1, 2); 25963 cost += inline_memory_move_cost (mode, class2, 2); 25964 25965 /* In case of copying from general_purpose_register we may emit multiple 25966 stores followed by single load causing memory size mismatch stall. 25967 Count this as arbitrarily high cost of 20. */ 25968 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode)) 25969 cost += 20; 25970 25971 /* In the case of FP/MMX moves, the registers actually overlap, and we 25972 have to switch modes in order to treat them differently. */ 25973 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2)) 25974 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1))) 25975 cost += 20; 25976 25977 return cost; 25978 } 25979 25980 /* Moves between SSE/MMX and integer unit are expensive. */ 25981 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2) 25982 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2)) 25983 25984 /* ??? By keeping returned value relatively high, we limit the number 25985 of moves between integer and MMX/SSE registers for all targets. 
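   (The value returned below is clamped to at least 8, even when the cost
   tables would claim cheaper inter-unit moves.)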
25986 Additionally, high value prevents problem with x86_modes_tieable_p(), 25987 where integer modes in MMX/SSE registers are not tieable 25988 because of missing QImode and HImode moves to, from or between 25989 MMX/SSE registers. */ 25990 return MAX (8, ix86_cost->mmxsse_to_integer); 25991 25992 if (MAYBE_FLOAT_CLASS_P (class1)) 25993 return ix86_cost->fp_move; 25994 if (MAYBE_SSE_CLASS_P (class1)) 25995 return ix86_cost->sse_move; 25996 if (MAYBE_MMX_CLASS_P (class1)) 25997 return ix86_cost->mmx_move; 25998 return 2; 25999 } 26000 26001 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */ 26002 26003 bool 26004 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode) 26005 { 26006 /* Flags and only flags can only hold CCmode values. */ 26007 if (CC_REGNO_P (regno)) 26008 return GET_MODE_CLASS (mode) == MODE_CC; 26009 if (GET_MODE_CLASS (mode) == MODE_CC 26010 || GET_MODE_CLASS (mode) == MODE_RANDOM 26011 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT) 26012 return 0; 26013 if (FP_REGNO_P (regno)) 26014 return VALID_FP_MODE_P (mode); 26015 if (SSE_REGNO_P (regno)) 26016 { 26017 /* We implement the move patterns for all vector modes into and 26018 out of SSE registers, even when no operation instructions 26019 are available. OImode move is available only when AVX is 26020 enabled. */ 26021 return ((TARGET_AVX && mode == OImode) 26022 || VALID_AVX256_REG_MODE (mode) 26023 || VALID_SSE_REG_MODE (mode) 26024 || VALID_SSE2_REG_MODE (mode) 26025 || VALID_MMX_REG_MODE (mode) 26026 || VALID_MMX_REG_MODE_3DNOW (mode)); 26027 } 26028 if (MMX_REGNO_P (regno)) 26029 { 26030 /* We implement the move patterns for 3DNOW modes even in MMX mode, 26031 so if the register is available at all, then we can move data of 26032 the given mode into or out of it. */ 26033 return (VALID_MMX_REG_MODE (mode) 26034 || VALID_MMX_REG_MODE_3DNOW (mode)); 26035 } 26036 26037 if (mode == QImode) 26038 { 26039 /* Take care for QImode values - they can be in non-QI regs, 26040 but then they do cause partial register stalls. */ 26041 if (regno <= BX_REG || TARGET_64BIT) 26042 return 1; 26043 if (!TARGET_PARTIAL_REG_STALL) 26044 return 1; 26045 return reload_in_progress || reload_completed; 26046 } 26047 /* We handle both integer and floats in the general purpose registers. */ 26048 else if (VALID_INT_MODE_P (mode)) 26049 return 1; 26050 else if (VALID_FP_MODE_P (mode)) 26051 return 1; 26052 else if (VALID_DFP_MODE_P (mode)) 26053 return 1; 26054 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go 26055 on to use that value in smaller contexts, this can easily force a 26056 pseudo to be allocated to GENERAL_REGS. Since this is no worse than 26057 supporting DImode, allow it. */ 26058 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode)) 26059 return 1; 26060 26061 return 0; 26062 } 26063 26064 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a 26065 tieable integer mode. */ 26066 26067 static bool 26068 ix86_tieable_integer_mode_p (enum machine_mode mode) 26069 { 26070 switch (mode) 26071 { 26072 case HImode: 26073 case SImode: 26074 return true; 26075 26076 case QImode: 26077 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL; 26078 26079 case DImode: 26080 return TARGET_64BIT; 26081 26082 default: 26083 return false; 26084 } 26085 } 26086 26087 /* Return true if MODE1 is accessible in a register that can hold MODE2 26088 without copying. That is, all register classes that can hold MODE2 26089 can also hold MODE1. 
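   For example SImode and HImode are tieable, while DFmode is not tieable
   with V2DFmode, since the 387 stack can hold the former but not the
   latter.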
*/ 26090 26091 bool 26092 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2) 26093 { 26094 if (mode1 == mode2) 26095 return true; 26096 26097 if (ix86_tieable_integer_mode_p (mode1) 26098 && ix86_tieable_integer_mode_p (mode2)) 26099 return true; 26100 26101 /* MODE2 being XFmode implies fp stack or general regs, which means we 26102 can tie any smaller floating point modes to it. Note that we do not 26103 tie this with TFmode. */ 26104 if (mode2 == XFmode) 26105 return mode1 == SFmode || mode1 == DFmode; 26106 26107 /* MODE2 being DFmode implies fp stack, general or sse regs, which means 26108 that we can tie it with SFmode. */ 26109 if (mode2 == DFmode) 26110 return mode1 == SFmode; 26111 26112 /* If MODE2 is only appropriate for an SSE register, then tie with 26113 any other mode acceptable to SSE registers. */ 26114 if (GET_MODE_SIZE (mode2) == 16 26115 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2)) 26116 return (GET_MODE_SIZE (mode1) == 16 26117 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1)); 26118 26119 /* If MODE2 is appropriate for an MMX register, then tie 26120 with any other mode acceptable to MMX registers. */ 26121 if (GET_MODE_SIZE (mode2) == 8 26122 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2)) 26123 return (GET_MODE_SIZE (mode1) == 8 26124 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1)); 26125 26126 return false; 26127 } 26128 26129 /* Compute a (partial) cost for rtx X. Return true if the complete 26130 cost has been computed, and false if subexpressions should be 26131 scanned. In either case, *TOTAL contains the cost result. */ 26132 26133 static bool 26134 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total, bool speed) 26135 { 26136 enum rtx_code outer_code = (enum rtx_code) outer_code_i; 26137 enum machine_mode mode = GET_MODE (x); 26138 const struct processor_costs *cost = speed ? ix86_cost : &ix86_size_cost; 26139 26140 switch (code) 26141 { 26142 case CONST_INT: 26143 case CONST: 26144 case LABEL_REF: 26145 case SYMBOL_REF: 26146 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode)) 26147 *total = 3; 26148 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode)) 26149 *total = 2; 26150 else if (flag_pic && SYMBOLIC_CONST (x) 26151 && (!TARGET_64BIT 26152 || (!GET_CODE (x) != LABEL_REF 26153 && (GET_CODE (x) != SYMBOL_REF 26154 || !SYMBOL_REF_LOCAL_P (x))))) 26155 *total = 1; 26156 else 26157 *total = 0; 26158 return true; 26159 26160 case CONST_DOUBLE: 26161 if (mode == VOIDmode) 26162 *total = 0; 26163 else 26164 switch (standard_80387_constant_p (x)) 26165 { 26166 case 1: /* 0.0 */ 26167 *total = 1; 26168 break; 26169 default: /* Other constants */ 26170 *total = 2; 26171 break; 26172 case 0: 26173 case -1: 26174 /* Start with (MEM (SYMBOL_REF)), since that's where 26175 it'll probably end up. Add a penalty for size. */ 26176 *total = (COSTS_N_INSNS (1) 26177 + (flag_pic != 0 && !TARGET_64BIT) 26178 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2)); 26179 break; 26180 } 26181 return true; 26182 26183 case ZERO_EXTEND: 26184 /* The zero extensions is often completely free on x86_64, so make 26185 it as cheap as possible. 
*/ 26186 if (TARGET_64BIT && mode == DImode 26187 && GET_MODE (XEXP (x, 0)) == SImode) 26188 *total = 1; 26189 else if (TARGET_ZERO_EXTEND_WITH_AND) 26190 *total = cost->add; 26191 else 26192 *total = cost->movzx; 26193 return false; 26194 26195 case SIGN_EXTEND: 26196 *total = cost->movsx; 26197 return false; 26198 26199 case ASHIFT: 26200 if (CONST_INT_P (XEXP (x, 1)) 26201 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT)) 26202 { 26203 HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 26204 if (value == 1) 26205 { 26206 *total = cost->add; 26207 return false; 26208 } 26209 if ((value == 2 || value == 3) 26210 && cost->lea <= cost->shift_const) 26211 { 26212 *total = cost->lea; 26213 return false; 26214 } 26215 } 26216 /* FALLTHRU */ 26217 26218 case ROTATE: 26219 case ASHIFTRT: 26220 case LSHIFTRT: 26221 case ROTATERT: 26222 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode) 26223 { 26224 if (CONST_INT_P (XEXP (x, 1))) 26225 { 26226 if (INTVAL (XEXP (x, 1)) > 32) 26227 *total = cost->shift_const + COSTS_N_INSNS (2); 26228 else 26229 *total = cost->shift_const * 2; 26230 } 26231 else 26232 { 26233 if (GET_CODE (XEXP (x, 1)) == AND) 26234 *total = cost->shift_var * 2; 26235 else 26236 *total = cost->shift_var * 6 + COSTS_N_INSNS (2); 26237 } 26238 } 26239 else 26240 { 26241 if (CONST_INT_P (XEXP (x, 1))) 26242 *total = cost->shift_const; 26243 else 26244 *total = cost->shift_var; 26245 } 26246 return false; 26247 26248 case MULT: 26249 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26250 { 26251 /* ??? SSE scalar cost should be used here. */ 26252 *total = cost->fmul; 26253 return false; 26254 } 26255 else if (X87_FLOAT_MODE_P (mode)) 26256 { 26257 *total = cost->fmul; 26258 return false; 26259 } 26260 else if (FLOAT_MODE_P (mode)) 26261 { 26262 /* ??? SSE vector cost should be used here. */ 26263 *total = cost->fmul; 26264 return false; 26265 } 26266 else 26267 { 26268 rtx op0 = XEXP (x, 0); 26269 rtx op1 = XEXP (x, 1); 26270 int nbits; 26271 if (CONST_INT_P (XEXP (x, 1))) 26272 { 26273 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1)); 26274 for (nbits = 0; value != 0; value &= value - 1) 26275 nbits++; 26276 } 26277 else 26278 /* This is arbitrary. */ 26279 nbits = 7; 26280 26281 /* Compute costs correctly for widening multiplication. */ 26282 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND) 26283 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2 26284 == GET_MODE_SIZE (mode)) 26285 { 26286 int is_mulwiden = 0; 26287 enum machine_mode inner_mode = GET_MODE (op0); 26288 26289 if (GET_CODE (op0) == GET_CODE (op1)) 26290 is_mulwiden = 1, op1 = XEXP (op1, 0); 26291 else if (CONST_INT_P (op1)) 26292 { 26293 if (GET_CODE (op0) == SIGN_EXTEND) 26294 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode) 26295 == INTVAL (op1); 26296 else 26297 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode)); 26298 } 26299 26300 if (is_mulwiden) 26301 op0 = XEXP (op0, 0), mode = GET_MODE (op0); 26302 } 26303 26304 *total = (cost->mult_init[MODE_INDEX (mode)] 26305 + nbits * cost->mult_bit 26306 + rtx_cost (op0, outer_code, speed) + rtx_cost (op1, outer_code, speed)); 26307 26308 return true; 26309 } 26310 26311 case DIV: 26312 case UDIV: 26313 case MOD: 26314 case UMOD: 26315 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26316 /* ??? SSE cost should be used here. */ 26317 *total = cost->fdiv; 26318 else if (X87_FLOAT_MODE_P (mode)) 26319 *total = cost->fdiv; 26320 else if (FLOAT_MODE_P (mode)) 26321 /* ??? SSE vector cost should be used here. 
*/ 26322 *total = cost->fdiv; 26323 else 26324 *total = cost->divide[MODE_INDEX (mode)]; 26325 return false; 26326 26327 case PLUS: 26328 if (GET_MODE_CLASS (mode) == MODE_INT 26329 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode)) 26330 { 26331 if (GET_CODE (XEXP (x, 0)) == PLUS 26332 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT 26333 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)) 26334 && CONSTANT_P (XEXP (x, 1))) 26335 { 26336 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1)); 26337 if (val == 2 || val == 4 || val == 8) 26338 { 26339 *total = cost->lea; 26340 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); 26341 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), 26342 outer_code, speed); 26343 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26344 return true; 26345 } 26346 } 26347 else if (GET_CODE (XEXP (x, 0)) == MULT 26348 && CONST_INT_P (XEXP (XEXP (x, 0), 1))) 26349 { 26350 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1)); 26351 if (val == 2 || val == 4 || val == 8) 26352 { 26353 *total = cost->lea; 26354 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); 26355 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26356 return true; 26357 } 26358 } 26359 else if (GET_CODE (XEXP (x, 0)) == PLUS) 26360 { 26361 *total = cost->lea; 26362 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed); 26363 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code, speed); 26364 *total += rtx_cost (XEXP (x, 1), outer_code, speed); 26365 return true; 26366 } 26367 } 26368 /* FALLTHRU */ 26369 26370 case MINUS: 26371 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26372 { 26373 /* ??? SSE cost should be used here. */ 26374 *total = cost->fadd; 26375 return false; 26376 } 26377 else if (X87_FLOAT_MODE_P (mode)) 26378 { 26379 *total = cost->fadd; 26380 return false; 26381 } 26382 else if (FLOAT_MODE_P (mode)) 26383 { 26384 /* ??? SSE vector cost should be used here. */ 26385 *total = cost->fadd; 26386 return false; 26387 } 26388 /* FALLTHRU */ 26389 26390 case AND: 26391 case IOR: 26392 case XOR: 26393 if (!TARGET_64BIT && mode == DImode) 26394 { 26395 *total = (cost->add * 2 26396 + (rtx_cost (XEXP (x, 0), outer_code, speed) 26397 << (GET_MODE (XEXP (x, 0)) != DImode)) 26398 + (rtx_cost (XEXP (x, 1), outer_code, speed) 26399 << (GET_MODE (XEXP (x, 1)) != DImode))); 26400 return true; 26401 } 26402 /* FALLTHRU */ 26403 26404 case NEG: 26405 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26406 { 26407 /* ??? SSE cost should be used here. */ 26408 *total = cost->fchs; 26409 return false; 26410 } 26411 else if (X87_FLOAT_MODE_P (mode)) 26412 { 26413 *total = cost->fchs; 26414 return false; 26415 } 26416 else if (FLOAT_MODE_P (mode)) 26417 { 26418 /* ??? SSE vector cost should be used here. */ 26419 *total = cost->fchs; 26420 return false; 26421 } 26422 /* FALLTHRU */ 26423 26424 case NOT: 26425 if (!TARGET_64BIT && mode == DImode) 26426 *total = cost->add * 2; 26427 else 26428 *total = cost->add; 26429 return false; 26430 26431 case COMPARE: 26432 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT 26433 && XEXP (XEXP (x, 0), 1) == const1_rtx 26434 && CONST_INT_P (XEXP (XEXP (x, 0), 2)) 26435 && XEXP (x, 1) == const0_rtx) 26436 { 26437 /* This kind of construct is implemented using test[bwl]. 26438 Treat it as if we had an AND. 
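   The (compare (zero_extract ...) (const_int 0)) form tests a single bit,
   roughly 'testb $0x20, %al', so it is priced like an AND with an
   immediate mask.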
*/ 26439 *total = (cost->add 26440 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code, speed) 26441 + rtx_cost (const1_rtx, outer_code, speed)); 26442 return true; 26443 } 26444 return false; 26445 26446 case FLOAT_EXTEND: 26447 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)) 26448 *total = 0; 26449 return false; 26450 26451 case ABS: 26452 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26453 /* ??? SSE cost should be used here. */ 26454 *total = cost->fabs; 26455 else if (X87_FLOAT_MODE_P (mode)) 26456 *total = cost->fabs; 26457 else if (FLOAT_MODE_P (mode)) 26458 /* ??? SSE vector cost should be used here. */ 26459 *total = cost->fabs; 26460 return false; 26461 26462 case SQRT: 26463 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH) 26464 /* ??? SSE cost should be used here. */ 26465 *total = cost->fsqrt; 26466 else if (X87_FLOAT_MODE_P (mode)) 26467 *total = cost->fsqrt; 26468 else if (FLOAT_MODE_P (mode)) 26469 /* ??? SSE vector cost should be used here. */ 26470 *total = cost->fsqrt; 26471 return false; 26472 26473 case UNSPEC: 26474 if (XINT (x, 1) == UNSPEC_TP) 26475 *total = 0; 26476 return false; 26477 26478 default: 26479 return false; 26480 } 26481 } 26482 26483 #if TARGET_MACHO 26484 26485 static int current_machopic_label_num; 26486 26487 /* Given a symbol name and its associated stub, write out the 26488 definition of the stub. */ 26489 26490 void 26491 machopic_output_stub (FILE *file, const char *symb, const char *stub) 26492 { 26493 unsigned int length; 26494 char *binder_name, *symbol_name, lazy_ptr_name[32]; 26495 int label = ++current_machopic_label_num; 26496 26497 /* For 64-bit we shouldn't get here. */ 26498 gcc_assert (!TARGET_64BIT); 26499 26500 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */ 26501 symb = (*targetm.strip_name_encoding) (symb); 26502 26503 length = strlen (stub); 26504 binder_name = XALLOCAVEC (char, length + 32); 26505 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length); 26506 26507 length = strlen (symb); 26508 symbol_name = XALLOCAVEC (char, length + 32); 26509 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length); 26510 26511 sprintf (lazy_ptr_name, "L%d$lz", label); 26512 26513 if (MACHOPIC_PURE) 26514 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]); 26515 else 26516 switch_to_section (darwin_sections[machopic_symbol_stub_section]); 26517 26518 fprintf (file, "%s:\n", stub); 26519 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 26520 26521 if (MACHOPIC_PURE) 26522 { 26523 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label); 26524 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label); 26525 fprintf (file, "\tjmp\t*%%edx\n"); 26526 } 26527 else 26528 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name); 26529 26530 fprintf (file, "%s:\n", binder_name); 26531 26532 if (MACHOPIC_PURE) 26533 { 26534 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label); 26535 fprintf (file, "\tpushl\t%%eax\n"); 26536 } 26537 else 26538 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name); 26539 26540 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n"); 26541 26542 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]); 26543 fprintf (file, "%s:\n", lazy_ptr_name); 26544 fprintf (file, "\t.indirect_symbol %s\n", symbol_name); 26545 fprintf (file, "\t.long %s\n", binder_name); 26546 } 26547 26548 void 26549 darwin_x86_file_end (void) 26550 { 26551 darwin_file_end (); 26552 ix86_file_end (); 26553 } 26554 #endif /* TARGET_MACHO */ 26555 26556 /* Order the 
registers for register allocator. */ 26557 26558 void 26559 x86_order_regs_for_local_alloc (void) 26560 { 26561 int pos = 0; 26562 int i; 26563 26564 /* First allocate the local general purpose registers. */ 26565 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 26566 if (GENERAL_REGNO_P (i) && call_used_regs[i]) 26567 reg_alloc_order [pos++] = i; 26568 26569 /* Global general purpose registers. */ 26570 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++) 26571 if (GENERAL_REGNO_P (i) && !call_used_regs[i]) 26572 reg_alloc_order [pos++] = i; 26573 26574 /* x87 registers come first in case we are doing FP math 26575 using them. */ 26576 if (!TARGET_SSE_MATH) 26577 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 26578 reg_alloc_order [pos++] = i; 26579 26580 /* SSE registers. */ 26581 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++) 26582 reg_alloc_order [pos++] = i; 26583 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++) 26584 reg_alloc_order [pos++] = i; 26585 26586 /* x87 registers. */ 26587 if (TARGET_SSE_MATH) 26588 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++) 26589 reg_alloc_order [pos++] = i; 26590 26591 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++) 26592 reg_alloc_order [pos++] = i; 26593 26594 /* Initialize the rest of array as we do not allocate some registers 26595 at all. */ 26596 while (pos < FIRST_PSEUDO_REGISTER) 26597 reg_alloc_order [pos++] = 0; 26598 } 26599 26600 /* Handle a "ms_abi" or "sysv" attribute; arguments as in 26601 struct attribute_spec.handler. */ 26602 static tree 26603 ix86_handle_abi_attribute (tree *node, tree name, 26604 tree args ATTRIBUTE_UNUSED, 26605 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 26606 { 26607 if (TREE_CODE (*node) != FUNCTION_TYPE 26608 && TREE_CODE (*node) != METHOD_TYPE 26609 && TREE_CODE (*node) != FIELD_DECL 26610 && TREE_CODE (*node) != TYPE_DECL) 26611 { 26612 warning (OPT_Wattributes, "%qs attribute only applies to functions", 26613 IDENTIFIER_POINTER (name)); 26614 *no_add_attrs = true; 26615 return NULL_TREE; 26616 } 26617 if (!TARGET_64BIT) 26618 { 26619 warning (OPT_Wattributes, "%qs attribute only available for 64-bit", 26620 IDENTIFIER_POINTER (name)); 26621 *no_add_attrs = true; 26622 return NULL_TREE; 26623 } 26624 26625 /* Can combine regparm with all attributes but fastcall. */ 26626 if (is_attribute_p ("ms_abi", name)) 26627 { 26628 if (lookup_attribute ("sysv_abi", TYPE_ATTRIBUTES (*node))) 26629 { 26630 error ("ms_abi and sysv_abi attributes are not compatible"); 26631 } 26632 26633 return NULL_TREE; 26634 } 26635 else if (is_attribute_p ("sysv_abi", name)) 26636 { 26637 if (lookup_attribute ("ms_abi", TYPE_ATTRIBUTES (*node))) 26638 { 26639 error ("ms_abi and sysv_abi attributes are not compatible"); 26640 } 26641 26642 return NULL_TREE; 26643 } 26644 26645 return NULL_TREE; 26646 } 26647 26648 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in 26649 struct attribute_spec.handler. 
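   ms_struct requests the Microsoft record layout for the tagged struct or
   union (see ix86_ms_bitfield_layout_p below), gcc_struct the traditional
   GCC layout; the two are mutually exclusive.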
*/ 26650 static tree 26651 ix86_handle_struct_attribute (tree *node, tree name, 26652 tree args ATTRIBUTE_UNUSED, 26653 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs) 26654 { 26655 tree *type = NULL; 26656 if (DECL_P (*node)) 26657 { 26658 if (TREE_CODE (*node) == TYPE_DECL) 26659 type = &TREE_TYPE (*node); 26660 } 26661 else 26662 type = node; 26663 26664 if (!(type && (TREE_CODE (*type) == RECORD_TYPE 26665 || TREE_CODE (*type) == UNION_TYPE))) 26666 { 26667 warning (OPT_Wattributes, "%qs attribute ignored", 26668 IDENTIFIER_POINTER (name)); 26669 *no_add_attrs = true; 26670 } 26671 26672 else if ((is_attribute_p ("ms_struct", name) 26673 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type))) 26674 || ((is_attribute_p ("gcc_struct", name) 26675 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type))))) 26676 { 26677 warning (OPT_Wattributes, "%qs incompatible attribute ignored", 26678 IDENTIFIER_POINTER (name)); 26679 *no_add_attrs = true; 26680 } 26681 26682 return NULL_TREE; 26683 } 26684 26685 static bool 26686 ix86_ms_bitfield_layout_p (const_tree record_type) 26687 { 26688 return (TARGET_MS_BITFIELD_LAYOUT && 26689 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type))) 26690 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type)); 26691 } 26692 26693 /* Returns an expression indicating where the this parameter is 26694 located on entry to the FUNCTION. */ 26695 26696 static rtx 26697 x86_this_parameter (tree function) 26698 { 26699 tree type = TREE_TYPE (function); 26700 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0; 26701 int nregs; 26702 26703 if (TARGET_64BIT) 26704 { 26705 const int *parm_regs; 26706 26707 if (ix86_function_type_abi (type) == MS_ABI) 26708 parm_regs = x86_64_ms_abi_int_parameter_registers; 26709 else 26710 parm_regs = x86_64_int_parameter_registers; 26711 return gen_rtx_REG (DImode, parm_regs[aggr]); 26712 } 26713 26714 nregs = ix86_function_regparm (type, function); 26715 26716 if (nregs > 0 && !stdarg_p (type)) 26717 { 26718 int regno; 26719 26720 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type))) 26721 regno = aggr ? DX_REG : CX_REG; 26722 else 26723 { 26724 regno = AX_REG; 26725 if (aggr) 26726 { 26727 regno = DX_REG; 26728 if (nregs == 1) 26729 return gen_rtx_MEM (SImode, 26730 plus_constant (stack_pointer_rtx, 4)); 26731 } 26732 } 26733 return gen_rtx_REG (SImode, regno); 26734 } 26735 26736 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4)); 26737 } 26738 26739 /* Determine whether x86_output_mi_thunk can succeed. */ 26740 26741 static bool 26742 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED, 26743 HOST_WIDE_INT delta ATTRIBUTE_UNUSED, 26744 HOST_WIDE_INT vcall_offset, const_tree function) 26745 { 26746 /* 64-bit can handle anything. */ 26747 if (TARGET_64BIT) 26748 return true; 26749 26750 /* For 32-bit, everything's fine if we have one free register. */ 26751 if (ix86_function_regparm (TREE_TYPE (function), function) < 3) 26752 return true; 26753 26754 /* Need a free register for vcall_offset. */ 26755 if (vcall_offset) 26756 return false; 26757 26758 /* Need a free register for GOT references. */ 26759 if (flag_pic && !(*targetm.binds_local_p) (function)) 26760 return false; 26761 26762 /* Otherwise ok. */ 26763 return true; 26764 } 26765 26766 /* Output the assembler code for a thunk function. THUNK_DECL is the 26767 declaration for the thunk function itself, FUNCTION is the decl for 26768 the target function. 
DELTA is an immediate constant offset to be 26769 added to THIS. If VCALL_OFFSET is nonzero, the word at 26770 *(*this + vcall_offset) should be added to THIS. */ 26771 26772 static void 26773 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED, 26774 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta, 26775 HOST_WIDE_INT vcall_offset, tree function) 26776 { 26777 rtx xops[3]; 26778 rtx this_param = x86_this_parameter (function); 26779 rtx this_reg, tmp; 26780 26781 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well 26782 pull it in now and let DELTA benefit. */ 26783 if (REG_P (this_param)) 26784 this_reg = this_param; 26785 else if (vcall_offset) 26786 { 26787 /* Put the this parameter into %eax. */ 26788 xops[0] = this_param; 26789 xops[1] = this_reg = gen_rtx_REG (Pmode, AX_REG); 26790 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26791 } 26792 else 26793 this_reg = NULL_RTX; 26794 26795 /* Adjust the this parameter by a fixed constant. */ 26796 if (delta) 26797 { 26798 xops[0] = GEN_INT (delta); 26799 xops[1] = this_reg ? this_reg : this_param; 26800 if (TARGET_64BIT) 26801 { 26802 if (!x86_64_general_operand (xops[0], DImode)) 26803 { 26804 tmp = gen_rtx_REG (DImode, R10_REG); 26805 xops[1] = tmp; 26806 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops); 26807 xops[0] = tmp; 26808 xops[1] = this_param; 26809 } 26810 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops); 26811 } 26812 else 26813 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops); 26814 } 26815 26816 /* Adjust the this parameter by a value stored in the vtable. */ 26817 if (vcall_offset) 26818 { 26819 if (TARGET_64BIT) 26820 tmp = gen_rtx_REG (DImode, R10_REG); 26821 else 26822 { 26823 int tmp_regno = CX_REG; 26824 if (lookup_attribute ("fastcall", 26825 TYPE_ATTRIBUTES (TREE_TYPE (function)))) 26826 tmp_regno = AX_REG; 26827 tmp = gen_rtx_REG (SImode, tmp_regno); 26828 } 26829 26830 xops[0] = gen_rtx_MEM (Pmode, this_reg); 26831 xops[1] = tmp; 26832 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26833 26834 /* Adjust the this parameter. */ 26835 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset)); 26836 if (TARGET_64BIT && !memory_operand (xops[0], Pmode)) 26837 { 26838 rtx tmp2 = gen_rtx_REG (DImode, R11_REG); 26839 xops[0] = GEN_INT (vcall_offset); 26840 xops[1] = tmp2; 26841 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops); 26842 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2)); 26843 } 26844 xops[1] = this_reg; 26845 output_asm_insn ("add%z1\t{%0, %1|%1, %0}", xops); 26846 } 26847 26848 /* If necessary, drop THIS back to its stack slot. */ 26849 if (this_reg && this_reg != this_param) 26850 { 26851 xops[0] = this_reg; 26852 xops[1] = this_param; 26853 output_asm_insn ("mov%z1\t{%0, %1|%1, %0}", xops); 26854 } 26855 26856 xops[0] = XEXP (DECL_RTL (function), 0); 26857 if (TARGET_64BIT) 26858 { 26859 if (!flag_pic || (*targetm.binds_local_p) (function)) 26860 output_asm_insn ("jmp\t%P0", xops); 26861 /* All thunks should be in the same object as their target, 26862 and thus binds_local_p should be true. 
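   The 64-bit MS ABI case below is consequently treated as unreachable; a
   non-local target would need a GOT-relative indirect jump that is not
   emitted for that ABI.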
*/ 26863 else if (TARGET_64BIT && cfun->machine->call_abi == MS_ABI) 26864 gcc_unreachable (); 26865 else 26866 { 26867 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL); 26868 tmp = gen_rtx_CONST (Pmode, tmp); 26869 tmp = gen_rtx_MEM (QImode, tmp); 26870 xops[0] = tmp; 26871 output_asm_insn ("jmp\t%A0", xops); 26872 } 26873 } 26874 else 26875 { 26876 if (!flag_pic || (*targetm.binds_local_p) (function)) 26877 output_asm_insn ("jmp\t%P0", xops); 26878 else 26879 #if TARGET_MACHO 26880 if (TARGET_MACHO) 26881 { 26882 rtx sym_ref = XEXP (DECL_RTL (function), 0); 26883 tmp = (gen_rtx_SYMBOL_REF 26884 (Pmode, 26885 machopic_indirection_name (sym_ref, /*stub_p=*/true))); 26886 tmp = gen_rtx_MEM (QImode, tmp); 26887 xops[0] = tmp; 26888 output_asm_insn ("jmp\t%0", xops); 26889 } 26890 else 26891 #endif /* TARGET_MACHO */ 26892 { 26893 tmp = gen_rtx_REG (SImode, CX_REG); 26894 output_set_got (tmp, NULL_RTX); 26895 26896 xops[1] = tmp; 26897 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops); 26898 output_asm_insn ("jmp\t{*}%1", xops); 26899 } 26900 } 26901 } 26902 26903 static void 26904 x86_file_start (void) 26905 { 26906 default_file_start (); 26907 #if TARGET_MACHO 26908 darwin_file_start (); 26909 #endif 26910 if (X86_FILE_START_VERSION_DIRECTIVE) 26911 fputs ("\t.version\t\"01.01\"\n", asm_out_file); 26912 if (X86_FILE_START_FLTUSED) 26913 fputs ("\t.global\t__fltused\n", asm_out_file); 26914 if (ix86_asm_dialect == ASM_INTEL) 26915 fputs ("\t.intel_syntax noprefix\n", asm_out_file); 26916 } 26917 26918 int 26919 x86_field_alignment (tree field, int computed) 26920 { 26921 enum machine_mode mode; 26922 tree type = TREE_TYPE (field); 26923 26924 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE) 26925 return computed; 26926 mode = TYPE_MODE (strip_array_types (type)); 26927 if (mode == DFmode || mode == DCmode 26928 || GET_MODE_CLASS (mode) == MODE_INT 26929 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT) 26930 return MIN (32, computed); 26931 return computed; 26932 } 26933 26934 /* Output assembler code to FILE to increment profiler label # LABELNO 26935 for profiling a function entry. */ 26936 void 26937 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED) 26938 { 26939 if (TARGET_64BIT) 26940 { 26941 #ifndef NO_PROFILE_COUNTERS 26942 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno); 26943 #endif 26944 26945 if (DEFAULT_ABI == SYSV_ABI && flag_pic) 26946 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME); 26947 else 26948 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 26949 } 26950 else if (flag_pic) 26951 { 26952 #ifndef NO_PROFILE_COUNTERS 26953 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n", 26954 LPREFIX, labelno, PROFILE_COUNT_REGISTER); 26955 #endif 26956 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME); 26957 } 26958 else 26959 { 26960 #ifndef NO_PROFILE_COUNTERS 26961 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno, 26962 PROFILE_COUNT_REGISTER); 26963 #endif 26964 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME); 26965 } 26966 } 26967 26968 /* We don't have exact information about the insn sizes, but we may assume 26969 quite safely that we are informed about all 1 byte insns and memory 26970 address sizes. This is enough to eliminate unnecessary padding in 26971 99% of cases. */ 26972 26973 static int 26974 min_insn_size (rtx insn) 26975 { 26976 int l = 0; 26977 26978 if (!INSN_P (insn) || !active_insn_p (insn)) 26979 return 0; 26980 26981 /* Discard alignments we've emit and jump instructions. 
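   Both count as zero bytes here: the align unspecs are the padding this
   pass inserts itself, and ADDR_VEC/ADDR_DIFF_VEC bodies are dispatch
   tables rather than executed instructions.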
*/ 26982 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE 26983 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN) 26984 return 0; 26985 if (JUMP_P (insn) 26986 && (GET_CODE (PATTERN (insn)) == ADDR_VEC 26987 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC)) 26988 return 0; 26989 26990 /* Important case - calls are always 5 bytes. 26991 It is common to have many calls in the row. */ 26992 if (CALL_P (insn) 26993 && symbolic_reference_mentioned_p (PATTERN (insn)) 26994 && !SIBLING_CALL_P (insn)) 26995 return 5; 26996 if (get_attr_length (insn) <= 1) 26997 return 1; 26998 26999 /* For normal instructions we may rely on the sizes of addresses 27000 and the presence of symbol to require 4 bytes of encoding. 27001 This is not the case for jumps where references are PC relative. */ 27002 if (!JUMP_P (insn)) 27003 { 27004 l = get_attr_length_address (insn); 27005 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn))) 27006 l = 4; 27007 } 27008 if (l) 27009 return 1+l; 27010 else 27011 return 2; 27012 } 27013 27014 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte 27015 window. */ 27016 27017 static void 27018 ix86_avoid_jump_misspredicts (void) 27019 { 27020 rtx insn, start = get_insns (); 27021 int nbytes = 0, njumps = 0; 27022 int isjump = 0; 27023 27024 /* Look for all minimal intervals of instructions containing 4 jumps. 27025 The intervals are bounded by START and INSN. NBYTES is the total 27026 size of instructions in the interval including INSN and not including 27027 START. When the NBYTES is smaller than 16 bytes, it is possible 27028 that the end of START and INSN ends up in the same 16byte page. 27029 27030 The smallest offset in the page INSN can start is the case where START 27031 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN). 27032 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN). 27033 */ 27034 for (insn = get_insns (); insn; insn = NEXT_INSN (insn)) 27035 { 27036 27037 nbytes += min_insn_size (insn); 27038 if (dump_file) 27039 fprintf(dump_file, "Insn %i estimated to %i bytes\n", 27040 INSN_UID (insn), min_insn_size (insn)); 27041 if ((JUMP_P (insn) 27042 && GET_CODE (PATTERN (insn)) != ADDR_VEC 27043 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC) 27044 || CALL_P (insn)) 27045 njumps++; 27046 else 27047 continue; 27048 27049 while (njumps > 3) 27050 { 27051 start = NEXT_INSN (start); 27052 if ((JUMP_P (start) 27053 && GET_CODE (PATTERN (start)) != ADDR_VEC 27054 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC) 27055 || CALL_P (start)) 27056 njumps--, isjump = 1; 27057 else 27058 isjump = 0; 27059 nbytes -= min_insn_size (start); 27060 } 27061 gcc_assert (njumps >= 0); 27062 if (dump_file) 27063 fprintf (dump_file, "Interval %i to %i has %i bytes\n", 27064 INSN_UID (start), INSN_UID (insn), nbytes); 27065 27066 if (njumps == 3 && isjump && nbytes < 16) 27067 { 27068 int padsize = 15 - nbytes + min_insn_size (insn); 27069 27070 if (dump_file) 27071 fprintf (dump_file, "Padding insn %i by %i bytes!\n", 27072 INSN_UID (insn), padsize); 27073 emit_insn_before (gen_align (GEN_INT (padsize)), insn); 27074 } 27075 } 27076 } 27077 27078 /* AMD Athlon works faster 27079 when RET is not destination of conditional jump or directly preceded 27080 by other jump instruction. We avoid the penalty by inserting NOP just 27081 before the RET instructions in such cases. 
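   The replacement emitted below (return_internal_long) is the familiar
   'rep ret' idiom; the redundant REP prefix supplies the extra byte and
   is ignored in front of a near RET.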
*/ 27082 static void 27083 ix86_pad_returns (void) 27084 { 27085 edge e; 27086 edge_iterator ei; 27087 27088 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds) 27089 { 27090 basic_block bb = e->src; 27091 rtx ret = BB_END (bb); 27092 rtx prev; 27093 bool replace = false; 27094 27095 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN 27096 || optimize_bb_for_size_p (bb)) 27097 continue; 27098 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev)) 27099 if (active_insn_p (prev) || LABEL_P (prev)) 27100 break; 27101 if (prev && LABEL_P (prev)) 27102 { 27103 edge e; 27104 edge_iterator ei; 27105 27106 FOR_EACH_EDGE (e, ei, bb->preds) 27107 if (EDGE_FREQUENCY (e) && e->src->index >= 0 27108 && !(e->flags & EDGE_FALLTHRU)) 27109 replace = true; 27110 } 27111 if (!replace) 27112 { 27113 prev = prev_active_insn (ret); 27114 if (prev 27115 && ((JUMP_P (prev) && any_condjump_p (prev)) 27116 || CALL_P (prev))) 27117 replace = true; 27118 /* Empty functions get branch mispredict even when the jump destination 27119 is not visible to us. */ 27120 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED) 27121 replace = true; 27122 } 27123 if (replace) 27124 { 27125 emit_insn_before (gen_return_internal_long (), ret); 27126 delete_insn (ret); 27127 } 27128 } 27129 } 27130 27131 /* Implement machine specific optimizations. We implement padding of returns 27132 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */ 27133 static void 27134 ix86_reorg (void) 27135 { 27136 if (TARGET_PAD_RETURNS && optimize 27137 && optimize_function_for_speed_p (cfun)) 27138 ix86_pad_returns (); 27139 if (TARGET_FOUR_JUMP_LIMIT && optimize 27140 && optimize_function_for_speed_p (cfun)) 27141 ix86_avoid_jump_misspredicts (); 27142 } 27143 27144 /* Return nonzero when QImode register that must be represented via REX prefix 27145 is used. */ 27146 bool 27147 x86_extended_QIreg_mentioned_p (rtx insn) 27148 { 27149 int i; 27150 extract_insn_cached (insn); 27151 for (i = 0; i < recog_data.n_operands; i++) 27152 if (REG_P (recog_data.operand[i]) 27153 && REGNO (recog_data.operand[i]) > BX_REG) 27154 return true; 27155 return false; 27156 } 27157 27158 /* Return nonzero when P points to register encoded via REX prefix. 27159 Called via for_each_rtx. */ 27160 static int 27161 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED) 27162 { 27163 unsigned int regno; 27164 if (!REG_P (*p)) 27165 return 0; 27166 regno = REGNO (*p); 27167 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno); 27168 } 27169 27170 /* Return true when INSN mentions register that must be encoded using REX 27171 prefix. */ 27172 bool 27173 x86_extended_reg_mentioned_p (rtx insn) 27174 { 27175 return for_each_rtx (INSN_P (insn) ? &PATTERN (insn) : &insn, 27176 extended_reg_mentioned_1, NULL); 27177 } 27178 27179 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code 27180 optabs would emit if we didn't have TFmode patterns. 
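   For inputs with the sign bit set the value is halved as
   (x >> 1) | (x & 1) so that the final rounding is still correct, the
   halved value is converted as a signed number, and the result is then
   doubled with a single add.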
*/ 27181 27182 void 27183 x86_emit_floatuns (rtx operands[2]) 27184 { 27185 rtx neglab, donelab, i0, i1, f0, in, out; 27186 enum machine_mode mode, inmode; 27187 27188 inmode = GET_MODE (operands[1]); 27189 gcc_assert (inmode == SImode || inmode == DImode); 27190 27191 out = operands[0]; 27192 in = force_reg (inmode, operands[1]); 27193 mode = GET_MODE (out); 27194 neglab = gen_label_rtx (); 27195 donelab = gen_label_rtx (); 27196 f0 = gen_reg_rtx (mode); 27197 27198 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab); 27199 27200 expand_float (out, in, 0); 27201 27202 emit_jump_insn (gen_jump (donelab)); 27203 emit_barrier (); 27204 27205 emit_label (neglab); 27206 27207 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL, 27208 1, OPTAB_DIRECT); 27209 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL, 27210 1, OPTAB_DIRECT); 27211 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT); 27212 27213 expand_float (f0, i0, 0); 27214 27215 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0))); 27216 27217 emit_label (donelab); 27218 } 27219 27220 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27221 with all elements equal to VAR. Return true if successful. */ 27222 27223 static bool 27224 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode, 27225 rtx target, rtx val) 27226 { 27227 enum machine_mode hmode, smode, wsmode, wvmode; 27228 rtx x; 27229 27230 switch (mode) 27231 { 27232 case V2SImode: 27233 case V2SFmode: 27234 if (!mmx_ok) 27235 return false; 27236 /* FALLTHRU */ 27237 27238 case V2DFmode: 27239 case V2DImode: 27240 case V4SFmode: 27241 case V4SImode: 27242 val = force_reg (GET_MODE_INNER (mode), val); 27243 x = gen_rtx_VEC_DUPLICATE (mode, val); 27244 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27245 return true; 27246 27247 case V4HImode: 27248 if (!mmx_ok) 27249 return false; 27250 if (TARGET_SSE || TARGET_3DNOW_A) 27251 { 27252 val = gen_lowpart (SImode, val); 27253 x = gen_rtx_TRUNCATE (HImode, val); 27254 x = gen_rtx_VEC_DUPLICATE (mode, x); 27255 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27256 return true; 27257 } 27258 else 27259 { 27260 smode = HImode; 27261 wsmode = SImode; 27262 wvmode = V2SImode; 27263 goto widen; 27264 } 27265 27266 case V8QImode: 27267 if (!mmx_ok) 27268 return false; 27269 smode = QImode; 27270 wsmode = HImode; 27271 wvmode = V4HImode; 27272 goto widen; 27273 case V8HImode: 27274 if (TARGET_SSE2) 27275 { 27276 rtx tmp1, tmp2; 27277 /* Extend HImode to SImode using a paradoxical SUBREG. */ 27278 tmp1 = gen_reg_rtx (SImode); 27279 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 27280 /* Insert the SImode value as low element of V4SImode vector. */ 27281 tmp2 = gen_reg_rtx (V4SImode); 27282 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 27283 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 27284 CONST0_RTX (V4SImode), 27285 const1_rtx); 27286 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 27287 /* Cast the V4SImode vector back to a V8HImode vector. */ 27288 tmp1 = gen_reg_rtx (V8HImode); 27289 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2)); 27290 /* Duplicate the low short through the whole low SImode word. */ 27291 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1)); 27292 /* Cast the V8HImode vector back to a V4SImode vector. */ 27293 tmp2 = gen_reg_rtx (V4SImode); 27294 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 27295 /* Replicate the low element of the V4SImode vector. 
*/ 27296 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 27297 /* Cast the V2SImode back to V8HImode, and store in target. */ 27298 emit_move_insn (target, gen_lowpart (V8HImode, tmp2)); 27299 return true; 27300 } 27301 smode = HImode; 27302 wsmode = SImode; 27303 wvmode = V4SImode; 27304 goto widen; 27305 case V16QImode: 27306 if (TARGET_SSE2) 27307 { 27308 rtx tmp1, tmp2; 27309 /* Extend QImode to SImode using a paradoxical SUBREG. */ 27310 tmp1 = gen_reg_rtx (SImode); 27311 emit_move_insn (tmp1, gen_lowpart (SImode, val)); 27312 /* Insert the SImode value as low element of V4SImode vector. */ 27313 tmp2 = gen_reg_rtx (V4SImode); 27314 tmp1 = gen_rtx_VEC_MERGE (V4SImode, 27315 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1), 27316 CONST0_RTX (V4SImode), 27317 const1_rtx); 27318 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1)); 27319 /* Cast the V4SImode vector back to a V16QImode vector. */ 27320 tmp1 = gen_reg_rtx (V16QImode); 27321 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2)); 27322 /* Duplicate the low byte through the whole low SImode word. */ 27323 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 27324 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1)); 27325 /* Cast the V16QImode vector back to a V4SImode vector. */ 27326 tmp2 = gen_reg_rtx (V4SImode); 27327 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1)); 27328 /* Replicate the low element of the V4SImode vector. */ 27329 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx)); 27330 /* Cast the V2SImode back to V16QImode, and store in target. */ 27331 emit_move_insn (target, gen_lowpart (V16QImode, tmp2)); 27332 return true; 27333 } 27334 smode = QImode; 27335 wsmode = HImode; 27336 wvmode = V8HImode; 27337 goto widen; 27338 widen: 27339 /* Replicate the value once into the next wider mode and recurse. */ 27340 val = convert_modes (wsmode, smode, val, true); 27341 x = expand_simple_binop (wsmode, ASHIFT, val, 27342 GEN_INT (GET_MODE_BITSIZE (smode)), 27343 NULL_RTX, 1, OPTAB_LIB_WIDEN); 27344 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN); 27345 27346 x = gen_reg_rtx (wvmode); 27347 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val)) 27348 gcc_unreachable (); 27349 emit_move_insn (target, gen_lowpart (mode, x)); 27350 return true; 27351 27352 case V4DFmode: 27353 hmode = V2DFmode; 27354 goto half; 27355 case V4DImode: 27356 hmode = V2DImode; 27357 goto half; 27358 case V8SFmode: 27359 hmode = V4SFmode; 27360 goto half; 27361 case V8SImode: 27362 hmode = V4SImode; 27363 goto half; 27364 case V16HImode: 27365 hmode = V8HImode; 27366 goto half; 27367 case V32QImode: 27368 hmode = V16QImode; 27369 goto half; 27370 half: 27371 { 27372 rtx tmp = gen_reg_rtx (hmode); 27373 ix86_expand_vector_init_duplicate (mmx_ok, hmode, tmp, val); 27374 emit_insn (gen_rtx_SET (VOIDmode, target, 27375 gen_rtx_VEC_CONCAT (mode, tmp, tmp))); 27376 } 27377 return true; 27378 27379 default: 27380 return false; 27381 } 27382 } 27383 27384 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27385 whose ONE_VAR element is VAR, and other elements are zero. Return true 27386 if successful. */ 27387 27388 static bool 27389 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode, 27390 rtx target, rtx var, int one_var) 27391 { 27392 enum machine_mode vsimode; 27393 rtx new_target; 27394 rtx x, tmp; 27395 bool use_vector_set = false; 27396 27397 switch (mode) 27398 { 27399 case V2DImode: 27400 /* For SSE4.1, we normally use vector set. 
But if the second 27401 element is zero and inter-unit moves are OK, we use movq 27402 instead. */ 27403 use_vector_set = (TARGET_64BIT 27404 && TARGET_SSE4_1 27405 && !(TARGET_INTER_UNIT_MOVES 27406 && one_var == 0)); 27407 break; 27408 case V16QImode: 27409 case V4SImode: 27410 case V4SFmode: 27411 use_vector_set = TARGET_SSE4_1; 27412 break; 27413 case V8HImode: 27414 use_vector_set = TARGET_SSE2; 27415 break; 27416 case V4HImode: 27417 use_vector_set = TARGET_SSE || TARGET_3DNOW_A; 27418 break; 27419 case V32QImode: 27420 case V16HImode: 27421 case V8SImode: 27422 case V8SFmode: 27423 case V4DFmode: 27424 use_vector_set = TARGET_AVX; 27425 break; 27426 case V4DImode: 27427 /* Use ix86_expand_vector_set in 64bit mode only. */ 27428 use_vector_set = TARGET_AVX && TARGET_64BIT; 27429 break; 27430 default: 27431 break; 27432 } 27433 27434 if (use_vector_set) 27435 { 27436 emit_insn (gen_rtx_SET (VOIDmode, target, CONST0_RTX (mode))); 27437 var = force_reg (GET_MODE_INNER (mode), var); 27438 ix86_expand_vector_set (mmx_ok, target, var, one_var); 27439 return true; 27440 } 27441 27442 switch (mode) 27443 { 27444 case V2SFmode: 27445 case V2SImode: 27446 if (!mmx_ok) 27447 return false; 27448 /* FALLTHRU */ 27449 27450 case V2DFmode: 27451 case V2DImode: 27452 if (one_var != 0) 27453 return false; 27454 var = force_reg (GET_MODE_INNER (mode), var); 27455 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode))); 27456 emit_insn (gen_rtx_SET (VOIDmode, target, x)); 27457 return true; 27458 27459 case V4SFmode: 27460 case V4SImode: 27461 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER) 27462 new_target = gen_reg_rtx (mode); 27463 else 27464 new_target = target; 27465 var = force_reg (GET_MODE_INNER (mode), var); 27466 x = gen_rtx_VEC_DUPLICATE (mode, var); 27467 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx); 27468 emit_insn (gen_rtx_SET (VOIDmode, new_target, x)); 27469 if (one_var != 0) 27470 { 27471 /* We need to shuffle the value to the correct position, so 27472 create a new pseudo to store the intermediate result. */ 27473 27474 /* With SSE2, we can use the integer shuffle insns. */ 27475 if (mode != V4SFmode && TARGET_SSE2) 27476 { 27477 emit_insn (gen_sse2_pshufd_1 (new_target, new_target, 27478 GEN_INT (1), 27479 GEN_INT (one_var == 1 ? 0 : 1), 27480 GEN_INT (one_var == 2 ? 0 : 1), 27481 GEN_INT (one_var == 3 ? 0 : 1))); 27482 if (target != new_target) 27483 emit_move_insn (target, new_target); 27484 return true; 27485 } 27486 27487 /* Otherwise convert the intermediate result to V4SFmode and 27488 use the SSE1 shuffle instructions. */ 27489 if (mode != V4SFmode) 27490 { 27491 tmp = gen_reg_rtx (V4SFmode); 27492 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target)); 27493 } 27494 else 27495 tmp = new_target; 27496 27497 emit_insn (gen_sse_shufps_v4sf (tmp, tmp, tmp, 27498 GEN_INT (1), 27499 GEN_INT (one_var == 1 ? 0 : 1), 27500 GEN_INT (one_var == 2 ? 0+4 : 1+4), 27501 GEN_INT (one_var == 3 ? 
0+4 : 1+4))); 27502 27503 if (mode != V4SFmode) 27504 emit_move_insn (target, gen_lowpart (V4SImode, tmp)); 27505 else if (tmp != target) 27506 emit_move_insn (target, tmp); 27507 } 27508 else if (target != new_target) 27509 emit_move_insn (target, new_target); 27510 return true; 27511 27512 case V8HImode: 27513 case V16QImode: 27514 vsimode = V4SImode; 27515 goto widen; 27516 case V4HImode: 27517 case V8QImode: 27518 if (!mmx_ok) 27519 return false; 27520 vsimode = V2SImode; 27521 goto widen; 27522 widen: 27523 if (one_var != 0) 27524 return false; 27525 27526 /* Zero extend the variable element to SImode and recurse. */ 27527 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true); 27528 27529 x = gen_reg_rtx (vsimode); 27530 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x, 27531 var, one_var)) 27532 gcc_unreachable (); 27533 27534 emit_move_insn (target, gen_lowpart (mode, x)); 27535 return true; 27536 27537 default: 27538 return false; 27539 } 27540 } 27541 27542 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector 27543 consisting of the values in VALS. It is known that all elements 27544 except ONE_VAR are constants. Return true if successful. */ 27545 27546 static bool 27547 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode, 27548 rtx target, rtx vals, int one_var) 27549 { 27550 rtx var = XVECEXP (vals, 0, one_var); 27551 enum machine_mode wmode; 27552 rtx const_vec, x; 27553 27554 const_vec = copy_rtx (vals); 27555 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode)); 27556 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0)); 27557 27558 switch (mode) 27559 { 27560 case V2DFmode: 27561 case V2DImode: 27562 case V2SFmode: 27563 case V2SImode: 27564 /* For the two element vectors, it's just as easy to use 27565 the general case. */ 27566 return false; 27567 27568 case V4DImode: 27569 /* Use ix86_expand_vector_set in 64bit mode only. */ 27570 if (!TARGET_64BIT) 27571 return false; 27572 case V4DFmode: 27573 case V8SFmode: 27574 case V8SImode: 27575 case V16HImode: 27576 case V32QImode: 27577 case V4SFmode: 27578 case V4SImode: 27579 case V8HImode: 27580 case V4HImode: 27581 break; 27582 27583 case V16QImode: 27584 if (TARGET_SSE4_1) 27585 break; 27586 wmode = V8HImode; 27587 goto widen; 27588 case V8QImode: 27589 wmode = V4HImode; 27590 goto widen; 27591 widen: 27592 /* There's no way to set one QImode entry easily. Combine 27593 the variable value with its adjacent constant value, and 27594 promote to an HImode set. 
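As a hypothetical illustration: for a V16QImode vector with one_var == 5, the
   adjacent constant is element 4; the variable byte is zero-extended to HImode
   and shifted left by 8, the constant's low byte is IORed into the low half,
   and the combined HImode value is then inserted at element 5 >> 1 == 2 of the
   V8HImode view built below.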
*/ 27595 x = XVECEXP (vals, 0, one_var ^ 1); 27596 if (one_var & 1) 27597 { 27598 var = convert_modes (HImode, QImode, var, true); 27599 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8), 27600 NULL_RTX, 1, OPTAB_LIB_WIDEN); 27601 x = GEN_INT (INTVAL (x) & 0xff); 27602 } 27603 else 27604 { 27605 var = convert_modes (HImode, QImode, var, true); 27606 x = gen_int_mode (INTVAL (x) << 8, HImode); 27607 } 27608 if (x != const0_rtx) 27609 var = expand_simple_binop (HImode, IOR, var, x, var, 27610 1, OPTAB_LIB_WIDEN); 27611 27612 x = gen_reg_rtx (wmode); 27613 emit_move_insn (x, gen_lowpart (wmode, const_vec)); 27614 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1); 27615 27616 emit_move_insn (target, gen_lowpart (mode, x)); 27617 return true; 27618 27619 default: 27620 return false; 27621 } 27622 27623 emit_move_insn (target, const_vec); 27624 ix86_expand_vector_set (mmx_ok, target, var, one_var); 27625 return true; 27626 } 27627 27628 /* A subroutine of ix86_expand_vector_init_general. Use vector 27629 concatenate to handle the most general case: all values variable, 27630 and none identical. */ 27631 27632 static void 27633 ix86_expand_vector_init_concat (enum machine_mode mode, 27634 rtx target, rtx *ops, int n) 27635 { 27636 enum machine_mode cmode, hmode = VOIDmode; 27637 rtx first[8], second[4]; 27638 rtvec v; 27639 int i, j; 27640 27641 switch (n) 27642 { 27643 case 2: 27644 switch (mode) 27645 { 27646 case V8SImode: 27647 cmode = V4SImode; 27648 break; 27649 case V8SFmode: 27650 cmode = V4SFmode; 27651 break; 27652 case V4DImode: 27653 cmode = V2DImode; 27654 break; 27655 case V4DFmode: 27656 cmode = V2DFmode; 27657 break; 27658 case V4SImode: 27659 cmode = V2SImode; 27660 break; 27661 case V4SFmode: 27662 cmode = V2SFmode; 27663 break; 27664 case V2DImode: 27665 cmode = DImode; 27666 break; 27667 case V2SImode: 27668 cmode = SImode; 27669 break; 27670 case V2DFmode: 27671 cmode = DFmode; 27672 break; 27673 case V2SFmode: 27674 cmode = SFmode; 27675 break; 27676 default: 27677 gcc_unreachable (); 27678 } 27679 27680 if (!register_operand (ops[1], cmode)) 27681 ops[1] = force_reg (cmode, ops[1]); 27682 if (!register_operand (ops[0], cmode)) 27683 ops[0] = force_reg (cmode, ops[0]); 27684 emit_insn (gen_rtx_SET (VOIDmode, target, 27685 gen_rtx_VEC_CONCAT (mode, ops[0], 27686 ops[1]))); 27687 break; 27688 27689 case 4: 27690 switch (mode) 27691 { 27692 case V4DImode: 27693 cmode = V2DImode; 27694 break; 27695 case V4DFmode: 27696 cmode = V2DFmode; 27697 break; 27698 case V4SImode: 27699 cmode = V2SImode; 27700 break; 27701 case V4SFmode: 27702 cmode = V2SFmode; 27703 break; 27704 default: 27705 gcc_unreachable (); 27706 } 27707 goto half; 27708 27709 case 8: 27710 switch (mode) 27711 { 27712 case V8SImode: 27713 cmode = V2SImode; 27714 hmode = V4SImode; 27715 break; 27716 case V8SFmode: 27717 cmode = V2SFmode; 27718 hmode = V4SFmode; 27719 break; 27720 default: 27721 gcc_unreachable (); 27722 } 27723 goto half; 27724 27725 half: 27726 /* FIXME: We process inputs backward to help RA. PR 36222. 
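For instance (illustrative), with n == 4 and a V4SFmode target the loop below
   first builds first[1] from { ops[2], ops[3] } and then first[0] from
   { ops[0], ops[1] }, each as a two-element V2SFmode vector, and the recursive
   call concatenates first[0] and first[1] into the final V4SFmode value.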
*/ 27727 i = n - 1; 27728 j = (n >> 1) - 1; 27729 for (; i > 0; i -= 2, j--) 27730 { 27731 first[j] = gen_reg_rtx (cmode); 27732 v = gen_rtvec (2, ops[i - 1], ops[i]); 27733 ix86_expand_vector_init (false, first[j], 27734 gen_rtx_PARALLEL (cmode, v)); 27735 } 27736 27737 n >>= 1; 27738 if (n > 2) 27739 { 27740 gcc_assert (hmode != VOIDmode); 27741 for (i = j = 0; i < n; i += 2, j++) 27742 { 27743 second[j] = gen_reg_rtx (hmode); 27744 ix86_expand_vector_init_concat (hmode, second [j], 27745 &first [i], 2); 27746 } 27747 n >>= 1; 27748 ix86_expand_vector_init_concat (mode, target, second, n); 27749 } 27750 else 27751 ix86_expand_vector_init_concat (mode, target, first, n); 27752 break; 27753 27754 default: 27755 gcc_unreachable (); 27756 } 27757 } 27758 27759 /* A subroutine of ix86_expand_vector_init_general. Use vector 27760 interleave to handle the most general case: all values variable, 27761 and none identical. */ 27762 27763 static void 27764 ix86_expand_vector_init_interleave (enum machine_mode mode, 27765 rtx target, rtx *ops, int n) 27766 { 27767 enum machine_mode first_imode, second_imode, third_imode, inner_mode; 27768 int i, j; 27769 rtx op0, op1; 27770 rtx (*gen_load_even) (rtx, rtx, rtx); 27771 rtx (*gen_interleave_first_low) (rtx, rtx, rtx); 27772 rtx (*gen_interleave_second_low) (rtx, rtx, rtx); 27773 27774 switch (mode) 27775 { 27776 case V8HImode: 27777 gen_load_even = gen_vec_setv8hi; 27778 gen_interleave_first_low = gen_vec_interleave_lowv4si; 27779 gen_interleave_second_low = gen_vec_interleave_lowv2di; 27780 inner_mode = HImode; 27781 first_imode = V4SImode; 27782 second_imode = V2DImode; 27783 third_imode = VOIDmode; 27784 break; 27785 case V16QImode: 27786 gen_load_even = gen_vec_setv16qi; 27787 gen_interleave_first_low = gen_vec_interleave_lowv8hi; 27788 gen_interleave_second_low = gen_vec_interleave_lowv4si; 27789 inner_mode = QImode; 27790 first_imode = V8HImode; 27791 second_imode = V4SImode; 27792 third_imode = V2DImode; 27793 break; 27794 default: 27795 gcc_unreachable (); 27796 } 27797 27798 for (i = 0; i < n; i++) 27799 { 27800 /* Extend the odd elment to SImode using a paradoxical SUBREG. */ 27801 op0 = gen_reg_rtx (SImode); 27802 emit_move_insn (op0, gen_lowpart (SImode, ops [i + i])); 27803 27804 /* Insert the SImode value as low element of V4SImode vector. */ 27805 op1 = gen_reg_rtx (V4SImode); 27806 op0 = gen_rtx_VEC_MERGE (V4SImode, 27807 gen_rtx_VEC_DUPLICATE (V4SImode, 27808 op0), 27809 CONST0_RTX (V4SImode), 27810 const1_rtx); 27811 emit_insn (gen_rtx_SET (VOIDmode, op1, op0)); 27812 27813 /* Cast the V4SImode vector back to a vector in orignal mode. */ 27814 op0 = gen_reg_rtx (mode); 27815 emit_move_insn (op0, gen_lowpart (mode, op1)); 27816 27817 /* Load even elements into the second positon. */ 27818 emit_insn ((*gen_load_even) (op0, 27819 force_reg (inner_mode, 27820 ops [i + i + 1]), 27821 const1_rtx)); 27822 27823 /* Cast vector to FIRST_IMODE vector. */ 27824 ops[i] = gen_reg_rtx (first_imode); 27825 emit_move_insn (ops[i], gen_lowpart (first_imode, op0)); 27826 } 27827 27828 /* Interleave low FIRST_IMODE vectors. */ 27829 for (i = j = 0; i < n; i += 2, j++) 27830 { 27831 op0 = gen_reg_rtx (first_imode); 27832 emit_insn ((*gen_interleave_first_low) (op0, ops[i], ops[i + 1])); 27833 27834 /* Cast FIRST_IMODE vector to SECOND_IMODE vector. */ 27835 ops[j] = gen_reg_rtx (second_imode); 27836 emit_move_insn (ops[j], gen_lowpart (second_imode, op0)); 27837 } 27838 27839 /* Interleave low SECOND_IMODE vectors. 
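For illustration, with a V8HImode target (n == 4 at this point): the element
   loop above left four V4SImode temporaries whose low dwords hold adjacent
   scalar pairs, the V4SImode interleaves merged those into two vectors holding
   elements 0-3 and 4-7 in their low quadwords, and the single V2DImode
   interleave emitted below combines the two quadwords into the complete
   vector.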
*/ 27840 switch (second_imode) 27841 { 27842 case V4SImode: 27843 for (i = j = 0; i < n / 2; i += 2, j++) 27844 { 27845 op0 = gen_reg_rtx (second_imode); 27846 emit_insn ((*gen_interleave_second_low) (op0, ops[i], 27847 ops[i + 1])); 27848 27849 /* Cast the SECOND_IMODE vector to the THIRD_IMODE 27850 vector. */ 27851 ops[j] = gen_reg_rtx (third_imode); 27852 emit_move_insn (ops[j], gen_lowpart (third_imode, op0)); 27853 } 27854 second_imode = V2DImode; 27855 gen_interleave_second_low = gen_vec_interleave_lowv2di; 27856 /* FALLTHRU */ 27857 27858 case V2DImode: 27859 op0 = gen_reg_rtx (second_imode); 27860 emit_insn ((*gen_interleave_second_low) (op0, ops[0], 27861 ops[1])); 27862 27863 /* Cast the SECOND_IMODE vector back to a vector on original 27864 mode. */ 27865 emit_insn (gen_rtx_SET (VOIDmode, target, 27866 gen_lowpart (mode, op0))); 27867 break; 27868 27869 default: 27870 gcc_unreachable (); 27871 } 27872 } 27873 27874 /* A subroutine of ix86_expand_vector_init. Handle the most general case: 27875 all values variable, and none identical. */ 27876 27877 static void 27878 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode, 27879 rtx target, rtx vals) 27880 { 27881 rtx ops[32], op0, op1; 27882 enum machine_mode half_mode = VOIDmode; 27883 int n, i; 27884 27885 switch (mode) 27886 { 27887 case V2SFmode: 27888 case V2SImode: 27889 if (!mmx_ok && !TARGET_SSE) 27890 break; 27891 /* FALLTHRU */ 27892 27893 case V8SFmode: 27894 case V8SImode: 27895 case V4DFmode: 27896 case V4DImode: 27897 case V4SFmode: 27898 case V4SImode: 27899 case V2DFmode: 27900 case V2DImode: 27901 n = GET_MODE_NUNITS (mode); 27902 for (i = 0; i < n; i++) 27903 ops[i] = XVECEXP (vals, 0, i); 27904 ix86_expand_vector_init_concat (mode, target, ops, n); 27905 return; 27906 27907 case V32QImode: 27908 half_mode = V16QImode; 27909 goto half; 27910 27911 case V16HImode: 27912 half_mode = V8HImode; 27913 goto half; 27914 27915 half: 27916 n = GET_MODE_NUNITS (mode); 27917 for (i = 0; i < n; i++) 27918 ops[i] = XVECEXP (vals, 0, i); 27919 op0 = gen_reg_rtx (half_mode); 27920 op1 = gen_reg_rtx (half_mode); 27921 ix86_expand_vector_init_interleave (half_mode, op0, ops, 27922 n >> 2); 27923 ix86_expand_vector_init_interleave (half_mode, op1, 27924 &ops [n >> 1], n >> 2); 27925 emit_insn (gen_rtx_SET (VOIDmode, target, 27926 gen_rtx_VEC_CONCAT (mode, op0, op1))); 27927 return; 27928 27929 case V16QImode: 27930 if (!TARGET_SSE4_1) 27931 break; 27932 /* FALLTHRU */ 27933 27934 case V8HImode: 27935 if (!TARGET_SSE2) 27936 break; 27937 27938 /* Don't use ix86_expand_vector_init_interleave if we can't 27939 move from GPR to SSE register directly. 
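(Presumably because, without direct inter-unit moves, each element would have
   to be transferred from a general register to the SSE unit through memory;
   the generic word-building fallback at the end of this function is assumed to
   be the better choice in that case.)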
*/ 27940 if (!TARGET_INTER_UNIT_MOVES) 27941 break; 27942 27943 n = GET_MODE_NUNITS (mode); 27944 for (i = 0; i < n; i++) 27945 ops[i] = XVECEXP (vals, 0, i); 27946 ix86_expand_vector_init_interleave (mode, target, ops, n >> 1); 27947 return; 27948 27949 case V4HImode: 27950 case V8QImode: 27951 break; 27952 27953 default: 27954 gcc_unreachable (); 27955 } 27956 27957 { 27958 int i, j, n_elts, n_words, n_elt_per_word; 27959 enum machine_mode inner_mode; 27960 rtx words[4], shift; 27961 27962 inner_mode = GET_MODE_INNER (mode); 27963 n_elts = GET_MODE_NUNITS (mode); 27964 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD; 27965 n_elt_per_word = n_elts / n_words; 27966 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode)); 27967 27968 for (i = 0; i < n_words; ++i) 27969 { 27970 rtx word = NULL_RTX; 27971 27972 for (j = 0; j < n_elt_per_word; ++j) 27973 { 27974 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1); 27975 elt = convert_modes (word_mode, inner_mode, elt, true); 27976 27977 if (j == 0) 27978 word = elt; 27979 else 27980 { 27981 word = expand_simple_binop (word_mode, ASHIFT, word, shift, 27982 word, 1, OPTAB_LIB_WIDEN); 27983 word = expand_simple_binop (word_mode, IOR, word, elt, 27984 word, 1, OPTAB_LIB_WIDEN); 27985 } 27986 } 27987 27988 words[i] = word; 27989 } 27990 27991 if (n_words == 1) 27992 emit_move_insn (target, gen_lowpart (mode, words[0])); 27993 else if (n_words == 2) 27994 { 27995 rtx tmp = gen_reg_rtx (mode); 27996 emit_clobber (tmp); 27997 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]); 27998 emit_move_insn (gen_highpart (word_mode, tmp), words[1]); 27999 emit_move_insn (target, tmp); 28000 } 28001 else if (n_words == 4) 28002 { 28003 rtx tmp = gen_reg_rtx (V4SImode); 28004 gcc_assert (word_mode == SImode); 28005 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words)); 28006 ix86_expand_vector_init_general (false, V4SImode, tmp, vals); 28007 emit_move_insn (target, gen_lowpart (mode, tmp)); 28008 } 28009 else 28010 gcc_unreachable (); 28011 } 28012 } 28013 28014 /* Initialize vector TARGET via VALS. Suppress the use of MMX 28015 instructions unless MMX_OK is true. */ 28016 28017 void 28018 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals) 28019 { 28020 enum machine_mode mode = GET_MODE (target); 28021 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28022 int n_elts = GET_MODE_NUNITS (mode); 28023 int n_var = 0, one_var = -1; 28024 bool all_same = true, all_const_zero = true; 28025 int i; 28026 rtx x; 28027 28028 for (i = 0; i < n_elts; ++i) 28029 { 28030 x = XVECEXP (vals, 0, i); 28031 if (!(CONST_INT_P (x) 28032 || GET_CODE (x) == CONST_DOUBLE 28033 || GET_CODE (x) == CONST_FIXED)) 28034 n_var++, one_var = i; 28035 else if (x != CONST0_RTX (inner_mode)) 28036 all_const_zero = false; 28037 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0))) 28038 all_same = false; 28039 } 28040 28041 /* Constants are best loaded from the constant pool. */ 28042 if (n_var == 0) 28043 { 28044 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))); 28045 return; 28046 } 28047 28048 /* If all values are identical, broadcast the value. */ 28049 if (all_same 28050 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target, 28051 XVECEXP (vals, 0, 0))) 28052 return; 28053 28054 /* Values where only one field is non-constant are best loaded from 28055 the pool and overwritten via move later. 
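For example (values illustrative), a V4SFmode initializer { 1.0, x, 3.0, 4.0 }
   is handled by loading the constant vector { 1.0, 0.0, 3.0, 4.0 } from the
   pool and then storing x into element 1 with ix86_expand_vector_set.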
*/ 28056 if (n_var == 1) 28057 { 28058 if (all_const_zero 28059 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target, 28060 XVECEXP (vals, 0, one_var), 28061 one_var)) 28062 return; 28063 28064 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var)) 28065 return; 28066 } 28067 28068 ix86_expand_vector_init_general (mmx_ok, mode, target, vals); 28069 } 28070 28071 void 28072 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt) 28073 { 28074 enum machine_mode mode = GET_MODE (target); 28075 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28076 enum machine_mode half_mode; 28077 bool use_vec_merge = false; 28078 rtx tmp; 28079 static rtx (*gen_extract[6][2]) (rtx, rtx) 28080 = { 28081 { gen_vec_extract_lo_v32qi, gen_vec_extract_hi_v32qi }, 28082 { gen_vec_extract_lo_v16hi, gen_vec_extract_hi_v16hi }, 28083 { gen_vec_extract_lo_v8si, gen_vec_extract_hi_v8si }, 28084 { gen_vec_extract_lo_v4di, gen_vec_extract_hi_v4di }, 28085 { gen_vec_extract_lo_v8sf, gen_vec_extract_hi_v8sf }, 28086 { gen_vec_extract_lo_v4df, gen_vec_extract_hi_v4df } 28087 }; 28088 static rtx (*gen_insert[6][2]) (rtx, rtx, rtx) 28089 = { 28090 { gen_vec_set_lo_v32qi, gen_vec_set_hi_v32qi }, 28091 { gen_vec_set_lo_v16hi, gen_vec_set_hi_v16hi }, 28092 { gen_vec_set_lo_v8si, gen_vec_set_hi_v8si }, 28093 { gen_vec_set_lo_v4di, gen_vec_set_hi_v4di }, 28094 { gen_vec_set_lo_v8sf, gen_vec_set_hi_v8sf }, 28095 { gen_vec_set_lo_v4df, gen_vec_set_hi_v4df } 28096 }; 28097 int i, j, n; 28098 28099 switch (mode) 28100 { 28101 case V2SFmode: 28102 case V2SImode: 28103 if (mmx_ok) 28104 { 28105 tmp = gen_reg_rtx (GET_MODE_INNER (mode)); 28106 ix86_expand_vector_extract (true, tmp, target, 1 - elt); 28107 if (elt == 0) 28108 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val); 28109 else 28110 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp); 28111 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28112 return; 28113 } 28114 break; 28115 28116 case V2DImode: 28117 use_vec_merge = TARGET_SSE4_1; 28118 if (use_vec_merge) 28119 break; 28120 28121 case V2DFmode: 28122 { 28123 rtx op0, op1; 28124 28125 /* For the two element vectors, we implement a VEC_CONCAT with 28126 the extraction of the other element. 
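Illustratively, setting element 1 of a V2DFmode vector { a, b } to x emits
   target = VEC_CONCAT (VEC_SELECT (target, [0]), x), i.e. { a, x }; for
   elt == 0 the two concat operands are simply swapped.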
*/ 28127 28128 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt))); 28129 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp); 28130 28131 if (elt == 0) 28132 op0 = val, op1 = tmp; 28133 else 28134 op0 = tmp, op1 = val; 28135 28136 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1); 28137 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28138 } 28139 return; 28140 28141 case V4SFmode: 28142 use_vec_merge = TARGET_SSE4_1; 28143 if (use_vec_merge) 28144 break; 28145 28146 switch (elt) 28147 { 28148 case 0: 28149 use_vec_merge = true; 28150 break; 28151 28152 case 1: 28153 /* tmp = target = A B C D */ 28154 tmp = copy_to_reg (target); 28155 /* target = A A B B */ 28156 emit_insn (gen_sse_unpcklps (target, target, target)); 28157 /* target = X A B B */ 28158 ix86_expand_vector_set (false, target, val, 0); 28159 /* target = A X C D */ 28160 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28161 GEN_INT (1), GEN_INT (0), 28162 GEN_INT (2+4), GEN_INT (3+4))); 28163 return; 28164 28165 case 2: 28166 /* tmp = target = A B C D */ 28167 tmp = copy_to_reg (target); 28168 /* tmp = X B C D */ 28169 ix86_expand_vector_set (false, tmp, val, 0); 28170 /* target = A B X D */ 28171 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28172 GEN_INT (0), GEN_INT (1), 28173 GEN_INT (0+4), GEN_INT (3+4))); 28174 return; 28175 28176 case 3: 28177 /* tmp = target = A B C D */ 28178 tmp = copy_to_reg (target); 28179 /* tmp = X B C D */ 28180 ix86_expand_vector_set (false, tmp, val, 0); 28181 /* target = A B X D */ 28182 emit_insn (gen_sse_shufps_v4sf (target, target, tmp, 28183 GEN_INT (0), GEN_INT (1), 28184 GEN_INT (2+4), GEN_INT (0+4))); 28185 return; 28186 28187 default: 28188 gcc_unreachable (); 28189 } 28190 break; 28191 28192 case V4SImode: 28193 use_vec_merge = TARGET_SSE4_1; 28194 if (use_vec_merge) 28195 break; 28196 28197 /* Element 0 handled by vec_merge below. */ 28198 if (elt == 0) 28199 { 28200 use_vec_merge = true; 28201 break; 28202 } 28203 28204 if (TARGET_SSE2) 28205 { 28206 /* With SSE2, use integer shuffles to swap element 0 and ELT, 28207 store into element 0, then shuffle them back. */ 28208 28209 rtx order[4]; 28210 28211 order[0] = GEN_INT (elt); 28212 order[1] = const1_rtx; 28213 order[2] = const2_rtx; 28214 order[3] = GEN_INT (3); 28215 order[elt] = const0_rtx; 28216 28217 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 28218 order[1], order[2], order[3])); 28219 28220 ix86_expand_vector_set (false, target, val, 0); 28221 28222 emit_insn (gen_sse2_pshufd_1 (target, target, order[0], 28223 order[1], order[2], order[3])); 28224 } 28225 else 28226 { 28227 /* For SSE1, we have to reuse the V4SF code. 
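The V4SImode vector is simply reinterpreted as V4SFmode (and the scalar value
   as SFmode) via gen_lowpart; only the bit pattern matters for the insertion,
   so the shufps based sequence above works for the integer elements as well.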
*/ 28228 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target), 28229 gen_lowpart (SFmode, val), elt); 28230 } 28231 return; 28232 28233 case V8HImode: 28234 use_vec_merge = TARGET_SSE2; 28235 break; 28236 case V4HImode: 28237 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 28238 break; 28239 28240 case V16QImode: 28241 use_vec_merge = TARGET_SSE4_1; 28242 break; 28243 28244 case V8QImode: 28245 break; 28246 28247 case V32QImode: 28248 half_mode = V16QImode; 28249 j = 0; 28250 n = 16; 28251 goto half; 28252 28253 case V16HImode: 28254 half_mode = V8HImode; 28255 j = 1; 28256 n = 8; 28257 goto half; 28258 28259 case V8SImode: 28260 half_mode = V4SImode; 28261 j = 2; 28262 n = 4; 28263 goto half; 28264 28265 case V4DImode: 28266 half_mode = V2DImode; 28267 j = 3; 28268 n = 2; 28269 goto half; 28270 28271 case V8SFmode: 28272 half_mode = V4SFmode; 28273 j = 4; 28274 n = 4; 28275 goto half; 28276 28277 case V4DFmode: 28278 half_mode = V2DFmode; 28279 j = 5; 28280 n = 2; 28281 goto half; 28282 28283 half: 28284 /* Compute offset. */ 28285 i = elt / n; 28286 elt %= n; 28287 28288 gcc_assert (i <= 1); 28289 28290 /* Extract the half. */ 28291 tmp = gen_reg_rtx (half_mode); 28292 emit_insn ((*gen_extract[j][i]) (tmp, target)); 28293 28294 /* Put val in tmp at elt. */ 28295 ix86_expand_vector_set (false, tmp, val, elt); 28296 28297 /* Put it back. */ 28298 emit_insn ((*gen_insert[j][i]) (target, target, tmp)); 28299 return; 28300 28301 default: 28302 break; 28303 } 28304 28305 if (use_vec_merge) 28306 { 28307 tmp = gen_rtx_VEC_DUPLICATE (mode, val); 28308 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt)); 28309 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28310 } 28311 else 28312 { 28313 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 28314 28315 emit_move_insn (mem, target); 28316 28317 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 28318 emit_move_insn (tmp, val); 28319 28320 emit_move_insn (target, mem); 28321 } 28322 } 28323 28324 void 28325 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt) 28326 { 28327 enum machine_mode mode = GET_MODE (vec); 28328 enum machine_mode inner_mode = GET_MODE_INNER (mode); 28329 bool use_vec_extr = false; 28330 rtx tmp; 28331 28332 switch (mode) 28333 { 28334 case V2SImode: 28335 case V2SFmode: 28336 if (!mmx_ok) 28337 break; 28338 /* FALLTHRU */ 28339 28340 case V2DFmode: 28341 case V2DImode: 28342 use_vec_extr = true; 28343 break; 28344 28345 case V4SFmode: 28346 use_vec_extr = TARGET_SSE4_1; 28347 if (use_vec_extr) 28348 break; 28349 28350 switch (elt) 28351 { 28352 case 0: 28353 tmp = vec; 28354 break; 28355 28356 case 1: 28357 case 3: 28358 tmp = gen_reg_rtx (mode); 28359 emit_insn (gen_sse_shufps_v4sf (tmp, vec, vec, 28360 GEN_INT (elt), GEN_INT (elt), 28361 GEN_INT (elt+4), GEN_INT (elt+4))); 28362 break; 28363 28364 case 2: 28365 tmp = gen_reg_rtx (mode); 28366 emit_insn (gen_sse_unpckhps (tmp, vec, vec)); 28367 break; 28368 28369 default: 28370 gcc_unreachable (); 28371 } 28372 vec = tmp; 28373 use_vec_extr = true; 28374 elt = 0; 28375 break; 28376 28377 case V4SImode: 28378 use_vec_extr = TARGET_SSE4_1; 28379 if (use_vec_extr) 28380 break; 28381 28382 if (TARGET_SSE2) 28383 { 28384 switch (elt) 28385 { 28386 case 0: 28387 tmp = vec; 28388 break; 28389 28390 case 1: 28391 case 3: 28392 tmp = gen_reg_rtx (mode); 28393 emit_insn (gen_sse2_pshufd_1 (tmp, vec, 28394 GEN_INT (elt), GEN_INT (elt), 28395 GEN_INT (elt), GEN_INT (elt))); 28396 break; 28397 28398 case 
2: 28399 tmp = gen_reg_rtx (mode); 28400 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec)); 28401 break; 28402 28403 default: 28404 gcc_unreachable (); 28405 } 28406 vec = tmp; 28407 use_vec_extr = true; 28408 elt = 0; 28409 } 28410 else 28411 { 28412 /* For SSE1, we have to reuse the V4SF code. */ 28413 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target), 28414 gen_lowpart (V4SFmode, vec), elt); 28415 return; 28416 } 28417 break; 28418 28419 case V8HImode: 28420 use_vec_extr = TARGET_SSE2; 28421 break; 28422 case V4HImode: 28423 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A); 28424 break; 28425 28426 case V16QImode: 28427 use_vec_extr = TARGET_SSE4_1; 28428 break; 28429 28430 case V8QImode: 28431 /* ??? Could extract the appropriate HImode element and shift. */ 28432 default: 28433 break; 28434 } 28435 28436 if (use_vec_extr) 28437 { 28438 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt))); 28439 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp); 28440 28441 /* Let the rtl optimizers know about the zero extension performed. */ 28442 if (inner_mode == QImode || inner_mode == HImode) 28443 { 28444 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp); 28445 target = gen_lowpart (SImode, target); 28446 } 28447 28448 emit_insn (gen_rtx_SET (VOIDmode, target, tmp)); 28449 } 28450 else 28451 { 28452 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false); 28453 28454 emit_move_insn (mem, vec); 28455 28456 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode)); 28457 emit_move_insn (target, tmp); 28458 } 28459 } 28460 28461 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary 28462 pattern to reduce; DEST is the destination; IN is the input vector. */ 28463 28464 void 28465 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in) 28466 { 28467 rtx tmp1, tmp2, tmp3; 28468 28469 tmp1 = gen_reg_rtx (V4SFmode); 28470 tmp2 = gen_reg_rtx (V4SFmode); 28471 tmp3 = gen_reg_rtx (V4SFmode); 28472 28473 emit_insn (gen_sse_movhlps (tmp1, in, in)); 28474 emit_insn (fn (tmp2, tmp1, in)); 28475 28476 emit_insn (gen_sse_shufps_v4sf (tmp3, tmp2, tmp2, 28477 GEN_INT (1), GEN_INT (1), 28478 GEN_INT (1+4), GEN_INT (1+4))); 28479 emit_insn (fn (dest, tmp2, tmp3)); 28480 } 28481 28482 /* Target hook for scalar_mode_supported_p. */ 28483 static bool 28484 ix86_scalar_mode_supported_p (enum machine_mode mode) 28485 { 28486 if (DECIMAL_FLOAT_MODE_P (mode)) 28487 return true; 28488 else if (mode == TFmode) 28489 return true; 28490 else 28491 return default_scalar_mode_supported_p (mode); 28492 } 28493 28494 /* Implements target hook vector_mode_supported_p. */ 28495 static bool 28496 ix86_vector_mode_supported_p (enum machine_mode mode) 28497 { 28498 if (TARGET_SSE && VALID_SSE_REG_MODE (mode)) 28499 return true; 28500 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode)) 28501 return true; 28502 if (TARGET_AVX && VALID_AVX256_REG_MODE (mode)) 28503 return true; 28504 if (TARGET_MMX && VALID_MMX_REG_MODE (mode)) 28505 return true; 28506 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode)) 28507 return true; 28508 return false; 28509 } 28510 28511 /* Target hook for c_mode_for_suffix. */ 28512 static enum machine_mode 28513 ix86_c_mode_for_suffix (char suffix) 28514 { 28515 if (suffix == 'q') 28516 return TFmode; 28517 if (suffix == 'w') 28518 return XFmode; 28519 28520 return VOIDmode; 28521 } 28522 28523 /* Worker function for TARGET_MD_ASM_CLOBBERS. 
28524 28525 We do this in the new i386 backend to maintain source compatibility 28526 with the old cc0-based compiler. */ 28527 28528 static tree 28529 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED, 28530 tree inputs ATTRIBUTE_UNUSED, 28531 tree clobbers) 28532 { 28533 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"), 28534 clobbers); 28535 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"), 28536 clobbers); 28537 return clobbers; 28538 } 28539 28540 /* Implements target vector targetm.asm.encode_section_info. This 28541 is not used by netware. */ 28542 28543 static void ATTRIBUTE_UNUSED 28544 ix86_encode_section_info (tree decl, rtx rtl, int first) 28545 { 28546 default_encode_section_info (decl, rtl, first); 28547 28548 if (TREE_CODE (decl) == VAR_DECL 28549 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl)) 28550 && ix86_in_large_data_p (decl)) 28551 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR; 28552 } 28553 28554 /* Worker function for REVERSE_CONDITION. */ 28555 28556 enum rtx_code 28557 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode) 28558 { 28559 return (mode != CCFPmode && mode != CCFPUmode 28560 ? reverse_condition (code) 28561 : reverse_condition_maybe_unordered (code)); 28562 } 28563 28564 /* Output code to perform an x87 FP register move, from OPERANDS[1] 28565 to OPERANDS[0]. */ 28566 28567 const char * 28568 output_387_reg_move (rtx insn, rtx *operands) 28569 { 28570 if (REG_P (operands[0])) 28571 { 28572 if (REG_P (operands[1]) 28573 && find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 28574 { 28575 if (REGNO (operands[0]) == FIRST_STACK_REG) 28576 return output_387_ffreep (operands, 0); 28577 return "fstp\t%y0"; 28578 } 28579 if (STACK_TOP_P (operands[0])) 28580 return "fld%z1\t%y1"; 28581 return "fst\t%y0"; 28582 } 28583 else if (MEM_P (operands[0])) 28584 { 28585 gcc_assert (REG_P (operands[1])); 28586 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1]))) 28587 return "fstp%z0\t%y0"; 28588 else 28589 { 28590 /* There is no non-popping store to memory for XFmode. 28591 So if we need one, follow the store with a load. */ 28592 if (GET_MODE (operands[0]) == XFmode) 28593 return "fstp%z0\t%y0\n\tfld%z0\t%y0"; 28594 else 28595 return "fst%z0\t%y0"; 28596 } 28597 } 28598 else 28599 gcc_unreachable(); 28600 } 28601 28602 /* Output code to perform a conditional jump to LABEL, if C2 flag in 28603 FP status register is set. */ 28604 28605 void 28606 ix86_emit_fp_unordered_jump (rtx label) 28607 { 28608 rtx reg = gen_reg_rtx (HImode); 28609 rtx temp; 28610 28611 emit_insn (gen_x86_fnstsw_1 (reg)); 28612 28613 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_insn_for_size_p ())) 28614 { 28615 emit_insn (gen_x86_sahf_1 (reg)); 28616 28617 temp = gen_rtx_REG (CCmode, FLAGS_REG); 28618 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx); 28619 } 28620 else 28621 { 28622 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04))); 28623 28624 temp = gen_rtx_REG (CCNOmode, FLAGS_REG); 28625 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx); 28626 } 28627 28628 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp, 28629 gen_rtx_LABEL_REF (VOIDmode, label), 28630 pc_rtx); 28631 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp); 28632 28633 emit_jump_insn (temp); 28634 predict_jump (REG_BR_PROB_BASE * 10 / 100); 28635 } 28636 28637 /* Output code to perform a log1p XFmode calculation. 
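The expansion computes log1p (x) as ln (2) * log2 (1 + x), loading ln (2)
   with fldln2: when |x| is below 1 - sqrt(2)/2 (about 0.2928932, which is the
   input range for which fyl2xp1 is specified) it applies fyl2xp1 to x
   directly, otherwise it forms 1 + x explicitly and applies fyl2x. The long
   decimal constant in the comparison below is just 1 - sqrt(2)/2 written out.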
*/ 28638 28639 void ix86_emit_i387_log1p (rtx op0, rtx op1) 28640 { 28641 rtx label1 = gen_label_rtx (); 28642 rtx label2 = gen_label_rtx (); 28643 28644 rtx tmp = gen_reg_rtx (XFmode); 28645 rtx tmp2 = gen_reg_rtx (XFmode); 28646 28647 emit_insn (gen_absxf2 (tmp, op1)); 28648 emit_insn (gen_cmpxf (tmp, 28649 CONST_DOUBLE_FROM_REAL_VALUE ( 28650 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode), 28651 XFmode))); 28652 emit_jump_insn (gen_bge (label1)); 28653 28654 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 28655 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2)); 28656 emit_jump (label2); 28657 28658 emit_label (label1); 28659 emit_move_insn (tmp, CONST1_RTX (XFmode)); 28660 emit_insn (gen_addxf3 (tmp, op1, tmp)); 28661 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */ 28662 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2)); 28663 28664 emit_label (label2); 28665 } 28666 28667 /* Output code to perform a Newton-Rhapson approximation of a single precision 28668 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */ 28669 28670 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode) 28671 { 28672 rtx x0, x1, e0, e1, two; 28673 28674 x0 = gen_reg_rtx (mode); 28675 e0 = gen_reg_rtx (mode); 28676 e1 = gen_reg_rtx (mode); 28677 x1 = gen_reg_rtx (mode); 28678 28679 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode); 28680 28681 if (VECTOR_MODE_P (mode)) 28682 two = ix86_build_const_vector (SFmode, true, two); 28683 28684 two = force_reg (mode, two); 28685 28686 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */ 28687 28688 /* x0 = rcp(b) estimate */ 28689 emit_insn (gen_rtx_SET (VOIDmode, x0, 28690 gen_rtx_UNSPEC (mode, gen_rtvec (1, b), 28691 UNSPEC_RCP))); 28692 /* e0 = x0 * b */ 28693 emit_insn (gen_rtx_SET (VOIDmode, e0, 28694 gen_rtx_MULT (mode, x0, b))); 28695 /* e1 = 2. - e0 */ 28696 emit_insn (gen_rtx_SET (VOIDmode, e1, 28697 gen_rtx_MINUS (mode, two, e0))); 28698 /* x1 = x0 * e1 */ 28699 emit_insn (gen_rtx_SET (VOIDmode, x1, 28700 gen_rtx_MULT (mode, x0, e1))); 28701 /* res = a * x1 */ 28702 emit_insn (gen_rtx_SET (VOIDmode, res, 28703 gen_rtx_MULT (mode, a, x1))); 28704 } 28705 28706 /* Output code to perform a Newton-Rhapson approximation of a 28707 single precision floating point [reciprocal] square root. */ 28708 28709 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode, 28710 bool recip) 28711 { 28712 rtx x0, e0, e1, e2, e3, mthree, mhalf; 28713 REAL_VALUE_TYPE r; 28714 28715 x0 = gen_reg_rtx (mode); 28716 e0 = gen_reg_rtx (mode); 28717 e1 = gen_reg_rtx (mode); 28718 e2 = gen_reg_rtx (mode); 28719 e3 = gen_reg_rtx (mode); 28720 28721 real_from_integer (&r, VOIDmode, -3, -1, 0); 28722 mthree = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); 28723 28724 real_arithmetic (&r, NEGATE_EXPR, &dconsthalf, NULL); 28725 mhalf = CONST_DOUBLE_FROM_REAL_VALUE (r, SFmode); 28726 28727 if (VECTOR_MODE_P (mode)) 28728 { 28729 mthree = ix86_build_const_vector (SFmode, true, mthree); 28730 mhalf = ix86_build_const_vector (SFmode, true, mhalf); 28731 } 28732 28733 /* sqrt(a) = -0.5 * a * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) 28734 rsqrt(a) = -0.5 * rsqrtss(a) * (a * rsqrtss(a) * rsqrtss(a) - 3.0) */ 28735 28736 /* x0 = rsqrt(a) estimate */ 28737 emit_insn (gen_rtx_SET (VOIDmode, x0, 28738 gen_rtx_UNSPEC (mode, gen_rtvec (1, a), 28739 UNSPEC_RSQRT))); 28740 28741 /* If (a == 0.0) Filter out infinity to prevent NaN for sqrt(0.0). 
*/ 28742 if (!recip) 28743 { 28744 rtx zero, mask; 28745 28746 zero = gen_reg_rtx (mode); 28747 mask = gen_reg_rtx (mode); 28748 28749 zero = force_reg (mode, CONST0_RTX(mode)); 28750 emit_insn (gen_rtx_SET (VOIDmode, mask, 28751 gen_rtx_NE (mode, zero, a))); 28752 28753 emit_insn (gen_rtx_SET (VOIDmode, x0, 28754 gen_rtx_AND (mode, x0, mask))); 28755 } 28756 28757 /* e0 = x0 * a */ 28758 emit_insn (gen_rtx_SET (VOIDmode, e0, 28759 gen_rtx_MULT (mode, x0, a))); 28760 /* e1 = e0 * x0 */ 28761 emit_insn (gen_rtx_SET (VOIDmode, e1, 28762 gen_rtx_MULT (mode, e0, x0))); 28763 28764 /* e2 = e1 - 3. */ 28765 mthree = force_reg (mode, mthree); 28766 emit_insn (gen_rtx_SET (VOIDmode, e2, 28767 gen_rtx_PLUS (mode, e1, mthree))); 28768 28769 mhalf = force_reg (mode, mhalf); 28770 if (recip) 28771 /* e3 = -.5 * x0 */ 28772 emit_insn (gen_rtx_SET (VOIDmode, e3, 28773 gen_rtx_MULT (mode, x0, mhalf))); 28774 else 28775 /* e3 = -.5 * e0 */ 28776 emit_insn (gen_rtx_SET (VOIDmode, e3, 28777 gen_rtx_MULT (mode, e0, mhalf))); 28778 /* ret = e2 * e3 */ 28779 emit_insn (gen_rtx_SET (VOIDmode, res, 28780 gen_rtx_MULT (mode, e2, e3))); 28781 } 28782 28783 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */ 28784 28785 static void ATTRIBUTE_UNUSED 28786 i386_solaris_elf_named_section (const char *name, unsigned int flags, 28787 tree decl) 28788 { 28789 /* With Binutils 2.15, the "@unwind" marker must be specified on 28790 every occurrence of the ".eh_frame" section, not just the first 28791 one. */ 28792 if (TARGET_64BIT 28793 && strcmp (name, ".eh_frame") == 0) 28794 { 28795 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name, 28796 flags & SECTION_WRITE ? "aw" : "a"); 28797 return; 28798 } 28799 default_elf_asm_named_section (name, flags, decl); 28800 } 28801 28802 /* Return the mangling of TYPE if it is an extended fundamental type. */ 28803 28804 static const char * 28805 ix86_mangle_type (const_tree type) 28806 { 28807 type = TYPE_MAIN_VARIANT (type); 28808 28809 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE 28810 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE) 28811 return NULL; 28812 28813 switch (TYPE_MODE (type)) 28814 { 28815 case TFmode: 28816 /* __float128 is "g". */ 28817 return "g"; 28818 case XFmode: 28819 /* "long double" or __float80 is "e". */ 28820 return "e"; 28821 default: 28822 return NULL; 28823 } 28824 } 28825 28826 /* For 32-bit code we can save PIC register setup by using 28827 __stack_chk_fail_local hidden function instead of calling 28828 __stack_chk_fail directly. 64-bit code doesn't need to setup any PIC 28829 register, so it is better to call __stack_chk_fail directly. */ 28830 28831 static tree 28832 ix86_stack_protect_fail (void) 28833 { 28834 return TARGET_64BIT 28835 ? default_external_stack_protect_fail () 28836 : default_hidden_stack_protect_fail (); 28837 } 28838 28839 /* Select a format to encode pointers in exception handling data. CODE 28840 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is 28841 true if the symbol may be affected by dynamic relocations. 28842 28843 ??? All x86 object file formats are capable of representing this. 28844 After all, the relocation needed is the same as for the call insn. 28845 Whether or not a particular assembler allows us to enter such, I 28846 guess we'll have to see. 
*/ 28847 int 28848 asm_preferred_eh_data_format (int code, int global) 28849 { 28850 if (flag_pic) 28851 { 28852 int type = DW_EH_PE_sdata8; 28853 if (!TARGET_64BIT 28854 || ix86_cmodel == CM_SMALL_PIC 28855 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code))) 28856 type = DW_EH_PE_sdata4; 28857 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type; 28858 } 28859 if (ix86_cmodel == CM_SMALL 28860 || (ix86_cmodel == CM_MEDIUM && code)) 28861 return DW_EH_PE_udata4; 28862 return DW_EH_PE_absptr; 28863 } 28864 28865 /* Expand copysign from SIGN to the positive value ABS_VALUE 28866 storing in RESULT. If MASK is non-null, it shall be a mask to mask out 28867 the sign-bit. */ 28868 static void 28869 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask) 28870 { 28871 enum machine_mode mode = GET_MODE (sign); 28872 rtx sgn = gen_reg_rtx (mode); 28873 if (mask == NULL_RTX) 28874 { 28875 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false); 28876 if (!VECTOR_MODE_P (mode)) 28877 { 28878 /* We need to generate a scalar mode mask in this case. */ 28879 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); 28880 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); 28881 mask = gen_reg_rtx (mode); 28882 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); 28883 } 28884 } 28885 else 28886 mask = gen_rtx_NOT (mode, mask); 28887 emit_insn (gen_rtx_SET (VOIDmode, sgn, 28888 gen_rtx_AND (mode, mask, sign))); 28889 emit_insn (gen_rtx_SET (VOIDmode, result, 28890 gen_rtx_IOR (mode, abs_value, sgn))); 28891 } 28892 28893 /* Expand fabs (OP0) and return a new rtx that holds the result. The 28894 mask for masking out the sign-bit is stored in *SMASK, if that is 28895 non-null. */ 28896 static rtx 28897 ix86_expand_sse_fabs (rtx op0, rtx *smask) 28898 { 28899 enum machine_mode mode = GET_MODE (op0); 28900 rtx xa, mask; 28901 28902 xa = gen_reg_rtx (mode); 28903 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true); 28904 if (!VECTOR_MODE_P (mode)) 28905 { 28906 /* We need to generate a scalar mode mask in this case. */ 28907 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx)); 28908 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp); 28909 mask = gen_reg_rtx (mode); 28910 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp)); 28911 } 28912 emit_insn (gen_rtx_SET (VOIDmode, xa, 28913 gen_rtx_AND (mode, op0, mask))); 28914 28915 if (smask) 28916 *smask = mask; 28917 28918 return xa; 28919 } 28920 28921 /* Expands a comparison of OP0 with OP1 using comparison code CODE, 28922 swapping the operands if SWAP_OPERANDS is true. The expanded 28923 code is a forward jump to a newly created label in case the 28924 comparison is true. The generated label rtx is returned. 
*/ 28925 static rtx 28926 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1, 28927 bool swap_operands) 28928 { 28929 rtx label, tmp; 28930 28931 if (swap_operands) 28932 { 28933 tmp = op0; 28934 op0 = op1; 28935 op1 = tmp; 28936 } 28937 28938 label = gen_label_rtx (); 28939 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG); 28940 emit_insn (gen_rtx_SET (VOIDmode, tmp, 28941 gen_rtx_COMPARE (CCFPUmode, op0, op1))); 28942 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx); 28943 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp, 28944 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx); 28945 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp)); 28946 JUMP_LABEL (tmp) = label; 28947 28948 return label; 28949 } 28950 28951 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1 28952 using comparison code CODE. Operands are swapped for the comparison if 28953 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */ 28954 static rtx 28955 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1, 28956 bool swap_operands) 28957 { 28958 enum machine_mode mode = GET_MODE (op0); 28959 rtx mask = gen_reg_rtx (mode); 28960 28961 if (swap_operands) 28962 { 28963 rtx tmp = op0; 28964 op0 = op1; 28965 op1 = tmp; 28966 } 28967 28968 if (mode == DFmode) 28969 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1, 28970 gen_rtx_fmt_ee (code, mode, op0, op1))); 28971 else 28972 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1, 28973 gen_rtx_fmt_ee (code, mode, op0, op1))); 28974 28975 return mask; 28976 } 28977 28978 /* Generate and return a rtx of mode MODE for 2**n where n is the number 28979 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */ 28980 static rtx 28981 ix86_gen_TWO52 (enum machine_mode mode) 28982 { 28983 REAL_VALUE_TYPE TWO52r; 28984 rtx TWO52; 28985 28986 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23); 28987 TWO52 = const_double_from_real_value (TWO52r, mode); 28988 TWO52 = force_reg (mode, TWO52); 28989 28990 return TWO52; 28991 } 28992 28993 /* Expand SSE sequence for computing lround from OP1 storing 28994 into OP0. */ 28995 void 28996 ix86_expand_lround (rtx op0, rtx op1) 28997 { 28998 /* C code for the stuff we're doing below: 28999 tmp = op1 + copysign (nextafter (0.5, 0.0), op1) 29000 return (long)tmp; 29001 */ 29002 enum machine_mode mode = GET_MODE (op1); 29003 const struct real_format *fmt; 29004 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 29005 rtx adj; 29006 29007 /* load nextafter (0.5, 0.0) */ 29008 fmt = REAL_MODE_FORMAT (mode); 29009 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); 29010 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 29011 29012 /* adj = copysign (0.5, op1) */ 29013 adj = force_reg (mode, const_double_from_real_value (pred_half, mode)); 29014 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX); 29015 29016 /* adj = op1 + adj */ 29017 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT); 29018 29019 /* op0 = (imode)adj */ 29020 expand_fix (op0, adj, 0); 29021 } 29022 29023 /* Expand SSE2 sequence for computing lround from OPERAND1 storing 29024 into OPERAND0. */ 29025 void 29026 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor) 29027 { 29028 /* C code for the stuff we're doing below (for do_floor): 29029 xi = (long)op1; 29030 xi -= (double)xi > op1 ? 
1 : 0; 29031 return xi; 29032 */ 29033 enum machine_mode fmode = GET_MODE (op1); 29034 enum machine_mode imode = GET_MODE (op0); 29035 rtx ireg, freg, label, tmp; 29036 29037 /* reg = (long)op1 */ 29038 ireg = gen_reg_rtx (imode); 29039 expand_fix (ireg, op1, 0); 29040 29041 /* freg = (double)reg */ 29042 freg = gen_reg_rtx (fmode); 29043 expand_float (freg, ireg, 0); 29044 29045 /* ireg = (freg > op1) ? ireg - 1 : ireg */ 29046 label = ix86_expand_sse_compare_and_jump (UNLE, 29047 freg, op1, !do_floor); 29048 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS, 29049 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT); 29050 emit_move_insn (ireg, tmp); 29051 29052 emit_label (label); 29053 LABEL_NUSES (label) = 1; 29054 29055 emit_move_insn (op0, ireg); 29056 } 29057 29058 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the 29059 result in OPERAND0. */ 29060 void 29061 ix86_expand_rint (rtx operand0, rtx operand1) 29062 { 29063 /* C code for the stuff we're doing below: 29064 xa = fabs (operand1); 29065 if (!isless (xa, 2**52)) 29066 return operand1; 29067 xa = xa + 2**52 - 2**52; 29068 return copysign (xa, operand1); 29069 */ 29070 enum machine_mode mode = GET_MODE (operand0); 29071 rtx res, xa, label, TWO52, mask; 29072 29073 res = gen_reg_rtx (mode); 29074 emit_move_insn (res, operand1); 29075 29076 /* xa = abs (operand1) */ 29077 xa = ix86_expand_sse_fabs (res, &mask); 29078 29079 /* if (!isless (xa, TWO52)) goto label; */ 29080 TWO52 = ix86_gen_TWO52 (mode); 29081 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29082 29083 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29084 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); 29085 29086 ix86_sse_copysign_to_positive (res, xa, res, mask); 29087 29088 emit_label (label); 29089 LABEL_NUSES (label) = 1; 29090 29091 emit_move_insn (operand0, res); 29092 } 29093 29094 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing 29095 into OPERAND0. */ 29096 void 29097 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor) 29098 { 29099 /* C code for the stuff we expand below. 29100 double xa = fabs (x), x2; 29101 if (!isless (xa, TWO52)) 29102 return x; 29103 xa = xa + TWO52 - TWO52; 29104 x2 = copysign (xa, x); 29105 Compensate. Floor: 29106 if (x2 > x) 29107 x2 -= 1; 29108 Compensate. Ceil: 29109 if (x2 < x) 29110 x2 -= -1; 29111 return x2; 29112 */ 29113 enum machine_mode mode = GET_MODE (operand0); 29114 rtx xa, TWO52, tmp, label, one, res, mask; 29115 29116 TWO52 = ix86_gen_TWO52 (mode); 29117 29118 /* Temporary for holding the result, initialized to the input 29119 operand to ease control flow. */ 29120 res = gen_reg_rtx (mode); 29121 emit_move_insn (res, operand1); 29122 29123 /* xa = abs (operand1) */ 29124 xa = ix86_expand_sse_fabs (res, &mask); 29125 29126 /* if (!isless (xa, TWO52)) goto label; */ 29127 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29128 29129 /* xa = xa + TWO52 - TWO52; */ 29130 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29131 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT); 29132 29133 /* xa = copysign (xa, operand1) */ 29134 ix86_sse_copysign_to_positive (xa, xa, res, mask); 29135 29136 /* generate 1.0 or -1.0 */ 29137 one = force_reg (mode, 29138 const_double_from_real_value (do_floor 29139 ? dconst1 : dconstm1, mode)); 29140 29141 /* Compensate: xa = xa - (xa > operand1 ? 
1 : 0) */ 29142 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); 29143 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29144 gen_rtx_AND (mode, one, tmp))); 29145 /* We always need to subtract here to preserve signed zero. */ 29146 tmp = expand_simple_binop (mode, MINUS, 29147 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29148 emit_move_insn (res, tmp); 29149 29150 emit_label (label); 29151 LABEL_NUSES (label) = 1; 29152 29153 emit_move_insn (operand0, res); 29154 } 29155 29156 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing 29157 into OPERAND0. */ 29158 void 29159 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor) 29160 { 29161 /* C code for the stuff we expand below. 29162 double xa = fabs (x), x2; 29163 if (!isless (xa, TWO52)) 29164 return x; 29165 x2 = (double)(long)x; 29166 Compensate. Floor: 29167 if (x2 > x) 29168 x2 -= 1; 29169 Compensate. Ceil: 29170 if (x2 < x) 29171 x2 += 1; 29172 if (HONOR_SIGNED_ZEROS (mode)) 29173 return copysign (x2, x); 29174 return x2; 29175 */ 29176 enum machine_mode mode = GET_MODE (operand0); 29177 rtx xa, xi, TWO52, tmp, label, one, res, mask; 29178 29179 TWO52 = ix86_gen_TWO52 (mode); 29180 29181 /* Temporary for holding the result, initialized to the input 29182 operand to ease control flow. */ 29183 res = gen_reg_rtx (mode); 29184 emit_move_insn (res, operand1); 29185 29186 /* xa = abs (operand1) */ 29187 xa = ix86_expand_sse_fabs (res, &mask); 29188 29189 /* if (!isless (xa, TWO52)) goto label; */ 29190 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29191 29192 /* xa = (double)(long)x */ 29193 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29194 expand_fix (xi, res, 0); 29195 expand_float (xa, xi, 0); 29196 29197 /* generate 1.0 */ 29198 one = force_reg (mode, const_double_from_real_value (dconst1, mode)); 29199 29200 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */ 29201 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor); 29202 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29203 gen_rtx_AND (mode, one, tmp))); 29204 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS, 29205 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29206 emit_move_insn (res, tmp); 29207 29208 if (HONOR_SIGNED_ZEROS (mode)) 29209 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); 29210 29211 emit_label (label); 29212 LABEL_NUSES (label) = 1; 29213 29214 emit_move_insn (operand0, res); 29215 } 29216 29217 /* Expand SSE sequence for computing round from OPERAND1 storing 29218 into OPERAND0. Sequence that works without relying on DImode truncation 29219 via cvttsd2siq that is only available on 64bit targets. */ 29220 void 29221 ix86_expand_rounddf_32 (rtx operand0, rtx operand1) 29222 { 29223 /* C code for the stuff we expand below. 29224 double xa = fabs (x), xa2, x2; 29225 if (!isless (xa, TWO52)) 29226 return x; 29227 Using the absolute value and copying back sign makes 29228 -0.0 -> -0.0 correct. 29229 xa2 = xa + TWO52 - TWO52; 29230 Compensate. 29231 dxa = xa2 - xa; 29232 if (dxa <= -0.5) 29233 xa2 += 1; 29234 else if (dxa > 0.5) 29235 xa2 -= 1; 29236 x2 = copysign (xa2, x); 29237 return x2; 29238 */ 29239 enum machine_mode mode = GET_MODE (operand0); 29240 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask; 29241 29242 TWO52 = ix86_gen_TWO52 (mode); 29243 29244 /* Temporary for holding the result, initialized to the input 29245 operand to ease control flow. 
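A worked example of the compensation described above (values illustrative):
   for x = 2.5, xa2 = (2.5 + 2**52) - 2**52 evaluates to 2.0 under
   round-to-nearest-even, dxa = -0.5, and the dxa <= -0.5 adjustment adds 1.0,
   giving 3.0; halfway cases therefore round away from zero rather than to
   even.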
*/ 29246 res = gen_reg_rtx (mode); 29247 emit_move_insn (res, operand1); 29248 29249 /* xa = abs (operand1) */ 29250 xa = ix86_expand_sse_fabs (res, &mask); 29251 29252 /* if (!isless (xa, TWO52)) goto label; */ 29253 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29254 29255 /* xa2 = xa + TWO52 - TWO52; */ 29256 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29257 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT); 29258 29259 /* dxa = xa2 - xa; */ 29260 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT); 29261 29262 /* generate 0.5, 1.0 and -0.5 */ 29263 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode)); 29264 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT); 29265 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX, 29266 0, OPTAB_DIRECT); 29267 29268 /* Compensate. */ 29269 tmp = gen_reg_rtx (mode); 29270 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */ 29271 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false); 29272 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29273 gen_rtx_AND (mode, one, tmp))); 29274 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29275 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */ 29276 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false); 29277 emit_insn (gen_rtx_SET (VOIDmode, tmp, 29278 gen_rtx_AND (mode, one, tmp))); 29279 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT); 29280 29281 /* res = copysign (xa2, operand1) */ 29282 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask); 29283 29284 emit_label (label); 29285 LABEL_NUSES (label) = 1; 29286 29287 emit_move_insn (operand0, res); 29288 } 29289 29290 /* Expand SSE sequence for computing trunc from OPERAND1 storing 29291 into OPERAND0. */ 29292 void 29293 ix86_expand_trunc (rtx operand0, rtx operand1) 29294 { 29295 /* C code for SSE variant we expand below. 29296 double xa = fabs (x), x2; 29297 if (!isless (xa, TWO52)) 29298 return x; 29299 x2 = (double)(long)x; 29300 if (HONOR_SIGNED_ZEROS (mode)) 29301 return copysign (x2, x); 29302 return x2; 29303 */ 29304 enum machine_mode mode = GET_MODE (operand0); 29305 rtx xa, xi, TWO52, label, res, mask; 29306 29307 TWO52 = ix86_gen_TWO52 (mode); 29308 29309 /* Temporary for holding the result, initialized to the input 29310 operand to ease control flow. */ 29311 res = gen_reg_rtx (mode); 29312 emit_move_insn (res, operand1); 29313 29314 /* xa = abs (operand1) */ 29315 xa = ix86_expand_sse_fabs (res, &mask); 29316 29317 /* if (!isless (xa, TWO52)) goto label; */ 29318 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29319 29320 /* x = (double)(long)x */ 29321 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29322 expand_fix (xi, res, 0); 29323 expand_float (res, xi, 0); 29324 29325 if (HONOR_SIGNED_ZEROS (mode)) 29326 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask); 29327 29328 emit_label (label); 29329 LABEL_NUSES (label) = 1; 29330 29331 emit_move_insn (operand0, res); 29332 } 29333 29334 /* Expand SSE sequence for computing trunc from OPERAND1 storing 29335 into OPERAND0. */ 29336 void 29337 ix86_expand_truncdf_32 (rtx operand0, rtx operand1) 29338 { 29339 enum machine_mode mode = GET_MODE (operand0); 29340 rtx xa, mask, TWO52, label, one, res, smask, tmp; 29341 29342 /* C code for SSE variant we expand below. 
29343 double xa = fabs (x), x2; 29344 if (!isless (xa, TWO52)) 29345 return x; 29346 xa2 = xa + TWO52 - TWO52; 29347 Compensate: 29348 if (xa2 > xa) 29349 xa2 -= 1.0; 29350 x2 = copysign (xa2, x); 29351 return x2; 29352 */ 29353 29354 TWO52 = ix86_gen_TWO52 (mode); 29355 29356 /* Temporary for holding the result, initialized to the input 29357 operand to ease control flow. */ 29358 res = gen_reg_rtx (mode); 29359 emit_move_insn (res, operand1); 29360 29361 /* xa = abs (operand1) */ 29362 xa = ix86_expand_sse_fabs (res, &smask); 29363 29364 /* if (!isless (xa, TWO52)) goto label; */ 29365 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29366 29367 /* res = xa + TWO52 - TWO52; */ 29368 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT); 29369 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT); 29370 emit_move_insn (res, tmp); 29371 29372 /* generate 1.0 */ 29373 one = force_reg (mode, const_double_from_real_value (dconst1, mode)); 29374 29375 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */ 29376 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false); 29377 emit_insn (gen_rtx_SET (VOIDmode, mask, 29378 gen_rtx_AND (mode, mask, one))); 29379 tmp = expand_simple_binop (mode, MINUS, 29380 res, mask, NULL_RTX, 0, OPTAB_DIRECT); 29381 emit_move_insn (res, tmp); 29382 29383 /* res = copysign (res, operand1) */ 29384 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask); 29385 29386 emit_label (label); 29387 LABEL_NUSES (label) = 1; 29388 29389 emit_move_insn (operand0, res); 29390 } 29391 29392 /* Expand SSE sequence for computing round from OPERAND1 storing 29393 into OPERAND0. */ 29394 void 29395 ix86_expand_round (rtx operand0, rtx operand1) 29396 { 29397 /* C code for the stuff we're doing below: 29398 double xa = fabs (x); 29399 if (!isless (xa, TWO52)) 29400 return x; 29401 xa = (double)(long)(xa + nextafter (0.5, 0.0)); 29402 return copysign (xa, x); 29403 */ 29404 enum machine_mode mode = GET_MODE (operand0); 29405 rtx res, TWO52, xa, label, xi, half, mask; 29406 const struct real_format *fmt; 29407 REAL_VALUE_TYPE pred_half, half_minus_pred_half; 29408 29409 /* Temporary for holding the result, initialized to the input 29410 operand to ease control flow. */ 29411 res = gen_reg_rtx (mode); 29412 emit_move_insn (res, operand1); 29413 29414 TWO52 = ix86_gen_TWO52 (mode); 29415 xa = ix86_expand_sse_fabs (res, &mask); 29416 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false); 29417 29418 /* load nextafter (0.5, 0.0) */ 29419 fmt = REAL_MODE_FORMAT (mode); 29420 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode); 29421 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half); 29422 29423 /* xa = xa + 0.5 */ 29424 half = force_reg (mode, const_double_from_real_value (pred_half, mode)); 29425 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT); 29426 29427 /* xa = (double)(int64_t)xa */ 29428 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode); 29429 expand_fix (xi, xa, 0); 29430 expand_float (xa, xi, 0); 29431 29432 /* res = copysign (xa, operand1) */ 29433 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask); 29434 29435 emit_label (label); 29436 LABEL_NUSES (label) = 1; 29437 29438 emit_move_insn (operand0, res); 29439 } 29440 29441 29442 /* Validate whether a SSE5 instruction is valid or not. 29443 OPERANDS is the array of operands. 29444 NUM is the number of operands. 
USES_OC0 is true if the instruction uses OC0 and provides 4 variants.
29446 NUM_MEMORY is the maximum number of memory operands to accept.
29447 When COMMUTATIVE is set, operands 1 and 2 may be swapped. */
29448
29449 bool
29450 ix86_sse5_valid_op_p (rtx operands[], rtx insn ATTRIBUTE_UNUSED, int num,
29451 bool uses_oc0, int num_memory, bool commutative)
29452 {
29453 int mem_mask;
29454 int mem_count;
29455 int i;
29456
29457 /* Count the number of memory arguments */
29458 mem_mask = 0;
29459 mem_count = 0;
29460 for (i = 0; i < num; i++)
29461 {
29462 enum machine_mode mode = GET_MODE (operands[i]);
29463 if (register_operand (operands[i], mode))
29464 ;
29465
29466 else if (memory_operand (operands[i], mode))
29467 {
29468 mem_mask |= (1 << i);
29469 mem_count++;
29470 }
29471
29472 else
29473 {
29474 rtx pattern = PATTERN (insn);
29475
29476 /* allow 0 for pcmov */
29477 if (GET_CODE (pattern) != SET
29478 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE
29479 || i < 2
29480 || operands[i] != CONST0_RTX (mode))
29481 return false;
29482 }
29483 }
29484
29485 /* Special case pmacsdq{l,h} where we allow the 3rd argument to be
29486 a memory operation. */
29487 if (num_memory < 0)
29488 {
29489 num_memory = -num_memory;
29490 if ((mem_mask & (1 << (num-1))) != 0)
29491 {
29492 mem_mask &= ~(1 << (num-1));
29493 mem_count--;
29494 }
29495 }
29496
29497 /* If there were no memory operations, allow the insn */
29498 if (mem_mask == 0)
29499 return true;
29500
29501 /* Do not allow the destination register to be a memory operand. */
29502 else if (mem_mask & (1 << 0))
29503 return false;
29504
29505 /* If there are too many memory operations, disallow the instruction. The
29506 hardware allows only one memory reference, but before register allocation
29507 we sometimes accept two for certain insns so that
29508 code like the following can be optimized:
29509
29510 float fmadd (float *a, float *b, float *c) { return (*a * *b) + *c; }
29511
29512 or similar cases that are vectorized into the fmaddss
29513 instruction. */
29514 else if (mem_count > num_memory)
29515 return false;
29516
29517 /* Don't allow more than one memory operation if not optimizing. */
29518 else if (mem_count > 1 && !optimize)
29519 return false;
29520
29521 else if (num == 4 && mem_count == 1)
29522 {
29523 /* formats (destination is the first argument), example fmaddss:
29524 xmm1, xmm1, xmm2, xmm3/mem
29525 xmm1, xmm1, xmm2/mem, xmm3
29526 xmm1, xmm2, xmm3/mem, xmm1
29527 xmm1, xmm2/mem, xmm3, xmm1 */
29528 if (uses_oc0)
29529 return ((mem_mask == (1 << 1))
29530 || (mem_mask == (1 << 2))
29531 || (mem_mask == (1 << 3)));
29532
29533 /* format, example pmacsdd:
29534 xmm1, xmm2, xmm3/mem, xmm1 */
29535 if (commutative)
29536 return (mem_mask == (1 << 2) || mem_mask == (1 << 1));
29537 else
29538 return (mem_mask == (1 << 2));
29539 }
29540
29541 else if (num == 4 && num_memory == 2)
29542 {
29543 /* If there are two memory operations, we can load one of the memory ops
29544 into the destination register. This handles the
29545 multiply/add ops, where the combiner has given both the multiply
29546 and the add insns a memory operand. We have to be careful
29547 that the destination does not overlap any of the inputs.
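ix86_expand_sse5_multiple_memory, below, fixes such an insn up by first loading one of the memory operands into the destination register.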
*/
29548 rtx op0 = operands[0];
29549
29550 if (reg_mentioned_p (op0, operands[1])
29551 || reg_mentioned_p (op0, operands[2])
29552 || reg_mentioned_p (op0, operands[3]))
29553 return false;
29554
29555 /* formats (destination is the first argument), example fmaddss:
29556 xmm1, xmm1, xmm2, xmm3/mem
29557 xmm1, xmm1, xmm2/mem, xmm3
29558 xmm1, xmm2, xmm3/mem, xmm1
29559 xmm1, xmm2/mem, xmm3, xmm1
29560
29561 For the oc0 case, we will load either operands[1] or operands[3] into
29562 operands[0], so any combination of 2 memory operands is ok. */
29563 if (uses_oc0)
29564 return true;
29565
29566 /* format, example pmacsdd:
29567 xmm1, xmm2, xmm3/mem, xmm1
29568
29569 For the integer multiply/add instructions, be more restrictive and
29570 require operands[2] and operands[3] to be the memory operands. */
29571 if (commutative)
29572 return (mem_mask == ((1 << 1) | (1 << 3)) || mem_mask == ((1 << 2) | (1 << 3)));
29573 else
29574 return (mem_mask == ((1 << 2) | (1 << 3)));
29575 }
29576
29577 else if (num == 3 && num_memory == 1)
29578 {
29579 /* formats, example protb:
29580 xmm1, xmm2, xmm3/mem
29581 xmm1, xmm2/mem, xmm3 */
29582 if (uses_oc0)
29583 return ((mem_mask == (1 << 1)) || (mem_mask == (1 << 2)));
29584
29585 /* format, example comeq:
29586 xmm1, xmm2, xmm3/mem */
29587 else
29588 return (mem_mask == (1 << 2));
29589 }
29590
29591 else
29592 gcc_unreachable ();
29593
29594 return false;
29595 }
29596
29597
29598 /* Fix up an SSE5 instruction that has 2 memory input references into a form
29599 the hardware will allow, by using the destination register to load one of
29600 the memory operands. Presently this is used by the multiply/add routines to
29601 allow 2 memory references. */
29602
29603 void
29604 ix86_expand_sse5_multiple_memory (rtx operands[],
29605 int num,
29606 enum machine_mode mode)
29607 {
29608 rtx op0 = operands[0];
29609 if (num != 4
29610 || memory_operand (op0, mode)
29611 || reg_mentioned_p (op0, operands[1])
29612 || reg_mentioned_p (op0, operands[2])
29613 || reg_mentioned_p (op0, operands[3]))
29614 gcc_unreachable ();
29615
29616 /* For 2 memory operands, pick either operands[1] or operands[3] to move into
29617 the destination register. */
29618 if (memory_operand (operands[1], mode))
29619 {
29620 emit_move_insn (op0, operands[1]);
29621 operands[1] = op0;
29622 }
29623 else if (memory_operand (operands[3], mode))
29624 {
29625 emit_move_insn (op0, operands[3]);
29626 operands[3] = op0;
29627 }
29628 else
29629 gcc_unreachable ();
29630
29631 return;
29632 }
29633
29634
29635 /* Table of valid machine attributes. */
29636 static const struct attribute_spec ix86_attribute_table[] =
29637 {
29638 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
29639 /* Stdcall attribute says callee is responsible for popping arguments
29640 if they are not variable. */
29641 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29642 /* Fastcall attribute says callee is responsible for popping arguments
29643 if they are not variable. */
29644 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29645 /* Cdecl attribute says the callee is a normal C declaration */
29646 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29647 /* Regparm attribute specifies how many integer arguments are to be
29648 passed in registers. */
29649 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
29650 /* Sseregparm attribute says we are using x86_64 calling conventions
29651 for FP arguments.
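That is, SFmode and DFmode values are passed and returned in SSE registers even under the 32-bit ABI.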
*/
29652 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
29653 /* force_align_arg_pointer says this function realigns the stack at entry. */
29654 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
29655 false, true, true, ix86_handle_cconv_attribute },
29656 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
29657 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
29658 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
29659 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
29660 #endif
29661 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29662 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
29663 #ifdef SUBTARGET_ATTRIBUTE_TABLE
29664 SUBTARGET_ATTRIBUTE_TABLE,
29665 #endif
29666 /* ms_abi and sysv_abi calling convention function attributes. */
29667 { "ms_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29668 { "sysv_abi", 0, 0, false, true, true, ix86_handle_abi_attribute },
29669 /* End element. */
29670 { NULL, 0, 0, false, false, false, NULL }
29671 };
29672
29673 /* Implement targetm.vectorize.builtin_vectorization_cost. */
29674 static int
29675 x86_builtin_vectorization_cost (bool runtime_test)
29676 {
29677 /* If the branch of the runtime test is taken, i.e. the vectorized
29678 version is skipped, this incurs a misprediction cost (because the
29679 vectorized version is expected to be the fall-through). So we subtract
29680 the latency of a mispredicted branch from the costs that are incurred
29681 when the vectorized version is executed.
29682
29683 TODO: The values in individual target tables have to be tuned or new
29684 fields may be needed. For example, on K8 the default branch path is the
29685 not-taken path. If the taken path is predicted correctly, the minimum
29686 penalty of going down the taken-path is 1 cycle. If the taken-path is
29687 not predicted correctly, then the minimum penalty is 10 cycles. */
29688
29689 if (runtime_test)
29690 {
29691 return (-(ix86_cost->cond_taken_branch_cost));
29692 }
29693 else
29694 return 0;
29695 }
29696
29697 /* This function returns the calling-ABI-specific va_list type node,
29698 i.e. the va_list type appropriate for FNDECL. */
29699
29700 tree
29701 ix86_fn_abi_va_list (tree fndecl)
29702 {
29703 int abi;
29704
29705 if (!TARGET_64BIT)
29706 return va_list_type_node;
29707 gcc_assert (fndecl != NULL_TREE);
29708 abi = ix86_function_abi ((const_tree) fndecl);
29709
29710 if (abi == MS_ABI)
29711 return ms_va_list_type_node;
29712 else
29713 return sysv_va_list_type_node;
29714 }
29715
29716 /* Returns the canonical va_list type specified by TYPE. If there
29717 is no valid TYPE provided, it returns NULL_TREE. */
29718
29719 tree
29720 ix86_canonical_va_list_type (tree type)
29721 {
29722 tree wtype, htype;
29723
29724 /* Resolve references and pointers to va_list type. */
29725 if (INDIRECT_REF_P (type))
29726 type = TREE_TYPE (type);
29727 else if (POINTER_TYPE_P (type) && POINTER_TYPE_P (TREE_TYPE (type)))
29728 type = TREE_TYPE (type);
29729
29730 if (TARGET_64BIT)
29731 {
29732 wtype = va_list_type_node;
29733 gcc_assert (wtype != NULL_TREE);
29734 htype = type;
29735 if (TREE_CODE (wtype) == ARRAY_TYPE)
29736 {
29737 /* If va_list is an array type, the argument may have decayed
29738 to a pointer type, e.g. by being passed to another function.
29739 In that case, unwrap both types so that we can compare the
29740 underlying records.
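For example, the 64-bit generic va_list is a one-element array of struct __va_list_tag, so a parameter of that type reaches this point as a pointer to the record.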
*/ 29741 if (TREE_CODE (htype) == ARRAY_TYPE 29742 || POINTER_TYPE_P (htype)) 29743 { 29744 wtype = TREE_TYPE (wtype); 29745 htype = TREE_TYPE (htype); 29746 } 29747 } 29748 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) 29749 return va_list_type_node; 29750 wtype = sysv_va_list_type_node; 29751 gcc_assert (wtype != NULL_TREE); 29752 htype = type; 29753 if (TREE_CODE (wtype) == ARRAY_TYPE) 29754 { 29755 /* If va_list is an array type, the argument may have decayed 29756 to a pointer type, e.g. by being passed to another function. 29757 In that case, unwrap both types so that we can compare the 29758 underlying records. */ 29759 if (TREE_CODE (htype) == ARRAY_TYPE 29760 || POINTER_TYPE_P (htype)) 29761 { 29762 wtype = TREE_TYPE (wtype); 29763 htype = TREE_TYPE (htype); 29764 } 29765 } 29766 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) 29767 return sysv_va_list_type_node; 29768 wtype = ms_va_list_type_node; 29769 gcc_assert (wtype != NULL_TREE); 29770 htype = type; 29771 if (TREE_CODE (wtype) == ARRAY_TYPE) 29772 { 29773 /* If va_list is an array type, the argument may have decayed 29774 to a pointer type, e.g. by being passed to another function. 29775 In that case, unwrap both types so that we can compare the 29776 underlying records. */ 29777 if (TREE_CODE (htype) == ARRAY_TYPE 29778 || POINTER_TYPE_P (htype)) 29779 { 29780 wtype = TREE_TYPE (wtype); 29781 htype = TREE_TYPE (htype); 29782 } 29783 } 29784 if (TYPE_MAIN_VARIANT (wtype) == TYPE_MAIN_VARIANT (htype)) 29785 return ms_va_list_type_node; 29786 return NULL_TREE; 29787 } 29788 return std_canonical_va_list_type (type); 29789 } 29790 29791 /* Iterate through the target-specific builtin types for va_list. 29792 IDX denotes the iterator, *PTREE is set to the result type of 29793 the va_list builtin, and *PNAME to its internal type. 29794 Returns zero if there is no element for this index, otherwise 29795 IDX should be increased upon the next call. 29796 Note, do not iterate a base builtin's name like __builtin_va_list. 29797 Used from c_common_nodes_and_builtins. */ 29798 29799 int 29800 ix86_enum_va_list (int idx, const char **pname, tree *ptree) 29801 { 29802 if (!TARGET_64BIT) 29803 return 0; 29804 switch (idx) { 29805 case 0: 29806 *ptree = ms_va_list_type_node; 29807 *pname = "__builtin_ms_va_list"; 29808 break; 29809 case 1: 29810 *ptree = sysv_va_list_type_node; 29811 *pname = "__builtin_sysv_va_list"; 29812 break; 29813 default: 29814 return 0; 29815 } 29816 return 1; 29817 } 29818 29819 /* Initialize the GCC target structure. 
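Each hook that deviates from the defaults in target-def.h is #undef'd and redefined below; targetm itself is built from TARGET_INITIALIZER at the end of the file.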
*/ 29820 #undef TARGET_RETURN_IN_MEMORY 29821 #define TARGET_RETURN_IN_MEMORY ix86_return_in_memory 29822 29823 #undef TARGET_ATTRIBUTE_TABLE 29824 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table 29825 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 29826 # undef TARGET_MERGE_DECL_ATTRIBUTES 29827 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes 29828 #endif 29829 29830 #undef TARGET_COMP_TYPE_ATTRIBUTES 29831 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes 29832 29833 #undef TARGET_INIT_BUILTINS 29834 #define TARGET_INIT_BUILTINS ix86_init_builtins 29835 #undef TARGET_EXPAND_BUILTIN 29836 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin 29837 29838 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION 29839 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \ 29840 ix86_builtin_vectorized_function 29841 29842 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION 29843 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion 29844 29845 #undef TARGET_BUILTIN_RECIPROCAL 29846 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal 29847 29848 #undef TARGET_ASM_FUNCTION_EPILOGUE 29849 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue 29850 29851 #undef TARGET_ENCODE_SECTION_INFO 29852 #ifndef SUBTARGET_ENCODE_SECTION_INFO 29853 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info 29854 #else 29855 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO 29856 #endif 29857 29858 #undef TARGET_ASM_OPEN_PAREN 29859 #define TARGET_ASM_OPEN_PAREN "" 29860 #undef TARGET_ASM_CLOSE_PAREN 29861 #define TARGET_ASM_CLOSE_PAREN "" 29862 29863 #undef TARGET_ASM_ALIGNED_HI_OP 29864 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT 29865 #undef TARGET_ASM_ALIGNED_SI_OP 29866 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG 29867 #ifdef ASM_QUAD 29868 #undef TARGET_ASM_ALIGNED_DI_OP 29869 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD 29870 #endif 29871 29872 #undef TARGET_ASM_UNALIGNED_HI_OP 29873 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP 29874 #undef TARGET_ASM_UNALIGNED_SI_OP 29875 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP 29876 #undef TARGET_ASM_UNALIGNED_DI_OP 29877 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP 29878 29879 #undef TARGET_SCHED_ADJUST_COST 29880 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost 29881 #undef TARGET_SCHED_ISSUE_RATE 29882 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate 29883 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD 29884 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \ 29885 ia32_multipass_dfa_lookahead 29886 29887 #undef TARGET_FUNCTION_OK_FOR_SIBCALL 29888 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall 29889 29890 #ifdef HAVE_AS_TLS 29891 #undef TARGET_HAVE_TLS 29892 #define TARGET_HAVE_TLS true 29893 #endif 29894 #undef TARGET_CANNOT_FORCE_CONST_MEM 29895 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem 29896 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P 29897 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true 29898 29899 #undef TARGET_DELEGITIMIZE_ADDRESS 29900 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address 29901 29902 #undef TARGET_MS_BITFIELD_LAYOUT_P 29903 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p 29904 29905 #if TARGET_MACHO 29906 #undef TARGET_BINDS_LOCAL_P 29907 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p 29908 #endif 29909 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES 29910 #undef TARGET_BINDS_LOCAL_P 29911 #define TARGET_BINDS_LOCAL_P 
i386_pe_binds_local_p 29912 #endif 29913 29914 #undef TARGET_ASM_OUTPUT_MI_THUNK 29915 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk 29916 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK 29917 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk 29918 29919 #undef TARGET_ASM_FILE_START 29920 #define TARGET_ASM_FILE_START x86_file_start 29921 29922 #undef TARGET_DEFAULT_TARGET_FLAGS 29923 #define TARGET_DEFAULT_TARGET_FLAGS \ 29924 (TARGET_DEFAULT \ 29925 | TARGET_SUBTARGET_DEFAULT \ 29926 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT) 29927 29928 #undef TARGET_HANDLE_OPTION 29929 #define TARGET_HANDLE_OPTION ix86_handle_option 29930 29931 #undef TARGET_RTX_COSTS 29932 #define TARGET_RTX_COSTS ix86_rtx_costs 29933 #undef TARGET_ADDRESS_COST 29934 #define TARGET_ADDRESS_COST ix86_address_cost 29935 29936 #undef TARGET_FIXED_CONDITION_CODE_REGS 29937 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs 29938 #undef TARGET_CC_MODES_COMPATIBLE 29939 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible 29940 29941 #undef TARGET_MACHINE_DEPENDENT_REORG 29942 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg 29943 29944 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE 29945 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE ix86_builtin_setjmp_frame_value 29946 29947 #undef TARGET_BUILD_BUILTIN_VA_LIST 29948 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list 29949 29950 #undef TARGET_FN_ABI_VA_LIST 29951 #define TARGET_FN_ABI_VA_LIST ix86_fn_abi_va_list 29952 29953 #undef TARGET_CANONICAL_VA_LIST_TYPE 29954 #define TARGET_CANONICAL_VA_LIST_TYPE ix86_canonical_va_list_type 29955 29956 #undef TARGET_EXPAND_BUILTIN_VA_START 29957 #define TARGET_EXPAND_BUILTIN_VA_START ix86_va_start 29958 29959 #undef TARGET_MD_ASM_CLOBBERS 29960 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers 29961 29962 #undef TARGET_PROMOTE_PROTOTYPES 29963 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true 29964 #undef TARGET_STRUCT_VALUE_RTX 29965 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx 29966 #undef TARGET_SETUP_INCOMING_VARARGS 29967 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs 29968 #undef TARGET_MUST_PASS_IN_STACK 29969 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack 29970 #undef TARGET_PASS_BY_REFERENCE 29971 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference 29972 #undef TARGET_INTERNAL_ARG_POINTER 29973 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer 29974 #undef TARGET_UPDATE_STACK_BOUNDARY 29975 #define TARGET_UPDATE_STACK_BOUNDARY ix86_update_stack_boundary 29976 #undef TARGET_GET_DRAP_RTX 29977 #define TARGET_GET_DRAP_RTX ix86_get_drap_rtx 29978 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC 29979 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec 29980 #undef TARGET_STRICT_ARGUMENT_NAMING 29981 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true 29982 29983 #undef TARGET_GIMPLIFY_VA_ARG_EXPR 29984 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg 29985 29986 #undef TARGET_SCALAR_MODE_SUPPORTED_P 29987 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p 29988 29989 #undef TARGET_VECTOR_MODE_SUPPORTED_P 29990 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p 29991 29992 #undef TARGET_C_MODE_FOR_SUFFIX 29993 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix 29994 29995 #ifdef HAVE_AS_TLS 29996 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL 29997 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel 29998 #endif 29999 30000 #ifdef 
SUBTARGET_INSERT_ATTRIBUTES 30001 #undef TARGET_INSERT_ATTRIBUTES 30002 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES 30003 #endif 30004 30005 #undef TARGET_MANGLE_TYPE 30006 #define TARGET_MANGLE_TYPE ix86_mangle_type 30007 30008 #undef TARGET_STACK_PROTECT_FAIL 30009 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail 30010 30011 #undef TARGET_FUNCTION_VALUE 30012 #define TARGET_FUNCTION_VALUE ix86_function_value 30013 30014 #undef TARGET_SECONDARY_RELOAD 30015 #define TARGET_SECONDARY_RELOAD ix86_secondary_reload 30016 30017 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST 30018 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST x86_builtin_vectorization_cost 30019 30020 #undef TARGET_SET_CURRENT_FUNCTION 30021 #define TARGET_SET_CURRENT_FUNCTION ix86_set_current_function 30022 30023 #undef TARGET_OPTION_VALID_ATTRIBUTE_P 30024 #define TARGET_OPTION_VALID_ATTRIBUTE_P ix86_valid_target_attribute_p 30025 30026 #undef TARGET_OPTION_SAVE 30027 #define TARGET_OPTION_SAVE ix86_function_specific_save 30028 30029 #undef TARGET_OPTION_RESTORE 30030 #define TARGET_OPTION_RESTORE ix86_function_specific_restore 30031 30032 #undef TARGET_OPTION_PRINT 30033 #define TARGET_OPTION_PRINT ix86_function_specific_print 30034 30035 #undef TARGET_OPTION_CAN_INLINE_P 30036 #define TARGET_OPTION_CAN_INLINE_P ix86_can_inline_p 30037 30038 #undef TARGET_EXPAND_TO_RTL_HOOK 30039 #define TARGET_EXPAND_TO_RTL_HOOK ix86_maybe_switch_abi 30040 30041 struct gcc_target targetm = TARGET_INITIALIZER; 30042 30043 #include "gt-i386.h"
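/* Illustration only, not part of the compiler: the TWO52 add/subtract trick
   used by ix86_expand_truncdf_32 above, written as a small stand-alone C
   program so the compensation step can be checked on any IEEE-double host.
   The helper name trunc_via_two52 and the test values are made up for this
   sketch; it assumes the default round-to-nearest-even mode, which is also
   what the expander assumes.

     #include <math.h>
     #include <stdio.h>

     static double
     trunc_via_two52 (double x)
     {
       const double two52 = 4503599627370496.0;  // 2**52
       double xa = fabs (x), x2;

       if (!(xa < two52))
         return x;                // already integral (or NaN)

       x2 = xa + two52 - two52;   // round xa to an integer, to-nearest
       if (x2 > xa)               // rounded up, so compensate to truncate
         x2 -= 1.0;

       return copysign (x2, x);
     }

     int
     main (void)
     {
       printf ("%g %g %g\n", trunc_via_two52 (2.5),
               trunc_via_two52 (-2.5), trunc_via_two52 (0.75));
       return 0;                  // prints: 2 -2 0
     }
*/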